blob: 90bb0e32b29d7908452c20df64aa56d97389f71f [file] [log] [blame]
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the TargetLoweringX8632 class, which
11// consists almost entirely of the lowering sequence for each
Jim Stichnoth70d0a052014-11-14 15:53:46 -080012// high-level instruction.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070013//
14//===----------------------------------------------------------------------===//
15
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070016#include "llvm/Support/MathExtras.h"
17
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070018#include "IceCfg.h"
19#include "IceCfgNode.h"
Jim Stichnothbfb03e52014-08-26 10:29:05 -070020#include "IceClFlags.h"
Jim Stichnotha18cc9c2014-09-30 19:10:22 -070021#include "IceDefs.h"
Jan Voungec270732015-01-12 17:00:22 -080022#include "IceELFObjectWriter.h"
Karl Schimpfe3f64d02014-10-07 10:38:22 -070023#include "IceGlobalInits.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070024#include "IceInstX8632.h"
Jim Stichnoth336f6c42014-10-30 15:01:31 -070025#include "IceLiveness.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070026#include "IceOperand.h"
Jan Voungbd385e42014-09-18 18:18:10 -070027#include "IceRegistersX8632.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070028#include "IceTargetLoweringX8632.def"
29#include "IceTargetLoweringX8632.h"
Jan Voung8acded02014-09-22 18:02:25 -070030#include "IceUtils.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070031
32namespace Ice {
33
34namespace {
35
Matt Walace0ca8f2014-07-24 12:34:20 -070036// The following table summarizes the logic for lowering the fcmp
37// instruction. There is one table entry for each of the 16 conditions.
38//
39// The first four columns describe the case when the operands are
40// floating point scalar values. A comment in lowerFcmp() describes the
41// lowering template. In the most general case, there is a compare
42// followed by two conditional branches, because some fcmp conditions
43// don't map to a single x86 conditional branch. However, in many cases
44// it is possible to swap the operands in the comparison and have a
45// single conditional branch. Since it's quite tedious to validate the
46// table by hand, good execution tests are helpful.
47//
48// The last two columns describe the case when the operands are vectors
49// of floating point values. For most fcmp conditions, there is a clear
50// mapping to a single x86 cmpps instruction variant. Some fcmp
51// conditions require special code to handle and these are marked in the
52// table with a Cmpps_Invalid predicate.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070053const struct TableFcmp_ {
54 uint32_t Default;
Matt Walace0ca8f2014-07-24 12:34:20 -070055 bool SwapScalarOperands;
Jan Voungbd385e42014-09-18 18:18:10 -070056 CondX86::BrCond C1, C2;
Matt Walace0ca8f2014-07-24 12:34:20 -070057 bool SwapVectorOperands;
Jan Voungbd385e42014-09-18 18:18:10 -070058 CondX86::CmppsCond Predicate;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070059} TableFcmp[] = {
Matt Walace0ca8f2014-07-24 12:34:20 -070060#define X(val, dflt, swapS, C1, C2, swapV, pred) \
Jan Voungbd385e42014-09-18 18:18:10 -070061 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070062 ,
Jim Stichnothd9dc82e2015-03-03 17:06:33 -080063 FCMPX8632_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070064#undef X
Jim Stichnothdd842db2015-01-27 12:53:53 -080065};
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070066const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
67
68// The following table summarizes the logic for lowering the icmp instruction
69// for i32 and narrower types. Each icmp condition has a clear mapping to an
70// x86 conditional branch instruction.
71
// Lowering table for icmp on i32 and narrower types, keyed by InstIcmp
// condition.  Each entry is the single x86 branch condition to use.
const struct TableIcmp32_ {
  CondX86::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C_32 }                                                            \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
82
83// The following table summarizes the logic for lowering the icmp instruction
84// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
85// conditional branches are needed. For the other conditions, three separate
86// conditional branches are needed.
// Lowering table for icmp on i64, keyed by InstIcmp condition.  Up to
// three conditional branches are needed (Eq/Ne use only two).
const struct TableIcmp64_ {
  CondX86::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 }                           \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
97
Jan Voungbd385e42014-09-18 18:18:10 -070098CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070099 size_t Index = static_cast<size_t>(Cond);
100 assert(Index < TableIcmp32Size);
101 return TableIcmp32[Index].Mapping;
102}
103
Matt Wala49889232014-07-18 12:45:09 -0700104const struct TableTypeX8632Attributes_ {
105 Type InVectorElementType;
106} TableTypeX8632Attributes[] = {
Jim Stichnothbca2f652014-11-01 10:13:54 -0700107#define X(tag, elementty, cvt, sdss, pack, width, fld) \
Matt Wala49889232014-07-18 12:45:09 -0700108 { elementty } \
109 ,
Jim Stichnothd9dc82e2015-03-03 17:06:33 -0800110 ICETYPEX8632_TABLE
Matt Wala49889232014-07-18 12:45:09 -0700111#undef X
Jim Stichnothdd842db2015-01-27 12:53:53 -0800112};
Matt Wala49889232014-07-18 12:45:09 -0700113const size_t TableTypeX8632AttributesSize =
114 llvm::array_lengthof(TableTypeX8632Attributes);
115
116// Return the type which the elements of the vector have in the X86
117// representation of the vector.
118Type getInVectorElementType(Type Ty) {
119 assert(isVectorType(Ty));
120 size_t Index = static_cast<size_t>(Ty);
Jim Stichnoth6e992142014-07-30 14:45:20 -0700121 (void)Index;
Matt Wala49889232014-07-18 12:45:09 -0700122 assert(Index < TableTypeX8632AttributesSize);
123 return TableTypeX8632Attributes[Ty].InVectorElementType;
124}
125
Matt Wala45a06232014-07-09 16:33:22 -0700126// The maximum number of arguments to pass in XMM registers
Matt Wala105b7042014-08-11 19:56:19 -0700127const uint32_t X86_MAX_XMM_ARGS = 4;
Matt Wala83b80362014-07-16 10:21:30 -0700128// The number of bits in a byte
Matt Wala105b7042014-08-11 19:56:19 -0700129const uint32_t X86_CHAR_BIT = 8;
130// Stack alignment
131const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
132// Size of the return address on the stack
133const uint32_t X86_RET_IP_SIZE_BYTES = 4;
Matt Walac3302742014-08-15 16:21:56 -0700134// The number of different NOP instructions
135const uint32_t X86_NUM_NOP_VARIANTS = 5;
Matt Wala105b7042014-08-11 19:56:19 -0700136
Matt Walad4799f42014-08-14 14:24:12 -0700137// Value is in bytes. Return Value adjusted to the next highest multiple
138// of the stack alignment.
139uint32_t applyStackAlignment(uint32_t Value) {
Jan Voung55500db2015-05-26 14:25:40 -0700140 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
Matt Wala105b7042014-08-11 19:56:19 -0700141}
Matt Wala45a06232014-07-09 16:33:22 -0700142
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700143// In some cases, there are x-macros tables for both high-level and
144// low-level instructions/operands that use the same enum key value.
145// The tables are kept separate to maintain a proper separation
Jim Stichnothfac55172014-10-01 13:06:21 -0700146// between abstraction layers. There is a risk that the tables could
147// get out of sync if enum values are reordered or if entries are
148// added or deleted. The following dummy namespaces use
149// static_asserts to ensure everything is kept in sync.
150
// Validate the enum values in FCMPX8632_TABLE: the low-level table must
// stay key-for-key in sync with the high-level ICEINSTFCMP_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
  FCMPX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
FCMPX8632_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
ICEINSTFCMP_TABLE
#undef X
} // end of namespace dummy1
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700183
Jim Stichnothfac55172014-10-01 13:06:21 -0700184// Validate the enum values in ICMPX8632_TABLE.
185namespace dummy2 {
186// Define a temporary set of enum values based on low-level table
187// entries.
188enum _tmp_enum {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700189#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
Jim Stichnothfac55172014-10-01 13:06:21 -0700190 ICMPX8632_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700191#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700192 _num
193};
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700194// Define a set of constants based on high-level table entries.
195#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
JF Bastien8427ea22015-01-27 12:56:49 -0800196ICEINSTICMP_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700197#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700198// Define a set of constants based on low-level table entries, and
199// ensure the table entry keys are consistent.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700200#define X(val, C_32, C1_64, C2_64, C3_64) \
201 static const int _table2_##val = _tmp_##val; \
Jim Stichnothfac55172014-10-01 13:06:21 -0700202 static_assert( \
203 _table1_##val == _table2_##val, \
204 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
JF Bastien8427ea22015-01-27 12:56:49 -0800205ICMPX8632_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700206#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700207// Repeat the static asserts with respect to the high-level table
208// entries in case the high-level table has extra entries.
209#define X(tag, str) \
210 static_assert( \
211 _table1_##tag == _table2_##tag, \
212 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
JF Bastien8427ea22015-01-27 12:56:49 -0800213ICEINSTICMP_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700214#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700215} // end of namespace dummy2
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700216
Jim Stichnothfac55172014-10-01 13:06:21 -0700217// Validate the enum values in ICETYPEX8632_TABLE.
218namespace dummy3 {
219// Define a temporary set of enum values based on low-level table
220// entries.
221enum _tmp_enum {
Jim Stichnothbca2f652014-11-01 10:13:54 -0700222#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
Jim Stichnothfac55172014-10-01 13:06:21 -0700223 ICETYPEX8632_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700224#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700225 _num
226};
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700227// Define a set of constants based on high-level table entries.
Matt Wala928f1292014-07-07 16:50:46 -0700228#define X(tag, size, align, elts, elty, str) \
229 static const int _table1_##tag = tag;
JF Bastien8427ea22015-01-27 12:56:49 -0800230ICETYPE_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700231#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700232// Define a set of constants based on low-level table entries, and
233// ensure the table entry keys are consistent.
Jim Stichnothbca2f652014-11-01 10:13:54 -0700234#define X(tag, elementty, cvt, sdss, pack, width, fld) \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700235 static const int _table2_##tag = _tmp_##tag; \
Jim Stichnothfac55172014-10-01 13:06:21 -0700236 static_assert(_table1_##tag == _table2_##tag, \
237 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
JF Bastien8427ea22015-01-27 12:56:49 -0800238ICETYPEX8632_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700239#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700240// Repeat the static asserts with respect to the high-level table
241// entries in case the high-level table has extra entries.
Matt Wala928f1292014-07-07 16:50:46 -0700242#define X(tag, size, align, elts, elty, str) \
Jim Stichnothfac55172014-10-01 13:06:21 -0700243 static_assert(_table1_##tag == _table2_##tag, \
244 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
JF Bastien8427ea22015-01-27 12:56:49 -0800245ICETYPE_TABLE
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700246#undef X
Jim Stichnothfac55172014-10-01 13:06:21 -0700247} // end of namespace dummy3
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700248
249} // end of anonymous namespace
250
Jim Stichnotha59ae6f2015-05-17 10:11:41 -0700251BoolFoldingEntry::BoolFoldingEntry(Inst *I)
252 : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)), IsLiveOut(true),
253 NumUses(0) {}
254
// Classify Instr as a foldable producer of a bool value, or PK_None if
// it is not (or not yet) supported.  Currently only 32-bit-or-narrower
// icmp is enabled.
BoolFolding::BoolFoldingProducerKind
BoolFolding::getProducerKind(const Inst *Instr) {
  if (llvm::isa<InstIcmp>(Instr)) {
    if (Instr->getSrc(0)->getType() != IceType_i64)
      return PK_Icmp32;
    return PK_None; // TODO(stichnot): actually PK_Icmp64;
  }
  return PK_None; // TODO(stichnot): remove this
  // NOTE: the code below is intentionally unreachable until the TODO
  // above is resolved; it sketches the planned fcmp/trunc support.

  if (llvm::isa<InstFcmp>(Instr))
    return PK_Fcmp;
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return PK_None;
    case InstCast::Trunc:
      return PK_Trunc;
    }
  }
  return PK_None;
}
276
// Classify Instr as a foldable consumer of a bool value, or CK_None if
// it is not (or not yet) supported.  Currently only br and select are
// enabled.
BoolFolding::BoolFoldingConsumerKind
BoolFolding::getConsumerKind(const Inst *Instr) {
  if (llvm::isa<InstBr>(Instr))
    return CK_Br;
  if (llvm::isa<InstSelect>(Instr))
    return CK_Select;
  return CK_None; // TODO(stichnot): remove this
  // NOTE: the code below is intentionally unreachable until the TODO
  // above is resolved; it sketches the planned sext/zext support.

  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return CK_None;
    case InstCast::Sext:
      return CK_Sext;
    case InstCast::Zext:
      return CK_Zext;
    }
  }
  return CK_None;
}
297
298// Returns true if the producing instruction has a "complex" lowering
299// sequence. This generally means that its lowering sequence requires
300// more than one conditional branch, namely 64-bit integer compares
301// and some floating-point compares. When this is true, and there is
302// more than one consumer, we prefer to disable the folding
303// optimization because it minimizes branches.
304bool BoolFolding::hasComplexLowering(const Inst *Instr) {
305 switch (getProducerKind(Instr)) {
306 default:
307 return false;
308 case PK_Icmp64:
309 return true;
310 case PK_Fcmp:
311 return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
312 CondX86::Br_None;
313 }
314}
315
// Scan Node and populate Producers with the bool-producing instructions
// whose single result can be folded into their (single, same-block)
// consumer.  Invalid candidates are pruned as uses are discovered, and
// surviving producers are marked dead rather than deleted.
void BoolFolding::init(CfgNode *Node) {
  Producers.clear();
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Var = Instr.getDest();
    if (!Instr.isDeleted() // only consider non-deleted instructions
        && Var             // only instructions with an actual dest var
        && Var->getType() == IceType_i1          // only bool-type dest vars
        && getProducerKind(&Instr) != PK_None) { // white-listed instructions
      Producers[Var->getIndex()] = BoolFoldingEntry(&Instr);
    }
    // Check each src variable against the map.
    for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
      Operand *Src = Instr.getSrc(I);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        SizeT VarNum = Var->getIndex();
        if (containsValid(VarNum)) {
          if (I != 0 // All valid consumers use Var as the first source operand
              || getConsumerKind(&Instr) == CK_None // must be white-listed
              || (Producers[VarNum].IsComplex && // complex can't be multi-use
                  Producers[VarNum].NumUses > 0)) {
            setInvalid(VarNum);
            continue;
          }
          ++Producers[VarNum].NumUses;
          if (Instr.isLastUse(Var)) {
            Producers[VarNum].IsLiveOut = false;
          }
        }
      }
    }
  }
  for (auto &I : Producers) {
    // Ignore entries previously marked invalid.
    if (I.second.Instr == nullptr)
      continue;
    // Disable the producer if its dest may be live beyond this block.
    if (I.second.IsLiveOut) {
      setInvalid(I.first);
      continue;
    }
    // Mark as "dead" rather than outright deleting.  This is so that
    // other peephole style optimizations during or before lowering
    // have access to this instruction in undeleted form.  See for
    // example tryOptimizedCmpxchgCmpBr().
    I.second.Instr->setDead();
  }
}
366
367const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {
368 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
369 if (Var == nullptr)
370 return nullptr;
371 SizeT VarNum = Var->getIndex();
372 auto Element = Producers.find(VarNum);
373 if (Element == Producers.end())
374 return nullptr;
375 return Element->second.Instr;
376}
377
// Dump all still-valid producer candidates, guarded by the global dump
// switch and the IceV_Folding verbosity flag.
void BoolFolding::dump(const Cfg *Func) const {
  if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
    return;
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (auto &I : Producers) {
    // Entries invalidated during init() have a null Instr.
    if (I.second.Instr == nullptr)
      continue;
    Str << "Found foldable producer:\n ";
    I.second.Instr->dump(Func);
    Str << "\n";
  }
}
391
// Per-node hook run before lowering: precompute (and optionally dump)
// the bool-folding candidates for Node.
void TargetX8632::initNodeForLowering(CfgNode *Node) {
  FoldingInfo.init(Node);
  FoldingInfo.dump(Func);
}
396
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700397TargetX8632::TargetX8632(Cfg *Func)
Jan Voung1f47ad02015-03-20 15:01:26 -0700398 : TargetLowering(Func),
399 InstructionSet(static_cast<X86InstructionSet>(
400 Func->getContext()->getFlags().getTargetInstructionSet() -
401 TargetInstructionSet::X86InstructionSet_Begin)),
Jan Voung0d9faea2015-05-27 10:04:17 -0700402 IsEbpBasedFrame(false), NeedsStackAlignment(false),
Jan Voungb36ad9b2015-04-21 17:01:49 -0700403 SpillAreaSizeBytes(0) {
Jan Voung1f47ad02015-03-20 15:01:26 -0700404 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) ==
405 (TargetInstructionSet::X86InstructionSet_End -
406 TargetInstructionSet::X86InstructionSet_Begin),
407 "X86InstructionSet range different from TargetInstructionSet");
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700408 // TODO: Don't initialize IntegerRegisters and friends every time.
409 // Instead, initialize in some sort of static initializer for the
410 // class.
Jan Voungbd385e42014-09-18 18:18:10 -0700411 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
412 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
413 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
414 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
415 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
416 ScratchRegs.resize(RegX8632::Reg_NUM);
417#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700418 frameptr, isI8, isInt, isFP) \
Jan Voungbd385e42014-09-18 18:18:10 -0700419 IntegerRegisters[RegX8632::val] = isInt; \
420 IntegerRegistersI8[RegX8632::val] = isI8; \
421 FloatRegisters[RegX8632::val] = isFP; \
422 VectorRegisters[RegX8632::val] = isFP; \
423 ScratchRegs[RegX8632::val] = scratch;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700424 REGX8632_TABLE;
425#undef X
426 TypeToRegisterSet[IceType_void] = InvalidRegisters;
427 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
428 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
429 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
430 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
431 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
432 TypeToRegisterSet[IceType_f32] = FloatRegisters;
433 TypeToRegisterSet[IceType_f64] = FloatRegisters;
Matt Wala928f1292014-07-07 16:50:46 -0700434 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
435 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
436 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
437 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
438 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
439 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
440 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700441}
442
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700443void TargetX8632::translateO2() {
Jim Stichnoth8363a062014-10-07 10:02:38 -0700444 TimerMarker T(TimerStack::TT_O2, Func);
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700445
Karl Schimpfdf80eb82015-02-09 14:20:22 -0800446 if (!Ctx->getFlags().getPhiEdgeSplit()) {
Jim Stichnoth336f6c42014-10-30 15:01:31 -0700447 // Lower Phi instructions.
448 Func->placePhiLoads();
449 if (Func->hasError())
450 return;
451 Func->placePhiStores();
452 if (Func->hasError())
453 return;
454 Func->deletePhis();
455 if (Func->hasError())
456 return;
457 Func->dump("After Phi lowering");
458 }
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700459
460 // Address mode optimization.
Jim Stichnoth877b04e2014-10-15 15:13:06 -0700461 Func->getVMetadata()->init(VMK_SingleDefs);
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700462 Func->doAddressOpt();
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700463
Matt Wala45a06232014-07-09 16:33:22 -0700464 // Argument lowering
Matt Wala45a06232014-07-09 16:33:22 -0700465 Func->doArgLowering();
Matt Wala45a06232014-07-09 16:33:22 -0700466
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700467 // Target lowering. This requires liveness analysis for some parts
468 // of the lowering decisions, such as compare/branch fusing. If
469 // non-lightweight liveness analysis is used, the instructions need
470 // to be renumbered first. TODO: This renumbering should only be
471 // necessary if we're actually calculating live intervals, which we
472 // only do for register allocation.
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700473 Func->renumberInstructions();
474 if (Func->hasError())
475 return;
Matt Wala45a06232014-07-09 16:33:22 -0700476
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700477 // TODO: It should be sufficient to use the fastest liveness
478 // calculation, i.e. livenessLightweight(). However, for some
479 // reason that slows down the rest of the translation. Investigate.
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700480 Func->liveness(Liveness_Basic);
481 if (Func->hasError())
482 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700483 Func->dump("After x86 address mode opt");
Matt Wala45a06232014-07-09 16:33:22 -0700484
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -0700485 doLoadOpt();
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700486 Func->genCode();
487 if (Func->hasError())
488 return;
Jim Stichnoth336f6c42014-10-30 15:01:31 -0700489 Func->dump("After x86 codegen");
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700490
491 // Register allocation. This requires instruction renumbering and
492 // full liveness analysis.
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700493 Func->renumberInstructions();
494 if (Func->hasError())
495 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700496 Func->liveness(Liveness_Intervals);
497 if (Func->hasError())
498 return;
Jim Stichnoth9c234e22014-10-01 09:28:21 -0700499 // Validate the live range computations. The expensive validation
500 // call is deliberately only made when assertions are enabled.
501 assert(Func->validateLiveness());
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700502 // The post-codegen dump is done here, after liveness analysis and
503 // associated cleanup, to make the dump cleaner and more useful.
504 Func->dump("After initial x8632 codegen");
Jim Stichnoth877b04e2014-10-15 15:13:06 -0700505 Func->getVMetadata()->init(VMK_All);
Jim Stichnoth70d0a052014-11-14 15:53:46 -0800506 regAlloc(RAK_Global);
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700507 if (Func->hasError())
508 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700509 Func->dump("After linear scan regalloc");
510
Karl Schimpfdf80eb82015-02-09 14:20:22 -0800511 if (Ctx->getFlags().getPhiEdgeSplit()) {
Jim Stichnoth336f6c42014-10-30 15:01:31 -0700512 Func->advancedPhiLowering();
513 Func->dump("After advanced Phi lowering");
514 }
515
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700516 // Stack frame mapping.
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700517 Func->genFrame();
518 if (Func->hasError())
519 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700520 Func->dump("After stack frame mapping");
Matt Walac3302742014-08-15 16:21:56 -0700521
Jim Stichnoth336f6c42014-10-30 15:01:31 -0700522 Func->contractEmptyNodes();
523 Func->reorderNodes();
Jim Stichnoth98712a32014-10-24 10:59:02 -0700524
Jim Stichnothff9c7062014-09-18 04:50:49 -0700525 // Branch optimization. This needs to be done just before code
526 // emission. In particular, no transformations that insert or
527 // reorder CfgNodes should be done after branch optimization. We go
528 // ahead and do it before nop insertion to reduce the amount of work
529 // needed for searching for opportunities.
530 Func->doBranchOpt();
531 Func->dump("After branch optimization");
532
Matt Walac3302742014-08-15 16:21:56 -0700533 // Nop insertion
Jan Voung1f47ad02015-03-20 15:01:26 -0700534 if (Ctx->getFlags().shouldDoNopInsertion()) {
Matt Walac3302742014-08-15 16:21:56 -0700535 Func->doNopInsertion();
536 }
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700537}
538
// Minimal -Om1 translation pipeline: phi lowering, argument lowering,
// code generation, allocation of infinite-weight variables only, frame
// layout, and optional nop insertion.  No optimization passes are run.
void TargetX8632::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
575
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -0700576namespace {
577
578// Converts a ConstantInteger32 operand into its constant value, or
579// MemoryOrderInvalid if the operand is not a ConstantInteger32.
580uint64_t getConstantMemoryOrder(Operand *Opnd) {
581 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
582 return Integer->getValue();
583 return Intrinsics::MemoryOrderInvalid;
584}
585
586// Determines whether the dest of a Load instruction can be folded
587// into one of the src operands of a 2-operand instruction. This is
588// true as long as the load dest matches exactly one of the binary
589// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
590// the answer is true.
591bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
592 Operand *&Src0, Operand *&Src1) {
593 if (Src0 == LoadDest && Src1 != LoadDest) {
594 Src0 = LoadSrc;
595 return true;
596 }
597 if (Src0 != LoadDest && Src1 == LoadDest) {
598 Src1 = LoadSrc;
599 return true;
600 }
601 return false;
602}
603
604} // end of anonymous namespace
605
// Peephole pass that folds a load (InstLoad, or a single-instruction
// AtomicLoad intrinsic) into the immediately following arithmetic,
// icmp, fcmp, select, or cast instruction, provided that instruction
// ends the load dest's live range.  The folded pair is replaced by one
// new instruction that reads the memory operand directly.
void TargetX8632::doLoadOpt() {
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      Variable *LoadDest = nullptr;
      Operand *LoadSrc = nullptr;
      Inst *CurInst = Context.getCur();
      Inst *Next = Context.getNextInst();
      // Determine whether the current instruction is a Load
      // instruction or equivalent.
      if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
        // An InstLoad always qualifies.
        LoadDest = Load->getDest();
        const bool DoLegalize = false;
        LoadSrc = formMemoryOperand(Load->getSourceAddress(),
                                    LoadDest->getType(), DoLegalize);
      } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
        // An AtomicLoad intrinsic qualifies as long as it has a valid
        // memory ordering, and can be implemented in a single
        // instruction (i.e., not i64).
        Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
        if (ID == Intrinsics::AtomicLoad &&
            Intrin->getDest()->getType() != IceType_i64 &&
            Intrinsics::isMemoryOrderValid(
                ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
          LoadDest = Intrin->getDest();
          const bool DoLegalize = false;
          LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
                                      DoLegalize);
        }
      }
      // A Load instruction can be folded into the following
      // instruction only if the following instruction ends the Load's
      // Dest variable's live range.
      if (LoadDest && Next && Next->isLastUse(LoadDest)) {
        assert(LoadSrc);
        Inst *NewInst = nullptr;
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
          Operand *Src0 = Arith->getSrc(0);
          Operand *Src1 = Arith->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstArithmetic::create(Func, Arith->getOp(),
                                             Arith->getDest(), Src0, Src1);
          }
        } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
          Operand *Src0 = Icmp->getSrc(0);
          Operand *Src1 = Icmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstIcmp::create(Func, Icmp->getCondition(),
                                       Icmp->getDest(), Src0, Src1);
          }
        } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
          Operand *Src0 = Fcmp->getSrc(0);
          Operand *Src1 = Fcmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
                                       Fcmp->getDest(), Src0, Src1);
          }
        } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
          Operand *Src0 = Select->getTrueOperand();
          Operand *Src1 = Select->getFalseOperand();
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstSelect::create(Func, Select->getDest(),
                                         Select->getCondition(), Src0, Src1);
          }
        } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
          // The load dest can always be folded into a Cast
          // instruction.
          Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
          if (Src0 == LoadDest) {
            NewInst = InstCast::create(Func, Cast->getCastKind(),
                                       Cast->getDest(), LoadSrc);
          }
        }
        if (NewInst) {
          CurInst->setDeleted();
          Next->setDeleted();
          Context.insert(NewInst);
          // Update NewInst->LiveRangesEnded so that target lowering
          // may benefit.  Also update NewInst->HasSideEffects.
          NewInst->spliceLivenessInfo(Next, CurInst);
        }
      }
      Context.advanceCur();
      Context.advanceNext();
    }
  }
  Func->dump("After load optimization");
}
695
Jim Stichnothff9c7062014-09-18 04:50:49 -0700696bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
697 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
698 return Br->optimizeBranch(NextNode);
699 }
700 return false;
701}
702
// Default (32-bit) register-name table, indexed by register number and
// expanded from the X-macro register table (REGX8632_TABLE).
IceString TargetX8632::RegNames[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name,
    REGX8632_TABLE
#undef X
};
710
Jim Stichnoth98712a32014-10-24 10:59:02 -0700711Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
712 if (Ty == IceType_void)
713 Ty = IceType_i32;
Jim Stichnoth336f6c42014-10-30 15:01:31 -0700714 if (PhysicalRegisters[Ty].empty())
715 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
716 assert(RegNum < PhysicalRegisters[Ty].size());
717 Variable *Reg = PhysicalRegisters[Ty][RegNum];
Jim Stichnothae953202014-12-20 06:17:49 -0800718 if (Reg == nullptr) {
Jim Stichnoth98712a32014-10-24 10:59:02 -0700719 Reg = Func->makeVariable(Ty);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700720 Reg->setRegNum(RegNum);
Jim Stichnoth336f6c42014-10-30 15:01:31 -0700721 PhysicalRegisters[Ty][RegNum] = Reg;
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700722 // Specially mark esp as an "argument" so that it is considered
723 // live upon function entry.
Jim Stichnoth47752552014-10-13 17:15:08 -0700724 if (RegNum == RegX8632::Reg_esp) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700725 Func->addImplicitArg(Reg);
Jim Stichnoth47752552014-10-13 17:15:08 -0700726 Reg->setIgnoreLiveness();
727 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700728 }
729 return Reg;
730}
731
// Returns the assembler name for register RegNum, choosing the 8-bit,
// 16-bit, or default (32-bit/FP) name table according to Ty.
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegX8632::Reg_NUM);
  // Per-width name tables, expanded from the X-macro register table.
  static IceString RegNames8[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name8,
      REGX8632_TABLE
#undef X
  };
  static IceString RegNames16[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  name16,
      REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}
758
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700759void TargetX8632::emitVariable(const Variable *Var) const {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700760 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700761 if (Var->hasReg()) {
Jim Stichnothbca2f652014-11-01 10:13:54 -0700762 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700763 return;
764 }
Jim Stichnothdd165072014-11-02 09:41:45 -0800765 if (Var->getWeight().isInf())
766 llvm_unreachable("Infinite-weight Variable has no register assigned");
Jim Stichnothb0e142b2014-06-12 15:28:56 -0700767 int32_t Offset = Var->getStackOffset();
768 if (!hasFramePointer())
769 Offset += getStackAdjustment();
Jim Stichnothbca2f652014-11-01 10:13:54 -0700770 if (Offset)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700771 Str << Offset;
Jan Voungb2d50842015-05-12 09:53:50 -0700772 const Type FrameSPTy = IceType_i32;
773 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700774}
775
Jan Voung90ccc3f2015-04-30 14:15:10 -0700776X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
Jim Stichnothdd165072014-11-02 09:41:45 -0800777 if (Var->hasReg())
778 llvm_unreachable("Stack Variable has a register assigned");
779 if (Var->getWeight().isInf())
780 llvm_unreachable("Infinite-weight Variable has no register assigned");
Jan Voung8acded02014-09-22 18:02:25 -0700781 int32_t Offset = Var->getStackOffset();
782 if (!hasFramePointer())
783 Offset += getStackAdjustment();
Jan Voung90ccc3f2015-04-30 14:15:10 -0700784 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
Jan Voung8acded02014-09-22 18:02:25 -0700785}
786
Matt Wala45a06232014-07-09 16:33:22 -0700787void TargetX8632::lowerArguments() {
788 VarList &Args = Func->getArgs();
789 // The first four arguments of vector type, regardless of their
790 // position relative to the other arguments in the argument list, are
791 // passed in registers xmm0 - xmm3.
792 unsigned NumXmmArgs = 0;
793
794 Context.init(Func->getEntryNode());
795 Context.setInsertPoint(Context.getCur());
796
797 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
798 ++I) {
799 Variable *Arg = Args[I];
800 Type Ty = Arg->getType();
801 if (!isVectorType(Ty))
802 continue;
803 // Replace Arg in the argument list with the home register. Then
804 // generate an instruction in the prolog to copy the home register
805 // to the assigned location of Arg.
Jan Voungbd385e42014-09-18 18:18:10 -0700806 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
Matt Wala45a06232014-07-09 16:33:22 -0700807 ++NumXmmArgs;
Jim Stichnoth9a04c072014-12-11 15:51:42 -0800808 Variable *RegisterArg = Func->makeVariable(Ty);
809 if (ALLOW_DUMP)
810 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
Matt Wala45a06232014-07-09 16:33:22 -0700811 RegisterArg->setRegNum(RegNum);
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700812 RegisterArg->setIsArg();
813 Arg->setIsArg(false);
Matt Wala45a06232014-07-09 16:33:22 -0700814
815 Args[I] = RegisterArg;
816 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
817 }
818}
819
// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    // Lo is handled first so it lands at the lower address.
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  // Vector arguments in the in-args area get stack alignment.
  if (isVectorType(Ty)) {
    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandX8632Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}
863
// Stack slots on x86-32 are always modeled as 32-bit words.
Type TargetX8632::stackSlotType() { return IceType_i32; }
865
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700866void TargetX8632::addProlog(CfgNode *Node) {
Matt Walad4799f42014-08-14 14:24:12 -0700867 // Stack frame layout:
868 //
869 // +------------------------+
870 // | 1. return address |
871 // +------------------------+
872 // | 2. preserved registers |
873 // +------------------------+
874 // | 3. padding |
875 // +------------------------+
876 // | 4. global spill area |
877 // +------------------------+
878 // | 5. padding |
879 // +------------------------+
880 // | 6. local spill area |
881 // +------------------------+
882 // | 7. padding |
883 // +------------------------+
884 // | 8. allocas |
885 // +------------------------+
886 //
887 // The following variables record the size in bytes of the given areas:
888 // * X86_RET_IP_SIZE_BYTES: area 1
889 // * PreservedRegsSizeBytes: area 2
890 // * SpillAreaPaddingBytes: area 3
891 // * GlobalsSize: area 4
892 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
893 // * LocalsSpillAreaSize: area 6
894 // * SpillAreaSizeBytes: areas 3 - 7
895
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700896 // Determine stack frame offsets for each Variable without a
897 // register assignment. This can be done as one variable per stack
898 // slot. Or, do coalescing by running the register allocator again
899 // with an infinite set of registers (as a side effect, this gives
900 // variables a second chance at physical register assignment).
901 //
902 // A middle ground approach is to leverage sparsity and allocate one
903 // block of space on the frame for globals (variables with
904 // multi-block lifetime), and one block to share for locals
905 // (single-block lifetime).
906
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700907 Context.init(Node);
908 Context.setInsertPoint(Context.getCur());
909
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700910 llvm::SmallBitVector CalleeSaves =
911 getRegisterSet(RegSet_CalleeSave, RegSet_None);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700912 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700913 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
914 size_t GlobalsSize = 0;
915 // If there is a separate locals area, this represents that area.
916 // Otherwise it counts any variable not counted by GlobalsSize.
917 SpillAreaSizeBytes = 0;
Matt Walad4799f42014-08-14 14:24:12 -0700918 // If there is a separate locals area, this specifies the alignment
919 // for it.
920 uint32_t LocalsSlotsAlignmentBytes = 0;
921 // The entire spill locations area gets aligned to largest natural
922 // alignment of the variables that have a spill slot.
923 uint32_t SpillAreaAlignmentBytes = 0;
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700924 // A spill slot linked to a variable with a stack slot should reuse
925 // that stack slot.
926 std::function<bool(Variable *)> TargetVarHook =
927 [&VariablesLinkedToSpillSlots](Variable *Var) {
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -0700928 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
929 assert(Var->getWeight().isZero());
930 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
931 VariablesLinkedToSpillSlots.push_back(Var);
932 return true;
933 }
934 }
935 return false;
936 };
Matt Walad4799f42014-08-14 14:24:12 -0700937
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700938 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
939 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
940 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
941 &LocalsSlotsAlignmentBytes, TargetVarHook);
Matt Walad4799f42014-08-14 14:24:12 -0700942 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
Matt Walad4799f42014-08-14 14:24:12 -0700943 SpillAreaSizeBytes += GlobalsSize;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700944
945 // Add push instructions for preserved registers.
Jim Stichnoth18735602014-09-16 19:59:35 -0700946 uint32_t NumCallee = 0;
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700947 size_t PreservedRegsSizeBytes = 0;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700948 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
949 if (CalleeSaves[i] && RegsUsed[i]) {
Jim Stichnoth18735602014-09-16 19:59:35 -0700950 ++NumCallee;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700951 PreservedRegsSizeBytes += 4;
Jan Voung0b9eee52014-10-07 11:20:10 -0700952 _push(getPhysicalRegister(i));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700953 }
954 }
Jim Stichnoth18735602014-09-16 19:59:35 -0700955 Ctx->statsUpdateRegistersSaved(NumCallee);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700956
957 // Generate "push ebp; mov ebp, esp"
958 if (IsEbpBasedFrame) {
959 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
960 .count() == 0);
961 PreservedRegsSizeBytes += 4;
Jan Voungbd385e42014-09-18 18:18:10 -0700962 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
963 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
Jan Voung0b9eee52014-10-07 11:20:10 -0700964 _push(ebp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700965 _mov(ebp, esp);
Jim Stichnoth76dcf1a2015-04-29 10:20:07 -0700966 // Keep ebp live for late-stage liveness analysis
967 // (e.g. asm-verbose mode).
968 Context.insert(InstFakeUse::create(Func, ebp));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700969 }
970
Matt Walad4799f42014-08-14 14:24:12 -0700971 // Align the variables area. SpillAreaPaddingBytes is the size of
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700972 // the region after the preserved registers and before the spill areas.
973 // LocalsSlotsPaddingBytes is the amount of padding between the globals
974 // and locals area if they are separate.
975 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
976 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
Matt Walad4799f42014-08-14 14:24:12 -0700977 uint32_t SpillAreaPaddingBytes = 0;
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700978 uint32_t LocalsSlotsPaddingBytes = 0;
979 alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
980 SpillAreaAlignmentBytes, GlobalsSize,
981 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
982 &LocalsSlotsPaddingBytes);
983 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
984 uint32_t GlobalsAndSubsequentPaddingSize =
985 GlobalsSize + LocalsSlotsPaddingBytes;
Matt Walad4799f42014-08-14 14:24:12 -0700986
987 // Align esp if necessary.
988 if (NeedsStackAlignment) {
989 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
990 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
991 SpillAreaSizeBytes = StackSize - StackOffset;
992 }
993
994 // Generate "sub esp, SpillAreaSizeBytes"
995 if (SpillAreaSizeBytes)
Jan Voungbd385e42014-09-18 18:18:10 -0700996 _sub(getPhysicalRegister(RegX8632::Reg_esp),
Jim Stichnothd2cb4362014-11-20 11:24:42 -0800997 Ctx->getConstantInt32(SpillAreaSizeBytes));
Jim Stichnoth18735602014-09-16 19:59:35 -0700998 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700999
1000 resetStackAdjustment();
1001
Matt Wala45a06232014-07-09 16:33:22 -07001002 // Fill in stack offsets for stack args, and copy args into registers
1003 // for those that were register-allocated. Args are pushed right to
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001004 // left, so Arg[0] is closest to the stack/frame pointer.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001005 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
Matt Wala105b7042014-08-11 19:56:19 -07001006 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001007 if (!IsEbpBasedFrame)
Matt Walad4799f42014-08-14 14:24:12 -07001008 BasicFrameOffset += SpillAreaSizeBytes;
Matt Wala45a06232014-07-09 16:33:22 -07001009
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001010 const VarList &Args = Func->getArgs();
1011 size_t InArgsSizeBytes = 0;
Matt Wala45a06232014-07-09 16:33:22 -07001012 unsigned NumXmmArgs = 0;
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001013 for (Variable *Arg : Args) {
Matt Wala45a06232014-07-09 16:33:22 -07001014 // Skip arguments passed in registers.
1015 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
1016 ++NumXmmArgs;
1017 continue;
1018 }
1019 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001020 }
1021
1022 // Fill in stack offsets for locals.
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001023 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1024 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1025 IsEbpBasedFrame);
Matt Walad4799f42014-08-14 14:24:12 -07001026 // Assign stack offsets to variables that have been linked to spilled
1027 // variables.
Jim Stichnothf44f3712014-10-01 14:05:51 -07001028 for (Variable *Var : VariablesLinkedToSpillSlots) {
Jim Stichnoth800dab22014-09-20 12:25:02 -07001029 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
Matt Walad4799f42014-08-14 14:24:12 -07001030 Var->setStackOffset(Linked->getStackOffset());
1031 }
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001032 this->HasComputedFrame = true;
Matt Walad4799f42014-08-14 14:24:12 -07001033
Jim Stichnothfa4efea2015-01-27 05:06:03 -08001034 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
Jim Stichnothe4a8f402015-01-20 12:52:51 -08001035 OstreamLocker L(Func->getContext());
Matt Walad4799f42014-08-14 14:24:12 -07001036 Ostream &Str = Func->getContext()->getStrDump();
1037
1038 Str << "Stack layout:\n";
1039 uint32_t EspAdjustmentPaddingSize =
1040 SpillAreaSizeBytes - LocalsSpillAreaSize -
1041 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
1042 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1043 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
1044 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1045 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1046 << " globals spill area = " << GlobalsSize << " bytes\n"
1047 << " globals-locals spill areas intermediate padding = "
1048 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1049 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1050 << " esp alignment padding = " << EspAdjustmentPaddingSize
1051 << " bytes\n";
1052
1053 Str << "Stack details:\n"
1054 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
1055 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1056 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1057 << " bytes\n"
1058 << " is ebp based = " << IsEbpBasedFrame << "\n";
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001059 }
1060}
1061
// Builds the function epilog before the node's ret instruction:
// restores esp (via ebp or an explicit add), pops preserved registers,
// and, under sandboxing, replaces the ret with a bundle-aligned
// masked indirect jump through ecx.
void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Find the ret instruction, scanning backwards; if there is none,
  // there is nothing to do.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of esp before the assignment of esp=ebp keeps
    // previous esp adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, esp));
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, SpillAreaSizeBytes
    if (SpillAreaSizeBytes)
      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
  }

  // Add pop instructions for preserved registers, in reverse order of
  // the pushes in addProlog().
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }

  if (!Ctx->getFlags().getUseSandboxing())
    return;
  // Change the original ret instruction into a sandboxed return sequence.
  // t:ecx = pop
  // bundle_lock
  // and t, ~31
  // jmp *t
  // bundle_unlock
  // FakeUse <original_ret_operand>
  const SizeT BundleSize = 1
                           << Func->getAssembler<>()->getBundleAlignLog2Bytes();
  Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
  _pop(T_ecx);
  _bundle_lock();
  _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
  _jmp(T_ecx);
  _bundle_unlock();
  // Keep the original return operand live across the new sequence.
  if (RI->getSrcSize()) {
    Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
    Context.insert(InstFakeUse::create(Func, RetValue));
  }
  RI->setDeleted();
}
1129
1130void TargetX8632::split64(Variable *Var) {
1131 switch (Var->getType()) {
1132 default:
1133 return;
1134 case IceType_i64:
1135 // TODO: Only consider F64 if we need to push each half when
1136 // passing as an argument to a function call. Note that each half
1137 // is still typed as I32.
1138 case IceType_f64:
1139 break;
1140 }
1141 Variable *Lo = Var->getLo();
1142 Variable *Hi = Var->getHi();
1143 if (Lo) {
1144 assert(Hi);
1145 return;
1146 }
Jim Stichnothae953202014-12-20 06:17:49 -08001147 assert(Hi == nullptr);
Jim Stichnoth9a04c072014-12-11 15:51:42 -08001148 Lo = Func->makeVariable(IceType_i32);
1149 Hi = Func->makeVariable(IceType_i32);
1150 if (ALLOW_DUMP) {
1151 Lo->setName(Func, Var->getName(Func) + "__lo");
1152 Hi->setName(Func, Var->getName(Func) + "__hi");
1153 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001154 Var->setLoHi(Lo, Hi);
1155 if (Var->getIsArg()) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07001156 Lo->setIsArg();
1157 Hi->setIsArg();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001158 }
1159}
1160
1161Operand *TargetX8632::loOperand(Operand *Operand) {
Jim Stichnothc77f8172015-05-31 23:34:44 -07001162 assert(Operand->getType() == IceType_i64 ||
1163 Operand->getType() == IceType_f64);
1164 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001165 return Operand;
1166 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1167 split64(Var);
1168 return Var->getLo();
1169 }
Jan Voungbc004632014-09-16 15:09:10 -07001170 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001171 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001172 }
1173 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1174 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
1175 Mem->getOffset(), Mem->getIndex(),
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001176 Mem->getShift(), Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001177 }
1178 llvm_unreachable("Unsupported operand type");
Jim Stichnothae953202014-12-20 06:17:49 -08001179 return nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001180}
1181
1182Operand *TargetX8632::hiOperand(Operand *Operand) {
Jim Stichnothc77f8172015-05-31 23:34:44 -07001183 assert(Operand->getType() == IceType_i64 ||
1184 Operand->getType() == IceType_f64);
1185 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001186 return Operand;
1187 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1188 split64(Var);
1189 return Var->getHi();
1190 }
Jan Voungbc004632014-09-16 15:09:10 -07001191 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1192 return Ctx->getConstantInt32(
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001193 static_cast<uint32_t>(Const->getValue() >> 32));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001194 }
1195 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1196 Constant *Offset = Mem->getOffset();
Jim Stichnothae953202014-12-20 06:17:49 -08001197 if (Offset == nullptr) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001198 Offset = Ctx->getConstantInt32(4);
1199 } else if (ConstantInteger32 *IntOffset =
1200 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1201 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001202 } else if (ConstantRelocatable *SymOffset =
1203 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
Jan Voungfe14fb82014-10-13 15:56:32 -07001204 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001205 Offset =
1206 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
1207 SymOffset->getSuppressMangling());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001208 }
1209 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07001210 Mem->getIndex(), Mem->getShift(),
1211 Mem->getSegmentRegister());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001212 }
1213 llvm_unreachable("Unsupported operand type");
Jim Stichnothae953202014-12-20 06:17:49 -08001214 return nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001215}
1216
// Builds the set of physical registers selected by the Include mask and
// not selected by the Exclude mask, where the masks pick among the
// caller-save, callee-save, stack-pointer, and frame-pointer classes.
// Note: the Exclude tests are applied after the Include tests, so
// exclusion takes precedence.
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegX8632::Reg_NUM);

#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegX8632::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegX8632::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegX8632::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegX8632::val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}
1246
1247void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
1248 IsEbpBasedFrame = true;
Matt Wala105b7042014-08-11 19:56:19 -07001249 // Conservatively require the stack to be aligned. Some stack
1250 // adjustment operations implemented below assume that the stack is
1251 // aligned before the alloca. All the alloca code ensures that the
1252 // stack alignment is preserved after the alloca. The stack alignment
1253 // restriction can be relaxed in some cases.
1254 NeedsStackAlignment = true;
1255
Jan Voung55500db2015-05-26 14:25:40 -07001256 // TODO(stichnot): minimize the number of adjustments of esp, etc.
Jan Voungbd385e42014-09-18 18:18:10 -07001257 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001258 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1259 Variable *Dest = Inst->getDest();
Matt Wala105b7042014-08-11 19:56:19 -07001260 uint32_t AlignmentParam = Inst->getAlignInBytes();
Jim Stichnoth72a8f8d2014-09-08 17:56:50 -07001261 // For default align=0, set it to the real value 1, to avoid any
1262 // bit-manipulation problems below.
1263 AlignmentParam = std::max(AlignmentParam, 1u);
Matt Wala105b7042014-08-11 19:56:19 -07001264
1265 // LLVM enforces power of 2 alignment.
Jan Voung55500db2015-05-26 14:25:40 -07001266 assert(llvm::isPowerOf2_32(AlignmentParam));
1267 assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES));
Matt Wala105b7042014-08-11 19:56:19 -07001268
1269 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
1270 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001271 _and(esp, Ctx->getConstantInt32(-Alignment));
Matt Wala105b7042014-08-11 19:56:19 -07001272 }
Jan Voung55500db2015-05-26 14:25:40 -07001273 if (const auto *ConstantTotalSize =
Jan Voungbc004632014-09-16 15:09:10 -07001274 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
Matt Wala105b7042014-08-11 19:56:19 -07001275 uint32_t Value = ConstantTotalSize->getValue();
Jan Voung55500db2015-05-26 14:25:40 -07001276 Value = Utils::applyAlignment(Value, Alignment);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001277 _sub(esp, Ctx->getConstantInt32(Value));
Matt Wala105b7042014-08-11 19:56:19 -07001278 } else {
1279 // Non-constant sizes need to be adjusted to the next highest
1280 // multiple of the required alignment at runtime.
1281 Variable *T = makeReg(IceType_i32);
1282 _mov(T, TotalSize);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001283 _add(T, Ctx->getConstantInt32(Alignment - 1));
1284 _and(T, Ctx->getConstantInt32(-Alignment));
Matt Wala105b7042014-08-11 19:56:19 -07001285 _sub(esp, T);
1286 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001287 _mov(Dest, esp);
1288}
1289
1290void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1291 Variable *Dest = Inst->getDest();
1292 Operand *Src0 = legalize(Inst->getSrc(0));
1293 Operand *Src1 = legalize(Inst->getSrc(1));
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -07001294 if (Inst->isCommutative()) {
1295 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1296 std::swap(Src0, Src1);
1297 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001298 if (Dest->getType() == IceType_i64) {
1299 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1300 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1301 Operand *Src0Lo = loOperand(Src0);
1302 Operand *Src0Hi = hiOperand(Src0);
1303 Operand *Src1Lo = loOperand(Src1);
1304 Operand *Src1Hi = hiOperand(Src1);
Jim Stichnothae953202014-12-20 06:17:49 -08001305 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001306 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001307 case InstArithmetic::_num:
1308 llvm_unreachable("Unknown arithmetic operator");
1309 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001310 case InstArithmetic::Add:
1311 _mov(T_Lo, Src0Lo);
1312 _add(T_Lo, Src1Lo);
1313 _mov(DestLo, T_Lo);
1314 _mov(T_Hi, Src0Hi);
1315 _adc(T_Hi, Src1Hi);
1316 _mov(DestHi, T_Hi);
1317 break;
1318 case InstArithmetic::And:
1319 _mov(T_Lo, Src0Lo);
1320 _and(T_Lo, Src1Lo);
1321 _mov(DestLo, T_Lo);
1322 _mov(T_Hi, Src0Hi);
1323 _and(T_Hi, Src1Hi);
1324 _mov(DestHi, T_Hi);
1325 break;
1326 case InstArithmetic::Or:
1327 _mov(T_Lo, Src0Lo);
1328 _or(T_Lo, Src1Lo);
1329 _mov(DestLo, T_Lo);
1330 _mov(T_Hi, Src0Hi);
1331 _or(T_Hi, Src1Hi);
1332 _mov(DestHi, T_Hi);
1333 break;
1334 case InstArithmetic::Xor:
1335 _mov(T_Lo, Src0Lo);
1336 _xor(T_Lo, Src1Lo);
1337 _mov(DestLo, T_Lo);
1338 _mov(T_Hi, Src0Hi);
1339 _xor(T_Hi, Src1Hi);
1340 _mov(DestHi, T_Hi);
1341 break;
1342 case InstArithmetic::Sub:
1343 _mov(T_Lo, Src0Lo);
1344 _sub(T_Lo, Src1Lo);
1345 _mov(DestLo, T_Lo);
1346 _mov(T_Hi, Src0Hi);
1347 _sbb(T_Hi, Src1Hi);
1348 _mov(DestHi, T_Hi);
1349 break;
1350 case InstArithmetic::Mul: {
Jim Stichnothae953202014-12-20 06:17:49 -08001351 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jan Voungbd385e42014-09-18 18:18:10 -07001352 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
1353 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001354 // gcc does the following:
1355 // a=b*c ==>
1356 // t1 = b.hi; t1 *=(imul) c.lo
1357 // t2 = c.hi; t2 *=(imul) b.lo
1358 // t3:eax = b.lo
1359 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1360 // a.lo = t4.lo
1361 // t4.hi += t1
1362 // t4.hi += t2
1363 // a.hi = t4.hi
Jim Stichnothef8cf0e2014-08-26 22:16:29 -07001364 // The mul instruction cannot take an immediate operand.
1365 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001366 _mov(T_1, Src0Hi);
1367 _imul(T_1, Src1Lo);
1368 _mov(T_2, Src1Hi);
1369 _imul(T_2, Src0Lo);
Jan Voungbd385e42014-09-18 18:18:10 -07001370 _mov(T_3, Src0Lo, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001371 _mul(T_4Lo, T_3, Src1Lo);
1372 // The mul instruction produces two dest variables, edx:eax. We
1373 // create a fake definition of edx to account for this.
1374 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1375 _mov(DestLo, T_4Lo);
1376 _add(T_4Hi, T_1);
1377 _add(T_4Hi, T_2);
1378 _mov(DestHi, T_4Hi);
1379 } break;
1380 case InstArithmetic::Shl: {
1381 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1382 // gcc does the following:
1383 // a=b<<c ==>
1384 // t1:ecx = c.lo & 0xff
1385 // t2 = b.lo
1386 // t3 = b.hi
1387 // t3 = shld t3, t2, t1
1388 // t2 = shl t2, t1
1389 // test t1, 0x20
1390 // je L1
1391 // use(t3)
1392 // t3 = t2
1393 // t2 = 0
1394 // L1:
1395 // a.lo = t2
1396 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001397 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001398 Constant *BitTest = Ctx->getConstantInt32(0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001399 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001400 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001401 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001402 _mov(T_2, Src0Lo);
1403 _mov(T_3, Src0Hi);
1404 _shld(T_3, T_2, T_1);
1405 _shl(T_2, T_1);
1406 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001407 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001408 // T_2 and T_3 are being assigned again because of the
1409 // intra-block control flow, so we need the _mov_nonkillable
1410 // variant to avoid liveness problems.
1411 _mov_nonkillable(T_3, T_2);
1412 _mov_nonkillable(T_2, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001413 Context.insert(Label);
1414 _mov(DestLo, T_2);
1415 _mov(DestHi, T_3);
1416 } break;
1417 case InstArithmetic::Lshr: {
1418 // a=b>>c (unsigned) ==>
1419 // t1:ecx = c.lo & 0xff
1420 // t2 = b.lo
1421 // t3 = b.hi
1422 // t2 = shrd t2, t3, t1
1423 // t3 = shr t3, t1
1424 // test t1, 0x20
1425 // je L1
1426 // use(t2)
1427 // t2 = t3
1428 // t3 = 0
1429 // L1:
1430 // a.lo = t2
1431 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001432 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001433 Constant *BitTest = Ctx->getConstantInt32(0x20);
Matt Wala43ff7eb2014-06-18 10:30:07 -07001434 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001435 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001436 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001437 _mov(T_2, Src0Lo);
1438 _mov(T_3, Src0Hi);
1439 _shrd(T_2, T_3, T_1);
1440 _shr(T_3, T_1);
1441 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001442 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001443 // T_2 and T_3 are being assigned again because of the
1444 // intra-block control flow, so we need the _mov_nonkillable
1445 // variant to avoid liveness problems.
1446 _mov_nonkillable(T_2, T_3);
1447 _mov_nonkillable(T_3, Zero);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001448 Context.insert(Label);
1449 _mov(DestLo, T_2);
1450 _mov(DestHi, T_3);
1451 } break;
1452 case InstArithmetic::Ashr: {
1453 // a=b>>c (signed) ==>
1454 // t1:ecx = c.lo & 0xff
1455 // t2 = b.lo
1456 // t3 = b.hi
1457 // t2 = shrd t2, t3, t1
1458 // t3 = sar t3, t1
1459 // test t1, 0x20
1460 // je L1
1461 // use(t2)
1462 // t2 = t3
1463 // t3 = sar t3, 0x1f
1464 // L1:
1465 // a.lo = t2
1466 // a.hi = t3
Jim Stichnothae953202014-12-20 06:17:49 -08001467 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001468 Constant *BitTest = Ctx->getConstantInt32(0x20);
1469 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001470 InstX8632Label *Label = InstX8632Label::create(Func, this);
Jan Voungbd385e42014-09-18 18:18:10 -07001471 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001472 _mov(T_2, Src0Lo);
1473 _mov(T_3, Src0Hi);
1474 _shrd(T_2, T_3, T_1);
1475 _sar(T_3, T_1);
1476 _test(T_1, BitTest);
Jan Voungbd385e42014-09-18 18:18:10 -07001477 _br(CondX86::Br_e, Label);
Jim Stichnoth47752552014-10-13 17:15:08 -07001478 // T_2 and T_3 are being assigned again because of the
1479 // intra-block control flow, so T_2 needs the _mov_nonkillable
1480 // variant to avoid liveness problems. T_3 doesn't need special
1481 // treatment because it is reassigned via _sar instead of _mov.
1482 _mov_nonkillable(T_2, T_3);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001483 _sar(T_3, SignExtend);
1484 Context.insert(Label);
1485 _mov(DestLo, T_2);
1486 _mov(DestHi, T_3);
1487 } break;
1488 case InstArithmetic::Udiv: {
1489 const SizeT MaxSrcs = 2;
Jim Stichnothc4508792015-03-01 23:12:55 -08001490 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001491 Call->addArg(Inst->getSrc(0));
1492 Call->addArg(Inst->getSrc(1));
1493 lowerCall(Call);
1494 } break;
1495 case InstArithmetic::Sdiv: {
1496 const SizeT MaxSrcs = 2;
Jim Stichnothc4508792015-03-01 23:12:55 -08001497 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001498 Call->addArg(Inst->getSrc(0));
1499 Call->addArg(Inst->getSrc(1));
1500 lowerCall(Call);
1501 } break;
1502 case InstArithmetic::Urem: {
1503 const SizeT MaxSrcs = 2;
Jim Stichnothc4508792015-03-01 23:12:55 -08001504 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001505 Call->addArg(Inst->getSrc(0));
1506 Call->addArg(Inst->getSrc(1));
1507 lowerCall(Call);
1508 } break;
1509 case InstArithmetic::Srem: {
1510 const SizeT MaxSrcs = 2;
Jim Stichnothc4508792015-03-01 23:12:55 -08001511 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001512 Call->addArg(Inst->getSrc(0));
1513 Call->addArg(Inst->getSrc(1));
1514 lowerCall(Call);
1515 } break;
1516 case InstArithmetic::Fadd:
1517 case InstArithmetic::Fsub:
1518 case InstArithmetic::Fmul:
1519 case InstArithmetic::Fdiv:
1520 case InstArithmetic::Frem:
1521 llvm_unreachable("FP instruction with i64 type");
1522 break;
1523 }
Matt Wala8d1072e2014-07-11 15:43:51 -07001524 } else if (isVectorType(Dest->getType())) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001525 // TODO: Trap on integer divide and integer modulo by zero.
1526 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
Jim Stichnothf79d2cb2015-03-23 15:10:54 -07001527 if (llvm::isa<OperandX8632Mem>(Src1))
1528 Src1 = legalizeToVar(Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001529 switch (Inst->getOp()) {
1530 case InstArithmetic::_num:
1531 llvm_unreachable("Unknown arithmetic operator");
1532 break;
Matt Wala7fa22d82014-07-17 12:41:31 -07001533 case InstArithmetic::Add: {
1534 Variable *T = makeReg(Dest->getType());
1535 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001536 _padd(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001537 _movp(Dest, T);
1538 } break;
1539 case InstArithmetic::And: {
1540 Variable *T = makeReg(Dest->getType());
1541 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001542 _pand(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001543 _movp(Dest, T);
1544 } break;
1545 case InstArithmetic::Or: {
1546 Variable *T = makeReg(Dest->getType());
1547 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001548 _por(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001549 _movp(Dest, T);
1550 } break;
1551 case InstArithmetic::Xor: {
1552 Variable *T = makeReg(Dest->getType());
1553 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001554 _pxor(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001555 _movp(Dest, T);
1556 } break;
1557 case InstArithmetic::Sub: {
1558 Variable *T = makeReg(Dest->getType());
1559 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001560 _psub(T, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001561 _movp(Dest, T);
1562 } break;
1563 case InstArithmetic::Mul: {
Matt Wala0a450512014-07-30 12:44:39 -07001564 bool TypesAreValidForPmull =
1565 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1566 bool InstructionSetIsValidForPmull =
1567 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1568 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1569 Variable *T = makeReg(Dest->getType());
1570 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001571 _pmull(T, Src1);
Matt Wala0a450512014-07-30 12:44:39 -07001572 _movp(Dest, T);
1573 } else if (Dest->getType() == IceType_v4i32) {
Matt Wala7fa22d82014-07-17 12:41:31 -07001574 // Lowering sequence:
1575 // Note: The mask arguments have index 0 on the left.
1576 //
1577 // movups T1, Src0
1578 // pshufd T2, Src0, {1,0,3,0}
1579 // pshufd T3, Src1, {1,0,3,0}
1580 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1581 // pmuludq T1, Src1
1582 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1583 // pmuludq T2, T3
1584 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1585 // shufps T1, T2, {0,2,0,2}
1586 // pshufd T4, T1, {0,2,1,3}
1587 // movups Dest, T4
Matt Wala7fa22d82014-07-17 12:41:31 -07001588
1589 // Mask that directs pshufd to create a vector with entries
1590 // Src[1, 0, 3, 0]
1591 const unsigned Constant1030 = 0x31;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001592 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
Matt Wala7fa22d82014-07-17 12:41:31 -07001593 // Mask that directs shufps to create a vector with entries
1594 // Dest[0, 2], Src[0, 2]
1595 const unsigned Mask0202 = 0x88;
1596 // Mask that directs pshufd to create a vector with entries
1597 // Src[0, 2, 1, 3]
1598 const unsigned Mask0213 = 0xd8;
1599 Variable *T1 = makeReg(IceType_v4i32);
1600 Variable *T2 = makeReg(IceType_v4i32);
1601 Variable *T3 = makeReg(IceType_v4i32);
1602 Variable *T4 = makeReg(IceType_v4i32);
1603 _movp(T1, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001604 _pshufd(T2, Src0, Mask1030);
1605 _pshufd(T3, Src1, Mask1030);
1606 _pmuludq(T1, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001607 _pmuludq(T2, T3);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001608 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1609 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
Matt Wala7fa22d82014-07-17 12:41:31 -07001610 _movp(Dest, T4);
Matt Wala7fa22d82014-07-17 12:41:31 -07001611 } else {
1612 assert(Dest->getType() == IceType_v16i8);
Matt Walaafeaee42014-08-07 13:47:30 -07001613 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
Matt Wala7fa22d82014-07-17 12:41:31 -07001614 }
1615 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001616 case InstArithmetic::Shl:
1617 case InstArithmetic::Lshr:
1618 case InstArithmetic::Ashr:
1619 case InstArithmetic::Udiv:
1620 case InstArithmetic::Urem:
1621 case InstArithmetic::Sdiv:
1622 case InstArithmetic::Srem:
1623 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1624 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001625 case InstArithmetic::Fadd: {
1626 Variable *T = makeReg(Dest->getType());
1627 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001628 _addps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001629 _movp(Dest, T);
1630 } break;
1631 case InstArithmetic::Fsub: {
1632 Variable *T = makeReg(Dest->getType());
1633 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001634 _subps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001635 _movp(Dest, T);
1636 } break;
1637 case InstArithmetic::Fmul: {
1638 Variable *T = makeReg(Dest->getType());
1639 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001640 _mulps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001641 _movp(Dest, T);
1642 } break;
1643 case InstArithmetic::Fdiv: {
1644 Variable *T = makeReg(Dest->getType());
1645 _movp(T, Src0);
Matt Walad4799f42014-08-14 14:24:12 -07001646 _divps(T, Src1);
Matt Wala8d1072e2014-07-11 15:43:51 -07001647 _movp(Dest, T);
1648 } break;
Matt Walaafeaee42014-08-07 13:47:30 -07001649 case InstArithmetic::Frem:
1650 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1651 break;
Matt Wala8d1072e2014-07-11 15:43:51 -07001652 }
1653 } else { // Dest->getType() is non-i64 scalar
Jim Stichnothae953202014-12-20 06:17:49 -08001654 Variable *T_edx = nullptr;
1655 Variable *T = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001656 switch (Inst->getOp()) {
Jim Stichnoth4376d292014-05-23 13:39:02 -07001657 case InstArithmetic::_num:
1658 llvm_unreachable("Unknown arithmetic operator");
1659 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001660 case InstArithmetic::Add:
1661 _mov(T, Src0);
1662 _add(T, Src1);
1663 _mov(Dest, T);
1664 break;
1665 case InstArithmetic::And:
1666 _mov(T, Src0);
1667 _and(T, Src1);
1668 _mov(Dest, T);
1669 break;
1670 case InstArithmetic::Or:
1671 _mov(T, Src0);
1672 _or(T, Src1);
1673 _mov(Dest, T);
1674 break;
1675 case InstArithmetic::Xor:
1676 _mov(T, Src0);
1677 _xor(T, Src1);
1678 _mov(Dest, T);
1679 break;
1680 case InstArithmetic::Sub:
1681 _mov(T, Src0);
1682 _sub(T, Src1);
1683 _mov(Dest, T);
1684 break;
1685 case InstArithmetic::Mul:
1686 // TODO: Optimize for llvm::isa<Constant>(Src1)
1687 // TODO: Strength-reduce multiplications by a constant,
1688 // particularly -1 and powers of 2. Advanced: use lea to
1689 // multiply by 3, 5, 9.
1690 //
1691 // The 8-bit version of imul only allows the form "imul r/m8"
1692 // where T must be in eax.
Jan Voung0ac50dc2014-09-30 08:36:06 -07001693 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001694 _mov(T, Src0, RegX8632::Reg_eax);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001695 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1696 } else {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001697 _mov(T, Src0);
Jan Voung0ac50dc2014-09-30 08:36:06 -07001698 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001699 _imul(T, Src1);
1700 _mov(Dest, T);
1701 break;
1702 case InstArithmetic::Shl:
1703 _mov(T, Src0);
1704 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001705 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001706 _shl(T, Src1);
1707 _mov(Dest, T);
1708 break;
1709 case InstArithmetic::Lshr:
1710 _mov(T, Src0);
1711 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001712 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001713 _shr(T, Src1);
1714 _mov(Dest, T);
1715 break;
1716 case InstArithmetic::Ashr:
1717 _mov(T, Src0);
1718 if (!llvm::isa<Constant>(Src1))
Jim Stichnothad403532014-09-25 12:44:17 -07001719 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001720 _sar(T, Src1);
1721 _mov(Dest, T);
1722 break;
1723 case InstArithmetic::Udiv:
Jan Voung70d68832014-06-17 10:02:37 -07001724 // div and idiv are the few arithmetic operators that do not allow
1725 // immediates as the operand.
1726 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001727 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnothae953202014-12-20 06:17:49 -08001728 Variable *T_ah = nullptr;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001729 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001730 _mov(T, Src0, RegX8632::Reg_eax);
1731 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001732 _div(T, Src1, T_ah);
1733 _mov(Dest, T);
1734 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001735 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001736 _mov(T, Src0, RegX8632::Reg_eax);
1737 _mov(T_edx, Zero, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001738 _div(T, Src1, T_edx);
1739 _mov(Dest, T);
1740 }
1741 break;
1742 case InstArithmetic::Sdiv:
Jan Voung70d68832014-06-17 10:02:37 -07001743 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001744 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001745 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001746 _cbwdq(T, T);
1747 _idiv(T, Src1, T);
1748 _mov(Dest, T);
1749 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001750 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1751 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001752 _cbwdq(T_edx, T);
1753 _idiv(T, Src1, T_edx);
1754 _mov(Dest, T);
1755 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001756 break;
1757 case InstArithmetic::Urem:
Jan Voung70d68832014-06-17 10:02:37 -07001758 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001759 if (isByteSizedArithType(Dest->getType())) {
Jim Stichnothae953202014-12-20 06:17:49 -08001760 Variable *T_ah = nullptr;
Matt Wala43ff7eb2014-06-18 10:30:07 -07001761 Constant *Zero = Ctx->getConstantZero(IceType_i8);
Jan Voungbd385e42014-09-18 18:18:10 -07001762 _mov(T, Src0, RegX8632::Reg_eax);
1763 _mov(T_ah, Zero, RegX8632::Reg_ah);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001764 _div(T_ah, Src1, T);
1765 _mov(Dest, T_ah);
1766 } else {
Matt Wala43ff7eb2014-06-18 10:30:07 -07001767 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jan Voungbd385e42014-09-18 18:18:10 -07001768 _mov(T_edx, Zero, RegX8632::Reg_edx);
1769 _mov(T, Src0, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001770 _div(T_edx, Src1, T);
1771 _mov(Dest, T_edx);
1772 }
1773 break;
1774 case InstArithmetic::Srem:
Jan Voung70d68832014-06-17 10:02:37 -07001775 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jan Voung3a569182014-09-29 10:16:01 -07001776 if (isByteSizedArithType(Dest->getType())) {
Jan Voungbd385e42014-09-18 18:18:10 -07001777 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
1778 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001779 _cbwdq(T, T);
1780 Context.insert(InstFakeDef::create(Func, T_ah));
1781 _idiv(T_ah, Src1, T);
1782 _mov(Dest, T_ah);
1783 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07001784 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1785 _mov(T, Src0, RegX8632::Reg_eax);
Matt Walaafeaee42014-08-07 13:47:30 -07001786 _cbwdq(T_edx, T);
1787 _idiv(T_edx, Src1, T);
1788 _mov(Dest, T_edx);
1789 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001790 break;
1791 case InstArithmetic::Fadd:
1792 _mov(T, Src0);
1793 _addss(T, Src1);
1794 _mov(Dest, T);
1795 break;
1796 case InstArithmetic::Fsub:
1797 _mov(T, Src0);
1798 _subss(T, Src1);
1799 _mov(Dest, T);
1800 break;
1801 case InstArithmetic::Fmul:
1802 _mov(T, Src0);
1803 _mulss(T, Src1);
1804 _mov(Dest, T);
1805 break;
1806 case InstArithmetic::Fdiv:
1807 _mov(T, Src0);
1808 _divss(T, Src1);
1809 _mov(Dest, T);
1810 break;
1811 case InstArithmetic::Frem: {
1812 const SizeT MaxSrcs = 2;
1813 Type Ty = Dest->getType();
Jim Stichnothc4508792015-03-01 23:12:55 -08001814 InstCall *Call =
1815 makeHelperCall(isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64,
1816 Dest, MaxSrcs);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001817 Call->addArg(Src0);
1818 Call->addArg(Src1);
1819 return lowerCall(Call);
1820 } break;
1821 }
1822 }
1823}
1824
1825void TargetX8632::lowerAssign(const InstAssign *Inst) {
1826 Variable *Dest = Inst->getDest();
1827 Operand *Src0 = Inst->getSrc(0);
1828 assert(Dest->getType() == Src0->getType());
1829 if (Dest->getType() == IceType_i64) {
1830 Src0 = legalize(Src0);
1831 Operand *Src0Lo = loOperand(Src0);
1832 Operand *Src0Hi = hiOperand(Src0);
1833 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1834 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Jim Stichnothae953202014-12-20 06:17:49 -08001835 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001836 _mov(T_Lo, Src0Lo);
1837 _mov(DestLo, T_Lo);
1838 _mov(T_Hi, Src0Hi);
1839 _mov(DestHi, T_Hi);
1840 } else {
Jim Stichnoth5bc44312015-03-02 16:56:20 -08001841 Operand *RI;
1842 if (Dest->hasReg())
1843 // If Dest already has a physical register, then legalize the
1844 // Src operand into a Variable with the same register
1845 // assignment. This is mostly a workaround for advanced phi
1846 // lowering's ad-hoc register allocation which assumes no
1847 // register allocation is needed when at least one of the
1848 // operands is non-memory.
1849 RI = legalize(Src0, Legal_Reg, Dest->getRegNum());
1850 else
1851 // If Dest could be a stack operand, then RI must be a physical
1852 // register or a scalar integer immediate.
1853 RI = legalize(Src0, Legal_Reg | Legal_Imm);
Matt Wala45a06232014-07-09 16:33:22 -07001854 if (isVectorType(Dest->getType()))
1855 _movp(Dest, RI);
1856 else
1857 _mov(Dest, RI);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001858 }
1859}
1860
1861void TargetX8632::lowerBr(const InstBr *Inst) {
1862 if (Inst->isUnconditional()) {
1863 _br(Inst->getTargetUnconditional());
Jim Stichnotha59ae6f2015-05-17 10:11:41 -07001864 return;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001865 }
Jim Stichnotha59ae6f2015-05-17 10:11:41 -07001866 Operand *Cond = Inst->getCondition();
1867
1868 // Handle folding opportunities.
1869 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
1870 assert(Producer->isDeleted());
1871 switch (BoolFolding::getProducerKind(Producer)) {
1872 default:
1873 break;
1874 case BoolFolding::PK_Icmp32: {
1875 // TODO(stichnot): Refactor similarities between this block and
1876 // the corresponding code in lowerIcmp().
1877 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
1878 Operand *Src0 = Producer->getSrc(0);
1879 Operand *Src1 = legalize(Producer->getSrc(1));
1880 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
1881 _cmp(Src0RM, Src1);
1882 _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
1883 Inst->getTargetFalse());
1884 return;
1885 }
1886 }
1887 }
1888
1889 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
1890 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1891 _cmp(Src0, Zero);
1892 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001893}
1894
1895void TargetX8632::lowerCall(const InstCall *Instr) {
Matt Wala105b7042014-08-11 19:56:19 -07001896 // x86-32 calling convention:
1897 //
1898 // * At the point before the call, the stack must be aligned to 16
1899 // bytes.
1900 //
1901 // * The first four arguments of vector type, regardless of their
1902 // position relative to the other arguments in the argument list, are
1903 // placed in registers xmm0 - xmm3.
1904 //
1905 // * Other arguments are pushed onto the stack in right-to-left order,
1906 // such that the left-most argument ends up on the top of the stack at
1907 // the lowest memory address.
1908 //
1909 // * Stack arguments of vector type are aligned to start at the next
1910 // highest multiple of 16 bytes. Other stack arguments are aligned to
1911 // 4 bytes.
1912 //
1913 // This intends to match the section "IA-32 Function Calling
1914 // Convention" of the document "OS X ABI Function Call Guide" by
1915 // Apple.
1916 NeedsStackAlignment = true;
1917
Jim Stichnoth1502e592014-12-11 09:22:45 -08001918 typedef std::vector<Operand *> OperandList;
Matt Wala105b7042014-08-11 19:56:19 -07001919 OperandList XmmArgs;
1920 OperandList StackArgs, StackArgLocations;
1921 uint32_t ParameterAreaSizeBytes = 0;
1922
Matt Wala45a06232014-07-09 16:33:22 -07001923 // Classify each argument operand according to the location where the
1924 // argument is passed.
Matt Wala45a06232014-07-09 16:33:22 -07001925 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
1926 Operand *Arg = Instr->getArg(i);
Matt Wala105b7042014-08-11 19:56:19 -07001927 Type Ty = Arg->getType();
1928 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
Jan Voung3a569182014-09-29 10:16:01 -07001929 assert(typeWidthInBytes(Ty) >= 4);
Matt Wala105b7042014-08-11 19:56:19 -07001930 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
Matt Wala45a06232014-07-09 16:33:22 -07001931 XmmArgs.push_back(Arg);
1932 } else {
1933 StackArgs.push_back(Arg);
Matt Wala105b7042014-08-11 19:56:19 -07001934 if (isVectorType(Arg->getType())) {
1935 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1936 }
Jan Voungbd385e42014-09-18 18:18:10 -07001937 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08001938 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
Matt Wala105b7042014-08-11 19:56:19 -07001939 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
1940 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
Matt Wala45a06232014-07-09 16:33:22 -07001941 }
1942 }
Matt Wala105b7042014-08-11 19:56:19 -07001943
1944 // Adjust the parameter area so that the stack is aligned. It is
1945 // assumed that the stack is already aligned at the start of the
1946 // calling sequence.
1947 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1948
1949 // Subtract the appropriate amount for the argument area. This also
1950 // takes care of setting the stack adjustment during emission.
Matt Wala45a06232014-07-09 16:33:22 -07001951 //
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001952 // TODO: If for some reason the call instruction gets dead-code
1953 // eliminated after lowering, we would need to ensure that the
Matt Wala105b7042014-08-11 19:56:19 -07001954 // pre-call and the post-call esp adjustment get eliminated as well.
1955 if (ParameterAreaSizeBytes) {
1956 _adjust_stack(ParameterAreaSizeBytes);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001957 }
Matt Wala105b7042014-08-11 19:56:19 -07001958
1959 // Copy arguments that are passed on the stack to the appropriate
1960 // stack locations.
1961 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
1962 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
Matt Wala105b7042014-08-11 19:56:19 -07001963 }
1964
Matt Wala45a06232014-07-09 16:33:22 -07001965 // Copy arguments to be passed in registers to the appropriate
1966 // registers.
1967 // TODO: Investigate the impact of lowering arguments passed in
1968 // registers after lowering stack arguments as opposed to the other
1969 // way around. Lowering register arguments after stack arguments may
1970 // reduce register pressure. On the other hand, lowering register
1971 // arguments first (before stack arguments) may result in more compact
1972 // code, as the memory operand displacements may end up being smaller
1973 // before any stack adjustment is done.
1974 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Jim Stichnothad403532014-09-25 12:44:17 -07001975 Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i);
Matt Wala45a06232014-07-09 16:33:22 -07001976 // Generate a FakeUse of register arguments so that they do not get
1977 // dead code eliminated as a result of the FakeKill of scratch
1978 // registers after the call.
1979 Context.insert(InstFakeUse::create(Func, Reg));
1980 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001981 // Generate the call instruction. Assign its result to a temporary
1982 // with high register allocation weight.
1983 Variable *Dest = Instr->getDest();
Matt Wala45a06232014-07-09 16:33:22 -07001984 // ReturnReg doubles as ReturnRegLo as necessary.
Jim Stichnothae953202014-12-20 06:17:49 -08001985 Variable *ReturnReg = nullptr;
1986 Variable *ReturnRegHi = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001987 if (Dest) {
1988 switch (Dest->getType()) {
1989 case IceType_NUM:
1990 llvm_unreachable("Invalid Call dest type");
1991 break;
1992 case IceType_void:
1993 break;
1994 case IceType_i1:
1995 case IceType_i8:
1996 case IceType_i16:
1997 case IceType_i32:
Jan Voungbd385e42014-09-18 18:18:10 -07001998 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001999 break;
2000 case IceType_i64:
Jan Voungbd385e42014-09-18 18:18:10 -07002001 ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
2002 ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002003 break;
2004 case IceType_f32:
2005 case IceType_f64:
Jim Stichnothae953202014-12-20 06:17:49 -08002006 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
Matt Wala45a06232014-07-09 16:33:22 -07002007 // the fstp instruction.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002008 break;
Matt Wala928f1292014-07-07 16:50:46 -07002009 case IceType_v4i1:
2010 case IceType_v8i1:
2011 case IceType_v16i1:
2012 case IceType_v16i8:
2013 case IceType_v8i16:
2014 case IceType_v4i32:
Matt Wala45a06232014-07-09 16:33:22 -07002015 case IceType_v4f32:
Jan Voungbd385e42014-09-18 18:18:10 -07002016 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
Matt Wala45a06232014-07-09 16:33:22 -07002017 break;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002018 }
2019 }
Jim Stichnothdd165072014-11-02 09:41:45 -08002020 Operand *CallTarget = legalize(Instr->getCallTarget());
Jim Stichnoth9f42d8c2015-02-20 09:20:14 -08002021 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
2022 if (NeedSandboxing) {
2023 if (llvm::isa<Constant>(CallTarget)) {
2024 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2025 } else {
2026 Variable *CallTargetVar = nullptr;
2027 _mov(CallTargetVar, CallTarget);
2028 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2029 const SizeT BundleSize =
2030 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
2031 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
2032 CallTarget = CallTargetVar;
2033 }
2034 }
Matt Wala45a06232014-07-09 16:33:22 -07002035 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002036 Context.insert(NewCall);
Jim Stichnoth9f42d8c2015-02-20 09:20:14 -08002037 if (NeedSandboxing)
2038 _bundle_unlock();
Matt Wala45a06232014-07-09 16:33:22 -07002039 if (ReturnRegHi)
2040 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002041
Matt Wala105b7042014-08-11 19:56:19 -07002042 // Add the appropriate offset to esp. The call instruction takes care
2043 // of resetting the stack offset during emission.
2044 if (ParameterAreaSizeBytes) {
Jan Voungbd385e42014-09-18 18:18:10 -07002045 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002046 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002047 }
2048
2049 // Insert a register-kill pseudo instruction.
Jim Stichnoth87ff3a12014-11-14 10:27:29 -08002050 Context.insert(InstFakeKill::create(Func, NewCall));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002051
2052 // Generate a FakeUse to keep the call live if necessary.
Matt Wala45a06232014-07-09 16:33:22 -07002053 if (Instr->hasSideEffects() && ReturnReg) {
2054 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002055 Context.insert(FakeUse);
2056 }
Matt Wala8d1072e2014-07-11 15:43:51 -07002057
Matt Wala45a06232014-07-09 16:33:22 -07002058 if (!Dest)
2059 return;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002060
Matt Wala45a06232014-07-09 16:33:22 -07002061 // Assign the result of the call to Dest.
2062 if (ReturnReg) {
2063 if (ReturnRegHi) {
2064 assert(Dest->getType() == IceType_i64);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002065 split64(Dest);
2066 Variable *DestLo = Dest->getLo();
2067 Variable *DestHi = Dest->getHi();
Matt Wala45a06232014-07-09 16:33:22 -07002068 _mov(DestLo, ReturnReg);
2069 _mov(DestHi, ReturnRegHi);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002070 } else {
Matt Wala45a06232014-07-09 16:33:22 -07002071 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
2072 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
2073 isVectorType(Dest->getType()));
Matt Wala45a06232014-07-09 16:33:22 -07002074 if (isVectorType(Dest->getType())) {
2075 _movp(Dest, ReturnReg);
2076 } else {
2077 _mov(Dest, ReturnReg);
2078 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002079 }
Jan Voung3a569182014-09-29 10:16:01 -07002080 } else if (isScalarFloatingType(Dest->getType())) {
Matt Wala45a06232014-07-09 16:33:22 -07002081 // Special treatment for an FP function which returns its result in
2082 // st(0).
Matt Wala45a06232014-07-09 16:33:22 -07002083 // If Dest ends up being a physical xmm register, the fstp emit code
2084 // will route st(0) through a temporary stack slot.
Jim Stichnotha5229722014-09-12 13:06:09 -07002085 _fstp(Dest);
2086 // Create a fake use of Dest in case it actually isn't used,
2087 // because st(0) still needs to be popped.
2088 Context.insert(InstFakeUse::create(Func, Dest));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07002089 }
2090}
2091
// Lowers a single high-level cast instruction to x86-32 machine
// instructions.  Dispatches on the cast kind; within each kind there are
// separate paths for vector types, for 64-bit scalars (which are split
// into lo/hi 32-bit halves on x86-32), and for ordinary scalars.  Some
// conversions with no direct SSE2 encoding (e.g. fp<->i64, uitofp) are
// lowered to runtime helper calls instead.
void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    // Src0RM is the source operand legalized to physical register or memory,
    // but not immediate, since the relevant x86 native instructions don't
    // allow an immediate operand.  If the operand is an immediate, we could
    // consider computing the strength-reduced result at translation time,
    // but we're unlikely to see something like that in the bitcode that
    // the optimizer wouldn't have already taken care of.
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      Type DestTy = Dest->getType();
      if (DestTy == IceType_v16i8) {
        // No byte-granularity vector shift exists, so compare against zero:
        // onemask = materialize(1,1,...); dst = (src & onemask) > 0
        Variable *OneMask = makeVectorOfOnes(Dest->getType());
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _pand(T, OneMask);
        Variable *Zeros = makeVectorOfZeros(Dest->getType());
        _pcmpgt(T, Zeros);
        _movp(Dest, T);
      } else {
        // Shift each lane's (boolean) low bit up to the sign position and
        // arithmetic-shift it back down to replicate it across the lane:
        // width = width(elty) - 1; dest = (src << width) >> width
        SizeT ShiftAmount =
            X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
        Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _psll(T, ShiftConstant);
        _psra(T, ShiftConstant);
        _movp(Dest, T);
      }
    } else if (Dest->getType() == IceType_i64) {
      // The high half is the low half's sign bit smeared across 32 bits:
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Constant *Shift = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        // Already 32 bits wide; a plain move suffices.
        _mov(T_Lo, Src0RM);
      } else if (Src0RM->getType() == IceType_i1) {
        // i1 has no movsx form; zero-extend then shl/sar to smear the bit.
        _movzx(T_Lo, Src0RM);
        _shl(T_Lo, Shift);
        _sar(T_Lo, Shift);
      } else {
        _movsx(T_Lo, Src0RM);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = nullptr;
      _mov(T_Hi, T_Lo);
      if (Src0RM->getType() != IceType_i1)
        // For i1, the sar instruction is already done above.
        _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else if (Src0RM->getType() == IceType_i1) {
      // Smear the i1 bit across the destination width:
      // t1 = src
      // shl t1, dst_bitwidth - 1
      // sar t1, dst_bitwidth - 1
      // dst = t1
      size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
      Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
      Variable *T = makeReg(Dest->getType());
      if (typeWidthInBytes(Dest->getType()) <=
          typeWidthInBytes(Src0RM->getType())) {
        _mov(T, Src0RM);
      } else {
        // Widen the source using movsx or movzx.  (It doesn't matter
        // which one, since the following shl/sar overwrite the bits.)
        _movzx(T, Src0RM);
      }
      _shl(T, ShiftAmount);
      _sar(T, ShiftAmount);
      _mov(Dest, T);
    } else {
      // Plain scalar widening: t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      // Vector source is a boolean vector; keep only the low bit per lane:
      // onemask = materialize(1,1,...); dest = onemask & src
      Type DestTy = Dest->getType();
      Variable *OneMask = makeVectorOfOnes(DestTy);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        // 32->64: no movzx form for same-width; plain move.
        _mov(Tmp, Src0RM);
      } else {
        _movzx(Tmp, Src0RM);
      }
      if (Src0RM->getType() == IceType_i1) {
        // An i1's upper bits aren't guaranteed clear; mask to the low bit.
        Constant *One = Ctx->getConstantInt32(1);
        _and(Tmp, One);
      }
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      Constant *One = Ctx->getConstantInt32(1);
      Type DestTy = Dest->getType();
      Variable *T;
      if (DestTy == IceType_i8) {
        T = makeReg(DestTy);
        _mov(T, Src0RM);
      } else {
        // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
        T = makeReg(IceType_i32);
        _movzx(T, Src0RM);
      }
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      // Vector truncation targets a boolean vector; keep the low bit:
      // onemask = materialize(1,1,...); dst = src & onemask
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Type Src0Ty = Src0RM->getType();
      Variable *OneMask = makeVectorOfOnes(Src0Ty);
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else {
      Operand *Src0 = Inst->getSrc(0);
      // Truncating an i64 only ever needs its low 32-bit half.
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // t1 = trunc Src0RM; Dest = t1
      Variable *T = nullptr;
      _mov(T, Src0RM);
      if (Dest->getType() == IceType_i1)
        // i1 results must be canonicalized to 0 or 1.
        _and(T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    // Both directions are a single float<->double conversion:
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM, InstX8632Cvt::Float2float);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // NOTE(review): the memory form is forced into a register here —
      // presumably cvttps2dq's memory operand has alignment constraints
      // the legalized operand can't guarantee; confirm.
      if (llvm::isa<OperandX8632Mem>(Src0RM))
        Src0RM = legalizeToVar(Src0RM);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
      _movp(Dest, T);
    } else if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers.  SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word.  This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call =
          makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
                                                           : H_fptosi_f64_i64,
                         Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // cvttss2si/cvttsd2si always produce a 32-bit integer; narrow after:
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        // i1 results must be canonicalized to 0 or 1.
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Fptoui:
    if (isVectorType(Dest->getType())) {
      // No SSE unsigned vector conversion; call a runtime helper.
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else if (Dest->getType() == IceType_i64 ||
               Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      // NOTE(review): split64 is also invoked when Dest is i32 —
      // presumably it is a no-op for non-i64 variables; confirm.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Inst->getSrc(0)->getType();
      // Select the helper by (src float width) x (dest int width).
      IceString TargetString;
      if (isInt32Asserting32Or64(DestType)) {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
                                                         : H_fptoui_f64_i32;
      } else {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
                                                         : H_fptoui_f64_i64;
      }
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Narrow unsigned dest (i16/i8/i1) fits in the signed 32-bit
      // conversion's range, so the signed cvt is safe here.
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Sitofp:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Inst->getSrc(0)->getType() == IceType_v4i32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // NOTE(review): memory operand forced into a register, same as the
      // Fptosi vector path — presumably an alignment constraint; confirm.
      if (llvm::isa<OperandX8632Mem>(Src0RM))
        Src0RM = legalizeToVar(Src0RM);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
      _movp(Dest, T);
    } else if (Inst->getSrc(0)->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call =
          makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
                                                            : H_sitofp_i64_f64,
                         Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      // NOTE(review): Si2ss is used for both f32 and f64 destinations —
      // presumably the cvt emitter selects cvtsi2ss vs cvtsi2sd from the
      // destination type; confirm.
      _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp: {
    Operand *Src0 = Inst->getSrc(0);
    if (isVectorType(Src0->getType())) {
      // No SSE unsigned vector conversion; call a runtime helper.
      assert(Dest->getType() == IceType_v4f32 &&
             Src0->getType() == IceType_v4i32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
    } else if (Src0->getType() == IceType_i64 ||
               Src0->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64.  Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      // Select the helper by (src int width) x (dest float width).
      IceString TargetString;
      if (isInt32Asserting32Or64(Src0->getType())) {
        TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
                                                          : H_uitofp_i32_f64;
      } else {
        TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
                                                          : H_uitofp_i64_f64;
      }
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
      return;
    } else {
      // Narrow unsigned source (i16/i8/i1): zero-extension makes the
      // value non-negative, so the signed cvt is exact.
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      // Same-type bitcast degenerates to a plain assignment.
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i8: {
      // v8i1 -> i8 bit packing is done by a runtime helper.
      assert(Src0->getType() == IceType_v8i1);
      InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i16: {
      // v16i1 -> i16 bit packing is done by a runtime helper.
      assert(Src0->getType() == IceType_v16i1);
      InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i32:
    case IceType_f32: {
      // i32 <-> f32: route the bits through a stack spill slot, since a
      // direct GPR<->XMM move isn't modeled here.
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      (void)DestType;
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      Variable *T = nullptr;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
      SpillVar->setLinkedTo(Dest);
      Variable *Spill = SpillVar;
      // Zero weight keeps the spill slot out of register allocation.
      Spill->setWeight(RegWeight::Zero);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      Operand *SpillLo, *SpillHi;
      if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
        // Register/variable source: spill it so the halves are addressable.
        SpillVariable *SpillVar =
            Func->makeVariable<SpillVariable>(IceType_f64);
        SpillVar->setLinkedTo(Src0Var);
        Variable *Spill = SpillVar;
        Spill->setWeight(RegWeight::Zero);
        _movq(Spill, Src0RM);
        SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low);
        SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High);
      } else {
        // Memory source: address the two 32-bit halves directly.
        SpillLo = loOperand(Src0RM);
        SpillHi = hiOperand(Src0RM);
      }

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      Src0 = legalize(Src0);
      assert(Src0->getType() == IceType_i64);
      if (llvm::isa<OperandX8632Mem>(Src0)) {
        // Memory source: movq can load/store all 64 bits at once.
        Variable *T = Func->makeVariable(Dest->getType());
        _movq(T, Src0);
        _movq(Dest, T);
        break;
      }
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   FakeDef(s.f64)
      //   lo(s.f64) = t_lo.i32
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
      SpillVar->setLinkedTo(Dest);
      Variable *Spill = SpillVar;
      Spill->setWeight(RegWeight::Zero);

      Variable *T_Lo = nullptr, *T_Hi = nullptr;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0));
      // Technically, the Spill is defined after the _store happens, but
      // SpillLo is considered a "use" of Spill so define Spill before it
      // is used.
      Context.insert(InstFakeDef::create(Func, Spill));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0));
      _store(T_Hi, SpillHi);
      _movq(Dest, Spill);
    } break;
    case IceType_v8i1: {
      // i8 -> v8i1 bit unpacking is done by a runtime helper.
      assert(Src0->getType() == IceType_i8);
      InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v16i1: {
      // i16 -> v16i1 bit unpacking is done by a runtime helper.
      assert(Src0->getType() == IceType_i16);
      InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4i32:
    case IceType_v4f32: {
      // Vector-to-vector bitcast is a register-level reinterpretation;
      // a single packed move suffices.
      _movp(Dest, legalizeToVar(Src0));
    } break;
    }
    break;
  }
  }
}
2561
Matt Wala49889232014-07-18 12:45:09 -07002562void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
Matt Walae3777672014-07-31 09:06:17 -07002563 Operand *SourceVectNotLegalized = Inst->getSrc(0);
Jan Voungbc004632014-09-16 15:09:10 -07002564 ConstantInteger32 *ElementIndex =
2565 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
Matt Wala49889232014-07-18 12:45:09 -07002566 // Only constant indices are allowed in PNaCl IR.
2567 assert(ElementIndex);
2568
2569 unsigned Index = ElementIndex->getValue();
Matt Walae3777672014-07-31 09:06:17 -07002570 Type Ty = SourceVectNotLegalized->getType();
Matt Wala49889232014-07-18 12:45:09 -07002571 Type ElementTy = typeElementType(Ty);
2572 Type InVectorElementTy = getInVectorElementType(Ty);
Matt Walae3777672014-07-31 09:06:17 -07002573 Variable *ExtractedElementR = makeReg(InVectorElementTy);
Matt Wala49889232014-07-18 12:45:09 -07002574
2575 // TODO(wala): Determine the best lowering sequences for each type.
Matt Wala0a450512014-07-30 12:44:39 -07002576 bool CanUsePextr =
2577 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2578 if (CanUsePextr && Ty != IceType_v4f32) {
2579 // Use pextrb, pextrw, or pextrd.
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002580 Constant *Mask = Ctx->getConstantInt32(Index);
Matt Walae3777672014-07-31 09:06:17 -07002581 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2582 _pextr(ExtractedElementR, SourceVectR, Mask);
Matt Wala0a450512014-07-30 12:44:39 -07002583 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2584 // Use pshufd and movd/movss.
Jim Stichnothae953202014-12-20 06:17:49 -08002585 Variable *T = nullptr;
Matt Wala49889232014-07-18 12:45:09 -07002586 if (Index) {
2587 // The shuffle only needs to occur if the element to be extracted
2588 // is not at the lowest index.
Jim Stichnothd2cb4362014-11-20 11:24:42 -08002589 Constant *Mask = Ctx->getConstantInt32(Index);
Matt Wala49889232014-07-18 12:45:09 -07002590 T = makeReg(Ty);
Matt Walad4799f42014-08-14 14:24:12 -07002591 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
Matt Wala49889232014-07-18 12:45:09 -07002592 } else {
Matt Walad4799f42014-08-14 14:24:12 -07002593 T = legalizeToVar(SourceVectNotLegalized);
Matt Wala49889232014-07-18 12:45:09 -07002594 }
2595
2596 if (InVectorElementTy == IceType_i32) {
Matt Walae3777672014-07-31 09:06:17 -07002597 _movd(ExtractedElementR, T);
Jan Voung3a569182014-09-29 10:16:01 -07002598 } else { // Ty == IceType_f32
Matt Walacfe51462014-07-25 15:57:56 -07002599 // TODO(wala): _movss is only used here because _mov does not
2600 // allow a vector source and a scalar destination. _mov should be
2601 // able to be used here.
2602 // _movss is a binary instruction, so the FakeDef is needed to
2603 // keep the live range analysis consistent.
Matt Walae3777672014-07-31 09:06:17 -07002604 Context.insert(InstFakeDef::create(Func, ExtractedElementR));
2605 _movss(ExtractedElementR, T);
Matt Wala49889232014-07-18 12:45:09 -07002606 }
Matt Wala49889232014-07-18 12:45:09 -07002607 } else {
2608 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2609 // Spill the value to a stack slot and do the extraction in memory.
Matt Wala49889232014-07-18 12:45:09 -07002610 //
Matt Walae3777672014-07-31 09:06:17 -07002611 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
Matt Wala49889232014-07-18 12:45:09 -07002612 // support for legalizing to mem is implemented.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07002613 Variable *Slot = Func->makeVariable(Ty);
Matt Wala49889232014-07-18 12:45:09 -07002614 Slot->setWeight(RegWeight::Zero);
Matt Walae3777672014-07-31 09:06:17 -07002615 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
Matt Wala49889232014-07-18 12:45:09 -07002616
2617 // Compute the location of the element in memory.
2618 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2619 OperandX8632Mem *Loc =
2620 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
Matt Walae3777672014-07-31 09:06:17 -07002621 _mov(ExtractedElementR, Loc);
Matt Wala49889232014-07-18 12:45:09 -07002622 }
2623
2624 if (ElementTy == IceType_i1) {
2625 // Truncate extracted integers to i1s if necessary.
2626 Variable *T = makeReg(IceType_i1);
2627 InstCast *Cast =
Matt Walae3777672014-07-31 09:06:17 -07002628 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
Matt Wala49889232014-07-18 12:45:09 -07002629 lowerCast(Cast);
Matt Walae3777672014-07-31 09:06:17 -07002630 ExtractedElementR = T;
Matt Wala49889232014-07-18 12:45:09 -07002631 }
2632
2633 // Copy the element to the destination.
2634 Variable *Dest = Inst->getDest();
Matt Walae3777672014-07-31 09:06:17 -07002635 _mov(Dest, ExtractedElementR);
Matt Wala49889232014-07-18 12:45:09 -07002636}
2637
// Lowers an fcmp instruction to x86-32 code. Vector comparisons are
// lowered with cmpps (per-lane masks); scalar comparisons use ucomiss
// plus conditional branches or setcc, driven by the TableFcmp entry for
// the condition.
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    InstFcmp::FCond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableFcmpSize);

    // Some conditions (per TableFcmp) are implemented by comparing the
    // operands in the opposite order.
    if (TableFcmp[Index].SwapVectorOperands) {
      Operand *T = Src0;
      Src0 = Src1;
      Src1 = T;
    }

    Variable *T = nullptr;

    if (Condition == InstFcmp::True) {
      // "True" is a constant all-ones mask, independent of the operands.
      // makeVectorOfOnes() requires an integer vector type.
      T = makeVectorOfMinusOnes(IceType_v4i32);
    } else if (Condition == InstFcmp::False) {
      // "False" is a constant all-zeros mask.
      T = makeVectorOfZeros(Dest->getType());
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);

      switch (Condition) {
      default: {
        // Single-cmpps case: the predicate immediate comes straight from
        // the lowering table.
        CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;
        assert(Predicate != CondX86::Cmpps_Invalid);
        T = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Predicate);
      } break;
      case InstFcmp::One: {
        // "One" (ordered not-equal) has no single cmpps predicate:
        // check both unequal and ordered, then AND the masks.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_neq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_ord);
        _pand(T, T2);
      } break;
      case InstFcmp::Ueq: {
        // "Ueq" (unordered or equal) likewise needs two comparisons:
        // check both equal or unordered, then OR the masks.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, CondX86::Cmpps_eq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, CondX86::Cmpps_unord);
        _por(T, T2);
      } break;
      }
    }

    _movp(Dest, T);
    // The result is already a full-width lane mask, so a following sext
    // of the i1 vector result is redundant and can be removed.
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // Scalar lowering of a = fcmp cond, b, c:
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  //
  // setcc lowering when C1 != Br_None && C2 == Br_None:
  //   ucomiss b, c       /* but swap b,c order if SwapOperands==true */
  //   setcc a, C1
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  if (TableFcmp[Index].SwapScalarOperands)
    std::swap(Src0, Src1);
  bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = nullptr;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
    if (!HasC2) {
      // Single-branch conditions can use setcc directly instead of the
      // mov/branch/mov sequence.
      assert(TableFcmp[Index].Default);
      _setcc(Dest, TableFcmp[Index].C1);
      return;
    }
  }
  Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    // _mov_nonkillable keeps Dest's earlier definition live across the
    // branch so liveness analysis stays consistent.
    Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);
    _mov_nonkillable(Dest, NonDefault);
    Context.insert(Label);
  }
}
2748
// Lowers an icmp instruction to x86-32 code. Vector comparisons use
// pcmpeq/pcmpgt (with sign-bit flipping to emulate unsigned compares);
// i64 comparisons compare hi/lo halves with branches; 32-bit-or-smaller
// scalars use cmp + setcc.
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    Type Ty = Src0->getType();
    // Promote i1 vectors to 128 bit integer vector types, via sext, so
    // the SSE2 packed-compare instructions can operate on them.
    if (typeElementType(Ty) == IceType_i1) {
      Type NewTy = IceType_NUM;
      switch (Ty) {
      default:
        llvm_unreachable("unexpected type");
        break;
      case IceType_v4i1:
        NewTy = IceType_v4i32;
        break;
      case IceType_v8i1:
        NewTy = IceType_v8i16;
        break;
      case IceType_v16i1:
        NewTy = IceType_v16i8;
        break;
      }
      Variable *NewSrc0 = Func->makeVariable(NewTy);
      Variable *NewSrc1 = Func->makeVariable(NewTy);
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
      Src0 = NewSrc0;
      Src1 = NewSrc1;
      Ty = NewTy;
    }

    InstIcmp::ICond Condition = Inst->getCondition();

    Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

    // SSE2 only has signed comparison operations. Transform unsigned
    // inputs in a manner that allows for the use of signed comparison
    // operations by flipping the high order bits.
    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
      Variable *T0 = makeReg(Ty);
      Variable *T1 = makeReg(Ty);
      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
      _movp(T0, Src0RM);
      _pxor(T0, HighOrderBits);
      _movp(T1, Src1RM);
      _pxor(T1, HighOrderBits);
      Src0RM = T0;
      Src1RM = T1;
    }

    // In each case below, the second operand of the packed compare must
    // be a register, so memory operands are re-legalized first.
    Variable *T = makeReg(Ty);
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    case InstIcmp::Eq: {
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
    } break;
    case InstIcmp::Ne: {
      // Ne := not(Eq), computed by XORing the Eq mask with all-ones.
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ugt:
    case InstIcmp::Sgt: {
      _movp(T, Src0RM);
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
    } break;
    case InstIcmp::Uge:
    case InstIcmp::Sge: {
      // Ge := !(Src1RM > Src0RM): compare with operands swapped, then
      // invert the mask.
      if (llvm::isa<OperandX8632Mem>(Src0RM))
        Src0RM = legalizeToVar(Src0RM);
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ult:
    case InstIcmp::Slt: {
      // Lt := (Src1RM > Src0RM): pcmpgt with the operands swapped.
      if (llvm::isa<OperandX8632Mem>(Src0RM))
        Src0RM = legalizeToVar(Src0RM);
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
    } break;
    case InstIcmp::Ule:
    case InstIcmp::Sle: {
      // Le := !(Src0RM > Src1RM): compare, then invert the mask.
      if (llvm::isa<OperandX8632Mem>(Src1RM))
        Src1RM = legalizeToVar(Src1RM);
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    }

    _movp(Dest, T);
    // The result is already a full-width lane mask, so a following sext
    // of the i1 vector result is redundant and can be removed.
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  if (Src0->getType() == IceType_i64) {
    // 64-bit compare: first compare the high halves; if that decides the
    // result (per TableIcmp64's C1/C2 conditions), branch; otherwise
    // compare the low halves with condition C3.
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    Constant *Zero = Ctx->getConstantZero(IceType_i32);
    Constant *One = Ctx->getConstantInt32(1);
    InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
    InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
    _mov(Dest, One);
    _cmp(Src0HiRM, Src1HiRI);
    if (TableIcmp64[Index].C1 != CondX86::Br_None)
      _br(TableIcmp64[Index].C1, LabelTrue);
    if (TableIcmp64[Index].C2 != CondX86::Br_None)
      _br(TableIcmp64[Index].C2, LabelFalse);
    _cmp(Src0LoRM, Src1LoRI);
    _br(TableIcmp64[Index].C3, LabelTrue);
    Context.insert(LabelFalse);
    // _mov_nonkillable keeps the earlier "Dest = One" definition live
    // across the branches for liveness analysis.
    _mov_nonkillable(Dest, Zero);
    Context.insert(LabelTrue);
    return;
  }

  // cmp b, c
  Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
  _cmp(Src0RM, Src1);
  _setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
}
2895
// Lowers an insertelement instruction. Strategy depends on the vector
// type and instruction set: pinsrw works on SSE2 for v8i16/v8i1, and
// insertps/pinsrb/pinsrd on >= SSE4.1; otherwise v4* types use a
// shufps/movss sequence, and v16i8/v16i1 fall back to a stack-slot
// store-then-reload.
void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);
  unsigned Index = ElementIndex->getValue();
  assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = getInVectorElementType(Ty);

  if (ElementTy == IceType_i1) {
    // Expand the element to the appropriate size for it to be inserted
    // in the vector.
    Variable *Expanded = Func->makeVariable(InVectorElementTy);
    InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                      ElementToInsertNotLegalized);
    lowerCast(Cast);
    ElementToInsertNotLegalized = Expanded;
  }

  if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
    // Use insertps, pinsrb, pinsrw, or pinsrd.
    Operand *ElementRM =
        legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
    Variable *T = makeReg(Ty);
    _movp(T, SourceVectRM);
    if (Ty == IceType_v4f32)
      // insertps encodes the destination lane in bits 4-5 of the
      // immediate, hence the shift.
      _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
    else
      _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
    _movp(Inst->getDest(), T);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use shufps or movss.
    Variable *ElementR = nullptr;
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

    if (InVectorElementTy == IceType_f32) {
      // ElementR will be in an XMM register since it is floating point.
      ElementR = legalizeToVar(ElementToInsertNotLegalized);
    } else {
      // Copy an integer to an XMM register.
      Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
      ElementR = makeReg(Ty);
      _movd(ElementR, T);
    }

    if (Index == 0) {
      // Inserting at index 0 only needs movss, which replaces the low
      // element and preserves the rest.
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _movss(T, ElementR);
      _movp(Inst->getDest(), T);
      return;
    }

    // shufps treats the source and desination operands as vectors of
    // four doublewords. The destination's two high doublewords are
    // selected from the source operand and the two low doublewords are
    // selected from the (original value of) the destination operand.
    // An insertelement operation can be effected with a sequence of two
    // shufps operations with appropriate masks. In all cases below,
    // Element[0] is being inserted into SourceVectOperand. Indices are
    // ordered from left to right.
    //
    // insertelement into index 1 (result is stored in ElementR):
    //   ElementR := ElementR[0, 0] SourceVectRM[0, 0]
    //   ElementR := ElementR[3, 0] SourceVectRM[2, 3]
    //
    // insertelement into index 2 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 3]
    //   T := T[0, 1] ElementR[0, 3]
    //
    // insertelement into index 3 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 2]
    //   T := T[0, 1] ElementR[3, 0]
    // Mask entries are indexed by (Index - 1) and encode the shuffle
    // selections described above as shufps immediates.
    const unsigned char Mask1[3] = {0, 192, 128};
    const unsigned char Mask2[3] = {227, 196, 52};

    Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
    Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);

    if (Index == 1) {
      _shufps(ElementR, SourceVectRM, Mask1Constant);
      _shufps(ElementR, SourceVectRM, Mask2Constant);
      _movp(Inst->getDest(), ElementR);
    } else {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _shufps(ElementR, T, Mask1Constant);
      _shufps(T, ElementR, Mask2Constant);
      _movp(Inst->getDest(), T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and perform the insertion in
    // memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    // Zero weight keeps the slot out of register allocation so it stays
    // on the stack.
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToVar(SourceVectNotLegalized));

    // Compute the location of the position to insert in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    OperandX8632Mem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _store(legalizeToVar(ElementToInsertNotLegalized), Loc);

    Variable *T = makeReg(Ty);
    _movp(T, Slot);
    _movp(Inst->getDest(), T);
  }
}
3018
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003019void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Jim Stichnoth1c335ef2015-03-18 09:01:52 -07003020 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003021 case Intrinsics::AtomicCmpxchg: {
Jim Stichnoth1c335ef2015-03-18 09:01:52 -07003022 if (!Intrinsics::isMemoryOrderValid(
3023 ID, getConstantMemoryOrder(Instr->getArg(3)),
3024 getConstantMemoryOrder(Instr->getArg(4)))) {
3025 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
Jan Voung5cd240d2014-06-25 10:36:46 -07003026 return;
3027 }
Jan Vounga3a01a22014-07-14 10:32:41 -07003028 Variable *DestPrev = Instr->getDest();
3029 Operand *PtrToMem = Instr->getArg(0);
3030 Operand *Expected = Instr->getArg(1);
3031 Operand *Desired = Instr->getArg(2);
Jan Voungc820ddf2014-07-29 14:38:51 -07003032 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
3033 return;
Jan Vounga3a01a22014-07-14 10:32:41 -07003034 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
Jan Voung5cd240d2014-06-25 10:36:46 -07003035 return;
Jan Vounga3a01a22014-07-14 10:32:41 -07003036 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003037 case Intrinsics::AtomicFence:
Jim Stichnoth1c335ef2015-03-18 09:01:52 -07003038 if (!Intrinsics::isMemoryOrderValid(
3039 ID, getConstantMemoryOrder(Instr->getArg(0)))) {
Jan Voung5cd240d2014-06-25 10:36:46 -07003040 Func->setError("Unexpected memory ordering for AtomicFence");
3041 return;
3042 }
3043 _mfence();
3044 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003045 case Intrinsics::AtomicFenceAll:
Jan Voung5cd240d2014-06-25 10:36:46 -07003046 // NOTE: FenceAll should prevent and load/store from being moved
3047 // across the fence (both atomic and non-atomic). The InstX8632Mfence
3048 // instruction is currently marked coarsely as "HasSideEffects".
3049 _mfence();
3050 return;
3051 case Intrinsics::AtomicIsLockFree: {
3052 // X86 is always lock free for 8/16/32/64 bit accesses.
3053 // TODO(jvoung): Since the result is constant when given a constant
3054 // byte size, this opens up DCE opportunities.
3055 Operand *ByteSize = Instr->getArg(0);
3056 Variable *Dest = Instr->getDest();
Jan Voungbc004632014-09-16 15:09:10 -07003057 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
Jan Voung5cd240d2014-06-25 10:36:46 -07003058 Constant *Result;
3059 switch (CI->getValue()) {
3060 default:
3061 // Some x86-64 processors support the cmpxchg16b intruction, which
3062 // can make 16-byte operations lock free (when used with the LOCK
3063 // prefix). However, that's not supported in 32-bit mode, so just
3064 // return 0 even for large sizes.
3065 Result = Ctx->getConstantZero(IceType_i32);
3066 break;
3067 case 1:
3068 case 2:
3069 case 4:
3070 case 8:
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003071 Result = Ctx->getConstantInt32(1);
Jan Voung5cd240d2014-06-25 10:36:46 -07003072 break;
3073 }
3074 _mov(Dest, Result);
3075 return;
3076 }
3077 // The PNaCl ABI requires the byte size to be a compile-time constant.
3078 Func->setError("AtomicIsLockFree byte size should be compile-time const");
3079 return;
3080 }
3081 case Intrinsics::AtomicLoad: {
3082 // We require the memory address to be naturally aligned.
3083 // Given that is the case, then normal loads are atomic.
Jim Stichnoth1c335ef2015-03-18 09:01:52 -07003084 if (!Intrinsics::isMemoryOrderValid(
3085 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
Jan Voung5cd240d2014-06-25 10:36:46 -07003086 Func->setError("Unexpected memory ordering for AtomicLoad");
3087 return;
3088 }
3089 Variable *Dest = Instr->getDest();
3090 if (Dest->getType() == IceType_i64) {
3091 // Follow what GCC does and use a movq instead of what lowerLoad()
3092 // normally does (split the load into two).
3093 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
3094 // can't happen anyway, since this is x86-32 and integer arithmetic only
3095 // happens on 32-bit quantities.
3096 Variable *T = makeReg(IceType_f64);
Jan Voungbefd03a2015-06-02 11:03:03 -07003097 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
Jan Voung5cd240d2014-06-25 10:36:46 -07003098 _movq(T, Addr);
3099 // Then cast the bits back out of the XMM register to the i64 Dest.
3100 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
3101 lowerCast(Cast);
Jan Vounge6e497d2014-07-30 10:06:03 -07003102 // Make sure that the atomic load isn't elided when unused.
Jan Voung5cd240d2014-06-25 10:36:46 -07003103 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
3104 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
3105 return;
3106 }
3107 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
3108 lowerLoad(Load);
Jan Vounge6e497d2014-07-30 10:06:03 -07003109 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
3110 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
3111 // insert the FakeUse on the last-inserted instruction's dest.
Jim Stichnoth6e992142014-07-30 14:45:20 -07003112 Context.insert(
3113 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
Jan Voung5cd240d2014-06-25 10:36:46 -07003114 return;
3115 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003116 case Intrinsics::AtomicRMW:
Jim Stichnoth1c335ef2015-03-18 09:01:52 -07003117 if (!Intrinsics::isMemoryOrderValid(
3118 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
Jan Voung5cd240d2014-06-25 10:36:46 -07003119 Func->setError("Unexpected memory ordering for AtomicRMW");
3120 return;
3121 }
John Portof8b4cc82015-06-09 18:06:19 -07003122 lowerAtomicRMW(Instr->getDest(),
3123 static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
3124 Instr->getArg(0))->getValue()),
3125 Instr->getArg(1), Instr->getArg(2));
Jan Voung5cd240d2014-06-25 10:36:46 -07003126 return;
3127 case Intrinsics::AtomicStore: {
Jim Stichnoth1c335ef2015-03-18 09:01:52 -07003128 if (!Intrinsics::isMemoryOrderValid(
3129 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
Jan Voung5cd240d2014-06-25 10:36:46 -07003130 Func->setError("Unexpected memory ordering for AtomicStore");
3131 return;
3132 }
3133 // We require the memory address to be naturally aligned.
3134 // Given that is the case, then normal stores are atomic.
3135 // Add a fence after the store to make it visible.
3136 Operand *Value = Instr->getArg(0);
3137 Operand *Ptr = Instr->getArg(1);
3138 if (Value->getType() == IceType_i64) {
3139 // Use a movq instead of what lowerStore() normally does
3140 // (split the store into two), following what GCC does.
3141 // Cast the bits from int -> to an xmm register first.
3142 Variable *T = makeReg(IceType_f64);
3143 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
3144 lowerCast(Cast);
3145 // Then store XMM w/ a movq.
Jan Voungbefd03a2015-06-02 11:03:03 -07003146 OperandX8632Mem *Addr = formMemoryOperand(Ptr, IceType_f64);
Jan Voung5cd240d2014-06-25 10:36:46 -07003147 _storeq(T, Addr);
3148 _mfence();
3149 return;
3150 }
3151 InstStore *Store = InstStore::create(Func, Value, Ptr);
3152 lowerStore(Store);
3153 _mfence();
3154 return;
3155 }
Jan Voung7fa813b2014-07-18 13:01:08 -07003156 case Intrinsics::Bswap: {
3157 Variable *Dest = Instr->getDest();
3158 Operand *Val = Instr->getArg(0);
3159 // In 32-bit mode, bswap only works on 32-bit arguments, and the
3160 // argument must be a register. Use rotate left for 16-bit bswap.
3161 if (Val->getType() == IceType_i64) {
3162 Variable *T_Lo = legalizeToVar(loOperand(Val));
3163 Variable *T_Hi = legalizeToVar(hiOperand(Val));
3164 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3165 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3166 _bswap(T_Lo);
3167 _bswap(T_Hi);
3168 _mov(DestLo, T_Hi);
3169 _mov(DestHi, T_Lo);
3170 } else if (Val->getType() == IceType_i32) {
3171 Variable *T = legalizeToVar(Val);
3172 _bswap(T);
3173 _mov(Dest, T);
3174 } else {
3175 assert(Val->getType() == IceType_i16);
3176 Val = legalize(Val);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003177 Constant *Eight = Ctx->getConstantInt16(8);
Jim Stichnothae953202014-12-20 06:17:49 -08003178 Variable *T = nullptr;
Jan Voung7fa813b2014-07-18 13:01:08 -07003179 _mov(T, Val);
3180 _rol(T, Eight);
3181 _mov(Dest, T);
3182 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003183 return;
Jan Voung7fa813b2014-07-18 13:01:08 -07003184 }
Jan Vounge4da26f2014-07-15 17:52:39 -07003185 case Intrinsics::Ctpop: {
3186 Variable *Dest = Instr->getDest();
3187 Operand *Val = Instr->getArg(0);
Jim Stichnothc4508792015-03-01 23:12:55 -08003188 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
3189 ? H_call_ctpop_i32
3190 : H_call_ctpop_i64,
3191 Dest, 1);
Jan Vounge4da26f2014-07-15 17:52:39 -07003192 Call->addArg(Val);
3193 lowerCall(Call);
3194 // The popcount helpers always return 32-bit values, while the intrinsic's
3195 // signature matches the native POPCNT instruction and fills a 64-bit reg
3196 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
3197 // the user doesn't do that in the IR. If the user does that in the IR,
3198 // then this zero'ing instruction is dead and gets optimized out.
3199 if (Val->getType() == IceType_i64) {
3200 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3201 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3202 _mov(DestHi, Zero);
3203 }
3204 return;
3205 }
3206 case Intrinsics::Ctlz: {
3207 // The "is zero undef" parameter is ignored and we always return
3208 // a well-defined value.
3209 Operand *Val = legalize(Instr->getArg(0));
3210 Operand *FirstVal;
Jim Stichnothae953202014-12-20 06:17:49 -08003211 Operand *SecondVal = nullptr;
Jan Vounge4da26f2014-07-15 17:52:39 -07003212 if (Val->getType() == IceType_i64) {
3213 FirstVal = loOperand(Val);
3214 SecondVal = hiOperand(Val);
3215 } else {
3216 FirstVal = Val;
3217 }
3218 const bool IsCttz = false;
3219 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3220 SecondVal);
3221 return;
3222 }
3223 case Intrinsics::Cttz: {
3224 // The "is zero undef" parameter is ignored and we always return
3225 // a well-defined value.
3226 Operand *Val = legalize(Instr->getArg(0));
3227 Operand *FirstVal;
Jim Stichnothae953202014-12-20 06:17:49 -08003228 Operand *SecondVal = nullptr;
Jan Vounge4da26f2014-07-15 17:52:39 -07003229 if (Val->getType() == IceType_i64) {
3230 FirstVal = hiOperand(Val);
3231 SecondVal = loOperand(Val);
3232 } else {
3233 FirstVal = Val;
3234 }
3235 const bool IsCttz = true;
3236 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3237 SecondVal);
3238 return;
3239 }
Jim Stichnoth8c980d02015-03-19 13:01:50 -07003240 case Intrinsics::Fabs: {
3241 Operand *Src = legalize(Instr->getArg(0));
3242 Type Ty = Src->getType();
3243 Variable *Dest = Instr->getDest();
3244 Variable *T = makeVectorOfFabsMask(Ty);
Jim Stichnoth5dfdf5f2015-03-20 12:56:07 -07003245 // The pand instruction operates on an m128 memory operand, so if
3246 // Src is an f32 or f64, we need to make sure it's in a register.
Jim Stichnothf79d2cb2015-03-23 15:10:54 -07003247 if (isVectorType(Ty)) {
3248 if (llvm::isa<OperandX8632Mem>(Src))
3249 Src = legalizeToVar(Src);
3250 } else {
Jim Stichnoth5dfdf5f2015-03-20 12:56:07 -07003251 Src = legalizeToVar(Src);
Jim Stichnothf79d2cb2015-03-23 15:10:54 -07003252 }
Jim Stichnoth8c980d02015-03-19 13:01:50 -07003253 _pand(T, Src);
3254 if (isVectorType(Ty))
3255 _movp(Dest, T);
3256 else
3257 _mov(Dest, T);
3258 return;
3259 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003260 case Intrinsics::Longjmp: {
Jim Stichnothc4508792015-03-01 23:12:55 -08003261 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003262 Call->addArg(Instr->getArg(0));
3263 Call->addArg(Instr->getArg(1));
3264 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003265 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003266 }
3267 case Intrinsics::Memcpy: {
3268 // In the future, we could potentially emit an inline memcpy/memset, etc.
3269 // for intrinsic calls w/ a known length.
Jim Stichnothc4508792015-03-01 23:12:55 -08003270 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003271 Call->addArg(Instr->getArg(0));
3272 Call->addArg(Instr->getArg(1));
3273 Call->addArg(Instr->getArg(2));
3274 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003275 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003276 }
3277 case Intrinsics::Memmove: {
Jim Stichnothc4508792015-03-01 23:12:55 -08003278 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003279 Call->addArg(Instr->getArg(0));
3280 Call->addArg(Instr->getArg(1));
3281 Call->addArg(Instr->getArg(2));
3282 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003283 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003284 }
3285 case Intrinsics::Memset: {
3286 // The value operand needs to be extended to a stack slot size
Matt Wala105b7042014-08-11 19:56:19 -07003287 // because the PNaCl ABI requires arguments to be at least 32 bits
3288 // wide.
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003289 Operand *ValOp = Instr->getArg(1);
3290 assert(ValOp->getType() == IceType_i8);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003291 Variable *ValExt = Func->makeVariable(stackSlotType());
Jan Voung957c50d2014-07-21 14:05:29 -07003292 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
Jim Stichnothc4508792015-03-01 23:12:55 -08003293 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003294 Call->addArg(Instr->getArg(0));
3295 Call->addArg(ValExt);
3296 Call->addArg(Instr->getArg(2));
3297 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003298 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003299 }
3300 case Intrinsics::NaClReadTP: {
Karl Schimpfdf80eb82015-02-09 14:20:22 -08003301 if (Ctx->getFlags().getUseSandboxing()) {
Jim Stichnothbfb03e52014-08-26 10:29:05 -07003302 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnothae953202014-12-20 06:17:49 -08003303 Operand *Src =
3304 OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0,
3305 OperandX8632Mem::SegReg_GS);
Jim Stichnothbfb03e52014-08-26 10:29:05 -07003306 Variable *Dest = Instr->getDest();
Jim Stichnothae953202014-12-20 06:17:49 -08003307 Variable *T = nullptr;
Jim Stichnothbfb03e52014-08-26 10:29:05 -07003308 _mov(T, Src);
3309 _mov(Dest, T);
3310 } else {
Jim Stichnothc4508792015-03-01 23:12:55 -08003311 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
Jim Stichnothbfb03e52014-08-26 10:29:05 -07003312 lowerCall(Call);
3313 }
Jan Voung5cd240d2014-06-25 10:36:46 -07003314 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003315 }
3316 case Intrinsics::Setjmp: {
Jim Stichnothc4508792015-03-01 23:12:55 -08003317 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003318 Call->addArg(Instr->getArg(0));
3319 lowerCall(Call);
Jan Voung5cd240d2014-06-25 10:36:46 -07003320 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003321 }
Jan Voungf37fbbe2014-07-09 16:13:13 -07003322 case Intrinsics::Sqrt: {
3323 Operand *Src = legalize(Instr->getArg(0));
3324 Variable *Dest = Instr->getDest();
3325 Variable *T = makeReg(Dest->getType());
3326 _sqrtss(T, Src);
3327 _mov(Dest, T);
3328 return;
3329 }
Jan Voung7b34b592014-07-18 13:12:58 -07003330 case Intrinsics::Stacksave: {
Jan Voungbd385e42014-09-18 18:18:10 -07003331 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jan Voung7b34b592014-07-18 13:12:58 -07003332 Variable *Dest = Instr->getDest();
3333 _mov(Dest, esp);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003334 return;
Jan Voung7b34b592014-07-18 13:12:58 -07003335 }
3336 case Intrinsics::Stackrestore: {
Jan Voungbd385e42014-09-18 18:18:10 -07003337 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnoth47752552014-10-13 17:15:08 -07003338 _mov_nonkillable(esp, Instr->getArg(0));
Jan Voung7b34b592014-07-18 13:12:58 -07003339 return;
3340 }
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003341 case Intrinsics::Trap:
3342 _ud2();
Jan Voung5cd240d2014-06-25 10:36:46 -07003343 return;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003344 case Intrinsics::UnknownIntrinsic:
3345 Func->setError("Should not be lowering UnknownIntrinsic");
3346 return;
3347 }
3348 return;
3349}
3350
Jan Vounga3a01a22014-07-14 10:32:41 -07003351void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
3352 Operand *Expected, Operand *Desired) {
3353 if (Expected->getType() == IceType_i64) {
3354 // Reserve the pre-colored registers first, before adding any more
Jan Voungbefd03a2015-06-02 11:03:03 -07003355 // infinite-weight variables from formMemoryOperand's legalization.
Jan Voungbd385e42014-09-18 18:18:10 -07003356 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
3357 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
3358 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
3359 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
Jan Vounga3a01a22014-07-14 10:32:41 -07003360 _mov(T_eax, loOperand(Expected));
3361 _mov(T_edx, hiOperand(Expected));
3362 _mov(T_ebx, loOperand(Desired));
3363 _mov(T_ecx, hiOperand(Desired));
Jan Voungbefd03a2015-06-02 11:03:03 -07003364 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
Jan Vounga3a01a22014-07-14 10:32:41 -07003365 const bool Locked = true;
3366 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3367 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3368 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3369 _mov(DestLo, T_eax);
3370 _mov(DestHi, T_edx);
3371 return;
3372 }
Jan Voungbd385e42014-09-18 18:18:10 -07003373 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
Jan Vounga3a01a22014-07-14 10:32:41 -07003374 _mov(T_eax, Expected);
Jan Voungbefd03a2015-06-02 11:03:03 -07003375 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
Jan Vounga3a01a22014-07-14 10:32:41 -07003376 Variable *DesiredReg = legalizeToVar(Desired);
3377 const bool Locked = true;
3378 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3379 _mov(DestPrev, T_eax);
3380}
3381
Jan Voungc820ddf2014-07-29 14:38:51 -07003382bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
3383 Operand *Expected,
3384 Operand *Desired) {
Jan Voung1f47ad02015-03-20 15:01:26 -07003385 if (Ctx->getFlags().getOptLevel() == Opt_m1)
Jan Voungc820ddf2014-07-29 14:38:51 -07003386 return false;
3387 // Peek ahead a few instructions and see how Dest is used.
3388 // It's very common to have:
3389 //
3390 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3391 // [%y_phi = ...] // list of phi stores
3392 // %p = icmp eq i32 %x, %expected
3393 // br i1 %p, label %l1, label %l2
3394 //
3395 // which we can optimize into:
3396 //
3397 // %x = <cmpxchg code>
3398 // [%y_phi = ...] // list of phi stores
3399 // br eq, %l1, %l2
3400 InstList::iterator I = Context.getCur();
3401 // I is currently the InstIntrinsicCall. Peek past that.
3402 // This assumes that the atomic cmpxchg has not been lowered yet,
3403 // so that the instructions seen in the scan from "Cur" is simple.
3404 assert(llvm::isa<InstIntrinsicCall>(*I));
3405 Inst *NextInst = Context.getNextInst(I);
3406 if (!NextInst)
3407 return false;
3408 // There might be phi assignments right before the compare+branch, since this
3409 // could be a backward branch for a loop. This placement of assignments is
3410 // determined by placePhiStores().
3411 std::vector<InstAssign *> PhiAssigns;
3412 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3413 if (PhiAssign->getDest() == Dest)
3414 return false;
3415 PhiAssigns.push_back(PhiAssign);
3416 NextInst = Context.getNextInst(I);
3417 if (!NextInst)
3418 return false;
3419 }
3420 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
3421 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
3422 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
3423 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
3424 return false;
3425 }
3426 NextInst = Context.getNextInst(I);
3427 if (!NextInst)
3428 return false;
3429 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
3430 if (!NextBr->isUnconditional() &&
3431 NextCmp->getDest() == NextBr->getCondition() &&
3432 NextBr->isLastUse(NextCmp->getDest())) {
3433 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3434 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3435 // Lower the phi assignments now, before the branch (same placement
3436 // as before).
3437 InstAssign *PhiAssign = PhiAssigns[i];
Jan Voungc820ddf2014-07-29 14:38:51 -07003438 PhiAssign->setDeleted();
Jim Stichnoth89d79562014-08-27 13:50:03 -07003439 lowerAssign(PhiAssign);
Jan Voungc820ddf2014-07-29 14:38:51 -07003440 Context.advanceNext();
3441 }
Jan Voungbd385e42014-09-18 18:18:10 -07003442 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
Jan Voungc820ddf2014-07-29 14:38:51 -07003443 // Skip over the old compare and branch, by deleting them.
3444 NextCmp->setDeleted();
3445 NextBr->setDeleted();
3446 Context.advanceNext();
3447 Context.advanceNext();
3448 return true;
3449 }
3450 }
3451 }
3452 return false;
3453}
3454
Jan Voung5cd240d2014-06-25 10:36:46 -07003455void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3456 Operand *Ptr, Operand *Val) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003457 bool NeedsCmpxchg = false;
Jim Stichnothae953202014-12-20 06:17:49 -08003458 LowerBinOp Op_Lo = nullptr;
3459 LowerBinOp Op_Hi = nullptr;
Jan Voung5cd240d2014-06-25 10:36:46 -07003460 switch (Operation) {
3461 default:
3462 Func->setError("Unknown AtomicRMW operation");
3463 return;
3464 case Intrinsics::AtomicAdd: {
3465 if (Dest->getType() == IceType_i64) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003466 // All the fall-through paths must set this to true, but use this
3467 // for asserting.
3468 NeedsCmpxchg = true;
3469 Op_Lo = &TargetX8632::_add;
3470 Op_Hi = &TargetX8632::_adc;
3471 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003472 }
Jan Voungbefd03a2015-06-02 11:03:03 -07003473 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
Jan Voung5cd240d2014-06-25 10:36:46 -07003474 const bool Locked = true;
Jim Stichnothae953202014-12-20 06:17:49 -08003475 Variable *T = nullptr;
Jan Voung5cd240d2014-06-25 10:36:46 -07003476 _mov(T, Val);
3477 _xadd(Addr, T, Locked);
3478 _mov(Dest, T);
3479 return;
3480 }
3481 case Intrinsics::AtomicSub: {
3482 if (Dest->getType() == IceType_i64) {
Jan Vounga3a01a22014-07-14 10:32:41 -07003483 NeedsCmpxchg = true;
3484 Op_Lo = &TargetX8632::_sub;
3485 Op_Hi = &TargetX8632::_sbb;
3486 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003487 }
Jan Voungbefd03a2015-06-02 11:03:03 -07003488 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
Jan Vounga3a01a22014-07-14 10:32:41 -07003489 const bool Locked = true;
Jim Stichnothae953202014-12-20 06:17:49 -08003490 Variable *T = nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003491 _mov(T, Val);
3492 _neg(T);
3493 _xadd(Addr, T, Locked);
3494 _mov(Dest, T);
Jan Voung5cd240d2014-06-25 10:36:46 -07003495 return;
3496 }
3497 case Intrinsics::AtomicOr:
Jan Vounga3a01a22014-07-14 10:32:41 -07003498 // TODO(jvoung): If Dest is null or dead, then some of these
3499 // operations do not need an "exchange", but just a locked op.
3500 // That appears to be "worth" it for sub, or, and, and xor.
3501 // xadd is probably fine vs lock add for add, and xchg is fine
3502 // vs an atomic store.
3503 NeedsCmpxchg = true;
3504 Op_Lo = &TargetX8632::_or;
3505 Op_Hi = &TargetX8632::_or;
3506 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003507 case Intrinsics::AtomicAnd:
Jan Vounga3a01a22014-07-14 10:32:41 -07003508 NeedsCmpxchg = true;
3509 Op_Lo = &TargetX8632::_and;
3510 Op_Hi = &TargetX8632::_and;
3511 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003512 case Intrinsics::AtomicXor:
Jan Vounga3a01a22014-07-14 10:32:41 -07003513 NeedsCmpxchg = true;
3514 Op_Lo = &TargetX8632::_xor;
3515 Op_Hi = &TargetX8632::_xor;
3516 break;
Jan Voung5cd240d2014-06-25 10:36:46 -07003517 case Intrinsics::AtomicExchange:
Jan Vounga3a01a22014-07-14 10:32:41 -07003518 if (Dest->getType() == IceType_i64) {
3519 NeedsCmpxchg = true;
3520 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3521 // just need to be moved to the ecx and ebx registers.
Jim Stichnothae953202014-12-20 06:17:49 -08003522 Op_Lo = nullptr;
3523 Op_Hi = nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003524 break;
3525 }
Jan Voungbefd03a2015-06-02 11:03:03 -07003526 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
Jim Stichnothae953202014-12-20 06:17:49 -08003527 Variable *T = nullptr;
Jan Vounga3a01a22014-07-14 10:32:41 -07003528 _mov(T, Val);
3529 _xchg(Addr, T);
3530 _mov(Dest, T);
Jan Voung5cd240d2014-06-25 10:36:46 -07003531 return;
3532 }
Jan Vounga3a01a22014-07-14 10:32:41 -07003533 // Otherwise, we need a cmpxchg loop.
Jim Stichnoth6e992142014-07-30 14:45:20 -07003534 (void)NeedsCmpxchg;
Jan Vounga3a01a22014-07-14 10:32:41 -07003535 assert(NeedsCmpxchg);
3536 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3537}
3538
// Expands an atomic RMW operation that has no single-instruction form into
// a "lock cmpxchg"/"lock cmpxchg8b" retry loop.  Op_Lo/Op_Hi are
// member-function pointers that emit the low/high halves of the
// modification; when both are nullptr the operation is a plain exchange and
// Val is simply loaded into the replacement registers before the loop.
void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
                                           Variable *Dest, Operand *Ptr,
                                           Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  //   mov     eax, [ptr]
  //   mov     edx, [ptr + 4]
  // .LABEL:
  //   mov     ebx, eax
  //   <Op_Lo> ebx, <desired_adj_lo>
  //   mov     ecx, edx
  //   <Op_Hi> ecx, <desired_adj_hi>
  //   lock cmpxchg8b [ptr]
  //   jne     .LABEL
  //   mov     <dest_lo>, eax
  //   mov     <dest_hi>, edx
  //
  // For 32-bit:
  //   mov     eax, [ptr]
  // .LABEL:
  //   mov     <reg>, eax
  //   op      <reg>, [desired_adj]
  //   lock cmpxchg [ptr], <reg>
  //   jne     .LABEL
  //   mov     <dest>, eax
  //
  // If Op_{Lo,Hi} are nullptr, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (Ty == IceType_i64) {
    // Pin the four registers cmpxchg8b requires (edx:eax old, ecx:ebx new).
    Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
    if (!IsXchg8b) {
      // General RMW: recompute the new value on each loop iteration from
      // the freshly observed old value in edx:eax.
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    // Retry if another thread changed memory between the load and cmpxchg.
    _br(CondX86::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
        Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert(InstFakeUse::create(Func, ValLo));
        Context.insert(InstFakeUse::create(Func, ValHi));
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert(InstFakeUse::create(Func, T_ebx));
      Context.insert(InstFakeUse::create(Func, T_ecx));
    }
    // The address base (if any) is also reused in the loop.
    if (Variable *Base = Addr->getBase())
      Context.insert(InstFakeUse::create(Func, Base));
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // 32-bit (or narrower) case: eax holds the observed old value.
  OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
  Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
  _mov(T_eax, Addr);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  Context.insert(Label);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == nullptr, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(CondX86::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert(InstFakeUse::create(Func, ValVar));
  }
  // The address base (if any) is also reused in the loop.
  if (Variable *Base = Addr->getBase())
    Context.insert(InstFakeUse::create(Func, Base));
  _mov(Dest, T_eax);
}
3641
Jan Vounge4da26f2014-07-15 17:52:39 -07003642// Lowers count {trailing, leading} zeros intrinsic.
3643//
3644// We could do constant folding here, but that should have
3645// been done by the front-end/middle-end optimizations.
3646void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3647 Operand *FirstVal, Operand *SecondVal) {
3648 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3649 // Then the instructions will handle the Val == 0 case much more simply
3650 // and won't require conversion from bit position to number of zeros.
3651 //
3652 // Otherwise:
3653 // bsr IF_NOT_ZERO, Val
3654 // mov T_DEST, 63
3655 // cmovne T_DEST, IF_NOT_ZERO
3656 // xor T_DEST, 31
3657 // mov DEST, T_DEST
3658 //
3659 // NOTE: T_DEST must be a register because cmov requires its dest to be a
3660 // register. Also, bsf and bsr require their dest to be a register.
3661 //
3662 // The xor DEST, 31 converts a bit position to # of leading zeroes.
3663 // E.g., for 000... 00001100, bsr will say that the most significant bit
3664 // set is at position 3, while the number of leading zeros is 28. Xor is
3665 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
3666 //
3667 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3668 // are all zero, and compute the result for that case (checking the lower
3669 // 32 bits). Then actually compute the result for the upper bits and
3670 // cmov in the result from the lower computation if the earlier speculation
3671 // was correct.
3672 //
3673 // Cttz, is similar, but uses bsf instead, and doesn't require the xor
3674 // bit position conversion, and the speculation is reversed.
3675 assert(Ty == IceType_i32 || Ty == IceType_i64);
3676 Variable *T = makeReg(IceType_i32);
Matt Wala53c5e602014-07-22 16:39:38 -07003677 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
Jan Vounge4da26f2014-07-15 17:52:39 -07003678 if (Cttz) {
Matt Wala53c5e602014-07-22 16:39:38 -07003679 _bsf(T, FirstValRM);
Jan Vounge4da26f2014-07-15 17:52:39 -07003680 } else {
Matt Wala53c5e602014-07-22 16:39:38 -07003681 _bsr(T, FirstValRM);
Jan Vounge4da26f2014-07-15 17:52:39 -07003682 }
3683 Variable *T_Dest = makeReg(IceType_i32);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003684 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3685 Constant *ThirtyOne = Ctx->getConstantInt32(31);
Jan Vounge4da26f2014-07-15 17:52:39 -07003686 if (Cttz) {
3687 _mov(T_Dest, ThirtyTwo);
3688 } else {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003689 Constant *SixtyThree = Ctx->getConstantInt32(63);
Jan Vounge4da26f2014-07-15 17:52:39 -07003690 _mov(T_Dest, SixtyThree);
3691 }
Jan Voungbd385e42014-09-18 18:18:10 -07003692 _cmov(T_Dest, T, CondX86::Br_ne);
Jan Vounge4da26f2014-07-15 17:52:39 -07003693 if (!Cttz) {
3694 _xor(T_Dest, ThirtyOne);
3695 }
3696 if (Ty == IceType_i32) {
3697 _mov(Dest, T_Dest);
3698 return;
3699 }
3700 _add(T_Dest, ThirtyTwo);
3701 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3702 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3703 // Will be using "test" on this, so we need a registerized variable.
3704 Variable *SecondVar = legalizeToVar(SecondVal);
3705 Variable *T_Dest2 = makeReg(IceType_i32);
3706 if (Cttz) {
3707 _bsf(T_Dest2, SecondVar);
3708 } else {
3709 _bsr(T_Dest2, SecondVar);
3710 _xor(T_Dest2, ThirtyOne);
3711 }
3712 _test(SecondVar, SecondVar);
Jan Voungbd385e42014-09-18 18:18:10 -07003713 _cmov(T_Dest2, T_Dest, CondX86::Br_e);
Jan Vounge4da26f2014-07-15 17:52:39 -07003714 _mov(DestLo, T_Dest2);
3715 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3716}
3717
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003718namespace {
3719
3720bool isAdd(const Inst *Inst) {
3721 if (const InstArithmetic *Arith =
3722 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
3723 return (Arith->getOp() == InstArithmetic::Add);
3724 }
3725 return false;
3726}
3727
Jim Stichnoth89d79562014-08-27 13:50:03 -07003728void dumpAddressOpt(const Cfg *Func, const Variable *Base,
3729 const Variable *Index, uint16_t Shift, int32_t Offset,
3730 const Inst *Reason) {
Karl Schimpfb6c96af2014-11-17 10:58:39 -08003731 if (!ALLOW_DUMP)
3732 return;
Jim Stichnothfa4efea2015-01-27 05:06:03 -08003733 if (!Func->isVerbose(IceV_AddrOpt))
Jim Stichnoth89d79562014-08-27 13:50:03 -07003734 return;
Jim Stichnothe4a8f402015-01-20 12:52:51 -08003735 OstreamLocker L(Func->getContext());
Jim Stichnoth89d79562014-08-27 13:50:03 -07003736 Ostream &Str = Func->getContext()->getStrDump();
3737 Str << "Instruction: ";
3738 Reason->dumpDecorated(Func);
3739 Str << " results in Base=";
3740 if (Base)
3741 Base->dump(Func);
3742 else
3743 Str << "<null>";
3744 Str << ", Index=";
3745 if (Index)
3746 Index->dump(Func);
3747 else
3748 Str << "<null>";
3749 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
3750}
3751
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003752bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var,
3753 const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003754 // Var originates from Var=SrcVar ==>
3755 // set Var:=SrcVar
Jim Stichnothae953202014-12-20 06:17:49 -08003756 if (Var == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003757 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003758 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
3759 assert(!VMetadata->isMultiDef(Var));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003760 if (llvm::isa<InstAssign>(VarAssign)) {
3761 Operand *SrcOp = VarAssign->getSrc(0);
3762 assert(SrcOp);
3763 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003764 if (!VMetadata->isMultiDef(SrcVar) &&
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003765 // TODO: ensure SrcVar stays single-BB
3766 true) {
3767 Var = SrcVar;
3768 Reason = VarAssign;
3769 return true;
3770 }
3771 }
3772 }
3773 }
3774 return false;
3775}
3776
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003777bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base,
3778 Variable *&Index, uint16_t &Shift,
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003779 const Inst *&Reason) {
Jim Stichnothae953202014-12-20 06:17:49 -08003780 // Index==nullptr && Base is Base=Var1+Var2 ==>
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003781 // set Base=Var1, Index=Var2, Shift=0
Jim Stichnothae953202014-12-20 06:17:49 -08003782 if (Base == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003783 return false;
Jim Stichnothae953202014-12-20 06:17:49 -08003784 if (Index != nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003785 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003786 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothae953202014-12-20 06:17:49 -08003787 if (BaseInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003788 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003789 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003790 if (BaseInst->getSrcSize() < 2)
3791 return false;
3792 if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003793 if (VMetadata->isMultiDef(Var1))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003794 return false;
3795 if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003796 if (VMetadata->isMultiDef(Var2))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003797 return false;
3798 if (isAdd(BaseInst) &&
3799 // TODO: ensure Var1 and Var2 stay single-BB
3800 true) {
3801 Base = Var1;
3802 Index = Var2;
3803 Shift = 0; // should already have been 0
3804 Reason = BaseInst;
3805 return true;
3806 }
3807 }
3808 }
3809 return false;
3810}
3811
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003812bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index,
3813 uint16_t &Shift, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003814 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
3815 // Index=Var, Shift+=log2(Const)
Jim Stichnothae953202014-12-20 06:17:49 -08003816 if (Index == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003817 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003818 const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
Jim Stichnothae953202014-12-20 06:17:49 -08003819 if (IndexInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003820 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003821 assert(!VMetadata->isMultiDef(Index));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003822 if (IndexInst->getSrcSize() < 2)
3823 return false;
3824 if (const InstArithmetic *ArithInst =
3825 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
3826 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
Jan Voungbc004632014-09-16 15:09:10 -07003827 if (ConstantInteger32 *Const =
3828 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003829 if (ArithInst->getOp() == InstArithmetic::Mul &&
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003830 !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003831 uint64_t Mult = Const->getValue();
3832 uint32_t LogMult;
3833 switch (Mult) {
3834 case 1:
3835 LogMult = 0;
3836 break;
3837 case 2:
3838 LogMult = 1;
3839 break;
3840 case 4:
3841 LogMult = 2;
3842 break;
3843 case 8:
3844 LogMult = 3;
3845 break;
3846 default:
3847 return false;
3848 }
3849 if (Shift + LogMult <= 3) {
3850 Index = Var;
3851 Shift += LogMult;
3852 Reason = IndexInst;
3853 return true;
3854 }
3855 }
3856 }
3857 }
3858 }
3859 return false;
3860}
3861
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003862bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
3863 int32_t &Offset, const Inst *&Reason) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003864 // Base is Base=Var+Const || Base is Base=Const+Var ==>
3865 // set Base=Var, Offset+=Const
3866 // Base is Base=Var-Const ==>
3867 // set Base=Var, Offset-=Const
Jim Stichnothae953202014-12-20 06:17:49 -08003868 if (Base == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003869 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003870 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
Jim Stichnothae953202014-12-20 06:17:49 -08003871 if (BaseInst == nullptr)
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003872 return false;
Jim Stichnothad403532014-09-25 12:44:17 -07003873 assert(!VMetadata->isMultiDef(Base));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003874 if (const InstArithmetic *ArithInst =
3875 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
3876 if (ArithInst->getOp() != InstArithmetic::Add &&
3877 ArithInst->getOp() != InstArithmetic::Sub)
3878 return false;
3879 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
Jim Stichnothae953202014-12-20 06:17:49 -08003880 Variable *Var = nullptr;
3881 ConstantInteger32 *Const = nullptr;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003882 if (Variable *VariableOperand =
3883 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3884 Var = VariableOperand;
Jan Voungbc004632014-09-16 15:09:10 -07003885 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003886 } else if (IsAdd) {
Jan Voungbc004632014-09-16 15:09:10 -07003887 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003888 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
3889 }
Jim Stichnothae953202014-12-20 06:17:49 -08003890 if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003891 return false;
Jan Voungbc004632014-09-16 15:09:10 -07003892 int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
Jan Voung8acded02014-09-22 18:02:25 -07003893 if (Utils::WouldOverflowAdd(Offset, MoreOffset))
Jan Voungbc004632014-09-16 15:09:10 -07003894 return false;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003895 Base = Var;
Jan Voungbc004632014-09-16 15:09:10 -07003896 Offset += MoreOffset;
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003897 Reason = BaseInst;
3898 return true;
3899 }
3900 return false;
3901}
3902
Jim Stichnoth89d79562014-08-27 13:50:03 -07003903void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
3904 Variable *&Index, uint16_t &Shift, int32_t &Offset) {
Jim Stichnoth800dab22014-09-20 12:25:02 -07003905 Func->resetCurrentNode();
Jim Stichnothfa4efea2015-01-27 05:06:03 -08003906 if (Func->isVerbose(IceV_AddrOpt)) {
Jim Stichnothe4a8f402015-01-20 12:52:51 -08003907 OstreamLocker L(Func->getContext());
Jim Stichnoth89d79562014-08-27 13:50:03 -07003908 Ostream &Str = Func->getContext()->getStrDump();
3909 Str << "\nStarting computeAddressOpt for instruction:\n ";
3910 Instr->dumpDecorated(Func);
3911 }
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003912 (void)Offset; // TODO: pattern-match for non-zero offsets.
Jim Stichnothae953202014-12-20 06:17:49 -08003913 if (Base == nullptr)
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003914 return;
3915 // If the Base has more than one use or is live across multiple
3916 // blocks, then don't go further. Alternatively (?), never consider
3917 // a transformation that would change a variable that is currently
3918 // *not* live across basic block boundaries into one that *is*.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003919 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003920 return;
3921
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003922 const VariablesMetadata *VMetadata = Func->getVMetadata();
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003923 bool Continue = true;
3924 while (Continue) {
Jim Stichnothae953202014-12-20 06:17:49 -08003925 const Inst *Reason = nullptr;
Jim Stichnoth144cdce2014-09-22 16:02:59 -07003926 if (matchTransitiveAssign(VMetadata, Base, Reason) ||
3927 matchTransitiveAssign(VMetadata, Index, Reason) ||
3928 matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
3929 matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
3930 matchOffsetBase(VMetadata, Base, Offset, Reason)) {
Jim Stichnothe5ac7db2014-09-15 10:42:14 -07003931 dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
3932 } else {
3933 Continue = false;
Matt Wala8835b892014-08-11 17:46:58 -07003934 }
3935
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003936 // Index is Index=Var<<Const && Const+Shift<=3 ==>
3937 // Index=Var, Shift+=Const
3938
3939 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
3940 // Index=Var, Shift+=log2(Const)
3941
3942 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
3943 // swap(Index,Base)
3944 // Similar for Base=Const*Var and Base=Var<<Const
3945
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003946 // Index is Index=Var+Const ==>
3947 // set Index=Var, Offset+=(Const<<Shift)
3948
3949 // Index is Index=Const+Var ==>
3950 // set Index=Var, Offset+=(Const<<Shift)
3951
3952 // Index is Index=Var-Const ==>
3953 // set Index=Var, Offset-=(Const<<Shift)
3954
3955 // TODO: consider overflow issues with respect to Offset.
3956 // TODO: handle symbolic constants.
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003957 }
3958}
3959
3960} // anonymous namespace
3961
Jim Stichnothc77f8172015-05-31 23:34:44 -07003962void TargetX8632::lowerLoad(const InstLoad *Load) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003963 // A Load instruction can be treated the same as an Assign
3964 // instruction, after the source operand is transformed into an
3965 // OperandX8632Mem operand. Note that the address mode
3966 // optimization already creates an OperandX8632Mem operand, so it
3967 // doesn't need another level of transformation.
Jim Stichnothc77f8172015-05-31 23:34:44 -07003968 Variable *DestLoad = Load->getDest();
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -07003969 Type Ty = DestLoad->getType();
3970 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
Jim Stichnothc77f8172015-05-31 23:34:44 -07003971 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07003972 lowerAssign(Assign);
3973}
3974
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003975void TargetX8632::doAddressOptLoad() {
Jim Stichnoth607e9f02014-11-06 13:32:05 -08003976 Inst *Inst = Context.getCur();
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003977 Variable *Dest = Inst->getDest();
3978 Operand *Addr = Inst->getSrc(0);
Jim Stichnothae953202014-12-20 06:17:49 -08003979 Variable *Index = nullptr;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003980 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003981 int32_t Offset = 0; // TODO: make Constant
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003982 // Vanilla ICE load instructions should not use the segment registers,
3983 // and computeAddressOpt only works at the level of Variables and Constants,
3984 // not other OperandX8632Mem, so there should be no mention of segment
3985 // registers there either.
3986 const OperandX8632Mem::SegmentRegisters SegmentReg =
3987 OperandX8632Mem::DefaultSegment;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003988 Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jim Stichnoth89d79562014-08-27 13:50:03 -07003989 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003990 if (Base && Addr != Base) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07003991 Inst->setDeleted();
Jim Stichnothd2cb4362014-11-20 11:24:42 -08003992 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003993 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07003994 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07003995 Context.insert(InstLoad::create(Func, Dest, Addr));
3996 }
3997}
3998
Matt Walac3302742014-08-15 16:21:56 -07003999void TargetX8632::randomlyInsertNop(float Probability) {
4000 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
4001 if (RNG.getTrueWithProbability(Probability)) {
Jim Stichnothe6d24782014-12-19 05:42:24 -08004002 _nop(RNG(X86_NUM_NOP_VARIANTS));
Matt Walac3302742014-08-15 16:21:56 -07004003 }
4004}
4005
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004006void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
4007 Func->setError("Phi found in regular instruction list");
4008}
4009
4010void TargetX8632::lowerRet(const InstRet *Inst) {
Jim Stichnothae953202014-12-20 06:17:49 -08004011 Variable *Reg = nullptr;
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004012 if (Inst->hasRetValue()) {
4013 Operand *Src0 = legalize(Inst->getRetValue());
4014 if (Src0->getType() == IceType_i64) {
Jim Stichnothad403532014-09-25 12:44:17 -07004015 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
4016 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004017 Reg = eax;
4018 Context.insert(InstFakeUse::create(Func, edx));
Jan Voung3a569182014-09-29 10:16:01 -07004019 } else if (isScalarFloatingType(Src0->getType())) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004020 _fld(Src0);
Matt Wala928f1292014-07-07 16:50:46 -07004021 } else if (isVectorType(Src0->getType())) {
Jim Stichnothad403532014-09-25 12:44:17 -07004022 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004023 } else {
Jan Voungbd385e42014-09-18 18:18:10 -07004024 _mov(Reg, Src0, RegX8632::Reg_eax);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004025 }
4026 }
Jim Stichnoth9f42d8c2015-02-20 09:20:14 -08004027 // Add a ret instruction even if sandboxing is enabled, because
4028 // addEpilog explicitly looks for a ret instruction as a marker for
4029 // where to insert the frame removal instructions.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004030 _ret(Reg);
4031 // Add a fake use of esp to make sure esp stays alive for the entire
4032 // function. Otherwise post-call esp adjustments get dead-code
4033 // eliminated. TODO: Are there more places where the fake use
4034 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
4035 // have a ret instruction.
Jan Voungbd385e42014-09-18 18:18:10 -07004036 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004037 Context.insert(InstFakeUse::create(Func, esp));
4038}
4039
// Lowers a select (a = cond ? b : c).  Vector selects use SSE4.1
// blendvps/pblendvb when available, falling back to an and/andnot/or
// mask sequence.  Scalar selects use cmp + cmov where the type allows,
// or explicit control flow for 8-bit and FP destinations.
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    Type SrcTy = SrcT->getType();
    Variable *T = makeReg(SrcTy);
    Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
    Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
    if (InstructionSet >= SSE4_1) {
      // TODO(wala): If the condition operand is a constant, use blendps
      // or pblendw.
      //
      // Use blendvps or pblendvb to implement select.
      if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
          SrcTy == IceType_v4f32) {
        Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
        // blendvps uses xmm0 as its implicit mask operand; it selects
        // based on the sign bit of each lane, hence the shift by 31 to
        // move the i1 condition bit into the sign position.
        Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
        _movp(xmm0, ConditionRM);
        _psll(xmm0, Ctx->getConstantInt8(31));
        _movp(T, SrcFRM);
        _blendvps(T, SrcTRM, xmm0);
        _movp(Dest, T);
      } else {
        assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
        // pblendvb selects per byte; sign-extend the condition to fill
        // each lane so the high bit of every byte reflects it.
        Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
                                                              : IceType_v16i8;
        Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
        lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
        _movp(T, SrcFRM);
        _pblendvb(T, SrcTRM, xmm0);
        _movp(Dest, T);
      }
      return;
    }
    // Lower select without SSE4.1:
    // a=d?b:c ==>
    //   if elementtype(d) != i1:
    //      d=sext(d);
    //   a=(b&d)|(c&~d);
    Variable *T2 = makeReg(SrcTy);
    // Sign extend the condition operand if applicable.
    if (SrcTy == IceType_v4f32) {
      // The sext operation takes only integer arguments.
      Variable *T3 = Func->makeVariable(IceType_v4i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
      _movp(T, T3);
    } else if (typeElementType(SrcTy) != IceType_i1) {
      lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
    } else {
      Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
      _movp(T, ConditionRM);
    }
    // T = cond & SrcT;  T2 = ~cond & SrcF;  Dest = T | T2.
    _movp(T2, T);
    _pand(T, SrcTRM);
    _pandn(T2, SrcFRM);
    _por(T, T2);
    _movp(Dest, T);

    return;
  }

  CondX86::BrCond Cond = CondX86::Br_ne;
  Operand *CmpOpnd0 = nullptr;
  Operand *CmpOpnd1 = nullptr;
  // Handle folding opportunities.
  // If a deleted icmp produced this condition, fold its comparison
  // directly into the cmp below instead of testing the i1 result.
  if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
    assert(Producer->isDeleted());
    switch (BoolFolding::getProducerKind(Producer)) {
    default:
      break;
    case BoolFolding::PK_Icmp32: {
      auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
      Cond = getIcmp32Mapping(Cmp->getCondition());
      CmpOpnd1 = legalize(Producer->getSrc(1));
      CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
    } break;
    }
  }
  // No folding occurred: compare the boolean condition against zero.
  if (CmpOpnd0 == nullptr) {
    CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
    CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  }
  assert(CmpOpnd0);
  assert(CmpOpnd1);

  _cmp(CmpOpnd0, CmpOpnd1);
  if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
    // The cmov instruction doesn't allow 8-bit or FP operands, so
    // we need explicit control flow.
    // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
    _mov(Dest, SrcT);
    _br(Cond, Label);
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
    _mov_nonkillable(Dest, SrcF);
    Context.insert(Label);
    return;
  }
  // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
  // But if SrcT is immediate, we might be able to do better, as
  // the cmov instruction doesn't allow an immediate operand:
  // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
  if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
    std::swap(SrcT, SrcF);
    Cond = InstX8632::getOppositeCondition(Cond);
  }
  if (DestTy == IceType_i64) {
    // 64-bit select: apply the mov/cmov/mov pattern separately to the
    // low and high 32-bit halves, reusing the single cmp/flags result.
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF));
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
    _cmov(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF));
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
    _cmov(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }

  assert(DestTy == IceType_i16 || DestTy == IceType_i32);
  Variable *T = nullptr;
  SrcF = legalize(SrcF);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
  _cmov(T, SrcT, Cond);
  _mov(Dest, T);
}
4179
4180void TargetX8632::lowerStore(const InstStore *Inst) {
4181 Operand *Value = Inst->getData();
4182 Operand *Addr = Inst->getAddr();
Jan Voungbefd03a2015-06-02 11:03:03 -07004183 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
Matt Wala105b7042014-08-11 19:56:19 -07004184 Type Ty = NewAddr->getType();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004185
Matt Wala105b7042014-08-11 19:56:19 -07004186 if (Ty == IceType_i64) {
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004187 Value = legalize(Value);
Jim Stichnothad403532014-09-25 12:44:17 -07004188 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4189 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004190 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
4191 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
Matt Wala105b7042014-08-11 19:56:19 -07004192 } else if (isVectorType(Ty)) {
4193 _storep(legalizeToVar(Value), NewAddr);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004194 } else {
Jim Stichnothad403532014-09-25 12:44:17 -07004195 Value = legalize(Value, Legal_Reg | Legal_Imm);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004196 _store(Value, NewAddr);
4197 }
4198}
4199
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004200void TargetX8632::doAddressOptStore() {
Jim Stichnoth607e9f02014-11-06 13:32:05 -08004201 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004202 Operand *Data = Inst->getData();
4203 Operand *Addr = Inst->getAddr();
Jim Stichnothae953202014-12-20 06:17:49 -08004204 Variable *Index = nullptr;
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004205 uint16_t Shift = 0;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004206 int32_t Offset = 0; // TODO: make Constant
4207 Variable *Base = llvm::dyn_cast<Variable>(Addr);
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004208 // Vanilla ICE store instructions should not use the segment registers,
4209 // and computeAddressOpt only works at the level of Variables and Constants,
4210 // not other OperandX8632Mem, so there should be no mention of segment
4211 // registers there either.
4212 const OperandX8632Mem::SegmentRegisters SegmentReg =
4213 OperandX8632Mem::DefaultSegment;
Jim Stichnoth89d79562014-08-27 13:50:03 -07004214 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004215 if (Base && Addr != Base) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07004216 Inst->setDeleted();
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004217 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004218 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
Jan Voung3bd9f1a2014-06-18 10:50:57 -07004219 Shift, SegmentReg);
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004220 Context.insert(InstStore::create(Func, Data, Addr));
4221 }
4222}
4223
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004224void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4225 // This implements the most naive possible lowering.
4226 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4227 Operand *Src0 = Inst->getComparison();
4228 SizeT NumCases = Inst->getNumCases();
Jim Stichnoth2daadb72014-11-03 19:57:24 -08004229 if (Src0->getType() == IceType_i64) {
4230 Src0 = legalize(Src0); // get Base/Index into physical registers
4231 Operand *Src0Lo = loOperand(Src0);
4232 Operand *Src0Hi = hiOperand(Src0);
4233 if (NumCases >= 2) {
4234 Src0Lo = legalizeToVar(Src0Lo);
4235 Src0Hi = legalizeToVar(Src0Hi);
4236 } else {
4237 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
4238 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
4239 }
4240 for (SizeT I = 0; I < NumCases; ++I) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004241 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4242 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
Jim Stichnoth2daadb72014-11-03 19:57:24 -08004243 InstX8632Label *Label = InstX8632Label::create(Func, this);
4244 _cmp(Src0Lo, ValueLo);
4245 _br(CondX86::Br_ne, Label);
4246 _cmp(Src0Hi, ValueHi);
4247 _br(CondX86::Br_e, Inst->getLabel(I));
4248 Context.insert(Label);
4249 }
4250 _br(Inst->getLabelDefault());
4251 return;
4252 }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004253 // OK, we'll be slightly less naive by forcing Src into a physical
4254 // register if there are 2 or more uses.
4255 if (NumCases >= 2)
Jim Stichnoth2daadb72014-11-03 19:57:24 -08004256 Src0 = legalizeToVar(Src0);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004257 else
Jim Stichnothad403532014-09-25 12:44:17 -07004258 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004259 for (SizeT I = 0; I < NumCases; ++I) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004260 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004261 _cmp(Src0, Value);
Jan Voungbd385e42014-09-18 18:18:10 -07004262 _br(CondX86::Br_e, Inst->getLabel(I));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004263 }
4264
4265 _br(Inst->getLabelDefault());
4266}
4267
Matt Walaafeaee42014-08-07 13:47:30 -07004268void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4269 Variable *Dest, Operand *Src0,
4270 Operand *Src1) {
4271 assert(isVectorType(Dest->getType()));
4272 Type Ty = Dest->getType();
4273 Type ElementTy = typeElementType(Ty);
4274 SizeT NumElements = typeNumElements(Ty);
4275
4276 Operand *T = Ctx->getConstantUndef(Ty);
4277 for (SizeT I = 0; I < NumElements; ++I) {
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004278 Constant *Index = Ctx->getConstantInt32(I);
Matt Walaafeaee42014-08-07 13:47:30 -07004279
4280 // Extract the next two inputs.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004281 Variable *Op0 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004282 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004283 Variable *Op1 = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004284 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4285
4286 // Perform the arithmetic as a scalar operation.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004287 Variable *Res = Func->makeVariable(ElementTy);
Matt Walaafeaee42014-08-07 13:47:30 -07004288 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
4289
4290 // Insert the result into position.
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004291 Variable *DestT = Func->makeVariable(Ty);
Matt Walaafeaee42014-08-07 13:47:30 -07004292 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
4293 T = DestT;
Matt Walaafeaee42014-08-07 13:47:30 -07004294 }
4295
4296 lowerAssign(InstAssign::create(Func, Dest, T));
4297}
4298
Matt Walace0ca8f2014-07-24 12:34:20 -07004299// The following pattern occurs often in lowered C and C++ code:
4300//
4301// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4302// %cmp.ext = sext <n x i1> %cmp to <n x ty>
4303//
4304// We can eliminate the sext operation by copying the result of pcmpeqd,
4305// pcmpgtd, or cmpps (which produce sign extended results) to the result
4306// of the sext operation.
Jim Stichnothd9dc82e2015-03-03 17:06:33 -08004307void TargetX8632::eliminateNextVectorSextInstruction(
4308 Variable *SignExtendedResult) {
Matt Walace0ca8f2014-07-24 12:34:20 -07004309 if (InstCast *NextCast =
4310 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4311 if (NextCast->getCastKind() == InstCast::Sext &&
4312 NextCast->getSrc(0) == SignExtendedResult) {
Jim Stichnoth89d79562014-08-27 13:50:03 -07004313 NextCast->setDeleted();
Matt Walace0ca8f2014-07-24 12:34:20 -07004314 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4315 // Skip over the instruction.
Matt Walace0ca8f2014-07-24 12:34:20 -07004316 Context.advanceNext();
4317 }
4318 }
4319}
4320
Jim Stichnoth9738a9e2015-02-23 16:39:06 -08004321void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004322
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004323// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4324// preserve integrity of liveness analysis. Undef values are also
4325// turned into zeroes, since loOperand() and hiOperand() don't expect
4326// Undef input.
4327void TargetX8632::prelowerPhis() {
4328 CfgNode *Node = Context.getNode();
Jim Stichnoth29841e82014-12-23 12:26:24 -08004329 for (Inst &I : Node->getPhis()) {
4330 auto Phi = llvm::dyn_cast<InstPhi>(&I);
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004331 if (Phi->isDeleted())
4332 continue;
4333 Variable *Dest = Phi->getDest();
4334 if (Dest->getType() == IceType_i64) {
4335 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4336 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4337 InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
4338 InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi);
4339 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
4340 Operand *Src = Phi->getSrc(I);
4341 CfgNode *Label = Phi->getLabel(I);
4342 if (llvm::isa<ConstantUndef>(Src))
4343 Src = Ctx->getConstantZero(Dest->getType());
4344 PhiLo->addArgument(loOperand(Src), Label);
4345 PhiHi->addArgument(hiOperand(Src), Label);
4346 }
4347 Node->getPhis().push_back(PhiLo);
4348 Node->getPhis().push_back(PhiHi);
4349 Phi->setDeleted();
4350 }
4351 }
4352}
4353
4354namespace {
4355
4356bool isMemoryOperand(const Operand *Opnd) {
4357 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
4358 return !Var->hasReg();
Jim Stichnoth5bc44312015-03-02 16:56:20 -08004359 // We treat vector undef values the same as a memory operand,
4360 // because they do in fact need a register to materialize the vector
4361 // of zeroes into.
4362 if (llvm::isa<ConstantUndef>(Opnd))
4363 return isScalarFloatingType(Opnd->getType()) ||
4364 isVectorType(Opnd->getType());
Jim Stichnoth336f6c42014-10-30 15:01:31 -07004365 if (llvm::isa<Constant>(Opnd))
4366 return isScalarFloatingType(Opnd->getType());
4367 return true;
4368}
4369
4370} // end of anonymous namespace
4371
// Lower the pre-ordered list of assignments into mov instructions.
// Also has to do some ad-hoc register allocation as necessary.
void TargetX8632::lowerPhiAssignments(CfgNode *Node,
                                      const AssignList &Assignments) {
  // Check that this is a properly initialized shell of a node.
  assert(Node->getOutEdges().size() == 1);
  assert(Node->getInsts().empty());
  assert(Node->getPhis().empty());
  CfgNode *Succ = Node->getOutEdges().front();
  getContext().init(Node);
  // Register set setup similar to regAlloc().
  RegSetMask RegInclude = RegSet_All;
  RegSetMask RegExclude = RegSet_StackPointer;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
  bool NeedsRegs = false;
  // Initialize the set of available registers to the set of what is
  // available (not live) at the beginning of the successor block,
  // minus all registers used as Dest operands in the Assignments. To
  // do this, we start off assuming all registers are available, then
  // iterate through the Assignments and remove Dest registers.
  // During this iteration, we also determine whether we will actually
  // need any extra registers for memory-to-memory copies. If so, we
  // do the actual work of removing the live-in registers from the
  // set. TODO(stichnot): This work is being repeated for every split
  // edge to the successor, so consider updating LiveIn just once
  // after all the edges are split.
  for (const Inst &I : Assignments) {
    Variable *Dest = I.getDest();
    if (Dest->hasReg()) {
      Available[Dest->getRegNum()] = false;
    } else if (isMemoryOperand(I.getSrc(0))) {
      NeedsRegs = true; // Src and Dest are both in memory
    }
  }
  // Only pay for the live-in scan when a mem-to-mem copy will need a
  // scratch register.
  if (NeedsRegs) {
    LivenessBV &LiveIn = Func->getLiveness()->getLiveIn(Succ);
    for (int i = LiveIn.find_first(); i != -1; i = LiveIn.find_next(i)) {
      Variable *Var = Func->getLiveness()->getVariable(i, Succ);
      if (Var->hasReg())
        Available[Var->getRegNum()] = false;
    }
  }
  // Iterate backwards through the Assignments.  After lowering each
  // assignment, add Dest to the set of available registers, and
  // remove Src from the set of available registers.  Iteration is
  // done backwards to enable incremental updates of the available
  // register set, and the lowered instruction numbers may be out of
  // order, but that can be worked around by renumbering the block
  // afterwards if necessary.
  for (const Inst &I : reverse_range(Assignments)) {
    Context.rewind();
    auto Assign = llvm::dyn_cast<InstAssign>(&I);
    Variable *Dest = Assign->getDest();
    Operand *Src = Assign->getSrc(0);
    Variable *SrcVar = llvm::dyn_cast<Variable>(Src);
    // Use normal assignment lowering, except lower mem=mem specially
    // so we can register-allocate at the same time.
    if (!isMemoryOperand(Dest) || !isMemoryOperand(Src)) {
      lowerAssign(Assign);
    } else {
      assert(Dest->getType() == Src->getType());
      const llvm::SmallBitVector &RegsForType =
          getRegisterSetForType(Dest->getType());
      llvm::SmallBitVector AvailRegsForType = RegsForType & Available;
      Variable *SpillLoc = nullptr;
      Variable *Preg = nullptr;
      // TODO(stichnot): Opportunity for register randomization.
      int32_t RegNum = AvailRegsForType.find_first();
      bool IsVector = isVectorType(Dest->getType());
      bool NeedSpill = (RegNum == -1);
      // No register of the right class is free: spill one around the
      // copy and restore it afterwards.
      if (NeedSpill) {
        // Pick some register to spill and update RegNum.
        // TODO(stichnot): Opportunity for register randomization.
        RegNum = RegsForType.find_first();
        Preg = getPhysicalRegister(RegNum, Dest->getType());
        SpillLoc = Func->makeVariable(Dest->getType());
        // Create a fake def of the physical register to avoid
        // liveness inconsistency problems during late-stage liveness
        // analysis (e.g. asm-verbose mode).
        Context.insert(InstFakeDef::create(Func, Preg));
        if (IsVector)
          _movp(SpillLoc, Preg);
        else
          _mov(SpillLoc, Preg);
      }
      assert(RegNum >= 0);
      if (llvm::isa<ConstantUndef>(Src))
        // Materialize an actual constant instead of undef.  RegNum is
        // passed in for vector types because undef vectors are
        // lowered to vector register of zeroes.
        Src =
            legalize(Src, Legal_All, IsVector ? RegNum : Variable::NoRegister);
      // Route the copy through the chosen scratch register.
      Variable *Tmp = makeReg(Dest->getType(), RegNum);
      if (IsVector) {
        _movp(Tmp, Src);
        _movp(Dest, Tmp);
      } else {
        _mov(Tmp, Src);
        _mov(Dest, Tmp);
      }
      if (NeedSpill) {
        // Restore the spilled register.
        if (IsVector)
          _movp(Preg, SpillLoc);
        else
          _mov(Preg, SpillLoc);
        // Create a fake use of the physical register to keep it live
        // for late-stage liveness analysis (e.g. asm-verbose mode).
        Context.insert(InstFakeUse::create(Func, Preg));
      }
    }
    // Update register availability before moving to the previous
    // instruction on the Assignments list.
    if (Dest->hasReg())
      Available[Dest->getRegNum()] = true;
    if (SrcVar && SrcVar->hasReg())
      Available[SrcVar->getRegNum()] = false;
  }

  // Add the terminator branch instruction to the end.
  Context.setInsertPoint(Context.getEnd());
  _br(Succ);
}
4497
Matt Wala9a0168a2014-07-23 14:56:10 -07004498// There is no support for loading or emitting vector constants, so the
4499// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
4500// etc. are initialized with register operations.
4501//
4502// TODO(wala): Add limited support for vector constants so that
4503// complex initialization in registers is unnecessary.
4504
Matt Wala83b80362014-07-16 10:21:30 -07004505Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004506 Variable *Reg = makeReg(Ty, RegNum);
4507 // Insert a FakeDef, since otherwise the live range of Reg might
4508 // be overestimated.
4509 Context.insert(InstFakeDef::create(Func, Reg));
4510 _pxor(Reg, Reg);
4511 return Reg;
4512}
4513
Matt Wala9a0168a2014-07-23 14:56:10 -07004514Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
4515 Variable *MinusOnes = makeReg(Ty, RegNum);
4516 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
4517 Context.insert(InstFakeDef::create(Func, MinusOnes));
4518 _pcmpeq(MinusOnes, MinusOnes);
4519 return MinusOnes;
4520}
4521
Matt Wala83b80362014-07-16 10:21:30 -07004522Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
Matt Wala83b80362014-07-16 10:21:30 -07004523 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
Matt Wala9a0168a2014-07-23 14:56:10 -07004524 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
Matt Wala83b80362014-07-16 10:21:30 -07004525 _psub(Dest, MinusOne);
4526 return Dest;
4527}
4528
Matt Wala9a0168a2014-07-23 14:56:10 -07004529Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4530 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4531 Ty == IceType_v16i8);
4532 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4533 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4534 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004535 _psll(Reg, Ctx->getConstantInt8(Shift));
Matt Wala9a0168a2014-07-23 14:56:10 -07004536 return Reg;
4537 } else {
4538 // SSE has no left shift operation for vectors of 8 bit integers.
4539 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004540 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
Matt Wala9a0168a2014-07-23 14:56:10 -07004541 Variable *Reg = makeReg(Ty, RegNum);
4542 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4543 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4544 return Reg;
4545 }
4546}
4547
Jim Stichnoth8c980d02015-03-19 13:01:50 -07004548// Construct a mask in a register that can be and'ed with a
4549// floating-point value to mask off its sign bit. The value will be
4550// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
4551// for f64. Construct it as vector of ones logically right shifted
4552// one bit. TODO(stichnot): Fix the wala TODO above, to represent
4553// vector constants in memory.
4554Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) {
4555 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
4556 _psrl(Reg, Ctx->getConstantInt8(1));
4557 return Reg;
4558}
4559
Matt Wala49889232014-07-18 12:45:09 -07004560OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4561 Variable *Slot,
4562 uint32_t Offset) {
4563 // Ensure that Loc is a stack slot.
Jim Stichnothc6ead202015-02-24 09:30:30 -08004564 assert(Slot->getWeight().isZero());
Matt Wala49889232014-07-18 12:45:09 -07004565 assert(Slot->getRegNum() == Variable::NoRegister);
4566 // Compute the location of Loc in memory.
4567 // TODO(wala,stichnot): lea should not be required. The address of
4568 // the stack slot is known at compile time (although not until after
4569 // addProlog()).
4570 const Type PointerType = IceType_i32;
4571 Variable *Loc = makeReg(PointerType);
4572 _lea(Loc, Slot);
Jim Stichnothd2cb4362014-11-20 11:24:42 -08004573 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
Matt Wala49889232014-07-18 12:45:09 -07004574 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4575}
4576
Matt Wala928f1292014-07-07 16:50:46 -07004577// Helper for legalize() to emit the right code to lower an operand to a
4578// register of the appropriate type.
4579Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4580 Type Ty = Src->getType();
4581 Variable *Reg = makeReg(Ty, RegNum);
Matt Walaad8f7262014-07-14 17:37:37 -07004582 if (isVectorType(Ty)) {
Matt Wala928f1292014-07-07 16:50:46 -07004583 _movp(Reg, Src);
4584 } else {
4585 _mov(Reg, Src);
4586 }
4587 return Reg;
4588}
4589
// Turn From into an operand that is acceptable per the Allowed mask
// (some combination of Legal_Reg, Legal_Mem, Legal_Imm), inserting a
// copy into a physical register when necessary.  If RegNum is
// specified (not Variable::NoRegister), the result is forced into
// that specific register.
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register.  If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds.  (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (auto Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToVar(Base);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index);
    }
    // Rebuild the memory operand only if a component actually changed.
    if (Base != RegBase || Index != RegIndex) {
      From =
          OperandX8632Mem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex,
                                  Mem->getShift(), Mem->getSegmentRegister());
    }

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      // Lower undefs to zero.  Another option is to lower undefs to an
      // uninitialized register; however, using an uninitialized register
      // results in less predictable code.
      //
      // If in the future the implementation is changed to lower undef
      // values to uninitialized registers, a FakeDef will be needed:
      // Context.insert(InstFakeDef::create(Func, Reg));
      // This is in order to ensure that the live range of Reg is not
      // overestimated.  If the constant being lowered is a 64 bit value,
      // then the result should be split and the lo and hi components will
      // need to go in uninitialized registers.
      if (isVectorType(Ty))
        return makeVectorOfZeros(Ty, RegNum);
      From = Ctx->getConstantZero(Ty);
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));
    // Convert a scalar floating point constant into an explicit
    // memory operand referencing its constant-pool label.
    if (isScalarFloatingType(Ty)) {
      Variable *Base = nullptr;
      std::string Buffer;
      llvm::raw_string_ostream StrBuf(Buffer);
      llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
      Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
      From = OperandX8632Mem::create(Func, Ty, Base, Offset);
    }
    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
      // Immediate specifically not allowed
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register.  This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
4686
4687// Provide a trivial wrapper to legalize() for this common usage.
Jim Stichnothad403532014-09-25 12:44:17 -07004688Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
4689 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004690}
4691
Jim Stichnotha59ae6f2015-05-17 10:11:41 -07004692// For the cmp instruction, if Src1 is an immediate, or known to be a
4693// physical register, we can allow Src0 to be a memory operand.
4694// Otherwise, Src0 must be copied into a physical register.
4695// (Actually, either Src0 or Src1 can be chosen for the physical
4696// register, but unfortunately we have to commit to one or the other
4697// before register allocation.)
4698Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
4699 bool IsSrc1ImmOrReg = false;
4700 if (llvm::isa<Constant>(Src1)) {
4701 IsSrc1ImmOrReg = true;
4702 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
4703 if (Var->hasReg())
4704 IsSrc1ImmOrReg = true;
4705 }
4706 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
4707}
4708
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -07004709OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty,
4710 bool DoLegalize) {
Jan Voung5cd240d2014-06-25 10:36:46 -07004711 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
4712 // It may be the case that address mode optimization already creates
4713 // an OperandX8632Mem, so in that case it wouldn't need another level
4714 // of transformation.
4715 if (!Mem) {
4716 Variable *Base = llvm::dyn_cast<Variable>(Operand);
4717 Constant *Offset = llvm::dyn_cast<Constant>(Operand);
4718 assert(Base || Offset);
Matt Walae3777672014-07-31 09:06:17 -07004719 if (Offset) {
Jim Stichnoth1c335ef2015-03-18 09:01:52 -07004720 // Make sure Offset is not undef.
4721 Offset = llvm::cast<Constant>(legalize(Offset));
Jan Voungbc004632014-09-16 15:09:10 -07004722 assert(llvm::isa<ConstantInteger32>(Offset) ||
Matt Walae3777672014-07-31 09:06:17 -07004723 llvm::isa<ConstantRelocatable>(Offset));
4724 }
Jan Voung5cd240d2014-06-25 10:36:46 -07004725 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
4726 }
Jim Stichnoth8e6bf6e2015-06-03 15:58:12 -07004727 return llvm::cast<OperandX8632Mem>(DoLegalize ? legalize(Mem) : Mem);
Jan Voung5cd240d2014-06-25 10:36:46 -07004728}
4729
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004730Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
Jan Voung1ee34162014-06-24 13:43:30 -07004731 // There aren't any 64-bit integer registers for x86-32.
4732 assert(Type != IceType_i64);
Jim Stichnoth144cdce2014-09-22 16:02:59 -07004733 Variable *Reg = Func->makeVariable(Type);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004734 if (RegNum == Variable::NoRegister)
4735 Reg->setWeightInfinite();
4736 else
4737 Reg->setRegNum(RegNum);
4738 return Reg;
4739}
4740
4741void TargetX8632::postLower() {
Jan Voung1f47ad02015-03-20 15:01:26 -07004742 if (Ctx->getFlags().getOptLevel() == Opt_m1)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004743 return;
Jan Voungb3401d22015-05-18 09:38:21 -07004744 inferTwoAddress();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07004745}
4746
// Fill in Permutation with a randomized mapping of physical register
// numbers, shuffling only within equivalence classes of registers
// that share the same properties (scratch/preserved/isI8/isInt/isFP)
// and leaving ExcludeRegisters fixed in place.
void TargetX8632::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  // TODO(stichnot): Declaring Permutation this way loses type/size
  // information.  Fix this in conjunction with the caller-side TODO.
  assert(Permutation.size() >= RegX8632::Reg_NUM);
  // Expected upper bound on the number of registers in a single
  // equivalence class.  For x86-32, this would comprise the 8 XMM
  // registers.  This is for performance, not correctness.
  static const unsigned MaxEquivalenceClassSize = 8;
  typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
  typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
  EquivalenceClassMap EquivalenceClasses;
  SizeT NumShuffled = 0, NumPreserved = 0;

// Build up the equivalence classes of registers by looking at the
// register properties as well as whether the registers should be
// explicitly excluded from shuffling.
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
          frameptr, isI8, isInt, isFP)                                         \
  if (ExcludeRegisters[RegX8632::val]) {                                       \
    /* val stays the same in the resulting permutation. */                     \
    Permutation[RegX8632::val] = RegX8632::val;                                \
    ++NumPreserved;                                                            \
  } else {                                                                     \
    const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) |   \
                           (isInt << 3) | (isFP << 4);                         \
    /* val is assigned to an equivalence class based on its properties. */     \
    EquivalenceClasses[Index].push_back(RegX8632::val);                        \
  }
  REGX8632_TABLE
#undef X

  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());

  // Shuffle the resulting equivalence classes.
  for (auto I : EquivalenceClasses) {
    const RegisterList &List = I.second;
    RegisterList Shuffled(List);
    RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG);
    for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
      Permutation[List[SI]] = Shuffled[SI];
      ++NumShuffled;
    }
  }

  // Every register must have been either shuffled or preserved.
  assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);

  // Optionally dump the equivalence classes for debugging.
  if (Func->isVerbose(IceV_Random)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "Register equivalence classes:\n";
    for (auto I : EquivalenceClasses) {
      Str << "{";
      const RegisterList &List = I.second;
      bool First = true;
      for (int32_t Register : List) {
        if (!First)
          Str << " ";
        First = false;
        Str << getRegName(Register, IceType_i32);
      }
      Str << "}\n";
    }
  }
}
4813
Jan Voung76bb0be2015-05-14 09:26:19 -07004814void TargetX8632::emit(const ConstantInteger32 *C) const {
Jan Voungf644a4b2015-03-19 11:57:52 -07004815 if (!ALLOW_DUMP)
4816 return;
4817 Ostream &Str = Ctx->getStrEmit();
Jan Voung76bb0be2015-05-14 09:26:19 -07004818 Str << getConstantPrefix() << C->getValue();
Jan Voungf644a4b2015-03-19 11:57:52 -07004819}
4820
// 64-bit integer constants are never emitted directly on x86-32;
// reaching this emitter indicates a lowering bug.
void TargetX8632::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}
4824
4825void TargetX8632::emit(const ConstantFloat *C) const {
Karl Schimpfb6c96af2014-11-17 10:58:39 -08004826 if (!ALLOW_DUMP)
4827 return;
Matt Wala928f1292014-07-07 16:50:46 -07004828 Ostream &Str = Ctx->getStrEmit();
Jan Voung76bb0be2015-05-14 09:26:19 -07004829 C->emitPoolLabel(Str);
Jan Voungf644a4b2015-03-19 11:57:52 -07004830}
4831
Jan Voung76bb0be2015-05-14 09:26:19 -07004832void TargetX8632::emit(const ConstantDouble *C) const {
Karl Schimpfb6c96af2014-11-17 10:58:39 -08004833 if (!ALLOW_DUMP)
4834 return;
Jim Stichnothd97c7df2014-06-04 11:57:08 -07004835 Ostream &Str = Ctx->getStrEmit();
Jan Voung76bb0be2015-05-14 09:26:19 -07004836 C->emitPoolLabel(Str);
Jim Stichnothf61d5b22014-05-23 13:31:24 -07004837}
4838
// Undef values are replaced during legalization (see legalize(),
// which lowers them to zero); reaching this emitter is a bug.
void TargetX8632::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}
4842
// TargetDataX8632 lowers data (global variables and constant pools),
// as opposed to TargetX8632 which lowers function code.
TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}
Jim Stichnothde4ca712014-06-29 08:13:48 -07004845
// Emit the textual assembly representation of a single global
// variable: its .type/.section/.globl/.align directives, the label,
// and the initializer contents (data bytes, zero fill, and/or
// relocations).
void TargetDataX8632::lowerGlobal(const VariableDeclaration &Var) const {
  // If external and not initialized, this must be a cross test.
  // Don't generate a declaration for such cases.
  bool IsExternal = Var.isExternal() || Ctx->getFlags().getDisableInternal();
  if (IsExternal && !Var.hasInitializer())
    return;

  Ostream &Str = Ctx->getStrEmit();
  const VariableDeclaration::InitializerListType &Initializers =
      Var.getInitializers();
  bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
  bool IsConstant = Var.getIsConstant();
  uint32_t Align = Var.getAlignment();
  SizeT Size = Var.getNumBytes();
  IceString MangledName = Var.mangleName(Ctx);
  IceString SectionSuffix = "";
  // With -fdata-sections semantics, each global gets its own section.
  if (Ctx->getFlags().getDataSections())
    SectionSuffix = "." + MangledName;

  Str << "\t.type\t" << MangledName << ",@object\n";

  // Choose the section: .rodata for constants, .data for nonzero
  // initializers, .bss for zero-initialized variables.
  if (IsConstant)
    Str << "\t.section\t.rodata" << SectionSuffix << ",\"a\",@progbits\n";
  else if (HasNonzeroInitializer)
    Str << "\t.section\t.data" << SectionSuffix << ",\"aw\",@progbits\n";
  else
    Str << "\t.section\t.bss" << SectionSuffix << ",\"aw\",@nobits\n";

  if (IsExternal)
    Str << "\t.globl\t" << MangledName << "\n";

  if (Align > 1)
    Str << "\t.align\t" << Align << "\n";

  Str << MangledName << ":\n";

  if (HasNonzeroInitializer) {
    for (VariableDeclaration::Initializer *Init : Initializers) {
      switch (Init->getKind()) {
      case VariableDeclaration::Initializer::DataInitializerKind: {
        const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(Init)
                              ->getContents();
        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
        }
        break;
      }
      case VariableDeclaration::Initializer::ZeroInitializerKind:
        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
        break;
      case VariableDeclaration::Initializer::RelocInitializerKind: {
        const auto Reloc =
            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
        Str << "\t.long\t";
        Str << Reloc->getDeclaration()->mangleName(Ctx);
        if (RelocOffsetT Offset = Reloc->getOffset()) {
          // INT32_MIN cannot be negated without overflow, so it is
          // printed through the "+" branch.
          if (Offset >= 0 || (Offset == INT32_MIN))
            Str << " + " << Offset;
          else
            Str << " - " << -Offset;
        }
        Str << "\n";
        break;
      }
      }
    }
  } else
    // NOTE: for non-constant zero initializers, this is BSS (no bits),
    // so an ELF writer would not write to the file, and only track
    // virtual offsets, but the .s writer still needs this .zero and
    // cannot simply use the .size to advance offsets.
    Str << "\t.zero\t" << Size << "\n";

  Str << "\t.size\t" << MangledName << ", " << Size << "\n";
}
4921
Jim Stichnothbbca7542015-02-11 16:08:31 -08004922void TargetDataX8632::lowerGlobals(
4923 std::unique_ptr<VariableDeclarationList> Vars) const {
Jim Stichnothd442e7e2015-02-12 14:01:48 -08004924 switch (Ctx->getFlags().getOutFileType()) {
4925 case FT_Elf: {
Jim Stichnothbbca7542015-02-11 16:08:31 -08004926 ELFObjectWriter *Writer = Ctx->getObjectWriter();
4927 Writer->writeDataSection(*Vars, llvm::ELF::R_386_32);
Jim Stichnothd442e7e2015-02-12 14:01:48 -08004928 } break;
4929 case FT_Asm:
4930 case FT_Iasm: {
Jim Stichnothbbca7542015-02-11 16:08:31 -08004931 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
4932 OstreamLocker L(Ctx);
4933 for (const VariableDeclaration *Var : *Vars) {
4934 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
4935 lowerGlobal(*Var);
4936 }
4937 }
Jim Stichnothd442e7e2015-02-12 14:01:48 -08004938 } break;
Jim Stichnothbbca7542015-02-11 16:08:31 -08004939 }
Jan Voung72984d82015-01-29 14:42:38 -08004940}
4941
// Traits mapping a floating-point constant type (float or double) to
// what emitConstantPool() needs: the same-width unsigned integer type
// for bit-level access, the Ice constant class, the Ice type enum,
// and the assembler directive / printf format used for emission.
template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
// Out-of-line definitions of the float traits' static members.
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
// Out-of-line definitions of the double traits' static members.
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
4967
// Emit the constant pool for one floating-point type (selected via
// the PoolTypeConverter traits T) as textual assembly: a mergeable
// .rodata section containing each pooled constant's label and its
// raw bit pattern.
template <typename T>
void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
  if (!ALLOW_DUMP)
    return;
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  // "aM" marks the section as allocatable and mergeable, with the
  // entry size given by the trailing Align operand.
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";
  for (Constant *C : Pool) {
    typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
    typename T::IceType::PrimType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    Const->emitPoolLabel(Str);
    Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}
4998
Jim Stichnothbbca7542015-02-11 16:08:31 -08004999void TargetDataX8632::lowerConstants() const {
Karl Schimpfdf80eb82015-02-09 14:20:22 -08005000 if (Ctx->getFlags().getDisableTranslation())
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005001 return;
5002 // No need to emit constants from the int pool since (for x86) they
5003 // are embedded as immediates in the instructions, just emit float/double.
Jim Stichnothd442e7e2015-02-12 14:01:48 -08005004 switch (Ctx->getFlags().getOutFileType()) {
5005 case FT_Elf: {
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005006 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5007 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5008 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
Jim Stichnothd442e7e2015-02-12 14:01:48 -08005009 } break;
5010 case FT_Asm:
5011 case FT_Iasm: {
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005012 OstreamLocker L(Ctx);
5013 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5014 emitConstantPool<PoolTypeConverter<double>>(Ctx);
Jim Stichnothd442e7e2015-02-12 14:01:48 -08005015 } break;
Jim Stichnothfa4efea2015-01-27 05:06:03 -08005016 }
5017}
5018
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07005019} // end of namespace Ice