//===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringX86Base class, which
/// consists almost entirely of the lowering sequence for each
/// high-level instruction.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
#define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

#include <stack>

namespace Ice {
namespace X86Internal {

/// A helper class to ease setting and restoring RandomizationPoolingPaused,
/// which disables constant blinding or pooling for some translation phases.
class BoolFlagSaver {
  BoolFlagSaver() = delete;
  BoolFlagSaver(const BoolFlagSaver &) = delete;
  BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;

public:
  BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
  ~BoolFlagSaver() { Flag = OldValue; }

private:
  const bool OldValue;
  bool &Flag;
};
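
// Illustrative usage (a sketch mirroring the pattern in translateO2() below):
//   {
//     BoolFlagSaver B(RandomizationPoolingPaused, true);
//     doLoadOpt(); // runs with blinding/pooling paused
//   } // RandomizationPoolingPaused is restored on scope exit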

template <class MachineTraits> class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  /// NumUses counts the number of times Var is used as a source operand in the
  /// basic block. If IsComplex is true and there is more than one use of Var,
  /// then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};

template <class MachineTraits> class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc
  };

  /// Currently the actual enum values are not used (other than CK_None), but we
  /// go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  void init(CfgNode *Node);
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
};

template <class MachineTraits>
BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
    : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}

template <class MachineTraits>
typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
  if (llvm::isa<InstIcmp>(Instr)) {
    if (Instr->getSrc(0)->getType() != IceType_i64)
      return PK_Icmp32;
    return PK_None; // TODO(stichnot): actually PK_Icmp64;
  }
  return PK_None; // TODO(stichnot): remove this

  if (llvm::isa<InstFcmp>(Instr))
    return PK_Fcmp;
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return PK_None;
    case InstCast::Trunc:
      return PK_Trunc;
    }
  }
  return PK_None;
}

template <class MachineTraits>
typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) {
  if (llvm::isa<InstBr>(Instr))
    return CK_Br;
  if (llvm::isa<InstSelect>(Instr))
    return CK_Select;
  return CK_None; // TODO(stichnot): remove this

  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return CK_None;
    case InstCast::Sext:
      return CK_Sext;
    case InstCast::Zext:
      return CK_Zext;
    }
  }
  return CK_None;
}
/// Returns true if the producing instruction has a "complex" lowering
/// sequence. This generally means that its lowering sequence requires more
/// than one conditional branch, namely 64-bit integer compares and some
/// floating-point compares. When this is true and there is more than one
/// consumer, we prefer to disable the folding optimization, since folding the
/// producer into each consumer would duplicate the expensive branch sequence;
/// disabling it minimizes the total number of branches.
template <class MachineTraits>
bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
  switch (getProducerKind(Instr)) {
  default:
    return false;
  case PK_Icmp64:
    return true;
  case PK_Fcmp:
    return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
               .C2 != MachineTraits::Cond::Br_None;
  }
}
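
// Illustrative example of the folding this machinery enables (a sketch; the
// exact instruction choices are up to lowering):
//   %cond = icmp slt i32 %a, %b        ; producer (PK_Icmp32)
//   br i1 %cond, label %T, label %F    ; consumer (CK_Br)
// can be lowered as a fused "cmp a, b; jl T; jmp F" sequence instead of
// materializing %cond into a register and testing it again.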

template <class MachineTraits>
void BoolFolding<MachineTraits>::init(CfgNode *Node) {
  Producers.clear();
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Var = Instr.getDest();
    if (!Instr.isDeleted()                     // only consider non-deleted instructions
        && Var                                 // only instructions with an actual dest var
        && Var->getType() == IceType_i1        // only bool-type dest vars
        && getProducerKind(&Instr) != PK_None) { // white-listed instructions
      Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
    }
    // Check each src variable against the map.
    for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
      Operand *Src = Instr.getSrc(I);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        SizeT VarNum = Var->getIndex();
        if (containsValid(VarNum)) {
          if (I != 0 // All valid consumers use Var as the first source operand
              || getConsumerKind(&Instr) == CK_None // must be white-listed
              || (Producers[VarNum].IsComplex && // complex can't be multi-use
                  Producers[VarNum].NumUses > 0)) {
            setInvalid(VarNum);
            continue;
          }
          ++Producers[VarNum].NumUses;
          if (Instr.isLastUse(Var)) {
            Producers[VarNum].IsLiveOut = false;
          }
        }
      }
    }
  }
  for (auto &I : Producers) {
    // Ignore entries previously marked invalid.
    if (I.second.Instr == nullptr)
      continue;
    // Disable the producer if its dest may be live beyond this block.
    if (I.second.IsLiveOut) {
      setInvalid(I.first);
      continue;
    }
    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    I.second.Instr->setDead();
  }
}

template <class MachineTraits>
const Inst *
BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
  auto *Var = llvm::dyn_cast<const Variable>(Opnd);
  if (Var == nullptr)
    return nullptr;
  SizeT VarNum = Var->getIndex();
  auto Element = Producers.find(VarNum);
  if (Element == Producers.end())
    return nullptr;
  return Element->second.Instr;
}

template <class MachineTraits>
void BoolFolding<MachineTraits>::dump(const Cfg *Func) const {
  if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
    return;
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (auto &I : Producers) {
    if (I.second.Instr == nullptr)
      continue;
    Str << "Found foldable producer:\n  ";
    I.second.Instr->dump(Func);
    Str << "\n";
  }
}

template <class Machine>
void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) {
  FoldingInfo.init(Node);
  FoldingInfo.dump(Func);
}

template <class Machine>
TargetX86Base<Machine>::TargetX86Base(Cfg *Func)
    : TargetLowering(Func) {
  static_assert(
      (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
          (TargetInstructionSet::X86InstructionSet_End -
           TargetInstructionSet::X86InstructionSet_Begin),
      "Traits::InstructionSet range different from TargetInstructionSet");
  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<typename Traits::InstructionSet>(
        (Func->getContext()->getFlags().getTargetInstructionSet() -
         TargetInstructionSet::X86InstructionSet_Begin) +
        Traits::InstructionSet::Begin);
  }
  // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
  // initialize in some sort of static initializer for the class.
  llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM);
  ScratchRegs.resize(Traits::RegisterSet::Reg_NUM);

  Traits::initRegisterSet(&IntegerRegisters, &IntegerRegistersI8,
                          &FloatRegisters, &VectorRegisters, &ScratchRegs);

  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

template <class Machine> void TargetX86Base<Machine>::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Find read-modify-write opportunities. Do this after address mode
  // optimization so that doAddressOpt() doesn't need to be applied to RMW
  // instructions as well.
  findRMW();
  Func->dump("After RMW transform");

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  // Disable constant blinding or pooling for load optimization.
  {
    BoolFlagSaver B(RandomizationPoolingPaused, true);
    doLoadOpt();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After x86 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment.
  if (Ctx->getFlags().getUseSandboxing())
    Func->markNodesForSandboxing();
}

template <class Machine> void TargetX86Base<Machine>::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment.
  if (Ctx->getFlags().getUseSandboxing())
    Func->markNodesForSandboxing();
}

inline bool canRMW(const InstArithmetic *Arith) {
  Type Ty = Arith->getDest()->getType();
  // X86 vector instructions write to a register and have no RMW option.
  if (isVectorType(Ty))
    return false;
  bool isI64 = Ty == IceType_i64;

  switch (Arith->getOp()) {
  // Not handled for lack of simple lowering:
  //   shift on i64
  //   mul, udiv, urem, sdiv, srem, frem
  // Not handled for lack of RMW instructions:
  //   fadd, fsub, fmul, fdiv (also vector types)
  default:
    return false;
  case InstArithmetic::Add:
  case InstArithmetic::Sub:
  case InstArithmetic::And:
  case InstArithmetic::Or:
  case InstArithmetic::Xor:
    return true;
  case InstArithmetic::Shl:
  case InstArithmetic::Lshr:
  case InstArithmetic::Ashr:
    return false; // TODO(stichnot): implement
    return !isI64;
  }
}
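
// Illustrative payoff of the RMW transformation (a sketch): a pattern such as
//   a = load addr
//   b = add a, other
//   store b, addr
// can ultimately be emitted as a single read-modify-write instruction
// (e.g. "add [addr], other"), avoiding a separate load and store.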

template <class Machine>
bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
  if (A == B)
    return true;
  if (auto *MemA = llvm::dyn_cast<
          typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) {
    if (auto *MemB = llvm::dyn_cast<
            typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) {
      return MemA->getBase() == MemB->getBase() &&
             MemA->getOffset() == MemB->getOffset() &&
             MemA->getIndex() == MemB->getIndex() &&
             MemA->getShift() == MemB->getShift() &&
             MemA->getSegmentRegister() == MemB->getSegmentRegister();
    }
  }
  return false;
}

template <class Machine> void TargetX86Base<Machine>::findRMW() {
  Func->dump("Before RMW");
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (CfgNode *Node : Func->getNodes()) {
    // Walk through the instructions, considering each sequence of 3
    // instructions, and look for the particular RMW pattern. Note that this
    // search can be "broken" (false negatives) if there are intervening
    // deleted instructions, or intervening instructions that could be safely
    // moved out of the way to reveal an RMW pattern.
    auto E = Node->getInsts().end();
    auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
    for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
      // Make I3 skip over deleted instructions.
      while (I3 != E && I3->isDeleted())
        ++I3;
      if (I1 == E || I2 == E || I3 == E)
        continue;
      assert(!I1->isDeleted());
      assert(!I2->isDeleted());
      assert(!I3->isDeleted());
      if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
          if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
            // Look for:
            //   a = Load addr
            //   b = <op> a, other
            //   Store b, addr
            // Change to:
            //   a = Load addr
            //   b = <op> a, other
            //   x = FakeDef
            //   RMW <op>, addr, other, x
            //   b = Store b, addr, x
            // Note that inferTwoAddress() makes sure setDestNonKillable() gets
            // called on the updated Store instruction, to avoid liveness
            // problems later.
            //
            // With this transformation, the Store instruction acquires a Dest
            // variable and is now subject to dead code elimination if there
            // are no more uses of "b". Variable "x" is a beacon for
            // determining whether the Store instruction gets dead-code
            // eliminated. If the Store instruction is eliminated, then it
            // must be the case that the RMW instruction ends x's live range,
            // and therefore the RMW instruction will be retained and later
            // lowered. On the other hand, if the RMW instruction does not end
            // x's live range, then the Store instruction must still be
            // present, and therefore the RMW instruction is ignored during
            // lowering because it is redundant with the Store instruction.
            //
            // Note that if "a" has further uses, the RMW transformation may
            // still trigger, resulting in two loads and one store, which is
            // worse than the original one load and one store. However, this
            // is probably rare, and caching probably keeps it just as fast.
            if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
                                                  Store->getAddr()))
              continue;
            Operand *ArithSrcFromLoad = Arith->getSrc(0);
            Operand *ArithSrcOther = Arith->getSrc(1);
            if (ArithSrcFromLoad != Load->getDest()) {
              if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
                continue;
              std::swap(ArithSrcFromLoad, ArithSrcOther);
            }
            if (Arith->getDest() != Store->getData())
              continue;
            if (!canRMW(Arith))
              continue;
            if (Func->isVerbose(IceV_RMW)) {
              Str << "Found RMW in " << Func->getFunctionName() << ":\n  ";
              Load->dump(Func);
              Str << "\n  ";
              Arith->dump(Func);
              Str << "\n  ";
              Store->dump(Func);
              Str << "\n";
            }
            Variable *Beacon = Func->makeVariable(IceType_i32);
            Beacon->setWeight(0);
            Store->setRmwBeacon(Beacon);
            InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
            Node->getInsts().insert(I3, BeaconDef);
            auto *RMW = Traits::Insts::FakeRMW::create(
                Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
            Node->getInsts().insert(I3, RMW);
          }
        }
      }
    }
  }
}

// Converts a ConstantInteger32 operand into its constant value, or
// MemoryOrderInvalid if the operand is not a ConstantInteger32.
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}

/// Determines whether the dest of a Load instruction can be folded
/// into one of the src operands of a 2-operand instruction. This is
/// true as long as the load dest matches exactly one of the binary
/// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
/// the answer is true.
inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
                                      Operand *&Src0, Operand *&Src1) {
  if (Src0 == LoadDest && Src1 != LoadDest) {
    Src0 = LoadSrc;
    return true;
  }
  if (Src0 != LoadDest && Src1 == LoadDest) {
    Src1 = LoadSrc;
    return true;
  }
  return false;
}
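
// Illustrative example (a sketch): given
//   a = load addr
//   d = add a, b
// where the load's dest "a" dies at the add, doLoadOpt() below folds the
// memory operand directly into the add ("d = add [addr], b") and deletes
// the load.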

template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      Variable *LoadDest = nullptr;
      Operand *LoadSrc = nullptr;
      Inst *CurInst = Context.getCur();
      Inst *Next = Context.getNextInst();
      // Determine whether the current instruction is a Load
      // instruction or equivalent.
      if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
        // An InstLoad always qualifies.
        LoadDest = Load->getDest();
        const bool DoLegalize = false;
        LoadSrc = formMemoryOperand(Load->getSourceAddress(),
                                    LoadDest->getType(), DoLegalize);
      } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
        // An AtomicLoad intrinsic qualifies as long as it has a valid
        // memory ordering, and can be implemented in a single
        // instruction (i.e., not i64).
        Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
        if (ID == Intrinsics::AtomicLoad &&
            Intrin->getDest()->getType() != IceType_i64 &&
            Intrinsics::isMemoryOrderValid(
                ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
          LoadDest = Intrin->getDest();
          const bool DoLegalize = false;
          LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
                                      DoLegalize);
        }
      }
      // A Load instruction can be folded into the following
      // instruction only if the following instruction ends the Load's
      // Dest variable's live range.
      if (LoadDest && Next && Next->isLastUse(LoadDest)) {
        assert(LoadSrc);
        Inst *NewInst = nullptr;
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
          Operand *Src0 = Arith->getSrc(0);
          Operand *Src1 = Arith->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstArithmetic::create(Func, Arith->getOp(),
                                             Arith->getDest(), Src0, Src1);
          }
        } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
          Operand *Src0 = Icmp->getSrc(0);
          Operand *Src1 = Icmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstIcmp::create(Func, Icmp->getCondition(),
                                       Icmp->getDest(), Src0, Src1);
          }
        } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
          Operand *Src0 = Fcmp->getSrc(0);
          Operand *Src1 = Fcmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
                                       Fcmp->getDest(), Src0, Src1);
          }
        } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
          Operand *Src0 = Select->getTrueOperand();
          Operand *Src1 = Select->getFalseOperand();
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstSelect::create(Func, Select->getDest(),
                                         Select->getCondition(), Src0, Src1);
          }
        } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
          // The load dest can always be folded into a Cast instruction.
          Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
          if (Src0 == LoadDest) {
            NewInst = InstCast::create(Func, Cast->getCastKind(),
                                       Cast->getDest(), LoadSrc);
          }
        }
        if (NewInst) {
          CurInst->setDeleted();
          Next->setDeleted();
          Context.insert(NewInst);
          // Update NewInst->LiveRangesEnded so that target lowering
          // may benefit. Also update NewInst->HasSideEffects.
          NewInst->spliceLivenessInfo(Next, CurInst);
        }
      }
      Context.advanceCur();
      Context.advanceNext();
    }
  }
  Func->dump("After load optimization");
}

template <class Machine>
bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

template <class Machine>
Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark esp as an "argument" so that it is considered
    // live upon function entry.
    if (RegNum == Traits::RegisterSet::Reg_esp) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

template <class Machine>
IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
  return Traits::getRegName(RegNum, Ty);
}

template <class Machine>
void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << "%" << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  if (Offset)
    Str << Offset;
  const Type FrameSPTy = IceType_i32;
  Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";
}
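
// Illustrative emitted operands (a sketch): a register-allocated variable
// prints as e.g. "%eax"; a stack variable at offset 16 from the frame or
// stack register prints as "16(%esp)" (or "16(%ebp)" for an ebp-based frame).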

template <class Machine>
typename TargetX86Base<Machine>::Traits::Address
TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
  if (Var->hasReg())
    llvm_unreachable("Stack Variable has a register assigned");
  if (Var->getWeight().isInf()) {
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  return typename Traits::Address(
      Traits::RegisterSet::getEncodedGPR(getFrameOrStackReg()), Offset);
}

template <class Machine> void TargetX86Base<Machine>::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // passed in registers xmm0 - xmm3.
  unsigned NumXmmArgs = 0;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size();
       I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (!isVectorType(Ty))
      continue;
    // Replace Arg in the argument list with the home register. Then
    // generate an instruction in the prolog to copy the home register
    // to the assigned location of Arg.
    int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
    ++NumXmmArgs;
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump())
      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    Arg->setIsArg(false);

    Args[I] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}

/// Helper function for addProlog().
///
/// This assumes Arg is an argument passed on the stack. This sets the
/// frame offset for Arg and updates InArgsSizeBytes according to Arg's
/// width. For an I64 arg that has been split into Lo and Hi components,
/// it calls itself recursively on the components, taking care to handle
/// Lo first because of the little-endian architecture. Lastly, this
/// function generates an instruction to copy Arg into its assigned
/// register if applicable.
template <class Machine>
void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
                                                    Variable *FramePtr,
                                                    size_t BasicFrameOffset,
                                                    size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  if (isVectorType(Ty)) {
    InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
        Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit Traits::X86OperandMem
    // operand instead of a Variable, so its fill-from-stack operation has to
    // be tracked separately for statistics.
    Ctx->statsUpdateFills();
  }
}

template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
  return IceType_i32;
}

template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
  // | 4. global spill area   |
  // +------------------------+
  // | 5. padding             |
  // +------------------------+
  // | 6. local spill area    |
  // +------------------------+
  // | 7. padding             |
  // +------------------------+
  // | 8. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * X86_RET_IP_SIZE_BYTES:  area 1
  //  * PreservedRegsSizeBytes: area 2
  //  * SpillAreaPaddingBytes:  area 3
  //  * GlobalsSize:            area 4
  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  //  * LocalsSpillAreaSize:    area 6
  //  * SpillAreaSizeBytes:     areas 3 - 7

  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // A spill slot linked to a variable with a stack slot should reuse
  // that stack slot.
  std::function<bool(Variable *)> TargetVarHook =
      [&VariablesLinkedToSpillSlots](Variable *Var) {
        if (auto *SpillVar =
                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
          assert(Var->getWeight().isZero());
          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
            VariablesLinkedToSpillSlots.push_back(Var);
            return true;
          }
        }
        return false;
      };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      _push(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
    Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _push(ebp);
    _mov(ebp, esp);
    // Keep ebp live for late-stage liveness analysis
    // (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, ebp));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
                       SpillAreaAlignmentBytes, GlobalsSize,
                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
                       &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset =
        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize =
        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub esp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes)
    _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
         Ctx->getConstantInt32(SpillAreaSizeBytes));
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset =
      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumXmmArgs = 0;
  for (Variable *Arg : Args) {
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
      ++NumXmmArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      IsEbpBasedFrame);
  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked =
        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}
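
// Illustrative sizing example (a sketch, assuming NeedsStackAlignment and a
// 16-byte stack alignment): a function that preserves ebx and edi (8 bytes)
// and needs 8 bytes of spill space has StackOffset = 4 (return address) + 8
// = 12, so the spill area is padded from 8 up to 20 bytes and "sub esp, 20"
// leaves the 32-byte frame 16-byte aligned.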

template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of esp before the assignment of esp=ebp keeps
    // previous esp adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, esp));
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, SpillAreaSizeBytes
    if (SpillAreaSizeBytes)
      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }

  if (!Ctx->getFlags().getUseSandboxing())
    return;
  // Change the original ret instruction into a sandboxed return sequence.
  //   t:ecx = pop
  //   bundle_lock
  //   and t, ~31
  //   jmp *t
  //   bundle_unlock
  //   FakeUse <original_ret_operand>
  Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
  _pop(T_ecx);
  lowerIndirectJump(T_ecx);
  if (RI->getSrcSize()) {
    Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
    Context.insert(InstFakeUse::create(Func, RetValue));
  }
  RI->setDeleted();
}

template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
  switch (Var->getType()) {
  default:
    return;
  case IceType_i64:
  // TODO: Only consider F64 if we need to push each half when
  // passing as an argument to a function call. Note that each half
  // is still typed as I32.
  case IceType_f64:
    break;
  }
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (BuildDefs::dump()) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}
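
// Illustrative example (a sketch): after split64(), an i64 variable "v" is
// backed by two i32 halves (named "v__lo" and "v__hi" in dumps), which
// loOperand() and hiOperand() below select between.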

template <class Machine>
Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64 ||
         Operand->getType() == IceType_f64);
  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
        Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
    // Check if we need to blind/pool the constant.
    return legalize(ConstInt);
  }
  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
    auto *MemOperand = Traits::X86OperandMem::create(
        Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
        Mem->getShift(), Mem->getSegmentRegister());
    // Test if we should randomize or pool the offset. If so, create the mem
    // operand with the blinded/pooled constant; otherwise, return the mem
    // operand as an ordinary mem operand.
    return legalize(MemOperand);
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

template <class Machine>
Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64 ||
         Operand->getType() == IceType_f64);
  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
        Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
    // Check if we need to blind/pool the constant.
    return legalize(ConstInt);
  }
  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
    Constant *Offset = Mem->getOffset();
    if (Offset == nullptr) {
      Offset = Ctx->getConstantInt32(4);
    } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) {
      Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
    } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
      Offset =
          Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
                              SymOffset->getSuppressMangling());
    }
    auto *MemOperand = Traits::X86OperandMem::create(
        Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
        Mem->getShift(), Mem->getSegmentRegister());
    // Test if the Offset is an eligible i32 constant for randomization and
    // pooling. Blind/pool it if it is. Otherwise return as an ordinary mem
    // operand.
    return legalize(MemOperand);
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
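
// Illustrative example (a sketch): for an i64 memory operand at [base+8],
// loOperand() yields an i32 operand at [base+8] and hiOperand() yields one
// at [base+12], since the high half lives 4 bytes above the low half.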

template <class Machine>
llvm::SmallBitVector
TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
                                       RegSetMask Exclude) const {
  return Traits::getRegisterSet(Include, Exclude);
}

template <class Machine>
void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
  IsEbpBasedFrame = true;
  // Conservatively require the stack to be aligned. Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca. All the alloca code ensures that the
  // stack alignment is preserved after the alloca. The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of esp, etc.
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Operand *TotalSize = legalize(Inst->getSizeInBytes());
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment =
      std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
  if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
    _and(esp, Ctx->getConstantInt32(-Alignment));
  }
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    _sub(esp, Ctx->getConstantInt32(Value));
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime.
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    _add(T, Ctx->getConstantInt32(Alignment - 1));
    _and(T, Ctx->getConstantInt32(-Alignment));
    _sub(esp, T);
  }
  _mov(Dest, esp);
}

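// Illustrative example (a sketch): for "alloca 12 bytes, align 32" with a
// 16-byte stack alignment, lowerAlloca() above emits "and esp, -32" (since
// the requested alignment exceeds the stack alignment), rounds 12 up to 32
// for "sub esp, 32", and then copies esp into Dest.
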
/// Strength-reduce scalar integer multiplication by a constant (for
/// i32 or narrower) for certain constants. The lea instruction can be
/// used to multiply by 3, 5, or 9, and the shl instruction can be used
/// to multiply by powers of 2. These can be combined such that
/// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
/// combined with left-shifting by 2.
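/// For example (an illustrative sketch), x * 100 = ((x * 5) * 5) * 4 could
/// lower to:
///   lea t, [t + t*4]   ; t *= 5
///   lea t, [t + t*4]   ; t *= 5
///   shl t, 2           ; t *= 4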
John Porto7e93c622015-06-23 10:58:57 -07001290template <class Machine>
1291bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1292 int32_t Src1) {
1293 // Disable this optimization for Om1 and O0, just to keep things
1294 // simple there.
1295 if (Ctx->getFlags().getOptLevel() < Opt_1)
1296 return false;
1297 Type Ty = Dest->getType();
1298 Variable *T = nullptr;
1299 if (Src1 == -1) {
1300 _mov(T, Src0);
1301 _neg(T);
1302 _mov(Dest, T);
1303 return true;
1304 }
1305 if (Src1 == 0) {
1306 _mov(Dest, Ctx->getConstantZero(Ty));
1307 return true;
1308 }
1309 if (Src1 == 1) {
1310 _mov(T, Src0);
1311 _mov(Dest, T);
1312 return true;
1313 }
1314 // Don't bother with the edge case where Src1 == MININT.
1315 if (Src1 == -Src1)
1316 return false;
1317 const bool Src1IsNegative = Src1 < 0;
1318 if (Src1IsNegative)
1319 Src1 = -Src1;
1320 uint32_t Count9 = 0;
1321 uint32_t Count5 = 0;
1322 uint32_t Count3 = 0;
1323 uint32_t Count2 = 0;
1324 uint32_t CountOps = 0;
1325 while (Src1 > 1) {
1326 if (Src1 % 9 == 0) {
1327 ++CountOps;
1328 ++Count9;
1329 Src1 /= 9;
1330 } else if (Src1 % 5 == 0) {
1331 ++CountOps;
1332 ++Count5;
1333 Src1 /= 5;
1334 } else if (Src1 % 3 == 0) {
1335 ++CountOps;
1336 ++Count3;
1337 Src1 /= 3;
1338 } else if (Src1 % 2 == 0) {
1339 if (Count2 == 0)
1340 ++CountOps;
1341 ++Count2;
1342 Src1 /= 2;
1343 } else {
1344 return false;
1345 }
1346 }
1347 // Lea optimization only works for i16 and i32 types, not i8.
1348 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
1349 return false;
  // Limit the number of lea/shl operations for a single multiply to 3, a
  // somewhat arbitrary choice.
1352 const uint32_t MaxOpsForOptimizedMul = 3;
1353 if (CountOps > MaxOpsForOptimizedMul)
1354 return false;
1355 _mov(T, Src0);
1356 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1357 for (uint32_t i = 0; i < Count9; ++i) {
1358 const uint16_t Shift = 3; // log2(9-1)
John Porto921856d2015-07-07 11:56:26 -07001359 _lea(T,
1360 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
John Porto7e93c622015-06-23 10:58:57 -07001361 _set_dest_nonkillable();
1362 }
1363 for (uint32_t i = 0; i < Count5; ++i) {
1364 const uint16_t Shift = 2; // log2(5-1)
John Porto921856d2015-07-07 11:56:26 -07001365 _lea(T,
1366 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
John Porto7e93c622015-06-23 10:58:57 -07001367 _set_dest_nonkillable();
1368 }
1369 for (uint32_t i = 0; i < Count3; ++i) {
1370 const uint16_t Shift = 1; // log2(3-1)
John Porto921856d2015-07-07 11:56:26 -07001371 _lea(T,
1372 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
John Porto7e93c622015-06-23 10:58:57 -07001373 _set_dest_nonkillable();
1374 }
1375 if (Count2) {
1376 _shl(T, Ctx->getConstantInt(Ty, Count2));
1377 }
1378 if (Src1IsNegative)
1379 _neg(T);
1380 _mov(Dest, T);
1381 return true;
1382}
1383
1384template <class Machine>
1385void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1386 Variable *Dest = Inst->getDest();
1387 Operand *Src0 = legalize(Inst->getSrc(0));
1388 Operand *Src1 = legalize(Inst->getSrc(1));
1389 if (Inst->isCommutative()) {
1390 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1391 std::swap(Src0, Src1);
1392 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1393 std::swap(Src0, Src1);
1394 }
1395 if (Dest->getType() == IceType_i64) {
    // These helper-call-involved instructions are lowered in this separate
    // switch because loOperand() and hiOperand() may insert redundant
    // instructions for constant blinding and pooling. Such redundant
    // instructions would fail liveness analysis under the -Om1 setting.
    // Moreover, the arguments do not need to be processed by loOperand()
    // and hiOperand() in order to be used.
1402 switch (Inst->getOp()) {
1403 case InstArithmetic::Udiv: {
1404 const SizeT MaxSrcs = 2;
1405 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1406 Call->addArg(Inst->getSrc(0));
1407 Call->addArg(Inst->getSrc(1));
1408 lowerCall(Call);
1409 return;
1410 }
1411 case InstArithmetic::Sdiv: {
1412 const SizeT MaxSrcs = 2;
1413 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
1414 Call->addArg(Inst->getSrc(0));
1415 Call->addArg(Inst->getSrc(1));
1416 lowerCall(Call);
1417 return;
1418 }
1419 case InstArithmetic::Urem: {
1420 const SizeT MaxSrcs = 2;
1421 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
1422 Call->addArg(Inst->getSrc(0));
1423 Call->addArg(Inst->getSrc(1));
1424 lowerCall(Call);
1425 return;
1426 }
1427 case InstArithmetic::Srem: {
1428 const SizeT MaxSrcs = 2;
1429 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
1430 Call->addArg(Inst->getSrc(0));
1431 Call->addArg(Inst->getSrc(1));
1432 lowerCall(Call);
1433 return;
1434 }
1435 default:
1436 break;
1437 }
1438
1439 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1440 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1441 Operand *Src0Lo = loOperand(Src0);
1442 Operand *Src0Hi = hiOperand(Src0);
1443 Operand *Src1Lo = loOperand(Src1);
1444 Operand *Src1Hi = hiOperand(Src1);
1445 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1446 switch (Inst->getOp()) {
1447 case InstArithmetic::_num:
1448 llvm_unreachable("Unknown arithmetic operator");
1449 break;
1450 case InstArithmetic::Add:
1451 _mov(T_Lo, Src0Lo);
1452 _add(T_Lo, Src1Lo);
1453 _mov(DestLo, T_Lo);
1454 _mov(T_Hi, Src0Hi);
1455 _adc(T_Hi, Src1Hi);
1456 _mov(DestHi, T_Hi);
1457 break;
1458 case InstArithmetic::And:
1459 _mov(T_Lo, Src0Lo);
1460 _and(T_Lo, Src1Lo);
1461 _mov(DestLo, T_Lo);
1462 _mov(T_Hi, Src0Hi);
1463 _and(T_Hi, Src1Hi);
1464 _mov(DestHi, T_Hi);
1465 break;
1466 case InstArithmetic::Or:
1467 _mov(T_Lo, Src0Lo);
1468 _or(T_Lo, Src1Lo);
1469 _mov(DestLo, T_Lo);
1470 _mov(T_Hi, Src0Hi);
1471 _or(T_Hi, Src1Hi);
1472 _mov(DestHi, T_Hi);
1473 break;
1474 case InstArithmetic::Xor:
1475 _mov(T_Lo, Src0Lo);
1476 _xor(T_Lo, Src1Lo);
1477 _mov(DestLo, T_Lo);
1478 _mov(T_Hi, Src0Hi);
1479 _xor(T_Hi, Src1Hi);
1480 _mov(DestHi, T_Hi);
1481 break;
1482 case InstArithmetic::Sub:
1483 _mov(T_Lo, Src0Lo);
1484 _sub(T_Lo, Src1Lo);
1485 _mov(DestLo, T_Lo);
1486 _mov(T_Hi, Src0Hi);
1487 _sbb(T_Hi, Src1Hi);
1488 _mov(DestHi, T_Hi);
1489 break;
1490 case InstArithmetic::Mul: {
1491 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
John Porto5d0acff2015-06-30 15:29:21 -07001492 Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1493 Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
John Porto7e93c622015-06-23 10:58:57 -07001494 // gcc does the following:
1495 // a=b*c ==>
1496 // t1 = b.hi; t1 *=(imul) c.lo
1497 // t2 = c.hi; t2 *=(imul) b.lo
1498 // t3:eax = b.lo
1499 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1500 // a.lo = t4.lo
1501 // t4.hi += t1
1502 // t4.hi += t2
1503 // a.hi = t4.hi
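      // Why the two imul cross terms suffice (a sketch): writing b and c as
      // b.hi*2^32 + b.lo and c.hi*2^32 + c.lo, the product mod 2^64 is
      // b.lo*c.lo + ((b.hi*c.lo + b.lo*c.hi) << 32); the b.hi*c.hi term
      // contributes only to bits >= 64 and is discarded.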
1504 // The mul instruction cannot take an immediate operand.
1505 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
1506 _mov(T_1, Src0Hi);
1507 _imul(T_1, Src1Lo);
1508 _mov(T_2, Src1Hi);
1509 _imul(T_2, Src0Lo);
John Porto5d0acff2015-06-30 15:29:21 -07001510 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07001511 _mul(T_4Lo, T_3, Src1Lo);
1512 // The mul instruction produces two dest variables, edx:eax. We
1513 // create a fake definition of edx to account for this.
1514 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1515 _mov(DestLo, T_4Lo);
1516 _add(T_4Hi, T_1);
1517 _add(T_4Hi, T_2);
1518 _mov(DestHi, T_4Hi);
1519 } break;
1520 case InstArithmetic::Shl: {
1521 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1522 // gcc does the following:
1523 // a=b<<c ==>
1524 // t1:ecx = c.lo & 0xff
1525 // t2 = b.lo
1526 // t3 = b.hi
1527 // t3 = shld t3, t2, t1
1528 // t2 = shl t2, t1
1529 // test t1, 0x20
1530 // je L1
1531 // use(t3)
1532 // t3 = t2
1533 // t2 = 0
1534 // L1:
1535 // a.lo = t2
1536 // a.hi = t3
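      // For example (illustrative only): for b<<40, t1 = 40, so after the
      // shld/shl pair the "test t1, 0x20" finds bit 5 set, and the fixup
      // moves t2 (b.lo << 8, since shl masks the shift count mod 32) into t3
      // and zeroes t2, i.e. the low word shifts entirely into the high word.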
1537 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1538 Constant *BitTest = Ctx->getConstantInt32(0x20);
1539 Constant *Zero = Ctx->getConstantZero(IceType_i32);
John Porto921856d2015-07-07 11:56:26 -07001540 typename Traits::Insts::Label *Label =
1541 Traits::Insts::Label::create(Func, this);
John Porto5d0acff2015-06-30 15:29:21 -07001542 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
John Porto7e93c622015-06-23 10:58:57 -07001543 _mov(T_2, Src0Lo);
1544 _mov(T_3, Src0Hi);
1545 _shld(T_3, T_2, T_1);
1546 _shl(T_2, T_1);
1547 _test(T_1, BitTest);
John Porto5d0acff2015-06-30 15:29:21 -07001548 _br(Traits::Cond::Br_e, Label);
John Porto7e93c622015-06-23 10:58:57 -07001549 // T_2 and T_3 are being assigned again because of the
1550 // intra-block control flow, so we need the _mov_nonkillable
1551 // variant to avoid liveness problems.
1552 _mov_nonkillable(T_3, T_2);
1553 _mov_nonkillable(T_2, Zero);
1554 Context.insert(Label);
1555 _mov(DestLo, T_2);
1556 _mov(DestHi, T_3);
1557 } break;
1558 case InstArithmetic::Lshr: {
1559 // a=b>>c (unsigned) ==>
1560 // t1:ecx = c.lo & 0xff
1561 // t2 = b.lo
1562 // t3 = b.hi
1563 // t2 = shrd t2, t3, t1
1564 // t3 = shr t3, t1
1565 // test t1, 0x20
1566 // je L1
1567 // use(t2)
1568 // t2 = t3
1569 // t3 = 0
1570 // L1:
1571 // a.lo = t2
1572 // a.hi = t3
1573 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1574 Constant *BitTest = Ctx->getConstantInt32(0x20);
1575 Constant *Zero = Ctx->getConstantZero(IceType_i32);
John Porto921856d2015-07-07 11:56:26 -07001576 typename Traits::Insts::Label *Label =
1577 Traits::Insts::Label::create(Func, this);
John Porto5d0acff2015-06-30 15:29:21 -07001578 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
John Porto7e93c622015-06-23 10:58:57 -07001579 _mov(T_2, Src0Lo);
1580 _mov(T_3, Src0Hi);
1581 _shrd(T_2, T_3, T_1);
1582 _shr(T_3, T_1);
1583 _test(T_1, BitTest);
John Porto5d0acff2015-06-30 15:29:21 -07001584 _br(Traits::Cond::Br_e, Label);
John Porto7e93c622015-06-23 10:58:57 -07001585 // T_2 and T_3 are being assigned again because of the
1586 // intra-block control flow, so we need the _mov_nonkillable
1587 // variant to avoid liveness problems.
1588 _mov_nonkillable(T_2, T_3);
1589 _mov_nonkillable(T_3, Zero);
1590 Context.insert(Label);
1591 _mov(DestLo, T_2);
1592 _mov(DestHi, T_3);
1593 } break;
1594 case InstArithmetic::Ashr: {
1595 // a=b>>c (signed) ==>
1596 // t1:ecx = c.lo & 0xff
1597 // t2 = b.lo
1598 // t3 = b.hi
1599 // t2 = shrd t2, t3, t1
1600 // t3 = sar t3, t1
1601 // test t1, 0x20
1602 // je L1
1603 // use(t2)
1604 // t2 = t3
1605 // t3 = sar t3, 0x1f
1606 // L1:
1607 // a.lo = t2
1608 // a.hi = t3
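      // For example (illustrative only): for a shift count of 40, t2 ends up
      // holding b.hi >> 8 (arithmetic), and the "sar t3, 0x1f" fixup fills
      // the high word with copies of the sign bit rather than zeroes, which
      // is what distinguishes this case from Lshr above.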
1609 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1610 Constant *BitTest = Ctx->getConstantInt32(0x20);
1611 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
John Porto921856d2015-07-07 11:56:26 -07001612 typename Traits::Insts::Label *Label =
1613 Traits::Insts::Label::create(Func, this);
John Porto5d0acff2015-06-30 15:29:21 -07001614 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
John Porto7e93c622015-06-23 10:58:57 -07001615 _mov(T_2, Src0Lo);
1616 _mov(T_3, Src0Hi);
1617 _shrd(T_2, T_3, T_1);
1618 _sar(T_3, T_1);
1619 _test(T_1, BitTest);
John Porto5d0acff2015-06-30 15:29:21 -07001620 _br(Traits::Cond::Br_e, Label);
John Porto7e93c622015-06-23 10:58:57 -07001621 // T_2 and T_3 are being assigned again because of the
1622 // intra-block control flow, so T_2 needs the _mov_nonkillable
1623 // variant to avoid liveness problems. T_3 doesn't need special
1624 // treatment because it is reassigned via _sar instead of _mov.
1625 _mov_nonkillable(T_2, T_3);
1626 _sar(T_3, SignExtend);
1627 Context.insert(Label);
1628 _mov(DestLo, T_2);
1629 _mov(DestHi, T_3);
1630 } break;
1631 case InstArithmetic::Fadd:
1632 case InstArithmetic::Fsub:
1633 case InstArithmetic::Fmul:
1634 case InstArithmetic::Fdiv:
1635 case InstArithmetic::Frem:
1636 llvm_unreachable("FP instruction with i64 type");
1637 break;
1638 case InstArithmetic::Udiv:
1639 case InstArithmetic::Sdiv:
1640 case InstArithmetic::Urem:
1641 case InstArithmetic::Srem:
    llvm_unreachable("Call-helper-involved instruction for i64 type "
                     "should have already been handled before");
1644 break;
1645 }
1646 return;
1647 }
1648 if (isVectorType(Dest->getType())) {
1649 // TODO: Trap on integer divide and integer modulo by zero.
1650 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
John Porto921856d2015-07-07 11:56:26 -07001651 if (llvm::isa<typename Traits::X86OperandMem>(Src1))
Andrew Scull97f460d2015-07-21 10:07:42 -07001652 Src1 = legalizeToReg(Src1);
John Porto7e93c622015-06-23 10:58:57 -07001653 switch (Inst->getOp()) {
1654 case InstArithmetic::_num:
1655 llvm_unreachable("Unknown arithmetic operator");
1656 break;
1657 case InstArithmetic::Add: {
1658 Variable *T = makeReg(Dest->getType());
1659 _movp(T, Src0);
1660 _padd(T, Src1);
1661 _movp(Dest, T);
1662 } break;
1663 case InstArithmetic::And: {
1664 Variable *T = makeReg(Dest->getType());
1665 _movp(T, Src0);
1666 _pand(T, Src1);
1667 _movp(Dest, T);
1668 } break;
1669 case InstArithmetic::Or: {
1670 Variable *T = makeReg(Dest->getType());
1671 _movp(T, Src0);
1672 _por(T, Src1);
1673 _movp(Dest, T);
1674 } break;
1675 case InstArithmetic::Xor: {
1676 Variable *T = makeReg(Dest->getType());
1677 _movp(T, Src0);
1678 _pxor(T, Src1);
1679 _movp(Dest, T);
1680 } break;
1681 case InstArithmetic::Sub: {
1682 Variable *T = makeReg(Dest->getType());
1683 _movp(T, Src0);
1684 _psub(T, Src1);
1685 _movp(Dest, T);
1686 } break;
1687 case InstArithmetic::Mul: {
1688 bool TypesAreValidForPmull =
1689 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1690 bool InstructionSetIsValidForPmull =
John Porto5d0acff2015-06-30 15:29:21 -07001691 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
John Porto7e93c622015-06-23 10:58:57 -07001692 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1693 Variable *T = makeReg(Dest->getType());
1694 _movp(T, Src0);
1695 _pmull(T, Src1);
1696 _movp(Dest, T);
1697 } else if (Dest->getType() == IceType_v4i32) {
1698 // Lowering sequence:
1699 // Note: The mask arguments have index 0 on the left.
1700 //
1701 // movups T1, Src0
1702 // pshufd T2, Src0, {1,0,3,0}
1703 // pshufd T3, Src1, {1,0,3,0}
1704 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1705 // pmuludq T1, Src1
1706 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1707 // pmuludq T2, T3
1708 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1709 // shufps T1, T2, {0,2,0,2}
1710 // pshufd T4, T1, {0,2,1,3}
1711 // movups Dest, T4
1712
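        // Why this works (a sketch): pmuludq multiplies the even-indexed
        // 32-bit lanes (0 and 2) of its operands into 64-bit products. The
        // pshufd copies move lanes 1 and 3 into even positions so a second
        // pmuludq can produce their products, and the shufps/pshufd pair
        // gathers the four low 32-bit halves back into lane order 0,1,2,3.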
1713 // Mask that directs pshufd to create a vector with entries
1714 // Src[1, 0, 3, 0]
1715 const unsigned Constant1030 = 0x31;
1716 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
1717 // Mask that directs shufps to create a vector with entries
1718 // Dest[0, 2], Src[0, 2]
1719 const unsigned Mask0202 = 0x88;
1720 // Mask that directs pshufd to create a vector with entries
1721 // Src[0, 2, 1, 3]
1722 const unsigned Mask0213 = 0xd8;
1723 Variable *T1 = makeReg(IceType_v4i32);
1724 Variable *T2 = makeReg(IceType_v4i32);
1725 Variable *T3 = makeReg(IceType_v4i32);
1726 Variable *T4 = makeReg(IceType_v4i32);
1727 _movp(T1, Src0);
1728 _pshufd(T2, Src0, Mask1030);
1729 _pshufd(T3, Src1, Mask1030);
1730 _pmuludq(T1, Src1);
1731 _pmuludq(T2, T3);
1732 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1733 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
1734 _movp(Dest, T4);
1735 } else {
1736 assert(Dest->getType() == IceType_v16i8);
1737 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1738 }
1739 } break;
1740 case InstArithmetic::Shl:
1741 case InstArithmetic::Lshr:
1742 case InstArithmetic::Ashr:
1743 case InstArithmetic::Udiv:
1744 case InstArithmetic::Urem:
1745 case InstArithmetic::Sdiv:
1746 case InstArithmetic::Srem:
1747 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1748 break;
1749 case InstArithmetic::Fadd: {
1750 Variable *T = makeReg(Dest->getType());
1751 _movp(T, Src0);
1752 _addps(T, Src1);
1753 _movp(Dest, T);
1754 } break;
1755 case InstArithmetic::Fsub: {
1756 Variable *T = makeReg(Dest->getType());
1757 _movp(T, Src0);
1758 _subps(T, Src1);
1759 _movp(Dest, T);
1760 } break;
1761 case InstArithmetic::Fmul: {
1762 Variable *T = makeReg(Dest->getType());
1763 _movp(T, Src0);
1764 _mulps(T, Src1);
1765 _movp(Dest, T);
1766 } break;
1767 case InstArithmetic::Fdiv: {
1768 Variable *T = makeReg(Dest->getType());
1769 _movp(T, Src0);
1770 _divps(T, Src1);
1771 _movp(Dest, T);
1772 } break;
1773 case InstArithmetic::Frem:
1774 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1775 break;
1776 }
1777 return;
1778 }
1779 Variable *T_edx = nullptr;
1780 Variable *T = nullptr;
1781 switch (Inst->getOp()) {
1782 case InstArithmetic::_num:
1783 llvm_unreachable("Unknown arithmetic operator");
1784 break;
1785 case InstArithmetic::Add:
1786 _mov(T, Src0);
1787 _add(T, Src1);
1788 _mov(Dest, T);
1789 break;
1790 case InstArithmetic::And:
1791 _mov(T, Src0);
1792 _and(T, Src1);
1793 _mov(Dest, T);
1794 break;
1795 case InstArithmetic::Or:
1796 _mov(T, Src0);
1797 _or(T, Src1);
1798 _mov(Dest, T);
1799 break;
1800 case InstArithmetic::Xor:
1801 _mov(T, Src0);
1802 _xor(T, Src1);
1803 _mov(Dest, T);
1804 break;
1805 case InstArithmetic::Sub:
1806 _mov(T, Src0);
1807 _sub(T, Src1);
1808 _mov(Dest, T);
1809 break;
1810 case InstArithmetic::Mul:
1811 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1812 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1813 return;
1814 }
    // The 8-bit version of imul only allows the one-operand form "imul r/m8",
    // which implicitly multiplies by al and leaves the product in ax, so T
    // must be allocated to eax.
1817 if (isByteSizedArithType(Dest->getType())) {
John Porto5d0acff2015-06-30 15:29:21 -07001818 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07001819 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1820 } else {
1821 _mov(T, Src0);
1822 }
1823 _imul(T, Src1);
1824 _mov(Dest, T);
1825 break;
1826 case InstArithmetic::Shl:
1827 _mov(T, Src0);
1828 if (!llvm::isa<Constant>(Src1))
Andrew Scull97f460d2015-07-21 10:07:42 -07001829 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
John Porto7e93c622015-06-23 10:58:57 -07001830 _shl(T, Src1);
1831 _mov(Dest, T);
1832 break;
1833 case InstArithmetic::Lshr:
1834 _mov(T, Src0);
1835 if (!llvm::isa<Constant>(Src1))
Andrew Scull97f460d2015-07-21 10:07:42 -07001836 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
John Porto7e93c622015-06-23 10:58:57 -07001837 _shr(T, Src1);
1838 _mov(Dest, T);
1839 break;
1840 case InstArithmetic::Ashr:
1841 _mov(T, Src0);
1842 if (!llvm::isa<Constant>(Src1))
Andrew Scull97f460d2015-07-21 10:07:42 -07001843 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
John Porto7e93c622015-06-23 10:58:57 -07001844 _sar(T, Src1);
1845 _mov(Dest, T);
1846 break;
1847 case InstArithmetic::Udiv:
    // div and idiv are among the few arithmetic instructions that do not
    // allow an immediate operand.
1850 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1851 if (isByteSizedArithType(Dest->getType())) {
      // For 8-bit unsigned division we need to zero-extend al into ah. A mov
      // $0, %ah (or xor %ah, %ah) would work just fine, except that the
      // x86-64 assembler refuses to encode %ah (it encodes %spl, with a REX
      // prefix, instead). Accessing %ah in 64-bit mode is "tricky": %ah can
      // only be encoded in an instruction whose other 8-bit operand is one of
      // %a[lh], %b[lh], %c[lh], or %d[lh], which means the X86 target
      // lowering (and the register allocator) would have to be aware of this
      // restriction. For now, we simply zero %eax completely and move the
      // dividend into %al.
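      // The net effect (illustrative only) is a sequence like:
      //   xor %eax, %eax     ; clear eax, including ah
      //   mov src0, %al      ; dividend in al
      //   div src1           ; quotient in al, remainder in ah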
1860 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1861 Context.insert(InstFakeDef::create(Func, T_eax));
1862 _xor(T_eax, T_eax);
John Porto5d0acff2015-06-30 15:29:21 -07001863 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
John Porto448c16f2015-07-28 16:56:29 -07001864 _div(T, Src1, T);
John Porto7e93c622015-06-23 10:58:57 -07001865 _mov(Dest, T);
John Porto448c16f2015-07-28 16:56:29 -07001866 Context.insert(InstFakeUse::create(Func, T_eax));
John Porto7e93c622015-06-23 10:58:57 -07001867 } else {
1868 Constant *Zero = Ctx->getConstantZero(IceType_i32);
John Porto5d0acff2015-06-30 15:29:21 -07001869 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1870 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
John Porto7e93c622015-06-23 10:58:57 -07001871 _div(T, Src1, T_edx);
1872 _mov(Dest, T);
1873 }
1874 break;
1875 case InstArithmetic::Sdiv:
    // TODO(stichnot): Enable this after more thorough performance evaluation
    // and cross testing.
1878 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1879 // Optimize division by constant power of 2, but not for Om1
1880 // or O0, just to keep things simple there.
1881 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1882 int32_t Divisor = C->getValue();
1883 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1884 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1885 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1886 Type Ty = Dest->getType();
1887 // LLVM does the following for dest=src/(1<<log):
1888 // t=src
1889 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1890 // shr t,typewidth-log
1891 // add t,src
1892 // sar t,log
1893 // dest=t
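          // Worked example (illustrative only), for i32 src = -7, log = 1:
          // t = -7; sar 31 -> -1; shr 31 -> 1; add src -> -6; sar 1 -> -3,
          // matching C's truncation-toward-zero semantics for -7 / 2.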
1894 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
1895 _mov(T, Src0);
1896 // If for some reason we are dividing by 1, just treat it
1897 // like an assignment.
1898 if (LogDiv > 0) {
1899 // The initial sar is unnecessary when dividing by 2.
1900 if (LogDiv > 1)
1901 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
1902 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1903 _add(T, Src0);
1904 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1905 }
1906 _mov(Dest, T);
1907 return;
1908 }
1909 }
1910 }
1911 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1912 if (isByteSizedArithType(Dest->getType())) {
John Porto5d0acff2015-06-30 15:29:21 -07001913 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07001914 _cbwdq(T, T);
1915 _idiv(T, Src1, T);
1916 _mov(Dest, T);
1917 } else {
John Porto5d0acff2015-06-30 15:29:21 -07001918 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
1919 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07001920 _cbwdq(T_edx, T);
1921 _idiv(T, Src1, T_edx);
1922 _mov(Dest, T);
1923 }
1924 break;
1925 case InstArithmetic::Urem:
1926 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1927 if (isByteSizedArithType(Dest->getType())) {
John Porto448c16f2015-07-28 16:56:29 -07001928 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1929 Context.insert(InstFakeDef::create(Func, T_eax));
1930 _xor(T_eax, T_eax);
John Porto5d0acff2015-06-30 15:29:21 -07001931 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
John Porto448c16f2015-07-28 16:56:29 -07001932 Variable *T_al = makeReg(IceType_i8, Traits::RegisterSet::Reg_eax);
1933 _div(T_al, Src1, T);
      // shr $8, %eax shifts ah (i.e., the 8-bit remainder) into al. We don't
1935 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1936 // this ever becomes a problem we can introduce a pseudo rem instruction
1937 // that returns the remainder in %al directly (and uses a mov for copying
1938 // %ah to %al.)
1939 static constexpr uint8_t AlSizeInBits = 8;
1940 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1941 _mov(Dest, T_al);
1942 Context.insert(InstFakeUse::create(Func, T_eax));
John Porto7e93c622015-06-23 10:58:57 -07001943 } else {
1944 Constant *Zero = Ctx->getConstantZero(IceType_i32);
John Porto5d0acff2015-06-30 15:29:21 -07001945 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
1946 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07001947 _div(T_edx, Src1, T);
1948 _mov(Dest, T_edx);
1949 }
1950 break;
1951 case InstArithmetic::Srem:
    // TODO(stichnot): Enable this after more thorough performance evaluation
    // and cross testing.
1954 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1955 // Optimize mod by constant power of 2, but not for Om1 or O0,
1956 // just to keep things simple there.
1957 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1958 int32_t Divisor = C->getValue();
1959 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1960 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1961 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1962 Type Ty = Dest->getType();
1963 // LLVM does the following for dest=src%(1<<log):
1964 // t=src
1965 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1966 // shr t,typewidth-log
1967 // add t,src
1968 // and t, -(1<<log)
1969 // sub t,src
1970 // neg t
1971 // dest=t
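          // Worked example (illustrative only), for i32 src = -7, log = 2:
          // t = -7; sar 31 -> -1; shr 30 -> 3; add src -> -4; and -4 -> -4;
          // sub src -> 3; neg -> -3, matching C's -7 % 4 == -3.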
1972 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
1973 // If for some reason we are dividing by 1, just assign 0.
1974 if (LogDiv == 0) {
1975 _mov(Dest, Ctx->getConstantZero(Ty));
1976 return;
1977 }
1978 _mov(T, Src0);
1979 // The initial sar is unnecessary when dividing by 2.
1980 if (LogDiv > 1)
1981 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
1982 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1983 _add(T, Src0);
1984 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
1985 _sub(T, Src0);
1986 _neg(T);
1987 _mov(Dest, T);
1988 return;
1989 }
1990 }
1991 }
1992 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1993 if (isByteSizedArithType(Dest->getType())) {
John Porto5d0acff2015-06-30 15:29:21 -07001994 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
John Porto448c16f2015-07-28 16:56:29 -07001995 // T is %al.
John Porto7e93c622015-06-23 10:58:57 -07001996 _cbwdq(T, T);
John Porto448c16f2015-07-28 16:56:29 -07001997 _idiv(T, Src1, T);
1998 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1999 Context.insert(InstFakeDef::create(Func, T_eax));
      // shr $8, %eax shifts ah (i.e., the 8-bit remainder) into al. We don't
2001 // mov %ah, %al because it would make x86-64 codegen more complicated. If
2002 // this ever becomes a problem we can introduce a pseudo rem instruction
2003 // that returns the remainder in %al directly (and uses a mov for copying
2004 // %ah to %al.)
2005 static constexpr uint8_t AlSizeInBits = 8;
2006 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
2007 _mov(Dest, T);
2008 Context.insert(InstFakeUse::create(Func, T_eax));
John Porto7e93c622015-06-23 10:58:57 -07002009 } else {
John Porto5d0acff2015-06-30 15:29:21 -07002010 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
2011 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07002012 _cbwdq(T_edx, T);
2013 _idiv(T_edx, Src1, T);
2014 _mov(Dest, T_edx);
2015 }
2016 break;
2017 case InstArithmetic::Fadd:
2018 _mov(T, Src0);
2019 _addss(T, Src1);
2020 _mov(Dest, T);
2021 break;
2022 case InstArithmetic::Fsub:
2023 _mov(T, Src0);
2024 _subss(T, Src1);
2025 _mov(Dest, T);
2026 break;
2027 case InstArithmetic::Fmul:
2028 _mov(T, Src0);
2029 _mulss(T, Src1);
2030 _mov(Dest, T);
2031 break;
2032 case InstArithmetic::Fdiv:
2033 _mov(T, Src0);
2034 _divss(T, Src1);
2035 _mov(Dest, T);
2036 break;
2037 case InstArithmetic::Frem: {
2038 const SizeT MaxSrcs = 2;
2039 Type Ty = Dest->getType();
2040 InstCall *Call = makeHelperCall(
2041 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
2042 Call->addArg(Src0);
2043 Call->addArg(Src1);
2044 return lowerCall(Call);
2045 }
2046 }
2047}
2048
2049template <class Machine>
2050void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
2051 Variable *Dest = Inst->getDest();
2052 Operand *Src0 = Inst->getSrc(0);
2053 assert(Dest->getType() == Src0->getType());
2054 if (Dest->getType() == IceType_i64) {
2055 Src0 = legalize(Src0);
2056 Operand *Src0Lo = loOperand(Src0);
2057 Operand *Src0Hi = hiOperand(Src0);
2058 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2059 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2060 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2061 _mov(T_Lo, Src0Lo);
2062 _mov(DestLo, T_Lo);
2063 _mov(T_Hi, Src0Hi);
2064 _mov(DestHi, T_Hi);
2065 } else {
Jim Stichnotha3f57b92015-07-30 12:46:04 -07002066 Operand *Src0Legal;
John Porto7e93c622015-06-23 10:58:57 -07002067 if (Dest->hasReg()) {
Jim Stichnotha3f57b92015-07-30 12:46:04 -07002068 // If Dest already has a physical register, then only basic legalization
2069 // is needed, as the source operand can be a register, immediate, or
2070 // memory.
2071 Src0Legal = legalize(Src0);
John Porto7e93c622015-06-23 10:58:57 -07002072 } else {
      // If Dest could be a stack operand, then Src0Legal must be a physical
      // register or a scalar integer immediate.
Jim Stichnotha3f57b92015-07-30 12:46:04 -07002075 Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
John Porto7e93c622015-06-23 10:58:57 -07002076 }
2077 if (isVectorType(Dest->getType()))
Jim Stichnotha3f57b92015-07-30 12:46:04 -07002078 _movp(Dest, Src0Legal);
John Porto7e93c622015-06-23 10:58:57 -07002079 else
Jim Stichnotha3f57b92015-07-30 12:46:04 -07002080 _mov(Dest, Src0Legal);
John Porto7e93c622015-06-23 10:58:57 -07002081 }
2082}
2083
2084template <class Machine>
2085void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
2086 if (Inst->isUnconditional()) {
2087 _br(Inst->getTargetUnconditional());
2088 return;
2089 }
2090 Operand *Cond = Inst->getCondition();
2091
2092 // Handle folding opportunities.
2093 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
2094 assert(Producer->isDeleted());
2095 switch (BoolFolding::getProducerKind(Producer)) {
2096 default:
2097 break;
2098 case BoolFolding::PK_Icmp32: {
2099 // TODO(stichnot): Refactor similarities between this block and
2100 // the corresponding code in lowerIcmp().
2101 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
2102 Operand *Src0 = Producer->getSrc(0);
2103 Operand *Src1 = legalize(Producer->getSrc(1));
2104 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2105 _cmp(Src0RM, Src1);
2106 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
2107 Inst->getTargetFalse());
2108 return;
2109 }
2110 }
2111 }
2112
2113 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2114 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2115 _cmp(Src0, Zero);
John Porto5d0acff2015-06-30 15:29:21 -07002116 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
John Porto7e93c622015-06-23 10:58:57 -07002117}
2118
2119template <class Machine>
2120void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {
2121 // x86-32 calling convention:
2122 //
2123 // * At the point before the call, the stack must be aligned to 16
2124 // bytes.
2125 //
2126 // * The first four arguments of vector type, regardless of their
2127 // position relative to the other arguments in the argument list, are
2128 // placed in registers xmm0 - xmm3.
2129 //
2130 // * Other arguments are pushed onto the stack in right-to-left order,
2131 // such that the left-most argument ends up on the top of the stack at
2132 // the lowest memory address.
2133 //
2134 // * Stack arguments of vector type are aligned to start at the next
2135 // highest multiple of 16 bytes. Other stack arguments are aligned to
2136 // 4 bytes.
2137 //
  // This is intended to match the section "IA-32 Function Calling
  // Convention" of the document "OS X ABI Function Call Guide" by
  // Apple.
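  // For example (a sketch, not tied to any particular test): for a call
  // f(i32 a, v4f32 b, i32 c), b is passed in xmm0 while a and c live in the
  // argument area at esp+0 and esp+4 at the point of the call, with the
  // total area rounded up to a multiple of 16 bytes.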
2141 NeedsStackAlignment = true;
2142
2143 typedef std::vector<Operand *> OperandList;
2144 OperandList XmmArgs;
2145 OperandList StackArgs, StackArgLocations;
2146 uint32_t ParameterAreaSizeBytes = 0;
2147
2148 // Classify each argument operand according to the location where the
2149 // argument is passed.
2150 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
2151 Operand *Arg = Instr->getArg(i);
2152 Type Ty = Arg->getType();
2153 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
2154 assert(typeWidthInBytes(Ty) >= 4);
2155 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
2156 XmmArgs.push_back(Arg);
2157 } else {
2158 StackArgs.push_back(Arg);
2159 if (isVectorType(Arg->getType())) {
2160 ParameterAreaSizeBytes =
2161 Traits::applyStackAlignment(ParameterAreaSizeBytes);
2162 }
John Porto5d0acff2015-06-30 15:29:21 -07002163 Variable *esp =
2164 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
John Porto7e93c622015-06-23 10:58:57 -07002165 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
John Porto921856d2015-07-07 11:56:26 -07002166 StackArgLocations.push_back(
2167 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
John Porto7e93c622015-06-23 10:58:57 -07002168 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
2169 }
2170 }
2171
2172 // Adjust the parameter area so that the stack is aligned. It is
2173 // assumed that the stack is already aligned at the start of the
2174 // calling sequence.
2175 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
2176
2177 // Subtract the appropriate amount for the argument area. This also
2178 // takes care of setting the stack adjustment during emission.
2179 //
2180 // TODO: If for some reason the call instruction gets dead-code
2181 // eliminated after lowering, we would need to ensure that the
2182 // pre-call and the post-call esp adjustment get eliminated as well.
2183 if (ParameterAreaSizeBytes) {
2184 _adjust_stack(ParameterAreaSizeBytes);
2185 }
2186
2187 // Copy arguments that are passed on the stack to the appropriate
2188 // stack locations.
2189 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
2190 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
2191 }
2192
2193 // Copy arguments to be passed in registers to the appropriate
2194 // registers.
2195 // TODO: Investigate the impact of lowering arguments passed in
2196 // registers after lowering stack arguments as opposed to the other
2197 // way around. Lowering register arguments after stack arguments may
2198 // reduce register pressure. On the other hand, lowering register
2199 // arguments first (before stack arguments) may result in more compact
2200 // code, as the memory operand displacements may end up being smaller
2201 // before any stack adjustment is done.
2202 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
John Porto5d0acff2015-06-30 15:29:21 -07002203 Variable *Reg =
Andrew Scull97f460d2015-07-21 10:07:42 -07002204 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
John Porto7e93c622015-06-23 10:58:57 -07002205 // Generate a FakeUse of register arguments so that they do not get
2206 // dead code eliminated as a result of the FakeKill of scratch
2207 // registers after the call.
2208 Context.insert(InstFakeUse::create(Func, Reg));
2209 }
2210 // Generate the call instruction. Assign its result to a temporary
2211 // with high register allocation weight.
2212 Variable *Dest = Instr->getDest();
2213 // ReturnReg doubles as ReturnRegLo as necessary.
2214 Variable *ReturnReg = nullptr;
2215 Variable *ReturnRegHi = nullptr;
2216 if (Dest) {
2217 switch (Dest->getType()) {
2218 case IceType_NUM:
2219 llvm_unreachable("Invalid Call dest type");
2220 break;
2221 case IceType_void:
2222 break;
2223 case IceType_i1:
2224 case IceType_i8:
2225 case IceType_i16:
2226 case IceType_i32:
John Porto5d0acff2015-06-30 15:29:21 -07002227 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07002228 break;
2229 case IceType_i64:
John Porto5d0acff2015-06-30 15:29:21 -07002230 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
2231 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
John Porto7e93c622015-06-23 10:58:57 -07002232 break;
2233 case IceType_f32:
2234 case IceType_f64:
2235 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
2236 // the fstp instruction.
2237 break;
2238 case IceType_v4i1:
2239 case IceType_v8i1:
2240 case IceType_v16i1:
2241 case IceType_v16i8:
2242 case IceType_v8i16:
2243 case IceType_v4i32:
2244 case IceType_v4f32:
John Porto5d0acff2015-06-30 15:29:21 -07002245 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
John Porto7e93c622015-06-23 10:58:57 -07002246 break;
2247 }
2248 }
2249 Operand *CallTarget = legalize(Instr->getCallTarget());
2250 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
2251 if (NeedSandboxing) {
2252 if (llvm::isa<Constant>(CallTarget)) {
2253 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2254 } else {
2255 Variable *CallTargetVar = nullptr;
2256 _mov(CallTargetVar, CallTarget);
2257 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2258 const SizeT BundleSize =
John Porto5aeed952015-07-21 13:39:09 -07002259 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
John Porto7e93c622015-06-23 10:58:57 -07002260 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
2261 CallTarget = CallTargetVar;
2262 }
2263 }
John Porto921856d2015-07-07 11:56:26 -07002264 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
John Porto7e93c622015-06-23 10:58:57 -07002265 Context.insert(NewCall);
2266 if (NeedSandboxing)
2267 _bundle_unlock();
2268 if (ReturnRegHi)
2269 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
2270
2271 // Add the appropriate offset to esp. The call instruction takes care
2272 // of resetting the stack offset during emission.
2273 if (ParameterAreaSizeBytes) {
John Porto5d0acff2015-06-30 15:29:21 -07002274 Variable *esp =
2275 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
John Porto7e93c622015-06-23 10:58:57 -07002276 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
2277 }
2278
2279 // Insert a register-kill pseudo instruction.
2280 Context.insert(InstFakeKill::create(Func, NewCall));
2281
2282 // Generate a FakeUse to keep the call live if necessary.
2283 if (Instr->hasSideEffects() && ReturnReg) {
2284 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
2285 Context.insert(FakeUse);
2286 }
2287
2288 if (!Dest)
2289 return;
2290
2291 // Assign the result of the call to Dest.
2292 if (ReturnReg) {
2293 if (ReturnRegHi) {
2294 assert(Dest->getType() == IceType_i64);
2295 split64(Dest);
2296 Variable *DestLo = Dest->getLo();
2297 Variable *DestHi = Dest->getHi();
2298 _mov(DestLo, ReturnReg);
2299 _mov(DestHi, ReturnRegHi);
2300 } else {
2301 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
2302 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
2303 isVectorType(Dest->getType()));
2304 if (isVectorType(Dest->getType())) {
2305 _movp(Dest, ReturnReg);
2306 } else {
2307 _mov(Dest, ReturnReg);
2308 }
2309 }
2310 } else if (isScalarFloatingType(Dest->getType())) {
2311 // Special treatment for an FP function which returns its result in
2312 // st(0).
2313 // If Dest ends up being a physical xmm register, the fstp emit code
2314 // will route st(0) through a temporary stack slot.
2315 _fstp(Dest);
2316 // Create a fake use of Dest in case it actually isn't used,
2317 // because st(0) still needs to be popped.
2318 Context.insert(InstFakeUse::create(Func, Dest));
2319 }
2320}
2321
2322template <class Machine>
2323void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
2324 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
2325 InstCast::OpKind CastKind = Inst->getCastKind();
2326 Variable *Dest = Inst->getDest();
2327 switch (CastKind) {
2328 default:
2329 Func->setError("Cast type not supported");
2330 return;
2331 case InstCast::Sext: {
2332 // Src0RM is the source operand legalized to physical register or memory,
2333 // but not immediate, since the relevant x86 native instructions don't
2334 // allow an immediate operand. If the operand is an immediate, we could
2335 // consider computing the strength-reduced result at translation time,
2336 // but we're unlikely to see something like that in the bitcode that
2337 // the optimizer wouldn't have already taken care of.
2338 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2339 if (isVectorType(Dest->getType())) {
2340 Type DestTy = Dest->getType();
2341 if (DestTy == IceType_v16i8) {
2342 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
2343 Variable *OneMask = makeVectorOfOnes(Dest->getType());
2344 Variable *T = makeReg(DestTy);
2345 _movp(T, Src0RM);
2346 _pand(T, OneMask);
2347 Variable *Zeros = makeVectorOfZeros(Dest->getType());
2348 _pcmpgt(T, Zeros);
2349 _movp(Dest, T);
2350 } else {
        // width = width(elty) - 1; dest = (src << width) >> width
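        // For example (illustrative only), for v8i16 the shift amount is 15:
        // a lane holding 0x0001 becomes 0x8000 after psll and 0xFFFF after
        // psra, i.e. the low bit is replicated across the whole element.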
John Porto7e93c622015-06-23 10:58:57 -07002352 SizeT ShiftAmount =
2353 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
2354 1;
2355 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
2356 Variable *T = makeReg(DestTy);
2357 _movp(T, Src0RM);
2358 _psll(T, ShiftConstant);
2359 _psra(T, ShiftConstant);
2360 _movp(Dest, T);
2361 }
2362 } else if (Dest->getType() == IceType_i64) {
2363 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
2364 Constant *Shift = Ctx->getConstantInt32(31);
2365 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2366 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2367 Variable *T_Lo = makeReg(DestLo->getType());
2368 if (Src0RM->getType() == IceType_i32) {
2369 _mov(T_Lo, Src0RM);
2370 } else if (Src0RM->getType() == IceType_i1) {
2371 _movzx(T_Lo, Src0RM);
2372 _shl(T_Lo, Shift);
2373 _sar(T_Lo, Shift);
2374 } else {
2375 _movsx(T_Lo, Src0RM);
2376 }
2377 _mov(DestLo, T_Lo);
2378 Variable *T_Hi = nullptr;
2379 _mov(T_Hi, T_Lo);
2380 if (Src0RM->getType() != IceType_i1)
2381 // For i1, the sar instruction is already done above.
2382 _sar(T_Hi, Shift);
2383 _mov(DestHi, T_Hi);
2384 } else if (Src0RM->getType() == IceType_i1) {
2385 // t1 = src
2386 // shl t1, dst_bitwidth - 1
2387 // sar t1, dst_bitwidth - 1
2388 // dst = t1
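      // For example (illustrative only), sign-extending an i1 value of 1 to
      // i32: 1 << 31 gives 0x80000000, and the arithmetic shift right by 31
      // gives 0xFFFFFFFF, i.e. -1, as expected for a "true" i1.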
2389 size_t DestBits =
2390 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
2391 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
2392 Variable *T = makeReg(Dest->getType());
2393 if (typeWidthInBytes(Dest->getType()) <=
2394 typeWidthInBytes(Src0RM->getType())) {
2395 _mov(T, Src0RM);
2396 } else {
2397 // Widen the source using movsx or movzx. (It doesn't matter
2398 // which one, since the following shl/sar overwrite the bits.)
2399 _movzx(T, Src0RM);
2400 }
2401 _shl(T, ShiftAmount);
2402 _sar(T, ShiftAmount);
2403 _mov(Dest, T);
2404 } else {
2405 // t1 = movsx src; dst = t1
2406 Variable *T = makeReg(Dest->getType());
2407 _movsx(T, Src0RM);
2408 _mov(Dest, T);
2409 }
2410 break;
2411 }
2412 case InstCast::Zext: {
2413 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2414 if (isVectorType(Dest->getType())) {
2415 // onemask = materialize(1,1,...); dest = onemask & src
2416 Type DestTy = Dest->getType();
2417 Variable *OneMask = makeVectorOfOnes(DestTy);
2418 Variable *T = makeReg(DestTy);
2419 _movp(T, Src0RM);
2420 _pand(T, OneMask);
2421 _movp(Dest, T);
2422 } else if (Dest->getType() == IceType_i64) {
2423 // t1=movzx src; dst.lo=t1; dst.hi=0
2424 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2425 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2426 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2427 Variable *Tmp = makeReg(DestLo->getType());
2428 if (Src0RM->getType() == IceType_i32) {
2429 _mov(Tmp, Src0RM);
2430 } else {
2431 _movzx(Tmp, Src0RM);
2432 }
2433 if (Src0RM->getType() == IceType_i1) {
2434 Constant *One = Ctx->getConstantInt32(1);
2435 _and(Tmp, One);
2436 }
2437 _mov(DestLo, Tmp);
2438 _mov(DestHi, Zero);
2439 } else if (Src0RM->getType() == IceType_i1) {
2440 // t = Src0RM; t &= 1; Dest = t
2441 Constant *One = Ctx->getConstantInt32(1);
2442 Type DestTy = Dest->getType();
2443 Variable *T;
2444 if (DestTy == IceType_i8) {
2445 T = makeReg(DestTy);
2446 _mov(T, Src0RM);
2447 } else {
2448 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
2449 T = makeReg(IceType_i32);
2450 _movzx(T, Src0RM);
2451 }
2452 _and(T, One);
2453 _mov(Dest, T);
2454 } else {
2455 // t1 = movzx src; dst = t1
2456 Variable *T = makeReg(Dest->getType());
2457 _movzx(T, Src0RM);
2458 _mov(Dest, T);
2459 }
2460 break;
2461 }
2462 case InstCast::Trunc: {
2463 if (isVectorType(Dest->getType())) {
2464 // onemask = materialize(1,1,...); dst = src & onemask
2465 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2466 Type Src0Ty = Src0RM->getType();
2467 Variable *OneMask = makeVectorOfOnes(Src0Ty);
2468 Variable *T = makeReg(Dest->getType());
2469 _movp(T, Src0RM);
2470 _pand(T, OneMask);
2471 _movp(Dest, T);
2472 } else {
Jan Voungfbdd2442015-07-15 12:36:20 -07002473 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
John Porto7e93c622015-06-23 10:58:57 -07002474 if (Src0->getType() == IceType_i64)
2475 Src0 = loOperand(Src0);
2476 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2477 // t1 = trunc Src0RM; Dest = t1
2478 Variable *T = nullptr;
2479 _mov(T, Src0RM);
2480 if (Dest->getType() == IceType_i1)
2481 _and(T, Ctx->getConstantInt1(1));
2482 _mov(Dest, T);
2483 }
2484 break;
2485 }
2486 case InstCast::Fptrunc:
2487 case InstCast::Fpext: {
2488 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2489 // t1 = cvt Src0RM; Dest = t1
2490 Variable *T = makeReg(Dest->getType());
John Porto921856d2015-07-07 11:56:26 -07002491 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
John Porto7e93c622015-06-23 10:58:57 -07002492 _mov(Dest, T);
2493 break;
2494 }
2495 case InstCast::Fptosi:
2496 if (isVectorType(Dest->getType())) {
2497 assert(Dest->getType() == IceType_v4i32 &&
2498 Inst->getSrc(0)->getType() == IceType_v4f32);
2499 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
John Porto921856d2015-07-07 11:56:26 -07002500 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002501 Src0RM = legalizeToReg(Src0RM);
John Porto7e93c622015-06-23 10:58:57 -07002502 Variable *T = makeReg(Dest->getType());
John Porto921856d2015-07-07 11:56:26 -07002503 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
John Porto7e93c622015-06-23 10:58:57 -07002504 _movp(Dest, T);
2505 } else if (Dest->getType() == IceType_i64) {
2506 // Use a helper for converting floating-point values to 64-bit
2507 // integers. SSE2 appears to have no way to convert from xmm
2508 // registers to something like the edx:eax register pair, and
2509 // gcc and clang both want to use x87 instructions complete with
2510 // temporary manipulation of the status word. This helper is
2511 // not needed for x86-64.
2512 split64(Dest);
2513 const SizeT MaxSrcs = 1;
2514 Type SrcType = Inst->getSrc(0)->getType();
2515 InstCall *Call =
2516 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2517 : H_fptosi_f64_i64,
2518 Dest, MaxSrcs);
2519 Call->addArg(Inst->getSrc(0));
2520 lowerCall(Call);
2521 } else {
2522 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2523 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2524 Variable *T_1 = makeReg(IceType_i32);
2525 Variable *T_2 = makeReg(Dest->getType());
John Porto921856d2015-07-07 11:56:26 -07002526 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
John Porto7e93c622015-06-23 10:58:57 -07002527 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2528 if (Dest->getType() == IceType_i1)
2529 _and(T_2, Ctx->getConstantInt1(1));
2530 _mov(Dest, T_2);
2531 }
2532 break;
2533 case InstCast::Fptoui:
2534 if (isVectorType(Dest->getType())) {
2535 assert(Dest->getType() == IceType_v4i32 &&
2536 Inst->getSrc(0)->getType() == IceType_v4f32);
2537 const SizeT MaxSrcs = 1;
2538 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2539 Call->addArg(Inst->getSrc(0));
2540 lowerCall(Call);
2541 } else if (Dest->getType() == IceType_i64 ||
2542 Dest->getType() == IceType_i32) {
2543 // Use a helper for both x86-32 and x86-64.
2544 split64(Dest);
2545 const SizeT MaxSrcs = 1;
2546 Type DestType = Dest->getType();
2547 Type SrcType = Inst->getSrc(0)->getType();
2548 IceString TargetString;
2549 if (isInt32Asserting32Or64(DestType)) {
2550 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2551 : H_fptoui_f64_i32;
2552 } else {
2553 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2554 : H_fptoui_f64_i64;
2555 }
2556 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2557 Call->addArg(Inst->getSrc(0));
2558 lowerCall(Call);
2559 return;
2560 } else {
2561 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2562 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2563 Variable *T_1 = makeReg(IceType_i32);
2564 Variable *T_2 = makeReg(Dest->getType());
John Porto921856d2015-07-07 11:56:26 -07002565 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
John Porto7e93c622015-06-23 10:58:57 -07002566 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2567 if (Dest->getType() == IceType_i1)
2568 _and(T_2, Ctx->getConstantInt1(1));
2569 _mov(Dest, T_2);
2570 }
2571 break;
2572 case InstCast::Sitofp:
2573 if (isVectorType(Dest->getType())) {
2574 assert(Dest->getType() == IceType_v4f32 &&
2575 Inst->getSrc(0)->getType() == IceType_v4i32);
2576 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
John Porto921856d2015-07-07 11:56:26 -07002577 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002578 Src0RM = legalizeToReg(Src0RM);
John Porto7e93c622015-06-23 10:58:57 -07002579 Variable *T = makeReg(Dest->getType());
John Porto921856d2015-07-07 11:56:26 -07002580 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
John Porto7e93c622015-06-23 10:58:57 -07002581 _movp(Dest, T);
2582 } else if (Inst->getSrc(0)->getType() == IceType_i64) {
2583 // Use a helper for x86-32.
2584 const SizeT MaxSrcs = 1;
2585 Type DestType = Dest->getType();
2586 InstCall *Call =
2587 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2588 : H_sitofp_i64_f64,
2589 Dest, MaxSrcs);
2590 // TODO: Call the correct compiler-rt helper function.
2591 Call->addArg(Inst->getSrc(0));
2592 lowerCall(Call);
2593 return;
2594 } else {
2595 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2596 // Sign-extend the operand.
2597 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2598 Variable *T_1 = makeReg(IceType_i32);
2599 Variable *T_2 = makeReg(Dest->getType());
2600 if (Src0RM->getType() == IceType_i32)
2601 _mov(T_1, Src0RM);
2602 else
2603 _movsx(T_1, Src0RM);
John Porto921856d2015-07-07 11:56:26 -07002604 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
John Porto7e93c622015-06-23 10:58:57 -07002605 _mov(Dest, T_2);
2606 }
2607 break;
2608 case InstCast::Uitofp: {
2609 Operand *Src0 = Inst->getSrc(0);
2610 if (isVectorType(Src0->getType())) {
2611 assert(Dest->getType() == IceType_v4f32 &&
2612 Src0->getType() == IceType_v4i32);
2613 const SizeT MaxSrcs = 1;
2614 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2615 Call->addArg(Src0);
2616 lowerCall(Call);
2617 } else if (Src0->getType() == IceType_i64 ||
2618 Src0->getType() == IceType_i32) {
2619 // Use a helper for x86-32 and x86-64. Also use a helper for
2620 // i32 on x86-32.
2621 const SizeT MaxSrcs = 1;
2622 Type DestType = Dest->getType();
2623 IceString TargetString;
2624 if (isInt32Asserting32Or64(Src0->getType())) {
2625 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2626 : H_uitofp_i32_f64;
2627 } else {
2628 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2629 : H_uitofp_i64_f64;
2630 }
2631 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2632 Call->addArg(Src0);
2633 lowerCall(Call);
2634 return;
2635 } else {
2636 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2637 // Zero-extend the operand.
2638 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2639 Variable *T_1 = makeReg(IceType_i32);
2640 Variable *T_2 = makeReg(Dest->getType());
2641 if (Src0RM->getType() == IceType_i32)
2642 _mov(T_1, Src0RM);
2643 else
2644 _movzx(T_1, Src0RM);
John Porto921856d2015-07-07 11:56:26 -07002645 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
John Porto7e93c622015-06-23 10:58:57 -07002646 _mov(Dest, T_2);
2647 }
2648 break;
2649 }
2650 case InstCast::Bitcast: {
2651 Operand *Src0 = Inst->getSrc(0);
2652 if (Dest->getType() == Src0->getType()) {
2653 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
2654 lowerAssign(Assign);
2655 return;
2656 }
2657 switch (Dest->getType()) {
2658 default:
2659 llvm_unreachable("Unexpected Bitcast dest type");
2660 case IceType_i8: {
2661 assert(Src0->getType() == IceType_v8i1);
2662 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
2663 Call->addArg(Src0);
2664 lowerCall(Call);
2665 } break;
2666 case IceType_i16: {
2667 assert(Src0->getType() == IceType_v16i1);
2668 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
2669 Call->addArg(Src0);
2670 lowerCall(Call);
2671 } break;
2672 case IceType_i32:
2673 case IceType_f32: {
2674 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2675 Type DestType = Dest->getType();
2676 Type SrcType = Src0RM->getType();
2677 (void)DestType;
2678 assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
2679 (DestType == IceType_f32 && SrcType == IceType_i32));
2680 // a.i32 = bitcast b.f32 ==>
2681 // t.f32 = b.f32
2682 // s.f32 = spill t.f32
2683 // a.i32 = s.f32
2684 Variable *T = nullptr;
2685 // TODO: Should be able to force a spill setup by calling legalize() with
2686 // Legal_Mem and not Legal_Reg or Legal_Imm.
John Porto921856d2015-07-07 11:56:26 -07002687 typename Traits::SpillVariable *SpillVar =
John Porto5aeed952015-07-21 13:39:09 -07002688 Func->makeVariable<typename Traits::SpillVariable>(SrcType);
John Porto7e93c622015-06-23 10:58:57 -07002689 SpillVar->setLinkedTo(Dest);
2690 Variable *Spill = SpillVar;
2691 Spill->setWeight(RegWeight::Zero);
2692 _mov(T, Src0RM);
2693 _mov(Spill, T);
2694 _mov(Dest, Spill);
2695 } break;
2696 case IceType_i64: {
2697 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2698 assert(Src0RM->getType() == IceType_f64);
2699 // a.i64 = bitcast b.f64 ==>
2700 // s.f64 = spill b.f64
2701 // t_lo.i32 = lo(s.f64)
2702 // a_lo.i32 = t_lo.i32
2703 // t_hi.i32 = hi(s.f64)
2704 // a_hi.i32 = t_hi.i32
2705 Operand *SpillLo, *SpillHi;
2706 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
John Porto921856d2015-07-07 11:56:26 -07002707 typename Traits::SpillVariable *SpillVar =
John Porto5aeed952015-07-21 13:39:09 -07002708 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
John Porto7e93c622015-06-23 10:58:57 -07002709 SpillVar->setLinkedTo(Src0Var);
2710 Variable *Spill = SpillVar;
2711 Spill->setWeight(RegWeight::Zero);
2712 _movq(Spill, Src0RM);
John Porto921856d2015-07-07 11:56:26 -07002713 SpillLo = Traits::VariableSplit::create(Func, Spill,
2714 Traits::VariableSplit::Low);
2715 SpillHi = Traits::VariableSplit::create(Func, Spill,
2716 Traits::VariableSplit::High);
John Porto7e93c622015-06-23 10:58:57 -07002717 } else {
2718 SpillLo = loOperand(Src0RM);
2719 SpillHi = hiOperand(Src0RM);
2720 }
2721
2722 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2723 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2724 Variable *T_Lo = makeReg(IceType_i32);
2725 Variable *T_Hi = makeReg(IceType_i32);
2726
2727 _mov(T_Lo, SpillLo);
2728 _mov(DestLo, T_Lo);
2729 _mov(T_Hi, SpillHi);
2730 _mov(DestHi, T_Hi);
2731 } break;
2732 case IceType_f64: {
2733 Src0 = legalize(Src0);
2734 assert(Src0->getType() == IceType_i64);
John Porto921856d2015-07-07 11:56:26 -07002735 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
John Porto5aeed952015-07-21 13:39:09 -07002736 Variable *T = Func->makeVariable(Dest->getType());
John Porto7e93c622015-06-23 10:58:57 -07002737 _movq(T, Src0);
2738 _movq(Dest, T);
2739 break;
2740 }
2741 // a.f64 = bitcast b.i64 ==>
2742 // t_lo.i32 = b_lo.i32
2743 // FakeDef(s.f64)
2744 // lo(s.f64) = t_lo.i32
2745 // t_hi.i32 = b_hi.i32
2746 // hi(s.f64) = t_hi.i32
2747 // a.f64 = s.f64
John Porto921856d2015-07-07 11:56:26 -07002748 typename Traits::SpillVariable *SpillVar =
John Porto5aeed952015-07-21 13:39:09 -07002749 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
John Porto7e93c622015-06-23 10:58:57 -07002750 SpillVar->setLinkedTo(Dest);
2751 Variable *Spill = SpillVar;
2752 Spill->setWeight(RegWeight::Zero);
2753
2754 Variable *T_Lo = nullptr, *T_Hi = nullptr;
John Porto921856d2015-07-07 11:56:26 -07002755 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2756 Func, Spill, Traits::VariableSplit::Low);
2757 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2758 Func, Spill, Traits::VariableSplit::High);
John Porto7e93c622015-06-23 10:58:57 -07002759 _mov(T_Lo, loOperand(Src0));
      // Technically, the Spill is not defined until the _store happens, but
      // SpillLo is considered a "use" of Spill, so create the FakeDef of
      // Spill before it is used.
2763 Context.insert(InstFakeDef::create(Func, Spill));
2764 _store(T_Lo, SpillLo);
2765 _mov(T_Hi, hiOperand(Src0));
2766 _store(T_Hi, SpillHi);
2767 _movq(Dest, Spill);
2768 } break;
2769 case IceType_v8i1: {
2770 assert(Src0->getType() == IceType_i8);
2771 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
John Porto5aeed952015-07-21 13:39:09 -07002772 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
John Porto7e93c622015-06-23 10:58:57 -07002773 // Arguments to functions are required to be at least 32 bits wide.
2774 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2775 Call->addArg(Src0AsI32);
2776 lowerCall(Call);
2777 } break;
2778 case IceType_v16i1: {
2779 assert(Src0->getType() == IceType_i16);
2780 InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
John Porto5aeed952015-07-21 13:39:09 -07002781 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
John Porto7e93c622015-06-23 10:58:57 -07002782 // Arguments to functions are required to be at least 32 bits wide.
2783 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2784 Call->addArg(Src0AsI32);
2785 lowerCall(Call);
2786 } break;
2787 case IceType_v8i16:
2788 case IceType_v16i8:
2789 case IceType_v4i32:
2790 case IceType_v4f32: {
Andrew Scull97f460d2015-07-21 10:07:42 -07002791 _movp(Dest, legalizeToReg(Src0));
John Porto7e93c622015-06-23 10:58:57 -07002792 } break;
2793 }
2794 break;
2795 }
2796 }
2797}
2798
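/// Lowers an extractelement instruction. Uses pextr{b,w,d} when SSE4.1
/// is available (or pextrw for 16-bit elements), pshufd plus movd/movss
/// for 32-bit elements, and otherwise spills the vector to a stack slot
/// and reads the element back from memory.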
2799template <class Machine>
2800void TargetX86Base<Machine>::lowerExtractElement(
2801 const InstExtractElement *Inst) {
2802 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2803 ConstantInteger32 *ElementIndex =
2804 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
2805 // Only constant indices are allowed in PNaCl IR.
2806 assert(ElementIndex);
2807
2808 unsigned Index = ElementIndex->getValue();
2809 Type Ty = SourceVectNotLegalized->getType();
2810 Type ElementTy = typeElementType(Ty);
2811 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
2812 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2813
2814 // TODO(wala): Determine the best lowering sequences for each type.
2815 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
John Porto5d0acff2015-06-30 15:29:21 -07002816 InstructionSet >= Traits::SSE4_1;
John Porto7e93c622015-06-23 10:58:57 -07002817 if (CanUsePextr && Ty != IceType_v4f32) {
2818 // Use pextrb, pextrw, or pextrd.
2819 Constant *Mask = Ctx->getConstantInt32(Index);
Andrew Scull97f460d2015-07-21 10:07:42 -07002820 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07002821 _pextr(ExtractedElementR, SourceVectR, Mask);
2822 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2823 // Use pshufd and movd/movss.
2824 Variable *T = nullptr;
2825 if (Index) {
2826 // The shuffle only needs to occur if the element to be extracted
2827 // is not at the lowest index.
2828 Constant *Mask = Ctx->getConstantInt32(Index);
2829 T = makeReg(Ty);
2830 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2831 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07002832 T = legalizeToReg(SourceVectNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07002833 }
2834
2835 if (InVectorElementTy == IceType_i32) {
2836 _movd(ExtractedElementR, T);
2837 } else { // Ty == IceType_f32
2838 // TODO(wala): _movss is only used here because _mov does not
2839 // allow a vector source and a scalar destination. _mov should be
2840 // able to be used here.
2841 // _movss is a binary instruction, so the FakeDef is needed to
2842 // keep the live range analysis consistent.
2843 Context.insert(InstFakeDef::create(Func, ExtractedElementR));
2844 _movss(ExtractedElementR, T);
2845 }
2846 } else {
2847 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2848 // Spill the value to a stack slot and do the extraction in memory.
2849 //
2850 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2851 // support for legalizing to mem is implemented.
John Porto5aeed952015-07-21 13:39:09 -07002852 Variable *Slot = Func->makeVariable(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002853 Slot->setWeight(RegWeight::Zero);
Andrew Scull97f460d2015-07-21 10:07:42 -07002854 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
John Porto7e93c622015-06-23 10:58:57 -07002855
2856 // Compute the location of the element in memory.
2857 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
John Porto921856d2015-07-07 11:56:26 -07002858 typename Traits::X86OperandMem *Loc =
John Porto7e93c622015-06-23 10:58:57 -07002859 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2860 _mov(ExtractedElementR, Loc);
2861 }
2862
2863 if (ElementTy == IceType_i1) {
2864 // Truncate extracted integers to i1s if necessary.
2865 Variable *T = makeReg(IceType_i1);
2866 InstCast *Cast =
2867 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
2868 lowerCast(Cast);
2869 ExtractedElementR = T;
2870 }
2871
2872 // Copy the element to the destination.
2873 Variable *Dest = Inst->getDest();
2874 _mov(Dest, ExtractedElementR);
2875}
2876
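/// Lowers an fcmp instruction. Vector compares use cmpps with a
/// predicate taken from TableFcmp (One and Ueq need two compares
/// combined with pand/por); scalar compares use ucomiss followed by
/// conditional branches or setcc, per the scheme described below.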
2877template <class Machine>
2878void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
2879 Operand *Src0 = Inst->getSrc(0);
2880 Operand *Src1 = Inst->getSrc(1);
2881 Variable *Dest = Inst->getDest();
2882
2883 if (isVectorType(Dest->getType())) {
2884 InstFcmp::FCond Condition = Inst->getCondition();
2885 size_t Index = static_cast<size_t>(Condition);
2886 assert(Index < Traits::TableFcmpSize);
2887
2888 if (Traits::TableFcmp[Index].SwapVectorOperands) {
2889 Operand *T = Src0;
2890 Src0 = Src1;
2891 Src1 = T;
2892 }
2893
2894 Variable *T = nullptr;
2895
2896 if (Condition == InstFcmp::True) {
2897 // makeVectorOfMinusOnes() requires an integer vector type.
2898 T = makeVectorOfMinusOnes(IceType_v4i32);
2899 } else if (Condition == InstFcmp::False) {
2900 T = makeVectorOfZeros(Dest->getType());
2901 } else {
2902 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2903 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
John Porto921856d2015-07-07 11:56:26 -07002904 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002905 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07002906
2907 switch (Condition) {
2908 default: {
John Porto5d0acff2015-06-30 15:29:21 -07002909 typename Traits::Cond::CmppsCond Predicate =
2910 Traits::TableFcmp[Index].Predicate;
2911 assert(Predicate != Traits::Cond::Cmpps_Invalid);
John Porto7e93c622015-06-23 10:58:57 -07002912 T = makeReg(Src0RM->getType());
2913 _movp(T, Src0RM);
2914 _cmpps(T, Src1RM, Predicate);
2915 } break;
2916 case InstFcmp::One: {
2917 // Check both unequal and ordered.
2918 T = makeReg(Src0RM->getType());
2919 Variable *T2 = makeReg(Src0RM->getType());
2920 _movp(T, Src0RM);
John Porto5d0acff2015-06-30 15:29:21 -07002921 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
John Porto7e93c622015-06-23 10:58:57 -07002922 _movp(T2, Src0RM);
John Porto5d0acff2015-06-30 15:29:21 -07002923 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
John Porto7e93c622015-06-23 10:58:57 -07002924 _pand(T, T2);
2925 } break;
2926 case InstFcmp::Ueq: {
2927 // Check both equal or unordered.
2928 T = makeReg(Src0RM->getType());
2929 Variable *T2 = makeReg(Src0RM->getType());
2930 _movp(T, Src0RM);
John Porto5d0acff2015-06-30 15:29:21 -07002931 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
John Porto7e93c622015-06-23 10:58:57 -07002932 _movp(T2, Src0RM);
John Porto5d0acff2015-06-30 15:29:21 -07002933 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
John Porto7e93c622015-06-23 10:58:57 -07002934 _por(T, T2);
2935 } break;
2936 }
2937 }
2938
2939 _movp(Dest, T);
2940 eliminateNextVectorSextInstruction(Dest);
2941 return;
2942 }
2943
2944 // Lowering a = fcmp cond, b, c
2945 // ucomiss b, c /* only if C1 != Br_None */
2946 // /* but swap b,c order if SwapOperands==true */
2947 // mov a, <default>
2948 // j<C1> label /* only if C1 != Br_None */
2949 // j<C2> label /* only if C2 != Br_None */
2950 // FakeUse(a) /* only if C1 != Br_None */
2951 // mov a, !<default> /* only if C1 != Br_None */
2952 // label: /* only if C1 != Br_None */
2953 //
2954 // setcc lowering when C1 != Br_None && C2 == Br_None:
2955 // ucomiss b, c /* but swap b,c order if SwapOperands==true */
2956 // setcc a, C1
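  // A note on the swapping: ucomiss sets only ZF/CF/PF, and an unordered
  // result sets all three. "Above"-style predicates (CF=0 and ZF=0) are
  // therefore false for NaN inputs, so a compare like olt can be lowered
  // by swapping the operands and branching on "above" rather than using
  // "below", which an unordered result would spuriously satisfy. The
  // exact condition-code assignments live in Traits::TableFcmp.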
2957 InstFcmp::FCond Condition = Inst->getCondition();
2958 size_t Index = static_cast<size_t>(Condition);
2959 assert(Index < Traits::TableFcmpSize);
2960 if (Traits::TableFcmp[Index].SwapScalarOperands)
2961 std::swap(Src0, Src1);
John Porto5d0acff2015-06-30 15:29:21 -07002962 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None);
2963 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None);
John Porto7e93c622015-06-23 10:58:57 -07002964 if (HasC1) {
2965 Src0 = legalize(Src0);
2966 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2967 Variable *T = nullptr;
2968 _mov(T, Src0);
2969 _ucomiss(T, Src1RM);
2970 if (!HasC2) {
2971 assert(Traits::TableFcmp[Index].Default);
2972 _setcc(Dest, Traits::TableFcmp[Index].C1);
2973 return;
2974 }
2975 }
2976 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
2977 _mov(Dest, Default);
2978 if (HasC1) {
John Porto921856d2015-07-07 11:56:26 -07002979 typename Traits::Insts::Label *Label =
2980 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07002981 _br(Traits::TableFcmp[Index].C1, Label);
2982 if (HasC2) {
2983 _br(Traits::TableFcmp[Index].C2, Label);
2984 }
2985 Constant *NonDefault =
2986 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);
2987 _mov_nonkillable(Dest, NonDefault);
2988 Context.insert(Label);
2989 }
2990}
2991
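/// Lowers an icmp instruction. Vector compares build on pcmpeq/pcmpgt,
/// biasing both operands by the sign bit first for unsigned predicates;
/// i64 compares split into hi/lo 32-bit compares and branches; smaller
/// integer compares become cmp followed by setcc.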
2992template <class Machine>
2993void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
2994 Operand *Src0 = legalize(Inst->getSrc(0));
2995 Operand *Src1 = legalize(Inst->getSrc(1));
2996 Variable *Dest = Inst->getDest();
2997
2998 if (isVectorType(Dest->getType())) {
2999 Type Ty = Src0->getType();
3000 // Promote i1 vectors to 128 bit integer vector types.
3001 if (typeElementType(Ty) == IceType_i1) {
3002 Type NewTy = IceType_NUM;
3003 switch (Ty) {
3004 default:
3005 llvm_unreachable("unexpected type");
3006 break;
3007 case IceType_v4i1:
3008 NewTy = IceType_v4i32;
3009 break;
3010 case IceType_v8i1:
3011 NewTy = IceType_v8i16;
3012 break;
3013 case IceType_v16i1:
3014 NewTy = IceType_v16i8;
3015 break;
3016 }
John Porto5aeed952015-07-21 13:39:09 -07003017 Variable *NewSrc0 = Func->makeVariable(NewTy);
3018 Variable *NewSrc1 = Func->makeVariable(NewTy);
John Porto7e93c622015-06-23 10:58:57 -07003019 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
3020 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
3021 Src0 = NewSrc0;
3022 Src1 = NewSrc1;
3023 Ty = NewTy;
3024 }
3025
3026 InstIcmp::ICond Condition = Inst->getCondition();
3027
3028 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
3029 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3030
3031 // SSE2 only has signed comparison operations. Transform unsigned
3032 // inputs in a manner that allows for the use of signed comparison
3033 // operations by flipping the high order bits.
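    // For example, as unsigned 0xFFFFFFFF > 0x00000001, but as signed it
    // is -1 < 1. Xoring both operands with 0x80000000 yields 0x7FFFFFFF
    // and 0x80000001, and the signed compare 0x7FFFFFFF > 0x80000001
    // recovers the unsigned answer.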
3034 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
3035 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
3036 Variable *T0 = makeReg(Ty);
3037 Variable *T1 = makeReg(Ty);
3038 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
3039 _movp(T0, Src0RM);
3040 _pxor(T0, HighOrderBits);
3041 _movp(T1, Src1RM);
3042 _pxor(T1, HighOrderBits);
3043 Src0RM = T0;
3044 Src1RM = T1;
3045 }
3046
3047 Variable *T = makeReg(Ty);
3048 switch (Condition) {
3049 default:
3050 llvm_unreachable("unexpected condition");
3051 break;
3052 case InstIcmp::Eq: {
John Porto921856d2015-07-07 11:56:26 -07003053 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07003054 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07003055 _movp(T, Src0RM);
3056 _pcmpeq(T, Src1RM);
3057 } break;
3058 case InstIcmp::Ne: {
John Porto921856d2015-07-07 11:56:26 -07003059 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07003060 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07003061 _movp(T, Src0RM);
3062 _pcmpeq(T, Src1RM);
3063 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3064 _pxor(T, MinusOne);
3065 } break;
3066 case InstIcmp::Ugt:
3067 case InstIcmp::Sgt: {
John Porto921856d2015-07-07 11:56:26 -07003068 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07003069 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07003070 _movp(T, Src0RM);
3071 _pcmpgt(T, Src1RM);
3072 } break;
3073 case InstIcmp::Uge:
3074 case InstIcmp::Sge: {
3075 // !(Src1RM > Src0RM)
John Porto921856d2015-07-07 11:56:26 -07003076 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07003077 Src0RM = legalizeToReg(Src0RM);
John Porto7e93c622015-06-23 10:58:57 -07003078 _movp(T, Src1RM);
3079 _pcmpgt(T, Src0RM);
3080 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3081 _pxor(T, MinusOne);
3082 } break;
3083 case InstIcmp::Ult:
3084 case InstIcmp::Slt: {
John Porto921856d2015-07-07 11:56:26 -07003085 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07003086 Src0RM = legalizeToReg(Src0RM);
John Porto7e93c622015-06-23 10:58:57 -07003087 _movp(T, Src1RM);
3088 _pcmpgt(T, Src0RM);
3089 } break;
3090 case InstIcmp::Ule:
3091 case InstIcmp::Sle: {
3092 // !(Src0RM > Src1RM)
John Porto921856d2015-07-07 11:56:26 -07003093 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07003094 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07003095 _movp(T, Src0RM);
3096 _pcmpgt(T, Src1RM);
3097 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3098 _pxor(T, MinusOne);
3099 } break;
3100 }
3101
3102 _movp(Dest, T);
3103 eliminateNextVectorSextInstruction(Dest);
3104 return;
3105 }
3106
3107 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
3108 if (Src0->getType() == IceType_i64) {
3109 InstIcmp::ICond Condition = Inst->getCondition();
3110 size_t Index = static_cast<size_t>(Condition);
3111 assert(Index < Traits::TableIcmp64Size);
3112 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3113 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3114 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
3115 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
3116 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3117 Constant *One = Ctx->getConstantInt32(1);
John Porto921856d2015-07-07 11:56:26 -07003118 typename Traits::Insts::Label *LabelFalse =
3119 Traits::Insts::Label::create(Func, this);
3120 typename Traits::Insts::Label *LabelTrue =
3121 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07003122 _mov(Dest, One);
3123 _cmp(Src0HiRM, Src1HiRI);
John Porto5d0acff2015-06-30 15:29:21 -07003124 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
John Porto7e93c622015-06-23 10:58:57 -07003125 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
John Porto5d0acff2015-06-30 15:29:21 -07003126 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
John Porto7e93c622015-06-23 10:58:57 -07003127 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
3128 _cmp(Src0LoRM, Src1LoRI);
3129 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
3130 Context.insert(LabelFalse);
3131 _mov_nonkillable(Dest, Zero);
3132 Context.insert(LabelTrue);
3133 return;
3134 }
3135
3136 // cmp b, c
3137 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
3138 _cmp(Src0RM, Src1);
3139 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
3140}
3141
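/// Lowers an insertelement instruction. Uses insertps/pinsr{b,w,d} when
/// SSE4.1 is available (or pinsrw for 16-bit elements), a two-shufps
/// sequence for 32-bit elements, and otherwise spills the vector to a
/// stack slot and stores the element into it in memory.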
3142template <class Machine>
3143void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
3144 Operand *SourceVectNotLegalized = Inst->getSrc(0);
3145 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
3146 ConstantInteger32 *ElementIndex =
3147 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
3148 // Only constant indices are allowed in PNaCl IR.
3149 assert(ElementIndex);
3150 unsigned Index = ElementIndex->getValue();
3151 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
3152
3153 Type Ty = SourceVectNotLegalized->getType();
3154 Type ElementTy = typeElementType(Ty);
3155 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
3156
3157 if (ElementTy == IceType_i1) {
3158 // Expand the element to the appropriate size for it to be inserted
3159 // in the vector.
John Porto5aeed952015-07-21 13:39:09 -07003160 Variable *Expanded = Func->makeVariable(InVectorElementTy);
John Porto7e93c622015-06-23 10:58:57 -07003161 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
3162 ElementToInsertNotLegalized);
3163 lowerCast(Cast);
3164 ElementToInsertNotLegalized = Expanded;
3165 }
3166
3167 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
John Porto5d0acff2015-06-30 15:29:21 -07003168 InstructionSet >= Traits::SSE4_1) {
John Porto7e93c622015-06-23 10:58:57 -07003169 // Use insertps, pinsrb, pinsrw, or pinsrd.
3170 Operand *ElementRM =
3171 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3172 Operand *SourceVectRM =
3173 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3174 Variable *T = makeReg(Ty);
3175 _movp(T, SourceVectRM);
3176 if (Ty == IceType_v4f32)
3177 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
3178 else
3179 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
3180 _movp(Inst->getDest(), T);
3181 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
3182 // Use shufps or movss.
3183 Variable *ElementR = nullptr;
3184 Operand *SourceVectRM =
3185 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3186
3187 if (InVectorElementTy == IceType_f32) {
3188 // ElementR will be in an XMM register since it is floating point.
Andrew Scull97f460d2015-07-21 10:07:42 -07003189 ElementR = legalizeToReg(ElementToInsertNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07003190 } else {
3191 // Copy an integer to an XMM register.
3192 Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3193 ElementR = makeReg(Ty);
3194 _movd(ElementR, T);
3195 }
3196
3197 if (Index == 0) {
3198 Variable *T = makeReg(Ty);
3199 _movp(T, SourceVectRM);
3200 _movss(T, ElementR);
3201 _movp(Inst->getDest(), T);
3202 return;
3203 }
3204
3205 // shufps treats the source and destination operands as vectors of
3206 // four doublewords. The destination's two high doublewords are
3207 // selected from the source operand and the two low doublewords are
3208 // selected from (the original value of) the destination operand.
3209 // An insertelement operation can be effected with a sequence of two
3210 // shufps operations with appropriate masks. In all cases below,
3211 // Element[0] is being inserted into SourceVectOperand. Indices are
3212 // ordered from left to right.
3213 //
3214 // insertelement into index 1 (result is stored in ElementR):
3215 // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
3216 // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
3217 //
3218 // insertelement into index 2 (result is stored in T):
3219 // T := SourceVectRM
3220 // ElementR := ElementR[0, 0] T[0, 3]
3221 // T := T[0, 1] ElementR[0, 3]
3222 //
3223 // insertelement into index 3 (result is stored in T):
3224 // T := SourceVectRM
3225 // ElementR := ElementR[0, 0] T[0, 2]
3226 // T := T[0, 1] ElementR[3, 0]
3227 const unsigned char Mask1[3] = {0, 192, 128};
3228 const unsigned char Mask2[3] = {227, 196, 52};
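    // Each mask byte packs four 2-bit lane selectors, lowest field first:
    // the two low fields pick result lanes 0-1 from the destination and
    // the two high fields pick result lanes 2-3 from the source. E.g.
    // 192 = 0b11000000 selects {dest[0], dest[0], src[0], src[3]},
    // matching "ElementR[0, 0] T[0, 3]" above.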
3229
3230 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
3231 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
3232
3233 if (Index == 1) {
3234 _shufps(ElementR, SourceVectRM, Mask1Constant);
3235 _shufps(ElementR, SourceVectRM, Mask2Constant);
3236 _movp(Inst->getDest(), ElementR);
3237 } else {
3238 Variable *T = makeReg(Ty);
3239 _movp(T, SourceVectRM);
3240 _shufps(ElementR, T, Mask1Constant);
3241 _shufps(T, ElementR, Mask2Constant);
3242 _movp(Inst->getDest(), T);
3243 }
3244 } else {
3245 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
3246 // Spill the value to a stack slot and perform the insertion in
3247 // memory.
3248 //
3249 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
3250 // support for legalizing to mem is implemented.
John Porto5aeed952015-07-21 13:39:09 -07003251 Variable *Slot = Func->makeVariable(Ty);
John Porto7e93c622015-06-23 10:58:57 -07003252 Slot->setWeight(RegWeight::Zero);
Andrew Scull97f460d2015-07-21 10:07:42 -07003253 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
John Porto7e93c622015-06-23 10:58:57 -07003254
3255 // Compute the location of the position to insert in memory.
3256 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
John Porto921856d2015-07-07 11:56:26 -07003257 typename Traits::X86OperandMem *Loc =
John Porto7e93c622015-06-23 10:58:57 -07003258 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
Andrew Scull97f460d2015-07-21 10:07:42 -07003259 _store(legalizeToReg(ElementToInsertNotLegalized), Loc);
John Porto7e93c622015-06-23 10:58:57 -07003260
3261 Variable *T = makeReg(Ty);
3262 _movp(T, Slot);
3263 _movp(Inst->getDest(), T);
3264 }
3265}
3266
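/// Lowers an intrinsic call. Atomic operations expand inline, with
/// mfence where the ordering demands it; bswap, ctlz, cttz, fabs and
/// sqrt map onto dedicated instruction sequences; the remainder (memcpy,
/// memset, setjmp/longjmp, ctpop, ...) become helper calls.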
3267template <class Machine>
3268void TargetX86Base<Machine>::lowerIntrinsicCall(
3269 const InstIntrinsicCall *Instr) {
3270 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
3271 case Intrinsics::AtomicCmpxchg: {
3272 if (!Intrinsics::isMemoryOrderValid(
3273 ID, getConstantMemoryOrder(Instr->getArg(3)),
3274 getConstantMemoryOrder(Instr->getArg(4)))) {
3275 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
3276 return;
3277 }
3278 Variable *DestPrev = Instr->getDest();
Jan Voungfbdd2442015-07-15 12:36:20 -07003279 Operand *PtrToMem = legalize(Instr->getArg(0));
3280 Operand *Expected = legalize(Instr->getArg(1));
3281 Operand *Desired = legalize(Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003282 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
3283 return;
3284 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
3285 return;
3286 }
3287 case Intrinsics::AtomicFence:
3288 if (!Intrinsics::isMemoryOrderValid(
3289 ID, getConstantMemoryOrder(Instr->getArg(0)))) {
3290 Func->setError("Unexpected memory ordering for AtomicFence");
3291 return;
3292 }
3293 _mfence();
3294 return;
3295 case Intrinsics::AtomicFenceAll:
3296 // NOTE: FenceAll should prevent any load/store from being moved
3297 // across the fence (both atomic and non-atomic). The InstX8632Mfence
3298 // instruction is currently marked coarsely as "HasSideEffects".
3299 _mfence();
3300 return;
3301 case Intrinsics::AtomicIsLockFree: {
3302 // X86 is always lock free for 8/16/32/64 bit accesses.
3303 // TODO(jvoung): Since the result is constant when given a constant
3304 // byte size, this opens up DCE opportunities.
3305 Operand *ByteSize = Instr->getArg(0);
3306 Variable *Dest = Instr->getDest();
3307 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
3308 Constant *Result;
3309 switch (CI->getValue()) {
3310 default:
3311 // Some x86-64 processors support the cmpxchg16b instruction, which
3312 // can make 16-byte operations lock free (when used with the LOCK
3313 // prefix). However, that's not supported in 32-bit mode, so just
3314 // return 0 even for large sizes.
3315 Result = Ctx->getConstantZero(IceType_i32);
3316 break;
3317 case 1:
3318 case 2:
3319 case 4:
3320 case 8:
3321 Result = Ctx->getConstantInt32(1);
3322 break;
3323 }
3324 _mov(Dest, Result);
3325 return;
3326 }
3327 // The PNaCl ABI requires the byte size to be a compile-time constant.
3328 Func->setError("AtomicIsLockFree byte size should be compile-time const");
3329 return;
3330 }
3331 case Intrinsics::AtomicLoad: {
3332 // We require the memory address to be naturally aligned.
3333 // Given that, normal loads are atomic.
3334 if (!Intrinsics::isMemoryOrderValid(
3335 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
3336 Func->setError("Unexpected memory ordering for AtomicLoad");
3337 return;
3338 }
3339 Variable *Dest = Instr->getDest();
3340 if (Dest->getType() == IceType_i64) {
3341 // Follow what GCC does and use a movq instead of what lowerLoad()
3342 // normally does (split the load into two).
3343 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
3344 // can't happen anyway, since this is x86-32 and integer arithmetic only
3345 // happens on 32-bit quantities.
3346 Variable *T = makeReg(IceType_f64);
John Porto921856d2015-07-07 11:56:26 -07003347 typename Traits::X86OperandMem *Addr =
3348 formMemoryOperand(Instr->getArg(0), IceType_f64);
John Porto7e93c622015-06-23 10:58:57 -07003349 _movq(T, Addr);
3350 // Then cast the bits back out of the XMM register to the i64 Dest.
3351 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
3352 lowerCast(Cast);
3353 // Make sure that the atomic load isn't elided when unused.
3354 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
3355 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
3356 return;
3357 }
3358 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
3359 lowerLoad(Load);
3360 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
3361 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
3362 // insert the FakeUse on the last-inserted instruction's dest.
3363 Context.insert(
3364 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
3365 return;
3366 }
3367 case Intrinsics::AtomicRMW:
3368 if (!Intrinsics::isMemoryOrderValid(
3369 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
3370 Func->setError("Unexpected memory ordering for AtomicRMW");
3371 return;
3372 }
Jim Stichnoth20b71f52015-06-24 15:52:24 -07003373 lowerAtomicRMW(
3374 Instr->getDest(),
3375 static_cast<uint32_t>(
3376 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
3377 Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003378 return;
3379 case Intrinsics::AtomicStore: {
3380 if (!Intrinsics::isMemoryOrderValid(
3381 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
3382 Func->setError("Unexpected memory ordering for AtomicStore");
3383 return;
3384 }
3385 // We require the memory address to be naturally aligned.
3386 // Given that, normal stores are atomic.
3387 // Add a fence after the store to make it visible.
3388 Operand *Value = Instr->getArg(0);
3389 Operand *Ptr = Instr->getArg(1);
3390 if (Value->getType() == IceType_i64) {
3391 // Use a movq instead of what lowerStore() normally does
3392 // (split the store into two), following what GCC does.
3393 // Cast the bits from int to an xmm register first.
3394 Variable *T = makeReg(IceType_f64);
3395 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
3396 lowerCast(Cast);
3397 // Then store XMM w/ a movq.
John Porto921856d2015-07-07 11:56:26 -07003398 typename Traits::X86OperandMem *Addr =
3399 formMemoryOperand(Ptr, IceType_f64);
John Porto7e93c622015-06-23 10:58:57 -07003400 _storeq(T, Addr);
3401 _mfence();
3402 return;
3403 }
3404 InstStore *Store = InstStore::create(Func, Value, Ptr);
3405 lowerStore(Store);
3406 _mfence();
3407 return;
3408 }
3409 case Intrinsics::Bswap: {
3410 Variable *Dest = Instr->getDest();
3411 Operand *Val = Instr->getArg(0);
3412 // In 32-bit mode, bswap only works on 32-bit arguments, and the
3413 // argument must be a register. Use rotate left for 16-bit bswap.
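    // (Rotating a 16-bit value left by 8 swaps its two bytes, e.g.
    // 0x1234 -> 0x3412.)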
3414 if (Val->getType() == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07003415 Val = legalizeUndef(Val);
Andrew Scull97f460d2015-07-21 10:07:42 -07003416 Variable *T_Lo = legalizeToReg(loOperand(Val));
3417 Variable *T_Hi = legalizeToReg(hiOperand(Val));
John Porto7e93c622015-06-23 10:58:57 -07003418 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3419 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3420 _bswap(T_Lo);
3421 _bswap(T_Hi);
3422 _mov(DestLo, T_Hi);
3423 _mov(DestHi, T_Lo);
3424 } else if (Val->getType() == IceType_i32) {
Andrew Scull97f460d2015-07-21 10:07:42 -07003425 Variable *T = legalizeToReg(Val);
John Porto7e93c622015-06-23 10:58:57 -07003426 _bswap(T);
3427 _mov(Dest, T);
3428 } else {
3429 assert(Val->getType() == IceType_i16);
John Porto7e93c622015-06-23 10:58:57 -07003430 Constant *Eight = Ctx->getConstantInt16(8);
3431 Variable *T = nullptr;
Jan Voungfbdd2442015-07-15 12:36:20 -07003432 Val = legalize(Val);
John Porto7e93c622015-06-23 10:58:57 -07003433 _mov(T, Val);
3434 _rol(T, Eight);
3435 _mov(Dest, T);
3436 }
3437 return;
3438 }
3439 case Intrinsics::Ctpop: {
3440 Variable *Dest = Instr->getDest();
3441 Operand *Val = Instr->getArg(0);
3442 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
3443 ? H_call_ctpop_i32
3444 : H_call_ctpop_i64,
3445 Dest, 1);
3446 Call->addArg(Val);
3447 lowerCall(Call);
3448 // The popcount helpers always return 32-bit values, while the intrinsic's
3449 // signature matches the native POPCNT instruction and fills a 64-bit reg
3450 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
3451 // the user doesn't do that in the IR. If the user does that in the IR,
3452 // then this zeroing instruction is dead and gets optimized out.
3453 if (Val->getType() == IceType_i64) {
3454 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3455 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3456 _mov(DestHi, Zero);
3457 }
3458 return;
3459 }
3460 case Intrinsics::Ctlz: {
3461 // The "is zero undef" parameter is ignored and we always return
3462 // a well-defined value.
3463 Operand *Val = legalize(Instr->getArg(0));
3464 Operand *FirstVal;
3465 Operand *SecondVal = nullptr;
3466 if (Val->getType() == IceType_i64) {
3467 FirstVal = loOperand(Val);
3468 SecondVal = hiOperand(Val);
3469 } else {
3470 FirstVal = Val;
3471 }
3472 const bool IsCttz = false;
3473 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3474 SecondVal);
3475 return;
3476 }
3477 case Intrinsics::Cttz: {
3478 // The "is zero undef" parameter is ignored and we always return
3479 // a well-defined value.
3480 Operand *Val = legalize(Instr->getArg(0));
3481 Operand *FirstVal;
3482 Operand *SecondVal = nullptr;
3483 if (Val->getType() == IceType_i64) {
3484 FirstVal = hiOperand(Val);
3485 SecondVal = loOperand(Val);
3486 } else {
3487 FirstVal = Val;
3488 }
3489 const bool IsCttz = true;
3490 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3491 SecondVal);
3492 return;
3493 }
3494 case Intrinsics::Fabs: {
3495 Operand *Src = legalize(Instr->getArg(0));
3496 Type Ty = Src->getType();
3497 Variable *Dest = Instr->getDest();
3498 Variable *T = makeVectorOfFabsMask(Ty);
3499 // The pand instruction operates on an m128 memory operand, so if
3500 // Src is an f32 or f64, we need to make sure it's in a register.
3501 if (isVectorType(Ty)) {
John Porto921856d2015-07-07 11:56:26 -07003502 if (llvm::isa<typename Traits::X86OperandMem>(Src))
Andrew Scull97f460d2015-07-21 10:07:42 -07003503 Src = legalizeToReg(Src);
John Porto7e93c622015-06-23 10:58:57 -07003504 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07003505 Src = legalizeToReg(Src);
John Porto7e93c622015-06-23 10:58:57 -07003506 }
3507 _pand(T, Src);
3508 if (isVectorType(Ty))
3509 _movp(Dest, T);
3510 else
3511 _mov(Dest, T);
3512 return;
3513 }
3514 case Intrinsics::Longjmp: {
3515 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
3516 Call->addArg(Instr->getArg(0));
3517 Call->addArg(Instr->getArg(1));
3518 lowerCall(Call);
3519 return;
3520 }
3521 case Intrinsics::Memcpy: {
3522 // In the future, we could potentially emit an inline memcpy/memset, etc.
3523 // for intrinsic calls w/ a known length.
3524 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
3525 Call->addArg(Instr->getArg(0));
3526 Call->addArg(Instr->getArg(1));
3527 Call->addArg(Instr->getArg(2));
3528 lowerCall(Call);
3529 return;
3530 }
3531 case Intrinsics::Memmove: {
3532 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
3533 Call->addArg(Instr->getArg(0));
3534 Call->addArg(Instr->getArg(1));
3535 Call->addArg(Instr->getArg(2));
3536 lowerCall(Call);
3537 return;
3538 }
3539 case Intrinsics::Memset: {
Jan Voungf645d852015-07-09 10:35:09 -07003540 // The value operand needs to be extended to a stack slot size because the
3541 // PNaCl ABI requires arguments to be at least 32 bits wide.
John Porto7e93c622015-06-23 10:58:57 -07003542 Operand *ValOp = Instr->getArg(1);
3543 assert(ValOp->getType() == IceType_i8);
John Porto5aeed952015-07-21 13:39:09 -07003544 Variable *ValExt = Func->makeVariable(stackSlotType());
John Porto7e93c622015-06-23 10:58:57 -07003545 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
3546 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
3547 Call->addArg(Instr->getArg(0));
3548 Call->addArg(ValExt);
3549 Call->addArg(Instr->getArg(2));
3550 lowerCall(Call);
3551 return;
3552 }
3553 case Intrinsics::NaClReadTP: {
3554 if (Ctx->getFlags().getUseSandboxing()) {
John Porto5aeed952015-07-21 13:39:09 -07003555 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand);
John Porto7e93c622015-06-23 10:58:57 -07003556 Variable *Dest = Instr->getDest();
3557 Variable *T = nullptr;
3558 _mov(T, Src);
3559 _mov(Dest, T);
3560 } else {
3561 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
3562 lowerCall(Call);
3563 }
3564 return;
3565 }
3566 case Intrinsics::Setjmp: {
3567 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
3568 Call->addArg(Instr->getArg(0));
3569 lowerCall(Call);
3570 return;
3571 }
3572 case Intrinsics::Sqrt: {
3573 Operand *Src = legalize(Instr->getArg(0));
3574 Variable *Dest = Instr->getDest();
3575 Variable *T = makeReg(Dest->getType());
3576 _sqrtss(T, Src);
3577 _mov(Dest, T);
3578 return;
3579 }
3580 case Intrinsics::Stacksave: {
John Porto5d0acff2015-06-30 15:29:21 -07003581 Variable *esp =
3582 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
John Porto7e93c622015-06-23 10:58:57 -07003583 Variable *Dest = Instr->getDest();
3584 _mov(Dest, esp);
3585 return;
3586 }
3587 case Intrinsics::Stackrestore: {
John Porto5d0acff2015-06-30 15:29:21 -07003588 Variable *esp =
3589 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
John Porto7e93c622015-06-23 10:58:57 -07003590 _mov_nonkillable(esp, Instr->getArg(0));
3591 return;
3592 }
3593 case Intrinsics::Trap:
3594 _ud2();
3595 return;
3596 case Intrinsics::UnknownIntrinsic:
3597 Func->setError("Should not be lowering UnknownIntrinsic");
3598 return;
3599 }
3600 return;
3601}
3602
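/// Lowers an atomic compare-and-swap to lock cmpxchg, or to lock
/// cmpxchg8b for i64, which pins edx:eax to the expected value and
/// ecx:ebx to the desired value.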
3603template <class Machine>
3604void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3605 Operand *Ptr, Operand *Expected,
3606 Operand *Desired) {
3607 if (Expected->getType() == IceType_i64) {
3608 // Reserve the pre-colored registers first, before adding any more
3609 // infinite-weight variables from formMemoryOperand's legalization.
John Porto5d0acff2015-06-30 15:29:21 -07003610 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3611 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3612 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3613 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
John Porto7e93c622015-06-23 10:58:57 -07003614 _mov(T_eax, loOperand(Expected));
3615 _mov(T_edx, hiOperand(Expected));
3616 _mov(T_ebx, loOperand(Desired));
3617 _mov(T_ecx, hiOperand(Desired));
John Porto921856d2015-07-07 11:56:26 -07003618 typename Traits::X86OperandMem *Addr =
3619 formMemoryOperand(Ptr, Expected->getType());
John Porto7e93c622015-06-23 10:58:57 -07003620 const bool Locked = true;
3621 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3622 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3623 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3624 _mov(DestLo, T_eax);
3625 _mov(DestHi, T_edx);
3626 return;
3627 }
John Porto5d0acff2015-06-30 15:29:21 -07003628 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07003629 _mov(T_eax, Expected);
John Porto921856d2015-07-07 11:56:26 -07003630 typename Traits::X86OperandMem *Addr =
3631 formMemoryOperand(Ptr, Expected->getType());
Andrew Scull97f460d2015-07-21 10:07:42 -07003632 Variable *DesiredReg = legalizeToReg(Desired);
John Porto7e93c622015-06-23 10:58:57 -07003633 const bool Locked = true;
3634 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3635 _mov(DestPrev, T_eax);
3636}
3637
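/// Tries to fuse a cmpxchg with an immediately following icmp eq and
/// branch on its result, so the branch can test the ZF flag that lock
/// cmpxchg already sets instead of re-comparing. Returns true on
/// success.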
3638template <class Machine>
3639bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3640 Operand *PtrToMem,
3641 Operand *Expected,
3642 Operand *Desired) {
3643 if (Ctx->getFlags().getOptLevel() == Opt_m1)
3644 return false;
3645 // Peek ahead a few instructions and see how Dest is used.
3646 // It's very common to have:
3647 //
3648 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3649 // [%y_phi = ...] // list of phi stores
3650 // %p = icmp eq i32 %x, %expected
3651 // br i1 %p, label %l1, label %l2
3652 //
3653 // which we can optimize into:
3654 //
3655 // %x = <cmpxchg code>
3656 // [%y_phi = ...] // list of phi stores
3657 // br eq, %l1, %l2
3658 InstList::iterator I = Context.getCur();
3659 // I is currently the InstIntrinsicCall. Peek past that.
3660 // This assumes that the atomic cmpxchg has not been lowered yet,
3661 // so that the instructions seen in the scan from "Cur" are simple.
3662 assert(llvm::isa<InstIntrinsicCall>(*I));
3663 Inst *NextInst = Context.getNextInst(I);
3664 if (!NextInst)
3665 return false;
3666 // There might be phi assignments right before the compare+branch, since this
3667 // could be a backward branch for a loop. This placement of assignments is
3668 // determined by placePhiStores().
3669 std::vector<InstAssign *> PhiAssigns;
3670 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3671 if (PhiAssign->getDest() == Dest)
3672 return false;
3673 PhiAssigns.push_back(PhiAssign);
3674 NextInst = Context.getNextInst(I);
3675 if (!NextInst)
3676 return false;
3677 }
3678 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
3679 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
3680 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
3681 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
3682 return false;
3683 }
3684 NextInst = Context.getNextInst(I);
3685 if (!NextInst)
3686 return false;
3687 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
3688 if (!NextBr->isUnconditional() &&
3689 NextCmp->getDest() == NextBr->getCondition() &&
3690 NextBr->isLastUse(NextCmp->getDest())) {
3691 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3692 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3693 // Lower the phi assignments now, before the branch (same placement
3694 // as before).
3695 InstAssign *PhiAssign = PhiAssigns[i];
3696 PhiAssign->setDeleted();
3697 lowerAssign(PhiAssign);
3698 Context.advanceNext();
3699 }
John Porto5d0acff2015-06-30 15:29:21 -07003700 _br(Traits::Cond::Br_e, NextBr->getTargetTrue(),
3701 NextBr->getTargetFalse());
John Porto7e93c622015-06-23 10:58:57 -07003702 // Skip over the old compare and branch, by deleting them.
3703 NextCmp->setDeleted();
3704 NextBr->setDeleted();
3705 Context.advanceNext();
3706 Context.advanceNext();
3707 return true;
3708 }
3709 }
3710 }
3711 return false;
3712}
3713
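/// Lowers an atomic read-modify-write. 32-bit add/sub use lock xadd
/// (negating the operand for sub) and exchange uses xchg; everything
/// else, including all i64 variants, expands to a lock cmpxchg loop in
/// expandAtomicRMWAsCmpxchg().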
3714template <class Machine>
3715void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3716 Operand *Ptr, Operand *Val) {
3717 bool NeedsCmpxchg = false;
3718 LowerBinOp Op_Lo = nullptr;
3719 LowerBinOp Op_Hi = nullptr;
3720 switch (Operation) {
3721 default:
3722 Func->setError("Unknown AtomicRMW operation");
3723 return;
3724 case Intrinsics::AtomicAdd: {
3725 if (Dest->getType() == IceType_i64) {
3726 // All the fall-through paths must set this to true; it is used only
3727 // for asserting.
3728 NeedsCmpxchg = true;
3729 Op_Lo = &TargetX86Base<Machine>::_add;
3730 Op_Hi = &TargetX86Base<Machine>::_adc;
3731 break;
3732 }
John Porto921856d2015-07-07 11:56:26 -07003733 typename Traits::X86OperandMem *Addr =
3734 formMemoryOperand(Ptr, Dest->getType());
John Porto7e93c622015-06-23 10:58:57 -07003735 const bool Locked = true;
3736 Variable *T = nullptr;
3737 _mov(T, Val);
3738 _xadd(Addr, T, Locked);
3739 _mov(Dest, T);
3740 return;
3741 }
3742 case Intrinsics::AtomicSub: {
3743 if (Dest->getType() == IceType_i64) {
3744 NeedsCmpxchg = true;
3745 Op_Lo = &TargetX86Base<Machine>::_sub;
3746 Op_Hi = &TargetX86Base<Machine>::_sbb;
3747 break;
3748 }
John Porto921856d2015-07-07 11:56:26 -07003749 typename Traits::X86OperandMem *Addr =
3750 formMemoryOperand(Ptr, Dest->getType());
John Porto7e93c622015-06-23 10:58:57 -07003751 const bool Locked = true;
3752 Variable *T = nullptr;
3753 _mov(T, Val);
3754 _neg(T);
3755 _xadd(Addr, T, Locked);
3756 _mov(Dest, T);
3757 return;
3758 }
3759 case Intrinsics::AtomicOr:
3760 // TODO(jvoung): If Dest is null or dead, then some of these
3761 // operations do not need an "exchange", but just a locked op.
3762 // That appears to be "worth" it for sub, or, and, and xor.
3763 // xadd is probably fine vs lock add for add, and xchg is fine
3764 // vs an atomic store.
3765 NeedsCmpxchg = true;
3766 Op_Lo = &TargetX86Base<Machine>::_or;
3767 Op_Hi = &TargetX86Base<Machine>::_or;
3768 break;
3769 case Intrinsics::AtomicAnd:
3770 NeedsCmpxchg = true;
3771 Op_Lo = &TargetX86Base<Machine>::_and;
3772 Op_Hi = &TargetX86Base<Machine>::_and;
3773 break;
3774 case Intrinsics::AtomicXor:
3775 NeedsCmpxchg = true;
3776 Op_Lo = &TargetX86Base<Machine>::_xor;
3777 Op_Hi = &TargetX86Base<Machine>::_xor;
3778 break;
3779 case Intrinsics::AtomicExchange:
3780 if (Dest->getType() == IceType_i64) {
3781 NeedsCmpxchg = true;
3782 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3783 // just need to be moved to the ecx and ebx registers.
3784 Op_Lo = nullptr;
3785 Op_Hi = nullptr;
3786 break;
3787 }
John Porto921856d2015-07-07 11:56:26 -07003788 typename Traits::X86OperandMem *Addr =
3789 formMemoryOperand(Ptr, Dest->getType());
John Porto7e93c622015-06-23 10:58:57 -07003790 Variable *T = nullptr;
3791 _mov(T, Val);
3792 _xchg(Addr, T);
3793 _mov(Dest, T);
3794 return;
3795 }
3796 // Otherwise, we need a cmpxchg loop.
3797 (void)NeedsCmpxchg;
3798 assert(NeedsCmpxchg);
3799 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3800}
3801
3802template <class Machine>
3803void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
3804 LowerBinOp Op_Hi,
3805 Variable *Dest,
3806 Operand *Ptr,
3807 Operand *Val) {
3808 // Expand a more complex RMW operation as a cmpxchg loop:
3809 // For 64-bit:
3810 // mov eax, [ptr]
3811 // mov edx, [ptr + 4]
3812 // .LABEL:
3813 // mov ebx, eax
3814 // <Op_Lo> ebx, <desired_adj_lo>
3815 // mov ecx, edx
3816 // <Op_Hi> ecx, <desired_adj_hi>
3817 // lock cmpxchg8b [ptr]
3818 // jne .LABEL
3819 // mov <dest_lo>, eax
3820 // mov <dest_hi>, edx
3821 //
3822 // For 32-bit:
3823 // mov eax, [ptr]
3824 // .LABEL:
3825 // mov <reg>, eax
3826 // op <reg>, [desired_adj]
3827 // lock cmpxchg [ptr], <reg>
3828 // jne .LABEL
3829 // mov <dest>, eax
3830 //
3831 // If Op_{Lo,Hi} are nullptr, then just copy the value.
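  // The loop relies on cmpxchg's semantics: eax (edx:eax for cmpxchg8b)
  // is compared against [ptr]; on success the new value is stored, and
  // on failure ZF is cleared and eax is reloaded with the current memory
  // contents, so the jne retry starts from a fresh snapshot.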
3832 Val = legalize(Val);
3833 Type Ty = Val->getType();
3834 if (Ty == IceType_i64) {
John Porto5d0acff2015-06-30 15:29:21 -07003835 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3836 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
John Porto921856d2015-07-07 11:56:26 -07003837 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
John Porto7e93c622015-06-23 10:58:57 -07003838 _mov(T_eax, loOperand(Addr));
3839 _mov(T_edx, hiOperand(Addr));
John Porto5d0acff2015-06-30 15:29:21 -07003840 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3841 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
John Porto921856d2015-07-07 11:56:26 -07003842 typename Traits::Insts::Label *Label =
3843 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07003844 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
3845 if (!IsXchg8b) {
3846 Context.insert(Label);
3847 _mov(T_ebx, T_eax);
3848 (this->*Op_Lo)(T_ebx, loOperand(Val));
3849 _mov(T_ecx, T_edx);
3850 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3851 } else {
3852 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3853 // It just needs the Val loaded into ebx and ecx.
3854 // That can also be done before the loop.
3855 _mov(T_ebx, loOperand(Val));
3856 _mov(T_ecx, hiOperand(Val));
3857 Context.insert(Label);
3858 }
3859 const bool Locked = true;
3860 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
John Porto5d0acff2015-06-30 15:29:21 -07003861 _br(Traits::Cond::Br_ne, Label);
John Porto7e93c622015-06-23 10:58:57 -07003862 if (!IsXchg8b) {
3863 // If Val is a variable, model the extended live range of Val through
3864 // the end of the loop, since it will be re-used by the loop.
3865 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3866 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3867 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3868 Context.insert(InstFakeUse::create(Func, ValLo));
3869 Context.insert(InstFakeUse::create(Func, ValHi));
3870 }
3871 } else {
3872 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3873 Context.insert(InstFakeUse::create(Func, T_ebx));
3874 Context.insert(InstFakeUse::create(Func, T_ecx));
3875 }
3876 // The address base (if any) is also reused in the loop.
3877 if (Variable *Base = Addr->getBase())
3878 Context.insert(InstFakeUse::create(Func, Base));
3879 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3880 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3881 _mov(DestLo, T_eax);
3882 _mov(DestHi, T_edx);
3883 return;
3884 }
John Porto921856d2015-07-07 11:56:26 -07003885 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
John Porto5d0acff2015-06-30 15:29:21 -07003886 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07003887 _mov(T_eax, Addr);
John Porto921856d2015-07-07 11:56:26 -07003888 typename Traits::Insts::Label *Label =
3889 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07003890 Context.insert(Label);
3891 // We want to pick a register for T other than eax, so don't use
3892 // _mov(T == nullptr, T_eax).
3893 Variable *T = makeReg(Ty);
3894 _mov(T, T_eax);
3895 (this->*Op_Lo)(T, Val);
3896 const bool Locked = true;
3897 _cmpxchg(Addr, T_eax, T, Locked);
John Porto5d0acff2015-06-30 15:29:21 -07003898 _br(Traits::Cond::Br_ne, Label);
John Porto7e93c622015-06-23 10:58:57 -07003899 // If Val is a variable, model the extended live range of Val through
3900 // the end of the loop, since it will be re-used by the loop.
3901 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3902 Context.insert(InstFakeUse::create(Func, ValVar));
3903 }
3904 // The address base (if any) is also reused in the loop.
3905 if (Variable *Base = Addr->getBase())
3906 Context.insert(InstFakeUse::create(Func, Base));
3907 _mov(Dest, T_eax);
3908}
3909
Andrew Scull9612d322015-07-06 14:53:25 -07003910/// Lowers count {trailing, leading} zeros intrinsic.
3911///
3912/// We could do constant folding here, but that should have
3913/// been done by the front-end/middle-end optimizations.
John Porto7e93c622015-06-23 10:58:57 -07003914template <class Machine>
3915void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3916 Operand *FirstVal,
3917 Operand *SecondVal) {
3918 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3919 // Then the instructions will handle the Val == 0 case much more simply
3920 // and won't require conversion from bit position to number of zeros.
3921 //
3922 // Otherwise:
3923 // bsr IF_NOT_ZERO, Val
3924 // mov T_DEST, 63
3925 // cmovne T_DEST, IF_NOT_ZERO
3926 // xor T_DEST, 31
3927 // mov DEST, T_DEST
3928 //
3929 // NOTE: T_DEST must be a register because cmov requires its dest to be a
3930 // register. Also, bsf and bsr require their dest to be a register.
3931 //
3932 // The xor DEST, 31 converts a bit position to # of leading zeroes.
3933 // E.g., for 000... 00001100, bsr will say that the most significant bit
3934 // set is at position 3, while the number of leading zeros is 28. Xor is
3935 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
3936 //
3937 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3938 // are all zero, and compute the result for that case (checking the lower
3939 // 32 bits). Then actually compute the result for the upper bits and
3940 // cmov in the result from the lower computation if the earlier speculation
3941 // was correct.
3942 //
3943 // Cttz is similar, but uses bsf instead, doesn't require the xor
3944 // bit-position conversion, and the speculation is reversed.
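// E.g., cttz(0x00000018): bsf reports bit position 3, which is already
// the trailing-zero count. For an input of 0, bsf leaves its dest
// undefined (and sets ZF), so the cmov is what selects the constant 32.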
3945 assert(Ty == IceType_i32 || Ty == IceType_i64);
3946 Variable *T = makeReg(IceType_i32);
3947 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3948 if (Cttz) {
3949 _bsf(T, FirstValRM);
3950 } else {
3951 _bsr(T, FirstValRM);
3952 }
3953 Variable *T_Dest = makeReg(IceType_i32);
3954 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3955 Constant *ThirtyOne = Ctx->getConstantInt32(31);
3956 if (Cttz) {
3957 _mov(T_Dest, ThirtyTwo);
3958 } else {
3959 Constant *SixtyThree = Ctx->getConstantInt32(63);
3960 _mov(T_Dest, SixtyThree);
3961 }
John Porto5d0acff2015-06-30 15:29:21 -07003962 _cmov(T_Dest, T, Traits::Cond::Br_ne);
John Porto7e93c622015-06-23 10:58:57 -07003963 if (!Cttz) {
3964 _xor(T_Dest, ThirtyOne);
3965 }
3966 if (Ty == IceType_i32) {
3967 _mov(Dest, T_Dest);
3968 return;
3969 }
3970 _add(T_Dest, ThirtyTwo);
3971 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3972 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3973 // Will be using "test" on this, so we need a registerized variable.
Andrew Scull97f460d2015-07-21 10:07:42 -07003974 Variable *SecondVar = legalizeToReg(SecondVal);
John Porto7e93c622015-06-23 10:58:57 -07003975 Variable *T_Dest2 = makeReg(IceType_i32);
3976 if (Cttz) {
3977 _bsf(T_Dest2, SecondVar);
3978 } else {
3979 _bsr(T_Dest2, SecondVar);
3980 _xor(T_Dest2, ThirtyOne);
3981 }
3982 _test(SecondVar, SecondVar);
John Porto5d0acff2015-06-30 15:29:21 -07003983 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
John Porto7e93c622015-06-23 10:58:57 -07003984 _mov(DestLo, T_Dest2);
3985 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3986}
3987
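/// Lowers an indirect jump. Under NaCl sandboxing the target is first
/// masked down to a bundle boundary inside a locked bundle (e.g. with
/// 32-byte bundles, and target, 0xFFFFFFE0), so the jump cannot land in
/// the middle of a bundle.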
Andrew Scull86df4e92015-07-30 13:54:44 -07003988template <class Machine>
3989void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) {
3990 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
3991 if (NeedSandboxing) {
3992 _bundle_lock();
3993 const SizeT BundleSize =
3994 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
3995 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1)));
3996 }
3997 _jmp(Target);
3998 if (NeedSandboxing)
3999 _bundle_unlock();
4000}
4001
John Porto5aeed952015-07-21 13:39:09 -07004002inline bool isAdd(const Inst *Inst) {
John Porto7e93c622015-06-23 10:58:57 -07004003 if (const InstArithmetic *Arith =
4004 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
4005 return (Arith->getOp() == InstArithmetic::Add);
4006 }
4007 return false;
4008}
4009
John Porto5aeed952015-07-21 13:39:09 -07004010inline void dumpAddressOpt(const Cfg *Func, const Variable *Base,
4011 const Variable *Index, uint16_t Shift,
4012 int32_t Offset, const Inst *Reason) {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07004013 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07004014 return;
4015 if (!Func->isVerbose(IceV_AddrOpt))
4016 return;
4017 OstreamLocker L(Func->getContext());
4018 Ostream &Str = Func->getContext()->getStrDump();
4019 Str << "Instruction: ";
4020 Reason->dumpDecorated(Func);
4021 Str << " results in Base=";
4022 if (Base)
4023 Base->dump(Func);
4024 else
4025 Str << "<null>";
4026 Str << ", Index=";
4027 if (Index)
4028 Index->dump(Func);
4029 else
4030 Str << "<null>";
4031 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
4032}
4033
John Porto5aeed952015-07-21 13:39:09 -07004034inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata,
4035 Variable *&Var, const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07004036 // Var originates from Var=SrcVar ==>
4037 // set Var:=SrcVar
4038 if (Var == nullptr)
4039 return false;
4040 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
4041 assert(!VMetadata->isMultiDef(Var));
4042 if (llvm::isa<InstAssign>(VarAssign)) {
4043 Operand *SrcOp = VarAssign->getSrc(0);
4044 assert(SrcOp);
4045 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
4046 if (!VMetadata->isMultiDef(SrcVar) &&
4047 // TODO: ensure SrcVar stays single-BB
4048 true) {
4049 Var = SrcVar;
4050 Reason = VarAssign;
4051 return true;
4052 }
4053 }
4054 }
4055 }
4056 return false;
4057}
4058
John Porto5aeed952015-07-21 13:39:09 -07004059inline bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata,
4060 Variable *&Base, Variable *&Index,
4061 uint16_t &Shift, const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07004062 // Index==nullptr && Base is Base=Var1+Var2 ==>
4063 // set Base=Var1, Index=Var2, Shift=0
4064 if (Base == nullptr)
4065 return false;
4066 if (Index != nullptr)
4067 return false;
4068 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
4069 if (BaseInst == nullptr)
4070 return false;
4071 assert(!VMetadata->isMultiDef(Base));
4072 if (BaseInst->getSrcSize() < 2)
4073 return false;
4074 if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
4075 if (VMetadata->isMultiDef(Var1))
4076 return false;
4077 if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
4078 if (VMetadata->isMultiDef(Var2))
4079 return false;
4080 if (isAdd(BaseInst) &&
4081 // TODO: ensure Var1 and Var2 stay single-BB
4082 true) {
4083 Base = Var1;
4084 Index = Var2;
4085 Shift = 0; // should already have been 0
4086 Reason = BaseInst;
4087 return true;
4088 }
4089 }
4090 }
4091 return false;
4092}
4093
John Porto5aeed952015-07-21 13:39:09 -07004094inline bool matchShiftedIndex(const VariablesMetadata *VMetadata,
4095 Variable *&Index, uint16_t &Shift,
4096 const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07004097 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
4098 // Index=Var, Shift+=log2(Const)
4099 if (Index == nullptr)
4100 return false;
4101 const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
4102 if (IndexInst == nullptr)
4103 return false;
4104 assert(!VMetadata->isMultiDef(Index));
4105 if (IndexInst->getSrcSize() < 2)
4106 return false;
4107 if (const InstArithmetic *ArithInst =
4108 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
4109 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
4110 if (ConstantInteger32 *Const =
4111 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
4112 if (ArithInst->getOp() == InstArithmetic::Mul &&
4113 !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
4114 uint64_t Mult = Const->getValue();
4115 uint32_t LogMult;
4116 switch (Mult) {
4117 case 1:
4118 LogMult = 0;
4119 break;
4120 case 2:
4121 LogMult = 1;
4122 break;
4123 case 4:
4124 LogMult = 2;
4125 break;
4126 case 8:
4127 LogMult = 3;
4128 break;
4129 default:
4130 return false;
4131 }
4132 if (Shift + LogMult <= 3) {
4133 Index = Var;
4134 Shift += LogMult;
4135 Reason = IndexInst;
4136 return true;
4137 }
4138 }
4139 }
4140 }
4141 }
4142 return false;
4143}
4144
John Porto5aeed952015-07-21 13:39:09 -07004145inline bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
4146 int32_t &Offset, const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07004147 // Base is Base=Var+Const || Base is Base=Const+Var ==>
4148 // set Base=Var, Offset+=Const
4149 // Base is Base=Var-Const ==>
4150 // set Base=Var, Offset-=Const
4151 if (Base == nullptr)
4152 return false;
4153 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
4154 if (BaseInst == nullptr)
4155 return false;
4156 assert(!VMetadata->isMultiDef(Base));
4157 if (const InstArithmetic *ArithInst =
4158 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
4159 if (ArithInst->getOp() != InstArithmetic::Add &&
4160 ArithInst->getOp() != InstArithmetic::Sub)
4161 return false;
4162 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
4163 Variable *Var = nullptr;
4164 ConstantInteger32 *Const = nullptr;
4165 if (Variable *VariableOperand =
4166 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
4167 Var = VariableOperand;
4168 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
4169 } else if (IsAdd) {
4170 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
4171 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
4172 }
4173 if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
4174 return false;
4175 int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
4176 if (Utils::WouldOverflowAdd(Offset, MoreOffset))
4177 return false;
4178 Base = Var;
4179 Offset += MoreOffset;
4180 Reason = BaseInst;
4181 return true;
4182 }
4183 return false;
4184}
4185
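/// Iteratively folds assignment chains, base+index sums, scaled indices,
/// and constant offsets into a single x86 addressing expression.
/// Illustrative example: t1 = a + 12; t2 = i * 4; t3 = t1 + t2; *t3 may
/// fold to the operand [a + i*4 + 12], subject to the single-definition
/// and single-block checks in the match* helpers above.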
John Porto5aeed952015-07-21 13:39:09 -07004186inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
4187 Variable *&Index, uint16_t &Shift,
4188 int32_t &Offset) {
John Porto7e93c622015-06-23 10:58:57 -07004189 Func->resetCurrentNode();
4190 if (Func->isVerbose(IceV_AddrOpt)) {
4191 OstreamLocker L(Func->getContext());
4192 Ostream &Str = Func->getContext()->getStrDump();
4193 Str << "\nStarting computeAddressOpt for instruction:\n ";
4194 Instr->dumpDecorated(Func);
4195 }
4196 (void)Offset; // TODO: pattern-match for non-zero offsets.
4197 if (Base == nullptr)
4198 return;
4199 // If the Base has more than one use or is live across multiple
4200 // blocks, then don't go further. Alternatively (?), never consider
4201 // a transformation that would change a variable that is currently
4202 // *not* live across basic block boundaries into one that *is*.
4203 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
4204 return;
4205
4206 const VariablesMetadata *VMetadata = Func->getVMetadata();
4207 bool Continue = true;
4208 while (Continue) {
4209 const Inst *Reason = nullptr;
4210 if (matchTransitiveAssign(VMetadata, Base, Reason) ||
4211 matchTransitiveAssign(VMetadata, Index, Reason) ||
4212 matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
4213 matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
4214 matchOffsetBase(VMetadata, Base, Offset, Reason)) {
4215 dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
4216 } else {
4217 Continue = false;
4218 }
4219
4220 // Index is Index=Var<<Const && Const+Shift<=3 ==>
4221 // Index=Var, Shift+=Const
4222
4223 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
4224 // Index=Var, Shift+=log2(Const)
4225
4226 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
4227 // swap(Index,Base)
4228 // Similar for Base=Const*Var and Base=Var<<Const
4229
4230 // Index is Index=Var+Const ==>
4231 // set Index=Var, Offset+=(Const<<Shift)
4232
4233 // Index is Index=Const+Var ==>
4234 // set Index=Var, Offset+=(Const<<Shift)
4235
4236 // Index is Index=Var-Const ==>
4237 // set Index=Var, Offset-=(Const<<Shift)
4238
4239 // TODO: consider overflow issues with respect to Offset.
4240 // TODO: handle symbolic constants.
4241 }
4242}
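
// Net effect of a successful run of the matchers above, as scalar arithmetic
// (an illustrative sketch): the resulting operands describe a single x86
// addressing mode, with Shift constrained to the range [0, 3].
inline uint32_t effectiveAddressSketch(uint32_t Base, uint32_t Index,
                                       uint16_t Shift, int32_t Offset) {
  return Base + (Index << Shift) + static_cast<uint32_t>(Offset);
}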

template <class Machine>
void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
  // A Load instruction can be treated the same as an Assign instruction, after
  // the source operand is transformed into a Traits::X86OperandMem operand.
  // Note that the address mode optimization already creates a
  // Traits::X86OperandMem operand, so it doesn't need another level of
  // transformation.
  Variable *DestLoad = Load->getDest();
  Type Ty = DestLoad->getType();
  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
  InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}

template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
  Inst *Inst = Context.getCur();
  Variable *Dest = Inst->getDest();
  Operand *Addr = Inst->getSrc(0);
  Variable *Index = nullptr;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  // Vanilla ICE load instructions should not use the segment registers, and
  // computeAddressOpt only works at the level of Variables and Constants, not
  // other Traits::X86OperandMem, so there should be no mention of segment
  // registers there either.
  const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
      Traits::X86OperandMem::DefaultSegment;
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    Inst->setDeleted();
    Constant *OffsetOp = Ctx->getConstantInt32(Offset);
    Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
                                         Index, Shift, SegmentReg);
    Context.insert(InstLoad::create(Func, Dest, Addr));
  }
}
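
// Example of the rewrite performed above, in ICE-like pseudo-IR (a sketch;
// the variable names are hypothetical):
//   t1 = add i32 %base, %i
//   %dst = load i32, i32* t1
// becomes a load through a compound address,
//   %dst = load i32, [%base + %i]
// with the original load deleted and the add left for dead-code elimination.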

template <class Machine>
void TargetX86Base<Machine>::randomlyInsertNop(float Probability) {
  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
  if (RNG.getTrueWithProbability(Probability)) {
    _nop(RNG(Traits::X86_NUM_NOP_VARIANTS));
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}

template <class Machine>
void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      Variable *eax =
          legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
      Variable *edx =
          legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
      Reg = eax;
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (isScalarFloatingType(Src0->getType())) {
      _fld(Src0);
    } else if (isVectorType(Src0->getType())) {
      Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
    } else {
      _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because addEpilog
  // explicitly looks for a ret instruction as a marker for where to insert the
  // frame removal instructions.
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code eliminated.
  // TODO: Are there more places where the fake use should be inserted? E.g.
  // "void f(int n){while(1) g(n);}" may not have a ret instruction.
  Variable *esp =
      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
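
// For an i64 return the pattern produced above is roughly (a sketch):
//   mov eax, lo(%val)
//   mov edx, hi(%val)   ; kept alive up to the ret by the FakeUse
//   ret
// Scalar integer, vector, and scalar FP returns use eax, xmm0, and the x87
// st(0) top-of-stack respectively, per the 32-bit calling convention.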

template <class Machine>
void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    Type SrcTy = SrcT->getType();
    Variable *T = makeReg(SrcTy);
    Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
    Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
    if (InstructionSet >= Traits::SSE4_1) {
      // TODO(wala): If the condition operand is a constant, use blendps or
      // pblendw.
      //
      // Use blendvps or pblendvb to implement select.
      if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
          SrcTy == IceType_v4f32) {
        Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
        Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
        _movp(xmm0, ConditionRM);
        _psll(xmm0, Ctx->getConstantInt8(31));
        _movp(T, SrcFRM);
        _blendvps(T, SrcTRM, xmm0);
        _movp(Dest, T);
      } else {
        assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
        Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
                                                              : IceType_v16i8;
        Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
        lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
        _movp(T, SrcFRM);
        _pblendvb(T, SrcTRM, xmm0);
        _movp(Dest, T);
      }
      return;
    }
    // Lower select without Traits::SSE4.1:
    // a=d?b:c ==>
    //   if elementtype(d) != i1:
    //      d=sext(d);
    //   a=(b&d)|(c&~d);
    Variable *T2 = makeReg(SrcTy);
    // Sign extend the condition operand if applicable.
    if (SrcTy == IceType_v4f32) {
      // The sext operation takes only integer arguments.
      Variable *T3 = Func->makeVariable(IceType_v4i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
      _movp(T, T3);
    } else if (typeElementType(SrcTy) != IceType_i1) {
      lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
    } else {
      Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
      _movp(T, ConditionRM);
    }
    _movp(T2, T);
    _pand(T, SrcTRM);
    _pandn(T2, SrcFRM);
    _por(T, T2);
    _movp(Dest, T);

    return;
  }

  typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
  Operand *CmpOpnd0 = nullptr;
  Operand *CmpOpnd1 = nullptr;
  // Handle folding opportunities.
  if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
    assert(Producer->isDeleted());
    switch (BoolFolding::getProducerKind(Producer)) {
    default:
      break;
    case BoolFolding::PK_Icmp32: {
      auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
      Cond = Traits::getIcmp32Mapping(Cmp->getCondition());
      CmpOpnd1 = legalize(Producer->getSrc(1));
      CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
    } break;
    }
  }
  if (CmpOpnd0 == nullptr) {
    CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
    CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  }
  assert(CmpOpnd0);
  assert(CmpOpnd1);

  _cmp(CmpOpnd0, CmpOpnd1);
  if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
    // The cmov instruction doesn't allow 8-bit or FP operands, so we need
    // explicit control flow.
    // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
    typename Traits::Insts::Label *Label =
        Traits::Insts::Label::create(Func, this);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
    _mov(Dest, SrcT);
    _br(Cond, Label);
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
    _mov_nonkillable(Dest, SrcF);
    Context.insert(Label);
    return;
  }
  // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
  // But if SrcT is immediate, we might be able to do better, as the cmov
  // instruction doesn't allow an immediate operand:
  // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
  if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
    std::swap(SrcT, SrcF);
    Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
  }
  if (DestTy == IceType_i64) {
    SrcT = legalizeUndef(SrcT);
    SrcF = legalizeUndef(SrcF);
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF));
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
    _cmov(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF));
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
    _cmov(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }

  assert(DestTy == IceType_i16 || DestTy == IceType_i32);
  Variable *T = nullptr;
  SrcF = legalize(SrcF);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
  _cmov(T, SrcT, Cond);
  _mov(Dest, T);
}
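
// The SSE4.1-less vector path above relies on the classic mask-select
// identity. Scalar model (an illustrative sketch), valid when D is all-ones
// or all-zeros as produced by sign-extending an i1 lane:
inline uint32_t maskSelectSketch(uint32_t D, uint32_t B, uint32_t C) {
  return (B & D) | (C & ~D); // D ? B : C
}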

template <class Machine>
void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  typename Traits::X86OperandMem *NewAddr =
      formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
    Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
    _store(ValueHi,
           llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
    _store(ValueLo,
           llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
  } else if (isVectorType(Ty)) {
    _storep(legalizeToReg(Value), NewAddr);
  } else {
    Value = legalize(Value, Legal_Reg | Legal_Imm);
    _store(Value, NewAddr);
  }
}

template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
  InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
  Operand *Data = Inst->getData();
  Operand *Addr = Inst->getAddr();
  Variable *Index = nullptr;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  // Vanilla ICE store instructions should not use the segment registers, and
  // computeAddressOpt only works at the level of Variables and Constants, not
  // other Traits::X86OperandMem, so there should be no mention of segment
  // registers there either.
  const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
      Traits::X86OperandMem::DefaultSegment;
  computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    Inst->setDeleted();
    Constant *OffsetOp = Ctx->getConstantInt32(Offset);
    Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
                                         Index, Shift, SegmentReg);
    InstStore *NewStore = InstStore::create(Func, Data, Addr);
    if (Inst->getDest())
      NewStore->setRmwBeacon(Inst->getRmwBeacon());
    Context.insert(NewStore);
  }
}

template <class Machine>
Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
                                               uint64_t Min, uint64_t Max) {
  // TODO(ascull): 64-bit should not reach here but only because it is not
  // implemented yet. This should be able to handle the 64-bit case.
  assert(Comparison->getType() != IceType_i64);
  // Subtracting 0 is a nop so don't do it
  if (Min != 0) {
    // Avoid clobbering the comparison by copying it
    Variable *T = nullptr;
    _mov(T, Comparison);
    _sub(T, Ctx->getConstantInt32(Min));
    Comparison = T;
  }

  _cmp(Comparison, Ctx->getConstantInt32(Max - Min));

  return Comparison;
}
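
// lowerCmpRange uses the standard unsigned range-check trick: after
// subtracting Min, a single unsigned comparison covers both bounds. Scalar
// model (an illustrative sketch):
inline bool inRangeSketch(uint32_t X, uint32_t Min, uint32_t Max) {
  // Equivalent to X >= Min && X <= Max, via unsigned wraparound of X - Min.
  return X - Min <= Max - Min;
}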

template <class Machine>
void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case,
                                              Operand *Comparison, bool DoneCmp,
                                              CfgNode *DefaultTarget) {
  switch (Case.getKind()) {
  case CaseCluster::JumpTable: {
    typename Traits::Insts::Label *SkipJumpTable;

    Operand *RangeIndex =
        lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
    if (DefaultTarget == nullptr) {
      // Skip over jump table logic if comparison not in range and no default
      SkipJumpTable = Traits::Insts::Label::create(Func, this);
      _br(Traits::Cond::Br_a, SkipJumpTable);
    } else {
      _br(Traits::Cond::Br_a, DefaultTarget);
    }

    InstJumpTable *JumpTable = Case.getJumpTable();
    Context.insert(JumpTable);

    // Make sure the index is a register of the same width as the base
    Variable *Index;
    if (RangeIndex->getType() != getPointerType()) {
      Index = makeReg(getPointerType());
      _movzx(Index, RangeIndex);
    } else {
      Index = legalizeToReg(RangeIndex);
    }

    constexpr RelocOffsetT RelocOffset = 0;
    constexpr bool SuppressMangling = true;
    IceString MangledName = Ctx->mangleName(Func->getFunctionName());
    Constant *Base = Ctx->getConstantSym(
        RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()),
        SuppressMangling);
    Constant *Offset = nullptr;
    uint16_t Shift = typeWidthInBytesLog2(getPointerType());
    // TODO(ascull): remove need for legalize by allowing null base in memop
    auto *TargetInMemory = Traits::X86OperandMem::create(
        Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift);
    Variable *Target = nullptr;
    _mov(Target, TargetInMemory);
    lowerIndirectJump(Target);

    if (DefaultTarget == nullptr)
      Context.insert(SkipJumpTable);
    return;
  }
  case CaseCluster::Range: {
    if (Case.isUnitRange()) {
      // Single item
      if (!DoneCmp) {
        Constant *Value = Ctx->getConstantInt32(Case.getLow());
        _cmp(Comparison, Value);
      }
      _br(Traits::Cond::Br_e, Case.getTarget());
    } else if (DoneCmp && Case.isPairRange()) {
      // Range of two items with the first item already compared against
      _br(Traits::Cond::Br_e, Case.getTarget());
      Constant *Value = Ctx->getConstantInt32(Case.getHigh());
      _cmp(Comparison, Value);
      _br(Traits::Cond::Br_e, Case.getTarget());
    } else {
      // Range
      lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
      _br(Traits::Cond::Br_be, Case.getTarget());
    }
    if (DefaultTarget != nullptr)
      _br(DefaultTarget);
    return;
  }
  }
}
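
// For a dense jump-table cluster such as {10, 11, 12, 13} the code emitted
// above is roughly (a sketch; label and register names are hypothetical):
//   sub  eax, 10                    ; lowerCmpRange
//   cmp  eax, 3
//   ja   default_or_skip
//   mov  ecx, [JumpTableBase + eax*4]
//   jmp  ecx                        ; lowerIndirectJump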

template <class Machine>
void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
  // Group cases together and navigate through them with a binary search
  CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
  Operand *Src0 = Inst->getComparison();
  CfgNode *DefaultTarget = Inst->getLabelDefault();

  assert(CaseClusters.size() != 0); // Should always be at least one

  if (Src0->getType() == IceType_i64) {
    Src0 = legalize(Src0); // get Base/Index into physical registers
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    if (CaseClusters.back().getHigh() > UINT32_MAX) {
      // TODO(ascull): handle 64-bit case properly (currently naive version)
      // This might be handled by a higher level lowering of switches.
      SizeT NumCases = Inst->getNumCases();
      if (NumCases >= 2) {
        Src0Lo = legalizeToReg(Src0Lo);
        Src0Hi = legalizeToReg(Src0Hi);
      } else {
        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      }
      for (SizeT I = 0; I < NumCases; ++I) {
        Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
        Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
        typename Traits::Insts::Label *Label =
            Traits::Insts::Label::create(Func, this);
        _cmp(Src0Lo, ValueLo);
        _br(Traits::Cond::Br_ne, Label);
        _cmp(Src0Hi, ValueHi);
        _br(Traits::Cond::Br_e, Inst->getLabel(I));
        Context.insert(Label);
      }
      _br(Inst->getLabelDefault());
      return;
    } else {
      // All the values are 32-bit so just check the operand is too and then
      // fall through to the 32-bit implementation. This is a common case.
      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      Constant *Zero = Ctx->getConstantInt32(0);
      _cmp(Src0Hi, Zero);
      _br(Traits::Cond::Br_ne, DefaultTarget);
      Src0 = Src0Lo;
    }
  }

  // 32-bit lowering

  if (CaseClusters.size() == 1) {
    // Jump straight to default if needed. Currently a common case as jump
    // tables occur on their own.
    constexpr bool DoneCmp = false;
    lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget);
    return;
  }

  // Going to be used multiple times so get it in a register early
  Variable *Comparison = legalizeToReg(Src0);

  // A span is over the clusters
  struct SearchSpan {
    SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label)
        : Begin(Begin), Size(Size), Label(Label) {}

    SizeT Begin;
    SizeT Size;
    typename Traits::Insts::Label *Label;
  };
  // The stack will only grow to the height of the tree so 12 should be plenty
  std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack;
  SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
  bool DoneCmp = false;

  while (!SearchSpanStack.empty()) {
    SearchSpan Span = SearchSpanStack.top();
    SearchSpanStack.pop();

    if (Span.Label != nullptr)
      Context.insert(Span.Label);

    switch (Span.Size) {
    case 0:
      llvm::report_fatal_error("Invalid SearchSpan size");
      break;

    case 1:
      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
      DoneCmp = false;
      break;

    case 2: {
      const CaseCluster *CaseA = &CaseClusters[Span.Begin];
      const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1];

      // Placing a range last may allow register clobbering during the range
      // test. That means there is no need to clone the register. If it is a
      // unit range the comparison may have already been done in the binary
      // search (DoneCmp) and so it should be placed first. If this is a range
      // of two items and the comparison with the low value has already been
      // done, comparing with the other element is cheaper than a range test.
      // If the low end of the range is zero then there is no subtraction and
      // nothing to be gained.
      if (!CaseA->isUnitRange() &&
          !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) {
        std::swap(CaseA, CaseB);
        DoneCmp = false;
      }

      lowerCaseCluster(*CaseA, Comparison, DoneCmp);
      DoneCmp = false;
      lowerCaseCluster(*CaseB, Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
    } break;

    default:
      // Pick the middle item and branch b or ae
      SizeT PivotIndex = Span.Begin + (Span.Size / 2);
      const CaseCluster &Pivot = CaseClusters[PivotIndex];
      Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
      typename Traits::Insts::Label *Label =
          Traits::Insts::Label::create(Func, this);
      _cmp(Comparison, Value);
      // TODO(ascull): does it always have to be far?
      _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far);
      // Lower the left and (pivot+right) sides, falling through to the right
      SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
      SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
      DoneCmp = true;
      break;
    }
  }

  _br(DefaultTarget);
}
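
// Shape of the binary search above for clusters C0..C4 (a sketch): the pivot
// compare "cmp x, low(C2); jb LeftHalf" splits the span; the left half is
// pushed with a fresh label while the right half falls through with
// DoneCmp=true, so a unit-range pivot can reuse that comparison as its
// equality test.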

template <class Machine>
void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
                                                 Variable *Dest, Operand *Src0,
                                                 Operand *Src1) {
  assert(isVectorType(Dest->getType()));
  Type Ty = Dest->getType();
  Type ElementTy = typeElementType(Ty);
  SizeT NumElements = typeNumElements(Ty);

  Operand *T = Ctx->getConstantUndef(Ty);
  for (SizeT I = 0; I < NumElements; ++I) {
    Constant *Index = Ctx->getConstantInt32(I);

    // Extract the next two inputs.
    Variable *Op0 = Func->makeVariable(ElementTy);
    lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
    Variable *Op1 = Func->makeVariable(ElementTy);
    lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));

    // Perform the arithmetic as a scalar operation.
    Variable *Res = Func->makeVariable(ElementTy);
    lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));

    // Insert the result into position.
    Variable *DestT = Func->makeVariable(Ty);
    lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
    T = DestT;
  }

  lowerAssign(InstAssign::create(Func, Dest, T));
}
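
// For example (a sketch), a v4f32 divide scalarizes into four
// extract / scalar-op / insert triples:
//   e0 = extractelement %a, 0;  f0 = extractelement %b, 0
//   r0 = fdiv e0, f0
//   t0 = insertelement undef, r0, 0
//   ... likewise for lanes 1 through 3, threading t0..t3 ...
//   %dest = t3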

/// The following pattern occurs often in lowered C and C++ code:
///
///   %cmp     = fcmp/icmp pred <n x ty> %src0, %src1
///   %cmp.ext = sext <n x i1> %cmp to <n x ty>
///
/// We can eliminate the sext operation by copying the result of pcmpeqd,
/// pcmpgtd, or cmpps (which produce sign extended results) to the result of
/// the sext operation.
template <class Machine>
void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
    Variable *SignExtendedResult) {
  if (InstCast *NextCast =
          llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
    if (NextCast->getCastKind() == InstCast::Sext &&
        NextCast->getSrc(0) == SignExtendedResult) {
      NextCast->setDeleted();
      _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult));
      // Skip over the instruction.
      Context.advanceNext();
    }
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerUnreachable(
    const InstUnreachable * /*Inst*/) {
  _ud2();
}

template <class Machine>
void TargetX86Base<Machine>::lowerRMW(
    const typename Traits::Insts::FakeRMW *RMW) {
  // If the beacon variable's live range does not end in this instruction, then
  // it must end in the modified Store instruction that follows. This means
  // that the original Store instruction is still there, either because the
  // value being stored is used beyond the Store instruction, or because dead
  // code elimination did not happen. In either case, we cancel RMW lowering
  // (and the caller deletes the RMW instruction).
  if (!RMW->isLastUse(RMW->getBeacon()))
    return;
  Operand *Src = RMW->getData();
  Type Ty = Src->getType();
  typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
  if (Ty == IceType_i64) {
    Src = legalizeUndef(Src);
    Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
    Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
    typename Traits::X86OperandMem *AddrLo =
        llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
    typename Traits::X86OperandMem *AddrHi =
        llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      _add_rmw(AddrLo, SrcLo);
      _adc_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Sub:
      _sub_rmw(AddrLo, SrcLo);
      _sbb_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::And:
      _and_rmw(AddrLo, SrcLo);
      _and_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Or:
      _or_rmw(AddrLo, SrcLo);
      _or_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Xor:
      _xor_rmw(AddrLo, SrcLo);
      _xor_rmw(AddrHi, SrcHi);
      return;
    }
  } else {
    // i8, i16, i32
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _add_rmw(Addr, Src);
      return;
    case InstArithmetic::Sub:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _sub_rmw(Addr, Src);
      return;
    case InstArithmetic::And:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _and_rmw(Addr, Src);
      return;
    case InstArithmetic::Or:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _or_rmw(Addr, Src);
      return;
    case InstArithmetic::Xor:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _xor_rmw(Addr, Src);
      return;
    }
  }
  llvm::report_fatal_error("Couldn't lower RMW instruction");
}
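
// The pattern recognized here, in ICE-like pseudo-IR (a sketch):
//   a = load [mem];  b = a OP c;  store b, [mem]   ==>   OP [mem], c
// For i64 the halves pair up carry-wise (add/adc, sub/sbb), while the
// bitwise ops apply independently to the lo and hi words.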

template <class Machine>
void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
  if (const auto *RMW =
          llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
    lowerRMW(RMW);
  } else {
    TargetLowering::lowerOther(Instr);
  }
}

/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
/// preserve integrity of liveness analysis. Undef values are also turned into
/// zeroes, since loOperand() and hiOperand() don't expect Undef input.
template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
  // Pause constant blinding or pooling; blinding or pooling will be done later
  // during phi lowering assignments.
  BoolFlagSaver B(RandomizationPoolingPaused, true);
  PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
      this, Context.getNode(), Func);
}
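
// For example (a sketch):
//   %x = phi i64 [ %a, %B1 ], [ %b, %B2 ]
// is prelowered to
//   %x.lo = phi i32 [ lo(%a), %B1 ], [ lo(%b), %B2 ]
//   %x.hi = phi i32 [ hi(%a), %B1 ], [ hi(%b), %B2 ]
// so that liveness analysis only ever sees 32-bit values.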

// There is no support for loading or emitting vector constants, so the vector
// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
// initialized with register operations.
//
// TODO(wala): Add limited support for vector constants so that complex
// initialization in registers is unnecessary.

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  // Insert a FakeDef, since otherwise the live range of Reg might be
  // overestimated.
  Context.insert(InstFakeDef::create(Func, Reg));
  _pxor(Reg, Reg);
  return Reg;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,
                                                        int32_t RegNum) {
  Variable *MinusOnes = makeReg(Ty, RegNum);
  // Insert a FakeDef so the live range of MinusOnes is not overestimated.
  Context.insert(InstFakeDef::create(Func, MinusOnes));
  _pcmpeq(MinusOnes, MinusOnes);
  return MinusOnes;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
  Variable *Dest = makeVectorOfZeros(Ty, RegNum);
  Variable *MinusOne = makeVectorOfMinusOnes(Ty);
  _psub(Dest, MinusOne);
  return Dest;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,
                                                            int32_t RegNum) {
  assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
         Ty == IceType_v16i8);
  if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
    Variable *Reg = makeVectorOfOnes(Ty, RegNum);
    SizeT Shift =
        typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
    _psll(Reg, Ctx->getConstantInt8(Shift));
    return Reg;
  } else {
    // SSE has no left shift operation for vectors of 8 bit integers.
    const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
    Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
    Variable *Reg = makeReg(Ty, RegNum);
    _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
    _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
    return Reg;
  }
}

/// Construct a mask in a register that can be and'ed with a floating-point
/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as a vector
/// of ones logically right shifted one bit.
/// TODO(stichnot): Fix the wala TODO above, to represent vector constants in
/// memory.
template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
                                                       int32_t RegNum) {
  Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
  _psrl(Reg, Ctx->getConstantInt8(1));
  return Reg;
}
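
// Scalar model of the mask built above (an illustrative sketch):
// llvm::FloatToBits and llvm::BitsToFloat come from
// llvm/Support/MathExtras.h, which this file already includes.
inline float fabsMaskSketch(float F) {
  return llvm::BitsToFloat(llvm::FloatToBits(F) & 0x7fffffffu);
}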

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                                     uint32_t Offset) {
  // Ensure that Loc is a stack slot.
  assert(Slot->getWeight().isZero());
  assert(Slot->getRegNum() == Variable::NoRegister);
  // Compute the location of Loc in memory.
  // TODO(wala,stichnot): lea should not be required. The address of the stack
  // slot is known at compile time (although not until after addProlog()).
  const Type PointerType = IceType_i32;
  Variable *Loc = makeReg(PointerType);
  _lea(Loc, Slot);
  Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
  return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
}

/// Helper for legalize() to emit the right code to lower an operand to a
/// register of the appropriate type.
template <class Machine>
Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    _movp(Reg, Src);
  } else {
    _mov(Reg, Src);
  }
  return Reg;
}

template <class Machine>
Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
                                          int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register. If a physical register needs to be
  // explicitly disallowed, then new code will need to be written to force a
  // spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're not
  // allowing any other operand kinds. (This could be future work, e.g. allow
  // the shl shift amount to be either an immediate or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);

  if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure that the
    // Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToReg(Base);
    }
    if (Index) {
      RegIndex = legalizeToReg(Index);
    }
    if (Base != RegBase || Index != RegIndex) {
      Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
                                          RegIndex, Mem->getShift(),
                                          Mem->getSegmentRegister());
    }

    // For all Memory Operands, we do randomization/pooling here.
    From = randomizeOrPoolImmediate(Mem);

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto *Const = llvm::dyn_cast<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(Const)) {
      From = legalizeUndef(Const, RegNum);
      if (isVectorType(Ty))
        return From;
      Const = llvm::cast<Constant>(From);
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));

    // If the operand is a 32-bit constant integer, we should check whether we
    // need to randomize it or pool it.
    if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
      Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
      if (NewConst != Const) {
        return NewConst;
      }
    }

    // Convert a scalar floating point constant into an explicit memory
    // operand.
    if (isScalarFloatingType(Ty)) {
      Variable *Base = nullptr;
      std::string Buffer;
      llvm::raw_string_ostream StrBuf(Buffer);
      llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
      llvm::cast<Constant>(From)->setShouldBePooled(true);
      Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
      From = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
    }
    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
      // Immediate specifically not allowed.
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This can happen
    // either when the variable is pre-colored or when it is assigned infinite
    // weight.
    bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}

/// Provide a trivial wrapper to legalize() for this common usage.
template <class Machine>
Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
template <class Machine>
Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) {
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register results
    // in less predictable code.
    //
    // If in the future the implementation is changed to lower undef values to
    // uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value, then the
    // result should be split and the lo and hi components will need to go in
    // uninitialized registers.
    if (isVectorType(Ty))
      return makeVectorOfZeros(Ty, RegNum);
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

/// For the cmp instruction, if Src1 is an immediate, or known to be a physical
/// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
/// copied into a physical register. (Actually, either Src0 or Src1 can be
/// chosen for the physical register, but unfortunately we have to commit to
/// one or the other before register allocation.)
template <class Machine>
Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
                                                    Operand *Src1) {
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }
  return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
}

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
                                          bool DoLegalize) {
  auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd);
  // It may be the case that address mode optimization already creates a
  // Traits::X86OperandMem, so in that case it wouldn't need another level of
  // transformation.
  if (!Mem) {
    Variable *Base = llvm::dyn_cast<Variable>(Opnd);
    Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
    assert(Base || Offset);
    if (Offset) {
      // During memory operand building, we do not blind or pool the constant
      // offset; we will work on the whole memory operand as one entity later,
      // which saves one instruction. By turning blinding and pooling off, we
      // guarantee legalize(Offset) will return a Constant*.
      {
        BoolFlagSaver B(RandomizationPoolingPaused, true);

        Offset = llvm::cast<Constant>(legalize(Offset));
      }

      assert(llvm::isa<ConstantInteger32>(Offset) ||
             llvm::isa<ConstantRelocatable>(Offset));
    }
    Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
  }
  // Do legalization, which contains randomization/pooling, or do
  // randomization/pooling directly.
  return llvm::cast<typename Traits::X86OperandMem>(
      DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for x86-32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

template <class Machine> void TargetX86Base<Machine>::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  inferTwoAddress();
}

template <class Machine>
void TargetX86Base<Machine>::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
                                        ExcludeRegisters);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  C->emitPoolLabel(Str);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantDouble *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  C->emitPoolLabel(Str);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

/// Randomize or pool an Immediate.
template <class Machine>
Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
                                                          int32_t RegNum) {
  assert(llvm::isa<ConstantInteger32>(Immediate) ||
         llvm::isa<ConstantRelocatable>(Immediate));
  if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediates randomization/pooling is off or paused.
    return Immediate;
  }
  if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
    Ctx->statsUpdateRPImms();
    if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
        RPI_Randomize) {
      // Blind the constant.
      // FROM:
      //   imm
      // TO:
      //   insert: mov imm+cookie, Reg
      //   insert: lea -cookie[Reg], Reg
      //   => Reg
      // If we have already assigned a physical register, we must come from
      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
      // assigned register as this assignment is the start of its use-def
      // chain. So we add the RegNum argument here.
      // Note we use the 'lea' instruction instead of 'xor' to avoid affecting
      // the flags.
      Variable *Reg = makeReg(IceType_i32, RegNum);
      ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
      uint32_t Value = Integer->getValue();
      uint32_t Cookie = Ctx->getRandomizationCookie();
      _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
      Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
      _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
                                              nullptr, 0));
      // Make sure liveness analysis won't kill this variable, otherwise a
      // liveness assertion will be triggered.
      _set_dest_nonkillable();
      if (Immediate->getType() != IceType_i32) {
        Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
        _mov(TruncReg, Reg);
        return TruncReg;
      }
      return Reg;
    }
    if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
      // Pool the constant.
      // FROM:
      //   imm
      // TO:
      //   insert: mov $label, Reg
      //   => Reg
      assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
      Immediate->setShouldBePooled(true);
      // If we have already assigned a physical register, we must come from
      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
      // assigned register as this assignment is the start of its use-def
      // chain. So we add the RegNum argument here.
      Variable *Reg = makeReg(Immediate->getType(), RegNum);
      IceString Label;
      llvm::raw_string_ostream Label_stream(Label);
      Immediate->emitPoolLabel(Label_stream);
      const RelocOffsetT Offset = 0;
      const bool SuppressMangling = true;
      Constant *Symbol =
          Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
      typename Traits::X86OperandMem *MemOperand =
          Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
                                        Symbol);
      _mov(Reg, MemOperand);
      return Reg;
    }
    assert("Unsupported -randomize-pool-immediates option" && false);
  }
  // The constant Immediate is not eligible for blinding/pooling.
  return Immediate;
}
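
// Arithmetic behind the blinding transform above (an illustrative sketch):
// the cookie cancels, so the computed value is unchanged while the literal
// immediate never appears in the instruction stream.
inline uint32_t blindImmediateSketch(uint32_t Imm, uint32_t Cookie) {
  uint32_t Reg = Imm + Cookie; // mov Reg, imm+cookie
  return Reg - Cookie;         // lea Reg, -cookie[Reg]
}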

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::randomizeOrPoolImmediate(
    typename Traits::X86OperandMem *MemOperand, int32_t RegNum) {
  assert(MemOperand);
  if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediates randomization/pooling is turned off.
    return MemOperand;
  }

  // If this memory operand is already a randomized one, we do not randomize it
  // again.
  if (MemOperand->getRandomized())
    return MemOperand;

  if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
    if (C->shouldBeRandomizedOrPooled(Ctx)) {
      // The offset of this mem operand should be blinded or pooled.
      Ctx->statsUpdateRPImms();
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
          RPI_Randomize) {
        // Blind the constant offset.
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: lea offset+cookie[base], RegTemp
        //   => -cookie[RegTemp, index, shift]
        uint32_t Value =
            llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
                ->getValue();
        uint32_t Cookie = Ctx->getRandomizationCookie();
        Constant *Mask1 = Ctx->getConstantInt(
            MemOperand->getOffset()->getType(), Cookie + Value);
        Constant *Mask2 =
            Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);

        typename Traits::X86OperandMem *TempMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(),
                                          MemOperand->getBase(), Mask1);
        // If we have already assigned a physical register, we must come from
        // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
        // the assigned register as this assignment is the start of its use-def
        // chain. So we add the RegNum argument here.
        Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
        _lea(RegTemp, TempMemOperand);
        // As the source operand doesn't use the dstreg, we don't need to add
        // _set_dest_nonkillable(). But if we use the same Dest Reg, that is,
        // with RegNum assigned, we should add this _set_dest_nonkillable().
        if (RegNum != Variable::NoRegister)
          _set_dest_nonkillable();

        typename Traits::X86OperandMem *NewMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
                                          Mask2, MemOperand->getIndex(),
                                          MemOperand->getShift(),
                                          MemOperand->getSegmentRegister());

        // Label this memory operand as randomized, so we won't randomize it
        // again in case we call legalize() multiple times on this memory
        // operand.
        NewMemOperand->setRandomized(true);
        return NewMemOperand;
      }
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
        // Pool the constant offset.
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: mov $label, RegTemp
        //   insert: lea [base, RegTemp], RegTemp
        //   => [RegTemp, index, shift]
        assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
               RPI_Pool);
        // Memory operands should never exist as source operands in phi
        // lowering assignments, so there is no need to reuse any registers
        // here. For phi lowering, we should not ask for new physical registers
        // in general. However, if we do meet a memory operand during phi
        // lowering, we should not blind or pool the immediates for now.
        if (RegNum != Variable::NoRegister)
          return MemOperand;
        Variable *RegTemp = makeReg(IceType_i32);
        IceString Label;
        llvm::raw_string_ostream Label_stream(Label);
        MemOperand->getOffset()->emitPoolLabel(Label_stream);
        MemOperand->getOffset()->setShouldBePooled(true);
        const RelocOffsetT SymOffset = 0;
        bool SuppressMangling = true;
        Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
                                               SuppressMangling);
        typename Traits::X86OperandMem *SymbolOperand =
            Traits::X86OperandMem::create(
                Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
        _mov(RegTemp, SymbolOperand);
        // If we have a base variable here, we should add the lea instruction
        // to add the value of the base variable to RegTemp. If there is no
        // base variable, we won't need this lea instruction.
        if (MemOperand->getBase()) {
          typename Traits::X86OperandMem *CalculateOperand =
              Traits::X86OperandMem::create(
                  Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
                  RegTemp, 0, MemOperand->getSegmentRegister());
          _lea(RegTemp, CalculateOperand);
          _set_dest_nonkillable();
        }
        typename Traits::X86OperandMem *NewMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
                                          nullptr, MemOperand->getIndex(),
                                          MemOperand->getShift(),
                                          MemOperand->getSegmentRegister());
        return NewMemOperand;
      }
      assert("Unsupported -randomize-pool-immediates option" && false);
    }
  }
  // The offset is not eligible for blinding or pooling; return the original
  // mem operand.
  return MemOperand;
}

} // end of namespace X86Internal
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H