//===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringX86Base class, which
/// consists almost entirely of the lowering sequence for each
/// high-level instruction.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
#define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

#include <stack>

namespace Ice {
namespace X86Internal {

/// A helper class to ease toggling RandomizationPoolingPaused, which disables
/// constant blinding or pooling during some translation phases.
class BoolFlagSaver {
  BoolFlagSaver() = delete;
  BoolFlagSaver(const BoolFlagSaver &) = delete;
  BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;

public:
  BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
  ~BoolFlagSaver() { Flag = OldValue; }

private:
  const bool OldValue;
  bool &Flag;
};
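// Usage sketch: constructing a BoolFlagSaver overwrites the flag for the
// duration of a scope and restores the previous value on scope exit, as
// translateO2() below does to pause constant blinding/pooling:
//   {
//     BoolFlagSaver B(RandomizationPoolingPaused, true);
//     doLoadOpt();
//   } // RandomizationPoolingPaused is restored here.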

template <class MachineTraits> class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  // NumUses counts the number of times Var is used as a source operand in the
  // basic block. If IsComplex is true and there is more than one use of Var,
  // then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};

template <class MachineTraits> class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc
  };

  /// Currently the actual enum values are not used (other than CK_None), but
  /// we go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  void init(CfgNode *Node);
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
};

template <class MachineTraits>
BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
    : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}

template <class MachineTraits>
typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
  if (llvm::isa<InstIcmp>(Instr)) {
    if (Instr->getSrc(0)->getType() != IceType_i64)
      return PK_Icmp32;
    return PK_None; // TODO(stichnot): actually PK_Icmp64;
  }
  return PK_None; // TODO(stichnot): remove this

  if (llvm::isa<InstFcmp>(Instr))
    return PK_Fcmp;
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return PK_None;
    case InstCast::Trunc:
      return PK_Trunc;
    }
  }
  return PK_None;
}

template <class MachineTraits>
typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) {
  if (llvm::isa<InstBr>(Instr))
    return CK_Br;
  if (llvm::isa<InstSelect>(Instr))
    return CK_Select;
  return CK_None; // TODO(stichnot): remove this

  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return CK_None;
    case InstCast::Sext:
      return CK_Sext;
    case InstCast::Zext:
      return CK_Zext;
    }
  }
  return CK_None;
}

/// Returns true if the producing instruction has a "complex" lowering
/// sequence. This generally means that its lowering sequence requires more
/// than one conditional branch, namely 64-bit integer compares and some
/// floating-point compares. When this is true and there is more than one
/// consumer, we prefer to disable the folding optimization, since computing
/// the boolean once uses fewer branches than repeating the complex lowering
/// at each consumer.
template <class MachineTraits>
bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
  switch (getProducerKind(Instr)) {
  default:
    return false;
  case PK_Icmp64:
    return true;
  case PK_Fcmp:
    return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
               .C2 != MachineTraits::Cond::Br_None;
  }
}

template <class MachineTraits>
void BoolFolding<MachineTraits>::init(CfgNode *Node) {
  Producers.clear();
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Var = Instr.getDest();
    if (!Instr.isDeleted() // only consider non-deleted instructions
        && Var             // only instructions with an actual dest var
        && Var->getType() == IceType_i1          // only bool-type dest vars
        && getProducerKind(&Instr) != PK_None) { // white-listed instructions
      Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
    }
    // Check each src variable against the map.
    for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
      Operand *Src = Instr.getSrc(I);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        SizeT VarNum = Var->getIndex();
        if (containsValid(VarNum)) {
          if (I != 0 // All valid consumers use Var as the first source operand
              || getConsumerKind(&Instr) == CK_None // must be white-listed
              || (Producers[VarNum].IsComplex && // complex can't be multi-use
                  Producers[VarNum].NumUses > 0)) {
            setInvalid(VarNum);
            continue;
          }
          ++Producers[VarNum].NumUses;
          if (Instr.isLastUse(Var)) {
            Producers[VarNum].IsLiveOut = false;
          }
        }
      }
    }
  }
  for (auto &I : Producers) {
    // Ignore entries previously marked invalid.
    if (I.second.Instr == nullptr)
      continue;
    // Disable the producer if its dest may be live beyond this block.
    if (I.second.IsLiveOut) {
      setInvalid(I.first);
      continue;
    }
    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    I.second.Instr->setDead();
  }
}
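// Example of the fusion this analysis enables: for a producer/consumer pair
//   b = icmp eq i32 %x, %y       ; PK_Icmp32 producer
//   br i1 b, label %t, label %f  ; CK_Br consumer
// lowering can emit "cmp x, y" immediately followed by "je t", rather than
// first materializing b as a register-sized boolean.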

template <class MachineTraits>
const Inst *
BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
  auto *Var = llvm::dyn_cast<const Variable>(Opnd);
  if (Var == nullptr)
    return nullptr;
  SizeT VarNum = Var->getIndex();
  auto Element = Producers.find(VarNum);
  if (Element == Producers.end())
    return nullptr;
  return Element->second.Instr;
}

template <class MachineTraits>
void BoolFolding<MachineTraits>::dump(const Cfg *Func) const {
  if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
    return;
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (auto &I : Producers) {
    if (I.second.Instr == nullptr)
      continue;
    Str << "Found foldable producer:\n  ";
    I.second.Instr->dump(Func);
    Str << "\n";
  }
}

template <class Machine>
void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) {
  FoldingInfo.init(Node);
  FoldingInfo.dump(Func);
}

template <class Machine>
TargetX86Base<Machine>::TargetX86Base(Cfg *Func)
    : TargetLowering(Func) {
  static_assert(
      (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
          (TargetInstructionSet::X86InstructionSet_End -
           TargetInstructionSet::X86InstructionSet_Begin),
      "Traits::InstructionSet range different from TargetInstructionSet");
  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<typename Traits::InstructionSet>(
        (Func->getContext()->getFlags().getTargetInstructionSet() -
         TargetInstructionSet::X86InstructionSet_Begin) +
        Traits::InstructionSet::Begin);
  }
  // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
  // initialize in some sort of static initializer for the class.
  llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM);
  ScratchRegs.resize(Traits::RegisterSet::Reg_NUM);

  Traits::initRegisterSet(&IntegerRegisters, &IntegerRegistersI8,
                          &FloatRegisters, &VectorRegisters, &ScratchRegs);

  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

template <class Machine> void TargetX86Base<Machine>::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Find read-modify-write opportunities. Do this after address mode
  // optimization so that doAddressOpt() doesn't need to be applied to RMW
  // instructions as well.
  findRMW();
  Func->dump("After RMW transform");

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  // Disable constant blinding or pooling for load optimization.
  {
    BoolFlagSaver B(RandomizationPoolingPaused, true);
    doLoadOpt();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After x86 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment
  if (Ctx->getFlags().getUseSandboxing())
    Func->markNodesForSandboxing();
}

template <class Machine> void TargetX86Base<Machine>::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment
  if (Ctx->getFlags().getUseSandboxing())
    Func->markNodesForSandboxing();
}

inline bool canRMW(const InstArithmetic *Arith) {
  Type Ty = Arith->getDest()->getType();
  // X86 vector instructions write to a register and have no RMW option.
  if (isVectorType(Ty))
    return false;
  bool isI64 = Ty == IceType_i64;

  switch (Arith->getOp()) {
  // Not handled for lack of simple lowering:
  //   shift on i64
  //   mul, udiv, urem, sdiv, srem, frem
  // Not handled for lack of RMW instructions:
  //   fadd, fsub, fmul, fdiv (also vector types)
  default:
    return false;
  case InstArithmetic::Add:
  case InstArithmetic::Sub:
  case InstArithmetic::And:
  case InstArithmetic::Or:
  case InstArithmetic::Xor:
    return true;
  case InstArithmetic::Shl:
  case InstArithmetic::Lshr:
  case InstArithmetic::Ashr:
    return false; // TODO(stichnot): implement
    return !isI64;
  }
}
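// For example, once canRMW() approves the arithmetic op, a pattern like
//   a = load addr
//   b = add a, other
//   store b, addr
// found by findRMW() below can ultimately lower to a single x86
// read-modify-write instruction such as "add [addr], other", assuming the
// intermediate values have no other uses.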

template <class Machine>
bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
  if (A == B)
    return true;
  if (auto *MemA = llvm::dyn_cast<
          typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) {
    if (auto *MemB = llvm::dyn_cast<
            typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) {
      return MemA->getBase() == MemB->getBase() &&
             MemA->getOffset() == MemB->getOffset() &&
             MemA->getIndex() == MemB->getIndex() &&
             MemA->getShift() == MemB->getShift() &&
             MemA->getSegmentRegister() == MemB->getSegmentRegister();
    }
  }
  return false;
}

template <class Machine> void TargetX86Base<Machine>::findRMW() {
  Func->dump("Before RMW");
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (CfgNode *Node : Func->getNodes()) {
    // Walk through the instructions, considering each sequence of 3
    // instructions, and look for the particular RMW pattern. Note that this
    // search can be "broken" (false negatives) if there are intervening
    // deleted instructions, or intervening instructions that could be safely
    // moved out of the way to reveal an RMW pattern.
    auto E = Node->getInsts().end();
    auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
    for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
      // Make I3 skip over deleted instructions.
      while (I3 != E && I3->isDeleted())
        ++I3;
      if (I1 == E || I2 == E || I3 == E)
        continue;
      assert(!I1->isDeleted());
      assert(!I2->isDeleted());
      assert(!I3->isDeleted());
      if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
          if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
            // Look for:
            //   a = Load addr
            //   b = <op> a, other
            //   Store b, addr
            // Change to:
            //   a = Load addr
            //   b = <op> a, other
            //   x = FakeDef
            //   RMW <op>, addr, other, x
            //   b = Store b, addr, x
            // Note that inferTwoAddress() makes sure setDestNonKillable() gets
            // called on the updated Store instruction, to avoid liveness
            // problems later.
            //
            // With this transformation, the Store instruction acquires a Dest
            // variable and is now subject to dead code elimination if there
            // are no more uses of "b". Variable "x" is a beacon for
            // determining whether the Store instruction gets dead-code
            // eliminated. If the Store instruction is eliminated, then it
            // must be the case that the RMW instruction ends x's live range,
            // and therefore the RMW instruction will be retained and later
            // lowered. On the other hand, if the RMW instruction does not end
            // x's live range, then the Store instruction must still be
            // present, and therefore the RMW instruction is ignored during
            // lowering because it is redundant with the Store instruction.
            //
            // Note that if "a" has further uses, the RMW transformation may
            // still trigger, resulting in two loads and one store, which is
            // worse than the original one load and one store. However, this
            // is probably rare, and caching probably keeps it just as fast.
            if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
                                                  Store->getAddr()))
              continue;
            Operand *ArithSrcFromLoad = Arith->getSrc(0);
            Operand *ArithSrcOther = Arith->getSrc(1);
            if (ArithSrcFromLoad != Load->getDest()) {
              if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
                continue;
              std::swap(ArithSrcFromLoad, ArithSrcOther);
            }
            if (Arith->getDest() != Store->getData())
              continue;
            if (!canRMW(Arith))
              continue;
            if (Func->isVerbose(IceV_RMW)) {
              Str << "Found RMW in " << Func->getFunctionName() << ":\n  ";
              Load->dump(Func);
              Str << "\n  ";
              Arith->dump(Func);
              Str << "\n  ";
              Store->dump(Func);
              Str << "\n";
            }
            Variable *Beacon = Func->makeVariable(IceType_i32);
            Beacon->setWeight(0);
            Store->setRmwBeacon(Beacon);
            InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
            Node->getInsts().insert(I3, BeaconDef);
            auto *RMW = Traits::Insts::FakeRMW::create(
                Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
            Node->getInsts().insert(I3, RMW);
          }
        }
      }
    }
  }
}

// Converts a ConstantInteger32 operand into its constant value, or
// MemoryOrderInvalid if the operand is not a ConstantInteger32.
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}

/// Determines whether the dest of a Load instruction can be folded
/// into one of the src operands of a 2-operand instruction. This is
/// true as long as the load dest matches exactly one of the binary
/// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
/// the answer is true.
inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
                                      Operand *&Src0, Operand *&Src1) {
  if (Src0 == LoadDest && Src1 != LoadDest) {
    Src0 = LoadSrc;
    return true;
  }
  if (Src0 != LoadDest && Src1 == LoadDest) {
    Src1 = LoadSrc;
    return true;
  }
  return false;
}
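// For example, given
//   a = load addr
//   c = add a, b   ; ends a's live range
// doLoadOpt() below uses this helper to rewrite the pair as
//   c = add [addr], b
// with the load folded into the add's first source operand.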

template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      Variable *LoadDest = nullptr;
      Operand *LoadSrc = nullptr;
      Inst *CurInst = Context.getCur();
      Inst *Next = Context.getNextInst();
      // Determine whether the current instruction is a Load instruction or
      // equivalent.
      if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
        // An InstLoad always qualifies.
        LoadDest = Load->getDest();
        const bool DoLegalize = false;
        LoadSrc = formMemoryOperand(Load->getSourceAddress(),
                                    LoadDest->getType(), DoLegalize);
      } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
        // An AtomicLoad intrinsic qualifies as long as it has a valid memory
        // ordering, and can be implemented in a single instruction (i.e., not
        // i64).
        Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
        if (ID == Intrinsics::AtomicLoad &&
            Intrin->getDest()->getType() != IceType_i64 &&
            Intrinsics::isMemoryOrderValid(
                ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
          LoadDest = Intrin->getDest();
          const bool DoLegalize = false;
          LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
                                      DoLegalize);
        }
      }
      // A Load instruction can be folded into the following instruction only
      // if the following instruction ends the Load's Dest variable's live
      // range.
      if (LoadDest && Next && Next->isLastUse(LoadDest)) {
        assert(LoadSrc);
        Inst *NewInst = nullptr;
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
          Operand *Src0 = Arith->getSrc(0);
          Operand *Src1 = Arith->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstArithmetic::create(Func, Arith->getOp(),
                                             Arith->getDest(), Src0, Src1);
          }
        } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
          Operand *Src0 = Icmp->getSrc(0);
          Operand *Src1 = Icmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstIcmp::create(Func, Icmp->getCondition(),
                                       Icmp->getDest(), Src0, Src1);
          }
        } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
          Operand *Src0 = Fcmp->getSrc(0);
          Operand *Src1 = Fcmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
                                       Fcmp->getDest(), Src0, Src1);
          }
        } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
          Operand *Src0 = Select->getTrueOperand();
          Operand *Src1 = Select->getFalseOperand();
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstSelect::create(Func, Select->getDest(),
                                         Select->getCondition(), Src0, Src1);
          }
        } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
          // The load dest can always be folded into a Cast instruction.
          Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
          if (Src0 == LoadDest) {
            NewInst = InstCast::create(Func, Cast->getCastKind(),
                                       Cast->getDest(), LoadSrc);
          }
        }
        if (NewInst) {
          CurInst->setDeleted();
          Next->setDeleted();
          Context.insert(NewInst);
          // Update NewInst->LiveRangesEnded so that target lowering may
          // benefit. Also update NewInst->HasSideEffects.
          NewInst->spliceLivenessInfo(Next, CurInst);
        }
      }
      Context.advanceCur();
      Context.advanceNext();
    }
  }
  Func->dump("After load optimization");
}

template <class Machine>
bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

template <class Machine>
Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark esp as an "argument" so that it is considered live upon
    // function entry.
    if (RegNum == Traits::RegisterSet::Reg_esp) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

template <class Machine>
IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
  return Traits::getRegName(RegNum, Ty);
}

template <class Machine>
void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << "%" << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
    if (!hasFramePointer())
      Offset += getStackAdjustment();
  }
  if (Offset)
    Str << Offset;
  const Type FrameSPTy = IceType_i32;
  Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
}

template <class Machine>
typename TargetX86Base<Machine>::Traits::Address
TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
  if (Var->hasReg())
    llvm_unreachable("Stack Variable has a register assigned");
  if (Var->getWeight().isInf()) {
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (Var->getBaseRegNum() == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
    if (!hasFramePointer())
      Offset += getStackAdjustment();
  }
  return typename Traits::Address(
      Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset);
}

/// Helper function for addProlog().
///
/// This assumes Arg is an argument passed on the stack. This sets the
/// frame offset for Arg and updates InArgsSizeBytes according to Arg's
/// width. For an I64 arg that has been split into Lo and Hi components,
/// it calls itself recursively on the components, taking care to handle
/// Lo first because of the little-endian architecture. Lastly, this
/// function generates an instruction to copy Arg into its assigned
/// register if applicable.
template <class Machine>
void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
                                                    Variable *FramePtr,
                                                    size_t BasicFrameOffset,
                                                    size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    // TODO(jpp): This special case is not needed for x86-64.
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  if (isVectorType(Ty)) {
    InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
        Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit Traits::X86OperandMem
    // operand instead of a Variable, so its fill-from-stack operation has to
    // be tracked separately for statistics.
    Ctx->statsUpdateFills();
  }
}
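// For example, for a split i64 stack argument whose Lo half falls at
// InArgsSizeBytes == 8, Lo is assigned frame offset BasicFrameOffset + 8 and
// Hi is assigned BasicFrameOffset + 12: the little-endian Lo half is handled
// first and each i32 half occupies a 4-byte stack slot.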

template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
  // TODO(jpp): this is wrong for x86-64.
  return IceType_i32;
}

template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
  switch (Var->getType()) {
  default:
    return;
  case IceType_i64:
  // TODO: Only consider F64 if we need to push each half when
  // passing as an argument to a function call. Note that each half
  // is still typed as I32.
  case IceType_f64:
    break;
  }
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (BuildDefs::dump()) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}

template <class Machine>
Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64 ||
         Operand->getType() == IceType_f64);
  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
        Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
    // Check if we need to blind/pool the constant.
    return legalize(ConstInt);
  }
  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
    auto *MemOperand = Traits::X86OperandMem::create(
        Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
        Mem->getShift(), Mem->getSegmentRegister());
    // Test if we should randomize or pool the offset; if so, randomize or
    // pool it, and create the mem operand with the blinded/pooled constant.
    // Otherwise, return the mem operand as an ordinary mem operand.
901 return legalize(MemOperand);
902 }
903 llvm_unreachable("Unsupported operand type");
904 return nullptr;
905}
906
907template <class Machine>
908Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
909 assert(Operand->getType() == IceType_i64 ||
910 Operand->getType() == IceType_f64);
911 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
912 return Operand;
Jan Voungfbdd2442015-07-15 12:36:20 -0700913 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
John Porto7e93c622015-06-23 10:58:57 -0700914 split64(Var);
915 return Var->getHi();
916 }
Jan Voungfbdd2442015-07-15 12:36:20 -0700917 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
918 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
John Porto7e93c622015-06-23 10:58:57 -0700919 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
Jan Voungfbdd2442015-07-15 12:36:20 -0700920 // Check if we need to blind/pool the constant.
John Porto7e93c622015-06-23 10:58:57 -0700921 return legalize(ConstInt);
922 }
John Porto921856d2015-07-07 11:56:26 -0700923 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
John Porto7e93c622015-06-23 10:58:57 -0700924 Constant *Offset = Mem->getOffset();
925 if (Offset == nullptr) {
926 Offset = Ctx->getConstantInt32(4);
Jan Voungfbdd2442015-07-15 12:36:20 -0700927 } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) {
John Porto7e93c622015-06-23 10:58:57 -0700928 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
Jan Voungfbdd2442015-07-15 12:36:20 -0700929 } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
John Porto7e93c622015-06-23 10:58:57 -0700930 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
931 Offset =
932 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
933 SymOffset->getSuppressMangling());
934 }
John Porto921856d2015-07-07 11:56:26 -0700935 auto *MemOperand = Traits::X86OperandMem::create(
John Porto7e93c622015-06-23 10:58:57 -0700936 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
937 Mem->getShift(), Mem->getSegmentRegister());
    // Test if the Offset is an eligible i32 constant for randomization and
    // pooling. Blind/pool it if it is. Otherwise return as an ordinary mem
    // operand.
    return legalize(MemOperand);
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

template <class Machine>
llvm::SmallBitVector
TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
                                       RegSetMask Exclude) const {
  return Traits::getRegisterSet(Include, Exclude);
}

template <class Machine>
void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
  IsEbpBasedFrame = true;
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of esp, etc.
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Operand *TotalSize = legalize(Inst->getSizeInBytes());
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment =
      std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
  if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
    _and(esp, Ctx->getConstantInt32(-Alignment));
  }
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    _sub(esp, Ctx->getConstantInt32(Value));
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    _add(T, Ctx->getConstantInt32(Alignment - 1));
    _and(T, Ctx->getConstantInt32(-Alignment));
    _sub(esp, T);
  }
  _mov(Dest, esp);
}
/// Strength-reduce scalar integer multiplication by a constant (for
/// i32 or narrower) for certain constants. The lea instruction can be
/// used to multiply by 3, 5, or 9, and the shl instruction can be used
/// to multiply by powers of 2. These can be combined such that
/// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
/// combined with left-shifting by 2.
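///
/// For example, 100 = 5 * 5 * 4, so multiplying T by 100 can lower to:
///   lea T, [T + 4*T]  ; T *= 5
///   lea T, [T + 4*T]  ; T *= 5
///   shl T, 2          ; T *= 4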
John Porto7e93c622015-06-23 10:58:57 -07001005template <class Machine>
1006bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1007 int32_t Src1) {
1008 // Disable this optimization for Om1 and O0, just to keep things
1009 // simple there.
1010 if (Ctx->getFlags().getOptLevel() < Opt_1)
1011 return false;
1012 Type Ty = Dest->getType();
1013 Variable *T = nullptr;
1014 if (Src1 == -1) {
1015 _mov(T, Src0);
1016 _neg(T);
1017 _mov(Dest, T);
1018 return true;
1019 }
1020 if (Src1 == 0) {
1021 _mov(Dest, Ctx->getConstantZero(Ty));
1022 return true;
1023 }
1024 if (Src1 == 1) {
1025 _mov(T, Src0);
1026 _mov(Dest, T);
1027 return true;
1028 }
1029 // Don't bother with the edge case where Src1 == MININT.
1030 if (Src1 == -Src1)
1031 return false;
1032 const bool Src1IsNegative = Src1 < 0;
1033 if (Src1IsNegative)
1034 Src1 = -Src1;
1035 uint32_t Count9 = 0;
1036 uint32_t Count5 = 0;
1037 uint32_t Count3 = 0;
1038 uint32_t Count2 = 0;
1039 uint32_t CountOps = 0;
1040 while (Src1 > 1) {
1041 if (Src1 % 9 == 0) {
1042 ++CountOps;
1043 ++Count9;
1044 Src1 /= 9;
1045 } else if (Src1 % 5 == 0) {
1046 ++CountOps;
1047 ++Count5;
1048 Src1 /= 5;
1049 } else if (Src1 % 3 == 0) {
1050 ++CountOps;
1051 ++Count3;
1052 Src1 /= 3;
1053 } else if (Src1 % 2 == 0) {
1054 if (Count2 == 0)
1055 ++CountOps;
1056 ++Count2;
1057 Src1 /= 2;
1058 } else {
1059 return false;
1060 }
1061 }
1062 // Lea optimization only works for i16 and i32 types, not i8.
1063 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
1064 return false;
1065 // Limit the number of lea/shl operations for a single multiply, to
1066 // a somewhat arbitrary choice of 3.
1067 const uint32_t MaxOpsForOptimizedMul = 3;
1068 if (CountOps > MaxOpsForOptimizedMul)
1069 return false;
1070 _mov(T, Src0);
1071 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1072 for (uint32_t i = 0; i < Count9; ++i) {
1073 const uint16_t Shift = 3; // log2(9-1)
John Porto921856d2015-07-07 11:56:26 -07001074 _lea(T,
1075 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
John Porto7e93c622015-06-23 10:58:57 -07001076 _set_dest_nonkillable();
1077 }
1078 for (uint32_t i = 0; i < Count5; ++i) {
1079 const uint16_t Shift = 2; // log2(5-1)
John Porto921856d2015-07-07 11:56:26 -07001080 _lea(T,
1081 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
John Porto7e93c622015-06-23 10:58:57 -07001082 _set_dest_nonkillable();
1083 }
1084 for (uint32_t i = 0; i < Count3; ++i) {
1085 const uint16_t Shift = 1; // log2(3-1)
John Porto921856d2015-07-07 11:56:26 -07001086 _lea(T,
1087 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
John Porto7e93c622015-06-23 10:58:57 -07001088 _set_dest_nonkillable();
1089 }
1090 if (Count2) {
1091 _shl(T, Ctx->getConstantInt(Ty, Count2));
1092 }
1093 if (Src1IsNegative)
1094 _neg(T);
1095 _mov(Dest, T);
1096 return true;
1097}
1098
1099template <class Machine>
1100void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1101 Variable *Dest = Inst->getDest();
1102 Operand *Src0 = legalize(Inst->getSrc(0));
1103 Operand *Src1 = legalize(Inst->getSrc(1));
1104 if (Inst->isCommutative()) {
1105 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1106 std::swap(Src0, Src1);
1107 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1108 std::swap(Src0, Src1);
1109 }
1110 if (Dest->getType() == IceType_i64) {
1111 // These helper-call-involved instructions are lowered in this
1112 // separate switch. This is because loOperand() and hiOperand()
1113 // may insert redundant instructions for constant blinding and
1114 // pooling. Such redundant instructions will fail liveness analysis
1115 // under -Om1 setting. And, actually these arguments do not need
1116 // to be processed with loOperand() and hiOperand() to be used.
1117 switch (Inst->getOp()) {
1118 case InstArithmetic::Udiv: {
1119 const SizeT MaxSrcs = 2;
1120 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1121 Call->addArg(Inst->getSrc(0));
1122 Call->addArg(Inst->getSrc(1));
1123 lowerCall(Call);
1124 return;
1125 }
1126 case InstArithmetic::Sdiv: {
1127 const SizeT MaxSrcs = 2;
1128 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
1129 Call->addArg(Inst->getSrc(0));
1130 Call->addArg(Inst->getSrc(1));
1131 lowerCall(Call);
1132 return;
1133 }
1134 case InstArithmetic::Urem: {
1135 const SizeT MaxSrcs = 2;
1136 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
1137 Call->addArg(Inst->getSrc(0));
1138 Call->addArg(Inst->getSrc(1));
1139 lowerCall(Call);
1140 return;
1141 }
1142 case InstArithmetic::Srem: {
1143 const SizeT MaxSrcs = 2;
1144 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
1145 Call->addArg(Inst->getSrc(0));
1146 Call->addArg(Inst->getSrc(1));
1147 lowerCall(Call);
1148 return;
1149 }
1150 default:
1151 break;
1152 }
1153
1154 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1155 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1156 Operand *Src0Lo = loOperand(Src0);
1157 Operand *Src0Hi = hiOperand(Src0);
1158 Operand *Src1Lo = loOperand(Src1);
1159 Operand *Src1Hi = hiOperand(Src1);
1160 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1161 switch (Inst->getOp()) {
1162 case InstArithmetic::_num:
1163 llvm_unreachable("Unknown arithmetic operator");
1164 break;
1165 case InstArithmetic::Add:
1166 _mov(T_Lo, Src0Lo);
1167 _add(T_Lo, Src1Lo);
1168 _mov(DestLo, T_Lo);
1169 _mov(T_Hi, Src0Hi);
1170 _adc(T_Hi, Src1Hi);
1171 _mov(DestHi, T_Hi);
1172 break;
1173 case InstArithmetic::And:
1174 _mov(T_Lo, Src0Lo);
1175 _and(T_Lo, Src1Lo);
1176 _mov(DestLo, T_Lo);
1177 _mov(T_Hi, Src0Hi);
1178 _and(T_Hi, Src1Hi);
1179 _mov(DestHi, T_Hi);
1180 break;
1181 case InstArithmetic::Or:
1182 _mov(T_Lo, Src0Lo);
1183 _or(T_Lo, Src1Lo);
1184 _mov(DestLo, T_Lo);
1185 _mov(T_Hi, Src0Hi);
1186 _or(T_Hi, Src1Hi);
1187 _mov(DestHi, T_Hi);
1188 break;
1189 case InstArithmetic::Xor:
1190 _mov(T_Lo, Src0Lo);
1191 _xor(T_Lo, Src1Lo);
1192 _mov(DestLo, T_Lo);
1193 _mov(T_Hi, Src0Hi);
1194 _xor(T_Hi, Src1Hi);
1195 _mov(DestHi, T_Hi);
1196 break;
1197 case InstArithmetic::Sub:
1198 _mov(T_Lo, Src0Lo);
1199 _sub(T_Lo, Src1Lo);
1200 _mov(DestLo, T_Lo);
1201 _mov(T_Hi, Src0Hi);
1202 _sbb(T_Hi, Src1Hi);
1203 _mov(DestHi, T_Hi);
1204 break;
1205 case InstArithmetic::Mul: {
1206 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
John Porto5d0acff2015-06-30 15:29:21 -07001207 Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1208 Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
John Porto7e93c622015-06-23 10:58:57 -07001209 // gcc does the following:
1210 // a=b*c ==>
1211 // t1 = b.hi; t1 *=(imul) c.lo
1212 // t2 = c.hi; t2 *=(imul) b.lo
1213 // t3:eax = b.lo
1214 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1215 // a.lo = t4.lo
1216 // t4.hi += t1
1217 // t4.hi += t2
1218 // a.hi = t4.hi
1219 // The mul instruction cannot take an immediate operand.
1220 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
1221 _mov(T_1, Src0Hi);
1222 _imul(T_1, Src1Lo);
1223 _mov(T_2, Src1Hi);
1224 _imul(T_2, Src0Lo);
John Porto5d0acff2015-06-30 15:29:21 -07001225 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07001226 _mul(T_4Lo, T_3, Src1Lo);
1227 // The mul instruction produces two dest variables, edx:eax. We
1228 // create a fake definition of edx to account for this.
1229 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1230 _mov(DestLo, T_4Lo);
1231 _add(T_4Hi, T_1);
1232 _add(T_4Hi, T_2);
1233 _mov(DestHi, T_4Hi);
1234 } break;
1235 case InstArithmetic::Shl: {
1236 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1237 // gcc does the following:
1238 // a=b<<c ==>
1239 // t1:ecx = c.lo & 0xff
1240 // t2 = b.lo
1241 // t3 = b.hi
1242 // t3 = shld t3, t2, t1
1243 // t2 = shl t2, t1
1244 // test t1, 0x20
1245 // je L1
1246 // use(t3)
1247 // t3 = t2
1248 // t2 = 0
1249 // L1:
1250 // a.lo = t2
1251 // a.hi = t3
1252 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1253 Constant *BitTest = Ctx->getConstantInt32(0x20);
1254 Constant *Zero = Ctx->getConstantZero(IceType_i32);
John Porto921856d2015-07-07 11:56:26 -07001255 typename Traits::Insts::Label *Label =
1256 Traits::Insts::Label::create(Func, this);
John Porto5d0acff2015-06-30 15:29:21 -07001257 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
John Porto7e93c622015-06-23 10:58:57 -07001258 _mov(T_2, Src0Lo);
1259 _mov(T_3, Src0Hi);
1260 _shld(T_3, T_2, T_1);
1261 _shl(T_2, T_1);
1262 _test(T_1, BitTest);
John Porto5d0acff2015-06-30 15:29:21 -07001263 _br(Traits::Cond::Br_e, Label);
John Porto7e93c622015-06-23 10:58:57 -07001264 // T_2 and T_3 are being assigned again because of the
1265 // intra-block control flow, so we need the _mov_nonkillable
1266 // variant to avoid liveness problems.
1267 _mov_nonkillable(T_3, T_2);
1268 _mov_nonkillable(T_2, Zero);
1269 Context.insert(Label);
1270 _mov(DestLo, T_2);
1271 _mov(DestHi, T_3);
1272 } break;
1273 case InstArithmetic::Lshr: {
1274 // a=b>>c (unsigned) ==>
1275 // t1:ecx = c.lo & 0xff
1276 // t2 = b.lo
1277 // t3 = b.hi
1278 // t2 = shrd t2, t3, t1
1279 // t3 = shr t3, t1
1280 // test t1, 0x20
1281 // je L1
1282 // use(t2)
1283 // t2 = t3
1284 // t3 = 0
1285 // L1:
1286 // a.lo = t2
1287 // a.hi = t3
1288 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1289 Constant *BitTest = Ctx->getConstantInt32(0x20);
1290 Constant *Zero = Ctx->getConstantZero(IceType_i32);
John Porto921856d2015-07-07 11:56:26 -07001291 typename Traits::Insts::Label *Label =
1292 Traits::Insts::Label::create(Func, this);
John Porto5d0acff2015-06-30 15:29:21 -07001293 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
John Porto7e93c622015-06-23 10:58:57 -07001294 _mov(T_2, Src0Lo);
1295 _mov(T_3, Src0Hi);
1296 _shrd(T_2, T_3, T_1);
1297 _shr(T_3, T_1);
1298 _test(T_1, BitTest);
John Porto5d0acff2015-06-30 15:29:21 -07001299 _br(Traits::Cond::Br_e, Label);
John Porto7e93c622015-06-23 10:58:57 -07001300 // T_2 and T_3 are being assigned again because of the
1301 // intra-block control flow, so we need the _mov_nonkillable
1302 // variant to avoid liveness problems.
1303 _mov_nonkillable(T_2, T_3);
1304 _mov_nonkillable(T_3, Zero);
1305 Context.insert(Label);
1306 _mov(DestLo, T_2);
1307 _mov(DestHi, T_3);
1308 } break;
1309 case InstArithmetic::Ashr: {
1310 // a=b>>c (signed) ==>
1311 // t1:ecx = c.lo & 0xff
1312 // t2 = b.lo
1313 // t3 = b.hi
1314 // t2 = shrd t2, t3, t1
1315 // t3 = sar t3, t1
1316 // test t1, 0x20
1317 // je L1
1318 // use(t2)
1319 // t2 = t3
1320 // t3 = sar t3, 0x1f
1321 // L1:
1322 // a.lo = t2
1323 // a.hi = t3
1324 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1325 Constant *BitTest = Ctx->getConstantInt32(0x20);
1326 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
John Porto921856d2015-07-07 11:56:26 -07001327 typename Traits::Insts::Label *Label =
1328 Traits::Insts::Label::create(Func, this);
John Porto5d0acff2015-06-30 15:29:21 -07001329 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
John Porto7e93c622015-06-23 10:58:57 -07001330 _mov(T_2, Src0Lo);
1331 _mov(T_3, Src0Hi);
1332 _shrd(T_2, T_3, T_1);
1333 _sar(T_3, T_1);
1334 _test(T_1, BitTest);
John Porto5d0acff2015-06-30 15:29:21 -07001335 _br(Traits::Cond::Br_e, Label);
John Porto7e93c622015-06-23 10:58:57 -07001336 // T_2 and T_3 are being assigned again because of the
1337 // intra-block control flow, so T_2 needs the _mov_nonkillable
1338 // variant to avoid liveness problems. T_3 doesn't need special
1339 // treatment because it is reassigned via _sar instead of _mov.
1340 _mov_nonkillable(T_2, T_3);
1341 _sar(T_3, SignExtend);
1342 Context.insert(Label);
1343 _mov(DestLo, T_2);
1344 _mov(DestHi, T_3);
1345 } break;
1346 case InstArithmetic::Fadd:
1347 case InstArithmetic::Fsub:
1348 case InstArithmetic::Fmul:
1349 case InstArithmetic::Fdiv:
1350 case InstArithmetic::Frem:
1351 llvm_unreachable("FP instruction with i64 type");
1352 break;
1353 case InstArithmetic::Udiv:
1354 case InstArithmetic::Sdiv:
1355 case InstArithmetic::Urem:
1356 case InstArithmetic::Srem:
1357 llvm_unreachable("Call-helper-involved instruction for i64 type \
1358 should have already been handled before");
      break;
    }
    return;
  }
  if (isVectorType(Dest->getType())) {
    // TODO: Trap on integer divide and integer modulo by zero.
    // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
    if (llvm::isa<typename Traits::X86OperandMem>(Src1))
      Src1 = legalizeToReg(Src1);
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _padd(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::And: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _pand(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Or: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _por(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Xor: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _pxor(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Sub: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _psub(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Mul: {
      bool TypesAreValidForPmull =
          Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
      bool InstructionSetIsValidForPmull =
          Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
      if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
        Variable *T = makeReg(Dest->getType());
        _movp(T, Src0);
        _pmull(T, Src1);
        _movp(Dest, T);
      } else if (Dest->getType() == IceType_v4i32) {
        // Lowering sequence:
        // Note: The mask arguments have index 0 on the left.
        //
        // movups T1, Src0
        // pshufd T2, Src0, {1,0,3,0}
        // pshufd T3, Src1, {1,0,3,0}
        // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
        // pmuludq T1, Src1
        // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
        // pmuludq T2, T3
        // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
        // shufps T1, T2, {0,2,0,2}
        // pshufd T4, T1, {0,2,1,3}
        // movups Dest, T4

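        // Rationale: SSE2 has no 32-bit packed multiply (pmulld is SSE4.1),
        // but pmuludq multiplies the even-indexed 32-bit lanes into 64-bit
        // products. The pshufd copies move the odd lanes into even positions
        // for a second pmuludq, and the trailing shufps/pshufd gather the
        // four low 32-bit halves back into element order.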
        // Mask that directs pshufd to create a vector with entries
        // Src[1, 0, 3, 0]
        const unsigned Constant1030 = 0x31;
        Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
        // Mask that directs shufps to create a vector with entries
        // Dest[0, 2], Src[0, 2]
        const unsigned Mask0202 = 0x88;
        // Mask that directs pshufd to create a vector with entries
        // Src[0, 2, 1, 3]
        const unsigned Mask0213 = 0xd8;
        Variable *T1 = makeReg(IceType_v4i32);
        Variable *T2 = makeReg(IceType_v4i32);
        Variable *T3 = makeReg(IceType_v4i32);
        Variable *T4 = makeReg(IceType_v4i32);
        _movp(T1, Src0);
        _pshufd(T2, Src0, Mask1030);
        _pshufd(T3, Src1, Mask1030);
        _pmuludq(T1, Src1);
        _pmuludq(T2, T3);
        _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
        _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
        _movp(Dest, T4);
      } else {
        assert(Dest->getType() == IceType_v16i8);
        scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
      }
    } break;
    case InstArithmetic::Shl:
    case InstArithmetic::Lshr:
    case InstArithmetic::Ashr:
    case InstArithmetic::Udiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Srem:
      scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
      break;
    case InstArithmetic::Fadd: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _addps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fsub: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _subps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fmul: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _mulps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fdiv: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _divps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Frem:
      scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
      break;
    }
    return;
  }
  Variable *T_edx = nullptr;
  Variable *T = nullptr;
  switch (Inst->getOp()) {
  case InstArithmetic::_num:
    llvm_unreachable("Unknown arithmetic operator");
    break;
  case InstArithmetic::Add:
    _mov(T, Src0);
    _add(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::And:
    _mov(T, Src0);
    _and(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Or:
    _mov(T, Src0);
    _or(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Xor:
    _mov(T, Src0);
    _xor(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Sub:
    _mov(T, Src0);
    _sub(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Mul:
    if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
      if (optimizeScalarMul(Dest, Src0, C->getValue()))
        return;
    }
    // The 8-bit version of imul only allows the form "imul r/m8"
    // where T must be in eax.
    if (isByteSizedArithType(Dest->getType())) {
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    } else {
      _mov(T, Src0);
    }
    _imul(T, Src1);
    _mov(Dest, T);
    break;
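  // The x86 shift instructions only accept a variable shift count in %cl,
  // so for the three shift cases below a non-constant Src1 is pinned to
  // %ecx.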
  case InstArithmetic::Shl:
    _mov(T, Src0);
    if (!llvm::isa<ConstantInteger32>(Src1))
      Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
    _shl(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Lshr:
    _mov(T, Src0);
    if (!llvm::isa<ConstantInteger32>(Src1))
      Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
    _shr(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Ashr:
    _mov(T, Src0);
    if (!llvm::isa<ConstantInteger32>(Src1))
      Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
    _sar(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Udiv:
    // div and idiv are among the few arithmetic operators that do not allow
    // immediates as an operand.
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      // For 8-bit unsigned division we need to zero-extend %al into %ah. A
      // mov $0, %ah (or xor %ah, %ah) would work just fine, except that the
      // x86-64 assembler refuses to encode %ah (encoding %spl with a REX
      // prefix instead). Accessing %ah in 64-bit mode is "tricky": %ah can't
      // be encoded together with any 8-bit register except for %a[lh],
      // %b[lh], %c[lh], and %d[lh], which means the X86 target lowering (and
      // the register allocator) would have to be aware of this restriction.
      // For now, we simply zero %eax completely, and move the dividend into
      // %al.
      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      Context.insert(InstFakeDef::create(Func, T_eax));
      _xor(T_eax, T_eax);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _div(T, Src1, T);
      _mov(Dest, T);
      Context.insert(InstFakeUse::create(Func, T_eax));
    } else {
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
      _div(T, Src1, T_edx);
      _mov(Dest, T);
    }
    break;
  case InstArithmetic::Sdiv:
    // TODO(stichnot): Enable this after doing better performance
    // and cross testing.
    if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
      // Optimize division by constant power of 2, but not for Om1
      // or O0, just to keep things simple there.
      if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
        int32_t Divisor = C->getValue();
        uint32_t UDivisor = static_cast<uint32_t>(Divisor);
        if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
          uint32_t LogDiv = llvm::Log2_32(UDivisor);
          Type Ty = Dest->getType();
          // LLVM does the following for dest=src/(1<<log):
          // t=src
          // sar t,typewidth-1 // -1 if src is negative, 0 if not
          // shr t,typewidth-log
          // add t,src
          // sar t,log
          // dest=t
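          // The sar/shr pair computes a rounding bias: sar yields -1 for
          // negative src and 0 otherwise, and shr keeps its low `log` bits,
          // i.e. (1<<log)-1 for negative src and 0 otherwise. Adding the
          // bias before the final sar makes the shift round toward zero,
          // matching signed division (e.g. -7/4: bias 3, (-7+3)>>2 == -1).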
          uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
          _mov(T, Src0);
          // If for some reason we are dividing by 1, just treat it
          // like an assignment.
          if (LogDiv > 0) {
            // The initial sar is unnecessary when dividing by 2.
            if (LogDiv > 1)
              _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
            _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
            _add(T, Src0);
            _sar(T, Ctx->getConstantInt(Ty, LogDiv));
          }
          _mov(Dest, T);
          return;
        }
      }
    }
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _cbwdq(T, T);
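      // Here _cbwdq emits cbw, which sign-extends %al into %ax, so the
      // following idiv r/m8 divides %ax and leaves the quotient in %al and
      // the remainder in %ah.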
      _idiv(T, Src1, T);
      _mov(Dest, T);
    } else {
      T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _cbwdq(T_edx, T);
      _idiv(T, Src1, T_edx);
      _mov(Dest, T);
    }
    break;
  case InstArithmetic::Urem:
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      Context.insert(InstFakeDef::create(Func, T_eax));
      _xor(T_eax, T_eax);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _div(T, Src1, T);
      // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
      // mov %ah, %al because it would make x86-64 codegen more complicated. If
      // this ever becomes a problem we can introduce a pseudo rem instruction
      // that returns the remainder in %al directly (and uses a mov for copying
      // %ah to %al.)
      static constexpr uint8_t AlSizeInBits = 8;
      _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
      _mov(Dest, T);
      Context.insert(InstFakeUse::create(Func, T_eax));
    } else {
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _div(T_edx, Src1, T);
      _mov(Dest, T_edx);
    }
    break;
  case InstArithmetic::Srem:
    // TODO(stichnot): Enable this after doing better performance
    // and cross testing.
    if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
      // Optimize mod by constant power of 2, but not for Om1 or O0,
      // just to keep things simple there.
      if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
        int32_t Divisor = C->getValue();
        uint32_t UDivisor = static_cast<uint32_t>(Divisor);
        if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
          uint32_t LogDiv = llvm::Log2_32(UDivisor);
          Type Ty = Dest->getType();
          // LLVM does the following for dest=src%(1<<log):
          // t=src
          // sar t,typewidth-1 // -1 if src is negative, 0 if not
          // shr t,typewidth-log
          // add t,src
          // and t, -(1<<log)
          // sub t,src
          // neg t
          // dest=t
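          // After the and, t is src rounded toward zero to a multiple of
          // 2^log, i.e. (src/(1<<log))*(1<<log); the sub/neg pair then turns
          // t-src into src-t, the remainder
          // (e.g. -7%4: t == (-7+3)&-4 == -4, and -(-4-(-7)) == -3).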
          uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
          // If for some reason we are dividing by 1, just assign 0.
          if (LogDiv == 0) {
            _mov(Dest, Ctx->getConstantZero(Ty));
            return;
          }
          _mov(T, Src0);
          // The initial sar is unnecessary when dividing by 2.
          if (LogDiv > 1)
            _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
          _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
          _add(T, Src0);
          _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
          _sub(T, Src0);
          _neg(T);
          _mov(Dest, T);
          return;
        }
      }
    }
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      // T is %al.
      _cbwdq(T, T);
      _idiv(T, Src1, T);
      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      Context.insert(InstFakeDef::create(Func, T_eax));
      // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
      // mov %ah, %al because it would make x86-64 codegen more complicated. If
      // this ever becomes a problem we can introduce a pseudo rem instruction
      // that returns the remainder in %al directly (and uses a mov for copying
      // %ah to %al.)
      static constexpr uint8_t AlSizeInBits = 8;
      _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
      _mov(Dest, T);
      Context.insert(InstFakeUse::create(Func, T_eax));
    } else {
      T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _cbwdq(T_edx, T);
      _idiv(T_edx, Src1, T);
      _mov(Dest, T_edx);
    }
    break;
  case InstArithmetic::Fadd:
    _mov(T, Src0);
    _addss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Fsub:
    _mov(T, Src0);
    _subss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Fmul:
    _mov(T, Src0);
    _mulss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Fdiv:
    _mov(T, Src0);
    _divss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Frem: {
    const SizeT MaxSrcs = 2;
    Type Ty = Dest->getType();
    InstCall *Call = makeHelperCall(
        isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
    Call->addArg(Src0);
    Call->addArg(Src1);
    return lowerCall(Call);
  }
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *Src0Legal;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then only basic legalization
      // is needed, as the source operand can be a register, immediate, or
      // memory.
      Src0Legal = legalize(Src0);
    } else {
      // If Dest could be a stack operand, then Src0Legal must be a physical
      // register or a scalar integer immediate.
      Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
    }
    if (isVectorType(Dest->getType()))
      _movp(Dest, Src0Legal);
    else
      _mov(Dest, Src0Legal);
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();

  // Handle folding opportunities.
  if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
    assert(Producer->isDeleted());
    switch (BoolFolding::getProducerKind(Producer)) {
    default:
      break;
    case BoolFolding::PK_Icmp32: {
      // TODO(stichnot): Refactor similarities between this block and
      // the corresponding code in lowerIcmp().
      auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
      Operand *Src0 = Producer->getSrc(0);
      Operand *Src1 = legalize(Producer->getSrc(1));
      Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
      _cmp(Src0RM, Src1);
      _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
          Inst->getTargetFalse());
      return;
    }
    }
  }

  Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0, Zero);
  _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
}

template <class Machine>
void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    // Src0RM is the source operand legalized to physical register or memory,
    // but not immediate, since the relevant x86 native instructions don't
    // allow an immediate operand. If the operand is an immediate, we could
    // consider computing the strength-reduced result at translation time,
    // but we're unlikely to see something like that in the bitcode that
    // the optimizer wouldn't have already taken care of.
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      Type DestTy = Dest->getType();
      if (DestTy == IceType_v16i8) {
        // onemask = materialize(1,1,...); dst = (src & onemask) > 0
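        // SSE2 has no per-byte shift instructions, so the shl/sar trick used
        // for the wider element types below is unavailable for v16i8.
        // Instead, isolate the low bit and compare against zero: pcmpgt
        // produces all-ones exactly in the lanes whose low bit was 1.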
        Variable *OneMask = makeVectorOfOnes(Dest->getType());
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _pand(T, OneMask);
        Variable *Zeros = makeVectorOfZeros(Dest->getType());
        _pcmpgt(T, Zeros);
        _movp(Dest, T);
      } else {
        // width = width(elty) - 1; dest = (src << width) >> width
        SizeT ShiftAmount =
            Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
            1;
        Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _psll(T, ShiftConstant);
        _psra(T, ShiftConstant);
        _movp(Dest, T);
      }
    } else if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Constant *Shift = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(T_Lo, Src0RM);
      } else if (Src0RM->getType() == IceType_i1) {
        _movzx(T_Lo, Src0RM);
        _shl(T_Lo, Shift);
        _sar(T_Lo, Shift);
      } else {
        _movsx(T_Lo, Src0RM);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = nullptr;
      _mov(T_Hi, T_Lo);
      if (Src0RM->getType() != IceType_i1)
        // For i1, the sar instruction is already done above.
        _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else if (Src0RM->getType() == IceType_i1) {
      // t1 = src
      // shl t1, dst_bitwidth - 1
      // sar t1, dst_bitwidth - 1
      // dst = t1
      size_t DestBits =
          Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
      Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
      Variable *T = makeReg(Dest->getType());
      if (typeWidthInBytes(Dest->getType()) <=
          typeWidthInBytes(Src0RM->getType())) {
        _mov(T, Src0RM);
      } else {
        // Widen the source using movsx or movzx. (It doesn't matter
        // which one, since the following shl/sar overwrite the bits.)
        _movzx(T, Src0RM);
      }
      _shl(T, ShiftAmount);
      _sar(T, ShiftAmount);
      _mov(Dest, T);
    } else {
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dest = onemask & src
      Type DestTy = Dest->getType();
      Variable *OneMask = makeVectorOfOnes(DestTy);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(Tmp, Src0RM);
      } else {
        _movzx(Tmp, Src0RM);
      }
      if (Src0RM->getType() == IceType_i1) {
        Constant *One = Ctx->getConstantInt32(1);
        _and(Tmp, One);
      }
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      Constant *One = Ctx->getConstantInt32(1);
      Type DestTy = Dest->getType();
      Variable *T;
      if (DestTy == IceType_i8) {
        T = makeReg(DestTy);
        _mov(T, Src0RM);
      } else {
        // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
        T = makeReg(IceType_i32);
        _movzx(T, Src0RM);
      }
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dst = src & onemask
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Type Src0Ty = Src0RM->getType();
      Variable *OneMask = makeVectorOfOnes(Src0Ty);
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else {
      Operand *Src0 = legalizeUndef(Inst->getSrc(0));
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // t1 = trunc Src0RM; Dest = t1
      Variable *T = nullptr;
      _mov(T, Src0RM);
      if (Dest->getType() == IceType_i1)
        _and(T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
      _movp(Dest, T);
    } else if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers. SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word. This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call =
          makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
                                                           : H_fptosi_f64_i64,
                         Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Fptoui:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else if (Dest->getType() == IceType_i64 ||
               Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Inst->getSrc(0)->getType();
      IceString TargetString;
      if (isInt32Asserting32Or64(DestType)) {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
                                                         : H_fptoui_f64_i32;
      } else {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
                                                         : H_fptoui_f64_i64;
      }
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Sitofp:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Inst->getSrc(0)->getType() == IceType_v4i32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
      _movp(Dest, T);
    } else if (Inst->getSrc(0)->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call =
          makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
                                                            : H_sitofp_i64_f64,
                         Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp: {
    Operand *Src0 = Inst->getSrc(0);
    if (isVectorType(Src0->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Src0->getType() == IceType_v4i32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
    } else if (Src0->getType() == IceType_i64 ||
               Src0->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64. Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString TargetString;
      if (isInt32Asserting32Or64(Src0->getType())) {
        TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
                                                          : H_uitofp_i32_f64;
      } else {
        TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
                                                          : H_uitofp_i64_f64;
      }
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i8: {
      assert(Src0->getType() == IceType_v8i1);
      InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i16: {
      assert(Src0->getType() == IceType_v16i1);
      InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i32:
    case IceType_f32: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      (void)DestType;
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      // t.f32 = b.f32
      // s.f32 = spill t.f32
      // a.i32 = s.f32
      Variable *T = nullptr;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      typename Traits::SpillVariable *SpillVar =
          Func->makeVariable<typename Traits::SpillVariable>(SrcType);
      SpillVar->setLinkedTo(Dest);
      Variable *Spill = SpillVar;
      Spill->setWeight(RegWeight::Zero);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      // s.f64 = spill b.f64
      // t_lo.i32 = lo(s.f64)
      // a_lo.i32 = t_lo.i32
      // t_hi.i32 = hi(s.f64)
      // a_hi.i32 = t_hi.i32
      Operand *SpillLo, *SpillHi;
      if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
        typename Traits::SpillVariable *SpillVar =
            Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
        SpillVar->setLinkedTo(Src0Var);
        Variable *Spill = SpillVar;
        Spill->setWeight(RegWeight::Zero);
        _movq(Spill, Src0RM);
        SpillLo = Traits::VariableSplit::create(Func, Spill,
                                                Traits::VariableSplit::Low);
        SpillHi = Traits::VariableSplit::create(Func, Spill,
                                                Traits::VariableSplit::High);
      } else {
        SpillLo = loOperand(Src0RM);
        SpillHi = hiOperand(Src0RM);
      }

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      Src0 = legalize(Src0);
      assert(Src0->getType() == IceType_i64);
      if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
        Variable *T = Func->makeVariable(Dest->getType());
        _movq(T, Src0);
        _movq(Dest, T);
        break;
      }
      // a.f64 = bitcast b.i64 ==>
      // t_lo.i32 = b_lo.i32
      // FakeDef(s.f64)
      // lo(s.f64) = t_lo.i32
      // t_hi.i32 = b_hi.i32
      // hi(s.f64) = t_hi.i32
      // a.f64 = s.f64
      typename Traits::SpillVariable *SpillVar =
          Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
      SpillVar->setLinkedTo(Dest);
      Variable *Spill = SpillVar;
      Spill->setWeight(RegWeight::Zero);

      Variable *T_Lo = nullptr, *T_Hi = nullptr;
      typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
          Func, Spill, Traits::VariableSplit::Low);
      typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
          Func, Spill, Traits::VariableSplit::High);
      _mov(T_Lo, loOperand(Src0));
      // Technically, the Spill is defined after the _store happens, but
      // SpillLo is considered a "use" of Spill so define Spill before it
      // is used.
      Context.insert(InstFakeDef::create(Func, Spill));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0));
      _store(T_Hi, SpillHi);
      _movq(Dest, Spill);
    } break;
    case IceType_v8i1: {
      assert(Src0->getType() == IceType_i8);
      InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v16i1: {
      assert(Src0->getType() == IceType_i16);
      InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4i32:
    case IceType_v4f32: {
      _movp(Dest, legalizeToReg(Src0));
    } break;
    }
    break;
  }
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerExtractElement(
    const InstExtractElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = Traits::getInVectorElementType(Ty);
  Variable *ExtractedElementR = makeReg(InVectorElementTy);

  // TODO(wala): Determine the best lowering sequences for each type.
  bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
                     InstructionSet >= Traits::SSE4_1;
  if (CanUsePextr && Ty != IceType_v4f32) {
    // Use pextrb, pextrw, or pextrd.
    Constant *Mask = Ctx->getConstantInt32(Index);
    Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use pshufd and movd/movss.
    Variable *T = nullptr;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted
      // is not at the lowest index.
      Constant *Mask = Ctx->getConstantInt32(Index);
      T = makeReg(Ty);
      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
    } else {
      T = legalizeToReg(SourceVectNotLegalized);
    }

    if (InVectorElementTy == IceType_i32) {
      _movd(ExtractedElementR, T);
    } else { // Ty == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not
      // allow a vector source and a scalar destination. _mov should be
      // able to be used here.
      // _movss is a binary instruction, so the FakeDef is needed to
      // keep the live range analysis consistent.
      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
      _movss(ExtractedElementR, T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToReg(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    typename Traits::X86OperandMem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Inst->getDest();
  _mov(Dest, ExtractedElementR);
}

template <class Machine>
void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    InstFcmp::FCond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < Traits::TableFcmpSize);

    if (Traits::TableFcmp[Index].SwapVectorOperands)
      std::swap(Src0, Src1);

    Variable *T = nullptr;

    if (Condition == InstFcmp::True) {
      // makeVectorOfOnes() requires an integer vector type.
      T = makeVectorOfMinusOnes(IceType_v4i32);
    } else if (Condition == InstFcmp::False) {
      T = makeVectorOfZeros(Dest->getType());
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);

      switch (Condition) {
      default: {
        typename Traits::Cond::CmppsCond Predicate =
            Traits::TableFcmp[Index].Predicate;
        assert(Predicate != Traits::Cond::Cmpps_Invalid);
        T = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Predicate);
      } break;
      case InstFcmp::One: {
        // Check both unequal and ordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
        _pand(T, T2);
      } break;
      case InstFcmp::Ueq: {
        // Check both equal or unordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
        _por(T, T2);
      } break;
      }
    }

    _movp(Dest, T);
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //                      /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  //
  // setcc lowering when C1 != Br_None && C2 == Br_None:
  //   ucomiss b, c       /* but swap b,c order if SwapOperands==true */
  //   setcc a, C1
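  // Two branch conditions are involved because ucomiss reports an unordered
  // result (a NaN operand) by setting ZF, PF, and CF all to 1, so a predicate
  // that a single condition code cannot capture is decided with both a C1
  // and a C2 branch.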
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < Traits::TableFcmpSize);
  if (Traits::TableFcmp[Index].SwapScalarOperands)
    std::swap(Src0, Src1);
  bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None);
  bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = nullptr;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
    if (!HasC2) {
      assert(Traits::TableFcmp[Index].Default);
      _setcc(Dest, Traits::TableFcmp[Index].C1);
      return;
    }
  }
  Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    typename Traits::Insts::Label *Label =
        Traits::Insts::Label::create(Func, this);
    _br(Traits::TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(Traits::TableFcmp[Index].C2, Label);
    }
    Constant *NonDefault =
        Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);
    _mov_nonkillable(Dest, NonDefault);
    Context.insert(Label);
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    Type Ty = Src0->getType();
    // Promote i1 vectors to 128 bit integer vector types.
    if (typeElementType(Ty) == IceType_i1) {
      Type NewTy = IceType_NUM;
      switch (Ty) {
      default:
        llvm_unreachable("unexpected type");
        break;
      case IceType_v4i1:
        NewTy = IceType_v4i32;
        break;
      case IceType_v8i1:
        NewTy = IceType_v8i16;
        break;
      case IceType_v16i1:
        NewTy = IceType_v16i8;
        break;
      }
      Variable *NewSrc0 = Func->makeVariable(NewTy);
      Variable *NewSrc1 = Func->makeVariable(NewTy);
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
      Src0 = NewSrc0;
      Src1 = NewSrc1;
      Ty = NewTy;
    }

    InstIcmp::ICond Condition = Inst->getCondition();

    Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

    // SSE2 only has signed comparison operations. Transform unsigned
    // inputs in a manner that allows for the use of signed comparison
    // operations by flipping the high order bits.
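    // XORing each lane with the sign bit is an order-preserving map from
    // unsigned to signed: x <u y iff (x ^ 0x80..0) <s (y ^ 0x80..0), so a
    // signed pcmpgt on the flipped inputs implements the unsigned predicate.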
    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
      Variable *T0 = makeReg(Ty);
      Variable *T1 = makeReg(Ty);
      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
      _movp(T0, Src0RM);
      _pxor(T0, HighOrderBits);
      _movp(T1, Src1RM);
      _pxor(T1, HighOrderBits);
      Src0RM = T0;
      Src1RM = T1;
    }

    Variable *T = makeReg(Ty);
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    case InstIcmp::Eq: {
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
    } break;
    case InstIcmp::Ne: {
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ugt:
    case InstIcmp::Sgt: {
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
    } break;
    case InstIcmp::Uge:
    case InstIcmp::Sge: {
      // !(Src1RM > Src0RM)
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ult:
    case InstIcmp::Slt: {
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
    } break;
    case InstIcmp::Ule:
    case InstIcmp::Sle: {
      // !(Src0RM > Src1RM)
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    }

    _movp(Dest, T);
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < Traits::TableIcmp64Size);
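    // The 64-bit compare works on the 32-bit halves: compare the high words
    // first, where C1 proves the predicate true and C2 proves it false, and
    // only when the high words are equal fall through to the low words,
    // which are compared with the unsigned variant of the condition (C3).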
    Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    Constant *Zero = Ctx->getConstantZero(IceType_i32);
    Constant *One = Ctx->getConstantInt32(1);
    typename Traits::Insts::Label *LabelFalse =
        Traits::Insts::Label::create(Func, this);
    typename Traits::Insts::Label *LabelTrue =
        Traits::Insts::Label::create(Func, this);
    _mov(Dest, One);
    _cmp(Src0HiRM, Src1HiRI);
    if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
      _br(Traits::TableIcmp64[Index].C1, LabelTrue);
    if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
      _br(Traits::TableIcmp64[Index].C2, LabelFalse);
    _cmp(Src0LoRM, Src1LoRI);
    _br(Traits::TableIcmp64[Index].C3, LabelTrue);
    Context.insert(LabelFalse);
    _mov_nonkillable(Dest, Zero);
    Context.insert(LabelTrue);
    return;
  }

  // cmp b, c
  Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
  _cmp(Src0RM, Src1);
  _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
}

template <class Machine>
void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);
  unsigned Index = ElementIndex->getValue();
  assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = Traits::getInVectorElementType(Ty);

  if (ElementTy == IceType_i1) {
    // Expand the element to the appropriate size for it to be inserted
    // in the vector.
    Variable *Expanded = Func->makeVariable(InVectorElementTy);
    InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                      ElementToInsertNotLegalized);
    lowerCast(Cast);
    ElementToInsertNotLegalized = Expanded;
  }

  if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
      InstructionSet >= Traits::SSE4_1) {
    // Use insertps, pinsrb, pinsrw, or pinsrd.
    Operand *ElementRM =
        legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
    Variable *T = makeReg(Ty);
    _movp(T, SourceVectRM);
    if (Ty == IceType_v4f32)
      _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
    else
      _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
    _movp(Inst->getDest(), T);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use shufps or movss.
    Variable *ElementR = nullptr;
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

    if (InVectorElementTy == IceType_f32) {
      // ElementR will be in an XMM register since it is floating point.
      ElementR = legalizeToReg(ElementToInsertNotLegalized);
    } else {
      // Copy an integer to an XMM register.
      Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
      ElementR = makeReg(Ty);
      _movd(ElementR, T);
    }

    if (Index == 0) {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _movss(T, ElementR);
      _movp(Inst->getDest(), T);
      return;
    }

    // shufps treats the source and destination operands as vectors of
    // four doublewords. The destination's two high doublewords are
    // selected from the source operand and the two low doublewords are
    // selected from the (original value of) the destination operand.
    // An insertelement operation can be effected with a sequence of two
    // shufps operations with appropriate masks. In all cases below,
    // Element[0] is being inserted into SourceVectOperand. Indices are
    // ordered from left to right.
    //
    // insertelement into index 1 (result is stored in ElementR):
    //   ElementR := ElementR[0, 0] SourceVectRM[0, 0]
    //   ElementR := ElementR[3, 0] SourceVectRM[2, 3]
    //
    // insertelement into index 2 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 3]
    //   T := T[0, 1] ElementR[0, 3]
    //
    // insertelement into index 3 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 2]
    //   T := T[0, 1] ElementR[3, 0]
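    // To decode a mask byte: each bit pair, from least to most significant,
    // selects one doubleword; the two low fields pick from the destination
    // and the two high fields pick from the source. E.g. Mask2[0] == 227
    // (0b11100011) reads back as ElementR := ElementR[3, 0]
    // SourceVectRM[2, 3] in the left-to-right notation above.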
    const unsigned char Mask1[3] = {0, 192, 128};
    const unsigned char Mask2[3] = {227, 196, 52};

    Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
    Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);

    if (Index == 1) {
      _shufps(ElementR, SourceVectRM, Mask1Constant);
      _shufps(ElementR, SourceVectRM, Mask2Constant);
      _movp(Inst->getDest(), ElementR);
    } else {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _shufps(ElementR, T, Mask1Constant);
      _shufps(T, ElementR, Mask2Constant);
      _movp(Inst->getDest(), T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and perform the insertion in
    // memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToReg(SourceVectNotLegalized));

    // Compute the location of the position to insert in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    typename Traits::X86OperandMem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _store(legalizeToReg(ElementToInsertNotLegalized), Loc);

    Variable *T = makeReg(Ty);
    _movp(T, Slot);
    _movp(Inst->getDest(), T);
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerIntrinsicCall(
    const InstIntrinsicCall *Instr) {
  switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)),
            getConstantMemoryOrder(Instr->getArg(4)))) {
      Func->setError("Unexpected memory ordering for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = legalize(Instr->getArg(0));
    Operand *Expected = legalize(Instr->getArg(1));
    Operand *Desired = legalize(Instr->getArg(2));
    if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
      return;
    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
    return;
  }
  case Intrinsics::AtomicFence:
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(0)))) {
      Func->setError("Unexpected memory ordering for AtomicFence");
      return;
    }
    _mfence();
    return;
2806 case Intrinsics::AtomicFenceAll:
2807 // NOTE: FenceAll should prevent any load/store from being moved
2808 // across the fence (both atomic and non-atomic). The InstX8632Mfence
2809 // instruction is currently marked coarsely as "HasSideEffects".
2810 _mfence();
2811 return;
2812 case Intrinsics::AtomicIsLockFree: {
2813 // X86 is always lock free for 8/16/32/64 bit accesses.
2814 // TODO(jvoung): Since the result is constant when given a constant
2815 // byte size, this opens up DCE opportunities.
2816 Operand *ByteSize = Instr->getArg(0);
2817 Variable *Dest = Instr->getDest();
2818 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
2819 Constant *Result;
2820 switch (CI->getValue()) {
2821 default:
2822 // Some x86-64 processors support the cmpxchg16b instruction, which
2823 // can make 16-byte operations lock free (when used with the LOCK
2824 // prefix). However, that's not supported in 32-bit mode, so just
2825 // return 0 even for large sizes.
2826 Result = Ctx->getConstantZero(IceType_i32);
2827 break;
2828 case 1:
2829 case 2:
2830 case 4:
2831 case 8:
2832 Result = Ctx->getConstantInt32(1);
2833 break;
2834 }
2835 _mov(Dest, Result);
2836 return;
2837 }
2838 // The PNaCl ABI requires the byte size to be a compile-time constant.
2839 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2840 return;
2841 }
2842 case Intrinsics::AtomicLoad: {
2843 // We require the memory address to be naturally aligned.
2844 // Given that, normal loads are atomic.
2845 if (!Intrinsics::isMemoryOrderValid(
2846 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
2847 Func->setError("Unexpected memory ordering for AtomicLoad");
2848 return;
2849 }
2850 Variable *Dest = Instr->getDest();
2851 if (Dest->getType() == IceType_i64) {
2852 // Follow what GCC does and use a movq instead of what lowerLoad()
2853 // normally does (split the load into two).
2854 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2855 // can't happen anyway, since this is x86-32 and integer arithmetic only
2856 // happens on 32-bit quantities.
2857 Variable *T = makeReg(IceType_f64);
John Porto921856d2015-07-07 11:56:26 -07002858 typename Traits::X86OperandMem *Addr =
2859 formMemoryOperand(Instr->getArg(0), IceType_f64);
John Porto7e93c622015-06-23 10:58:57 -07002860 _movq(T, Addr);
2861 // Then cast the bits back out of the XMM register to the i64 Dest.
2862 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
2863 lowerCast(Cast);
2864 // Make sure that the atomic load isn't elided when unused.
2865 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
2866 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
2867 return;
2868 }
2869 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
2870 lowerLoad(Load);
2871 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2872 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
2873 // insert the FakeUse on the last-inserted instruction's dest.
2874 Context.insert(
2875 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2876 return;
2877 }
2878 case Intrinsics::AtomicRMW:
2879 if (!Intrinsics::isMemoryOrderValid(
2880 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
2881 Func->setError("Unexpected memory ordering for AtomicRMW");
2882 return;
2883 }
Jim Stichnoth20b71f52015-06-24 15:52:24 -07002884 lowerAtomicRMW(
2885 Instr->getDest(),
2886 static_cast<uint32_t>(
2887 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
2888 Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07002889 return;
2890 case Intrinsics::AtomicStore: {
2891 if (!Intrinsics::isMemoryOrderValid(
2892 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
2893 Func->setError("Unexpected memory ordering for AtomicStore");
2894 return;
2895 }
2896 // We require the memory address to be naturally aligned.
2897 // Given that, normal stores are atomic.
2898 // Add a fence after the store to make it visible.
2899 Operand *Value = Instr->getArg(0);
2900 Operand *Ptr = Instr->getArg(1);
2901 if (Value->getType() == IceType_i64) {
2902 // Use a movq instead of what lowerStore() normally does
2903 // (split the store into two), following what GCC does.
2904 // Cast the bits from the i64 value into an XMM register first.
2905 Variable *T = makeReg(IceType_f64);
2906 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
2907 lowerCast(Cast);
2908 // Then store XMM w/ a movq.
John Porto921856d2015-07-07 11:56:26 -07002909 typename Traits::X86OperandMem *Addr =
2910 formMemoryOperand(Ptr, IceType_f64);
John Porto7e93c622015-06-23 10:58:57 -07002911 _storeq(T, Addr);
2912 _mfence();
2913 return;
2914 }
2915 InstStore *Store = InstStore::create(Func, Value, Ptr);
2916 lowerStore(Store);
2917 _mfence();
2918 return;
2919 }
2920 case Intrinsics::Bswap: {
2921 Variable *Dest = Instr->getDest();
2922 Operand *Val = Instr->getArg(0);
2923 // In 32-bit mode, bswap only works on 32-bit arguments, and the
2924 // argument must be a register. Use rotate left for 16-bit bswap.
2925 if (Val->getType() == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07002926 Val = legalizeUndef(Val);
Andrew Scull97f460d2015-07-21 10:07:42 -07002927 Variable *T_Lo = legalizeToReg(loOperand(Val));
2928 Variable *T_Hi = legalizeToReg(hiOperand(Val));
John Porto7e93c622015-06-23 10:58:57 -07002929 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2930 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2931 _bswap(T_Lo);
2932 _bswap(T_Hi);
2933 _mov(DestLo, T_Hi);
2934 _mov(DestHi, T_Lo);
2935 } else if (Val->getType() == IceType_i32) {
Andrew Scull97f460d2015-07-21 10:07:42 -07002936 Variable *T = legalizeToReg(Val);
John Porto7e93c622015-06-23 10:58:57 -07002937 _bswap(T);
2938 _mov(Dest, T);
2939 } else {
2940 assert(Val->getType() == IceType_i16);
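      // Rotating a 16-bit value left by 8 swaps its two bytes, which is
      // exactly the bswap: e.g. 0xAABB rol 8 == 0xBBAA.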
John Porto7e93c622015-06-23 10:58:57 -07002941 Constant *Eight = Ctx->getConstantInt16(8);
2942 Variable *T = nullptr;
Jan Voungfbdd2442015-07-15 12:36:20 -07002943 Val = legalize(Val);
John Porto7e93c622015-06-23 10:58:57 -07002944 _mov(T, Val);
2945 _rol(T, Eight);
2946 _mov(Dest, T);
2947 }
2948 return;
2949 }
2950 case Intrinsics::Ctpop: {
2951 Variable *Dest = Instr->getDest();
2952 Operand *Val = Instr->getArg(0);
2953 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
2954 ? H_call_ctpop_i32
2955 : H_call_ctpop_i64,
2956 Dest, 1);
2957 Call->addArg(Val);
2958 lowerCall(Call);
2959 // The popcount helpers always return 32-bit values, while the intrinsic's
2960 // signature matches the native POPCNT instruction and fills a 64-bit reg
2961 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
2962 // the user doesn't do that in the IR. If the user does that in the IR,
2963 // then this zeroing instruction is dead and gets optimized out.
2964 if (Val->getType() == IceType_i64) {
2965 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2966 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2967 _mov(DestHi, Zero);
2968 }
2969 return;
2970 }
2971 case Intrinsics::Ctlz: {
2972 // The "is zero undef" parameter is ignored and we always return
2973 // a well-defined value.
2974 Operand *Val = legalize(Instr->getArg(0));
2975 Operand *FirstVal;
2976 Operand *SecondVal = nullptr;
2977 if (Val->getType() == IceType_i64) {
2978 FirstVal = loOperand(Val);
2979 SecondVal = hiOperand(Val);
2980 } else {
2981 FirstVal = Val;
2982 }
2983 const bool IsCttz = false;
2984 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
2985 SecondVal);
2986 return;
2987 }
2988 case Intrinsics::Cttz: {
2989 // The "is zero undef" parameter is ignored and we always return
2990 // a well-defined value.
2991 Operand *Val = legalize(Instr->getArg(0));
2992 Operand *FirstVal;
2993 Operand *SecondVal = nullptr;
2994 if (Val->getType() == IceType_i64) {
2995 FirstVal = hiOperand(Val);
2996 SecondVal = loOperand(Val);
2997 } else {
2998 FirstVal = Val;
2999 }
3000 const bool IsCttz = true;
3001 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3002 SecondVal);
3003 return;
3004 }
3005 case Intrinsics::Fabs: {
3006 Operand *Src = legalize(Instr->getArg(0));
3007 Type Ty = Src->getType();
3008 Variable *Dest = Instr->getDest();
3009 Variable *T = makeVectorOfFabsMask(Ty);
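    // Assuming makeVectorOfFabsMask builds the all-ones-except-sign-bit
    // pattern (e.g. 0x7FFFFFFF per f32 lane), the pand below clears each
    // sign bit, computing |x| with no branches.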
3010 // The pand instruction's memory form reads a full m128 operand, so if
3011 // Src is an f32 or f64, we need to make sure it's in a register.
3012 if (isVectorType(Ty)) {
John Porto921856d2015-07-07 11:56:26 -07003013 if (llvm::isa<typename Traits::X86OperandMem>(Src))
Andrew Scull97f460d2015-07-21 10:07:42 -07003014 Src = legalizeToReg(Src);
John Porto7e93c622015-06-23 10:58:57 -07003015 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07003016 Src = legalizeToReg(Src);
John Porto7e93c622015-06-23 10:58:57 -07003017 }
3018 _pand(T, Src);
3019 if (isVectorType(Ty))
3020 _movp(Dest, T);
3021 else
3022 _mov(Dest, T);
3023 return;
3024 }
3025 case Intrinsics::Longjmp: {
3026 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
3027 Call->addArg(Instr->getArg(0));
3028 Call->addArg(Instr->getArg(1));
3029 lowerCall(Call);
3030 return;
3031 }
3032 case Intrinsics::Memcpy: {
Andrew Scull9df4a372015-08-07 09:19:35 -07003033 lowerMemcpy(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003034 return;
3035 }
3036 case Intrinsics::Memmove: {
3037 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
3038 Call->addArg(Instr->getArg(0));
3039 Call->addArg(Instr->getArg(1));
3040 Call->addArg(Instr->getArg(2));
3041 lowerCall(Call);
3042 return;
3043 }
3044 case Intrinsics::Memset: {
Andrew Scull713dbde2015-08-04 14:25:27 -07003045 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003046 return;
3047 }
3048 case Intrinsics::NaClReadTP: {
3049 if (Ctx->getFlags().getUseSandboxing()) {
John Porto5aeed952015-07-21 13:39:09 -07003050 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand);
John Porto7e93c622015-06-23 10:58:57 -07003051 Variable *Dest = Instr->getDest();
3052 Variable *T = nullptr;
3053 _mov(T, Src);
3054 _mov(Dest, T);
3055 } else {
3056 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
3057 lowerCall(Call);
3058 }
3059 return;
3060 }
3061 case Intrinsics::Setjmp: {
3062 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
3063 Call->addArg(Instr->getArg(0));
3064 lowerCall(Call);
3065 return;
3066 }
3067 case Intrinsics::Sqrt: {
3068 Operand *Src = legalize(Instr->getArg(0));
3069 Variable *Dest = Instr->getDest();
3070 Variable *T = makeReg(Dest->getType());
3071 _sqrtss(T, Src);
3072 _mov(Dest, T);
3073 return;
3074 }
3075 case Intrinsics::Stacksave: {
John Porto5d0acff2015-06-30 15:29:21 -07003076 Variable *esp =
3077 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
John Porto7e93c622015-06-23 10:58:57 -07003078 Variable *Dest = Instr->getDest();
3079 _mov(Dest, esp);
3080 return;
3081 }
3082 case Intrinsics::Stackrestore: {
John Porto5d0acff2015-06-30 15:29:21 -07003083 Variable *esp =
3084 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
John Porto7e93c622015-06-23 10:58:57 -07003085 _mov_nonkillable(esp, Instr->getArg(0));
3086 return;
3087 }
3088 case Intrinsics::Trap:
3089 _ud2();
3090 return;
3091 case Intrinsics::UnknownIntrinsic:
3092 Func->setError("Should not be lowering UnknownIntrinsic");
3093 return;
3094 }
3095 return;
3096}
3097
3098template <class Machine>
3099void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3100 Operand *Ptr, Operand *Expected,
3101 Operand *Desired) {
3102 if (Expected->getType() == IceType_i64) {
3103 // Reserve the pre-colored registers first, before adding any more
3104 // infinite-weight variables from formMemoryOperand's legalization.
John Porto5d0acff2015-06-30 15:29:21 -07003105 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3106 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3107 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3108 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
John Porto7e93c622015-06-23 10:58:57 -07003109 _mov(T_eax, loOperand(Expected));
3110 _mov(T_edx, hiOperand(Expected));
3111 _mov(T_ebx, loOperand(Desired));
3112 _mov(T_ecx, hiOperand(Desired));
John Porto921856d2015-07-07 11:56:26 -07003113 typename Traits::X86OperandMem *Addr =
3114 formMemoryOperand(Ptr, Expected->getType());
John Porto7e93c622015-06-23 10:58:57 -07003115 const bool Locked = true;
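    // cmpxchg8b compares edx:eax against the 64-bit memory operand; if they
    // match, it stores ecx:ebx there, otherwise it loads the memory value
    // into edx:eax. Either way edx:eax ends up holding the old memory
    // contents, which is exactly what DestPrev needs below.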
3116 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3117 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3118 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3119 _mov(DestLo, T_eax);
3120 _mov(DestHi, T_edx);
3121 return;
3122 }
John Porto5d0acff2015-06-30 15:29:21 -07003123 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07003124 _mov(T_eax, Expected);
John Porto921856d2015-07-07 11:56:26 -07003125 typename Traits::X86OperandMem *Addr =
3126 formMemoryOperand(Ptr, Expected->getType());
Andrew Scull97f460d2015-07-21 10:07:42 -07003127 Variable *DesiredReg = legalizeToReg(Desired);
John Porto7e93c622015-06-23 10:58:57 -07003128 const bool Locked = true;
3129 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3130 _mov(DestPrev, T_eax);
3131}
3132
3133template <class Machine>
3134bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3135 Operand *PtrToMem,
3136 Operand *Expected,
3137 Operand *Desired) {
3138 if (Ctx->getFlags().getOptLevel() == Opt_m1)
3139 return false;
3140 // Peek ahead a few instructions and see how Dest is used.
3141 // It's very common to have:
3142 //
3143 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3144 // [%y_phi = ...] // list of phi stores
3145 // %p = icmp eq i32 %x, %expected
3146 // br i1 %p, label %l1, label %l2
3147 //
3148 // which we can optimize into:
3149 //
3150 // %x = <cmpxchg code>
3151 // [%y_phi = ...] // list of phi stores
3152 // br eq, %l1, %l2
3153 InstList::iterator I = Context.getCur();
3154 // I is currently the InstIntrinsicCall. Peek past that.
3155 // This assumes that the atomic cmpxchg has not been lowered yet,
3156 // so that the instructions seen in the scan from "Cur" are simple.
3157 assert(llvm::isa<InstIntrinsicCall>(*I));
3158 Inst *NextInst = Context.getNextInst(I);
3159 if (!NextInst)
3160 return false;
3161 // There might be phi assignments right before the compare+branch, since this
3162 // could be a backward branch for a loop. This placement of assignments is
3163 // determined by placePhiStores().
3164 std::vector<InstAssign *> PhiAssigns;
3165 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3166 if (PhiAssign->getDest() == Dest)
3167 return false;
3168 PhiAssigns.push_back(PhiAssign);
3169 NextInst = Context.getNextInst(I);
3170 if (!NextInst)
3171 return false;
3172 }
3173 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
3174 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
3175 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
3176 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
3177 return false;
3178 }
3179 NextInst = Context.getNextInst(I);
3180 if (!NextInst)
3181 return false;
3182 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
3183 if (!NextBr->isUnconditional() &&
3184 NextCmp->getDest() == NextBr->getCondition() &&
3185 NextBr->isLastUse(NextCmp->getDest())) {
3186 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3187 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3188 // Lower the phi assignments now, before the branch (same placement
3189 // as before).
3190 InstAssign *PhiAssign = PhiAssigns[i];
3191 PhiAssign->setDeleted();
3192 lowerAssign(PhiAssign);
3193 Context.advanceNext();
3194 }
John Porto5d0acff2015-06-30 15:29:21 -07003195 _br(Traits::Cond::Br_e, NextBr->getTargetTrue(),
3196 NextBr->getTargetFalse());
John Porto7e93c622015-06-23 10:58:57 -07003197 // Skip over the old compare and branch, by deleting them.
3198 NextCmp->setDeleted();
3199 NextBr->setDeleted();
3200 Context.advanceNext();
3201 Context.advanceNext();
3202 return true;
3203 }
3204 }
3205 }
3206 return false;
3207}
3208
3209template <class Machine>
3210void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3211 Operand *Ptr, Operand *Val) {
3212 bool NeedsCmpxchg = false;
3213 LowerBinOp Op_Lo = nullptr;
3214 LowerBinOp Op_Hi = nullptr;
3215 switch (Operation) {
3216 default:
3217 Func->setError("Unknown AtomicRMW operation");
3218 return;
3219 case Intrinsics::AtomicAdd: {
3220 if (Dest->getType() == IceType_i64) {
3221 // All the fall-through paths must set this to true; it is only
3222 // used for asserting.
3223 NeedsCmpxchg = true;
3224 Op_Lo = &TargetX86Base<Machine>::_add;
3225 Op_Hi = &TargetX86Base<Machine>::_adc;
3226 break;
3227 }
John Porto921856d2015-07-07 11:56:26 -07003228 typename Traits::X86OperandMem *Addr =
3229 formMemoryOperand(Ptr, Dest->getType());
John Porto7e93c622015-06-23 10:58:57 -07003230 const bool Locked = true;
3231 Variable *T = nullptr;
3232 _mov(T, Val);
3233 _xadd(Addr, T, Locked);
3234 _mov(Dest, T);
3235 return;
3236 }
3237 case Intrinsics::AtomicSub: {
3238 if (Dest->getType() == IceType_i64) {
3239 NeedsCmpxchg = true;
3240 Op_Lo = &TargetX86Base<Machine>::_sub;
3241 Op_Hi = &TargetX86Base<Machine>::_sbb;
3242 break;
3243 }
John Porto921856d2015-07-07 11:56:26 -07003244 typename Traits::X86OperandMem *Addr =
3245 formMemoryOperand(Ptr, Dest->getType());
John Porto7e93c622015-06-23 10:58:57 -07003246 const bool Locked = true;
3247 Variable *T = nullptr;
3248 _mov(T, Val);
3249 _neg(T);
3250 _xadd(Addr, T, Locked);
3251 _mov(Dest, T);
3252 return;
3253 }
3254 case Intrinsics::AtomicOr:
3255 // TODO(jvoung): If Dest is null or dead, then some of these
3256 // operations do not need an "exchange", but just a locked op.
3257 // That appears to be "worth" it for sub, or, and, and xor.
3258 // xadd is probably fine vs lock add for add, and xchg is fine
3259 // vs an atomic store.
3260 NeedsCmpxchg = true;
3261 Op_Lo = &TargetX86Base<Machine>::_or;
3262 Op_Hi = &TargetX86Base<Machine>::_or;
3263 break;
3264 case Intrinsics::AtomicAnd:
3265 NeedsCmpxchg = true;
3266 Op_Lo = &TargetX86Base<Machine>::_and;
3267 Op_Hi = &TargetX86Base<Machine>::_and;
3268 break;
3269 case Intrinsics::AtomicXor:
3270 NeedsCmpxchg = true;
3271 Op_Lo = &TargetX86Base<Machine>::_xor;
3272 Op_Hi = &TargetX86Base<Machine>::_xor;
3273 break;
3274 case Intrinsics::AtomicExchange:
3275 if (Dest->getType() == IceType_i64) {
3276 NeedsCmpxchg = true;
3277 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3278 // just need to be moved to the ecx and ebx registers.
3279 Op_Lo = nullptr;
3280 Op_Hi = nullptr;
3281 break;
3282 }
John Porto921856d2015-07-07 11:56:26 -07003283 typename Traits::X86OperandMem *Addr =
3284 formMemoryOperand(Ptr, Dest->getType());
John Porto7e93c622015-06-23 10:58:57 -07003285 Variable *T = nullptr;
3286 _mov(T, Val);
3287 _xchg(Addr, T);
3288 _mov(Dest, T);
3289 return;
3290 }
3291 // Otherwise, we need a cmpxchg loop.
3292 (void)NeedsCmpxchg;
3293 assert(NeedsCmpxchg);
3294 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3295}
3296
3297template <class Machine>
3298void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
3299 LowerBinOp Op_Hi,
3300 Variable *Dest,
3301 Operand *Ptr,
3302 Operand *Val) {
3303 // Expand a more complex RMW operation as a cmpxchg loop:
3304 // For 64-bit:
3305 // mov eax, [ptr]
3306 // mov edx, [ptr + 4]
3307 // .LABEL:
3308 // mov ebx, eax
3309 // <Op_Lo> ebx, <desired_adj_lo>
3310 // mov ecx, edx
3311 // <Op_Hi> ecx, <desired_adj_hi>
3312 // lock cmpxchg8b [ptr]
3313 // jne .LABEL
3314 // mov <dest_lo>, eax
3315 // mov <dest_hi>, edx
3316 //
3317 // For 32-bit:
3318 // mov eax, [ptr]
3319 // .LABEL:
3320 // mov <reg>, eax
3321 // op <reg>, [desired_adj]
3322 // lock cmpxchg [ptr], <reg>
3323 // jne .LABEL
3324 // mov <dest>, eax
3325 //
3326 // If Op_{Lo,Hi} are nullptr, then just copy the value.
3327 Val = legalize(Val);
3328 Type Ty = Val->getType();
3329 if (Ty == IceType_i64) {
John Porto5d0acff2015-06-30 15:29:21 -07003330 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3331 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
John Porto921856d2015-07-07 11:56:26 -07003332 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
John Porto7e93c622015-06-23 10:58:57 -07003333 _mov(T_eax, loOperand(Addr));
3334 _mov(T_edx, hiOperand(Addr));
John Porto5d0acff2015-06-30 15:29:21 -07003335 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3336 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
John Porto921856d2015-07-07 11:56:26 -07003337 typename Traits::Insts::Label *Label =
3338 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07003339 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
3340 if (!IsXchg8b) {
3341 Context.insert(Label);
3342 _mov(T_ebx, T_eax);
3343 (this->*Op_Lo)(T_ebx, loOperand(Val));
3344 _mov(T_ecx, T_edx);
3345 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3346 } else {
3347 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3348 // It just needs the Val loaded into ebx and ecx.
3349 // That can also be done before the loop.
3350 _mov(T_ebx, loOperand(Val));
3351 _mov(T_ecx, hiOperand(Val));
3352 Context.insert(Label);
3353 }
3354 const bool Locked = true;
3355 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
John Porto5d0acff2015-06-30 15:29:21 -07003356 _br(Traits::Cond::Br_ne, Label);
John Porto7e93c622015-06-23 10:58:57 -07003357 if (!IsXchg8b) {
3358 // If Val is a variable, model the extended live range of Val through
3359 // the end of the loop, since it will be re-used by the loop.
3360 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3361 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3362 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3363 Context.insert(InstFakeUse::create(Func, ValLo));
3364 Context.insert(InstFakeUse::create(Func, ValHi));
3365 }
3366 } else {
3367 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3368 Context.insert(InstFakeUse::create(Func, T_ebx));
3369 Context.insert(InstFakeUse::create(Func, T_ecx));
3370 }
3371 // The address base (if any) is also reused in the loop.
3372 if (Variable *Base = Addr->getBase())
3373 Context.insert(InstFakeUse::create(Func, Base));
3374 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3375 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3376 _mov(DestLo, T_eax);
3377 _mov(DestHi, T_edx);
3378 return;
3379 }
John Porto921856d2015-07-07 11:56:26 -07003380 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
John Porto5d0acff2015-06-30 15:29:21 -07003381 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07003382 _mov(T_eax, Addr);
John Porto921856d2015-07-07 11:56:26 -07003383 typename Traits::Insts::Label *Label =
3384 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07003385 Context.insert(Label);
3386 // We want T to be in a different register than eax, so don't use
3387 // _mov(T, T_eax) with T == nullptr; make T a fresh register instead.
3388 Variable *T = makeReg(Ty);
3389 _mov(T, T_eax);
3390 (this->*Op_Lo)(T, Val);
3391 const bool Locked = true;
3392 _cmpxchg(Addr, T_eax, T, Locked);
John Porto5d0acff2015-06-30 15:29:21 -07003393 _br(Traits::Cond::Br_ne, Label);
John Porto7e93c622015-06-23 10:58:57 -07003394 // If Val is a variable, model the extended live range of Val through
3395 // the end of the loop, since it will be re-used by the loop.
3396 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3397 Context.insert(InstFakeUse::create(Func, ValVar));
3398 }
3399 // The address base (if any) is also reused in the loop.
3400 if (Variable *Base = Addr->getBase())
3401 Context.insert(InstFakeUse::create(Func, Base));
3402 _mov(Dest, T_eax);
3403}
3404
Andrew Scull9612d322015-07-06 14:53:25 -07003405/// Lowers count {trailing, leading} zeros intrinsic.
3406///
3407/// We could do constant folding here, but that should have
3408/// been done by the front-end/middle-end optimizations.
John Porto7e93c622015-06-23 10:58:57 -07003409template <class Machine>
3410void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3411 Operand *FirstVal,
3412 Operand *SecondVal) {
3413 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3414 // Then the instructions will handle the Val == 0 case much more simply
3415 // and won't require conversion from bit position to number of zeros.
3416 //
3417 // Otherwise:
3418 // bsr IF_NOT_ZERO, Val
3419 // mov T_DEST, 63
3420 // cmovne T_DEST, IF_NOT_ZERO
3421 // xor T_DEST, 31
3422 // mov DEST, T_DEST
3423 //
3424 // NOTE: T_DEST must be a register because cmov requires its dest to be a
3425 // register. Also, bsf and bsr require their dest to be a register.
3426 //
3427 // The xor DEST, 31 converts a bit position to # of leading zeroes.
3428 // E.g., for 000... 00001100, bsr will say that the most significant bit
3429 // set is at position 3, while the number of leading zeros is 28. Xor is
3430 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
3431 //
3432 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3433 // are all zero, and compute the result for that case (checking the lower
3434 // 32 bits). Then actually compute the result for the upper bits and
3435 // cmov in the result from the lower computation if the earlier speculation
3436 // was correct.
3437 //
3438 // Cttz is similar, but uses bsf instead, and doesn't require the xor
3439 // bit position conversion, and the speculation is reversed.
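  // A worked 64-bit ctlz example for 0x0000000000000F00: bsr on the low
  // word (0xF00) reports bit position 11, xor 31 gives 20, and add 32 gives
  // 52 on the speculative (high word == 0) path. Here the high word really
  // is zero, so the final cmov keeps 52 == ctlz for this 64-bit value.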
3440 assert(Ty == IceType_i32 || Ty == IceType_i64);
3441 Variable *T = makeReg(IceType_i32);
3442 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3443 if (Cttz) {
3444 _bsf(T, FirstValRM);
3445 } else {
3446 _bsr(T, FirstValRM);
3447 }
3448 Variable *T_Dest = makeReg(IceType_i32);
3449 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3450 Constant *ThirtyOne = Ctx->getConstantInt32(31);
3451 if (Cttz) {
3452 _mov(T_Dest, ThirtyTwo);
3453 } else {
3454 Constant *SixtyThree = Ctx->getConstantInt32(63);
3455 _mov(T_Dest, SixtyThree);
3456 }
John Porto5d0acff2015-06-30 15:29:21 -07003457 _cmov(T_Dest, T, Traits::Cond::Br_ne);
John Porto7e93c622015-06-23 10:58:57 -07003458 if (!Cttz) {
3459 _xor(T_Dest, ThirtyOne);
3460 }
3461 if (Ty == IceType_i32) {
3462 _mov(Dest, T_Dest);
3463 return;
3464 }
3465 _add(T_Dest, ThirtyTwo);
3466 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3467 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3468 // Will be using "test" on this, so we need a registerized variable.
Andrew Scull97f460d2015-07-21 10:07:42 -07003469 Variable *SecondVar = legalizeToReg(SecondVal);
John Porto7e93c622015-06-23 10:58:57 -07003470 Variable *T_Dest2 = makeReg(IceType_i32);
3471 if (Cttz) {
3472 _bsf(T_Dest2, SecondVar);
3473 } else {
3474 _bsr(T_Dest2, SecondVar);
3475 _xor(T_Dest2, ThirtyOne);
3476 }
3477 _test(SecondVar, SecondVar);
John Porto5d0acff2015-06-30 15:29:21 -07003478 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
John Porto7e93c622015-06-23 10:58:57 -07003479 _mov(DestLo, T_Dest2);
3480 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3481}
3482
Andrew Scull86df4e92015-07-30 13:54:44 -07003483template <class Machine>
Andrew Scull9df4a372015-08-07 09:19:35 -07003484void TargetX86Base<Machine>::lowerMemcpy(Operand *Dest, Operand *Src,
3485 Operand *Count) {
3486 // There is a load and store for each chunk in the unroll
3487 constexpr uint32_t UNROLL_LIMIT = 8;
3488 constexpr uint32_t BytesPerStorep = 16;
3489 constexpr uint32_t BytesPerStoreq = 8;
3490 constexpr uint32_t BytesPerStorei32 = 4;
3491 constexpr uint32_t BytesPerStorei16 = 2;
3492 constexpr uint32_t BytesPerStorei8 = 1;
3493
3494 // Check if the operands are constants
3495 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
3496 const bool IsCountConst = CountConst != nullptr;
3497 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
3498
3499 if (IsCountConst && CountValue <= BytesPerStorep * UNROLL_LIMIT) {
3500 // Unlikely, but nothing to do if it does happen
3501 if (CountValue == 0)
3502 return;
3503
3504 Variable *SrcBase = legalizeToReg(Src);
3505 Variable *DestBase = legalizeToReg(Dest);
3506
3507 auto lowerCopy = [this, DestBase, SrcBase](Type Ty, uint32_t OffsetAmt) {
3508 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
3509 // TODO(ascull): this or add nullptr test to _movp, _movq
3510 Variable *Data = makeReg(Ty);
3511
3512 // TODO(ascull): is 64-bit better with vector or scalar movq?
3513 auto *SrcMem = Traits::X86OperandMem::create(Func, Ty, SrcBase, Offset);
3514 if (isVectorType(Ty))
3515 _movp(Data, SrcMem);
3516 else if (Ty == IceType_f64)
3517 _movq(Data, SrcMem);
3518 else
3519 _mov(Data, SrcMem);
3520
3521 auto *DestMem = Traits::X86OperandMem::create(Func, Ty, DestBase, Offset);
3522 if (isVectorType(Ty))
3523 _storep(Data, DestMem);
3524 else if (Ty == IceType_f64)
3525 _storeq(Data, DestMem);
3526 else
3527 _store(Data, DestMem);
3528 };
3529
3530 // Lowers the assignment to the remaining bytes. Assumes the original size
3531 // was large enough to allow for overlaps.
3532 auto lowerLeftOvers = [this, lowerCopy, CountValue](uint32_t Size) {
3533 if (Size > BytesPerStoreq) {
3534 lowerCopy(IceType_v16i8, CountValue - BytesPerStorep);
3535 } else if (Size > BytesPerStorei32) {
3536 lowerCopy(IceType_f64, CountValue - BytesPerStoreq);
3537 } else if (Size > BytesPerStorei16) {
3538 lowerCopy(IceType_i32, CountValue - BytesPerStorei32);
3539 } else if (Size > BytesPerStorei8) {
3540 lowerCopy(IceType_i16, CountValue - BytesPerStorei16);
3541 } else if (Size == BytesPerStorei8) {
3542 lowerCopy(IceType_i8, CountValue - BytesPerStorei8);
3543 }
3544 };
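    // For example, a constant count of 21 emits one 16-byte vector copy at
    // offset 0 and then lowerLeftOvers(5), which emits an 8-byte copy at
    // offset 13; bytes 13..15 are simply stored twice with the same values,
    // which is the harmless overlap the comment above refers to.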
3545
3546 if (CountValue >= BytesPerStorep) {
3547 // Use large vector operations
3548 for (uint32_t N = CountValue & 0xFFFFFFF0; N != 0;) {
3549 N -= BytesPerStorep;
3550 lowerCopy(IceType_v16i8, N);
3551 }
3552 lowerLeftOvers(CountValue & 0xF);
3553 return;
3554 }
3555
3556 // Too small to use large vector operations so use small ones instead
3557 if (CountValue >= BytesPerStoreq) {
3558 lowerCopy(IceType_f64, 0);
3559 lowerLeftOvers(CountValue - BytesPerStoreq);
3560 return;
3561 }
3562
3563 // Too small for vector operations so use scalar ones
3564 if (CountValue >= BytesPerStorei32) {
3565 lowerCopy(IceType_i32, 0);
3566 lowerLeftOvers(CountValue - BytesPerStorei32);
3567 return;
3568 }
3569
3570 // 3 is the awkward size as it is too small for the vector or 32-bit
3571 // operations and will not work with lowerLeftOvers as there is no valid
3572 // overlap.
3573 if (CountValue == 3) {
3574 lowerCopy(IceType_i16, 0);
3575 lowerCopy(IceType_i8, 2);
3576 return;
3577 }
3578
3579 // 1 or 2 can be done in a single scalar copy
3580 lowerLeftOvers(CountValue);
3581 return;
3582 }
3583
3584 // Fall back on a function call
3585 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
3586 Call->addArg(Dest);
3587 Call->addArg(Src);
3588 Call->addArg(Count);
3589 lowerCall(Call);
3590}
3591
3592template <class Machine>
Andrew Scull713dbde2015-08-04 14:25:27 -07003593void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val,
3594 Operand *Count) {
3595 constexpr uint32_t UNROLL_LIMIT = 16;
Andrew Scull9df4a372015-08-07 09:19:35 -07003596 constexpr uint32_t BytesPerStorep = 16;
3597 constexpr uint32_t BytesPerStoreq = 8;
3598 constexpr uint32_t BytesPerStorei32 = 4;
3599 constexpr uint32_t BytesPerStorei16 = 2;
3600 constexpr uint32_t BytesPerStorei8 = 1;
Andrew Scull713dbde2015-08-04 14:25:27 -07003601 assert(Val->getType() == IceType_i8);
3602
3603 // Check if the operands are constants
3604 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
3605 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
3606 const bool IsCountConst = CountConst != nullptr;
3607 const bool IsValConst = ValConst != nullptr;
3608 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
3609 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;
3610
3611 // Unlikely, but nothing to do if it does happen
3612 if (IsCountConst && CountValue == 0)
3613 return;
3614
3615 // TODO(ascull): if the count is constant but val is not, it would be possible
3616 // to inline by spreading the value across 4 bytes and accessing subregs e.g.
3617 // eax, ax and al.
3618 if (IsCountConst && IsValConst) {
Andrew Scull9df4a372015-08-07 09:19:35 -07003619 Variable *Base = nullptr;
3620 const uint32_t SpreadValue =
3621 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue;
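    // e.g. ValValue == 0xAB spreads to SpreadValue == 0xABABABAB, so a
    // single 32-bit (or narrower) store writes the repeated byte pattern
    // that memset requires.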
3622 Variable *VecReg = nullptr;
Andrew Scull713dbde2015-08-04 14:25:27 -07003623
Andrew Scull9df4a372015-08-07 09:19:35 -07003624 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty,
Jim Stichnoth992f91d2015-08-10 11:18:38 -07003625 uint32_t OffsetAmt) {
Andrew Scull9df4a372015-08-07 09:19:35 -07003626 assert(Base != nullptr);
3627 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
Andrew Scull713dbde2015-08-04 14:25:27 -07003628
Andrew Scull9df4a372015-08-07 09:19:35 -07003629 // TODO(ascull): is 64-bit better with vector or scalar movq?
3630 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
3631 if (isVectorType(Ty)) {
Andrew Scull713dbde2015-08-04 14:25:27 -07003632 assert(VecReg != nullptr);
Andrew Scull713dbde2015-08-04 14:25:27 -07003633 _storep(VecReg, Mem);
Andrew Scull9df4a372015-08-07 09:19:35 -07003634 } else if (Ty == IceType_i64) {
Andrew Scull713dbde2015-08-04 14:25:27 -07003635 assert(VecReg != nullptr);
Andrew Scull713dbde2015-08-04 14:25:27 -07003636 _storeq(VecReg, Mem);
Andrew Scull9df4a372015-08-07 09:19:35 -07003637 } else {
3638 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
Andrew Scull713dbde2015-08-04 14:25:27 -07003639 }
3640 };
3641
Andrew Scull9df4a372015-08-07 09:19:35 -07003642 // Lowers the assignment to the remaining bytes. Assumes the original size
3643 // was large enough to allow for overlaps.
3644 auto lowerLeftOvers = [this, lowerSet, CountValue](uint32_t Size) {
3645 if (Size > BytesPerStoreq) {
3646 lowerSet(IceType_v16i8, CountValue - BytesPerStorep);
3647 } else if (Size > BytesPerStorei32) {
3648 lowerSet(IceType_i64, CountValue - BytesPerStoreq);
3649 } else if (Size > BytesPerStorei16) {
3650 lowerSet(IceType_i32, CountValue - BytesPerStorei32);
3651 } else if (Size > BytesPerStorei8) {
3652 lowerSet(IceType_i16, CountValue - BytesPerStorei16);
3653 } else if (Size == BytesPerStorei8) {
3654 lowerSet(IceType_i8, CountValue - BytesPerStorei8);
3655 }
3656 };
3657
3658 // When the value is zero it can be loaded into a vector register cheaply
3659 // using the xor trick.
3660 if (ValValue == 0 && CountValue >= BytesPerStoreq &&
Andrew Scull713dbde2015-08-04 14:25:27 -07003661 CountValue <= BytesPerStorep * UNROLL_LIMIT) {
Andrew Scull9df4a372015-08-07 09:19:35 -07003662 Base = legalizeToReg(Dest);
3663 VecReg = makeVectorOfZeros(IceType_v16i8);
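      // makeVectorOfZeros presumably lowers to pxor reg, reg (an assumption
      // about that helper): it zeroes the register without a memory load
      // and breaks any dependency on the register's previous value.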
Andrew Scull713dbde2015-08-04 14:25:27 -07003664
3665 // Too small to use large vector operations so use small ones instead
Andrew Scull9df4a372015-08-07 09:19:35 -07003666 if (CountValue < BytesPerStorep) {
3667 lowerSet(IceType_i64, 0);
3668 lowerLeftOvers(CountValue - BytesPerStoreq);
Andrew Scull713dbde2015-08-04 14:25:27 -07003669 return;
3670 }
3671
Andrew Scull713dbde2015-08-04 14:25:27 -07003672 // Use large vector operations
3673 for (uint32_t N = CountValue & 0xFFFFFFF0; N != 0;) {
3674 N -= 16;
Andrew Scull9df4a372015-08-07 09:19:35 -07003675 lowerSet(IceType_v16i8, N);
Andrew Scull713dbde2015-08-04 14:25:27 -07003676 }
Andrew Scull9df4a372015-08-07 09:19:35 -07003677 lowerLeftOvers(CountValue & 0xF);
Andrew Scull713dbde2015-08-04 14:25:27 -07003678 return;
3679 }
3680
3681 // TODO(ascull): load val into reg and select subregs e.g. eax, ax, al?
Andrew Scull9df4a372015-08-07 09:19:35 -07003682 if (CountValue <= BytesPerStorei32 * UNROLL_LIMIT) {
3683 Base = legalizeToReg(Dest);
3684 // 3 is the awkward size as it is too small for the vector or 32-bit
3685 // operations and will not work with lowerLeftOvers as there is no valid
3686 // overlap.
3687 if (CountValue == 3) {
3688 lowerSet(IceType_i16, 0);
3689 lowerSet(IceType_i8, 2);
3690 return;
Andrew Scull713dbde2015-08-04 14:25:27 -07003691 }
Andrew Scull9df4a372015-08-07 09:19:35 -07003692
3693 // TODO(ascull): 64-bit can do better with a 64-bit mov
3694 for (uint32_t N = CountValue & 0xFFFFFFFC; N != 0;) {
3695 N -= 4;
3696 lowerSet(IceType_i32, N);
3697 }
3698 lowerLeftOvers(CountValue & 0x3);
Andrew Scull713dbde2015-08-04 14:25:27 -07003699 return;
3700 }
3701 }
3702
3703 // Fall back on calling the memset function. The value operand needs to be
3704 // extended to a stack slot size because the PNaCl ABI requires arguments to
3705 // be at least 32 bits wide.
3706 Operand *ValExt;
3707 if (IsValConst) {
3708 ValExt = Ctx->getConstantInt(stackSlotType(), ValValue);
3709 } else {
3710 Variable *ValExtVar = Func->makeVariable(stackSlotType());
3711 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val));
3712 ValExt = ValExtVar;
3713 }
3714 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
3715 Call->addArg(Dest);
3716 Call->addArg(ValExt);
3717 Call->addArg(Count);
3718 lowerCall(Call);
3719}
3720
3721template <class Machine>
Andrew Scull86df4e92015-07-30 13:54:44 -07003722void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) {
3723 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
3724 if (NeedSandboxing) {
3725 _bundle_lock();
3726 const SizeT BundleSize =
3727 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
3728 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1)));
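    // With the usual 32-byte NaCl bundles, BundleSize - 1 == 31 and the
    // mask is 0xFFFFFFE0; clearing the low five bits of the target forces
    // the indirect jump to land on a bundle boundary.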
3729 }
3730 _jmp(Target);
3731 if (NeedSandboxing)
3732 _bundle_unlock();
3733}
3734
John Porto5aeed952015-07-21 13:39:09 -07003735inline bool isAdd(const Inst *Inst) {
John Porto7e93c622015-06-23 10:58:57 -07003736 if (const InstArithmetic *Arith =
3737 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
3738 return (Arith->getOp() == InstArithmetic::Add);
3739 }
3740 return false;
3741}
3742
John Porto5aeed952015-07-21 13:39:09 -07003743inline void dumpAddressOpt(const Cfg *Func, const Variable *Base,
3744 const Variable *Index, uint16_t Shift,
3745 int32_t Offset, const Inst *Reason) {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07003746 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07003747 return;
3748 if (!Func->isVerbose(IceV_AddrOpt))
3749 return;
3750 OstreamLocker L(Func->getContext());
3751 Ostream &Str = Func->getContext()->getStrDump();
3752 Str << "Instruction: ";
3753 Reason->dumpDecorated(Func);
3754 Str << " results in Base=";
3755 if (Base)
3756 Base->dump(Func);
3757 else
3758 Str << "<null>";
3759 Str << ", Index=";
3760 if (Index)
3761 Index->dump(Func);
3762 else
3763 Str << "<null>";
3764 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
3765}
3766
John Porto5aeed952015-07-21 13:39:09 -07003767inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata,
3768 Variable *&Var, const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07003769 // Var originates from Var=SrcVar ==>
3770 // set Var:=SrcVar
3771 if (Var == nullptr)
3772 return false;
3773 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
3774 assert(!VMetadata->isMultiDef(Var));
3775 if (llvm::isa<InstAssign>(VarAssign)) {
3776 Operand *SrcOp = VarAssign->getSrc(0);
3777 assert(SrcOp);
3778 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
3779 if (!VMetadata->isMultiDef(SrcVar) &&
3780 // TODO: ensure SrcVar stays single-BB
3781 true) {
3782 Var = SrcVar;
3783 Reason = VarAssign;
3784 return true;
3785 }
3786 }
3787 }
3788 }
3789 return false;
3790}
3791
John Porto5aeed952015-07-21 13:39:09 -07003792inline bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata,
3793 Variable *&Base, Variable *&Index,
3794 uint16_t &Shift, const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07003795 // Index==nullptr && Base is Base=Var1+Var2 ==>
3796 // set Base=Var1, Index=Var2, Shift=0
3797 if (Base == nullptr)
3798 return false;
3799 if (Index != nullptr)
3800 return false;
3801 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
3802 if (BaseInst == nullptr)
3803 return false;
3804 assert(!VMetadata->isMultiDef(Base));
3805 if (BaseInst->getSrcSize() < 2)
3806 return false;
3807 if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
3808 if (VMetadata->isMultiDef(Var1))
3809 return false;
3810 if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
3811 if (VMetadata->isMultiDef(Var2))
3812 return false;
3813 if (isAdd(BaseInst) &&
3814 // TODO: ensure Var1 and Var2 stay single-BB
3815 true) {
3816 Base = Var1;
3817 Index = Var2;
3818 Shift = 0; // should already have been 0
3819 Reason = BaseInst;
3820 return true;
3821 }
3822 }
3823 }
3824 return false;
3825}
3826
John Porto5aeed952015-07-21 13:39:09 -07003827inline bool matchShiftedIndex(const VariablesMetadata *VMetadata,
3828 Variable *&Index, uint16_t &Shift,
3829 const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07003830 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
3831 // Index=Var, Shift+=log2(Const)
3832 if (Index == nullptr)
3833 return false;
3834 const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
3835 if (IndexInst == nullptr)
3836 return false;
3837 assert(!VMetadata->isMultiDef(Index));
3838 if (IndexInst->getSrcSize() < 2)
3839 return false;
3840 if (const InstArithmetic *ArithInst =
3841 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
3842 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3843 if (ConstantInteger32 *Const =
3844 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
3845 if (ArithInst->getOp() == InstArithmetic::Mul &&
3846 !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
3847 uint64_t Mult = Const->getValue();
3848 uint32_t LogMult;
3849 switch (Mult) {
3850 case 1:
3851 LogMult = 0;
3852 break;
3853 case 2:
3854 LogMult = 1;
3855 break;
3856 case 4:
3857 LogMult = 2;
3858 break;
3859 case 8:
3860 LogMult = 3;
3861 break;
3862 default:
3863 return false;
3864 }
3865 if (Shift + LogMult <= 3) {
3866 Index = Var;
3867 Shift += LogMult;
3868 Reason = IndexInst;
3869 return true;
3870 }
3871 }
3872 }
3873 }
3874 }
3875 return false;
3876}
3877
John Porto5aeed952015-07-21 13:39:09 -07003878inline bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
3879 int32_t &Offset, const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07003880 // Base is Base=Var+Const || Base is Base=Const+Var ==>
3881 // set Base=Var, Offset+=Const
3882 // Base is Base=Var-Const ==>
3883 // set Base=Var, Offset-=Const
3884 if (Base == nullptr)
3885 return false;
3886 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
3887 if (BaseInst == nullptr)
3888 return false;
3889 assert(!VMetadata->isMultiDef(Base));
3890 if (const InstArithmetic *ArithInst =
3891 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
3892 if (ArithInst->getOp() != InstArithmetic::Add &&
3893 ArithInst->getOp() != InstArithmetic::Sub)
3894 return false;
3895 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
3896 Variable *Var = nullptr;
3897 ConstantInteger32 *Const = nullptr;
3898 if (Variable *VariableOperand =
3899 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3900 Var = VariableOperand;
3901 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
3902 } else if (IsAdd) {
3903 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
3904 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
3905 }
3906 if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
3907 return false;
3908 int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
3909 if (Utils::WouldOverflowAdd(Offset, MoreOffset))
3910 return false;
3911 Base = Var;
3912 Offset += MoreOffset;
3913 Reason = BaseInst;
3914 return true;
3915 }
3916 return false;
3917}
3918
John Porto5aeed952015-07-21 13:39:09 -07003919inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
3920 Variable *&Index, uint16_t &Shift,
3921 int32_t &Offset) {
John Porto7e93c622015-06-23 10:58:57 -07003922 Func->resetCurrentNode();
3923 if (Func->isVerbose(IceV_AddrOpt)) {
3924 OstreamLocker L(Func->getContext());
3925 Ostream &Str = Func->getContext()->getStrDump();
3926 Str << "\nStarting computeAddressOpt for instruction:\n ";
3927 Instr->dumpDecorated(Func);
3928 }
3929 (void)Offset; // TODO: pattern-match for non-zero offsets.
3930 if (Base == nullptr)
3931 return;
3932 // If the Base has more than one use or is live across multiple
3933 // blocks, then don't go further. Alternatively (?), never consider
3934 // a transformation that would change a variable that is currently
3935 // *not* live across basic block boundaries into one that *is*.
3936 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
3937 return;
3938
3939 const VariablesMetadata *VMetadata = Func->getVMetadata();
3940 bool Continue = true;
3941 while (Continue) {
3942 const Inst *Reason = nullptr;
3943 if (matchTransitiveAssign(VMetadata, Base, Reason) ||
3944 matchTransitiveAssign(VMetadata, Index, Reason) ||
3945 matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
3946 matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
3947 matchOffsetBase(VMetadata, Base, Offset, Reason)) {
3948 dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
3949 } else {
3950 Continue = false;
3951 }
3952
3953 // Index is Index=Var<<Const && Const+Shift<=3 ==>
3954 // Index=Var, Shift+=Const
3955
3956 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
3957 // Index=Var, Shift+=log2(Const)
3958
3959 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
3960 // swap(Index,Base)
3961 // Similar for Base=Const*Var and Base=Var<<Const
3962
3963 // Index is Index=Var+Const ==>
3964 // set Index=Var, Offset+=(Const<<Shift)
3965
3966 // Index is Index=Const+Var ==>
3967 // set Index=Var, Offset+=(Const<<Shift)
3968
3969 // Index is Index=Var-Const ==>
3970 // set Index=Var, Offset-=(Const<<Shift)
3971
3972 // TODO: consider overflow issues with respect to Offset.
3973 // TODO: handle symbolic constants.
3974 }
3975}
3976
3977template <class Machine>
3978void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
John Porto921856d2015-07-07 11:56:26 -07003979 // A Load instruction can be treated the same as an Assign instruction, after
3980 // the source operand is transformed into a Traits::X86OperandMem operand.
3981 // Note that the address mode optimization already creates a
3982 // Traits::X86OperandMem operand, so it doesn't need another level of
3983 // transformation.
John Porto7e93c622015-06-23 10:58:57 -07003984 Variable *DestLoad = Load->getDest();
3985 Type Ty = DestLoad->getType();
3986 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
3987 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
3988 lowerAssign(Assign);
3989}
3990
3991template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
3992 Inst *Inst = Context.getCur();
3993 Variable *Dest = Inst->getDest();
3994 Operand *Addr = Inst->getSrc(0);
3995 Variable *Index = nullptr;
3996 uint16_t Shift = 0;
3997 int32_t Offset = 0; // TODO: make Constant
John Porto921856d2015-07-07 11:56:26 -07003998 // Vanilla ICE load instructions should not use the segment registers, and
3999 // computeAddressOpt only works at the level of Variables and Constants, not
4000 // other Traits::X86OperandMem, so there should be no mention of segment
John Porto7e93c622015-06-23 10:58:57 -07004001 // registers there either.
John Porto921856d2015-07-07 11:56:26 -07004002 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
4003 Traits::X86OperandMem::DefaultSegment;
John Porto7e93c622015-06-23 10:58:57 -07004004 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4005 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4006 if (Base && Addr != Base) {
4007 Inst->setDeleted();
4008 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
John Porto921856d2015-07-07 11:56:26 -07004009 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
4010 Index, Shift, SegmentReg);
John Porto7e93c622015-06-23 10:58:57 -07004011 Context.insert(InstLoad::create(Func, Dest, Addr));
4012 }
4013}
4014
4015template <class Machine>
4016void TargetX86Base<Machine>::randomlyInsertNop(float Probability) {
4017 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
4018 if (RNG.getTrueWithProbability(Probability)) {
4019 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS));
4020 }
4021}
4022
4023template <class Machine>
4024void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
4025 Func->setError("Phi found in regular instruction list");
4026}
4027
4028template <class Machine>
John Porto7e93c622015-06-23 10:58:57 -07004029void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
4030 Variable *Dest = Inst->getDest();
4031 Type DestTy = Dest->getType();
4032 Operand *SrcT = Inst->getTrueOperand();
4033 Operand *SrcF = Inst->getFalseOperand();
4034 Operand *Condition = Inst->getCondition();
4035
4036 if (isVectorType(DestTy)) {
4037 Type SrcTy = SrcT->getType();
4038 Variable *T = makeReg(SrcTy);
4039 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4040 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
John Porto5d0acff2015-06-30 15:29:21 -07004041 if (InstructionSet >= Traits::SSE4_1) {
John Porto7e93c622015-06-23 10:58:57 -07004042 // TODO(wala): If the condition operand is a constant, use blendps
4043 // or pblendw.
4044 //
4045 // Use blendvps or pblendvb to implement select.
4046 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4047 SrcTy == IceType_v4f32) {
4048 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
John Porto5d0acff2015-06-30 15:29:21 -07004049 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
John Porto7e93c622015-06-23 10:58:57 -07004050 _movp(xmm0, ConditionRM);
4051 _psll(xmm0, Ctx->getConstantInt8(31));
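        // blendvps selects using each lane's sign bit, so shifting the i1
        // condition left by 31 moves it into bit 31 of every doubleword,
        // forming exactly the mask blendvps expects in xmm0.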
4052 _movp(T, SrcFRM);
4053 _blendvps(T, SrcTRM, xmm0);
4054 _movp(Dest, T);
4055 } else {
4056 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
4057 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
4058 : IceType_v16i8;
John Porto5d0acff2015-06-30 15:29:21 -07004059 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
John Porto7e93c622015-06-23 10:58:57 -07004060 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
4061 _movp(T, SrcFRM);
4062 _pblendvb(T, SrcTRM, xmm0);
4063 _movp(Dest, T);
4064 }
4065 return;
4066 }
John Porto5d0acff2015-06-30 15:29:21 -07004067 // Lower select without Traits::SSE4.1:
John Porto7e93c622015-06-23 10:58:57 -07004068 // a=d?b:c ==>
4069 // if elementtype(d) != i1:
4070 // d=sext(d);
4071 // a=(b&d)|(c&~d);
4072 Variable *T2 = makeReg(SrcTy);
4073 // Sign extend the condition operand if applicable.
4074 if (SrcTy == IceType_v4f32) {
4075 // The sext operation takes only integer arguments.
John Porto5aeed952015-07-21 13:39:09 -07004076 Variable *T3 = Func->makeVariable(IceType_v4i32);
John Porto7e93c622015-06-23 10:58:57 -07004077 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
4078 _movp(T, T3);
4079 } else if (typeElementType(SrcTy) != IceType_i1) {
4080 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
4081 } else {
4082 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4083 _movp(T, ConditionRM);
4084 }
4085 _movp(T2, T);
4086 _pand(T, SrcTRM);
4087 _pandn(T2, SrcFRM);
4088 _por(T, T2);
4089 _movp(Dest, T);
4090
4091 return;
4092 }
4093
John Porto5d0acff2015-06-30 15:29:21 -07004094 typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
John Porto7e93c622015-06-23 10:58:57 -07004095 Operand *CmpOpnd0 = nullptr;
4096 Operand *CmpOpnd1 = nullptr;
4097 // Handle folding opportunities.
4098 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
4099 assert(Producer->isDeleted());
4100 switch (BoolFolding::getProducerKind(Producer)) {
4101 default:
4102 break;
4103 case BoolFolding::PK_Icmp32: {
4104 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
4105 Cond = Traits::getIcmp32Mapping(Cmp->getCondition());
4106 CmpOpnd1 = legalize(Producer->getSrc(1));
4107 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
4108 } break;
4109 }
4110 }
4111 if (CmpOpnd0 == nullptr) {
4112 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
4113 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4114 }
4115 assert(CmpOpnd0);
4116 assert(CmpOpnd1);
4117
4118 _cmp(CmpOpnd0, CmpOpnd1);
4119 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
4120 // The cmov instruction doesn't allow 8-bit or FP operands, so
4121 // we need explicit control flow.
4122 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
John Porto921856d2015-07-07 11:56:26 -07004123 typename Traits::Insts::Label *Label =
4124 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07004125 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4126 _mov(Dest, SrcT);
4127 _br(Cond, Label);
4128 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4129 _mov_nonkillable(Dest, SrcF);
4130 Context.insert(Label);
4131 return;
4132 }
4133 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4134 // But if SrcT is immediate, we might be able to do better, as
4135 // the cmov instruction doesn't allow an immediate operand:
4136 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4137 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4138 std::swap(SrcT, SrcF);
John Porto921856d2015-07-07 11:56:26 -07004139 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
John Porto7e93c622015-06-23 10:58:57 -07004140 }
4141 if (DestTy == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07004142 SrcT = legalizeUndef(SrcT);
4143 SrcF = legalizeUndef(SrcF);
John Porto7e93c622015-06-23 10:58:57 -07004144 // Set the low portion.
4145 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4146 Variable *TLo = nullptr;
4147 Operand *SrcFLo = legalize(loOperand(SrcF));
4148 _mov(TLo, SrcFLo);
4149 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4150 _cmov(TLo, SrcTLo, Cond);
4151 _mov(DestLo, TLo);
4152 // Set the high portion.
4153 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4154 Variable *THi = nullptr;
4155 Operand *SrcFHi = legalize(hiOperand(SrcF));
4156 _mov(THi, SrcFHi);
4157 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
4158 _cmov(THi, SrcTHi, Cond);
4159 _mov(DestHi, THi);
4160 return;
4161 }
4162
4163 assert(DestTy == IceType_i16 || DestTy == IceType_i32);
4164 Variable *T = nullptr;
4165 SrcF = legalize(SrcF);
4166 _mov(T, SrcF);
4167 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4168 _cmov(T, SrcT, Cond);
4169 _mov(Dest, T);
4170}
4171
template <class Machine>
void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  typename Traits::X86OperandMem *NewAddr =
      formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
    Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
    _store(ValueHi,
           llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
    _store(ValueLo,
           llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
  } else if (isVectorType(Ty)) {
    _storep(legalizeToReg(Value), NewAddr);
  } else {
    Value = legalize(Value, Legal_Reg | Legal_Imm);
    _store(Value, NewAddr);
  }
}

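/// Try to fold address arithmetic feeding the current store into the store's
/// memory operand. computeAddressOpt() recovers a base, index, shift, and
/// offset from the address computation; when it finds a better form, the
/// original store is deleted and replaced with a store through the optimized
/// operand, e.g. (a sketch, not the exact input IR) turning
///   t = base + 4*index + 16; store v, t
/// into
///   store v, 16[base, index*4]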
template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
  InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
  Operand *Data = Inst->getData();
  Operand *Addr = Inst->getAddr();
  Variable *Index = nullptr;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  // Vanilla ICE store instructions should not use the segment registers, and
  // computeAddressOpt only works at the level of Variables and Constants, not
  // other Traits::X86OperandMem, so there should be no mention of segment
  // registers there either.
  const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
      Traits::X86OperandMem::DefaultSegment;
  computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    Inst->setDeleted();
    Constant *OffsetOp = Ctx->getConstantInt32(Offset);
    Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
                                         Index, Shift, SegmentReg);
    InstStore *NewStore = InstStore::create(Func, Data, Addr);
    if (Inst->getDest())
      NewStore->setRmwBeacon(Inst->getRmwBeacon());
    Context.insert(NewStore);
  }
}

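/// Lower an inclusive range check into a single unsigned comparison: subtract
/// Min (unless it is zero) and compare against Max - Min, so that a following
/// Br_be branch means "in range" and Br_a means "out of range". For example,
/// checking x against [10, 20] becomes (sketch):
///   mov t, x
///   sub t, 10
///   cmp t, 10
/// Returns the (possibly copied) operand holding the biased comparison value.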
template <class Machine>
Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
                                               uint64_t Min, uint64_t Max) {
  // TODO(ascull): 64-bit should not reach here but only because it is not
  // implemented yet. This should be able to handle the 64-bit case.
  assert(Comparison->getType() != IceType_i64);
  // Subtracting 0 is a nop, so don't do it.
  if (Min != 0) {
    // Avoid clobbering the comparison by copying it.
    Variable *T = nullptr;
    _mov(T, Comparison);
    _sub(T, Ctx->getConstantInt32(Min));
    Comparison = T;
  }

  _cmp(Comparison, Ctx->getConstantInt32(Max - Min));

  return Comparison;
}

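/// Lower a single case cluster produced by clusterizeSwitch(). A JumpTable
/// cluster range-checks the comparison and then jumps through the table; a
/// Range cluster emits one or two equality tests, or a range test via
/// lowerCmpRange(). DoneCmp means the comparison against Case.getLow() has
/// already been emitted, and DefaultTarget, when non-null, receives the final
/// fall-through branch.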
template <class Machine>
void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case,
                                              Operand *Comparison,
                                              bool DoneCmp,
                                              CfgNode *DefaultTarget) {
  switch (Case.getKind()) {
  case CaseCluster::JumpTable: {
    typename Traits::Insts::Label *SkipJumpTable;

    Operand *RangeIndex =
        lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
    if (DefaultTarget == nullptr) {
      // Skip over the jump table logic if the comparison is out of range and
      // there is no default case.
      SkipJumpTable = Traits::Insts::Label::create(Func, this);
      _br(Traits::Cond::Br_a, SkipJumpTable);
    } else {
      _br(Traits::Cond::Br_a, DefaultTarget);
    }

    InstJumpTable *JumpTable = Case.getJumpTable();
    Context.insert(JumpTable);

    // Make sure the index is a register of the same width as the base.
    Variable *Index;
    if (RangeIndex->getType() != getPointerType()) {
      Index = makeReg(getPointerType());
      _movzx(Index, RangeIndex);
    } else {
      Index = legalizeToReg(RangeIndex);
    }

    constexpr RelocOffsetT RelocOffset = 0;
    constexpr bool SuppressMangling = true;
    IceString MangledName = Ctx->mangleName(Func->getFunctionName());
    Constant *Base = Ctx->getConstantSym(
        RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()),
        SuppressMangling);
    Constant *Offset = nullptr;
    uint16_t Shift = typeWidthInBytesLog2(getPointerType());
    // TODO(ascull): remove need for legalize by allowing null base in memop
    auto *TargetInMemory = Traits::X86OperandMem::create(
        Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift);
    Variable *Target = nullptr;
    _mov(Target, TargetInMemory);
    lowerIndirectJump(Target);

    if (DefaultTarget == nullptr)
      Context.insert(SkipJumpTable);
    return;
  }
  case CaseCluster::Range: {
    if (Case.isUnitRange()) {
      // Single item.
      if (!DoneCmp) {
        Constant *Value = Ctx->getConstantInt32(Case.getLow());
        _cmp(Comparison, Value);
      }
      _br(Traits::Cond::Br_e, Case.getTarget());
    } else if (DoneCmp && Case.isPairRange()) {
      // Range of two items with the first item already compared against.
      _br(Traits::Cond::Br_e, Case.getTarget());
      Constant *Value = Ctx->getConstantInt32(Case.getHigh());
      _cmp(Comparison, Value);
      _br(Traits::Cond::Br_e, Case.getTarget());
    } else {
      // Range.
      lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
      _br(Traits::Cond::Br_be, Case.getTarget());
    }
    if (DefaultTarget != nullptr)
      _br(DefaultTarget);
    return;
  }
  }
}

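/// Lower a switch instruction. The cases are first grouped into clusters
/// (unit cases, dense ranges, and jump tables) and then selected with a
/// binary search over the clusters: each interior step compares against a
/// pivot cluster's low value and branches below/above-or-equal, and each leaf
/// lowers one or two clusters directly.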
template <class Machine>
void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
  // Group cases together and navigate through them with a binary search.
  CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
  Operand *Src0 = Inst->getComparison();
  CfgNode *DefaultTarget = Inst->getLabelDefault();

  assert(CaseClusters.size() != 0); // Should always be at least one.

  if (Src0->getType() == IceType_i64) {
    Src0 = legalize(Src0); // get Base/Index into physical registers
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    if (CaseClusters.back().getHigh() > UINT32_MAX) {
      // TODO(ascull): handle the 64-bit case properly (currently a naive
      // version). This might be handled by a higher level lowering of
      // switches.
      SizeT NumCases = Inst->getNumCases();
      if (NumCases >= 2) {
        Src0Lo = legalizeToReg(Src0Lo);
        Src0Hi = legalizeToReg(Src0Hi);
      } else {
        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      }
      for (SizeT I = 0; I < NumCases; ++I) {
        Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
        Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
        typename Traits::Insts::Label *Label =
            Traits::Insts::Label::create(Func, this);
        _cmp(Src0Lo, ValueLo);
        _br(Traits::Cond::Br_ne, Label);
        _cmp(Src0Hi, ValueHi);
        _br(Traits::Cond::Br_e, Inst->getLabel(I));
        Context.insert(Label);
      }
      _br(Inst->getLabelDefault());
      return;
    } else {
      // All the values are 32-bit so just check the operand is too and then
      // fall through to the 32-bit implementation. This is a common case.
      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      Constant *Zero = Ctx->getConstantInt32(0);
      _cmp(Src0Hi, Zero);
      _br(Traits::Cond::Br_ne, DefaultTarget);
      Src0 = Src0Lo;
    }
  }

  // 32-bit lowering

  if (CaseClusters.size() == 1) {
    // Jump straight to default if needed. Currently a common case as jump
    // tables occur on their own.
    constexpr bool DoneCmp = false;
    lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget);
    return;
  }

  // Going to be used multiple times, so get it into a register early.
  Variable *Comparison = legalizeToReg(Src0);

  // A span is over the clusters.
  struct SearchSpan {
    SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label)
        : Begin(Begin), Size(Size), Label(Label) {}

    SizeT Begin;
    SizeT Size;
    typename Traits::Insts::Label *Label;
  };
  // The stack will only grow to the height of the tree, so 12 should be
  // plenty.
  std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack;
  SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
  bool DoneCmp = false;

  while (!SearchSpanStack.empty()) {
    SearchSpan Span = SearchSpanStack.top();
    SearchSpanStack.pop();

    if (Span.Label != nullptr)
      Context.insert(Span.Label);

    switch (Span.Size) {
    case 0:
      llvm::report_fatal_error("Invalid SearchSpan size");
      break;

    case 1:
      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
      DoneCmp = false;
      break;

    case 2: {
      const CaseCluster *CaseA = &CaseClusters[Span.Begin];
      const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1];

      // Placing a range last may allow register clobbering during the range
      // test. That means there is no need to clone the register. If it is a
      // unit range the comparison may have already been done in the binary
      // search (DoneCmp) and so it should be placed first. If this is a range
      // of two items and the comparison with the low value has already been
      // done, comparing with the other element is cheaper than a range test.
      // If the low end of the range is zero then there is no subtraction and
      // nothing to be gained.
      if (!CaseA->isUnitRange() &&
          !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) {
        std::swap(CaseA, CaseB);
        DoneCmp = false;
      }

      lowerCaseCluster(*CaseA, Comparison, DoneCmp);
      DoneCmp = false;
      lowerCaseCluster(*CaseB, Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
    } break;

    default:
      // Pick the middle item and branch b or ae.
      SizeT PivotIndex = Span.Begin + (Span.Size / 2);
      const CaseCluster &Pivot = CaseClusters[PivotIndex];
      Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
      typename Traits::Insts::Label *Label =
          Traits::Insts::Label::create(Func, this);
      _cmp(Comparison, Value);
      // TODO(ascull): does it always have to be far?
      _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far);
      // Lower the left and (pivot+right) sides, falling through to the right.
      SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
      SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
      DoneCmp = true;
      break;
    }
  }

  _br(DefaultTarget);
}

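/// Lower a vector arithmetic operation that has no direct SSE lowering by
/// scalarizing it: for each element, extract the two inputs, apply the scalar
/// operation, and insert the result back into position. For example, a
/// <4 x i32> operation expands to four extract/op/insert sequences.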
template <class Machine>
void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
                                                 Variable *Dest, Operand *Src0,
                                                 Operand *Src1) {
  assert(isVectorType(Dest->getType()));
  Type Ty = Dest->getType();
  Type ElementTy = typeElementType(Ty);
  SizeT NumElements = typeNumElements(Ty);

  Operand *T = Ctx->getConstantUndef(Ty);
  for (SizeT I = 0; I < NumElements; ++I) {
    Constant *Index = Ctx->getConstantInt32(I);

    // Extract the next two inputs.
    Variable *Op0 = Func->makeVariable(ElementTy);
    lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
    Variable *Op1 = Func->makeVariable(ElementTy);
    lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));

    // Perform the arithmetic as a scalar operation.
    Variable *Res = Func->makeVariable(ElementTy);
    lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));

    // Insert the result into position.
    Variable *DestT = Func->makeVariable(Ty);
    lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
    T = DestT;
  }

  lowerAssign(InstAssign::create(Func, Dest, T));
}

/// The following pattern occurs often in lowered C and C++ code:
///
///   %cmp = fcmp/icmp pred <n x ty> %src0, %src1
///   %cmp.ext = sext <n x i1> %cmp to <n x ty>
///
/// We can eliminate the sext operation by copying the result of pcmpeqd,
/// pcmpgtd, or cmpps (which produce sign extended results) to the result
/// of the sext operation.
template <class Machine>
void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
    Variable *SignExtendedResult) {
  if (InstCast *NextCast =
          llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
    if (NextCast->getCastKind() == InstCast::Sext &&
        NextCast->getSrc(0) == SignExtendedResult) {
      NextCast->setDeleted();
      _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult));
      // Skip over the instruction.
      Context.advanceNext();
    }
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerUnreachable(
    const InstUnreachable * /*Inst*/) {
  _ud2();
}

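/// Lower a FakeRMW pseudo-instruction into an x86 read-modify-write memory
/// form, e.g. for a 32-bit add (a sketch of the transformation):
/// FROM:
///   t = load addr; t2 = add t, src; store t2, addr
/// TO:
///   add src, [addr]
/// i64 operations are split into lo/hi halves using the carry-aware pairs
/// (add/adc, sub/sbb).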
template <class Machine>
void TargetX86Base<Machine>::lowerRMW(
    const typename Traits::Insts::FakeRMW *RMW) {
  // If the beacon variable's live range does not end in this instruction,
  // then it must end in the modified Store instruction that follows. This
  // means that the original Store instruction is still there, either because
  // the value being stored is used beyond the Store instruction, or because
  // dead code elimination did not happen. In either case, we cancel RMW
  // lowering (and the caller deletes the RMW instruction).
  if (!RMW->isLastUse(RMW->getBeacon()))
    return;
  Operand *Src = RMW->getData();
  Type Ty = Src->getType();
  typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
  if (Ty == IceType_i64) {
    Src = legalizeUndef(Src);
    Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
    Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
    typename Traits::X86OperandMem *AddrLo =
        llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
    typename Traits::X86OperandMem *AddrHi =
        llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      _add_rmw(AddrLo, SrcLo);
      _adc_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Sub:
      _sub_rmw(AddrLo, SrcLo);
      _sbb_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::And:
      _and_rmw(AddrLo, SrcLo);
      _and_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Or:
      _or_rmw(AddrLo, SrcLo);
      _or_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Xor:
      _xor_rmw(AddrLo, SrcLo);
      _xor_rmw(AddrHi, SrcHi);
      return;
    }
  } else {
    // i8, i16, i32
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _add_rmw(Addr, Src);
      return;
    case InstArithmetic::Sub:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _sub_rmw(Addr, Src);
      return;
    case InstArithmetic::And:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _and_rmw(Addr, Src);
      return;
    case InstArithmetic::Or:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _or_rmw(Addr, Src);
      return;
    case InstArithmetic::Xor:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _xor_rmw(Addr, Src);
      return;
    }
  }
  llvm::report_fatal_error("Couldn't lower RMW instruction");
}

template <class Machine>
void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
  if (const auto *RMW =
          llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
    lowerRMW(RMW);
  } else {
    TargetLowering::lowerOther(Instr);
  }
}

/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
/// preserve the integrity of liveness analysis. Undef values are also turned
/// into zeroes, since loOperand() and hiOperand() don't expect Undef input.
template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
  // Pause constant blinding or pooling; blinding or pooling will be done
  // later during phi lowering assignments.
  BoolFlagSaver B(RandomizationPoolingPaused, true);
  PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
      this, Context.getNode(), Func);
}

// There is no support for loading or emitting vector constants, so the
// vector values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
// initialized with register operations.
//
// TODO(wala): Add limited support for vector constants so that complex
// initialization in registers is unnecessary.

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  // Insert a FakeDef, since otherwise the live range of Reg might be
  // overestimated.
  Context.insert(InstFakeDef::create(Func, Reg));
  _pxor(Reg, Reg);
  return Reg;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,
                                                        int32_t RegNum) {
  Variable *MinusOnes = makeReg(Ty, RegNum);
  // Insert a FakeDef so the live range of MinusOnes is not overestimated.
  Context.insert(InstFakeDef::create(Func, MinusOnes));
  _pcmpeq(MinusOnes, MinusOnes);
  return MinusOnes;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
  Variable *Dest = makeVectorOfZeros(Ty, RegNum);
  Variable *MinusOne = makeVectorOfMinusOnes(Ty);
  _psub(Dest, MinusOne);
  return Dest;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,
                                                            int32_t RegNum) {
  assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
         Ty == IceType_v16i8);
  if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
    Variable *Reg = makeVectorOfOnes(Ty, RegNum);
    SizeT Shift =
        typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
    _psll(Reg, Ctx->getConstantInt8(Shift));
    return Reg;
  } else {
    // SSE has no left shift operation for vectors of 8 bit integers.
    const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
    Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
    Variable *Reg = makeReg(Ty, RegNum);
    _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
    _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
    return Reg;
  }
}

/// Construct a mask in a register that can be and'ed with a floating-point
/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as a vector
/// of ones logically right shifted one bit. TODO(stichnot): Fix the wala
/// TODO above, to represent vector constants in memory.
template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
                                                       int32_t RegNum) {
  Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
  _psrl(Reg, Ctx->getConstantInt8(1));
  return Reg;
}

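/// Return a memory operand of type Ty that addresses the stack slot Slot at
/// the given byte Offset, materializing the slot's address into a register
/// with lea.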
template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                                     uint32_t Offset) {
  // Ensure that Loc is a stack slot.
  assert(Slot->getWeight().isZero());
  assert(Slot->getRegNum() == Variable::NoRegister);
  // Compute the location of Loc in memory.
  // TODO(wala,stichnot): lea should not be required. The address of the
  // stack slot is known at compile time (although not until after
  // addProlog()).
  const Type PointerType = IceType_i32;
  Variable *Loc = makeReg(PointerType);
  _lea(Loc, Slot);
  Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
  return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
}

/// Helper for legalize() to emit the right code to lower an operand to a
/// register of the appropriate type.
template <class Machine>
Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    _movp(Reg, Src);
  } else {
    _mov(Reg, Src);
  }
  return Reg;
}

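/// Legalize From into an operand of one of the kinds permitted by Allowed (a
/// mask of Legal_Reg, Legal_Mem, Legal_Imm, ...), copying it into a physical
/// register when necessary. If RegNum is specified, the result is forced
/// into that specific register.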
template <class Machine>
Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
                                          int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register. If a physical register needs to be
  // explicitly disallowed, then new code will need to be written to force a
  // spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're not
  // allowing any other operand kinds. (This could be future work, e.g. allow
  // the shl shift amount to be either an immediate or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);

  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure that the
    // Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToReg(Base);
    }
    if (Index) {
      RegIndex = legalizeToReg(Index);
    }
    if (Base != RegBase || Index != RegIndex) {
      Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
                                          RegIndex, Mem->getShift(),
                                          Mem->getSegmentRegister());
    }

    // For all Memory Operands, we do randomization/pooling here.
    From = randomizeOrPoolImmediate(Mem);

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto *Const = llvm::dyn_cast<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(Const)) {
      From = legalizeUndef(Const, RegNum);
      if (isVectorType(Ty))
        return From;
      Const = llvm::cast<Constant>(From);
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));

    // If the operand is a 32-bit constant integer, we should check whether
    // we need to randomize it or pool it.
    if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
      Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
      if (NewConst != Const) {
        return NewConst;
      }
    }

    // Convert a scalar floating point constant into an explicit memory
    // operand.
    if (isScalarFloatingType(Ty)) {
      Variable *Base = nullptr;
      std::string Buffer;
      llvm::raw_string_ostream StrBuf(Buffer);
      llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
      llvm::cast<Constant>(From)->setShouldBePooled(true);
      Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
      From = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
    }
    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
      // Immediate specifically not allowed.
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This can
    // happen either when the variable is pre-colored or when it is assigned
    // infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}

/// Provide a trivial wrapper to legalize() for this common usage.
template <class Machine>
Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
template <class Machine>
Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) {
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef values
    // to uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value, then
    // the result should be split and the lo and hi components will need to
    // go in uninitialized registers.
    if (isVectorType(Ty))
      return makeVectorOfZeros(Ty, RegNum);
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

/// For the cmp instruction, if Src1 is an immediate, or known to be a
/// physical register, we can allow Src0 to be a memory operand. Otherwise,
/// Src0 must be copied into a physical register. (Actually, either Src0 or
/// Src1 can be chosen for the physical register, but unfortunately we have
/// to commit to one or the other before register allocation.)
template <class Machine>
Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
                                                    Operand *Src1) {
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }
  return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
}

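/// Form an X86OperandMem of type Ty from Opnd: the operand is either already
/// a memory operand, a Variable (used as the base), or a Constant (used as
/// the offset).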
template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
                                          bool DoLegalize) {
  auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd);
  // It may be the case that address mode optimization already creates a
  // Traits::X86OperandMem, in which case it wouldn't need another level of
  // transformation.
  if (!Mem) {
    Variable *Base = llvm::dyn_cast<Variable>(Opnd);
    Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
    assert(Base || Offset);
    if (Offset) {
      // During memory operand building, we do not blind or pool the constant
      // offset; we will work on the whole memory operand as one entity
      // later, which saves one instruction. By turning blinding and pooling
      // off, we guarantee legalize(Offset) will return a Constant*.
      {
        BoolFlagSaver B(RandomizationPoolingPaused, true);

        Offset = llvm::cast<Constant>(legalize(Offset));
      }

      assert(llvm::isa<ConstantInteger32>(Offset) ||
             llvm::isa<ConstantRelocatable>(Offset));
    }
    Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
  }
  // Do legalization, which contains randomization/pooling, or do
  // randomization/pooling directly.
  return llvm::cast<typename Traits::X86OperandMem>(
      DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
}

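/// Create a new Variable of the given type, either pre-colored to RegNum or
/// marked infinite-weight so that register allocation must assign it a
/// physical register.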
template <class Machine>
Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for x86-32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

template <class Machine> void TargetX86Base<Machine>::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  inferTwoAddress();
}

template <class Machine>
void TargetX86Base<Machine>::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
                                        ExcludeRegisters);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  C->emitPoolLabel(Str);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantDouble *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  C->emitPoolLabel(Str);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

/// Randomize or pool an Immediate.
template <class Machine>
Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
                                                          int32_t RegNum) {
  assert(llvm::isa<ConstantInteger32>(Immediate) ||
         llvm::isa<ConstantRelocatable>(Immediate));
  if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediate randomization/pooling is off or paused.
    return Immediate;
  }
  if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
    Ctx->statsUpdateRPImms();
    if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
        RPI_Randomize) {
      // Blind the constant.
      // FROM:
      //   imm
      // TO:
      //   insert: mov imm+cookie, Reg
      //   insert: lea -cookie[Reg], Reg
      //   => Reg
      // If we have already assigned a physical register, we must come from
      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
      // the assigned register as this assignment is the start of its use-def
      // chain. So we add the RegNum argument here.
      // Note we use the 'lea' instruction instead of 'xor' to avoid
      // affecting the flags.
      Variable *Reg = makeReg(IceType_i32, RegNum);
      ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
      uint32_t Value = Integer->getValue();
      uint32_t Cookie = Ctx->getRandomizationCookie();
      _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
      Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
      _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
                                              nullptr, 0));
      // Make sure liveness analysis won't kill this variable; otherwise a
      // liveness assertion will be triggered.
      _set_dest_nonkillable();
      if (Immediate->getType() != IceType_i32) {
        Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
        _mov(TruncReg, Reg);
        return TruncReg;
      }
      return Reg;
    }
    if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
      // Pool the constant.
      // FROM:
      //   imm
      // TO:
      //   insert: mov $label, Reg
      //   => Reg
      assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
             RPI_Pool);
      Immediate->setShouldBePooled(true);
      // If we have already assigned a physical register, we must come from
      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
      // the assigned register as this assignment is the start of its use-def
      // chain. So we add the RegNum argument here.
      Variable *Reg = makeReg(Immediate->getType(), RegNum);
      IceString Label;
      llvm::raw_string_ostream Label_stream(Label);
      Immediate->emitPoolLabel(Label_stream);
      const RelocOffsetT Offset = 0;
      const bool SuppressMangling = true;
      Constant *Symbol =
          Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
      typename Traits::X86OperandMem *MemOperand =
          Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
                                        Symbol);
      _mov(Reg, MemOperand);
      return Reg;
    }
    assert("Unsupported -randomize-pool-immediates option" && false);
  }
  // The constant Immediate is not eligible for blinding/pooling.
  return Immediate;
}

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::randomizeOrPoolImmediate(
    typename Traits::X86OperandMem *MemOperand, int32_t RegNum) {
  assert(MemOperand);
  if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediate randomization/pooling is turned off.
    return MemOperand;
  }

  // If this memory operand is already a randomized one, we do not randomize
  // it again.
  if (MemOperand->getRandomized())
    return MemOperand;

  if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
    if (C->shouldBeRandomizedOrPooled(Ctx)) {
      // The offset of this mem operand should be blinded or pooled.
      Ctx->statsUpdateRPImms();
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
          RPI_Randomize) {
        // Blind the constant offset.
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: lea offset+cookie[base], RegTemp
        //   => -cookie[RegTemp, index, shift]
        uint32_t Value =
            llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
                ->getValue();
        uint32_t Cookie = Ctx->getRandomizationCookie();
        Constant *Mask1 = Ctx->getConstantInt(
            MemOperand->getOffset()->getType(), Cookie + Value);
        Constant *Mask2 =
            Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);

        typename Traits::X86OperandMem *TempMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(),
                                          MemOperand->getBase(), Mask1);
        // If we have already assigned a physical register, we must come from
        // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
        // the assigned register as this assignment is the start of its
        // use-def chain. So we add the RegNum argument here.
        Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
        _lea(RegTemp, TempMemOperand);
        // Since the source operand doesn't use the dest reg, we don't need
        // to add _set_dest_nonkillable(). But if we use the same Dest Reg,
        // that is, with RegNum assigned, we should add
        // _set_dest_nonkillable().
        if (RegNum != Variable::NoRegister)
          _set_dest_nonkillable();

        typename Traits::X86OperandMem *NewMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
                                          Mask2, MemOperand->getIndex(),
                                          MemOperand->getShift(),
                                          MemOperand->getSegmentRegister());

        // Label this memory operand as randomized, so we won't randomize it
        // again in case we call legalize() multiple times on this memory
        // operand.
        NewMemOperand->setRandomized(true);
        return NewMemOperand;
      }
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
        // Pool the constant offset.
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: mov $label, RegTemp
        //   insert: lea [base, RegTemp], RegTemp
        //   => [RegTemp, index, shift]
        assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
               RPI_Pool);
        // Memory operands should never appear as source operands in phi
        // lowering assignments, so there is no need to reuse any registers
        // here. For phi lowering, we should not ask for new physical
        // registers in general. However, if we do meet a memory operand
        // during phi lowering, we should not blind or pool the immediates
        // for now.
        if (RegNum != Variable::NoRegister)
          return MemOperand;
        Variable *RegTemp = makeReg(IceType_i32);
        IceString Label;
        llvm::raw_string_ostream Label_stream(Label);
        MemOperand->getOffset()->emitPoolLabel(Label_stream);
        MemOperand->getOffset()->setShouldBePooled(true);
        const RelocOffsetT SymOffset = 0;
        bool SuppressMangling = true;
        Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
                                               SuppressMangling);
        typename Traits::X86OperandMem *SymbolOperand =
            Traits::X86OperandMem::create(
                Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
        _mov(RegTemp, SymbolOperand);
        // If we have a base variable here, we should add the lea instruction
        // to add the value of the base variable to RegTemp. If there is no
        // base variable, we won't need this lea instruction.
        if (MemOperand->getBase()) {
          typename Traits::X86OperandMem *CalculateOperand =
              Traits::X86OperandMem::create(
                  Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
                  RegTemp, 0, MemOperand->getSegmentRegister());
          _lea(RegTemp, CalculateOperand);
          _set_dest_nonkillable();
        }
        typename Traits::X86OperandMem *NewMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
                                          nullptr, MemOperand->getIndex(),
                                          MemOperand->getShift(),
                                          MemOperand->getSegmentRegister());
        return NewMemOperand;
      }
      assert("Unsupported -randomize-pool-immediates option" && false);
    }
  }
  // The offset is not eligible for blinding or pooling; return the original
  // mem operand.
  return MemOperand;
}

} // end of namespace X86Internal
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H