//===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringX8664 class, which
/// consists almost entirely of the lowering sequence for each
/// high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringX8664.h"

#include "IceTargetLoweringX8664Traits.h"
#include "IceTargetLoweringX86Base.h"

namespace Ice {

//------------------------------------------------------------------------------
//      ______   ______     ______     __     ______   ______
//     /\__  _\ /\  == \   /\  __ \   /\ \   /\__  _\ /\  ___\
//     \/_/\ \/ \ \  __<   \ \  __ \  \ \ \  \/_/\ \/ \ \___  \
//        \ \_\  \ \_\ \_\  \ \_\ \_\  \ \_\    \ \_\  \/\_____\
//         \/_/   \/_/ /_/   \/_/\/_/   \/_/     \/_/   \/_____/
//
//------------------------------------------------------------------------------
namespace X86Internal {
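// The tables below are expanded from the shared x-macro definitions
// (FCMPX8664_TABLE, ICMPX8664_TABLE, ICETYPEX8664_TABLE). The static_asserts
// in the anonymous namespace at the end of this file keep their enum keys in
// sync with the corresponding high-level tables.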
const MachineTraits<TargetX8664>::TableFcmpType
    MachineTraits<TargetX8664>::TableFcmp[] = {
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  {                                                                            \
    dflt, swapS, X8664::Traits::Cond::C1, X8664::Traits::Cond::C2, swapV,      \
        X8664::Traits::Cond::pred                                              \
  }                                                                            \
  ,
        FCMPX8664_TABLE
#undef X
};

const size_t MachineTraits<TargetX8664>::TableFcmpSize =
    llvm::array_lengthof(TableFcmp);

const MachineTraits<TargetX8664>::TableIcmp32Type
    MachineTraits<TargetX8664>::TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { X8664::Traits::Cond::C_32 }                                                \
  ,
        ICMPX8664_TABLE
#undef X
};

const size_t MachineTraits<TargetX8664>::TableIcmp32Size =
    llvm::array_lengthof(TableIcmp32);

const MachineTraits<TargetX8664>::TableIcmp64Type
    MachineTraits<TargetX8664>::TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  {                                                                            \
    X8664::Traits::Cond::C1_64, X8664::Traits::Cond::C2_64,                    \
        X8664::Traits::Cond::C3_64                                             \
  }                                                                            \
  ,
        ICMPX8664_TABLE
#undef X
};

const size_t MachineTraits<TargetX8664>::TableIcmp64Size =
    llvm::array_lengthof(TableIcmp64);

const MachineTraits<TargetX8664>::TableTypeX8664AttributesType
    MachineTraits<TargetX8664>::TableTypeX8664Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
  { elementty }                                                                \
  ,
        ICETYPEX8664_TABLE
#undef X
};

const size_t MachineTraits<TargetX8664>::TableTypeX8664AttributesSize =
    llvm::array_lengthof(TableTypeX8664Attributes);

const uint32_t MachineTraits<TargetX8664>::X86_STACK_ALIGNMENT_BYTES = 16;
const char *MachineTraits<TargetX8664>::TargetName = "X8664";

} // end of namespace X86Internal

//------------------------------------------------------------------------------
//     __      ______  __     __  ______  ______  __  __   __  ______
//    /\ \    /\  __ \/\ \  _ \ \/\  ___\/\  == \/\ \/\ "-.\ \/\  ___\
//    \ \ \___\ \ \/\ \ \ \/ ".\ \ \  __\\ \  __<\ \ \ \ \-.  \ \ \__ \
//     \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
//      \/_____/\/_____/\/_/   \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
//
//------------------------------------------------------------------------------
namespace {
static inline TargetX8664::Traits::RegisterSet::AllRegisters
getRegisterForXmmArgNum(uint32_t ArgNum) {
  assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS);
  return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
      TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum);
}

static inline TargetX8664::Traits::RegisterSet::AllRegisters
getRegisterForGprArgNum(uint32_t ArgNum) {
  assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS);
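  // The enum reuses the 32-bit register names (Reg_edi, Reg_esi, ...), which
  // here designate the corresponding 64-bit argument registers %rdi, %rsi,
  // %rdx, %rcx, %r8, and %r9 (see the calling-convention comment in
  // lowerCall() below).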
  static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = {
      TargetX8664::Traits::RegisterSet::Reg_edi,
      TargetX8664::Traits::RegisterSet::Reg_esi,
      TargetX8664::Traits::RegisterSet::Reg_edx,
      TargetX8664::Traits::RegisterSet::Reg_ecx,
      TargetX8664::Traits::RegisterSet::Reg_r8d,
      TargetX8664::Traits::RegisterSet::Reg_r9d,
  };
  static_assert(llvm::array_lengthof(GprForArgNum) ==
                    TargetX8664::Traits::X86_MAX_GPR_ARGS,
                "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
  return GprForArgNum[ArgNum];
}

// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
// OperandList in lowerCall. std::max() is not constexpr until C++14, so it
// cannot be used here.
constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
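
// Sanity check (illustrative): constexprMax picks the larger argument at
// compile time.
static_assert(constexprMax(3, 5) == 5, "constexprMax returns the larger value");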

} // end of anonymous namespace

void TargetX8664::lowerCall(const InstCall *Instr) {
  // x86-64 calling convention:
  //
  // * At the point before the call, the stack must be aligned to 16
  // bytes.
  //
  // * The first eight arguments of vector/fp type, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers %xmm0 - %xmm7.
  //
  // * The first six arguments of integer types, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
  //
  // * Other arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at
  // the lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next
  // highest multiple of 16 bytes. Other stack arguments are aligned to
  // 8 bytes.
  //
  // This intends to match the section "Function Calling Sequence" of the
  // document "System V Application Binary Interface."
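  //
  // For example (illustrative): in a call f(i32 a, double b, <4 x i32> c,
  // i64 d), a is passed in %rdi, b in %xmm0, c in %xmm1, and d in %rsi, with
  // no stack space needed for arguments.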
  NeedsStackAlignment = true;

  using OperandList =
      llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
                                                Traits::X86_MAX_GPR_ARGS)>;
  OperandList XmmArgs;
  OperandList GprArgs;
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else if (isScalarFloatingType(Ty) &&
               XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else if (isScalarIntegerType(Ty) &&
               GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
      GprArgs.push_back(Arg);
    } else {
      StackArgs.push_back(Arg);
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes =
            Traits::applyStackAlignment(ParameterAreaSizeBytes);
      }
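      // Record the stack location (esp + current offset) where this argument
      // will be stored once the argument area has been allocated.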
      Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(
          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    _adjust_stack(ParameterAreaSizeBytes);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
  }

  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
    Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i));
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg is the register, if any, that receives the call's result.
  Variable *ReturnReg = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
    case IceType_void:
      llvm::report_fatal_error("Invalid Call dest type");
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
    case IceType_i64:
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
      break;
    case IceType_f32:
    case IceType_f64:
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
      break;
    }
  }

  Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm);
  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
  if (NeedSandboxing) {
    llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
  }
  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (NeedSandboxing) {
    llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
  }

  // Add the appropriate offset to esp. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Variable *Esp =
        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  assert(ReturnReg && "x86-64 always returns values in registers.");

  if (isVectorType(Dest->getType())) {
    _movp(Dest, ReturnReg);
  } else {
    assert(isScalarFloatingType(Dest->getType()) ||
           isScalarIntegerType(Dest->getType()));
    _mov(Dest, ReturnReg);
  }
}

void TargetX8664::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first eight vector-typed arguments (as well as fp arguments) are
  // passed in %xmm0 through %xmm7 regardless of their position in the
  // argument list.
  unsigned NumXmmArgs = 0;
  // The first six integer-typed arguments are passed in %rdi, %rsi, %rdx,
  // %rcx, %r8, and %r9 regardless of their position in the argument list.
  unsigned NumGprArgs = 0;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT i = 0, End = Args.size();
       i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS ||
                   NumGprArgs < Traits::X86_MAX_GPR_ARGS);
       ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    Variable *RegisterArg = nullptr;
    int32_t RegNum = Variable::NoRegister;
    if ((isVectorType(Ty) || isScalarFloatingType(Ty))) {
      if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) {
        continue;
      }
      RegNum = getRegisterForXmmArgNum(NumXmmArgs);
      ++NumXmmArgs;
      RegisterArg = Func->makeVariable(Ty);
    } else if (isScalarIntegerType(Ty)) {
      if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) {
        continue;
      }
      RegNum = getRegisterForGprArgNum(NumGprArgs);
      ++NumGprArgs;
      RegisterArg = Func->makeVariable(Ty);
    }
    assert(RegNum != Variable::NoRegister);
    assert(RegisterArg != nullptr);
    // Replace Arg in the argument list with the home register. Then
    // generate an instruction in the prolog to copy the home register
    // to the assigned location of Arg.
    if (BuildDefs::dump())
      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    Arg->setIsArg(false);

    Args[i] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}

void TargetX8664::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (isVectorType(Src0->getType()) ||
        isScalarFloatingType(Src0->getType())) {
      Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
    } else {
      assert(isScalarIntegerType(Src0->getType()));
      _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because
  // addEpilog explicitly looks for a ret instruction as a marker for
  // where to insert the frame removal instructions.
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp =
      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}

void TargetX8664::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
  // | 4. global spill area   |
  // +------------------------+
  // | 5. padding             |
  // +------------------------+
  // | 6. local spill area    |
  // +------------------------+
  // | 7. padding             |
  // +------------------------+
  // | 8. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  // * X86_RET_IP_SIZE_BYTES: area 1
  // * PreservedRegsSizeBytes: area 2
  // * SpillAreaPaddingBytes: area 3
  // * GlobalsSize: area 4
  // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  // * LocalsSpillAreaSize: area 6
  // * SpillAreaSizeBytes: areas 3 - 7
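  //
  // For example (illustrative): one pushed 8-byte register in area 2, a
  // 16-byte global spill area, and a 12-byte local spill area with no
  // padding give PreservedRegsSizeBytes = 8, GlobalsSize = 16, and
  // SpillAreaSizeBytes = 28 before the alignment adjustments below.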

  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // A spill slot linked to a variable with a stack slot should reuse
  // that stack slot.
  std::function<bool(Variable *)> TargetVarHook =
      [&VariablesLinkedToSpillSlots](Variable *Var) {
        if (auto *SpillVar =
                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
          assert(Var->getWeight().isZero());
          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
            VariablesLinkedToSpillSlots.push_back(Var);
            return true;
          }
        }
        return false;
      };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ++NumCallee;
      PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64);
      _push(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64);
    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
    Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _push(ebp);
    _mov(ebp, esp);
    // Keep ebp live for late-stage liveness analysis
    // (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, ebp));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
                       SpillAreaAlignmentBytes, GlobalsSize,
                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
                       &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align esp if necessary.
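  // For example (illustrative, assuming applyStackAlignment rounds up to the
  // 16-byte X86_STACK_ALIGNMENT_BYTES defined above): with StackOffset = 16
  // and SpillAreaSizeBytes = 36, StackSize = applyStackAlignment(52) = 64, so
  // SpillAreaSizeBytes becomes 48 and esp remains 16-byte aligned after the
  // subtraction below.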
  if (NeedsStackAlignment) {
    uint32_t StackOffset =
        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize =
        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub esp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes)
    _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
         Ctx->getConstantInt32(SpillAreaSizeBytes));
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset =
      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumXmmArgs = 0;
  unsigned NumGPRArgs = 0;
  for (Variable *Arg : Args) {
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) || isScalarFloatingType(Arg->getType())) {
      if (NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
        ++NumXmmArgs;
        continue;
      }
    } else {
      assert(isScalarIntegerType(Arg->getType()));
      if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
        ++NumGPRArgs;
        continue;
      }
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      IsEbpBasedFrame);
  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked =
        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}

void TargetX8664::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of esp before the assignment of esp=ebp keeps
    // previous esp adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, esp));
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, SpillAreaSizeBytes
    if (SpillAreaSizeBytes)
      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }

  if (Ctx->getFlags().getUseSandboxing()) {
    llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
  }
}

void TargetX8664::emitJumpTable(const Cfg *Func,
                                const InstJumpTable *JumpTable) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  IceString MangledName = Ctx->mangleName(Func->getFunctionName());
  Str << "\t.section\t.rodata." << MangledName
      << "$jumptable,\"a\",@progbits\n";
  Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
  Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";

  // On x86-64 ILP32, pointers are 32 bits, hence the use of .long.
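  //
  // For a table with two targets, the output would look roughly like this
  // (illustrative; the actual label names come from makeName() and the
  // target blocks' assembly names):
  //   .section .rodata.<function>$jumptable,"a",@progbits
  //   .align 4
  //   <jump table label>:
  //   .long <target block 0>
  //   .long <target block 1>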
  for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
    Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
  Str << "\n";
}

namespace {
template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<float>::TypeName = "float";
const char *PoolTypeConverter<float>::AsmTag = ".long";
const char *PoolTypeConverter<float>::PrintfString = "0x%x";

template <> struct PoolTypeConverter<double> {
  typedef uint64_t PrimitiveIntType;
  typedef ConstantDouble IceType;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<double>::TypeName = "double";
const char *PoolTypeConverter<double>::AsmTag = ".quad";
const char *PoolTypeConverter<double>::PrintfString = "0x%llx";

// Add converter for int type constant pooling
template <> struct PoolTypeConverter<uint32_t> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantInteger32 IceType;
  static const Type Ty = IceType_i32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<uint32_t>::TypeName = "i32";
const char *PoolTypeConverter<uint32_t>::AsmTag = ".long";
const char *PoolTypeConverter<uint32_t>::PrintfString = "0x%x";

// Add converter for int type constant pooling
template <> struct PoolTypeConverter<uint16_t> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantInteger32 IceType;
  static const Type Ty = IceType_i16;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<uint16_t>::TypeName = "i16";
const char *PoolTypeConverter<uint16_t>::AsmTag = ".short";
const char *PoolTypeConverter<uint16_t>::PrintfString = "0x%x";

// Add converter for int type constant pooling
template <> struct PoolTypeConverter<uint8_t> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantInteger32 IceType;
  static const Type Ty = IceType_i8;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
} // end of anonymous namespace

template <typename T>
void TargetDataX8664::emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";

  // If reorder-pooled-constants option is set to true, we need to shuffle the
  // constant pool before emitting it.
  if (Ctx->getFlags().shouldReorderPooledConstants() && !Pool.empty()) {
    // Use the constant's kind value as the salt for creating the random
    // number generator. (Guard against an empty pool before dereferencing
    // Pool.begin().)
    Operand::OperandKind K = (*Pool.begin())->getKind();
    RandomNumberGenerator RNG(Ctx->getFlags().getRandomSeed(),
                              RPE_PooledConstantReordering, K);
    RandomShuffle(Pool.begin(), Pool.end(),
                  [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
  }
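
  // Each pooled constant is emitted as its label followed by its raw bit
  // pattern, e.g. (illustrative) a pooled f32 with value 1.0 would emit:
  //   <pool label>:
  //       .long 0x3f800000 # float 1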
  for (Constant *C : Pool) {
    if (!C->getShouldBePooled())
      continue;
    typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
    typename T::IceType::PrimType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way
    // that avoids breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0 &&
           (size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    Const->emitPoolLabel(Str);
    Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
        << Value << "\n";
  }
}

void TargetDataX8664::lowerConstants() {
  if (Ctx->getFlags().getDisableTranslation())
    return;
  // Emit the pooled integer constants first, then float/double. (For x86,
  // integer constants are usually embedded as immediates in instructions,
  // but any that were pooled still need to be emitted here.)
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();

    Writer->writeConstantPool<ConstantInteger32>(IceType_i8);
    Writer->writeConstantPool<ConstantInteger32>(IceType_i16);
    Writer->writeConstantPool<ConstantInteger32>(IceType_i32);

    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker L(Ctx);

    emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx);
    emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx);
    emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx);

    emitConstantPool<PoolTypeConverter<float>>(Ctx);
    emitConstantPool<PoolTypeConverter<double>>(Ctx);
  } break;
  }
}

void TargetDataX8664::lowerJumpTables() {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    for (const JumpTableData &JumpTable : Ctx->getJumpTables())
      Writer->writeJumpTable(JumpTable, TargetX8664::Traits::RelFixup);
  } break;
  case FT_Asm:
    // Already emitted from Cfg
    break;
  case FT_Iasm: {
    if (!BuildDefs::dump())
      return;
    Ostream &Str = Ctx->getStrEmit();
    for (const JumpTableData &JT : Ctx->getJumpTables()) {
      Str << "\t.section\t.rodata." << JT.getFunctionName()
          << "$jumptable,\"a\",@progbits\n";
      Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
      Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";

      // On x86-64 ILP32, pointers are 32 bits, hence the use of .long.
      for (intptr_t TargetOffset : JT.getTargetOffsets())
        Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
      Str << "\n";
    }
  } break;
  }
}

void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
                                   const IceString &SectionSuffix) {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, TargetX8664::Traits::RelFixup,
                             SectionSuffix);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

namespace {
// Validate the enum values in FCMPX8664_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
  FCMPX8664_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE");
FCMPX8664_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE");
ICEINSTFCMP_TABLE
#undef X
} // end of namespace dummy1

// Validate the enum values in ICMPX8664_TABLE.
namespace dummy2 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
  ICMPX8664_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE");
ICMPX8664_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy2

// Validate the enum values in ICETYPEX8664_TABLE.
namespace dummy3 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
  ICETYPEX8664_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, sizeLog2, align, elts, elty, str)                               \
  static const int _table1_##tag = tag;
ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
  static const int _table2_##tag = _tmp_##tag;                                 \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
ICETYPEX8664_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, sizeLog2, align, elts, elty, str)                               \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
ICETYPE_TABLE
#undef X
} // end of namespace dummy3
} // end of anonymous namespace

} // end of namespace Ice