//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringARM32 class, which consists almost
// entirely of the lowering sequence for each high-level instruction.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/MathExtras.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceTargetLoweringARM32.h"
#include "IceUtils.h"

namespace Ice {

namespace {

void UnimplementedError(const ClFlags &Flags) {
  if (!Flags.getSkipUnimplemented()) {
    // Use llvm_unreachable instead of report_fatal_error, which gives better
    // stack traces.
    llvm_unreachable("Not yet implemented");
    abort();
  }
}

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
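// (Illustrative example: an unsigned less-than would typically map to the ARM
// LO/CC condition; the concrete mapping is supplied by the ICMPARM32_TABLE
// x-macro in IceTargetLoweringARM32.def.)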

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { CondARM32::C_32 }                                                          \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference
// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
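// (Illustrative only; the exact per-condition choices come from
// ICMPARM32_TABLE: an unsigned compare can subtract the low words and then
// the high words with carry, while a signed compare looks at the high words
// first.)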
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1

// The maximum number of arguments to pass in GPR registers.
const uint32_t ARM32_MAX_GPR_ARG = 4;

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
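// For example, with the 16-byte stack alignment used here, a Value of 20 is
// rounded up to 32, and a Value of 16 is returned unchanged.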
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

} // end of anonymous namespace

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), InstructionSet(ARM32InstructionSet::Begin),
      UsesFramePointer(false), NeedsStackAlignment(false), MaybeLeafFunc(true),
      SpillAreaSizeBytes(0) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Func->getContext()->getFlags().getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  IntegerRegisters[RegARM32::val] = isInt;                                     \
  FloatRegisters[RegARM32::val] = isFP;                                        \
  VectorRegisters[RegARM32::val] = isFP;                                       \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  name,
    REGARM32_TABLE
#undef X
};

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark SP and LR as an "argument" so that they are considered
    // live upon function entry.
    if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

void TargetARM32::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  // TODO(jvoung): Handle out of range. Perhaps we need a scratch register
  // to materialize a larger offset.
  const bool SignExt = false;
  if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = IceType_i32;
  Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first few integer type parameters can use r0-r3, regardless of their
  // position relative to the floating-point/vector arguments in the argument
  // list. Floating-point and vector arguments can use q0-q3 (aka d0-d7,
  // s0-s15).
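  // As an illustration of the even-register rule handled below: for arguments
  // (i32, i64, i32), the first i32 takes r0, the i64 takes the even pair
  // r2:r3 (padding away r1), and the trailing i32 no longer fits in r0-r3,
  // so it is left to be passed on the stack.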
  unsigned NumGPRRegsUsed = 0;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    // TODO(jvoung): handle float/vector types.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
        continue;
      int32_t RegLo;
      int32_t RegHi;
      // Always start i64 registers at an even register, so this may end
      // up padding away a register.
      if (NumGPRRegsUsed % 2 != 0) {
        ++NumGPRRegsUsed;
      }
      RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
      ++NumGPRRegsUsed;
      RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
      ++NumGPRRegsUsed;
      // If this bumps us past the boundary, don't allocate to a register
      // and leave any previously speculatively consumed registers as consumed.
      if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (ALLOW_DUMP) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegLo);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegHi);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      assert(Ty == IceType_i32);
      if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
        continue;
      int32_t RegNum = RegARM32::Reg_r0 + NumGPRRegsUsed;
      ++NumGPRRegsUsed;
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (ALLOW_DUMP) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
    }
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  if (isVectorType(Ty)) {
    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load
  // the value from the stack slot.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandARM32Mem *Mem = OperandARM32Mem::create(
        Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
                                Ctx->getConstantInt32(Arg->getStackOffset())));
    if (isVectorType(Arg->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _ldr(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandARM32Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
  //  * SpillAreaSizeBytes: areas 2 - 6
  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook =
      [](Variable *) { return false; };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
  // Unlike x86, ARM also has callee-saved float/vector registers.
  // The "vpush" instruction can handle a whole list of float/vector
  // registers, but it only handles contiguous sequences of registers
  // by specifying the start and the length.
  VarList GPRsToPreserve;
  GPRsToPreserve.reserve(CalleeSaves.size());
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
    assert(RegsUsed[RegARM32::Reg_fp] == false);
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      // TODO(jvoung): do separate vpush for each floating point
      // register segment and += 4, or 8 depending on type.
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      GPRsToPreserve.push_back(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!GPRsToPreserve.empty())
    _push(GPRsToPreserve);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, FP));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align SP if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
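    // For example, with 8 bytes of preserved registers and a raw spill area
    // of 20 bytes, the 16-byte-aligned total is 32, so SpillAreaSizeBytes
    // becomes 24 (illustrative numbers only).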
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the IP inter-procedural scratch register if needed to legalize
    // the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumGPRArgs = 0;
  for (Variable *Arg : Args) {
    Type Ty = Arg->getType();
    // Skip arguments passed in registers.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64 && NumGPRArgs < ARM32_MAX_GPR_ARG) {
      // Start at an even register.
      if (NumGPRArgs % 2 == 1) {
        ++NumGPRArgs;
      }
      NumGPRArgs += 2;
      if (NumGPRArgs <= ARM32_MAX_GPR_ARG)
        continue;
    } else if (NumGPRArgs < ARM32_MAX_GPR_ARG) {
      ++NumGPRArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of SP before the assignment of SP=FP keeps
    // previous SP adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
    _mov(SP, FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the IP inter-procedural scratch register if needed to legalize
      // the immediate. It shouldn't be live at this point.
      Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                    Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
      _add(SP, SP, AddAmount);
    }
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  VarList GPRsToRestore;
  GPRsToRestore.reserve(CalleeSaves.size());
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
  }
  // Pop registers in ascending order just like push
  // (instead of in reverse order).
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
  if (!GPRsToRestore.empty())
    _pop(GPRsToRestore);

  if (!Ctx->getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  // bundle_lock
  // bic lr, #0xc000000f
  // bx lr
  // bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
  // restrict to the lower 1GB as well.
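  // Clearing the bits in 0xc000000f keeps the return target 16-byte
  // bundle-aligned (low four bits) and below the 1GB boundary (top two bits).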
  Operand *RetMask =
      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
  _bundle_lock();
  _bic(LR, LR, RetMask);
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

void TargetARM32::split64(Variable *Var) {
  assert(Var->getType() == IceType_i64);
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (ALLOW_DUMP) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  }
  if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    if (Mem->isRegReg()) {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getIndex(), Mem->getShiftOp(),
                                     Mem->getShiftAmt(), Mem->getAddrMode());
    } else {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getOffset(), Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects
    // in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else.
      // Thus, prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      Variable *Base = Mem->getBase();
      ConstantInteger32 *Offset = Mem->getOffset();
      assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
      int32_t NextOffsetVal = Offset->getValue() + 4;
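      // For example, if the low word is addressed as [Base, #8], the high
      // word lives at [Base, #12], provided the new offset is still encodable
      // (checked below).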
      const bool SignExt = false;
      if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
        // We have to make a temp variable and add 4 to either Base or Offset.
        // If we add 4 to Offset, this will convert a non-RegReg addressing
        // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
        // RegReg addressing modes, prefer adding to base and replacing instead.
        // Thus we leave the old offset alone.
        Constant *Four = Ctx->getConstantInt32(4);
        Variable *NewBase = Func->makeVariable(Base->getType());
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               NewBase, Base, Four));
        Base = NewBase;
      } else {
        Offset =
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
      }
      return OperandARM32Mem::create(Func, SplitType, Base, Offset,
                                     Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegARM32::Reg_NUM);

#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = false;

  REGARM32_TABLE

#undef X

  return Registers;
}

void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
  UsesFramePointer = true;
  // Conservatively require the stack to be aligned. Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca. All the alloca code ensures that the
  // stack alignment is preserved after the alloca. The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of SP, etc.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
    alignRegisterPow2(SP, Alignment);
  }
  Operand *TotalSize = Inst->getSizeInBytes();
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
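    // For example, a constant request of 20 bytes with 16-byte alignment
    // reserves 32 bytes.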
    Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
    _sub(SP, SP, SubAmount);
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime.
    TotalSize = legalize(TotalSize);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
  _mov(Dest, SP);
}

void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
  // to legalize Src0 to flex or Src1 to flex and there is a reversible
  // instruction. E.g., reverse subtract with immediate, register vs
  // register, immediate.
  // Or it may be the case that the operands aren't swapped, but the
  // bits can be flipped and a different operation applied.
  // E.g., use BIC (bit clear) instead of AND for some masks.
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  if (Dest->getType() == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *Src0RLo = legalizeToVar(loOperand(Src0));
    Variable *Src0RHi = legalizeToVar(hiOperand(Src0));
    Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
    Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
    Variable *T_Lo = makeReg(DestLo->getType());
    Variable *T_Hi = makeReg(DestHi->getType());
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      _adds(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _adc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::And:
      _and(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _and(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Or:
      _orr(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _orr(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Xor:
      _eor(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _eor(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Sub:
      _subs(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _sbc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Mul: {
      // GCC 4.8 does:
      // a=b*c ==>
      // t_acc =(mul) (b.lo * c.hi)
      // t_acc =(mla) (c.lo * b.hi) + t_acc
      // t.hi,t.lo =(umull) b.lo * c.lo
      // t.hi += t_acc
      // a.lo = t.lo
      // a.hi = t.hi
      //
      // LLVM does:
      // t.hi,t.lo =(umull) b.lo * c.lo
      // t.hi =(mla) (b.lo * c.hi) + t.hi
      // t.hi =(mla) (b.hi * c.lo) + t.hi
      // a.lo = t.lo
      // a.hi = t.hi
      //
      // LLVM's lowering has fewer instructions, but more register pressure:
      // t.lo is live from beginning to end, while GCC delays the two-dest
      // instruction till the end, and kills c.hi immediately.
      Variable *T_Acc = makeReg(IceType_i32);
      Variable *T_Acc1 = makeReg(IceType_i32);
      Variable *T_Hi1 = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Variable *Src1RHi = legalizeToVar(Src1Hi);
      _mul(T_Acc, Src0RLo, Src1RHi);
      _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
      _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
      _add(T_Hi, T_Hi1, T_Acc1);
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
    } break;
    case InstArithmetic::Shl: {
      // a=b<<c ==>
      // GCC 4.8 does:
      // sub t_c1, c.lo, #32
      // lsl t_hi, b.hi, c.lo
      // orr t_hi, t_hi, b.lo, lsl t_c1
      // rsb t_c2, c.lo, #32
      // orr t_hi, t_hi, b.lo, lsr t_c2
      // lsl t_lo, b.lo, c.lo
      // a.lo = t_lo
      // a.hi = t_hi
      // Can be strength-reduced for constant-shifts, but we don't do
      // that for now.
      // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
      // On ARM, shifts only take the lower 8 bits of the shift register,
      // and saturate to the range 0-32, so the negative value will
      // saturate to 32.
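      // (As an illustration of the strength reduction mentioned above: a
      // constant shift by exactly 32 would reduce to a.hi = b.lo, a.lo = 0.)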
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _sub(T_C1, Src1RLo, ThirtyTwo);
      _lsl(T_Hi, Src0RHi, Src1RLo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSL, T_C1));
      _rsb(T_C2, Src1RLo, ThirtyTwo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSR, T_C2));
      _mov(DestHi, T_Hi);
      Variable *T_Lo = makeReg(IceType_i32);
      // _mov seems to sometimes have better register preferencing than lsl.
      // Otherwise mov w/ lsl shifted register is a pseudo-instruction
      // that maps to lsl.
      _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                             OperandARM32::LSL, Src1RLo));
      _mov(DestLo, T_Lo);
    } break;
    case InstArithmetic::Lshr:
      // a=b>>c (unsigned) ==>
      // GCC 4.8 does:
      // rsb t_c1, c.lo, #32
      // lsr t_lo, b.lo, c.lo
      // orr t_lo, t_lo, b.hi, lsl t_c1
      // sub t_c2, c.lo, #32
      // orr t_lo, t_lo, b.hi, lsr t_c2
      // lsr t_hi, b.hi, c.lo
      // a.lo = t_lo
      // a.hi = t_hi
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==> ...
      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
      // and the next orr should be conditioned on PLUS. The last two
      // right shifts should also be arithmetic.
      bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _rsb(T_C1, Src1RLo, ThirtyTwo);
      _lsr(T_Lo, Src0RLo, Src1RLo);
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   OperandARM32::LSL, T_C1));
      OperandARM32::ShiftKind RShiftKind;
      CondARM32::Cond Pred;
      if (IsAshr) {
        _subs(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::ASR;
        Pred = CondARM32::PL;
      } else {
        _sub(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::LSR;
        Pred = CondARM32::AL;
      }
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   RShiftKind, T_C2),
           Pred);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(IceType_i32);
      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                             RShiftKind, Src1RLo));
      _mov(DestHi, T_Hi);
    } break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
  } else if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
  } else { // Dest->getType() is non-i64 scalar
    Variable *Src0R = legalizeToVar(Inst->getSrc(0));
    Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
    Variable *T = makeReg(Dest->getType());
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add: {
      _add(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::And: {
      _and(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Or: {
      _orr(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Xor: {
      _eor(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Sub: {
      _sub(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Mul: {
      Variable *Src1R = legalizeToVar(Src1);
      _mul(T, Src0R, Src1R);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Shl:
      _lsl(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Lshr:
      _lsr(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Ashr:
      _asr(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Udiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Sdiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Urem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Srem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fadd:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fsub:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fmul:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fdiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Frem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
}

void TargetARM32::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *SrcR;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then legalize the
      // Src operand into a Variable with the same register
      // assignment. This is mostly a workaround for advanced phi
      // lowering's ad-hoc register allocation which assumes no
      // register allocation is needed when at least one of the
      // operands is non-memory.
      // TODO(jvoung): check this for ARM.
      SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
    } else {
      // Dest could be a stack operand. Since we could potentially need
      // to do a Store (and store can only have Register operands),
      // legalize this to a register.
      SrcR = legalize(Src0, Legal_Reg);
    }
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _mov(Dest, SrcR);
    }
  }
}

void TargetARM32::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();
  // TODO(jvoung): Handle folding opportunities.

  Variable *Src0R = legalizeToVar(Cond);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0R, Zero);
  _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
}

void TargetARM32::lowerCall(const InstCall *Instr) {
  MaybeLeafFunc = false;

  // TODO(jvoung): assign arguments to registers and stack. Also reserve stack.
  if (Instr->getNumArgs()) {
    UnimplementedError(Func->getContext()->getFlags());
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
      ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
      break;
    case IceType_f32:
    case IceType_f64:
      // Use S and D regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Use Q regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
        UnimplementedError(Func->getContext()->getFlags());
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  }
}

void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
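      // For example, sign-extending the i8 value 0x80 produces
      // dst.lo = 0xFFFFFF80 and dst.hi = 0xFFFFFFFF (illustrative values).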
1422 Constant *ShiftAmt = Ctx->getConstantInt32(31);
1423 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1424 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1425 Variable *T_Lo = makeReg(DestLo->getType());
1426 if (Src0->getType() == IceType_i32) {
1427 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1428 _mov(T_Lo, Src0RF);
1429 } else if (Src0->getType() == IceType_i1) {
1430 Variable *Src0R = legalizeToVar(Src0);
1431 _lsl(T_Lo, Src0R, ShiftAmt);
1432 _asr(T_Lo, T_Lo, ShiftAmt);
1433 } else {
1434 Variable *Src0R = legalizeToVar(Src0);
1435 _sxt(T_Lo, Src0R);
1436 }
1437 _mov(DestLo, T_Lo);
1438 Variable *T_Hi = makeReg(DestHi->getType());
1439 if (Src0->getType() != IceType_i1) {
1440 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
1441 OperandARM32::ASR, ShiftAmt));
1442 } else {
1443 // For i1, the asr instruction is already done above.
1444 _mov(T_Hi, T_Lo);
1445 }
1446 _mov(DestHi, T_Hi);
1447 } else if (Src0->getType() == IceType_i1) {
1448 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
1449 // lsl t1, src_reg, 31
1450 // asr t1, t1, 31
1451 // dst = t1
1452 Variable *Src0R = legalizeToVar(Src0);
1453 Constant *ShiftAmt = Ctx->getConstantInt32(31);
1454 Variable *T = makeReg(Dest->getType());
1455 _lsl(T, Src0R, ShiftAmt);
1456 _asr(T, T, ShiftAmt);
1457 _mov(Dest, T);
1458 } else {
1459 // t1 = sxt src; dst = t1
1460 Variable *Src0R = legalizeToVar(Src0);
1461 Variable *T = makeReg(Dest->getType());
1462 _sxt(T, Src0R);
1463 _mov(Dest, T);
1464 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001465 break;
1466 }
1467 case InstCast::Zext: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001468 if (isVectorType(Dest->getType())) {
1469 UnimplementedError(Func->getContext()->getFlags());
1470 } else if (Dest->getType() == IceType_i64) {
1471 // t1=uxtb src; dst.lo=t1; dst.hi=0
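      // For example, zero-extending the i8 value 0x80 to i64 yields
      // dst.lo = 0x00000080 and dst.hi = 0x00000000.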
1472 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1473 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1474 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1475 Variable *T_Lo = makeReg(DestLo->getType());
1476 // i32 and i1 can just take up the whole register.
1477 // i32 doesn't need uxt, while i1 will have an and mask later anyway.
1478 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
1479 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1480 _mov(T_Lo, Src0RF);
1481 } else {
1482 Variable *Src0R = legalizeToVar(Src0);
1483 _uxt(T_Lo, Src0R);
1484 }
1485 if (Src0->getType() == IceType_i1) {
1486 Constant *One = Ctx->getConstantInt32(1);
1487 _and(T_Lo, T_Lo, One);
1488 }
1489 _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestHi->getType());
1491 _mov(T_Hi, Zero);
1492 _mov(DestHi, T_Hi);
1493 } else if (Src0->getType() == IceType_i1) {
1494 // t = Src0; t &= 1; Dest = t
1495 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1496 Constant *One = Ctx->getConstantInt32(1);
1497 Variable *T = makeReg(Dest->getType());
1498 // Just use _mov instead of _uxt since all registers are 32-bit.
1499 // _uxt requires the source to be a register so could have required
1500 // a _mov from legalize anyway.
1501 _mov(T, Src0RF);
1502 _and(T, T, One);
1503 _mov(Dest, T);
1504 } else {
1505 // t1 = uxt src; dst = t1
1506 Variable *Src0R = legalizeToVar(Src0);
1507 Variable *T = makeReg(Dest->getType());
1508 _uxt(T, Src0R);
1509 _mov(Dest, T);
1510 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001511 break;
1512 }
1513 case InstCast::Trunc: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001514 if (isVectorType(Dest->getType())) {
1515 UnimplementedError(Func->getContext()->getFlags());
1516 } else {
1517 Operand *Src0 = Inst->getSrc(0);
1518 if (Src0->getType() == IceType_i64)
1519 Src0 = loOperand(Src0);
1520 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1521 // t1 = trunc Src0RF; Dest = t1
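      // On ARM32 a plain mov suffices here: narrower integer types occupy
      // full 32-bit registers, and only i1 needs an explicit mask afterwards.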
1522 Variable *T = makeReg(Dest->getType());
1523 _mov(T, Src0RF);
1524 if (Dest->getType() == IceType_i1)
1525 _and(T, T, Ctx->getConstantInt1(1));
1526 _mov(Dest, T);
1527 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001528 break;
1529 }
  case InstCast::Fptrunc:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fpext: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Fptosi:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fptoui:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Sitofp:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Uitofp: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
1550 case InstCast::Bitcast: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001551 Operand *Src0 = Inst->getSrc(0);
1552 if (Dest->getType() == Src0->getType()) {
1553 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
1554 lowerAssign(Assign);
1555 return;
1556 }
Jan Voungb2d50842015-05-12 09:53:50 -07001557 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001558 break;
1559 }
1560 }
1561}
1562
1563void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
1564 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001565 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001566}
1567
1568void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
1569 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001570 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001571}
1572
1573void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001574 Variable *Dest = Inst->getDest();
1575 Operand *Src0 = Inst->getSrc(0);
1576 Operand *Src1 = Inst->getSrc(1);
1577
1578 if (isVectorType(Dest->getType())) {
1579 UnimplementedError(Func->getContext()->getFlags());
1580 return;
1581 }
1582
1583 // a=icmp cond, b, c ==>
1584 // GCC does:
1585 // cmp b.hi, c.hi or cmp b.lo, c.lo
1586 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
1587 // mov.<C1> t, #1 mov.<C1> t, #1
1588 // mov.<C2> t, #0 mov.<C2> t, #0
1589 // mov a, t mov a, t
  // where "cmp.eq b.lo, c.lo" is used for unsigned compares and
  // "sbcs t1, b.hi, c.hi" for signed compares. In some cases, b and c
  // also need to be swapped.
1593 //
1594 // LLVM does:
1595 // for EQ and NE:
1596 // eor t1, b.hi, c.hi
  //   eor t2, b.lo, c.lo
1598 // orrs t, t1, t2
1599 // mov.<C> t, #1
1600 // mov a, t
1601 //
  // That is just as short but has fewer dependencies (better ILP), at the
  // cost of more registers.
1604 //
1605 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
1606 // two unconditional mov #0, two cmps, two conditional mov #1,
  // and one conditional reg mov. That has few dependencies for good ILP,
1608 // but is a longer sequence.
1609 //
1610 // So, we are going with the GCC version since it's usually better (except
1611 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
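  // For example, with the GCC-style sequence an unsigned i64 "a = b < c"
  // lowers roughly to:
  //   cmp   b.hi, c.hi
  //   cmpeq b.lo, c.lo
  //   movlo t, #1
  //   movhs t, #0
  //   mov   a, t
  // (condition codes taken from TableIcmp64).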
1612 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1613 Constant *One = Ctx->getConstantInt32(1);
1614 if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
1617 assert(Index < TableIcmp64Size);
1618 Variable *Src0Lo, *Src0Hi;
1619 Operand *Src1LoRF, *Src1HiRF;
1620 if (TableIcmp64[Index].Swapped) {
1621 Src0Lo = legalizeToVar(loOperand(Src1));
1622 Src0Hi = legalizeToVar(hiOperand(Src1));
1623 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1624 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1625 } else {
1626 Src0Lo = legalizeToVar(loOperand(Src0));
1627 Src0Hi = legalizeToVar(hiOperand(Src0));
1628 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1629 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1630 }
1631 Variable *T = makeReg(IceType_i32);
1632 if (TableIcmp64[Index].IsSigned) {
1633 Variable *ScratchReg = makeReg(IceType_i32);
1634 _cmp(Src0Lo, Src1LoRF);
1635 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
1636 // ScratchReg isn't going to be used, but we need the
1637 // side-effect of setting flags from this operation.
1638 Context.insert(InstFakeUse::create(Func, ScratchReg));
1639 } else {
1640 _cmp(Src0Hi, Src1HiRF);
1641 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
1642 }
1643 _mov(T, One, TableIcmp64[Index].C1);
1644 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
1645 _mov(Dest, T);
1646 return;
1647 }
1648
1649 // a=icmp cond b, c ==>
1650 // GCC does:
1651 // <u/s>xtb tb, b
1652 // <u/s>xtb tc, c
1653 // cmp tb, tc
1654 // mov.C1 t, #0
1655 // mov.C2 t, #1
1656 // mov a, t
1657 // where the unsigned/sign extension is not needed for 32-bit.
1658 // They also have special cases for EQ and NE. E.g., for NE:
1659 // <extend to tb, tc>
1660 // subs t, tb, tc
1661 // movne t, #1
1662 // mov a, t
1663 //
1664 // LLVM does:
1665 // lsl tb, b, #<N>
1666 // mov t, #0
1667 // cmp tb, c, lsl #<N>
1668 // mov.<C> t, #1
1669 // mov a, t
1670 //
  // the left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the bits that actually matter (for 16-bit or 8-bit signed/unsigned).
  // For the unsigned case, LLVM does something similar to GCC and emits a
  // uxtb first; it is not clear why that special-casing is needed.
1675 //
1676 // We'll go with the LLVM way for now, since it's shorter and has just as
1677 // few dependencies.
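  // For example, an i16 compare uses ShiftAmt == 16, giving:
  //   lsl tb, b, #16
  //   cmp tb, c, lsl #16
  // so that only the low 16 bits of b and c participate in the compare.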
Jan Voung66c3d5e2015-06-04 17:02:31 -07001678 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
1679 assert(ShiftAmt >= 0);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001680 Constant *ShiftConst = nullptr;
1681 Variable *Src0R = nullptr;
1682 Variable *T = makeReg(IceType_i32);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001683 if (ShiftAmt) {
1684 ShiftConst = Ctx->getConstantInt32(ShiftAmt);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001685 Src0R = makeReg(IceType_i32);
1686 _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
1687 } else {
1688 Src0R = legalizeToVar(Src0);
1689 }
1690 _mov(T, Zero);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001691 if (ShiftAmt) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001692 Variable *Src1R = legalizeToVar(Src1);
1693 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
1694 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
1695 _cmp(Src0R, Src1RShifted);
1696 } else {
1697 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
1698 _cmp(Src0R, Src1RF);
1699 }
1700 _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
1701 _mov(Dest, T);
1702 return;
Jan Voungb36ad9b2015-04-21 17:01:49 -07001703}
1704
1705void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
1706 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001707 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001708}
1709
1710void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
1711 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
1712 case Intrinsics::AtomicCmpxchg: {
Jan Voungb2d50842015-05-12 09:53:50 -07001713 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001714 return;
1715 }
1716 case Intrinsics::AtomicFence:
Jan Voungb2d50842015-05-12 09:53:50 -07001717 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001718 return;
1719 case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
1721 // across the fence (both atomic and non-atomic). The InstARM32Mfence
1722 // instruction is currently marked coarsely as "HasSideEffects".
Jan Voungb2d50842015-05-12 09:53:50 -07001723 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001724 return;
1725 case Intrinsics::AtomicIsLockFree: {
Jan Voungb2d50842015-05-12 09:53:50 -07001726 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001727 return;
1728 }
1729 case Intrinsics::AtomicLoad: {
Jan Voungb2d50842015-05-12 09:53:50 -07001730 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001731 return;
1732 }
1733 case Intrinsics::AtomicRMW:
Jan Voungb2d50842015-05-12 09:53:50 -07001734 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001735 return;
1736 case Intrinsics::AtomicStore: {
Jan Voungb2d50842015-05-12 09:53:50 -07001737 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001738 return;
1739 }
1740 case Intrinsics::Bswap: {
Jan Voungb2d50842015-05-12 09:53:50 -07001741 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001742 return;
1743 }
1744 case Intrinsics::Ctpop: {
Jan Voungb2d50842015-05-12 09:53:50 -07001745 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001746 return;
1747 }
1748 case Intrinsics::Ctlz: {
Jan Voungb2d50842015-05-12 09:53:50 -07001749 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001750 return;
1751 }
1752 case Intrinsics::Cttz: {
Jan Voungb2d50842015-05-12 09:53:50 -07001753 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001754 return;
1755 }
1756 case Intrinsics::Fabs: {
Jan Voungb2d50842015-05-12 09:53:50 -07001757 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001758 return;
1759 }
1760 case Intrinsics::Longjmp: {
1761 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
1762 Call->addArg(Instr->getArg(0));
1763 Call->addArg(Instr->getArg(1));
1764 lowerCall(Call);
1765 return;
1766 }
1767 case Intrinsics::Memcpy: {
1768 // In the future, we could potentially emit an inline memcpy/memset, etc.
1769 // for intrinsic calls w/ a known length.
1770 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
1771 Call->addArg(Instr->getArg(0));
1772 Call->addArg(Instr->getArg(1));
1773 Call->addArg(Instr->getArg(2));
1774 lowerCall(Call);
1775 return;
1776 }
1777 case Intrinsics::Memmove: {
1778 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
1779 Call->addArg(Instr->getArg(0));
1780 Call->addArg(Instr->getArg(1));
1781 Call->addArg(Instr->getArg(2));
1782 lowerCall(Call);
1783 return;
1784 }
1785 case Intrinsics::Memset: {
1786 // The value operand needs to be extended to a stack slot size
1787 // because the PNaCl ABI requires arguments to be at least 32 bits
1788 // wide.
1789 Operand *ValOp = Instr->getArg(1);
1790 assert(ValOp->getType() == IceType_i8);
1791 Variable *ValExt = Func->makeVariable(stackSlotType());
1792 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
1793 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
1794 Call->addArg(Instr->getArg(0));
1795 Call->addArg(ValExt);
1796 Call->addArg(Instr->getArg(2));
1797 lowerCall(Call);
1798 return;
1799 }
1800 case Intrinsics::NaClReadTP: {
1801 if (Ctx->getFlags().getUseSandboxing()) {
Jan Voungb2d50842015-05-12 09:53:50 -07001802 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001803 } else {
1804 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
1805 lowerCall(Call);
1806 }
1807 return;
1808 }
1809 case Intrinsics::Setjmp: {
1810 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
1811 Call->addArg(Instr->getArg(0));
1812 lowerCall(Call);
1813 return;
1814 }
1815 case Intrinsics::Sqrt: {
Jan Voungb2d50842015-05-12 09:53:50 -07001816 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001817 return;
1818 }
1819 case Intrinsics::Stacksave: {
Jan Voungb2d50842015-05-12 09:53:50 -07001820 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001821 return;
1822 }
1823 case Intrinsics::Stackrestore: {
Jan Voungb2d50842015-05-12 09:53:50 -07001824 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001825 return;
1826 }
1827 case Intrinsics::Trap:
Jan Voungb2d50842015-05-12 09:53:50 -07001828 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001829 return;
1830 case Intrinsics::UnknownIntrinsic:
1831 Func->setError("Should not be lowering UnknownIntrinsic");
1832 return;
1833 }
1834 return;
1835}
1836
Jan Voungbefd03a2015-06-02 11:03:03 -07001837void TargetARM32::lowerLoad(const InstLoad *Load) {
1838 // A Load instruction can be treated the same as an Assign
1839 // instruction, after the source operand is transformed into an
1840 // OperandARM32Mem operand.
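  // For example, a load of i32 from pointer p becomes an assignment whose
  // source is a memory operand of the form [base, #0], with p legalized
  // into the base register by formMemoryOperand().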
1841 Type Ty = Load->getDest()->getType();
1842 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
1843 Variable *DestLoad = Load->getDest();
1844
  // TODO(jvoung): handle folding opportunities. Sign and zero extension
1846 // can be folded into a load.
1847 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
1848 lowerAssign(Assign);
Jan Voungb36ad9b2015-04-21 17:01:49 -07001849}
1850
1851void TargetARM32::doAddressOptLoad() {
Jan Voungb2d50842015-05-12 09:53:50 -07001852 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001853}
1854
1855void TargetARM32::randomlyInsertNop(float Probability) {
1856 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
1857 if (RNG.getTrueWithProbability(Probability)) {
Jan Voungb2d50842015-05-12 09:53:50 -07001858 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001859 }
1860}
1861
1862void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
1863 Func->setError("Phi found in regular instruction list");
1864}
1865
1866void TargetARM32::lowerRet(const InstRet *Inst) {
Jan Voungb2d50842015-05-12 09:53:50 -07001867 Variable *Reg = nullptr;
1868 if (Inst->hasRetValue()) {
Jan Voungb3401d22015-05-18 09:38:21 -07001869 Operand *Src0 = Inst->getRetValue();
1870 if (Src0->getType() == IceType_i64) {
1871 Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0);
1872 Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1);
1873 Reg = R0;
1874 Context.insert(InstFakeUse::create(Func, R1));
1875 } else if (isScalarFloatingType(Src0->getType())) {
1876 UnimplementedError(Func->getContext()->getFlags());
1877 } else if (isVectorType(Src0->getType())) {
1878 UnimplementedError(Func->getContext()->getFlags());
1879 } else {
1880 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001881 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
Jan Voungb3401d22015-05-18 09:38:21 -07001882 }
Jan Voungb2d50842015-05-12 09:53:50 -07001883 }
1884 // Add a ret instruction even if sandboxing is enabled, because
1885 // addEpilog explicitly looks for a ret instruction as a marker for
1886 // where to insert the frame removal instructions.
1887 // addEpilog is responsible for restoring the "lr" register as needed
1888 // prior to this ret instruction.
1889 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
1890 // Add a fake use of sp to make sure sp stays alive for the entire
1891 // function. Otherwise post-call sp adjustments get dead-code
1892 // eliminated. TODO: Are there more places where the fake use
1893 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
1894 // have a ret instruction.
1895 Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
1896 Context.insert(InstFakeUse::create(Func, SP));
Jan Voungb36ad9b2015-04-21 17:01:49 -07001897}
1898
1899void TargetARM32::lowerSelect(const InstSelect *Inst) {
1900 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001901 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001902}
1903
1904void TargetARM32::lowerStore(const InstStore *Inst) {
Jan Voungbefd03a2015-06-02 11:03:03 -07001905 Operand *Value = Inst->getData();
1906 Operand *Addr = Inst->getAddr();
1907 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
1908 Type Ty = NewAddr->getType();
1909
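  // An i64 store is split into two 32-bit str instructions, one for each
  // half of the value; narrower scalar types use a single str from a
  // register.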
1910 if (Ty == IceType_i64) {
1911 Variable *ValueHi = legalizeToVar(hiOperand(Value));
1912 Variable *ValueLo = legalizeToVar(loOperand(Value));
1913 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
1914 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
1915 } else if (isVectorType(Ty)) {
1916 UnimplementedError(Func->getContext()->getFlags());
1917 } else {
1918 Variable *ValueR = legalizeToVar(Value);
1919 _str(ValueR, NewAddr);
1920 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001921}
1922
1923void TargetARM32::doAddressOptStore() {
Jan Voungb2d50842015-05-12 09:53:50 -07001924 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001925}
1926
1927void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
1928 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001929 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001930}
1931
1932void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
Jan Voungb3401d22015-05-18 09:38:21 -07001933 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001934}
1935
1936// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
1937// preserve integrity of liveness analysis. Undef values are also
1938// turned into zeroes, since loOperand() and hiOperand() don't expect
1939// Undef input.
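// For example, "a = phi i64 [b, %B1], [c, %B2]" would become
//   a.lo = phi i32 [b.lo, %B1], [c.lo, %B2]
//   a.hi = phi i32 [b.hi, %B1], [c.hi, %B2]
// (sketch only; the ARM32 version is not implemented yet).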
1940void TargetARM32::prelowerPhis() {
Jan Voungb2d50842015-05-12 09:53:50 -07001941 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001942}
1943
1944// Lower the pre-ordered list of assignments into mov instructions.
1945// Also has to do some ad-hoc register allocation as necessary.
1946void TargetARM32::lowerPhiAssignments(CfgNode *Node,
1947 const AssignList &Assignments) {
1948 (void)Node;
1949 (void)Assignments;
Jan Voungb2d50842015-05-12 09:53:50 -07001950 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001951}
1952
Jan Voungb3401d22015-05-18 09:38:21 -07001953Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
1954 Variable *Reg = makeReg(Ty, RegNum);
1955 UnimplementedError(Func->getContext()->getFlags());
1956 return Reg;
1957}
1958
1959// Helper for legalize() to emit the right code to lower an operand to a
1960// register of the appropriate type.
1961Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
1962 Type Ty = Src->getType();
1963 Variable *Reg = makeReg(Ty, RegNum);
1964 if (isVectorType(Ty)) {
1965 UnimplementedError(Func->getContext()->getFlags());
1966 } else {
1967 // Mov's Src operand can really only be the flexible second operand type
1968 // or a register. Users should guarantee that.
1969 _mov(Reg, Src);
1970 }
1971 return Reg;
1972}
1973
1974Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
1975 int32_t RegNum) {
1976 // Assert that a physical register is allowed. To date, all calls
1977 // to legalize() allow a physical register. Legal_Flex converts
1978 // registers to the right type OperandARM32FlexReg as needed.
1979 assert(Allowed & Legal_Reg);
1980 // Go through the various types of operands:
1981 // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
1982 // Given the above assertion, if type of operand is not legal
1983 // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
1984 // to a register.
1985 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
1986 // Before doing anything with a Mem operand, we need to ensure
1987 // that the Base and Index components are in physical registers.
1988 Variable *Base = Mem->getBase();
1989 Variable *Index = Mem->getIndex();
1990 Variable *RegBase = nullptr;
1991 Variable *RegIndex = nullptr;
1992 if (Base) {
1993 RegBase = legalizeToVar(Base);
1994 }
1995 if (Index) {
1996 RegIndex = legalizeToVar(Index);
1997 }
1998 // Create a new operand if there was a change.
1999 if (Base != RegBase || Index != RegIndex) {
2000 // There is only a reg +/- reg or reg + imm form.
2001 // Figure out which to re-create.
2002 if (Mem->isRegReg()) {
2003 Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase, RegIndex,
2004 Mem->getShiftOp(), Mem->getShiftAmt(),
2005 Mem->getAddrMode());
2006 } else {
2007 Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase,
2008 Mem->getOffset(), Mem->getAddrMode());
2009 }
2010 }
2011 if (!(Allowed & Legal_Mem)) {
2012 Type Ty = Mem->getType();
2013 Variable *Reg = makeReg(Ty, RegNum);
2014 _ldr(Reg, Mem);
2015 From = Reg;
2016 } else {
2017 From = Mem;
2018 }
2019 return From;
2020 }
2021
2022 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
2023 if (!(Allowed & Legal_Flex)) {
2024 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
2025 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
2026 From = FlexReg->getReg();
2027 // Fall through and let From be checked as a Variable below,
2028 // where it may or may not need a register.
2029 } else {
2030 return copyToReg(Flex, RegNum);
2031 }
2032 } else {
2033 return copyToReg(Flex, RegNum);
2034 }
2035 } else {
2036 return From;
2037 }
2038 }
2039
2040 if (llvm::isa<Constant>(From)) {
2041 if (llvm::isa<ConstantUndef>(From)) {
2042 // Lower undefs to zero. Another option is to lower undefs to an
2043 // uninitialized register; however, using an uninitialized register
2044 // results in less predictable code.
2045 if (isVectorType(From->getType()))
2046 return makeVectorOfZeros(From->getType(), RegNum);
2047 From = Ctx->getConstantZero(From->getType());
2048 }
2049 // There should be no constants of vector type (other than undef).
2050 assert(!isVectorType(From->getType()));
2051 bool CanBeFlex = Allowed & Legal_Flex;
2052 if (auto C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
2053 uint32_t RotateAmt;
2054 uint32_t Immed_8;
2055 uint32_t Value = static_cast<uint32_t>(C32->getValue());
2056 // Check if the immediate will fit in a Flexible second operand,
2057 // if a Flexible second operand is allowed. We need to know the exact
2058 // value, so that rules out relocatable constants.
2059 // Also try the inverse and use MVN if possible.
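      // An ARM flexible immediate is an 8-bit value rotated right by an even
      // amount, so e.g. 0xFF000000 encodes directly, while 0xFFFFFF00 does
      // not, but its inverse 0x000000FF does, allowing "mvn reg, #0xFF".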
2060 if (CanBeFlex &&
2061 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
2062 return OperandARM32FlexImm::create(Func, From->getType(), Immed_8,
2063 RotateAmt);
2064 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
2065 ~Value, &RotateAmt, &Immed_8)) {
2066 auto InvertedFlex = OperandARM32FlexImm::create(Func, From->getType(),
2067 Immed_8, RotateAmt);
2068 Type Ty = From->getType();
2069 Variable *Reg = makeReg(Ty, RegNum);
2070 _mvn(Reg, InvertedFlex);
2071 return Reg;
2072 } else {
2073 // Do a movw/movt to a register.
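        // For example, 0x12345678 is materialized as:
        //   movw reg, #0x5678
        //   movt reg, #0x1234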
2074 Type Ty = From->getType();
2075 Variable *Reg = makeReg(Ty, RegNum);
2076 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
2077 _movw(Reg,
2078 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
2079 if (UpperBits != 0) {
2080 _movt(Reg, Ctx->getConstantInt32(UpperBits));
2081 }
2082 return Reg;
2083 }
2084 } else if (auto C = llvm::dyn_cast<ConstantRelocatable>(From)) {
2085 Type Ty = From->getType();
2086 Variable *Reg = makeReg(Ty, RegNum);
2087 _movw(Reg, C);
2088 _movt(Reg, C);
2089 return Reg;
2090 } else {
2091 // Load floats/doubles from literal pool.
2092 UnimplementedError(Func->getContext()->getFlags());
2093 From = copyToReg(From, RegNum);
2094 }
2095 return From;
2096 }
2097
2098 if (auto Var = llvm::dyn_cast<Variable>(From)) {
2099 // Check if the variable is guaranteed a physical register. This
2100 // can happen either when the variable is pre-colored or when it is
2101 // assigned infinite weight.
2102 bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
2103 // We need a new physical register for the operand if:
2104 // Mem is not allowed and Var isn't guaranteed a physical
2105 // register, or
2106 // RegNum is required and Var->getRegNum() doesn't match.
2107 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
2108 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
2109 From = copyToReg(From, RegNum);
2110 }
2111 return From;
2112 }
2113 llvm_unreachable("Unhandled operand kind in legalize()");
2114
2115 return From;
2116}
2117
2118// Provide a trivial wrapper to legalize() for this common usage.
2119Variable *TargetARM32::legalizeToVar(Operand *From, int32_t RegNum) {
2120 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
2121}
2122
Jan Voungbefd03a2015-06-02 11:03:03 -07002123OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
2124 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
2125 // It may be the case that address mode optimization already creates
2126 // an OperandARM32Mem, so in that case it wouldn't need another level
2127 // of transformation.
2128 if (Mem) {
2129 return llvm::cast<OperandARM32Mem>(legalize(Mem));
2130 }
2131 // If we didn't do address mode optimization, then we only
2132 // have a base/offset to work with. ARM always requires a base
2133 // register, so just use that to hold the operand.
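  // The result is therefore a [base, #0] addressing form, with the original
  // operand legalized into the base register.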
2134 Variable *Base = legalizeToVar(Operand);
2135 return OperandARM32Mem::create(
2136 Func, Ty, Base,
2137 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
2138}
2139
Jan Voungb3401d22015-05-18 09:38:21 -07002140Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
2141 // There aren't any 64-bit integer registers for ARM32.
2142 assert(Type != IceType_i64);
2143 Variable *Reg = Func->makeVariable(Type);
2144 if (RegNum == Variable::NoRegister)
2145 Reg->setWeightInfinite();
2146 else
2147 Reg->setRegNum(RegNum);
2148 return Reg;
2149}
2150
Jan Voung55500db2015-05-26 14:25:40 -07002151void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
2152 assert(llvm::isPowerOf2_32(Align));
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002153 uint32_t RotateAmt;
Jan Voung55500db2015-05-26 14:25:40 -07002154 uint32_t Immed_8;
2155 Operand *Mask;
2156 // Use AND or BIC to mask off the bits, depending on which immediate fits
2157 // (if it fits at all). Assume Align is usually small, in which case BIC
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002158 // works better. Thus, this rounds down to the alignment.
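  // For example, Align == 16 gives "bic reg, reg, #15"; if Align - 1 cannot
  // be encoded as a flexible immediate, fall back to "and reg, reg, #-Align"
  // (with -Align possibly materialized via movw/movt by legalize()).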
Jan Voung55500db2015-05-26 14:25:40 -07002159 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
2160 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
2161 _bic(Reg, Reg, Mask);
2162 } else {
2163 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
2164 _and(Reg, Reg, Mask);
2165 }
2166}
2167
Jan Voungb36ad9b2015-04-21 17:01:49 -07002168void TargetARM32::postLower() {
2169 if (Ctx->getFlags().getOptLevel() == Opt_m1)
2170 return;
Jan Voungb3401d22015-05-18 09:38:21 -07002171 inferTwoAddress();
Jan Voungb36ad9b2015-04-21 17:01:49 -07002172}
2173
2174void TargetARM32::makeRandomRegisterPermutation(
2175 llvm::SmallVectorImpl<int32_t> &Permutation,
2176 const llvm::SmallBitVector &ExcludeRegisters) const {
2177 (void)Permutation;
2178 (void)ExcludeRegisters;
Jan Voungb2d50842015-05-12 09:53:50 -07002179 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002180}
2181
void TargetARM32::emit(const ConstantInteger32 *C) const {
  if (!ALLOW_DUMP)
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

void TargetARM32::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

void TargetARM32::emit(const ConstantFloat *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantDouble *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}
Jan Voungb36ad9b2015-04-21 17:01:49 -07002206
2207TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
2208 : TargetDataLowering(Ctx) {}
2209
John Porto8b1a7052015-06-17 13:20:08 -07002210void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
2211 const IceString &SectionSuffix) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002212 switch (Ctx->getFlags().getOutFileType()) {
2213 case FT_Elf: {
2214 ELFObjectWriter *Writer = Ctx->getObjectWriter();
John Porto8b1a7052015-06-17 13:20:08 -07002215 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002216 } break;
2217 case FT_Asm:
2218 case FT_Iasm: {
2219 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
2220 OstreamLocker L(Ctx);
John Porto8b1a7052015-06-17 13:20:08 -07002221 for (const VariableDeclaration *Var : Vars) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002222 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
John Porto8b1a7052015-06-17 13:20:08 -07002223 emitGlobal(*Var, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002224 }
2225 }
2226 } break;
2227 }
2228}
2229
John Porto0f86d032015-06-15 07:44:27 -07002230void TargetDataARM32::lowerConstants() {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002231 if (Ctx->getFlags().getDisableTranslation())
2232 return;
Jan Voungb2d50842015-05-12 09:53:50 -07002233 UnimplementedError(Ctx->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002234}
2235
Jan Voungfb792842015-06-11 15:27:50 -07002236TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
2237 : TargetHeaderLowering(Ctx) {}
2238
2239void TargetHeaderARM32::lower() {
2240 OstreamLocker L(Ctx);
2241 Ostream &Str = Ctx->getStrEmit();
2242 Str << ".syntax unified\n";
2243 // Emit build attributes in format: .eabi_attribute TAG, VALUE.
2244 // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
2245 // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
2246 //
  // Tag_conformance should be emitted first in a file-scope
2248 // sub-subsection of the first public subsection of the attributes.
2249 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least Cortex-A15, but target Cortex-A9 for wider
  // compatibility.
2251 Str << ".cpu cortex-a9\n"
2252 << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
2253 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
2254 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
2255 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
2256 // TODO(jvoung): check other CPU features like HW div.
2257 Str << ".fpu neon\n"
2258 << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
2259 << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
2260 << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
2261 << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
2262 << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
2263 << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
2264 << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
2265 << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
2266 << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
2267 << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
2268 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
2269 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
2270 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
2271 // However, for compatibility with current NaCl LLVM, don't claim that.
2272 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
2273}
2274
Jan Voungb36ad9b2015-04-21 17:01:49 -07002275} // end of namespace Ice