//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringARM32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringARM32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"
namespace Ice {

namespace {

void UnimplementedError(const ClFlags &Flags) {
  if (!Flags.getSkipUnimplemented()) {
    // Use llvm_unreachable instead of report_fatal_error, since it gives
    // better stack traces.
    llvm_unreachable("Not yet implemented");
    abort();
  }
}

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
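// E.g., an unsigned less-than (ult) maps to the LO/CC condition, so the
// result can be set with a "movlo"-style conditional move.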

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { CondARM32::C_32 }                                                          \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference
// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
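// E.g., an unsigned compare can test the high words first (cmp) and only
// consider the low words when they are equal, while a signed compare
// subtracts the low words first and folds in the high words with sbc so
// the flags reflect the full 64-bit result; C1 and C2 then set the 1 or 0
// value via two conditional moves.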
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
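// E.g., with the 16-byte stack alignment, applyStackAlignment(20) == 32.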
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment required for the given type.
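// E.g., applyStackAlignmentTy(4, IceType_i64) == 8, since i64 has a natural
// alignment of 8 bytes.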
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only
  // aligns vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

// Conservatively check if at compile time we know that the operand is
// definitely a non-zero integer.
bool isGuaranteedNonzeroInt(const Operand *Op) {
  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
    return Const->getValue() != 0;
  }
  return false;
}

} // end of anonymous namespace

TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  IntegerRegisters[RegARM32::val] = isInt;                                     \
  FloatRegisters[RegARM32::val] = isFP;                                        \
  VectorRegisters[RegARM32::val] = isFP;                                       \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  name,
    REGARM32_TABLE
#undef X
};

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark SP and LR as "arguments" so that they are considered
    // live upon function entry.
    if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

void TargetARM32::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  // TODO(jvoung): Handle out of range. Perhaps we need a scratch register
  // to materialize a larger offset.
  constexpr bool SignExt = false;
  if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = IceType_i32;
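  // The emitted operand looks like, e.g., "[sp, #16]" or "[fp, #8]", or just
  // "[sp]" when the offset is zero.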
  Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  int32_t RegLo, RegHi;
  // Always start i64 registers at an even register, so this may end
  // up padding away a register.
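  // E.g., if r0 already holds an i32 argument, the next i64 argument is
  // assigned the (r2, r3) pair and r1 is skipped.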
  if (NumGPRRegsUsed % 2 != 0) {
    ++NumGPRRegsUsed;
  }
  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  // If this bumps us past the boundary, don't allocate to a register
  // and leave any previously speculatively consumed registers as consumed.
  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
    return false;
  Regs->first = RegLo;
  Regs->second = RegHi;
  return true;
}

bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  return true;
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    // TODO(jvoung): handle float/vector types.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> RegPair;
      if (!CC.I64InRegs(&RegPair))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegPair.first);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegPair.second);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      assert(Ty == IceType_i32);
      int32_t RegNum;
      if (!CC.I32InReg(&RegNum))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
    }
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load
  // the value from the stack slot.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandARM32Mem *Mem = OperandARM32Mem::create(
        Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
                                Ctx->getConstantInt32(Arg->getStackOffset())));
    if (isVectorType(Arg->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _ldr(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandARM32Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
  //  * SpillAreaSizeBytes: areas 2 - 6
  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook =
      [](Variable *) { return false; };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
  // Unlike x86, ARM also has callee-saved float/vector registers.
  // The "vpush" instruction can handle a whole list of float/vector
  // registers, but it only handles contiguous sequences of registers
  // by specifying the start and the length.
  VarList GPRsToPreserve;
  GPRsToPreserve.reserve(CalleeSaves.size());
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
    assert(RegsUsed[RegARM32::Reg_fp] == false);
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      // TODO(jvoung): do separate vpush for each floating point
      // register segment and += 4, or 8 depending on type.
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      GPRsToPreserve.push_back(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!GPRsToPreserve.empty())
    _push(GPRsToPreserve);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, FP));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align SP if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the IP inter-procedural scratch register if needed to legalize
    // the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  TargetARM32::CallingConv CC;
  for (Variable *Arg : Args) {
    Type Ty = Arg->getType();
    bool InRegs = false;
    // Skip arguments passed in registers.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> DummyRegs;
      InRegs = CC.I64InRegs(&DummyRegs);
    } else {
      assert(Ty == IceType_i32);
      int32_t DummyReg;
      InRegs = CC.I32InReg(&DummyReg);
    }
    if (!InRegs)
      finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of SP before the assignment of SP=FP keeps
    // previous SP adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
    _mov(SP, FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the IP inter-procedural scratch register if needed to legalize
      // the immediate. It shouldn't be live at this point.
      Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                    Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
      _add(SP, SP, AddAmount);
    }
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  VarList GPRsToRestore;
  GPRsToRestore.reserve(CalleeSaves.size());
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
  }
  // Pop registers in ascending order just like push
  // (instead of in reverse order).
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
  if (!GPRsToRestore.empty())
    _pop(GPRsToRestore);

  if (!Ctx->getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  // bundle_lock
  // bic lr, #0xc000000f
  // bx lr
  // bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
  // restrict to the lower 1GB as well.
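  // The bic clears the low 4 bits of the return target (16-byte bundle
  // alignment) and the top 2 bits (keeping the target address below 1GB).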
  Operand *RetMask =
      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
  _bundle_lock();
  _bic(LR, LR, RetMask);
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

void TargetARM32::split64(Variable *Var) {
  assert(Var->getType() == IceType_i64);
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (BuildDefs::dump()) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  }
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    if (Mem->isRegReg()) {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getIndex(), Mem->getShiftOp(),
                                     Mem->getShiftAmt(), Mem->getAddrMode());
    } else {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getOffset(), Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects
    // in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else.
      // Thus, prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      Variable *Base = Mem->getBase();
      ConstantInteger32 *Offset = Mem->getOffset();
      assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
      int32_t NextOffsetVal = Offset->getValue() + 4;
      const bool SignExt = false;
      if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
        // We have to make a temp variable and add 4 to either Base or Offset.
        // If we add 4 to Offset, this will convert a non-RegReg addressing
        // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
        // RegReg addressing modes, prefer adding to base and replacing
        // instead. Thus we leave the old offset alone.
        Constant *Four = Ctx->getConstantInt32(4);
        Variable *NewBase = Func->makeVariable(Base->getType());
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               NewBase, Base, Four));
        Base = NewBase;
      } else {
        Offset = llvm::cast<ConstantInteger32>(
            Ctx->getConstantInt32(NextOffsetVal));
      }
      return OperandARM32Mem::create(Func, SplitType, Base, Offset,
                                     Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegARM32::Reg_NUM);

#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP)                                                                \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = false;

  REGARM32_TABLE

#undef X

  return Registers;
}

void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
  UsesFramePointer = true;
  // Conservatively require the stack to be aligned. Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca. All the alloca code ensures that the
  // stack alignment is preserved after the alloca. The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of SP, etc.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
    alignRegisterPow2(SP, Alignment);
  }
  Operand *TotalSize = Inst->getSizeInBytes();
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
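    // E.g., a constant alloca of 20 bytes with 16-byte alignment reserves
    // 32 bytes, keeping SP a multiple of the stack alignment.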
    Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
    _sub(SP, SP, SubAmount);
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime.
    TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
  _mov(Dest, SP);
}

void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToVar(SrcLo);
  switch (Ty) {
  default:
    llvm_unreachable("Unexpected type");
  case IceType_i8: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i16: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i32: {
    _tst(SrcLoReg, SrcLoReg);
    break;
  }
  case IceType_i64: {
    Variable *ScratchReg = makeReg(IceType_i32);
    _orrs(ScratchReg, SrcLoReg, SrcHi);
    // ScratchReg isn't going to be used, but we need the
    // side-effect of setting flags from this operation.
    Context.insert(InstFakeUse::create(Func, ScratchReg));
  }
  }
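  // The Z flag is now set iff the divisor is zero: branch around the trap
  // when the divisor is non-zero (NE), otherwise fall through into it.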
  InstARM32Label *Label = InstARM32Label::create(Func, this);
  _br(Label, CondARM32::NE);
  _trap();
  Context.insert(Label);
}

void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                               Operand *Src1, ExtInstr ExtFunc,
                               DivInstr DivFunc, const char *DivHelperName,
                               bool IsRemainder) {
  div0Check(Dest->getType(), Src1, nullptr);
  Variable *Src1R = legalizeToVar(Src1);
  Variable *T0R = Src0R;
  Variable *T1R = Src1R;
  if (Dest->getType() != IceType_i32) {
    T0R = makeReg(IceType_i32);
    (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
    T1R = makeReg(IceType_i32);
    (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
  }
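  // With hardware divide support, emit udiv/sdiv directly (plus an mls to
  // recover the remainder); otherwise fall back to a runtime helper call.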
  if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
    (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
    if (IsRemainder) {
      Variable *T2 = makeReg(IceType_i32);
      _mls(T2, T, T1R, T0R);
      T = T2;
    }
    _mov(Dest, T);
  } else {
    constexpr SizeT MaxSrcs = 2;
    InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
    Call->addArg(T0R);
    Call->addArg(T1R);
    lowerCall(Call);
  }
  return;
}

void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
  // to legalize Src0 to flex or Src1 to flex and there is a reversible
  // instruction. E.g., reverse subtract with immediate, register vs
  // register, immediate.
  // Or it may be the case that the operands aren't swapped, but the
  // bits can be flipped and a different operation applied.
  // E.g., use BIC (bit clear) instead of AND for some masks.
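  // For instance, "a = b & 0xFFFFFF00" could be emitted as
  // "bic a, b, #0xFF", since 0xFF is an encodable immediate while
  // 0xFFFFFF00 is not.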
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));
  if (Dest->getType() == IceType_i64) {
    // These helper-call-involved instructions are lowered in this
    // separate switch. This is because we would otherwise assume that
    // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused
    // with helper calls, and such unused/redundant instructions will fail
    // liveness analysis under -Om1 setting.
    switch (Inst->getOp()) {
    default:
      break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem: {
      // Check for divide by 0 (ARM normally doesn't trap, but we want it
      // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
      // to a register, which will hide a constant source operand.
      // Instead, check the not-yet-legalized Src1 to optimize-out a divide
      // by 0 check.
      if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
        if (C64->getValue() == 0) {
          _trap();
          return;
        }
      } else {
        Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
        Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
        div0Check(IceType_i64, Src1Lo, Src1Hi);
      }
      // Technically, ARM has their own aeabi routines, but we can use the
      // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div,
      // but uses the more standard __moddi3 for rem.
      const char *HelperName = "";
      switch (Inst->getOp()) {
      default:
        llvm_unreachable("Should have only matched div ops.");
        break;
      case InstArithmetic::Udiv:
        HelperName = H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperName = H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperName = H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperName = H_srem_i64;
        break;
      }
      constexpr SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
      return;
    }
    }
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *Src0RLo = legalizeToVar(loOperand(Src0));
    Variable *Src0RHi = legalizeToVar(hiOperand(Src0));
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = makeReg(DestLo->getType());
    Variable *T_Hi = makeReg(DestHi->getType());
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      return;
    case InstArithmetic::Add:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _adds(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _adc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::And:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _and(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _and(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Or:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _orr(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _orr(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Xor:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _eor(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _eor(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Sub:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _subs(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _sbc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Mul: {
      // GCC 4.8 does:
      // a=b*c ==>
      //   t_acc =(mul) (b.lo * c.hi)
      //   t_acc =(mla) (c.lo * b.hi) + t_acc
      //   t.hi,t.lo =(umull) b.lo * c.lo
      //   t.hi += t_acc
      //   a.lo = t.lo
      //   a.hi = t.hi
      //
      // LLVM does:
      //   t.hi,t.lo =(umull) b.lo * c.lo
      //   t.hi =(mla) (b.lo * c.hi) + t.hi
      //   t.hi =(mla) (b.hi * c.lo) + t.hi
      //   a.lo = t.lo
      //   a.hi = t.hi
      //
      // LLVM's lowering has fewer instructions, but more register pressure:
      // t.lo is live from beginning to end, while GCC delays the two-dest
      // instruction till the end, and kills c.hi immediately.
      Variable *T_Acc = makeReg(IceType_i32);
      Variable *T_Acc1 = makeReg(IceType_i32);
      Variable *T_Hi1 = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Variable *Src1RHi = legalizeToVar(Src1Hi);
      _mul(T_Acc, Src0RLo, Src1RHi);
      _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
      _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
      _add(T_Hi, T_Hi1, T_Acc1);
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }
    case InstArithmetic::Shl: {
      // a=b<<c ==>
      // GCC 4.8 does:
      //   sub t_c1, c.lo, #32
      //   lsl t_hi, b.hi, c.lo
      //   orr t_hi, t_hi, b.lo, lsl t_c1
      //   rsb t_c2, c.lo, #32
      //   orr t_hi, t_hi, b.lo, lsr t_c2
      //   lsl t_lo, b.lo, c.lo
      //   a.lo = t_lo
      //   a.hi = t_hi
      // Can be strength-reduced for constant-shifts, but we don't do
      // that for now.
      // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
      // On ARM, shifts only take the lower 8 bits of the shift register,
      // and saturate to the range 0-32, so the negative value will
      // saturate to 32.
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _sub(T_C1, Src1RLo, ThirtyTwo);
      _lsl(T_Hi, Src0RHi, Src1RLo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSL, T_C1));
      _rsb(T_C2, Src1RLo, ThirtyTwo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSR, T_C2));
      _mov(DestHi, T_Hi);
      Variable *T_Lo = makeReg(IceType_i32);
      // _mov seems to sometimes have better register preferencing than lsl.
      // Otherwise mov w/ lsl shifted register is a pseudo-instruction
      // that maps to lsl.
      _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                             OperandARM32::LSL, Src1RLo));
      _mov(DestLo, T_Lo);
      return;
    }
    case InstArithmetic::Lshr:
      // a=b>>c (unsigned) ==>
      // GCC 4.8 does:
      //   rsb t_c1, c.lo, #32
      //   lsr t_lo, b.lo, c.lo
      //   orr t_lo, t_lo, b.hi, lsl t_c1
      //   sub t_c2, c.lo, #32
      //   orr t_lo, t_lo, b.hi, lsr t_c2
      //   lsr t_hi, b.hi, c.lo
      //   a.lo = t_lo
      //   a.hi = t_hi
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==> ...
      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
      // and the next orr should be conditioned on PLUS. The last two
      // right shifts should also be arithmetic.
      bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _rsb(T_C1, Src1RLo, ThirtyTwo);
      _lsr(T_Lo, Src0RLo, Src1RLo);
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   OperandARM32::LSL, T_C1));
      OperandARM32::ShiftKind RShiftKind;
      CondARM32::Cond Pred;
      if (IsAshr) {
        _subs(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::ASR;
        Pred = CondARM32::PL;
      } else {
        _sub(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::LSR;
        Pred = CondARM32::AL;
      }
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   RShiftKind, T_C2),
           Pred);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(IceType_i32);
      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                             RShiftKind, Src1RLo));
      _mov(DestHi, T_Hi);
      return;
    }
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      return;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      llvm_unreachable("Call-helper-involved instruction for i64 type "
                       "should have already been handled before");
      return;
    }
    return;
  } else if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // Dest->getType() is a non-i64 scalar.
  Variable *Src0R = legalizeToVar(Src0);
  Variable *T = makeReg(Dest->getType());
  // Handle div/rem separately. They require a non-legalized Src1 to inspect
  // whether or not Src1 is a non-zero constant. Once legalized it is more
  // difficult to determine (constant may be moved to a register).
  switch (Inst->getOp()) {
  default:
    break;
  case InstArithmetic::Udiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_udiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Sdiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_sdiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Urem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_urem_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Srem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_srem_i32, IsRemainder);
    return;
  }
  }

  Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  switch (Inst->getOp()) {
  case InstArithmetic::_num:
    llvm_unreachable("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add:
    _add(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::And:
    _and(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Or:
    _orr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Xor:
    _eor(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Sub:
    _sub(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Mul: {
    Variable *Src1R = legalizeToVar(Src1RF);
    _mul(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Shl:
    _lsl(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Lshr:
    _lsr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Ashr:
    _asr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Udiv:
  case InstArithmetic::Sdiv:
  case InstArithmetic::Urem:
  case InstArithmetic::Srem:
    llvm_unreachable("Integer div/rem should have been handled earlier.");
    return;
  case InstArithmetic::Fadd:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fsub:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fmul:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fdiv:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Frem:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
}
1456
1457void TargetARM32::lowerAssign(const InstAssign *Inst) {
Jan Voungb3401d22015-05-18 09:38:21 -07001458 Variable *Dest = Inst->getDest();
1459 Operand *Src0 = Inst->getSrc(0);
1460 assert(Dest->getType() == Src0->getType());
1461 if (Dest->getType() == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07001462 Src0 = legalizeUndef(Src0);
1463 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1464 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
Jan Voungb3401d22015-05-18 09:38:21 -07001465 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1466 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1467 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1468 _mov(T_Lo, Src0Lo);
1469 _mov(DestLo, T_Lo);
1470 _mov(T_Hi, Src0Hi);
1471 _mov(DestHi, T_Hi);
1472 } else {
1473 Operand *SrcR;
1474 if (Dest->hasReg()) {
1475 // If Dest already has a physical register, then legalize the
1476 // Src operand into a Variable with the same register
1477 // assignment. This is mostly a workaround for advanced phi
1478 // lowering's ad-hoc register allocation which assumes no
1479 // register allocation is needed when at least one of the
1480 // operands is non-memory.
1481 // TODO(jvoung): check this for ARM.
1482 SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
1483 } else {
1484 // Dest could be a stack operand. Since we could potentially need
1485 // to do a Store (and store can only have Register operands),
1486 // legalize this to a register.
1487 SrcR = legalize(Src0, Legal_Reg);
1488 }
1489 if (isVectorType(Dest->getType())) {
1490 UnimplementedError(Func->getContext()->getFlags());
1491 } else {
1492 _mov(Dest, SrcR);
1493 }
1494 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001495}
1496
1497void TargetARM32::lowerBr(const InstBr *Inst) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001498 if (Inst->isUnconditional()) {
1499 _br(Inst->getTargetUnconditional());
1500 return;
1501 }
1502 Operand *Cond = Inst->getCondition();
1503 // TODO(jvoung): Handle folding opportunities.
1504
1505 Variable *Src0R = legalizeToVar(Cond);
1506 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1507 _cmp(Src0R, Zero);
Jan Voung6ec369e2015-06-30 11:03:15 -07001508 _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
Jan Voungb36ad9b2015-04-21 17:01:49 -07001509}
1510
Jan Voung3bfd99a2015-05-22 16:35:25 -07001511void TargetARM32::lowerCall(const InstCall *Instr) {
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001512 MaybeLeafFunc = false;
Jan Voungb0a8c242015-06-18 15:00:14 -07001513 NeedsStackAlignment = true;
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001514
Jan Voungb0a8c242015-06-18 15:00:14 -07001515 // Assign arguments to registers and stack. Also reserve stack.
1516 TargetARM32::CallingConv CC;
1517 // Pair of Arg Operand -> GPR number assignments.
1518 llvm::SmallVector<std::pair<Operand *, int32_t>,
1519 TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
1520 // Pair of Arg Operand -> stack offset.
1521 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
1522 int32_t ParameterAreaSizeBytes = 0;
1523
1524 // Classify each argument operand according to the location where the
1525 // argument is passed.
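  // For example (illustrative only): for a call f(i32 a, i64 b, i32 c),
  // a is assigned r0, b is assigned the even/odd pair (r2, r3) since i64
  // arguments start at an even GPR, and c no longer fits in r0-r3, so it
  // is passed on the stack.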
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
        InRegs = true;
        Operand *Lo = loOperand(Arg);
        Operand *Hi = hiOperand(Arg);
        GPRArgs.push_back(std::make_pair(Lo, Regs.first));
        GPRArgs.push_back(std::make_pair(Hi, Regs.second));
      }
    } else {
      assert(Ty == IceType_i32);
      int32_t Reg;
      if (CC.I32InReg(&Reg)) {
        InRegs = true;
        GPRArgs.push_back(std::make_pair(Arg, Reg));
      }
    }

    if (!InRegs) {
      ParameterAreaSizeBytes =
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
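  // E.g. (illustrative only), 12 bytes of outgoing stack arguments would be
  // padded up to 16 under a 16-byte stack alignment before the sp
  // adjustment below.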
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call sp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
    constexpr bool SignExt = false;
    if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &GPRArg : GPRArgs) {
    Variable *Reg = legalizeToVar(GPRArg.first, GPRArg.second);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
      ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
      break;
    case IceType_f32:
    case IceType_f64:
      // Use S and D regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Use Q regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // TODO(jvoung): Handle sandboxing.
  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();

  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to SP. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _add(SP, SP, AddAmount);
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
        UnimplementedError(Func->getContext()->getFlags());
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  }
}

void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1 = sxtb src; t2 = t1 asr #31; dst.lo = t1; dst.hi = t2
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0->getType() == IceType_i32) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else if (Src0->getType() == IceType_i1) {
        Variable *Src0R = legalizeToVar(Src0);
        _lsl(T_Lo, Src0R, ShiftAmt);
        _asr(T_Lo, T_Lo, ShiftAmt);
      } else {
        Variable *Src0R = legalizeToVar(Src0);
        _sxt(T_Lo, Src0R);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestHi->getType());
      if (Src0->getType() != IceType_i1) {
        _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
                                               OperandARM32::ASR, ShiftAmt));
      } else {
        // For i1, the asr instruction is already done above.
        _mov(T_Hi, T_Lo);
      }
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
      // lsl t1, src_reg, 31
      // asr t1, t1, 31
      // dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *T = makeReg(Dest->getType());
      _lsl(T, Src0R, ShiftAmt);
      _asr(T, T, ShiftAmt);
      _mov(Dest, T);
    } else {
      // t1 = sxt src; dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Variable *T = makeReg(Dest->getType());
      _sxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1 = uxtb src; dst.lo = t1; dst.hi = 0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // i32 and i1 can just take up the whole register.
      // i32 doesn't need uxt, while i1 will have an and mask later anyway.
      if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else {
        Variable *Src0R = legalizeToVar(Src0);
        _uxt(T_Lo, Src0R);
      }
      if (Src0->getType() == IceType_i1) {
        Constant *One = Ctx->getConstantInt32(1);
        _and(T_Lo, T_Lo, One);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestLo->getType());
      _mov(T_Hi, Zero);
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // t = Src0; t &= 1; Dest = t
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      Constant *One = Ctx->getConstantInt32(1);
      Variable *T = makeReg(Dest->getType());
      // Just use _mov instead of _uxt since all registers are 32-bit.
      // _uxt requires the source to be a register, so it could have required
      // a _mov from legalize anyway.
      _mov(T, Src0RF);
      _and(T, T, One);
      _mov(Dest, T);
    } else {
      // t1 = uxt src; dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Variable *T = makeReg(Dest->getType());
      _uxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      // t1 = trunc Src0RF; Dest = t1
      Variable *T = makeReg(Dest->getType());
      _mov(T, Src0RF);
      if (Dest->getType() == IceType_i1)
        _and(T, T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fpext: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Fptosi:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fptoui:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Sitofp:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Uitofp: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  }
}

void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));

  if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }

  // a=icmp cond, b, c ==>
  // GCC does:
  //   cmp b.hi, c.hi     or  cmp b.lo, c.lo
  //   cmp.eq b.lo, c.lo      sbcs t1, b.hi, c.hi
  //   mov.<C1> t, #1         mov.<C1> t, #1
  //   mov.<C2> t, #0         mov.<C2> t, #0
  //   mov a, t               mov a, t
  // where the "cmp.eq b.lo, c.lo" form is used for unsigned and the
  // "sbcs t1, b.hi, c.hi" form is used for signed compares. In some cases,
  // b and c need to be swapped as well.
  //
  // LLVM does, for EQ and NE:
  //   eor t1, b.hi, c.hi
  //   eor t2, b.lo, c.lo
  //   orrs t, t1, t2
  //   mov.<C> t, #1
  //   mov a, t
  //
  // That's nice in that it's just as short but has fewer dependencies
  // for better ILP at the cost of more registers.
  //
  // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
  // two unconditional mov #0, two cmps, two conditional mov #1,
  // and one conditional reg mov. That has few dependencies for good ILP,
  // but is a longer sequence.
  //
  // So, we are going with the GCC version since it's usually better (except
  // perhaps for eq/ne). We could revisit special-casing eq/ne later.
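  // Illustrative sketch only (assuming the usual table mapping for ult,
  // i.e. C1 = LO and C2 = HS): a = icmp ult i64 b, c would become:
  //   cmp   b.hi, c.hi
  //   cmpeq b.lo, c.lo
  //   movlo t, #1
  //   movhs t, #0
  //   mov   a, t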
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt32(1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Variable *Src0Lo, *Src0Hi;
    Operand *Src1LoRF, *Src1HiRF;
    if (TableIcmp64[Index].Swapped) {
      Src0Lo = legalizeToVar(loOperand(Src1));
      Src0Hi = legalizeToVar(hiOperand(Src1));
      Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
    } else {
      Src0Lo = legalizeToVar(loOperand(Src0));
      Src0Hi = legalizeToVar(hiOperand(Src0));
      Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
    }
    Variable *T = makeReg(IceType_i32);
    if (TableIcmp64[Index].IsSigned) {
      Variable *ScratchReg = makeReg(IceType_i32);
      _cmp(Src0Lo, Src1LoRF);
      _sbcs(ScratchReg, Src0Hi, Src1HiRF);
      // ScratchReg isn't going to be used, but we need the
      // side-effect of setting flags from this operation.
      Context.insert(InstFakeUse::create(Func, ScratchReg));
    } else {
      _cmp(Src0Hi, Src1HiRF);
      _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
    }
    _mov(T, One, TableIcmp64[Index].C1);
    _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
    _mov(Dest, T);
    return;
  }

  // a=icmp cond b, c ==>
  // GCC does:
  //   <u/s>xtb tb, b
  //   <u/s>xtb tc, c
  //   cmp tb, tc
  //   mov.C1 t, #0
  //   mov.C2 t, #1
  //   mov a, t
  // where the unsigned/sign extension is not needed for 32-bit.
  // They also have special cases for EQ and NE. E.g., for NE:
  //   <extend to tb, tc>
  //   subs t, tb, tc
  //   movne t, #1
  //   mov a, t
  //
  // LLVM does:
  //   lsl tb, b, #<N>
  //   mov t, #0
  //   cmp tb, c, lsl #<N>
  //   mov.<C> t, #1
  //   mov a, t
  //
  // The left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned).
  // For the unsigned case, LLVM does something similar to GCC and emits a
  // uxtb first; it's not clear to me why that special-casing is needed.
  //
  // We'll go with the LLVM way for now, since it's shorter and has just as
  // few dependencies.
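  // Illustrative sketch only: a = icmp slt i16 b, c (so ShiftAmt == 16)
  // would become:
  //   lsl   tb, b, #16
  //   mov   t, #0
  //   cmp   tb, c, lsl #16
  //   movlt t, #1
  //   mov   a, t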
  int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShiftAmt >= 0);
  Constant *ShiftConst = nullptr;
  Variable *Src0R = nullptr;
  Variable *T = makeReg(IceType_i32);
  if (ShiftAmt) {
    ShiftConst = Ctx->getConstantInt32(ShiftAmt);
    Src0R = makeReg(IceType_i32);
    _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
  } else {
    Src0R = legalizeToVar(Src0);
  }
  _mov(T, Zero);
  if (ShiftAmt) {
    Variable *Src1R = legalizeToVar(Src1);
    OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
        Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
    _cmp(Src0R, Src1RShifted);
  } else {
    Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
    _cmp(Src0R, Src1RF);
  }
  _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
  _mov(Dest, T);
  return;
}

void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicFence:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
    // across the fence (both atomic and non-atomic). The InstARM32Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicIsLockFree: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicLoad: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicRMW:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicStore: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    Type Ty = Val->getType();
    if (Ty == IceType_i64) {
      Val = legalizeUndef(Val);
      Variable *Val_Lo = legalizeToVar(loOperand(Val));
      Variable *Val_Hi = legalizeToVar(hiOperand(Val));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _rev(T_Lo, Val_Lo);
      _rev(T_Hi, Val_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else {
      assert(Ty == IceType_i32 || Ty == IceType_i16);
      Variable *ValR = legalizeToVar(Val);
      Variable *T = makeReg(Ty);
      _rev(T, ValR);
      if (Val->getType() == IceType_i16) {
        Operand *Sixteen =
            legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
        _lsr(T, T, Sixteen);
      }
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
                                        ? H_call_ctpop_i32
                                        : H_call_ctpop_i64,
                                    Dest, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches some 64-bit platforms' native instructions and
    // expects to fill a 64-bit reg. Thus, clear the upper bits of the dest
    // just in case the user doesn't do that in the IR or doesn't toss the bits
    // via truncate.
    if (Val->getType() == IceType_i64) {
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(DestHi, Zero);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToVar(loOperand(Val));
      ValHiR = legalizeToVar(hiOperand(Val));
    } else {
      ValLoR = legalizeToVar(Val);
    }
    lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Cttz: {
    // Essentially like Clz, but reverse the bits first.
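    // Illustrative sketch only: since cttz(x) == clz(rbit(x)), the i32 form
    // is roughly:
    //   rbit t, x
    //   clz  t, t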
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToVar(loOperand(Val));
      ValHiR = legalizeToVar(hiOperand(Val));
      Variable *TLo = makeReg(IceType_i32);
      Variable *THi = makeReg(IceType_i32);
      _rbit(TLo, ValLoR);
      _rbit(THi, ValHiR);
      ValLoR = THi;
      ValHiR = TLo;
    } else {
      ValLoR = legalizeToVar(Val);
      Variable *T = makeReg(IceType_i32);
      _rbit(T, ValLoR);
      ValLoR = T;
    }
    lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Fabs: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size because the
    // PNaCl ABI requires arguments to be at least 32 bits wide.
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    // Technically, ARM has its own __aeabi_memset, but we can use plain
    // memset too. The value and size arguments need to be flipped if we ever
    // decide to use __aeabi_memset.
    InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().getUseSandboxing()) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Stacksave: {
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, SP);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
    _mov_nonkillable(SP, Val);
    return;
  }
  case Intrinsics::Trap:
    _trap();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}

void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
  Type Ty = Dest->getType();
  assert(Ty == IceType_i32 || Ty == IceType_i64);
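  // Illustrative sketch only (register names are not the actual
  // allocations): the i64 path below emits roughly:
  //   clz   t, val.lo
  //   cmp   val.hi, #0
  //   add   t2, t, #32
  //   clzne t2, val.hi
  //   mov   dest.lo, t2
  //   mov   dest.hi, #0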
  Variable *T = makeReg(IceType_i32);
  _clz(T, ValLoR);
  if (Ty == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Zero =
        legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
    Operand *ThirtyTwo =
        legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
    _cmp(ValHiR, Zero);
    Variable *T2 = makeReg(IceType_i32);
    _add(T2, T, ThirtyTwo);
    _clz(T2, ValHiR, CondARM32::NE);
    // T2 is actually a source as well when the predicate is not AL
    // (since it may leave T2 alone). We use set_dest_nonkillable to
    // prolong the liveness of T2 as if it was used as a source.
    _set_dest_nonkillable();
    _mov(DestLo, T2);
    _mov(DestHi, Ctx->getConstantZero(IceType_i32));
    return;
  }
  _mov(Dest, T);
  return;
}

void TargetARM32::lowerLoad(const InstLoad *Load) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandARM32Mem operand.
  Type Ty = Load->getDest()->getType();
  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
  Variable *DestLoad = Load->getDest();

  // TODO(jvoung): handle folding opportunities. Sign and zero extension
  // can be folded into a load.
  InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}

void TargetARM32::doAddressOptLoad() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::randomlyInsertNop(float Probability) {
  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
  if (RNG.getTrueWithProbability(Probability)) {
    UnimplementedError(Func->getContext()->getFlags());
  }
}

void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}

void TargetARM32::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = Inst->getRetValue();
    if (Src0->getType() == IceType_i64) {
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0);
      Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1);
      Reg = R0;
      Context.insert(InstFakeUse::create(Func, R1));
    } else if (isScalarFloatingType(Src0->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isVectorType(Src0->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
      _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because
  // addEpilog explicitly looks for a ret instruction as a marker for
  // where to insert the frame removal instructions.
  // addEpilog is responsible for restoring the "lr" register as needed
  // prior to this ret instruction.
  _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
  // Add a fake use of sp to make sure sp stays alive for the entire
  // function. Otherwise post-call sp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Context.insert(InstFakeUse::create(Func, SP));
}

void TargetARM32::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  if (isFloatingType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // TODO(jvoung): handle folding opportunities.
  // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
  Variable *CmpOpnd0 = legalizeToVar(Condition);
  Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  _cmp(CmpOpnd0, CmpOpnd1);
  CondARM32::Cond Cond = CondARM32::NE;
  if (DestTy == IceType_i64) {
    SrcT = legalizeUndef(SrcT);
    SrcF = legalizeUndef(SrcF);
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }
  Variable *T = nullptr;
  SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
  _mov_nonkillable(T, SrcT, Cond);
  _mov(Dest, T);
}

void TargetARM32::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Variable *ValueHi = legalizeToVar(hiOperand(Value));
    Variable *ValueLo = legalizeToVar(loOperand(Value));
    _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
    _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
  } else if (isVectorType(Ty)) {
    UnimplementedError(Func->getContext()->getFlags());
  } else {
    Variable *ValueR = legalizeToVar(Value);
    _str(ValueR, NewAddr);
  }
}

void TargetARM32::doAddressOptStore() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
  // This implements the most naive possible lowering.
  // cmp a,val[0]; beq label[0]; cmp a,val[1]; beq label[1]; ... b default
  Operand *Src0 = Inst->getComparison();
  SizeT NumCases = Inst->getNumCases();
  if (Src0->getType() == IceType_i64) {
    Src0 = legalizeUndef(Src0);
    Variable *Src0Lo = legalizeToVar(loOperand(Src0));
    Variable *Src0Hi = legalizeToVar(hiOperand(Src0));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
      ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
      ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
      _cmp(Src0Lo, ValueLo);
      _cmp(Src0Hi, ValueHi, CondARM32::EQ);
      _br(Inst->getLabel(I), CondARM32::EQ);
    }
    _br(Inst->getLabelDefault());
    return;
  }

  // 32-bit integer
  Variable *Src0Var = legalizeToVar(Src0);
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Inst->getValue(I));
    Value = legalize(Value, Legal_Reg | Legal_Flex);
    _cmp(Src0Var, Value);
    _br(Inst->getLabel(I), CondARM32::EQ);
  }
  _br(Inst->getLabelDefault());
}

void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
  _trap();
}

void TargetARM32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func);
}

// Lower the pre-ordered list of assignments into mov instructions.
// Also has to do some ad-hoc register allocation as necessary.
void TargetARM32::lowerPhiAssignments(CfgNode *Node,
                                      const AssignList &Assignments) {
  (void)Node;
  (void)Assignments;
  UnimplementedError(Func->getContext()->getFlags());
}

Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  UnimplementedError(Func->getContext()->getFlags());
  return Reg;
}

// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    UnimplementedError(Func->getContext()->getFlags());
  } else {
    // Mov's Src operand can really only be the flexible second operand type
    // or a register. Users should guarantee that.
    _mov(Reg, Src);
  }
  return Reg;
}

Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandARM32FlexReg as needed.
  assert(Allowed & Legal_Reg);
  // Go through the various types of operands:
  // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
  // Given the above assertion, if the type of operand is not legal
  // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToVar(Base);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index);
    }
    // Create a new operand if there was a change.
    if (Base != RegBase || Index != RegIndex) {
      // There is only a reg +/- reg or reg + imm form.
      // Figure out which to re-create.
      if (Mem->isRegReg()) {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
                                      Mem->getShiftOp(), Mem->getShiftAmt(),
                                      Mem->getAddrMode());
      } else {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, Mem->getOffset(),
                                      Mem->getAddrMode());
      }
    }
    if (!(Allowed & Legal_Mem)) {
      Variable *Reg = makeReg(Ty, RegNum);
      _ldr(Reg, Mem);
      From = Reg;
    } else {
      From = Mem;
    }
    return From;
  }

  if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
    if (!(Allowed & Legal_Flex)) {
      if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
        if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
          From = FlexReg->getReg();
          // Fall through and let From be checked as a Variable below,
          // where it may or may not need a register.
        } else {
          return copyToReg(Flex, RegNum);
        }
      } else {
        return copyToReg(Flex, RegNum);
      }
    } else {
      return From;
    }
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));
    bool CanBeFlex = Allowed & Legal_Flex;
    if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      uint32_t RotateAmt;
      uint32_t Immed_8;
      uint32_t Value = static_cast<uint32_t>(C32->getValue());
      // Check if the immediate will fit in a Flexible second operand,
      // if a Flexible second operand is allowed. We need to know the exact
      // value, so that rules out relocatable constants.
      // Also try the inverse and use MVN if possible.
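      // For example (illustrative only): 0x00FF0000 fits directly as a
      // rotated 8-bit immediate, 0xFFFF00FF fits via MVN of its inverse
      // 0x0000FF00, while 0x12345678 fits neither form and falls through
      // to the movw/movt pair below.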
      if (CanBeFlex &&
          OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
        return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
      } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
                                  ~Value, &RotateAmt, &Immed_8)) {
        auto InvertedFlex =
            OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
        Variable *Reg = makeReg(Ty, RegNum);
        _mvn(Reg, InvertedFlex);
        return Reg;
      } else {
        // Do a movw/movt to a register.
        Variable *Reg = makeReg(Ty, RegNum);
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        _movw(Reg,
              UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
        if (UpperBits != 0) {
          _movt(Reg, Ctx->getConstantInt32(UpperBits));
        }
        return Reg;
      }
    } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      Variable *Reg = makeReg(Ty, RegNum);
      _movw(Reg, C);
      _movt(Reg, C);
      return Reg;
    } else {
      // Load floats/doubles from literal pool.
      UnimplementedError(Func->getContext()->getFlags());
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  if (auto Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");

  return From;
}

/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetARM32::legalizeToVar(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef
    // values to uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value,
    // then the result should be split and the lo and hi components will
    // need to go in uninitialized registers.
    if (isVectorType(Ty))
      return makeVectorOfZeros(Ty, RegNum);
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
  OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
  // It may be the case that address mode optimization already creates
  // an OperandARM32Mem, so in that case it wouldn't need another level
  // of transformation.
  if (Mem) {
    return llvm::cast<OperandARM32Mem>(legalize(Mem));
  }
  // If we didn't do address mode optimization, then we only
  // have a base/offset to work with. ARM always requires a base
  // register, so just use that to hold the operand.
  Variable *Base = legalizeToVar(Operand);
  return OperandARM32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}

Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for ARM32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
  assert(llvm::isPowerOf2_32(Align));
  uint32_t RotateAmt;
  uint32_t Immed_8;
  Operand *Mask;
  // Use AND or BIC to mask off the bits, depending on which immediate fits
  // (if it fits at all). Assume Align is usually small, in which case BIC
  // works better. Thus, this rounds down to the alignment.
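  // For example (illustrative only): Align == 16 emits
  //   bic reg, reg, #15
  // and an alignment whose Align - 1 mask cannot be encoded as a rotated
  // 8-bit immediate falls back to the AND form, with the -Align mask
  // legalized into a register or flex operand as needed.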
  if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
    Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
    _bic(Reg, Reg, Mask);
  } else {
    Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
    _and(Reg, Reg, Mask);
  }
}

void TargetARM32::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  inferTwoAddress();
}

void TargetARM32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

void TargetARM32::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

void TargetARM32::emit(const ConstantFloat *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantDouble *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const IceString &SectionSuffix) {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

void TargetDataARM32::lowerConstants() {
  if (Ctx->getFlags().getDisableTranslation())
    return;
  UnimplementedError(Ctx->getFlags());
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE.
  // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
  // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope
  // sub-subsection of the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least A15, but use A9 for wider compatibility.
  // For some reason, the LLVM ARM asm parser has the .cpu directive override
  // the mattr specified on the commandline. So to test hwdiv, we need to set
  // the .cpu directive higher (can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu cortex-a15\n";
  } else {
    Str << ".cpu cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu neon\n"
      << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

} // end of namespace Ice