//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringARM32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringARM32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

namespace Ice {

namespace {

void UnimplementedError(const ClFlags &Flags) {
  if (!Flags.getSkipUnimplemented()) {
    // Use llvm_unreachable instead of report_fatal_error, since
    // llvm_unreachable gives better stack traces.
    llvm_unreachable("Not yet implemented");
    abort();
  }
}

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
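// For example (an illustrative subset of the mapping; ICMPARM32_TABLE is
// authoritative): InstIcmp::Eq maps to CondARM32::EQ, InstIcmp::Ult to
// CondARM32::CC, and InstIcmp::Slt to CondARM32::LT.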

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { CondARM32::C_32 }                                                          \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference
// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
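// For example, a signed i64 less-than could be set up along these lines (a
// sketch of the shape implied by the table, not the exact emitted code):
//   cmp     b.lo, c.lo
//   sbcs    t, b.hi, c.hi   ; subtract-with-carry, only to set flags
//   mov.lt  a, #1           ; C1 for slt
//   mov.ge  a, #0           ; C2 for slt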
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

// In some cases, there are x-macro tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
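// E.g., with the 16-byte ARM32_STACK_ALIGNMENT_BYTES above,
// applyStackAlignment(20) == 32 and applyStackAlignment(16) == 16.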
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment required for the given type.
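// E.g., applyStackAlignmentTy(4, IceType_i64) == 8 (natural 8-byte
// alignment), while applyStackAlignmentTy(4, IceType_i32) == 4.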
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only
  // aligns vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

// Conservatively check if at compile time we know that the operand is
// definitely a non-zero integer.
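// E.g., this returns true for the constant 1, and false both for the
// constant 0 and for any non-constant operand (whose value is unknown here).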
bool isGuaranteedNonzeroInt(const Operand *Op) {
  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
    return Const->getValue() != 0;
  }
  return false;
}

} // end of anonymous namespace

TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isFP)                                                                \
  IntegerRegisters[RegARM32::val] = isInt;                                     \
  FloatRegisters[RegARM32::val] = isFP;                                        \
  VectorRegisters[RegARM32::val] = isFP;                                       \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isFP)                                                                \
  name,
    REGARM32_TABLE
#undef X
};

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark SP and LR as "arguments" so that they are considered
    // live upon function entry.
    if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

void TargetARM32::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  // TODO(jvoung): Handle out of range. Perhaps we need a scratch register
  // to materialize a larger offset.
  constexpr bool SignExt = false;
  if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = IceType_i32;
  Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}
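// E.g., a register-allocated i32 variable in r4 emits as "r4", while a stack
// slot 8 bytes above the frame/stack register emits as "[fp, #8]" (or
// "[sp, #8]" when no frame pointer is used).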

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  int32_t RegLo, RegHi;
  // Always start i64 registers at an even register, so this may end
  // up padding away a register.
  if (NumGPRRegsUsed % 2 != 0) {
    ++NumGPRRegsUsed;
  }
  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  // If this bumps us past the boundary, don't allocate to a register
  // and leave any previously speculatively consumed registers as consumed.
  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
    return false;
  Regs->first = RegLo;
  Regs->second = RegHi;
  return true;
}
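// E.g., for f(i32 a, i64 b): a is assigned r0, r1 is skipped as padding so
// that b starts at an even register, and b gets the r2/r3 pair. A following
// i64 argument would no longer fit in registers and would go on the stack.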

bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  return true;
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    // TODO(jvoung): handle float/vector types.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> RegPair;
      if (!CC.I64InRegs(&RegPair))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegPair.first);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegPair.second);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      assert(Ty == IceType_i32);
      int32_t RegNum;
      if (!CC.I32InReg(&RegNum))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
    }
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
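// E.g., for an i64 stack argument starting at InArgsSizeBytes == 0, the Lo
// half is assigned offset BasicFrameOffset + 0 and the Hi half offset
// BasicFrameOffset + 4, matching the little-endian in-memory layout.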
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load
  // the value from the stack slot.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandARM32Mem *Mem = OperandARM32Mem::create(
        Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
                                Ctx->getConstantInt32(Arg->getStackOffset())));
    if (isVectorType(Arg->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _ldr(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandARM32Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes:          area 1
  //  * SpillAreaPaddingBytes:           area 2
  //  * GlobalsSize:                     area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize:             area 5
  //  * SpillAreaSizeBytes:              areas 2 - 6
  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook =
      [](Variable *) { return false; };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
  // Unlike x86, ARM also has callee-saved float/vector registers.
  // The "vpush" instruction can handle a whole list of float/vector
  // registers, but it only handles contiguous sequences of registers
  // by specifying the start and the length.
  VarList GPRsToPreserve;
  GPRsToPreserve.reserve(CalleeSaves.size());
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
    assert(RegsUsed[RegARM32::Reg_fp] == false);
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      // TODO(jvoung): do separate vpush for each floating point
      // register segment and += 4, or 8 depending on type.
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      GPRsToPreserve.push_back(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!GPRsToPreserve.empty())
    _push(GPRsToPreserve);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, FP));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align SP if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the IP inter-procedural scratch register if needed to legalize
    // the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  TargetARM32::CallingConv CC;
  for (Variable *Arg : Args) {
    Type Ty = Arg->getType();
    bool InRegs = false;
    // Skip arguments passed in registers.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> DummyRegs;
      InRegs = CC.I64InRegs(&DummyRegs);
    } else {
      assert(Ty == IceType_i32);
      int32_t DummyReg;
      InRegs = CC.I32InReg(&DummyReg);
    }
    if (!InRegs)
      finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of SP before the assignment of SP=FP keeps
    // previous SP adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
    _mov(SP, FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the IP inter-procedural scratch register if needed to legalize
      // the immediate. It shouldn't be live at this point.
      Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                    Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
      _add(SP, SP, AddAmount);
    }
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  VarList GPRsToRestore;
  GPRsToRestore.reserve(CalleeSaves.size());
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
  }
  // Pop registers in ascending order just like push
  // (instead of in reverse order).
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
  if (!GPRsToRestore.empty())
    _pop(GPRsToRestore);

  if (!Ctx->getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  //   bundle_lock
  //   bic lr, #0xc000000f
  //   bx lr
  //   bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It also
  // needs to restrict the return target to the lower 1GB.
  Operand *RetMask =
      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
  _bundle_lock();
  _bic(LR, LR, RetMask);
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

void TargetARM32::split64(Variable *Var) {
  assert(Var->getType() == IceType_i64);
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (BuildDefs::dump()) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  }
  if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    if (Mem->isRegReg()) {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getIndex(), Mem->getShiftOp(),
                                     Mem->getShiftAmt(), Mem->getAddrMode());
    } else {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getOffset(), Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects
    // in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else.
      // Thus, prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      Variable *Base = Mem->getBase();
      ConstantInteger32 *Offset = Mem->getOffset();
      assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
      int32_t NextOffsetVal = Offset->getValue() + 4;
      const bool SignExt = false;
      if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
        // We have to make a temp variable and add 4 to either Base or Offset.
        // If we add 4 to Offset, this will convert a non-RegReg addressing
        // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
        // RegReg addressing modes, prefer adding to base and replacing
        // instead. Thus we leave the old offset alone.
        Constant *Four = Ctx->getConstantInt32(4);
        Variable *NewBase = Func->makeVariable(Base->getType());
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               NewBase, Base, Four));
        Base = NewBase;
      } else {
        Offset =
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
      }
      return OperandARM32Mem::create(Func, SplitType, Base, Offset,
                                     Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
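// E.g., for the i64 constant 0x1122334455667788, loOperand yields the i32
// constant 0x55667788 and hiOperand the i32 constant 0x11223344; for a
// memory operand [r0, #8], the halves are [r0, #8] and [r0, #12].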

llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegARM32::Reg_NUM);

#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isFP)                                                                \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = false;

  REGARM32_TABLE

#undef X

  return Registers;
}

void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
  UsesFramePointer = true;
  // Conservatively require the stack to be aligned. Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca. All the alloca code ensures that the
  // stack alignment is preserved after the alloca. The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of SP, etc.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
    alignRegisterPow2(SP, Alignment);
  }
  Operand *TotalSize = Inst->getSizeInBytes();
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
    _sub(SP, SP, SubAmount);
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime.
    TotalSize = legalize(TotalSize);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
  _mov(Dest, SP);
}
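// E.g., "a = alloca 20 bytes, align 4" with the default 16-byte stack
// alignment rounds the size up at compile time and emits roughly:
//   sub sp, sp, #32
//   mov a, sp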

void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToVar(SrcLo);
  switch (Ty) {
  default:
    llvm_unreachable("Unexpected type");
  case IceType_i8: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i16: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i32: {
    _tst(SrcLoReg, SrcLoReg);
    break;
  }
  case IceType_i64: {
    Variable *ScratchReg = makeReg(IceType_i32);
    _orrs(ScratchReg, SrcLoReg, SrcHi);
    // ScratchReg isn't going to be used, but we need the
    // side-effect of setting flags from this operation.
    Context.insert(InstFakeUse::create(Func, ScratchReg));
  }
  }
  InstARM32Label *Label = InstARM32Label::create(Func, this);
  _br(Label, CondARM32::NE);
  _trap();
  Context.insert(Label);
}
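// E.g., for an i64 divisor held in r0/r1, the i64 case above emits a
// sequence along the lines of (register names illustrative):
//   orrs ip, r0, r1     ; Z flag is set iff the full 64-bit value is zero
//   bne  skip
//   <trap>
// skip: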

void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                               Operand *Src1, ExtInstr ExtFunc,
                               DivInstr DivFunc, const char *DivHelperName,
                               bool IsRemainder) {
  div0Check(Dest->getType(), Src1, nullptr);
  Variable *Src1R = legalizeToVar(Src1);
  Variable *T0R = Src0R;
  Variable *T1R = Src1R;
  if (Dest->getType() != IceType_i32) {
    T0R = makeReg(IceType_i32);
    (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
    T1R = makeReg(IceType_i32);
    (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
  }
  if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
    (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
    if (IsRemainder) {
      Variable *T2 = makeReg(IceType_i32);
      _mls(T2, T, T1R, T0R);
      T = T2;
    }
    _mov(Dest, T);
  } else {
    constexpr SizeT MaxSrcs = 2;
    InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
    Call->addArg(T0R);
    Call->addArg(T1R);
    lowerCall(Call);
  }
  return;
}
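// E.g., an i32 urem on a CPU with hardware divide becomes (a sketch):
//   udiv t, a, b
//   mls  r, t, b, a     ; r = a - (a/b)*b
// whereas without hardware divide it becomes a call to the H_urem_i32
// helper routine with a and b as arguments.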
1094
Jan Voungb36ad9b2015-04-21 17:01:49 -07001095void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Jan Voungb3401d22015-05-18 09:38:21 -07001096 Variable *Dest = Inst->getDest();
1097 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
1098 // to legalize Src0 to flex or Src1 to flex and there is a reversible
1099 // instruction. E.g., reverse subtract with immediate, register vs
1100 // register, immediate.
1101 // Or it may be the case that the operands aren't swapped, but the
1102 // bits can be flipped and a different operation applied.
1103 // E.g., use BIC (bit clear) instead of AND for some masks.
Jan Voung29719972015-05-19 11:24:51 -07001104 Operand *Src0 = Inst->getSrc(0);
1105 Operand *Src1 = Inst->getSrc(1);
Jan Voungb3401d22015-05-18 09:38:21 -07001106 if (Dest->getType() == IceType_i64) {
Jan Voung70fa5252015-07-06 14:01:25 -07001107 // These helper-call-involved instructions are lowered in this
1108 // separate switch. This is because we would otherwise assume that
1109 // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused
1110 // with helper calls, and such unused/redundant instructions will fail
1111 // liveness analysis under -Om1 setting.
1112 switch (Inst->getOp()) {
1113 default:
1114 break;
1115 case InstArithmetic::Udiv:
1116 case InstArithmetic::Sdiv:
1117 case InstArithmetic::Urem:
1118 case InstArithmetic::Srem: {
1119 // Check for divide by 0 (ARM normally doesn't trap, but we want it
1120 // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
1121 // to a register, which will hide a constant source operand.
1122 // Instead, check the not-yet-legalized Src1 to optimize-out a divide
1123 // by 0 check.
1124 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1125 if (C64->getValue() == 0) {
1126 _trap();
1127 return;
1128 }
1129 } else {
1130 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1131 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1132 div0Check(IceType_i64, Src1Lo, Src1Hi);
1133 }
1134 // Technically, ARM has their own aeabi routines, but we can use the
1135 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div,
1136 // but uses the more standard __moddi3 for rem.
1137 const char *HelperName = "";
1138 switch (Inst->getOp()) {
1139 default:
1140 llvm_unreachable("Should have only matched div ops.");
1141 break;
1142 case InstArithmetic::Udiv:
1143 HelperName = H_udiv_i64;
1144 break;
1145 case InstArithmetic::Sdiv:
1146 HelperName = H_sdiv_i64;
1147 break;
1148 case InstArithmetic::Urem:
1149 HelperName = H_urem_i64;
1150 break;
1151 case InstArithmetic::Srem:
1152 HelperName = H_srem_i64;
1153 break;
1154 }
1155 constexpr SizeT MaxSrcs = 2;
1156 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1157 Call->addArg(Src0);
1158 Call->addArg(Src1);
1159 lowerCall(Call);
1160 return;
1161 }
1162 }
Jan Voung29719972015-05-19 11:24:51 -07001163 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1164 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1165 Variable *Src0RLo = legalizeToVar(loOperand(Src0));
1166 Variable *Src0RHi = legalizeToVar(hiOperand(Src0));
Jan Voung70fa5252015-07-06 14:01:25 -07001167 Operand *Src1Lo = loOperand(Src1);
1168 Operand *Src1Hi = hiOperand(Src1);
Jan Voung29719972015-05-19 11:24:51 -07001169 Variable *T_Lo = makeReg(DestLo->getType());
1170 Variable *T_Hi = makeReg(DestHi->getType());
1171 switch (Inst->getOp()) {
1172 case InstArithmetic::_num:
1173 llvm_unreachable("Unknown arithmetic operator");
Jan Voung70fa5252015-07-06 14:01:25 -07001174 return;
Jan Voung29719972015-05-19 11:24:51 -07001175 case InstArithmetic::Add:
Jan Voung70fa5252015-07-06 14:01:25 -07001176 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1177 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001178 _adds(T_Lo, Src0RLo, Src1Lo);
1179 _mov(DestLo, T_Lo);
1180 _adc(T_Hi, Src0RHi, Src1Hi);
1181 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001182 return;
Jan Voung29719972015-05-19 11:24:51 -07001183 case InstArithmetic::And:
Jan Voung70fa5252015-07-06 14:01:25 -07001184 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1185 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001186 _and(T_Lo, Src0RLo, Src1Lo);
1187 _mov(DestLo, T_Lo);
1188 _and(T_Hi, Src0RHi, Src1Hi);
1189 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001190 return;
Jan Voung29719972015-05-19 11:24:51 -07001191 case InstArithmetic::Or:
Jan Voung70fa5252015-07-06 14:01:25 -07001192 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1193 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001194 _orr(T_Lo, Src0RLo, Src1Lo);
1195 _mov(DestLo, T_Lo);
1196 _orr(T_Hi, Src0RHi, Src1Hi);
1197 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001198 return;
Jan Voung29719972015-05-19 11:24:51 -07001199 case InstArithmetic::Xor:
Jan Voung70fa5252015-07-06 14:01:25 -07001200 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1201 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001202 _eor(T_Lo, Src0RLo, Src1Lo);
1203 _mov(DestLo, T_Lo);
1204 _eor(T_Hi, Src0RHi, Src1Hi);
1205 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001206 return;
Jan Voung29719972015-05-19 11:24:51 -07001207 case InstArithmetic::Sub:
Jan Voung70fa5252015-07-06 14:01:25 -07001208 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1209 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001210 _subs(T_Lo, Src0RLo, Src1Lo);
1211 _mov(DestLo, T_Lo);
1212 _sbc(T_Hi, Src0RHi, Src1Hi);
1213 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001214 return;
Jan Voung29719972015-05-19 11:24:51 -07001215 case InstArithmetic::Mul: {
1216 // GCC 4.8 does:
1217 // a=b*c ==>
1218 // t_acc =(mul) (b.lo * c.hi)
1219 // t_acc =(mla) (c.lo * b.hi) + t_acc
1220 // t.hi,t.lo =(umull) b.lo * c.lo
1221 // t.hi += t_acc
1222 // a.lo = t.lo
1223 // a.hi = t.hi
1224 //
1225 // LLVM does:
1226 // t.hi,t.lo =(umull) b.lo * c.lo
1227 // t.hi =(mla) (b.lo * c.hi) + t.hi
1228 // t.hi =(mla) (b.hi * c.lo) + t.hi
1229 // a.lo = t.lo
1230 // a.hi = t.hi
1231 //
1232 // LLVM's lowering has fewer instructions, but more register pressure:
1233 // t.lo is live from beginning to end, while GCC delays the two-dest
1234 // instruction till the end, and kills c.hi immediately.
1235 Variable *T_Acc = makeReg(IceType_i32);
1236 Variable *T_Acc1 = makeReg(IceType_i32);
1237 Variable *T_Hi1 = makeReg(IceType_i32);
1238 Variable *Src1RLo = legalizeToVar(Src1Lo);
1239 Variable *Src1RHi = legalizeToVar(Src1Hi);
1240 _mul(T_Acc, Src0RLo, Src1RHi);
1241 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1242 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1243 _add(T_Hi, T_Hi1, T_Acc1);
1244 _mov(DestLo, T_Lo);
1245 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001246 return;
1247 }
Jan Voung66c3d5e2015-06-04 17:02:31 -07001248 case InstArithmetic::Shl: {
1249 // a=b<<c ==>
1250 // GCC 4.8 does:
1251 // sub t_c1, c.lo, #32
1252 // lsl t_hi, b.hi, c.lo
1253 // orr t_hi, t_hi, b.lo, lsl t_c1
1254 // rsb t_c2, c.lo, #32
1255 // orr t_hi, t_hi, b.lo, lsr t_c2
1256 // lsl t_lo, b.lo, c.lo
1257 // a.lo = t_lo
1258 // a.hi = t_hi
1259 // Can be strength-reduced for constant-shifts, but we don't do
1260 // that for now.
1261 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
1262 // On ARM, shifts only take the lower 8 bits of the shift register,
1263 // and saturate to the range 0-32, so the negative value will
1264 // saturate to 32.
1265 Variable *T_Hi = makeReg(IceType_i32);
1266 Variable *Src1RLo = legalizeToVar(Src1Lo);
1267 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1268 Variable *T_C1 = makeReg(IceType_i32);
1269 Variable *T_C2 = makeReg(IceType_i32);
1270 _sub(T_C1, Src1RLo, ThirtyTwo);
1271 _lsl(T_Hi, Src0RHi, Src1RLo);
1272 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1273 OperandARM32::LSL, T_C1));
1274 _rsb(T_C2, Src1RLo, ThirtyTwo);
1275 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1276 OperandARM32::LSR, T_C2));
1277 _mov(DestHi, T_Hi);
1278 Variable *T_Lo = makeReg(IceType_i32);
1279 // _mov seems to sometimes have better register preferencing than lsl.
1280 // Otherwise mov w/ lsl shifted register is a pseudo-instruction
1281 // that maps to lsl.
1282 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1283 OperandARM32::LSL, Src1RLo));
1284 _mov(DestLo, T_Lo);
Jan Voung70fa5252015-07-06 14:01:25 -07001285 return;
1286 }
Jan Voung29719972015-05-19 11:24:51 -07001287 case InstArithmetic::Lshr:
Jan Voung66c3d5e2015-06-04 17:02:31 -07001288 // a=b>>c (unsigned) ==>
1289 // GCC 4.8 does:
1290 // rsb t_c1, c.lo, #32
1291 // lsr t_lo, b.lo, c.lo
1292 // orr t_lo, t_lo, b.hi, lsl t_c1
1293 // sub t_c2, c.lo, #32
1294 // orr t_lo, t_lo, b.hi, lsr t_c2
1295 // lsr t_hi, b.hi, c.lo
1296 // a.lo = t_lo
1297 // a.hi = t_hi
1298 case InstArithmetic::Ashr: {
1299 // a=b>>c (signed) ==> ...
1300 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
1301 // and the next orr should be conditioned on PLUS. The last two
1302 // right shifts should also be arithmetic.
1303 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1304 Variable *T_Lo = makeReg(IceType_i32);
1305 Variable *Src1RLo = legalizeToVar(Src1Lo);
1306 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1307 Variable *T_C1 = makeReg(IceType_i32);
1308 Variable *T_C2 = makeReg(IceType_i32);
1309 _rsb(T_C1, Src1RLo, ThirtyTwo);
1310 _lsr(T_Lo, Src0RLo, Src1RLo);
1311 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1312 OperandARM32::LSL, T_C1));
1313 OperandARM32::ShiftKind RShiftKind;
1314 CondARM32::Cond Pred;
1315 if (IsAshr) {
1316 _subs(T_C2, Src1RLo, ThirtyTwo);
1317 RShiftKind = OperandARM32::ASR;
1318 Pred = CondARM32::PL;
1319 } else {
1320 _sub(T_C2, Src1RLo, ThirtyTwo);
1321 RShiftKind = OperandARM32::LSR;
1322 Pred = CondARM32::AL;
1323 }
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   RShiftKind, T_C2),
           Pred);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(IceType_i32);
      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                             RShiftKind, Src1RLo));
      _mov(DestHi, T_Hi);
      return;
    }
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      return;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      llvm_unreachable("Call-helper-involved instruction for i64 type "
                       "should have already been handled before");
      return;
    }
    return;
  } else if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // Dest->getType() is a non-i64 scalar.
  Variable *Src0R = legalizeToVar(Src0);
  Variable *T = makeReg(Dest->getType());
  // Handle div/rem separately. They require a non-legalized Src1 to inspect
  // whether or not Src1 is a non-zero constant. Once legalized it is more
  // difficult to determine (constant may be moved to a register).
  switch (Inst->getOp()) {
  default:
    break;
  case InstArithmetic::Udiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_udiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Sdiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_sdiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Urem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_urem_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Srem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_srem_i32, IsRemainder);
    return;
  }
  }

  Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  switch (Inst->getOp()) {
  case InstArithmetic::_num:
    llvm_unreachable("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add:
    _add(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::And:
    _and(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Or:
    _orr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Xor:
    _eor(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Sub:
    _sub(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Mul: {
    Variable *Src1R = legalizeToVar(Src1RF);
    _mul(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Shl:
    _lsl(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Lshr:
    _lsr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Ashr:
    _asr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Udiv:
  case InstArithmetic::Sdiv:
  case InstArithmetic::Urem:
  case InstArithmetic::Srem:
    llvm_unreachable("Integer div/rem should have been handled earlier.");
    return;
  case InstArithmetic::Fadd:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fsub:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fmul:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Fdiv:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case InstArithmetic::Frem:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
}

void TargetARM32::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *SrcR;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then legalize the
      // Src operand into a Variable with the same register
      // assignment. This is mostly a workaround for advanced phi
      // lowering's ad-hoc register allocation which assumes no
      // register allocation is needed when at least one of the
      // operands is non-memory.
      // TODO(jvoung): check this for ARM.
      SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
    } else {
      // Dest could be a stack operand. Since we could potentially need
      // to do a Store (and store can only have Register operands),
      // legalize this to a register.
      SrcR = legalize(Src0, Legal_Reg);
    }
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _mov(Dest, SrcR);
    }
  }
}

void TargetARM32::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();
  // TODO(jvoung): Handle folding opportunities.
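  // As a sketch (register names illustrative), "br i1 %c, label %t, label %f"
  // becomes roughly:
  //   cmp r_c, #0
  //   bne <t>
  //   b   <f>
  // where one of the branches may turn into a fallthrough.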

  Variable *Src0R = legalizeToVar(Cond);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0R, Zero);
  _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
}

void TargetARM32::lowerCall(const InstCall *Instr) {
  MaybeLeafFunc = false;
  NeedsStackAlignment = true;

  // Assign arguments to registers and stack. Also reserve stack.
  TargetARM32::CallingConv CC;
  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
  int32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
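  // For example (illustrative, assuming the AAPCS-like convention modeled by
  // CallingConv, with r0-r3 holding the leading GPR arguments and an i64
  // occupying an even/odd register pair): for f(i32 a, i64 b, i32 c), a
  // lands in r0, b in r2/r3, and c spills to the parameter area on the
  // stack.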
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
        InRegs = true;
        Operand *Lo = loOperand(Arg);
        Operand *Hi = hiOperand(Arg);
        GPRArgs.push_back(std::make_pair(Lo, Regs.first));
        GPRArgs.push_back(std::make_pair(Hi, Regs.second));
      }
    } else {
      assert(Ty == IceType_i32);
      int32_t Reg;
      if (CC.I32InReg(&Reg)) {
        InRegs = true;
        GPRArgs.push_back(std::make_pair(Arg, Reg));
      }
    }

    if (!InRegs) {
      ParameterAreaSizeBytes =
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call sp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
    constexpr bool SignExt = false;
    if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &GPRArg : GPRArgs) {
    Variable *Reg = legalizeToVar(GPRArg.first, GPRArg.second);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
      ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
      break;
    case IceType_f32:
    case IceType_f64:
      // Use S and D regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Use Q regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // TODO(jvoung): Handle sandboxing.
  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();

  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to SP. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
    _add(SP, SP, AddAmount);
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
        UnimplementedError(Func->getContext()->getFlags());
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  }
}

void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0->getType() == IceType_i32) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else if (Src0->getType() == IceType_i1) {
        Variable *Src0R = legalizeToVar(Src0);
        _lsl(T_Lo, Src0R, ShiftAmt);
        _asr(T_Lo, T_Lo, ShiftAmt);
      } else {
        Variable *Src0R = legalizeToVar(Src0);
        _sxt(T_Lo, Src0R);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestHi->getType());
      if (Src0->getType() != IceType_i1) {
        _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
                                               OperandARM32::ASR, ShiftAmt));
      } else {
        // For i1, the asr instruction is already done above.
        _mov(T_Hi, T_Lo);
      }
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
      //   lsl t1, src_reg, 31
      //   asr t1, t1, 31
      //   dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *T = makeReg(Dest->getType());
      _lsl(T, Src0R, ShiftAmt);
      _asr(T, T, ShiftAmt);
      _mov(Dest, T);
    } else {
      // t1 = sxt src; dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Variable *T = makeReg(Dest->getType());
      _sxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1=uxtb src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // i32 and i1 can just take up the whole register.
      // i32 doesn't need uxt, while i1 will have an and mask later anyway.
      if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else {
        Variable *Src0R = legalizeToVar(Src0);
        _uxt(T_Lo, Src0R);
      }
      if (Src0->getType() == IceType_i1) {
        Constant *One = Ctx->getConstantInt32(1);
        _and(T_Lo, T_Lo, One);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestLo->getType());
      _mov(T_Hi, Zero);
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // t = Src0; t &= 1; Dest = t
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      Constant *One = Ctx->getConstantInt32(1);
      Variable *T = makeReg(Dest->getType());
      // Just use _mov instead of _uxt since all registers are 32-bit.
      // _uxt requires the source to be a register so could have required
      // a _mov from legalize anyway.
      _mov(T, Src0RF);
      _and(T, T, One);
      _mov(Dest, T);
    } else {
      // t1 = uxt src; dst = t1
      Variable *Src0R = legalizeToVar(Src0);
      Variable *T = makeReg(Dest->getType());
      _uxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      Operand *Src0 = Inst->getSrc(0);
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      // t1 = trunc Src0RF; Dest = t1
      Variable *T = makeReg(Dest->getType());
      _mov(T, Src0RF);
      if (Dest->getType() == IceType_i1)
        _and(T, T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fpext: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Fptosi:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fptoui:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Sitofp:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Uitofp: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  }
}

void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);

  if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }

  // a=icmp cond, b, c ==>
  // GCC does:
  //   cmp      b.hi, c.hi     or  cmp      b.lo, c.lo
  //   cmp.eq   b.lo, c.lo         sbcs t1, b.hi, c.hi
  //   mov.<C1> t, #1              mov.<C1> t, #1
  //   mov.<C2> t, #0              mov.<C2> t, #0
  //   mov      a, t               mov      a, t
  // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
  // is used for signed compares. In some cases, b and c need to be swapped
  // as well.
  //
  // LLVM does:
  // for EQ and NE:
  //   eor t1, b.hi, c.hi
  //   eor t2, b.lo, c.lo
  //   orrs t, t1, t2
  //   mov.<C> t, #1
  //   mov a, t
  //
  // that's nice in that it's just as short but has fewer dependencies
  // for better ILP at the cost of more registers.
  //
  // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
  // two unconditional mov #0, two cmps, two conditional mov #1,
  // and one conditional reg mov. That has few dependencies for good ILP,
  // but is a longer sequence.
  //
  // So, we are going with the GCC version since it's usually better (except
  // perhaps for eq/ne). We could revisit special-casing eq/ne later.
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt32(1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    Variable *Src0Lo, *Src0Hi;
    Operand *Src1LoRF, *Src1HiRF;
    if (TableIcmp64[Index].Swapped) {
      Src0Lo = legalizeToVar(loOperand(Src1));
      Src0Hi = legalizeToVar(hiOperand(Src1));
      Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
    } else {
      Src0Lo = legalizeToVar(loOperand(Src0));
      Src0Hi = legalizeToVar(hiOperand(Src0));
      Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
    }
    Variable *T = makeReg(IceType_i32);
    if (TableIcmp64[Index].IsSigned) {
      Variable *ScratchReg = makeReg(IceType_i32);
      _cmp(Src0Lo, Src1LoRF);
      _sbcs(ScratchReg, Src0Hi, Src1HiRF);
      // ScratchReg isn't going to be used, but we need the
      // side-effect of setting flags from this operation.
      Context.insert(InstFakeUse::create(Func, ScratchReg));
    } else {
      _cmp(Src0Hi, Src1HiRF);
      _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
    }
    _mov(T, One, TableIcmp64[Index].C1);
    _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
    _mov(Dest, T);
    return;
  }

  // a=icmp cond b, c ==>
  // GCC does:
  //   <u/s>xtb tb, b
  //   <u/s>xtb tc, c
  //   cmp      tb, tc
  //   mov.C1   t, #0
  //   mov.C2   t, #1
  //   mov      a, t
  // where the unsigned/sign extension is not needed for 32-bit.
  // They also have special cases for EQ and NE. E.g., for NE:
  //   <extend to tb, tc>
  //   subs     t, tb, tc
  //   movne    t, #1
  //   mov      a, t
  //
  // LLVM does:
  //   lsl     tb, b, #<N>
  //   mov     t, #0
  //   cmp     tb, c, lsl #<N>
  //   mov.<C> t, #1
  //   mov     a, t
  //
  // the left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned).
  // For the unsigned case, for some reason it does something similar to GCC
  // and performs a uxtb first. It's not clear to me why that special-casing
  // is needed.
  //
  // We'll go with the LLVM way for now, since it's shorter and has just as
  // few dependencies.
  int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShiftAmt >= 0);
  Constant *ShiftConst = nullptr;
  Variable *Src0R = nullptr;
  Variable *T = makeReg(IceType_i32);
  if (ShiftAmt) {
    ShiftConst = Ctx->getConstantInt32(ShiftAmt);
    Src0R = makeReg(IceType_i32);
    _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
  } else {
    Src0R = legalizeToVar(Src0);
  }
  _mov(T, Zero);
  if (ShiftAmt) {
    Variable *Src1R = legalizeToVar(Src1);
    OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
        Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
    _cmp(Src0R, Src1RShifted);
  } else {
    Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
    _cmp(Src0R, Src1RF);
  }
  _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
  _mov(Dest, T);
  return;
}

void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicFence:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
    // across the fence (both atomic and non-atomic). The InstARM32Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicIsLockFree: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicLoad: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicRMW:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicStore: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Bswap: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Ctpop: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Ctlz: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Cttz: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Fabs: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size
    // because the PNaCl ABI requires arguments to be at least 32 bits
    // wide.
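    // E.g., a memset value of 0xAB arrives as i8 and is zero-extended into
    // a 32-bit temporary before being passed to the helper (value here is
    // illustrative).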
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().getUseSandboxing()) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Stacksave: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Stackrestore: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Trap:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}

void TargetARM32::lowerLoad(const InstLoad *Load) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandARM32Mem operand.
  Type Ty = Load->getDest()->getType();
  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
  Variable *DestLoad = Load->getDest();

  // TODO(jvoung): handle folding opportunities. Sign and zero extension
  // can be folded into a load.
  InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}

void TargetARM32::doAddressOptLoad() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::randomlyInsertNop(float Probability) {
  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
  if (RNG.getTrueWithProbability(Probability)) {
    UnimplementedError(Func->getContext()->getFlags());
  }
}

void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}

void TargetARM32::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = Inst->getRetValue();
    if (Src0->getType() == IceType_i64) {
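      // The i64 return value travels in r0 (low word) and r1 (high word).
      // Only R0 is handed to the ret below, so the FakeUse keeps R1 live
      // across it.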
      Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0);
      Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1);
      Reg = R0;
      Context.insert(InstFakeUse::create(Func, R1));
    } else if (isScalarFloatingType(Src0->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isVectorType(Src0->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
      _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because
  // addEpilog explicitly looks for a ret instruction as a marker for
  // where to insert the frame removal instructions.
  // addEpilog is responsible for restoring the "lr" register as needed
  // prior to this ret instruction.
  _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
  // Add a fake use of sp to make sure sp stays alive for the entire
  // function. Otherwise post-call sp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
  Context.insert(InstFakeUse::create(Func, SP));
}

void TargetARM32::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  if (isFloatingType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // TODO(jvoung): handle folding opportunities.
  // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
  Variable *CmpOpnd0 = legalizeToVar(Condition);
  Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  _cmp(CmpOpnd0, CmpOpnd1);
  CondARM32::Cond Cond = CondARM32::NE;
  if (DestTy == IceType_i64) {
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }
  Variable *T = nullptr;
  SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
  _mov_nonkillable(T, SrcT, Cond);
  _mov(Dest, T);
}

void TargetARM32::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Variable *ValueHi = legalizeToVar(hiOperand(Value));
    Variable *ValueLo = legalizeToVar(loOperand(Value));
    _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
    _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
  } else if (isVectorType(Ty)) {
    UnimplementedError(Func->getContext()->getFlags());
  } else {
    Variable *ValueR = legalizeToVar(Value);
    _str(ValueR, NewAddr);
  }
}

void TargetARM32::doAddressOptStore() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
  // This implements the most naive possible lowering.
  // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
  Operand *Src0 = Inst->getComparison();
  SizeT NumCases = Inst->getNumCases();
  if (Src0->getType() == IceType_i64) {
    // TODO(jvoung): handle and test undef for Src0
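    // Each case compares the low words first; the high-word compare is
    // predicated on EQ, so the final EQ condition holds only when both
    // halves match. Roughly (operands illustrative, after legalization):
    //   cmp   lo, <val.lo>
    //   cmpeq hi, <val.hi>
    //   beq   <label[I]>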
    Variable *Src0Lo = legalizeToVar(loOperand(Src0));
    Variable *Src0Hi = legalizeToVar(hiOperand(Src0));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
      ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
      ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
      _cmp(Src0Lo, ValueLo);
      _cmp(Src0Hi, ValueHi, CondARM32::EQ);
      _br(Inst->getLabel(I), CondARM32::EQ);
    }
    _br(Inst->getLabelDefault());
    return;
  }

  // 32 bit integer
  Variable *Src0Var = legalizeToVar(Src0);
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Inst->getValue(I));
    Value = legalize(Value, Legal_Reg | Legal_Flex);
    _cmp(Src0Var, Value);
    _br(Inst->getLabel(I), CondARM32::EQ);
  }
  _br(Inst->getLabelDefault());
}

void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
  _trap();
}

// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
// preserve integrity of liveness analysis. Undef values are also
// turned into zeroes, since loOperand() and hiOperand() don't expect
// Undef input.
void TargetARM32::prelowerPhis() {
  UnimplementedError(Func->getContext()->getFlags());
}

// Lower the pre-ordered list of assignments into mov instructions.
// Also has to do some ad-hoc register allocation as necessary.
void TargetARM32::lowerPhiAssignments(CfgNode *Node,
                                      const AssignList &Assignments) {
  (void)Node;
  (void)Assignments;
  UnimplementedError(Func->getContext()->getFlags());
}

Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  UnimplementedError(Func->getContext()->getFlags());
  return Reg;
}

// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    UnimplementedError(Func->getContext()->getFlags());
  } else {
    // Mov's Src operand can really only be the flexible second operand type
    // or a register. Users should guarantee that.
    _mov(Reg, Src);
  }
  return Reg;
}

Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandARM32FlexReg as needed.
  assert(Allowed & Legal_Reg);
  // Go through the various types of operands:
  // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
  // Given the above assertion, if type of operand is not legal
  // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToVar(Base);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index);
    }
    // Create a new operand if there was a change.
    if (Base != RegBase || Index != RegIndex) {
      // There is only a reg +/- reg or reg + imm form.
      // Figure out which to re-create.
      if (Mem->isRegReg()) {
        Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase, RegIndex,
                                      Mem->getShiftOp(), Mem->getShiftAmt(),
                                      Mem->getAddrMode());
      } else {
        Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase,
                                      Mem->getOffset(), Mem->getAddrMode());
      }
    }
    if (!(Allowed & Legal_Mem)) {
      Type Ty = Mem->getType();
      Variable *Reg = makeReg(Ty, RegNum);
      _ldr(Reg, Mem);
      From = Reg;
    } else {
      From = Mem;
    }
    return From;
  }

  if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
    if (!(Allowed & Legal_Flex)) {
      if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
        if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
          From = FlexReg->getReg();
          // Fall through and let From be checked as a Variable below,
          // where it may or may not need a register.
        } else {
          return copyToReg(Flex, RegNum);
        }
      } else {
        return copyToReg(Flex, RegNum);
      }
    } else {
      return From;
    }
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      // Lower undefs to zero. Another option is to lower undefs to an
      // uninitialized register; however, using an uninitialized register
      // results in less predictable code.
      if (isVectorType(From->getType()))
        return makeVectorOfZeros(From->getType(), RegNum);
      From = Ctx->getConstantZero(From->getType());
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(From->getType()));
    bool CanBeFlex = Allowed & Legal_Flex;
    if (auto C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      uint32_t RotateAmt;
      uint32_t Immed_8;
      uint32_t Value = static_cast<uint32_t>(C32->getValue());
      // Check if the immediate will fit in a Flexible second operand,
      // if a Flexible second operand is allowed. We need to know the exact
      // value, so that rules out relocatable constants.
      // Also try the inverse and use MVN if possible.
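      // (A flexible immediate is an 8-bit value rotated right by an even
      // amount in [0, 30]. E.g., 0xFF is encodable directly, 0xFFFFFF00 is
      // not, but its bitwise inverse 0xFF is, so the latter can be
      // materialized as "mvn reg, #0xFF"; values illustrative.)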
      if (CanBeFlex &&
          OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
        return OperandARM32FlexImm::create(Func, From->getType(), Immed_8,
                                           RotateAmt);
      } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
                                  ~Value, &RotateAmt, &Immed_8)) {
        auto InvertedFlex = OperandARM32FlexImm::create(Func, From->getType(),
                                                        Immed_8, RotateAmt);
        Type Ty = From->getType();
        Variable *Reg = makeReg(Ty, RegNum);
        _mvn(Reg, InvertedFlex);
        return Reg;
      } else {
        // Do a movw/movt to a register.
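        // E.g., materializing 0x12345678 (illustrative):
        //   movw reg, #0x5678  @ writes the low half, zeroes the high half
        //   movt reg, #0x1234  @ writes the high half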
        Type Ty = From->getType();
        Variable *Reg = makeReg(Ty, RegNum);
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        _movw(Reg,
              UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
        if (UpperBits != 0) {
          _movt(Reg, Ctx->getConstantInt32(UpperBits));
        }
        return Reg;
      }
    } else if (auto C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      Type Ty = From->getType();
      Variable *Reg = makeReg(Ty, RegNum);
      _movw(Reg, C);
      _movt(Reg, C);
      return Reg;
    } else {
      // Load floats/doubles from literal pool.
      UnimplementedError(Func->getContext()->getFlags());
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  if (auto Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");

  return From;
}

// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetARM32::legalizeToVar(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
  OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
  // It may be the case that address mode optimization already creates
  // an OperandARM32Mem, so in that case it wouldn't need another level
  // of transformation.
  if (Mem) {
    return llvm::cast<OperandARM32Mem>(legalize(Mem));
  }
  // If we didn't do address mode optimization, then we only
  // have a base/offset to work with. ARM always requires a base
  // register, so just use that to hold the operand.
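  // The operand is thus materialized into a base register and wrapped in a
  // [base, #0] addressing form.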
  Variable *Base = legalizeToVar(Operand);
  return OperandARM32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}

Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for ARM32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
  assert(llvm::isPowerOf2_32(Align));
  uint32_t RotateAmt;
  uint32_t Immed_8;
  Operand *Mask;
  // Use AND or BIC to mask off the bits, depending on which immediate fits
  // (if it fits at all). Assume Align is usually small, in which case BIC
  // works better. Thus, this rounds down to the alignment.
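  // E.g., with Align == 16 (illustrative): Align - 1 == 15 is encodable, so
  // "bic reg, reg, #15" clears the low four bits. When Align - 1 does not
  // fit a flexible immediate, -Align is legalized (into a register if
  // necessary) and applied with AND instead.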
  if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
    Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
    _bic(Reg, Reg, Mask);
  } else {
    Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
    _and(Reg, Reg, Mask);
  }
}

void TargetARM32::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  inferTwoAddress();
}

void TargetARM32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

void TargetARM32::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

void TargetARM32::emit(const ConstantFloat *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantDouble *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const IceString &SectionSuffix) {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

void TargetDataARM32::lowerConstants() {
  if (Ctx->getFlags().getDisableTranslation())
    return;
  UnimplementedError(Ctx->getFlags());
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE.
  // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
  // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope
  // sub-subsection of the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least A15, but do A9 for higher compat.
  // For some reason, the LLVM ARM asm parser has the .cpu directive override
  // the mattr specified on the commandline. So to test hwdiv, we need to set
  // the .cpu directive higher (can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu cortex-a15\n";
  } else {
    Str << ".cpu cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu neon\n"
      << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

} // end of namespace Ice