//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringARM32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringARM32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

namespace Ice {

namespace {

// UnimplementedError is defined as a macro so that we can get actual line
// numbers.
#define UnimplementedError(Flags)                                              \
  do {                                                                         \
    if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) {         \
      /* Use llvm_unreachable instead of report_fatal_error, which gives       \
         better stack traces. */                                               \
      llvm_unreachable("Not yet implemented");                                 \
      abort();                                                                 \
    }                                                                          \
  } while (0)

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { CondARM32::C_32 }                                                          \
  ,
    ICMPARM32_TABLE
#undef X
};

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference for
// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
  ,
    ICMPARM32_TABLE
#undef X
};

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < llvm::array_lengthof(TableIcmp32));
  return TableIcmp32[Index].Mapping;
}

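// As an illustrative example (assuming the usual entries in ICMPARM32_TABLE):
// getIcmp32Mapping(InstIcmp::Eq) yields CondARM32::EQ, so an i32 "icmp eq"
// lowers to a compare followed by moves predicated on the EQ condition.
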
// In some cases, there are x-macros tables for both high-level and low-level
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
// is a risk that the tables could get out of sync if enum values are reordered
// or if entries are added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}
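
// For example, applyStackAlignment(20) returns 32, while a value that is
// already 16-byte aligned (e.g. 48) is returned unchanged.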

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only aligns
  // vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}
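
// For example, applyStackAlignmentTy(4, IceType_i64) returns 8 (natural
// 8-byte alignment for i64), and applyStackAlignmentTy(4, IceType_v4i32)
// also returns 8 because vector alignment is capped at 8 bytes here.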

// Conservatively check if at compile time we know that the operand is
// definitely a non-zero integer.
bool isGuaranteedNonzeroInt(const Operand *Op) {
  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
    return Const->getValue() != 0;
  }
  return false;
}

} // end of anonymous namespace

TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
  // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
  // initialize in some sort of static initializer for the class.
  // Limit this size (or do all bitsets need to be the same width)???
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
  llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP32, isFP64, isVec128, alias_init)                                \
  IntegerRegisters[RegARM32::val] = isInt;                                     \
  Float32Registers[RegARM32::val] = isFP32;                                    \
  Float64Registers[RegARM32::val] = isFP64;                                    \
  VectorRegisters[RegARM32::val] = isVec128;                                   \
  RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM);                    \
  for (SizeT RegAlias : alias_init) {                                          \
    assert(!RegisterAliases[RegARM32::val][RegAlias] &&                        \
           "Duplicate alias for " #val);                                       \
    RegisterAliases[RegARM32::val].set(RegAlias);                              \
  }                                                                            \
  RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM);                    \
  assert(RegisterAliases[RegARM32::val][RegARM32::val]);                       \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  legalizeStackSlots();
  if (Func->hasError())
    return;
  Func->dump("After legalizeStackSlots");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  legalizeStackSlots();
  if (Func->hasError())
    return;
  Func->dump("After legalizeStackSlots");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  static const char *RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP32, isFP64, isVec128, alias_init)                                \
  name,
      REGARM32_TABLE
#undef X
  };

  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark SP and LR as an "argument" so that they are considered
    // live upon function entry.
    if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

void TargetARM32::emitJumpTable(const Cfg *Func,
                                const InstJumpTable *JumpTable) const {
  (void)JumpTable;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
    if (!hasFramePointer())
      Offset += getStackAdjustment();
  }
  if (!isLegalVariableStackOffset(Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = stackSlotType();
  Str << "[" << getRegName(BaseRegNum, FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  int32_t RegLo, RegHi;
  // Always start i64 registers at an even register, so this may end up padding
  // away a register.
  NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  // If this bumps us past the boundary, don't allocate to a register and leave
  // any previously speculatively consumed registers as consumed.
  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
    return false;
  Regs->first = RegLo;
  Regs->second = RegHi;
  return true;
}

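// A sketch of the pairing rule above: if one i32 argument has already taken
// r0, an incoming i64 argument skips r1, takes the even/odd pair (r2, r3),
// and a later i32 argument finds no GPRs left and is passed on the stack.
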
bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  return true;
}

bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
  if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS)
    return false;
  if (isVectorType(Ty)) {
    NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
    // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
    // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from
    // Reg_q0. Same thing goes for D registers.
    static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
                  "ARM32 Q registers are possibly declared incorrectly.");
    *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4);
    NumFPRegUnits += 4;
    // If this bumps us past the boundary, don't allocate to a register and
    // leave any previously speculatively consumed registers as consumed.
    if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
      return false;
  } else if (Ty == IceType_f64) {
    static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
                  "ARM32 D registers are possibly declared incorrectly.");
    NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
    *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2);
    NumFPRegUnits += 2;
    // If this bumps us past the boundary, don't allocate to a register and
    // leave any previously speculatively consumed registers as consumed.
    if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
      return false;
  } else {
    static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
                  "ARM32 S registers are possibly declared incorrectly.");
    assert(Ty == IceType_f32);
    *Reg = RegARM32::Reg_s0 + NumFPRegUnits;
    ++NumFPRegUnits;
  }
  return true;
}

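// A worked example of the unit accounting above: an f32 argument takes s0
// (one unit); a following f64 argument is aligned up to two units and takes
// d1 (units 2-3); a following 128-bit vector is aligned up to four units and
// takes q1 (units 4-7).
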
void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> RegPair;
      if (!CC.I64InRegs(&RegPair))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegPair.first);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegPair.second);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      int32_t RegNum;
      if (isVectorType(Ty) || isFloatingType(Ty)) {
        if (!CC.FPInReg(Ty, &RegNum))
          continue;
      } else {
        assert(Ty == IceType_i32);
        if (!CC.I32InReg(&RegNum))
          continue;
      }
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    }
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load the
  // value from the stack slot.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandARM32Mem *Mem = OperandARM32Mem::create(
        Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
                                Ctx->getConstantInt32(Arg->getStackOffset())));
    if (isVectorType(Arg->getType())) {
      // Use vld1.$elem or something?
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isFloatingType(Arg->getType())) {
      _vldr(Arg, Mem);
    } else {
      _ldr(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit OperandARM32Mem
    // operand instead of a Variable, so its fill-from-stack operation has to
    // be tracked separately for statistics.
    Ctx->statsUpdateFills();
  }
}

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+ <--- FramePointer (if used)
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
  //  * SpillAreaSizeBytes: areas 2 - 6
  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to the largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook =
      [](Variable *) { return false; };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers. On ARM, "push" can push a
  // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
  // callee-saved float/vector registers. The "vpush" instruction can handle a
  // whole list of float/vector registers, but it only handles contiguous
  // sequences of registers by specifying the start and the length.
  VarList GPRsToPreserve;
  GPRsToPreserve.reserve(CalleeSaves.size());
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
    assert(RegsUsed[RegARM32::Reg_fp] == false);
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      // TODO(jvoung): do separate vpush for each floating point register
      // segment and += 4, or 8 depending on type.
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      GPRsToPreserve.push_back(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!GPRsToPreserve.empty())
    _push(GPRsToPreserve);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, FP));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align SP if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the scratch register if needed to legalize the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, getReservedTmpReg());
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  TargetARM32::CallingConv CC;
  for (Variable *Arg : Args) {
    Type Ty = Arg->getType();
    bool InRegs = false;
    // Skip arguments passed in registers.
    if (isVectorType(Ty) || isFloatingType(Ty)) {
      int32_t DummyReg;
      InRegs = CC.FPInReg(Ty, &DummyReg);
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> DummyRegs;
      InRegs = CC.I64InRegs(&DummyRegs);
    } else {
      assert(Ty == IceType_i32);
      int32_t DummyReg;
      InRegs = CC.I32InReg(&DummyReg);
    }
    if (!InRegs)
      finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding (forward)
  // iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
    // use of SP before the assignment of SP=FP keeps previous SP adjustments
    // from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
    _mov(SP, FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the scratch register if needed to legalize the immediate.
      Operand *AddAmount =
          legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                   Legal_Reg | Legal_Flex, getReservedTmpReg());
      _add(SP, SP, AddAmount);
    }
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  VarList GPRsToRestore;
  GPRsToRestore.reserve(CalleeSaves.size());
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
  }
  // Pop registers in ascending order just like push (instead of in reverse
  // order).
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
  if (!GPRsToRestore.empty())
    _pop(GPRsToRestore);

  if (!Ctx->getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  //   bundle_lock
  //   bic lr, #0xc000000f
  //   bx lr
  //   bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
  // restrict to the lower 1GB as well.
  Operand *RetMask =
      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
  _bundle_lock();
  _bic(LR, LR, RetMask);
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
  constexpr bool SignExt = false;
  // TODO(jvoung): vldr of FP stack slots has a different limit from the plain
  // stackSlotType().
  return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
}

StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
                                                 Variable *OrigBaseReg) {
  int32_t Offset = Var->getStackOffset();
  // Legalize will likely need a movw/movt combination, but if the top bits are
  // all 0 from negating the offset and subtracting, we could use that instead.
  bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
  if (ShouldSub)
    Offset = -Offset;
  Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
                                Legal_Reg | Legal_Flex, getReservedTmpReg());
  Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
  if (ShouldSub)
    _sub(ScratchReg, OrigBaseReg, OffsetVal);
  else
    _add(ScratchReg, OrigBaseReg, OffsetVal);
  StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType());
  NewVar->setMustNotHaveReg();
  NewVar->setBaseRegNum(ScratchReg->getRegNum());
  constexpr int32_t NewOffset = 0;
  NewVar->setStackOffset(NewOffset);
  return NewVar;
}

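// For instance, a slot at offset 4100 off the base register is outside the
// +/-4095 immediate range of a word ldr/str (the range assumed here via
// OperandARM32Mem::canHoldOffset), so the code above would materialize
// base+4100 into the reserved scratch register and rewrite the access as
// offset 0 off that register.
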
void TargetARM32::legalizeStackSlots() {
  // If a stack variable's frame offset doesn't fit, convert from:
  //   ldr X, OFF[SP]
  // to:
  //   movw/movt TMP, OFF_PART
  //   add TMP, TMP, SP
  //   ldr X, OFF_MORE[TMP]
  //
  // This is safe because we have reserved TMP, and add for ARM does not
  // clobber the flags register.
  Func->dump("Before legalizeStackSlots");
  assert(hasComputedFrame());
  // Early exit, if SpillAreaSizeBytes is really small.
  if (isLegalVariableStackOffset(SpillAreaSizeBytes))
    return;
  Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
  int32_t StackAdjust = 0;
  // Do a fairly naive greedy clustering for now. Pick the first stack slot
  // that's out of bounds and make a new base reg using the architecture's temp
  // register. If that works for the next slot, then great. Otherwise, create a
  // new base register, clobbering the previous base register. Never share a
  // base reg across different basic blocks. This isn't ideal if local and
  // multi-block variables are far apart and their references are interspersed.
  // It may help to be more coordinated about assigning stack slot numbers, and
  // it may help to assign smaller offsets to higher-weight variables so that
  // they don't depend on this legalization.
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    StackVariable *NewBaseReg = nullptr;
    int32_t NewBaseOffset = 0;
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = Context.getCur();
      Variable *Dest = CurInstr->getDest();
      // Check if the previous NewBaseReg is clobbered, and reset if needed.
      if ((Dest && NewBaseReg && Dest->hasReg() &&
           Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
          llvm::isa<InstFakeKill>(CurInstr)) {
        NewBaseReg = nullptr;
        NewBaseOffset = 0;
      }
      // The stack adjustment only matters if we are using SP instead of FP.
      if (!hasFramePointer()) {
        if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
          StackAdjust += AdjInst->getAmount();
          NewBaseOffset += AdjInst->getAmount();
          continue;
        }
        if (llvm::isa<InstARM32Call>(CurInstr)) {
          NewBaseOffset -= StackAdjust;
          StackAdjust = 0;
          continue;
        }
      }
      // For now, only Mov instructions can have stack variables. We need to
      // know the type of instruction because we currently create a fresh one
      // to replace Dest/Source, rather than mutate in place.
      auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr);
      if (!MovInst) {
        continue;
      }
      if (!Dest->hasReg()) {
        int32_t Offset = Dest->getStackOffset();
        Offset += StackAdjust;
        if (!isLegalVariableStackOffset(Offset)) {
          if (NewBaseReg) {
            int32_t OffsetDiff = Offset - NewBaseOffset;
            if (isLegalVariableStackOffset(OffsetDiff)) {
              StackVariable *NewDest =
                  Func->makeVariable<StackVariable>(stackSlotType());
              NewDest->setMustNotHaveReg();
              NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum());
              NewDest->setStackOffset(OffsetDiff);
              Variable *NewDestVar = NewDest;
              _mov(NewDestVar, MovInst->getSrc(0));
              MovInst->setDeleted();
              continue;
            }
          }
          StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg);
          assert(LegalDest != Dest);
          Variable *LegalDestVar = LegalDest;
          _mov(LegalDestVar, MovInst->getSrc(0));
          MovInst->setDeleted();
          NewBaseReg = LegalDest;
          NewBaseOffset = Offset;
          continue;
        }
      }
      assert(MovInst->getSrcSize() == 1);
      Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0));
      if (Var && !Var->hasReg()) {
        int32_t Offset = Var->getStackOffset();
        Offset += StackAdjust;
        if (!isLegalVariableStackOffset(Offset)) {
          if (NewBaseReg) {
            int32_t OffsetDiff = Offset - NewBaseOffset;
            if (isLegalVariableStackOffset(OffsetDiff)) {
              StackVariable *NewVar =
                  Func->makeVariable<StackVariable>(stackSlotType());
              NewVar->setMustNotHaveReg();
              NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum());
              NewVar->setStackOffset(OffsetDiff);
              _mov(Dest, NewVar);
              MovInst->setDeleted();
              continue;
            }
          }
          StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg);
          assert(LegalVar != Var);
          _mov(Dest, LegalVar);
          MovInst->setDeleted();
          NewBaseReg = LegalVar;
          NewBaseOffset = Offset;
          continue;
        }
      }
    }
  }
}

void TargetARM32::split64(Variable *Var) {
  assert(Var->getType() == IceType_i64);
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (BuildDefs::dump()) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  }
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    if (Mem->isRegReg()) {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getIndex(), Mem->getShiftOp(),
                                     Mem->getShiftAmt(), Mem->getAddrMode());
    } else {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getOffset(), Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects in case of
    // duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else. Thus,
      // prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      Variable *Base = Mem->getBase();
      ConstantInteger32 *Offset = Mem->getOffset();
      assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
      int32_t NextOffsetVal = Offset->getValue() + 4;
      const bool SignExt = false;
      if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
        // We have to make a temp variable and add 4 to either Base or Offset.
        // If we add 4 to Offset, this will convert a non-RegReg addressing
        // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
        // RegReg addressing modes, prefer adding to base and replacing
        // instead. Thus we leave the old offset alone.
        Constant *Four = Ctx->getConstantInt32(4);
        Variable *NewBase = Func->makeVariable(Base->getType());
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               NewBase, Base, Four));
        Base = NewBase;
      } else {
        Offset =
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
      }
      return OperandARM32Mem::create(Func, SplitType, Base, Offset,
                                     Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

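// For a constant such as 0x1122334455667788, loOperand() yields the i32
// constant 0x55667788 and hiOperand() yields 0x11223344; for a memory
// operand, the hi half is simply the same address plus 4 (little-endian).
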
llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegARM32::Reg_NUM);

#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP32, isFP64, isVec128, alias_init)                                \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = false;

  REGARM32_TABLE

#undef X

  return Registers;
}

void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
  UsesFramePointer = true;
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of SP, etc.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
    alignRegisterPow2(SP, Alignment);
  }
  Operand *TotalSize = Inst->getSizeInBytes();
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
    _sub(SP, SP, SubAmount);
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
  _mov(Dest, SP);
}

Jan Voung6ec369e2015-06-30 11:03:15 -07001231void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
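  // Emitted sketch for an i32 divisor (illustrative; register names are
  // hypothetical):
  //   tst r1, r1       @ set flags on the divisor
  //   bne .Lnonzero    @ skip the trap when any bit is set
  //   <trap>
  // .Lnonzero: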
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToReg(SrcLo);
  switch (Ty) {
  default:
    llvm_unreachable("Unexpected type");
  case IceType_i8: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i16: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i32: {
    _tst(SrcLoReg, SrcLoReg);
    break;
  }
  case IceType_i64: {
    Variable *ScratchReg = makeReg(IceType_i32);
    _orrs(ScratchReg, SrcLoReg, SrcHi);
    // ScratchReg isn't going to be used, but we need the side-effect of
    // setting flags from this operation.
    Context.insert(InstFakeUse::create(Func, ScratchReg));
  }
  }
  InstARM32Label *Label = InstARM32Label::create(Func, this);
  _br(Label, CondARM32::NE);
  _trap();
  Context.insert(Label);
}

void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                               Operand *Src1, ExtInstr ExtFunc,
                               DivInstr DivFunc, const char *DivHelperName,
                               bool IsRemainder) {
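  // Sketch of the two paths below (illustrative), for "%r = srem i8 %a, %b":
  // with hardware divide, roughly
  //   sxtb t0, a ; sxtb t1, b ; sdiv q, t0, t1 ; mls r, q, t1, t0
  // and without it, the sign/zero-extended operands are passed to the
  // DivHelperName runtime routine via an ordinary call.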
  div0Check(Dest->getType(), Src1, nullptr);
  Variable *Src1R = legalizeToReg(Src1);
  Variable *T0R = Src0R;
  Variable *T1R = Src1R;
  if (Dest->getType() != IceType_i32) {
    T0R = makeReg(IceType_i32);
    (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
    T1R = makeReg(IceType_i32);
    (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
  }
  if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
    (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
    if (IsRemainder) {
      Variable *T2 = makeReg(IceType_i32);
      _mls(T2, T, T1R, T0R);
      T = T2;
    }
    _mov(Dest, T);
  } else {
    constexpr SizeT MaxSrcs = 2;
    InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
    Call->addArg(T0R);
    Call->addArg(T1R);
    lowerCall(Call);
  }
  return;
}

void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
  // legalize Src0 to flex or Src1 to flex and there is a reversible
  // instruction. E.g., reverse subtract with immediate, register vs register,
  // immediate.
  // Or it may be the case that the operands aren't swapped, but the bits can
  // be flipped and a different operation applied. E.g., use BIC (bit clear)
  // instead of AND for some masks.
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));
  if (Dest->getType() == IceType_i64) {
    // These helper-call-involved instructions are lowered in this separate
    // switch. This is because we would otherwise assume that we need to
    // legalize Src0 to Src0RLo and Src0RHi. However, those go unused with
    // helper calls, and such unused/redundant instructions will fail liveness
    // analysis under the -Om1 setting.
    switch (Inst->getOp()) {
    default:
      break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem: {
      // Check for divide by 0 (ARM normally doesn't trap, but we want it to
      // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
      // register, which will hide a constant source operand. Instead, check
      // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
      if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
        if (C64->getValue() == 0) {
          _trap();
          return;
        }
      } else {
        Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
        Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
        div0Check(IceType_i64, Src1Lo, Src1Hi);
      }
      // Technically, ARM has its own aeabi routines, but we can use the
      // non-aeabi routines as well. LLVM uses __aeabi_ldivmod for div, but uses
      // the more standard __moddi3 for rem.
      const char *HelperName = "";
      switch (Inst->getOp()) {
      default:
        llvm_unreachable("Should have only matched div ops.");
        break;
      case InstArithmetic::Udiv:
        HelperName = H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperName = H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperName = H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperName = H_srem_i64;
        break;
      }
      constexpr SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
      return;
    }
    }
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *Src0RLo = legalizeToReg(loOperand(Src0));
    Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = makeReg(DestLo->getType());
    Variable *T_Hi = makeReg(DestHi->getType());
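    // For example (illustrative), the Add case below produces an adds/adc
    // pair:
    //   adds t_lo, b.lo, c.lo   @ sets the carry flag
    //   adc  t_hi, b.hi, c.hi   @ consumes the carry flag
    // and Sub analogously uses subs/sbc.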
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      return;
    case InstArithmetic::Add:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _adds(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _adc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::And:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _and(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _and(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Or:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _orr(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _orr(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Xor:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _eor(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _eor(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Sub:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _subs(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _sbc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      return;
    case InstArithmetic::Mul: {
      // GCC 4.8 does:
      // a=b*c ==>
      // t_acc =(mul) (b.lo * c.hi)
      // t_acc =(mla) (c.lo * b.hi) + t_acc
      // t.hi,t.lo =(umull) b.lo * c.lo
      // t.hi += t_acc
      // a.lo = t.lo
      // a.hi = t.hi
      //
      // LLVM does:
      // t.hi,t.lo =(umull) b.lo * c.lo
      // t.hi =(mla) (b.lo * c.hi) + t.hi
      // t.hi =(mla) (b.hi * c.lo) + t.hi
      // a.lo = t.lo
      // a.hi = t.hi
      //
      // LLVM's lowering has fewer instructions, but more register pressure:
      // t.lo is live from beginning to end, while GCC delays the two-dest
      // instruction till the end, and kills c.hi immediately.
      Variable *T_Acc = makeReg(IceType_i32);
      Variable *T_Acc1 = makeReg(IceType_i32);
      Variable *T_Hi1 = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToReg(Src1Lo);
      Variable *Src1RHi = legalizeToReg(Src1Hi);
      _mul(T_Acc, Src0RLo, Src1RHi);
      _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
      _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
      _add(T_Hi, T_Hi1, T_Acc1);
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }
    case InstArithmetic::Shl: {
      // a=b<<c ==>
      // GCC 4.8 does:
      // sub t_c1, c.lo, #32
      // lsl t_hi, b.hi, c.lo
      // orr t_hi, t_hi, b.lo, lsl t_c1
      // rsb t_c2, c.lo, #32
      // orr t_hi, t_hi, b.lo, lsr t_c2
      // lsl t_lo, b.lo, c.lo
      // a.lo = t_lo
      // a.hi = t_hi
      // Can be strength-reduced for constant-shifts, but we don't do that for
      // now.
      // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
      // ARM, shifts only take the lower 8 bits of the shift register, and
      // saturate to the range 0-32, so the negative value will saturate to 32.
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToReg(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _sub(T_C1, Src1RLo, ThirtyTwo);
      _lsl(T_Hi, Src0RHi, Src1RLo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSL, T_C1));
      _rsb(T_C2, Src1RLo, ThirtyTwo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSR, T_C2));
      _mov(DestHi, T_Hi);
      Variable *T_Lo = makeReg(IceType_i32);
      // _mov seems to sometimes have better register preferencing than lsl.
      // Otherwise mov w/ lsl shifted register is a pseudo-instruction that
      // maps to lsl.
      _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                             OperandARM32::LSL, Src1RLo));
      _mov(DestLo, T_Lo);
      return;
    }
    case InstArithmetic::Lshr:
      // a=b>>c (unsigned) ==>
      // GCC 4.8 does:
      // rsb t_c1, c.lo, #32
      // lsr t_lo, b.lo, c.lo
      // orr t_lo, t_lo, b.hi, lsl t_c1
      // sub t_c2, c.lo, #32
      // orr t_lo, t_lo, b.hi, lsr t_c2
      // lsr t_hi, b.hi, c.lo
      // a.lo = t_lo
      // a.hi = t_hi
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==> ...
      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the
      // next orr should be conditioned on PLUS. The last two right shifts
      // should also be arithmetic.
      bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToReg(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _rsb(T_C1, Src1RLo, ThirtyTwo);
      _lsr(T_Lo, Src0RLo, Src1RLo);
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   OperandARM32::LSL, T_C1));
      OperandARM32::ShiftKind RShiftKind;
      CondARM32::Cond Pred;
      if (IsAshr) {
        _subs(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::ASR;
        Pred = CondARM32::PL;
      } else {
        _sub(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::LSR;
        Pred = CondARM32::AL;
      }
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   RShiftKind, T_C2),
           Pred);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(IceType_i32);
      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                             RShiftKind, Src1RLo));
      _mov(DestHi, T_Hi);
      return;
    }
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      return;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      llvm_unreachable("Call-helper-involved instruction for i64 type "
                       "should have already been handled before");
      return;
    }
    return;
  } else if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    // Add a fake def to keep liveness consistent in the meantime.
    Context.insert(InstFakeDef::create(Func, Dest));
    return;
  }
  // Dest->getType() is a non-i64 scalar.
  Variable *Src0R = legalizeToReg(Src0);
  Variable *T = makeReg(Dest->getType());
  // Handle div/rem separately. They require a non-legalized Src1 to inspect
  // whether or not Src1 is a non-zero constant. Once legalized it is more
  // difficult to determine (constant may be moved to a register).
  switch (Inst->getOp()) {
  default:
    break;
  case InstArithmetic::Udiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_udiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Sdiv: {
    constexpr bool IsRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_sdiv_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Urem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_urem_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Srem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_srem_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Frem: {
    const SizeT MaxSrcs = 2;
    Type Ty = Dest->getType();
    InstCall *Call = makeHelperCall(
        isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
    Call->addArg(Src0R);
    Call->addArg(Src1);
    lowerCall(Call);
    return;
  }
  }

  // Handle floating-point arithmetic separately: these instructions require
  // Src1 to be legalized to a register.
  switch (Inst->getOp()) {
  default:
    break;
  case InstArithmetic::Fadd: {
    Variable *Src1R = legalizeToReg(Src1);
    _vadd(T, Src0R, Src1R);
    _vmov(Dest, T);
    return;
  }
  case InstArithmetic::Fsub: {
    Variable *Src1R = legalizeToReg(Src1);
    _vsub(T, Src0R, Src1R);
    _vmov(Dest, T);
    return;
  }
  case InstArithmetic::Fmul: {
    Variable *Src1R = legalizeToReg(Src1);
    _vmul(T, Src0R, Src1R);
    _vmov(Dest, T);
    return;
  }
  case InstArithmetic::Fdiv: {
    Variable *Src1R = legalizeToReg(Src1);
    _vdiv(T, Src0R, Src1R);
    _vmov(Dest, T);
    return;
  }
  }

  Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  switch (Inst->getOp()) {
  case InstArithmetic::_num:
    llvm_unreachable("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add:
    _add(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::And:
    _and(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Or:
    _orr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Xor:
    _eor(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Sub:
    _sub(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Mul: {
    Variable *Src1R = legalizeToReg(Src1RF);
    _mul(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Shl:
    _lsl(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Lshr:
    _lsr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Ashr:
    _asr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Udiv:
  case InstArithmetic::Sdiv:
  case InstArithmetic::Urem:
  case InstArithmetic::Srem:
    llvm_unreachable("Integer div/rem should have been handled earlier.");
    return;
  case InstArithmetic::Fadd:
  case InstArithmetic::Fsub:
  case InstArithmetic::Fmul:
  case InstArithmetic::Fdiv:
  case InstArithmetic::Frem:
    llvm_unreachable("Floating point arith should have been handled earlier.");
    return;
  }
}

void TargetARM32::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
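    // Illustrative shape of the i64 lowering below: each half is moved
    // through a temporary, following the usual lower-to-a-temporary pattern
    // used throughout this file, e.g.
    //   mov t_lo, src.lo ; mov dst.lo, t_lo
    //   mov t_hi, src.hi ; mov dst.hi, t_hi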
    Src0 = legalizeUndef(Src0);
    Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
    Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *NewSrc;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then legalize the Src operand
      // into a Variable with the same register assignment. This especially
      // helps allow the use of Flex operands.
      NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
    } else {
      // Dest could be a stack operand. Since we could potentially need to do a
      // Store (and store can only have Register operands), legalize this to a
      // register.
      NewSrc = legalize(Src0, Legal_Reg);
    }
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isFloatingType(Dest->getType())) {
      Variable *SrcR = legalizeToReg(NewSrc);
      _vmov(Dest, SrcR);
    } else {
      _mov(Dest, NewSrc);
    }
  }
}

void TargetARM32::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();
  // TODO(jvoung): Handle folding opportunities.

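  // Lowered shape (illustrative):
  //   cmp cond, #0
  //   bne target_true
  //   b   target_false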
  Variable *Src0R = legalizeToReg(Cond);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0R, Zero);
  _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
}

void TargetARM32::lowerCall(const InstCall *Instr) {
  MaybeLeafFunc = false;
  NeedsStackAlignment = true;

  // Assign arguments to registers and stack. Also reserve stack.
  TargetARM32::CallingConv CC;
  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
  int32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
        InRegs = true;
        Operand *Lo = loOperand(Arg);
        Operand *Hi = hiOperand(Arg);
        GPRArgs.push_back(std::make_pair(Lo, Regs.first));
        GPRArgs.push_back(std::make_pair(Hi, Regs.second));
      }
    } else if (isVectorType(Ty) || isFloatingType(Ty)) {
      int32_t Reg;
      if (CC.FPInReg(Ty, &Reg)) {
        InRegs = true;
        FPArgs.push_back(std::make_pair(Arg, Reg));
      }
    } else {
      assert(Ty == IceType_i32);
      int32_t Reg;
      if (CC.I32InReg(&Reg)) {
        InRegs = true;
        GPRArgs.push_back(std::make_pair(Arg, Reg));
      }
    }

    if (!InRegs) {
      ParameterAreaSizeBytes =
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }
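  // For example (illustrative): for a call "f(i32 %a, double %b, i64 %c)",
  // the loop above would assign %a to r0, %b to an FP register such as d0,
  // and the halves of %c to a GPR pair such as r2/r3, leaving StackArgs
  // empty.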

  // Adjust the parameter area so that the stack is aligned. It is assumed that
  // the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also takes
  // care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code eliminated
  // after lowering, we would need to ensure that the pre-call and the
  // post-call sp adjustments get eliminated as well.
  if (ParameterAreaSizeBytes) {
    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
  }

  // Copy arguments that are passed on the stack to the appropriate stack
  // locations.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
    constexpr bool SignExt = false;
    if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &GPRArg : GPRArgs) {
    Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
    // Generate a FakeUse of register arguments so that they do not get dead
    // code eliminated as a result of the FakeKill of scratch registers after
    // the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  for (auto &FPArg : FPArgs) {
    Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary with high
  // register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
      ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
      break;
    case IceType_f32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
      break;
    case IceType_f64:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // TODO(jvoung): Handle sandboxing.
  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();

  // Allow ConstantRelocatable to be left alone as a direct call, but force
  // other constants like ConstantInteger32 to be in a register and make it an
  // indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to SP. The call instruction takes care of
  // resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _add(SP, SP, AddAmount);
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
        _vmov(Dest, ReturnReg);
      } else {
        assert(isIntegerType(Dest->getType()) &&
               typeWidthInBytes(Dest->getType()) <= 4);
        _mov(Dest, ReturnReg);
      }
    }
  }
}

void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0->getType() == IceType_i32) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else if (Src0->getType() == IceType_i1) {
        Variable *Src0R = legalizeToReg(Src0);
        _lsl(T_Lo, Src0R, ShiftAmt);
        _asr(T_Lo, T_Lo, ShiftAmt);
      } else {
        Variable *Src0R = legalizeToReg(Src0);
        _sxt(T_Lo, Src0R);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestHi->getType());
      if (Src0->getType() != IceType_i1) {
        _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
                                               OperandARM32::ASR, ShiftAmt));
      } else {
        // For i1, the asr instruction is already done above.
        _mov(T_Hi, T_Lo);
      }
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
      // lsl t1, src_reg, 31
      // asr t1, t1, 31
      // dst = t1
      Variable *Src0R = legalizeToReg(Src0);
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *T = makeReg(Dest->getType());
      _lsl(T, Src0R, ShiftAmt);
      _asr(T, T, ShiftAmt);
      _mov(Dest, T);
    } else {
      // t1 = sxt src; dst = t1
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T = makeReg(Dest->getType());
      _sxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // t1=uxtb src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // i32 and i1 can just take up the whole register. i32 doesn't need uxt,
      // while i1 will have an and mask later anyway.
      if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else {
        Variable *Src0R = legalizeToReg(Src0);
        _uxt(T_Lo, Src0R);
      }
      if (Src0->getType() == IceType_i1) {
        Constant *One = Ctx->getConstantInt32(1);
        _and(T_Lo, T_Lo, One);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestLo->getType());
      _mov(T_Hi, Zero);
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // t = Src0; t &= 1; Dest = t
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      Constant *One = Ctx->getConstantInt32(1);
      Variable *T = makeReg(Dest->getType());
      // Just use _mov instead of _uxt since all registers are 32-bit. _uxt
      // requires the source to be a register so could have required a _mov
      // from legalize anyway.
      _mov(T, Src0RF);
      _and(T, T, One);
      _mov(Dest, T);
    } else {
      // t1 = uxt src; dst = t1
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T = makeReg(Dest->getType());
      _uxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      // t1 = trunc Src0RF; Dest = t1
      Variable *T = makeReg(Dest->getType());
      _mov(T, Src0RF);
      if (Dest->getType() == IceType_i1)
        _and(T, T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // fptrunc: dest.f32 = fptrunc src0.f64
    // fpext: dest.f64 = fpext src0.f32
    const bool IsTrunc = CastKind == InstCast::Fptrunc;
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
    assert(Dest->getType() == (IsTrunc ? IceType_f32 : IceType_f64));
    assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
    Variable *Src0R = legalizeToReg(Src0);
    Variable *T = makeReg(Dest->getType());
    _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
  case InstCast::Fptoui: {
    // fptosi:
    // t1.fp = vcvt src0.fp
    // t2.i32 = vmov t1.fp
    // dest.int = conv t2.i32 @ Truncates the result if needed.
    // fptoui:
    // t1.fp = vcvt src0.fp
    // t2.u32 = vmov t1.fp
    // dest.uint = conv t2.u32 @ Truncates the result if needed.
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
      break;
    } else if (Dest->getType() == IceType_i64) {
      split64(Dest);
      Context.insert(InstFakeDef::create(Func, Dest->getLo()));
      Context.insert(InstFakeDef::create(Func, Dest->getHi()));
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
    const bool DestIsSigned = CastKind == InstCast::Fptosi;
    Variable *Src0R = legalizeToReg(Src0);
    Variable *T_fp = makeReg(IceType_f32);
    if (isFloat32Asserting32Or64(Src0->getType())) {
      _vcvt(T_fp, Src0R,
            DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui);
    } else {
      _vcvt(T_fp, Src0R,
            DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
    }
    Variable *T = makeReg(IceType_i32);
    _vmov(T, T_fp);
    if (Dest->getType() != IceType_i32) {
      Variable *T_1 = makeReg(Dest->getType());
      lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
      T = T_1;
    }
    _mov(Dest, T);
    break;
  }
  case InstCast::Sitofp:
  case InstCast::Uitofp: {
    // sitofp:
    // t1.i32 = sext src.int @ sign-extends src0 if needed.
    // t2.fp32 = vmov t1.i32
    // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64
    // uitofp:
    // t1.i32 = zext src.int @ zero-extends src0 if needed.
    // t2.fp32 = vmov t1.i32
    // t3.fp = vcvt.{fp}.u32 @ fp is either f32 or f64
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
    if (Src0->getType() == IceType_i64) {
      // Add a fake def to avoid cryptic liveness errors.
      Context.insert(InstFakeDef::create(Func, Dest));
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
    const bool SourceIsSigned = CastKind == InstCast::Sitofp;
    if (Src0->getType() != IceType_i32) {
      Variable *Src0R_32 = makeReg(IceType_i32);
      lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext
                                                      : InstCast::Zext,
                                 Src0R_32, Src0));
      Src0 = Src0R_32;
    }
    Variable *Src0R = legalizeToReg(Src0);
    Variable *Src0R_f32 = makeReg(IceType_f32);
    _vmov(Src0R_f32, Src0R);
    Src0R = Src0R_f32;
    Variable *T = makeReg(Dest->getType());
    if (isFloat32Asserting32Or64(Dest->getType())) {
      _vcvt(T, Src0R,
            SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s);
    } else {
      _vcvt(T, Src0R,
            SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
    }
    _mov(Dest, T);
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    Type DestType = Dest->getType();
    switch (DestType) {
    case IceType_NUM:
    case IceType_void:
      llvm::report_fatal_error("Unexpected bitcast.");
    case IceType_i1:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i1:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_i8:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_i16:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_i32:
    case IceType_f32: {
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T = makeReg(DestType);
      _vmov(T, Src0R);
      lowerAssign(InstAssign::create(Func, Dest, T));
      break;
    }
    case IceType_i64: {
      // t0, t1 <- src0
      // dest[31..0] = t0
      // dest[63..32] = t1
      assert(Src0->getType() == IceType_f64);
      Variable *T0 = makeReg(IceType_i32);
      Variable *T1 = makeReg(IceType_i32);
      Variable *Src0R = legalizeToReg(Src0);
      split64(Dest);
      _vmov(InstARM32Vmov::RegisterPair(T0, T1), Src0R);
      lowerAssign(InstAssign::create(Func, Dest->getLo(), T0));
      lowerAssign(InstAssign::create(Func, Dest->getHi(), T1));
      break;
    }
    case IceType_f64: {
      // T0 <- lo(src)
      // T1 <- hi(src)
      // vmov T2, T0, T1
      // Dest <- T2
      assert(Src0->getType() == IceType_i64);
      Variable *SrcLo = legalizeToReg(loOperand(Src0));
      Variable *SrcHi = legalizeToReg(hiOperand(Src0));
      Variable *T = makeReg(IceType_f64);
      _vmov(T, InstARM32Vmov::RegisterPair(SrcLo, SrcHi));
      lowerAssign(InstAssign::create(Func, Dest, T));
      break;
    }
    case IceType_v8i1:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v16i1:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v8i16:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v16i8:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i32:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4f32:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
    break;
  }
  }
}

void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));

  if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }

  // a=icmp cond, b, c ==>
  // GCC does:
  // cmp      b.hi, c.hi      or  cmp      b.lo, c.lo
  // cmp.eq   b.lo, c.lo          sbcs t1, b.hi, c.hi
  // mov.<C1> t, #1               mov.<C1> t, #1
  // mov.<C2> t, #0               mov.<C2> t, #0
  // mov      a, t                mov      a, t
  // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
  // is used for signed compares. In some cases, b and c need to be swapped as
  // well.
  //
  // LLVM does:
  // for EQ and NE:
  // eor t1, b.hi, c.hi
  // eor t2, b.lo, c.lo
  // orrs t, t1, t2
  // mov.<C> t, #1
  // mov a, t
  //
  // that's nice in that it's just as short but has fewer dependencies for
  // better ILP at the cost of more registers.
  //
  // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
  // unconditional mov #0, two cmps, two conditional mov #1, and one
  // conditional reg mov. That has few dependencies for good ILP, but is a
  // longer sequence.
  //
  // So, we are going with the GCC version since it's usually better (except
  // perhaps for eq/ne). We could revisit special-casing eq/ne later.
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt32(1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < llvm::array_lengthof(TableIcmp64));
    Variable *Src0Lo, *Src0Hi;
    Operand *Src1LoRF, *Src1HiRF;
    if (TableIcmp64[Index].Swapped) {
      Src0Lo = legalizeToReg(loOperand(Src1));
      Src0Hi = legalizeToReg(hiOperand(Src1));
      Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
    } else {
      Src0Lo = legalizeToReg(loOperand(Src0));
      Src0Hi = legalizeToReg(hiOperand(Src0));
      Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
    }
    Variable *T = makeReg(IceType_i32);
    if (TableIcmp64[Index].IsSigned) {
      Variable *ScratchReg = makeReg(IceType_i32);
      _cmp(Src0Lo, Src1LoRF);
      _sbcs(ScratchReg, Src0Hi, Src1HiRF);
      // ScratchReg isn't going to be used, but we need the side-effect of
      // setting flags from this operation.
      Context.insert(InstFakeUse::create(Func, ScratchReg));
    } else {
      _cmp(Src0Hi, Src1HiRF);
      _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
    }
    _mov(T, One, TableIcmp64[Index].C1);
    _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
    _mov(Dest, T);
    return;
  }

  // a=icmp cond b, c ==>
  // GCC does:
  // <u/s>xtb tb, b
  // <u/s>xtb tc, c
  // cmp tb, tc
  // mov.C1 t, #0
  // mov.C2 t, #1
  // mov a, t
  // where the unsigned/sign extension is not needed for 32-bit. They also have
  // special cases for EQ and NE. E.g., for NE:
  // <extend to tb, tc>
  // subs t, tb, tc
  // movne t, #1
  // mov a, t
  //
  // LLVM does:
  // lsl tb, b, #<N>
  // mov t, #0
  // cmp tb, c, lsl #<N>
  // mov.<C> t, #1
  // mov a, t
  //
  // The left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the bits that actually matter (for 16-bit or 8-bit signed/unsigned).
  // For the unsigned case, LLVM for some reason does something similar to GCC
  // and emits a uxtb first; it's not clear why that special-casing is needed.
  //
  // We'll go with the LLVM way for now, since it's shorter and has just as few
  // dependencies.
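  //
  // The shift amount below is 32 minus the operand's bit width: 24 for i8,
  // 16 for i16, and 0 for i32. E.g. (illustrative), an i8 compare becomes:
  //   lsl tb, b, #24
  //   cmp tb, c, lsl #24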
  int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShiftAmt >= 0);
  Constant *ShiftConst = nullptr;
  Variable *Src0R = nullptr;
  Variable *T = makeReg(IceType_i32);
  if (ShiftAmt) {
    ShiftConst = Ctx->getConstantInt32(ShiftAmt);
    Src0R = makeReg(IceType_i32);
    _lsl(Src0R, legalizeToReg(Src0), ShiftConst);
  } else {
    Src0R = legalizeToReg(Src0);
  }
  _mov(T, Zero);
  if (ShiftAmt) {
    Variable *Src1R = legalizeToReg(Src1);
    OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
        Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
    _cmp(Src0R, Src1RShifted);
  } else {
    Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
    _cmp(Src0R, Src1RF);
  }
  _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
  _mov(Dest, T);
  return;
}

void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
  (void)Inst;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicFence:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved across the
    // fence (both atomic and non-atomic). The InstARM32Mfence instruction is
    // currently marked coarsely as "HasSideEffects".
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicIsLockFree: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicLoad: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicRMW:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicStore: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    Type Ty = Val->getType();
    if (Ty == IceType_i64) {
      Val = legalizeUndef(Val);
      Variable *Val_Lo = legalizeToReg(loOperand(Val));
      Variable *Val_Hi = legalizeToReg(hiOperand(Val));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _rev(T_Lo, Val_Lo);
      _rev(T_Hi, Val_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else {
      assert(Ty == IceType_i32 || Ty == IceType_i16);
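      // E.g. (illustrative): for i16, rev reverses all four bytes of the
      // 32-bit register, leaving the swapped halfword in the upper 16 bits,
      // so the result is shifted right by 16 below.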
      Variable *ValR = legalizeToReg(Val);
      Variable *T = makeReg(Ty);
      _rev(T, ValR);
      if (Val->getType() == IceType_i16) {
        Operand *Sixteen =
            legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
        _lsr(T, T, Sixteen);
      }
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
                                        ? H_call_ctpop_i32
                                        : H_call_ctpop_i64,
                                    Dest, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches some 64-bit platforms' native instructions and
    // expects to fill a 64-bit reg. Thus, clear the upper bits of the dest
    // just in case the user doesn't do that in the IR or doesn't toss the
    // bits via truncate.
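    // E.g. (illustrative): for ctpop on an i64, the helper's 32-bit result
    // occupies the low word of Dest and the code below explicitly zeroes the
    // high word.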
    if (Val->getType() == IceType_i64) {
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *T = nullptr;
      _mov(T, Zero);
      _mov(DestHi, T);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return a
    // well-defined value.
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToReg(loOperand(Val));
      ValHiR = legalizeToReg(hiOperand(Val));
    } else {
      ValLoR = legalizeToReg(Val);
    }
    lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Cttz: {
    // Essentially like Clz, but reverse the bits first.
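    // E.g. (illustrative): if the lowest set bit of x is bit 3, rbit moves it
    // to bit 28, and clz then returns 3 == cttz(x).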
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToReg(loOperand(Val));
      ValHiR = legalizeToReg(hiOperand(Val));
      Variable *TLo = makeReg(IceType_i32);
      Variable *THi = makeReg(IceType_i32);
      _rbit(TLo, ValLoR);
      _rbit(THi, ValHiR);
      ValLoR = THi;
      ValHiR = TLo;
    } else {
      ValLoR = legalizeToReg(Val);
      Variable *T = makeReg(IceType_i32);
      _rbit(T, ValLoR);
      ValLoR = T;
    }
    lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Fabs: {
    // Add a fake def to keep liveness consistent in the meantime.
    Context.insert(InstFakeDef::create(Func, Instr->getDest()));
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls with a known length.
    InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size because the
    // PNaCl ABI requires arguments to be at least 32 bits wide.
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    // Technically, ARM has its own __aeabi_memset, but we can use plain
    // memset too. The value and size arguments would need to be swapped if we
    // ever decide to use __aeabi_memset.
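    // For reference, the two signatures differ in argument order (a sketch,
    // per the standard C library and the ARM RTABI):
    //   void *memset(void *dest, int c, size_t n);
    //   void __aeabi_memset(void *dest, size_t n, int c);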
    InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().getUseSandboxing()) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    Variable *Src = legalizeToReg(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _vsqrt(T, Src);
    _vmov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, SP);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
    _mov_nonkillable(SP, Val);
    return;
  }
  case Intrinsics::Trap:
    _trap();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}

void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
  Type Ty = Dest->getType();
  assert(Ty == IceType_i32 || Ty == IceType_i64);
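  // For i32 a single clz suffices. For i64, a rough sketch of the emitted
  // sequence (registers illustrative):
  //   clz   t, val_lo
  //   cmp   val_hi, #0
  //   add   t2, t, #32        @ assume hi == 0: 32 + clz(lo)
  //   clzne t2, val_hi        @ hi != 0: clz(hi) wins instead
  //   mov   dest_lo, t2
  //   mov   dest_hi, #0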
  Variable *T = makeReg(IceType_i32);
  _clz(T, ValLoR);
  if (Ty == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Zero =
        legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
    Operand *ThirtyTwo =
        legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
    _cmp(ValHiR, Zero);
    Variable *T2 = makeReg(IceType_i32);
    _add(T2, T, ThirtyTwo);
    _clz(T2, ValHiR, CondARM32::NE);
    // T2 is actually a source as well when the predicate is not AL (since it
    // may leave T2 alone). We use set_dest_nonkillable to prolong the
    // liveness of T2 as if it were used as a source.
    _set_dest_nonkillable();
    _mov(DestLo, T2);
    Variable *T3 = nullptr;
    _mov(T3, Zero);
    _mov(DestHi, T3);
    return;
  }
  _mov(Dest, T);
  return;
}

void TargetARM32::lowerLoad(const InstLoad *Load) {
  // A Load instruction can be treated the same as an Assign instruction,
  // after the source operand is transformed into an OperandARM32Mem operand.
  Type Ty = Load->getDest()->getType();
  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
  Variable *DestLoad = Load->getDest();

  // TODO(jvoung): handle folding opportunities. Sign and zero extension can
  // be folded into a load.
  InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}

void TargetARM32::doAddressOptLoad() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::randomlyInsertNop(float Probability,
                                    RandomNumberGenerator &RNG) {
  RandomNumberGeneratorWrapper RNGW(RNG);
  if (RNGW.getTrueWithProbability(Probability)) {
    UnimplementedError(Func->getContext()->getFlags());
  }
}

void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}

void TargetARM32::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = Inst->getRetValue();
    Type Ty = Src0->getType();
    if (Ty == IceType_i64) {
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
      Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
      Reg = R0;
      Context.insert(InstFakeUse::create(Func, R1));
    } else if (Ty == IceType_f32) {
      Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
      Reg = S0;
    } else if (Ty == IceType_f64) {
      Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
      Reg = D0;
    } else if (isVectorType(Ty)) {
      Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
      Reg = Q0;
    } else {
      Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
      _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because addEpilog
  // explicitly looks for a ret instruction as a marker for where to insert
  // the frame removal instructions. addEpilog is responsible for restoring
  // the "lr" register as needed prior to this ret instruction.
  _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
  // Add a fake use of sp to make sure sp stays alive for the entire function.
  // Otherwise post-call sp adjustments get dead-code eliminated.
  // TODO: Are there more places where the fake use should be inserted? E.g.
  // "void f(int n){while(1) g(n);}" may not have a ret instruction.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Context.insert(InstFakeUse::create(Func, SP));
}

void TargetARM32::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  if (isFloatingType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // TODO(jvoung): handle folding opportunities.
  // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
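  // For a 32-bit select this comes out roughly as (registers illustrative):
  //   cmp   r2, #0            @ r2 holds the condition
  //   mov   r0, <SrcF>
  //   movne r0, <SrcT>
  //   mov   <Dest>, r0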
  Variable *CmpOpnd0 = legalizeToReg(Condition);
  Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  _cmp(CmpOpnd0, CmpOpnd1);
  CondARM32::Cond Cond = CondARM32::NE;
  if (DestTy == IceType_i64) {
    SrcT = legalizeUndef(SrcT);
    SrcF = legalizeUndef(SrcF);
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }
  Variable *T = nullptr;
  SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
  _mov_nonkillable(T, SrcT, Cond);
  _mov(Dest, T);
}

void TargetARM32::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Variable *ValueHi = legalizeToReg(hiOperand(Value));
    Variable *ValueLo = legalizeToReg(loOperand(Value));
    _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
    _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
  } else if (isVectorType(Ty)) {
    UnimplementedError(Func->getContext()->getFlags());
  } else {
    Variable *ValueR = legalizeToReg(Value);
    _str(ValueR, NewAddr);
  }
}

void TargetARM32::doAddressOptStore() {
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
  // This implements the most naive possible lowering:
  //   cmp a,val[0]; beq label[0]; cmp a,val[1]; beq label[1]; ... b default
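  // For the i64 case below, each comparison is itself a two-instruction
  // sequence, with the high-word compare predicated on the low-word match,
  // e.g. (a sketch):
  //   cmp   a_lo, #val_lo
  //   cmpeq a_hi, #val_hi
  //   beq   label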
  Operand *Src0 = Inst->getComparison();
  SizeT NumCases = Inst->getNumCases();
  if (Src0->getType() == IceType_i64) {
    Src0 = legalizeUndef(Src0);
    Variable *Src0Lo = legalizeToReg(loOperand(Src0));
    Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
      ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
      ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
      _cmp(Src0Lo, ValueLo);
      _cmp(Src0Hi, ValueHi, CondARM32::EQ);
      _br(Inst->getLabel(I), CondARM32::EQ);
    }
    _br(Inst->getLabelDefault());
    return;
  }

  // 32-bit integer case.
  Variable *Src0Var = legalizeToReg(Src0);
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Inst->getValue(I));
    Value = legalize(Value, Legal_Reg | Legal_Flex);
    _cmp(Src0Var, Value);
    _br(Inst->getLabel(I), CondARM32::EQ);
  }
  _br(Inst->getLabelDefault());
}

void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
  _trap();
}

void TargetARM32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func);
}

Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  UnimplementedError(Func->getContext()->getFlags());
  return Reg;
}

// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty) || isFloatingType(Ty)) {
    _vmov(Reg, Src);
  } else {
    // Mov's Src operand can really only be the flexible second operand type
    // or a register. Users should guarantee that.
    _mov(Reg, Src);
  }
  return Reg;
}

Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register. Legal_Flex converts registers to
  // the right type OperandARM32FlexReg as needed.
  assert(Allowed & Legal_Reg);
  // Go through the various types of operands: OperandARM32Mem,
  // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
  // the type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem),
  // we can always copy to a register.
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure that the
    // Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToReg(Base);
    }
    if (Index) {
      RegIndex = legalizeToReg(Index);
    }
    // Create a new operand if there was a change.
    if (Base != RegBase || Index != RegIndex) {
      // There is only a reg +/- reg or reg + imm form.
      // Figure out which to re-create.
      if (Mem->isRegReg()) {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
                                      Mem->getShiftOp(), Mem->getShiftAmt(),
                                      Mem->getAddrMode());
      } else {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, Mem->getOffset(),
                                      Mem->getAddrMode());
      }
    }
    if (!(Allowed & Legal_Mem)) {
      Variable *Reg = makeReg(Ty, RegNum);
      if (isVectorType(Ty)) {
        UnimplementedError(Func->getContext()->getFlags());
      } else if (isFloatingType(Ty)) {
        _vldr(Reg, Mem);
      } else {
        _ldr(Reg, Mem);
      }
      From = Reg;
    } else {
      From = Mem;
    }
    return From;
  }

  if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
    if (!(Allowed & Legal_Flex)) {
      if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
        if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
          From = FlexReg->getReg();
          // Fall through and let From be checked as a Variable below, where
          // it may or may not need a register.
        } else {
          return copyToReg(Flex, RegNum);
        }
      } else {
        return copyToReg(Flex, RegNum);
      }
    } else {
      return From;
    }
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));
    bool CanBeFlex = Allowed & Legal_Flex;
    if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      uint32_t RotateAmt;
      uint32_t Immed_8;
      uint32_t Value = static_cast<uint32_t>(C32->getValue());
      // Check if the immediate will fit in a Flexible second operand, if a
      // Flexible second operand is allowed. We need to know the exact value,
      // so that rules out relocatable constants. Also try the inverse and use
      // MVN if possible.
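      // A flexible immediate encodes an 8-bit value rotated right by an even
      // amount, so, e.g., 0x00AB0000 (0xAB ror 16) is directly encodable,
      // while 0xFFFFFF00 is not; its inverse 0x000000FF is, though, so that
      // one can be materialized as: mvn reg, #0xFF.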
      if (CanBeFlex &&
          OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
        return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
      } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
                                  ~Value, &RotateAmt, &Immed_8)) {
        auto *InvertedFlex =
            OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
        Variable *Reg = makeReg(Ty, RegNum);
        _mvn(Reg, InvertedFlex);
        return Reg;
      } else {
        // Do a movw/movt to a register.
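        // For example, 0x12345678 is materialized roughly as:
        //   movw reg, #0x5678
        //   movt reg, #0x1234
        // When the upper half is zero, the movt is skipped entirely.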
        Variable *Reg = makeReg(Ty, RegNum);
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        _movw(Reg,
              UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
        if (UpperBits != 0) {
          _movt(Reg, Ctx->getConstantInt32(UpperBits));
        }
        return Reg;
      }
    } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
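      // The address of a relocatable symbol is built with a movw/movt pair
      // whose halves the linker fills in, e.g. (assembler syntax
      // illustrative):
      //   movw reg, #:lower16:sym
      //   movt reg, #:upper16:sym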
      Variable *Reg = makeReg(Ty, RegNum);
      _movw(Reg, C);
      _movt(Reg, C);
      return Reg;
    } else {
      assert(isScalarFloatingType(Ty));
      // Load floats/doubles from the literal pool.
      // TODO(jvoung): Allow certain immediates to be encoded directly in an
      // operand. See Table A7-18 of the ARM manual: "Floating-point modified
      // immediate constants". Or, for 32-bit floating point numbers, just
      // encode the raw bits into a movw/movt pair to a GPR, and vmov to an
      // SREG, instead of using a movw/movt pair to get the const-pool address
      // and then loading to an SREG.
      std::string Buffer;
      llvm::raw_string_ostream StrBuf(Buffer);
      llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
      llvm::cast<Constant>(From)->setShouldBePooled(true);
      Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
      Variable *BaseReg = makeReg(getPointerType());
      _movw(BaseReg, Offset);
      _movt(BaseReg, Offset);
      From = formMemoryOperand(BaseReg, Ty);
      return copyToReg(From, RegNum);
    }
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This can
    // happen either when the variable is pre-colored or when it is assigned
    // infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   - Mem is not allowed and Var isn't guaranteed a physical register, or
    //   - RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");

  return From;
}

/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef values to
    // uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value, then
    // the result should be split and the lo and hi components will need to go
    // into uninitialized registers.
    if (isVectorType(Ty))
      return makeVectorOfZeros(Ty, RegNum);
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
  OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
  // It may be the case that address mode optimization already creates an
  // OperandARM32Mem, so in that case it wouldn't need another level of
  // transformation.
  if (Mem) {
    return llvm::cast<OperandARM32Mem>(legalize(Mem));
  }
  // If we didn't do address mode optimization, then we only have a
  // base/offset to work with. ARM always requires a base register, so just
  // use that to hold the operand.
  Variable *Base = legalizeToReg(Operand);
  return OperandARM32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}

Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for ARM32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setMustHaveReg();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
  assert(llvm::isPowerOf2_32(Align));
  uint32_t RotateAmt;
  uint32_t Immed_8;
  Operand *Mask;
  // Use AND or BIC to mask off the bits, depending on which immediate fits
  // (if it fits at all). Assume Align is usually small, in which case BIC
  // works better. Thus, this rounds down to the alignment.
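  // For example, Align == 8 gives: bic reg, reg, #7. If neither Align - 1 nor
  // -Align fits as a flexible immediate, legalize() materializes the mask in
  // a register and the AND form is used.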
  if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
    Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
    _bic(Reg, Reg, Mask);
  } else {
    Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
    _and(Reg, Reg, Mask);
  }
}

void TargetARM32::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  inferTwoAddress();
}

void TargetARM32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  (void)Salt;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

void TargetARM32::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

void TargetARM32::emit(const ConstantFloat *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantDouble *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const IceString &SectionSuffix) {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

void TargetDataARM32::lowerConstants() {
  if (Ctx->getFlags().getDisableTranslation())
    return;
  UnimplementedError(Ctx->getFlags());
}

void TargetDataARM32::lowerJumpTables() {
  if (Ctx->getFlags().getDisableTranslation())
    return;
  UnimplementedError(Ctx->getFlags());
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2
  // of "Addenda to, and Errata in, the ABI for the ARM architecture"
  // http://infocenter.arm.com
  // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope sub-subsection of
  // the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least A15, but use A9 for wider compatibility. For
  // some reason, the LLVM ARM asm parser has the .cpu directive override the
  // mattr specified on the command line. So to test hwdiv, we need to set the
  // .cpu directive higher (can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu cortex-a15\n";
  } else {
    Str << ".cpu cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu neon\n"
      << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

} // end of namespace Ice