//===- subzero/src/IceVariableSplitting.cpp - Local variable splitting ----===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Aggressive block-local variable splitting to improve linear-scan
/// register allocation.
///
//===----------------------------------------------------------------------===//
| 15 | |
| 16 | #include "IceVariableSplitting.h" |
| 17 | |
| 18 | #include "IceCfg.h" |
| 19 | #include "IceCfgNode.h" |
| 20 | #include "IceClFlags.h" |
| 21 | #include "IceInst.h" |
| 22 | #include "IceOperand.h" |
| 23 | #include "IceTargetLowering.h" |
| 24 | |
| 25 | namespace Ice { |
| 26 | |
| 27 | namespace { |
| 28 | |
| 29 | /// A Variable is "allocable" if it is a register allocation candidate but |
| 30 | /// doesn't already have a register. |
| 31 | bool isAllocable(const Variable *Var) { |
| 32 | if (Var == nullptr) |
| 33 | return false; |
| 34 | return !Var->hasReg() && Var->mayHaveReg(); |
| 35 | } |
| 36 | |
| 37 | /// A Variable is "inf" if it already has a register or is infinite-weight. |
| 38 | bool isInf(const Variable *Var) { |
| 39 | if (Var == nullptr) |
| 40 | return false; |
| 41 | return Var->hasReg() || Var->mustHaveReg(); |
| 42 | } |
| 43 | |
| 44 | /// VariableMap is a simple helper class that keeps track of the latest split |
| 45 | /// version of the original Variables, as well as the instruction containing the |
| 46 | /// last use of the Variable within the current block. For each entry, the |
| 47 | /// Variable is tagged with the CfgNode that it is valid in, so that we don't |
| 48 | /// need to clear the entire Map[] vector for each block. |
| 49 | class VariableMap { |
| 50 | private: |
| 51 | VariableMap() = delete; |
| 52 | VariableMap(const VariableMap &) = delete; |
| 53 | VariableMap &operator=(const VariableMap &) = delete; |
| 54 | |
| 55 | struct VarInfo { |
| 56 | /// MappedVar is the latest mapped/split version of the Variable. |
| 57 | Variable *MappedVar = nullptr; |
| 58 | /// MappedVarNode is the block in which MappedVar is valid. |
| 59 | const CfgNode *MappedVarNode = nullptr; |
| 60 | /// LastUseInst is the last instruction in the block that uses the Variable |
| 61 | /// as a source operand. |
| 62 | const Inst *LastUseInst = nullptr; |
| 63 | /// LastUseNode is the block in which LastUseInst is valid. |
| 64 | const CfgNode *LastUseNode = nullptr; |
| 65 | VarInfo() = default; |
| 66 | |
| 67 | private: |
| 68 | VarInfo(const VarInfo &) = delete; |
| 69 | VarInfo &operator=(const VarInfo &) = delete; |
| 70 | }; |
| 71 | |
| 72 | public: |
| 73 | explicit VariableMap(Cfg *Func) |
| 74 | : Func(Func), NumVars(Func->getNumVariables()), Map(NumVars) {} |
| 75 | /// Reset the mappings at the start of a block. |
| 76 | void reset(const CfgNode *CurNode) { |
| 77 | Node = CurNode; |
| 78 | // Do a prepass through all the instructions, marking which instruction is |
| 79 | // the last use of each Variable within the block. |
| 80 | for (const Inst &Instr : Node->getInsts()) { |
| 81 | if (Instr.isDeleted()) |
| 82 | continue; |
| 83 | for (SizeT i = 0; i < Instr.getSrcSize(); ++i) { |
| 84 | if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr.getSrc(i))) { |
| 85 | const SizeT VarNum = getVarNum(SrcVar); |
| 86 | Map[VarNum].LastUseInst = &Instr; |
| 87 | Map[VarNum].LastUseNode = Node; |
| 88 | } |
| 89 | } |
| 90 | } |
| 91 | } |
| 92 | /// Get Var's current mapping (or Var itself if it has no mapping yet). |
| 93 | Variable *get(Variable *Var) const { |
| 94 | const SizeT VarNum = getVarNum(Var); |
| 95 | Variable *MappedVar = Map[VarNum].MappedVar; |
| 96 | if (MappedVar == nullptr) |
| 97 | return Var; |
| 98 | if (Map[VarNum].MappedVarNode != Node) |
| 99 | return Var; |
| 100 | return MappedVar; |
| 101 | } |
| 102 | /// Create a new linked Variable in the LinkedTo chain, and set it as Var's |
| 103 | /// latest mapping. |
| 104 | Variable *makeLinked(Variable *Var) { |
| 105 | Variable *NewVar = Func->makeVariable(Var->getType()); |
| 106 | NewVar->setRegClass(Var->getRegClass()); |
| 107 | NewVar->setLinkedTo(get(Var)); |
| 108 | const SizeT VarNum = getVarNum(Var); |
| 109 | Map[VarNum].MappedVar = NewVar; |
| 110 | Map[VarNum].MappedVarNode = Node; |
| 111 | return NewVar; |
| 112 | } |
| 113 | /// Given Var that is LinkedTo some other variable, re-splice it into the |
| 114 | /// LinkedTo chain so that the chain is ordered by Variable::getIndex(). |
| 115 | void spliceBlockLocalLinkedToChain(Variable *Var) { |
| 116 | Variable *LinkedTo = Var->getLinkedTo(); |
| 117 | assert(LinkedTo != nullptr); |
| 118 | assert(Var->getIndex() > LinkedTo->getIndex()); |
| 119 | const SizeT VarNum = getVarNum(LinkedTo); |
| 120 | Variable *Link = Map[VarNum].MappedVar; |
| 121 | if (Link == nullptr || Map[VarNum].MappedVarNode != Node) |
| 122 | return; |
| 123 | Variable *LinkParent = Link->getLinkedTo(); |
| 124 | while (LinkParent != nullptr && LinkParent->getIndex() >= Var->getIndex()) { |
| 125 | Link = LinkParent; |
| 126 | LinkParent = Link->getLinkedTo(); |
| 127 | } |
| 128 | Var->setLinkedTo(LinkParent); |
| 129 | Link->setLinkedTo(Var); |
| 130 | } |
| 131 | /// Return whether the given Variable has any uses as a source operand within |
| 132 | /// the current block. If it has no source operand uses, but is assigned as a |
| 133 | /// dest variable in some instruction in the block, then we needn't bother |
| 134 | /// splitting it. |
| 135 | bool isDestUsedInBlock(const Variable *Dest) const { |
| 136 | return Map[getVarNum(Dest)].LastUseNode == Node; |
| 137 | } |
| 138 | /// Return whether the given instruction is the last use of the given Variable |
| 139 | /// within the current block. If it is, then we needn't bother splitting the |
| 140 | /// Variable at this instruction. |
| 141 | bool isInstLastUseOfVar(const Variable *Var, const Inst *Instr) { |
| 142 | return Map[getVarNum(Var)].LastUseInst == Instr; |
| 143 | } |
| 144 | |
| 145 | private: |
| 146 | Cfg *const Func; |
| 147 | // NumVars is for the size of the Map array. It can be const because any new |
| 148 | // Variables created during the splitting pass don't need to be mapped. |
| 149 | const SizeT NumVars; |
| 150 | CfgVector<VarInfo> Map; |
| 151 | const CfgNode *Node = nullptr; |
| 152 | /// Get Var's VarNum, and do some validation. |
| 153 | SizeT getVarNum(const Variable *Var) const { |
| 154 | const SizeT VarNum = Var->getIndex(); |
| 155 | assert(VarNum < NumVars); |
| 156 | return VarNum; |
| 157 | } |
| 158 | }; |
| 159 | |
| 160 | /// LocalVariableSplitter tracks the necessary splitting state across |
| 161 | /// instructions. |
| 162 | class LocalVariableSplitter { |
| 163 | LocalVariableSplitter() = delete; |
| 164 | LocalVariableSplitter(const LocalVariableSplitter &) = delete; |
| 165 | LocalVariableSplitter &operator=(const LocalVariableSplitter &) = delete; |
| 166 | |
| 167 | public: |
| 168 | explicit LocalVariableSplitter(Cfg *Func) |
| 169 | : Target(Func->getTarget()), VarMap(Func) {} |
| 170 | /// setNode() is called before processing the instructions of a block. |
| 171 | void setNode(CfgNode *CurNode) { |
| 172 | Node = CurNode; |
| 173 | VarMap.reset(Node); |
| 174 | LinkedToFixups.clear(); |
| 175 | } |
| 176 | /// finalizeNode() is called after all instructions in the block are |
| 177 | /// processed. |
| 178 | void finalizeNode() { |
| 179 | // Splice in any preexisting LinkedTo links into the single chain. These |
| 180 | // are the ones that were recorded during setInst(). |
| 181 | for (Variable *Var : LinkedToFixups) { |
| 182 | VarMap.spliceBlockLocalLinkedToChain(Var); |
| 183 | } |
| 184 | } |
| 185 | /// setInst() is called before processing the next instruction. The iterators |
| 186 | /// are the insertion points for a new instructions, depending on whether the |
| 187 | /// new instruction should be inserted before or after the current |
| 188 | /// instruction. |
| 189 | void setInst(Inst *CurInst, InstList::iterator Cur, InstList::iterator Next) { |
| 190 | Instr = CurInst; |
| 191 | Dest = Instr->getDest(); |
| 192 | IterCur = Cur; |
| 193 | IterNext = Next; |
| 194 | ShouldSkipRemainingInstructions = false; |
| 195 | // Note any preexisting LinkedTo relationships that were created during |
| 196 | // target lowering. Record them in LinkedToFixups which is then processed |
| 197 | // in finalizeNode(). |
| 198 | if (Dest != nullptr && Dest->getLinkedTo() != nullptr) { |
| 199 | LinkedToFixups.emplace_back(Dest); |
| 200 | } |
| 201 | } |
| 202 | bool shouldSkipRemainingInstructions() const { |
| 203 | return ShouldSkipRemainingInstructions; |
| 204 | } |
| 205 | bool isUnconditionallyExecuted() const { return WaitingForLabel == nullptr; } |
| 206 | |
| 207 | /// Note: the handle*() functions return true to indicate that the instruction |
| 208 | /// has now been handled and that the instruction loop should continue to the |
| 209 | /// next instruction in the block (and return false otherwise). In addition, |
| 210 | /// they set the ShouldSkipRemainingInstructions flag to indicate that no more |
| 211 | /// instructions in the block should be processed. |
| 212 | |
| 213 | /// Handle an "unwanted" instruction by returning true; |
| 214 | bool handleUnwantedInstruction() { |
| 215 | // We can limit the splitting to an arbitrary subset of the instructions, |
| 216 | // and still expect correct code. As such, we can do instruction-subset |
| 217 | // bisection to help debug any problems in this pass. |
| 218 | static constexpr char AnInstructionHasNoName[] = ""; |
| 219 | if (!BuildDefs::minimal() && |
| 220 | !getFlags().matchSplitInsts(AnInstructionHasNoName, |
| 221 | Instr->getNumber())) { |
| 222 | return true; |
| 223 | } |
| 224 | if (!llvm::isa<InstTarget>(Instr)) { |
| 225 | // Ignore non-lowered instructions like FakeDef/FakeUse. |
| 226 | return true; |
| 227 | } |
| 228 | return false; |
| 229 | } |
| 230 | |
| 231 | /// Process a potential label instruction. |
| 232 | bool handleLabel() { |
| 233 | if (!Instr->isLabel()) |
| 234 | return false; |
| 235 | // A Label instruction shouldn't have any operands, so it can be handled |
| 236 | // right here and then move on. |
| 237 | assert(Dest == nullptr); |
| 238 | assert(Instr->getSrcSize() == 0); |
| 239 | if (Instr == WaitingForLabel) { |
| 240 | // If we found the forward-branch-target Label instruction we're waiting |
| 241 | // for, then clear the WaitingForLabel state. |
| 242 | WaitingForLabel = nullptr; |
| 243 | } else if (WaitingForLabel == nullptr && WaitingForBranchTo == nullptr) { |
| 244 | // If we found a new Label instruction while the WaitingFor* state is |
| 245 | // clear, then set things up for this being a backward branch target. |
| 246 | WaitingForBranchTo = Instr; |
| 247 | } else { |
| 248 | // We see something we don't understand, so skip to the next block. |
| 249 | ShouldSkipRemainingInstructions = true; |
| 250 | } |
| 251 | return true; |
| 252 | } |
| 253 | |
| 254 | /// Process a potential intra-block branch instruction. |
| 255 | bool handleIntraBlockBranch() { |
| 256 | const Inst *Label = Instr->getIntraBlockBranchTarget(); |
| 257 | if (Label == nullptr) |
| 258 | return false; |
| 259 | // An intra-block branch instruction shouldn't have any operands, so it can |
| 260 | // be handled right here and then move on. |
| 261 | assert(Dest == nullptr); |
| 262 | assert(Instr->getSrcSize() == 0); |
| 263 | if (WaitingForBranchTo == Label && WaitingForLabel == nullptr) { |
| 264 | WaitingForBranchTo = nullptr; |
| 265 | } else if (WaitingForBranchTo == nullptr && |
| 266 | (WaitingForLabel == nullptr || WaitingForLabel == Label)) { |
| 267 | WaitingForLabel = Label; |
| 268 | } else { |
| 269 | // We see something we don't understand, so skip to the next block. |
| 270 | ShouldSkipRemainingInstructions = true; |
| 271 | } |
| 272 | return true; |
| 273 | } |
| 274 | |
| 275 | /// Specially process a potential "Variable=Variable" assignment instruction, |
| 276 | /// when it conforms to certain patterns. |
| 277 | bool handleSimpleVarAssign() { |
| 278 | if (!Instr->isVarAssign()) |
| 279 | return false; |
| 280 | const bool DestIsInf = isInf(Dest); |
| 281 | const bool DestIsAllocable = isAllocable(Dest); |
| 282 | auto *SrcVar = llvm::cast<Variable>(Instr->getSrc(0)); |
| 283 | const bool SrcIsInf = isInf(SrcVar); |
| 284 | const bool SrcIsAllocable = isAllocable(SrcVar); |
| 285 | if (DestIsInf && SrcIsInf) { |
| 286 | // The instruction: |
| 287 | // t:inf = u:inf |
| 288 | // No transformation is needed. |
| 289 | return true; |
| 290 | } |
| 291 | if (DestIsInf && SrcIsAllocable && Dest->getType() == SrcVar->getType()) { |
| 292 | // The instruction: |
| 293 | // t:inf = v |
| 294 | // gets transformed to: |
| 295 | // t:inf = v1 |
| 296 | // v2 = t:inf |
| 297 | // where: |
| 298 | // v1 := map[v] |
| 299 | // v2 := linkTo(v) |
| 300 | // map[v] := v2 |
| 301 | // |
| 302 | // If both v2 and its linkedToStackRoot get a stack slot, then "v2=t:inf" |
| 303 | // is recognized as a redundant assignment and elided. |
| 304 | // |
| 305 | // Note that if the dest and src types are different, then this is |
| 306 | // actually a truncation operation, which would make "v2=t:inf" an invalid |
| 307 | // instruction. In this case, the type test will make it fall through to |
| 308 | // the general case below. |
| 309 | Variable *OldMapped = VarMap.get(SrcVar); |
| 310 | Instr->replaceSource(0, OldMapped); |
| 311 | if (isUnconditionallyExecuted()) { |
| 312 | // Only create new mapping state if the instruction is unconditionally |
| 313 | // executed. |
| 314 | if (!VarMap.isInstLastUseOfVar(SrcVar, Instr)) { |
| 315 | Variable *NewMapped = VarMap.makeLinked(SrcVar); |
| 316 | Inst *Mov = Target->createLoweredMove(NewMapped, Dest); |
| 317 | Node->getInsts().insert(IterNext, Mov); |
| 318 | } |
| 319 | } |
| 320 | return true; |
| 321 | } |
| 322 | if (DestIsAllocable && SrcIsInf) { |
| 323 | if (!VarMap.isDestUsedInBlock(Dest)) { |
| 324 | return true; |
| 325 | } |
| 326 | // The instruction: |
| 327 | // v = t:inf |
| 328 | // gets transformed to: |
| 329 | // v = t:inf |
| 330 | // v2 = t:inf |
| 331 | // where: |
| 332 | // v2 := linkTo(v) |
| 333 | // map[v] := v2 |
| 334 | // |
| 335 | // If both v2 and v get a stack slot, then "v2=t:inf" is recognized as a |
| 336 | // redundant assignment and elided. |
| 337 | if (isUnconditionallyExecuted()) { |
| 338 | // Only create new mapping state if the instruction is unconditionally |
| 339 | // executed. |
| 340 | Variable *NewMapped = VarMap.makeLinked(Dest); |
| 341 | Inst *Mov = Target->createLoweredMove(NewMapped, SrcVar); |
| 342 | Node->getInsts().insert(IterNext, Mov); |
| 343 | } else { |
| 344 | // For a conditionally executed instruction, add a redefinition of the |
| 345 | // original Dest mapping, without creating a new linked variable. |
| 346 | Variable *OldMapped = VarMap.get(Dest); |
| 347 | Inst *Mov = Target->createLoweredMove(OldMapped, SrcVar); |
| 348 | Mov->setDestRedefined(); |
| 349 | Node->getInsts().insert(IterNext, Mov); |
| 350 | } |
| 351 | return true; |
| 352 | } |
| 353 | assert(!ShouldSkipRemainingInstructions); |
| 354 | return false; |
| 355 | } |
| 356 | |
| 357 | /// Process the dest Variable of a Phi instruction. |
| 358 | bool handlePhi() { |
| 359 | assert(llvm::isa<InstPhi>(Instr)); |
| 360 | const bool DestIsAllocable = isAllocable(Dest); |
| 361 | if (!DestIsAllocable) |
| 362 | return true; |
| 363 | if (!VarMap.isDestUsedInBlock(Dest)) |
| 364 | return true; |
| 365 | Variable *NewMapped = VarMap.makeLinked(Dest); |
| 366 | Inst *Mov = Target->createLoweredMove(NewMapped, Dest); |
| 367 | Node->getInsts().insert(IterCur, Mov); |
| 368 | return true; |
| 369 | } |
| 370 | |
| 371 | /// Process an arbitrary instruction. |
| 372 | bool handleGeneralInst() { |
| 373 | const bool DestIsAllocable = isAllocable(Dest); |
| 374 | // The (non-variable-assignment) instruction: |
| 375 | // ... = F(v) |
| 376 | // where v is not infinite-weight, gets transformed to: |
| 377 | // v2 = v1 |
| 378 | // ... = F(v1) |
| 379 | // where: |
| 380 | // v1 := map[v] |
| 381 | // v2 := linkTo(v) |
| 382 | // map[v] := v2 |
| 383 | // After that, if the "..." dest=u is not infinite-weight, append: |
| 384 | // u2 = u |
| 385 | // where: |
| 386 | // u2 := linkTo(u) |
| 387 | // map[u] := u2 |
| 388 | for (SizeT i = 0; i < Instr->getSrcSize(); ++i) { |
| 389 | // Iterate over the top-level src vars. Don't bother to dig into |
| 390 | // e.g. MemOperands because their vars should all be infinite-weight. |
| 391 | // (This assumption would need to change if the pass were done |
| 392 | // pre-lowering.) |
| 393 | if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr->getSrc(i))) { |
| 394 | const bool SrcIsAllocable = isAllocable(SrcVar); |
| 395 | if (SrcIsAllocable) { |
| 396 | Variable *OldMapped = VarMap.get(SrcVar); |
| 397 | if (isUnconditionallyExecuted()) { |
| 398 | if (!VarMap.isInstLastUseOfVar(SrcVar, Instr)) { |
| 399 | Variable *NewMapped = VarMap.makeLinked(SrcVar); |
| 400 | Inst *Mov = Target->createLoweredMove(NewMapped, OldMapped); |
| 401 | Node->getInsts().insert(IterCur, Mov); |
| 402 | } |
| 403 | } |
| 404 | Instr->replaceSource(i, OldMapped); |
| 405 | } |
| 406 | } |
| 407 | } |
| 408 | // Transformation of Dest is the same as the "v=t:inf" case above. |
| 409 | if (DestIsAllocable && VarMap.isDestUsedInBlock(Dest)) { |
| 410 | if (isUnconditionallyExecuted()) { |
| 411 | Variable *NewMapped = VarMap.makeLinked(Dest); |
| 412 | Inst *Mov = Target->createLoweredMove(NewMapped, Dest); |
| 413 | Node->getInsts().insert(IterNext, Mov); |
| 414 | } else { |
| 415 | Variable *OldMapped = VarMap.get(Dest); |
| 416 | Inst *Mov = Target->createLoweredMove(OldMapped, Dest); |
| 417 | Mov->setDestRedefined(); |
| 418 | Node->getInsts().insert(IterNext, Mov); |
| 419 | } |
| 420 | } |
| 421 | return true; |
| 422 | } |
| 423 | |
| 424 | private: |
| 425 | TargetLowering *Target; |
| 426 | CfgNode *Node = nullptr; |
| 427 | Inst *Instr = nullptr; |
| 428 | Variable *Dest = nullptr; |
| 429 | InstList::iterator IterCur; |
| 430 | InstList::iterator IterNext; |
| 431 | bool ShouldSkipRemainingInstructions = false; |
| 432 | VariableMap VarMap; |
| 433 | CfgVector<Variable *> LinkedToFixups; |
| 434 | /// WaitingForLabel and WaitingForBranchTo are for tracking intra-block |
| 435 | /// control flow. |
| 436 | const Inst *WaitingForLabel = nullptr; |
| 437 | const Inst *WaitingForBranchTo = nullptr; |
| 438 | }; |
| 439 | |
| 440 | } // end of anonymous namespace |
| 441 | |
| 442 | /// Within each basic block, rewrite Variable references in terms of chained |
| 443 | /// copies of the original Variable. For example: |
| 444 | /// A = B + C |
| 445 | /// might be rewritten as: |
| 446 | /// B1 = B |
| 447 | /// C1 = C |
| 448 | /// A = B + C |
| 449 | /// A1 = A |
| 450 | /// and then: |
| 451 | /// D = A + B |
| 452 | /// might be rewritten as: |
| 453 | /// A2 = A1 |
| 454 | /// B2 = B1 |
| 455 | /// D = A1 + B1 |
| 456 | /// D1 = D |
| 457 | /// |
| 458 | /// The purpose is to present the linear-scan register allocator with smaller |
| 459 | /// live ranges, to help mitigate its "all or nothing" allocation strategy, |
| 460 | /// while counting on its preference mechanism to keep the split versions in the |
| 461 | /// same register when possible. |
| 462 | /// |
| 463 | /// When creating new Variables, A2 is linked to A1 which is linked to A, and |
| 464 | /// similar for the other Variable linked-to chains. Rewrites apply only to |
| 465 | /// Variables where mayHaveReg() is true. |
| 466 | /// |
| 467 | /// At code emission time, redundant linked-to stack assignments will be |
| 468 | /// recognized and elided. To illustrate using the above example, if A1 gets a |
| 469 | /// register but A and A2 are on the stack, the "A2=A1" store instruction is |
| 470 | /// redundant since A and A2 share the same stack slot and A1 originated from A. |
| 471 | /// |
| 472 | /// Simple assignment instructions are rewritten slightly differently, to take |
| 473 | /// maximal advantage of Variables known to have registers. |
| 474 | /// |
| 475 | /// In general, there may be several valid ways to rewrite an instruction: add |
| 476 | /// the new assignment instruction either before or after the original |
| 477 | /// instruction, and rewrite the original instruction with either the old or the |
| 478 | /// new variable mapping. We try to pick a strategy most likely to avoid |
| 479 | /// potential performance problems. For example, try to avoid storing to the |
| 480 | /// stack and then immediately reloading from the same location. One |
| 481 | /// consequence is that code might be generated that loads a register from a |
| 482 | /// stack location, followed almost immediately by another use of the same stack |
| 483 | /// location, despite its value already being available in a register as a |
| 484 | /// result of the first instruction. However, the performance impact here is |
| 485 | /// likely to be negligible, and a simple availability peephole optimization |
| 486 | /// could clean it up. |
| 487 | /// |
| 488 | /// This pass potentially adds a lot of new instructions and variables, and as |
| 489 | /// such there are compile-time performance concerns, particularly with liveness |
| 490 | /// analysis and register allocation. Note that for liveness analysis, the new |
| 491 | /// variables have single-block liveness, so they don't increase the size of the |
| 492 | /// liveness bit vectors that need to be merged across blocks. As a result, the |
| 493 | /// performance impact is likely to be linearly related to the number of new |
| 494 | /// instructions, rather than number of new variables times number of blocks |
| 495 | /// which would be the case if they were multi-block variables. |
| 496 | void splitBlockLocalVariables(Cfg *Func) { |
| 497 | if (!getFlags().getSplitLocalVars()) |
| 498 | return; |
| 499 | TimerMarker _(TimerStack::TT_splitLocalVars, Func); |
| 500 | LocalVariableSplitter Splitter(Func); |
| 501 | // TODO(stichnot): Fix this mechanism for LinkedTo variables and stack slot |
| 502 | // assignment. |
| 503 | // |
| 504 | // To work around shortcomings with stack frame mapping, we want to arrange |
| 505 | // LinkedTo structure such that within one block, the LinkedTo structure |
| 506 | // leading to a root forms a list, not a tree. A LinkedTo root can have |
| 507 | // multiple children linking to it, but only one per block. Furthermore, |
| 508 | // because stack slot mapping processes variables in numerical order, the |
| 509 | // LinkedTo chain needs to be ordered such that when A->getLinkedTo() == B, |
| 510 | // then A->getIndex() > B->getIndex(). |
| 511 | // |
| 512 | // To effect this, while processing a block we keep track of preexisting |
| 513 | // LinkedTo relationships via the LinkedToFixups vector, and at the end of the |
| 514 | // block we splice them in such that the block has a single chain for each |
| 515 | // root, ordered by getIndex() value. |
| 516 | CfgVector<Variable *> LinkedToFixups; |
| 517 | for (CfgNode *Node : Func->getNodes()) { |
| 518 | // Clear the VarMap and LinkedToFixups at the start of every block. |
| 519 | LinkedToFixups.clear(); |
| 520 | Splitter.setNode(Node); |
| 521 | auto &Insts = Node->getInsts(); |
| 522 | auto Iter = Insts.begin(); |
| 523 | auto IterEnd = Insts.end(); |
| 524 | // TODO(stichnot): Figure out why Phi processing usually degrades |
| 525 | // performance. Disable for now. |
| 526 | static constexpr bool ProcessPhis = false; |
| 527 | if (ProcessPhis) { |
| 528 | for (Inst &Instr : Node->getPhis()) { |
| 529 | if (Instr.isDeleted()) |
| 530 | continue; |
| 531 | Splitter.setInst(&Instr, Iter, Iter); |
| 532 | Splitter.handlePhi(); |
| 533 | } |
| 534 | } |
| 535 | InstList::iterator NextIter; |
| 536 | for (; Iter != IterEnd && !Splitter.shouldSkipRemainingInstructions(); |
| 537 | Iter = NextIter) { |
| 538 | NextIter = Iter; |
| 539 | ++NextIter; |
| 540 | Inst *Instr = iteratorToInst(Iter); |
| 541 | if (Instr->isDeleted()) |
| 542 | continue; |
| 543 | Splitter.setInst(Instr, Iter, NextIter); |
| 544 | |
| 545 | // Before doing any transformations, take care of the bookkeeping for |
| 546 | // intra-block branching. |
| 547 | // |
| 548 | // This is tricky because the transformation for one instruction may |
| 549 | // depend on a transformation for a previous instruction, but if that |
| 550 | // previous instruction is not dynamically executed due to intra-block |
| 551 | // control flow, it may lead to an inconsistent state and incorrect code. |
| 552 | // |
| 553 | // We want to handle some simple cases, and reject some others: |
| 554 | // |
| 555 | // 1. For something like a select instruction, we could have: |
| 556 | // test cond |
| 557 | // dest = src_false |
| 558 | // branch conditionally to label |
| 559 | // dest = src_true |
| 560 | // label: |
| 561 | // |
| 562 | // Between the conditional branch and the label, we need to treat dest and |
| 563 | // src variables specially, specifically not creating any new state. |
| 564 | // |
| 565 | // 2. Some 64-bit atomic instructions may be lowered to a loop: |
| 566 | // label: |
| 567 | // ... |
| 568 | // branch conditionally to label |
| 569 | // |
| 570 | // No special treatment is needed, but it's worth tracking so that case #1 |
| 571 | // above can also be handled. |
| 572 | // |
| 573 | // 3. Advanced switch lowering can create really complex intra-block |
| 574 | // control flow, so when we recognize this, we should just stop splitting |
| 575 | // for the remainder of the block (which isn't much since a switch |
| 576 | // instruction is a terminator). |
| 577 | // |
| 578 | // 4. Other complex lowering, e.g. an i64 icmp on a 32-bit architecture, |
| 579 | // can result in an if/then/else like structure with two labels. One |
| 580 | // possibility would be to suspect splitting for the remainder of the |
| 581 | // lowered instruction, and then resume for the remainder of the block, |
| 582 | // but since we don't have high-level instruction markers, we might as |
| 583 | // well just stop splitting for the remainder of the block. |
| 584 | if (Splitter.handleLabel()) |
| 585 | continue; |
| 586 | if (Splitter.handleIntraBlockBranch()) |
| 587 | continue; |
| 588 | if (Splitter.handleUnwantedInstruction()) |
| 589 | continue; |
| 590 | |
| 591 | // Intra-block bookkeeping is complete, now do the transformations. |
| 592 | |
| 593 | // Determine the transformation based on the kind of instruction, and |
| 594 | // whether its Variables are infinite-weight. New instructions can be |
| 595 | // inserted before the current instruction via Iter, or after the current |
| 596 | // instruction via NextIter. |
| 597 | if (Splitter.handleSimpleVarAssign()) |
| 598 | continue; |
| 599 | if (Splitter.handleGeneralInst()) |
| 600 | continue; |
| 601 | } |
| 602 | Splitter.finalizeNode(); |
| 603 | } |
| 604 | |
| 605 | Func->dump("After splitting local variables"); |
| 606 | } |
| 607 | |
| 608 | } // end of namespace Ice |