third_party/llvm-16.0/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp - SwiftShader - Git at Google

 //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file implements the idea to rewrite undef incoming operand for certain
 // PHIs in structurized CFG. This pass only works on IR that has gone through
 // StructurizedCFG pass, and this pass has some additional limitation that make
 // it can only run after SIAnnotateControlFlow.
 //
 // To achieve optimal code generation for AMDGPU, we assume that divergence
 // analysis reports the PHI in join block of divergent branch as uniform if
 // it has one unique uniform value plus additional undefined/poisoned incoming
 // value. That is to say the later compiler pipeline will ensure such PHI always
 // return uniform value and ensure it work correctly. Let's take a look at two
 // typical patterns in structured CFG that need to be taken care: (In both
 // patterns, block %if terminate with divergent branch.)
 //
 // Pattern A: Block with undefined incoming value dominates defined predecessor
 //  %if
 //  | \
 //  | %then
 //  | /
 //  %endif: %phi = phi [%undef, %if], [%uniform, %then]
 //
 //  Pattern B: Block with defined incoming value dominates undefined predecessor
 //  %if
 //  | \
 //  | %then
 //  | /
 //  %endif: %phi = phi [%uniform, %if], [%undef, %then]
 //
 // For pattern A, by reporting %phi as uniform, the later pipeline need to make
 // sure it be handled correctly. The backend usually allocates a scalar register
 // and if any thread in a wave takes %then path, the scalar register will get
 // the %uniform value.
 //
 // For pattern B, we will replace the undef operand with the other defined value
 // in this pass. So the scalar register allocated for such PHI will get correct
 // liveness. Without this transformation, the scalar register may be overwritten
 // in the %then block.
 //
 // Limitation note:
 // If the join block of divergent threads is a loop header, the pass cannot
 // handle it correctly right now. For below case, the undef in %phi should also
 // be rewritten. Currently we depend on SIAnnotateControlFlow to split %header
 // block to get a separate join block, then we can rewrite the undef correctly.
 //     %if
 //     | \
 //     | %then
 //     | /
 // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
 // |   |
 // \---

 #include "AMDGPU.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/InitializePasses.h"

 using namespace llvm;

 #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"

 namespace {

 class AMDGPURewriteUndefForPHI : public FunctionPass {
 public:
   static char ID;
   AMDGPURewriteUndefForPHI() : FunctionPass(ID) {
     initializeAMDGPURewriteUndefForPHIPass(*PassRegistry::getPassRegistry());
   }
   bool runOnFunction(Function &F) override;
   StringRef getPassName() const override {
     return "AMDGPU Rewrite Undef for PHI";
   }

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<LegacyDivergenceAnalysis>();
     AU.addRequired<DominatorTreeWrapperPass>();

     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<LegacyDivergenceAnalysis>();
     AU.setPreservesCFG();
   }
 };

 } // end anonymous namespace
 char AMDGPURewriteUndefForPHI::ID = 0;

 INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
                       "Rewrite undef for PHI", false, false)
 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
                     "Rewrite undef for PHI", false, false)

 bool rewritePHIs(Function &F, LegacyDivergenceAnalysis *DA, DominatorTree *DT) {
   bool Changed = false;
   SmallVector<PHINode *> ToBeDeleted;
   for (auto &BB : F) {
     for (auto &PHI : BB.phis()) {
       if (DA->isDivergent(&PHI))
         continue;

       // The unique incoming value except undef/poison for the PHI node.
       Value *UniqueDefinedIncoming = nullptr;
       // The divergent block with defined incoming value that dominates all
       // other block with the same incoming value.
       BasicBlock *DominateBB = nullptr;
       // Predecessors with undefined incoming value (excluding loop backedge).
       SmallVector<BasicBlock *> Undefs;

       for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
         Value *Incoming = PHI.getIncomingValue(i);
         BasicBlock *IncomingBB = PHI.getIncomingBlock(i);

         if (Incoming == &PHI)
           continue;

         if (isa<UndefValue>(Incoming)) {
           // Undef from loop backedge will not be replaced.
           if (!DT->dominates(&BB, IncomingBB))
             Undefs.push_back(IncomingBB);
           continue;
         }

         if (!UniqueDefinedIncoming) {
           UniqueDefinedIncoming = Incoming;
           DominateBB = IncomingBB;
         } else if (Incoming == UniqueDefinedIncoming) {
           // Update DominateBB if necessary.
           if (DT->dominates(IncomingBB, DominateBB))
             DominateBB = IncomingBB;
         } else {
           UniqueDefinedIncoming = nullptr;
           break;
         }
       }
       // We only need to replace the undef for the PHI which is merging
       // defined/undefined values from divergent threads.
       // TODO: We should still be able to replace undef value if the unique
       // value is a Constant.
       if (!UniqueDefinedIncoming || Undefs.empty() ||
           !DA->isDivergent(DominateBB->getTerminator()))
         continue;

       // We only replace the undef when DominateBB truly dominates all the
       // other predecessors with undefined incoming value. Make sure DominateBB
       // dominates BB so that UniqueDefinedIncoming is available in BB and
       // afterwards.
       if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
             return DT->dominates(DominateBB, UD);
           })) {
         PHI.replaceAllUsesWith(UniqueDefinedIncoming);
         ToBeDeleted.push_back(&PHI);
         Changed = true;
       }
     }
   }

   for (auto *PHI : ToBeDeleted)
     PHI->eraseFromParent();

   return Changed;
 }

 bool AMDGPURewriteUndefForPHI::runOnFunction(Function &F) {
   LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
   DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   return rewritePHIs(F, DA, DT);
 }

 FunctionPass *llvm::createAMDGPURewriteUndefForPHIPass() {
   return new AMDGPURewriteUndefForPHI();
 }
	//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	// This file implements the idea to rewrite undef incoming operand for certain
	// PHIs in structurized CFG. This pass only works on IR that has gone through
	// StructurizedCFG pass, and this pass has some additional limitation that make
	// it can only run after SIAnnotateControlFlow.
	//
	// To achieve optimal code generation for AMDGPU, we assume that divergence
	// analysis reports the PHI in join block of divergent branch as uniform if
	// it has one unique uniform value plus additional undefined/poisoned incoming
	// value. That is to say the later compiler pipeline will ensure such PHI always
	// return uniform value and ensure it work correctly. Let's take a look at two
	// typical patterns in structured CFG that need to be taken care: (In both
	// patterns, block %if terminate with divergent branch.)
	//
	// Pattern A: Block with undefined incoming value dominates defined predecessor
	// %if
	// \| \
	// \| %then
	// \| /
	// %endif: %phi = phi [%undef, %if], [%uniform, %then]
	//
	// Pattern B: Block with defined incoming value dominates undefined predecessor
	// %if
	// \| \
	// \| %then
	// \| /
	// %endif: %phi = phi [%uniform, %if], [%undef, %then]
	//
	// For pattern A, by reporting %phi as uniform, the later pipeline need to make
	// sure it be handled correctly. The backend usually allocates a scalar register
	// and if any thread in a wave takes %then path, the scalar register will get
	// the %uniform value.
	//
	// For pattern B, we will replace the undef operand with the other defined value
	// in this pass. So the scalar register allocated for such PHI will get correct
	// liveness. Without this transformation, the scalar register may be overwritten
	// in the %then block.
	//
	// Limitation note:
	// If the join block of divergent threads is a loop header, the pass cannot
	// handle it correctly right now. For below case, the undef in %phi should also
	// be rewritten. Currently we depend on SIAnnotateControlFlow to split %header
	// block to get a separate join block, then we can rewrite the undef correctly.
	// %if
	// \| \
	// \| %then
	// \| /
	// -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
	// \| \|
	// \---

	#include "AMDGPU.h"
	#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
	#include "llvm/IR/BasicBlock.h"
	#include "llvm/IR/Constants.h"
	#include "llvm/IR/Dominators.h"
	#include "llvm/IR/Instructions.h"
	#include "llvm/InitializePasses.h"

	using namespace llvm;

	#define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"

	namespace {

	class AMDGPURewriteUndefForPHI : public FunctionPass {
	public:
	static char ID;
	AMDGPURewriteUndefForPHI() : FunctionPass(ID) {
	initializeAMDGPURewriteUndefForPHIPass(*PassRegistry::getPassRegistry());
	}
	bool runOnFunction(Function &F) override;
	StringRef getPassName() const override {
	return "AMDGPU Rewrite Undef for PHI";
	}

	void getAnalysisUsage(AnalysisUsage &AU) const override {
	AU.addRequired<LegacyDivergenceAnalysis>();
	AU.addRequired<DominatorTreeWrapperPass>();

	AU.addPreserved<DominatorTreeWrapperPass>();
	AU.addPreserved<LegacyDivergenceAnalysis>();
	AU.setPreservesCFG();
	}
	};

	} // end anonymous namespace
	char AMDGPURewriteUndefForPHI::ID = 0;

	INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
	"Rewrite undef for PHI", false, false)
	INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
	INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
	"Rewrite undef for PHI", false, false)

	bool rewritePHIs(Function &F, LegacyDivergenceAnalysis DA, DominatorTree DT) {
	bool Changed = false;
	SmallVector<PHINode *> ToBeDeleted;
	for (auto &BB : F) {
	for (auto &PHI : BB.phis()) {
	if (DA->isDivergent(&PHI))
	continue;

	// The unique incoming value except undef/poison for the PHI node.
	Value *UniqueDefinedIncoming = nullptr;
	// The divergent block with defined incoming value that dominates all
	// other block with the same incoming value.
	BasicBlock *DominateBB = nullptr;
	// Predecessors with undefined incoming value (excluding loop backedge).
	SmallVector<BasicBlock *> Undefs;

	for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
	Value *Incoming = PHI.getIncomingValue(i);
	BasicBlock *IncomingBB = PHI.getIncomingBlock(i);

	if (Incoming == &PHI)
	continue;

	if (isa<UndefValue>(Incoming)) {
	// Undef from loop backedge will not be replaced.
	if (!DT->dominates(&BB, IncomingBB))
	Undefs.push_back(IncomingBB);
	continue;
	}

	if (!UniqueDefinedIncoming) {
	UniqueDefinedIncoming = Incoming;
	DominateBB = IncomingBB;
	} else if (Incoming == UniqueDefinedIncoming) {
	// Update DominateBB if necessary.
	if (DT->dominates(IncomingBB, DominateBB))
	DominateBB = IncomingBB;
	} else {
	UniqueDefinedIncoming = nullptr;
	break;
	}
	}
	// We only need to replace the undef for the PHI which is merging
	// defined/undefined values from divergent threads.
	// TODO: We should still be able to replace undef value if the unique
	// value is a Constant.
	if (!UniqueDefinedIncoming \|\| Undefs.empty() \|\|
	!DA->isDivergent(DominateBB->getTerminator()))
	continue;

	// We only replace the undef when DominateBB truly dominates all the
	// other predecessors with undefined incoming value. Make sure DominateBB
	// dominates BB so that UniqueDefinedIncoming is available in BB and
	// afterwards.
	if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
	return DT->dominates(DominateBB, UD);
	})) {
	PHI.replaceAllUsesWith(UniqueDefinedIncoming);
	ToBeDeleted.push_back(&PHI);
	Changed = true;
	}
	}
	}

	for (auto *PHI : ToBeDeleted)
	PHI->eraseFromParent();

	return Changed;
	}

	bool AMDGPURewriteUndefForPHI::runOnFunction(Function &F) {
	LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
	DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
	return rewritePHIs(F, DA, DT);
	}

	FunctionPass *llvm::createAMDGPURewriteUndefForPHIPass() {
	return new AMDGPURewriteUndefForPHI();
	}