Eliminate Subzero profiling support

We've never used this functionality, and shouldn't have a need for it.
Profiling information can be collected at the Reactor level or using
a profiler like VTune.

This functionality was the only thing using the `Target` parameter of
`InstIntrinsicCall`, which gets in the way of aligning the parameters of
load- and store-like intrinsics with regular `InstLoad` and `InstStore`.

Bug: b/179497998
Change-Id: I5a0ad5ee8e0101f0879a97a1ea01e3efc5bebbe4
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/52528
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/third_party/subzero/src/IceCfg.cpp b/third_party/subzero/src/IceCfg.cpp
index ce09d5c..7725bb4 100644
--- a/third_party/subzero/src/IceCfg.cpp
+++ b/third_party/subzero/src/IceCfg.cpp
@@ -177,38 +177,6 @@
   GlobalInits->push_back(Var);
 }
 
-void Cfg::profileBlocks() {
-  if (GlobalInits == nullptr)
-    GlobalInits.reset(new VariableDeclarationList());
-
-  for (CfgNode *Node : Nodes) {
-    const std::string NodeAsmName = Node->getAsmName();
-    createNodeNameDeclaration(NodeAsmName);
-    createBlockProfilingInfoDeclaration(NodeAsmName, GlobalInits->back());
-    Node->profileExecutionCount(GlobalInits->back());
-  }
-}
-
-bool Cfg::isProfileGlobal(const VariableDeclaration &Var) {
-  if (!Var.getName().hasStdString())
-    return false;
-  return Var.getName().toString().find(BlockStatsGlobalPrefix) == 0;
-}
-
-void Cfg::addCallToProfileSummary() {
-  // The call(s) to __Sz_profile_summary are added by the profiler in functions
-  // that cause the program to exit. This function is defined in
-  // runtime/szrt_profiler.c.
-  Constant *ProfileSummarySym =
-      Ctx->getConstantExternSym(Ctx->getGlobalString("__Sz_profile_summary"));
-  constexpr SizeT NumArgs = 0;
-  constexpr Variable *Void = nullptr;
-  constexpr bool HasTailCall = false;
-  auto *Call =
-      InstCall::create(this, NumArgs, Void, ProfileSummarySym, HasTailCall);
-  getEntryNode()->getInsts().push_front(Call);
-}
-
 void Cfg::translate() {
   if (hasError())
     return;
@@ -238,16 +206,6 @@
 
   dump("Initial CFG");
 
-  if (getFlags().getEnableBlockProfile()) {
-    profileBlocks();
-    // TODO(jpp): this is fragile, at best. Figure out a better way of
-    // detecting exit functions.
-    if (getFunctionName().toStringOrEmpty() == "exit") {
-      addCallToProfileSummary();
-    }
-    dump("Profiled CFG");
-  }
-
   // Create the Hi and Lo variables where a split was needed
   for (Variable *Var : Variables) {
     if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) {
diff --git a/third_party/subzero/src/IceCfg.h b/third_party/subzero/src/IceCfg.h
index da676be..36bf139 100644
--- a/third_party/subzero/src/IceCfg.h
+++ b/third_party/subzero/src/IceCfg.h
@@ -178,10 +178,6 @@
   uint32_t getConstantBlindingCookie() const { return ConstantBlindingCookie; }
   /// @}
 
-  /// Returns true if Var is a global variable that is used by the profiling
-  /// code.
-  static bool isProfileGlobal(const VariableDeclaration &Var);
-
   /// Passes over the CFG.
   void translate();
   /// After the CFG is fully constructed, iterate over the nodes and compute the
@@ -282,15 +278,6 @@
 
   Cfg(GlobalContext *Ctx, uint32_t SequenceNumber);
 
-  /// Adds a call to the ProfileSummary runtime function as the first
-  /// instruction in this CFG's entry block.
-  void addCallToProfileSummary();
-
-  /// Iterates over the basic blocks in this CFG, adding profiling code to each
-  /// one of them. It returns a list with all the globals that the profiling
-  /// code needs to be defined.
-  void profileBlocks();
-
   void createNodeNameDeclaration(const std::string &NodeAsmName);
   void
   createBlockProfilingInfoDeclaration(const std::string &NodeAsmName,
@@ -338,7 +325,7 @@
   std::unique_ptr<TargetLowering> Target;
   std::unique_ptr<VariablesMetadata> VMetadata;
   std::unique_ptr<Assembler> TargetAssembler;
-  /// Globals required by this CFG. Mostly used for the profiler's globals.
+  /// Globals required by this CFG.
   std::unique_ptr<VariableDeclarationList> GlobalInits;
   CfgVector<InstJumpTable *> JumpTables;
   /// CurrentNode is maintained during dumping/emitting just for validating
diff --git a/third_party/subzero/src/IceCfgNode.cpp b/third_party/subzero/src/IceCfgNode.cpp
index 7170bef..d14e41f 100644
--- a/third_party/subzero/src/IceCfgNode.cpp
+++ b/third_party/subzero/src/IceCfgNode.cpp
@@ -1464,33 +1464,6 @@
   }
 }
 
-void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
-  GlobalContext *Ctx = Func->getContext();
-  GlobalString RMW_I64 = Ctx->getGlobalString("llvm.nacl.atomic.rmw.i64");
-
-  bool BadIntrinsic = false;
-  const Intrinsics::FullIntrinsicInfo *Info =
-      Ctx->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
-  assert(!BadIntrinsic);
-  assert(Info != nullptr);
-
-  Operand *RMWI64Name = Ctx->getConstantExternSym(RMW_I64);
-  constexpr RelocOffsetT Offset = 0;
-  Constant *Counter = Ctx->getConstantSym(Offset, Var->getName());
-  Constant *AtomicRMWOp = Ctx->getConstantInt32(Intrinsics::AtomicAdd);
-  Constant *One = Ctx->getConstantInt64(1);
-  Constant *OrderAcquireRelease =
-      Ctx->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);
-
-  auto *Instr = InstIntrinsicCall::create(
-      Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
-  Instr->addArg(AtomicRMWOp);
-  Instr->addArg(Counter);
-  Instr->addArg(One);
-  Instr->addArg(OrderAcquireRelease);
-  Insts.push_front(Instr);
-}
-
 void CfgNode::removeInEdge(CfgNode *In) {
   InEdges.erase(std::find(InEdges.begin(), InEdges.end(), In));
 }
diff --git a/third_party/subzero/src/IceGlobalContext.cpp b/third_party/subzero/src/IceGlobalContext.cpp
index 3a21b3a..392ccbf 100644
--- a/third_party/subzero/src/IceGlobalContext.cpp
+++ b/third_party/subzero/src/IceGlobalContext.cpp
@@ -483,14 +483,6 @@
   DataLowering->emitTargetRODataSections();
 }
 
-void GlobalContext::saveBlockInfoPtrs() {
-  for (VariableDeclaration *Global : Globals) {
-    if (Cfg::isProfileGlobal(*Global)) {
-      ProfileBlockInfos.push_back(Global);
-    }
-  }
-}
-
 void GlobalContext::lowerGlobals(const std::string &SectionSuffix) {
   TimerMarker T(TimerStack::TT_emitGlobalInitializers, this);
   const bool DumpGlobalVariables =
@@ -506,7 +498,6 @@
   if (getFlags().getDisableTranslation())
     return;
 
-  saveBlockInfoPtrs();
   // If we need to shuffle the layout of global variables, shuffle them now.
   if (getFlags().getReorderGlobalVariables()) {
     // Create a random number generator for global variable reordering.
@@ -520,50 +511,13 @@
     Instrumentor->instrumentGlobals(Globals);
 
   DataLowering->lowerGlobals(Globals, SectionSuffix);
-  if (ProfileBlockInfos.empty() && DisposeGlobalVariablesAfterLowering) {
+  if (DisposeGlobalVariablesAfterLowering) {
     Globals.clearAndPurge();
   } else {
     Globals.clear();
   }
 }
 
-void GlobalContext::lowerProfileData() {
-  // ProfileBlockInfoVarDecl is initialized in the constructor, and will only
-  // ever be nullptr after this method completes. This assertion is a convoluted
-  // way of ensuring lowerProfileData is invoked a single time.
-  assert(ProfileBlockInfoVarDecl == nullptr);
-
-  auto GlobalVariablePool = getInitializerAllocator();
-  ProfileBlockInfoVarDecl =
-      VariableDeclaration::createExternal(GlobalVariablePool.get());
-  ProfileBlockInfoVarDecl->setAlignment(typeWidthInBytes(IceType_i64));
-  ProfileBlockInfoVarDecl->setIsConstant(true);
-
-  // Note: if you change this symbol, make sure to update
-  // runtime/szrt_profiler.c as well.
-  ProfileBlockInfoVarDecl->setName(this, "__Sz_block_profile_info");
-
-  for (const VariableDeclaration *PBI : ProfileBlockInfos) {
-    if (Cfg::isProfileGlobal(*PBI)) {
-      constexpr RelocOffsetT BlockExecutionCounterOffset = 0;
-      ProfileBlockInfoVarDecl->addInitializer(
-          VariableDeclaration::RelocInitializer::create(
-              GlobalVariablePool.get(), PBI,
-              {RelocOffset::create(this, BlockExecutionCounterOffset)}));
-    }
-  }
-
-  // This adds a 64-bit sentinel entry to the end of our array. For 32-bit
-  // architectures this will waste 4 bytes.
-  const SizeT Sizeof64BitNullPtr = typeWidthInBytes(IceType_i64);
-  ProfileBlockInfoVarDecl->addInitializer(
-      VariableDeclaration::ZeroInitializer::create(GlobalVariablePool.get(),
-                                                   Sizeof64BitNullPtr));
-  Globals.push_back(ProfileBlockInfoVarDecl);
-  constexpr char ProfileDataSection[] = "$sz_profiler$";
-  lowerGlobals(ProfileDataSection);
-}
-
 void GlobalContext::emitterWrapper(ThreadContext *MyTLS) {
   ICE_TLS_SET_FIELD(TLS, MyTLS);
   emitItems();
diff --git a/third_party/subzero/src/IceGlobalContext.h b/third_party/subzero/src/IceGlobalContext.h
index ba0bc09..b901d52 100644
--- a/third_party/subzero/src/IceGlobalContext.h
+++ b/third_party/subzero/src/IceGlobalContext.h
@@ -396,9 +396,6 @@
   ///  - clears the Globals array.
   void lowerGlobals(const std::string &SectionSuffix);
 
-  /// Lowers the profile information.
-  void lowerProfileData();
-
   void dumpConstantLookupCounts();
 
   /// DisposeGlobalVariablesAfterLowering controls whether the memory used by
@@ -507,9 +504,6 @@
   // If Instrumentor is not empty then it will be used to instrument globals and
   // CFGs.
   std::unique_ptr<Instrumentation> Instrumentor = nullptr;
-  // TODO(jpp): move to EmitterContext.
-  VariableDeclaration *ProfileBlockInfoVarDecl = nullptr;
-  std::vector<VariableDeclaration *> ProfileBlockInfos;
   /// Indicates if global variable declarations can be disposed of right after
   /// lowering.
   bool DisposeGlobalVariablesAfterLowering = true;
@@ -564,8 +558,6 @@
     HasSeenCode = true;
   }
 
-  void saveBlockInfoPtrs();
-
   llvm::SmallVector<ThreadContext *, 128> AllThreadContexts;
   llvm::SmallVector<std::thread, 128> TranslationThreads;
   llvm::SmallVector<std::thread, 128> EmitterThreads;