Blame SOURCES/0001-Selectively-disable-threading-from-gold.patch

fefa20
From 5fd2c900e90445c6efb7b97f8c663e588bcab81b Mon Sep 17 00:00:00 2001
fefa20
From: serge-sans-paille <sguelton@redhat.com>
fefa20
Date: Tue, 2 Jul 2019 09:25:52 +0000
fefa20
Subject: [PATCH] Selectively disable threading from gold
fefa20
fefa20
Related: rhbz#1636479
fefa20
---
fefa20
 llvm/include/llvm/LTO/LTO.h            |   3 +
fefa20
 llvm/include/llvm/LTO/LTOBackend.h     |   3 +
fefa20
 llvm/include/llvm/Support/ThreadPool.h |   7 ++
fefa20
 llvm/lib/LTO/LTO.cpp                   | 134 ++++++++++++++++++++++++-
fefa20
 llvm/lib/LTO/LTOBackend.cpp            |  48 ++++++++-
fefa20
 llvm/lib/Support/ThreadPool.cpp        |   6 +-
fefa20
 llvm/tools/gold/gold-plugin.cpp        |   7 +-
fefa20
 7 files changed, 197 insertions(+), 11 deletions(-)
fefa20
fefa20
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
fefa20
index 534d9b6f3f2..9dfda126540 100644
fefa20
--- a/llvm/include/llvm/LTO/LTO.h
fefa20
+++ b/llvm/include/llvm/LTO/LTO.h
fefa20
@@ -212,6 +212,7 @@ typedef std::function<std::unique_ptr<ThinBackendProc>(
fefa20
 
fefa20
 /// This ThinBackend runs the individual backend jobs in-process.
fefa20
 ThinBackend createInProcessThinBackend(unsigned ParallelismLevel);
fefa20
+ThinBackend createInProcessThinBackendNoThread(unsigned ParallelismLevel);
fefa20
 
fefa20
 /// This ThinBackend writes individual module indexes to files, instead of
fefa20
 /// running the individual backend jobs. This backend is for distributed builds
fefa20
@@ -281,6 +282,7 @@ public:
fefa20
   /// The client will receive at most one callback (via either AddStream or
fefa20
   /// Cache) for each task identifier.
fefa20
   Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr);
fefa20
+  Error runNoThread(AddStreamFn AddStream, NativeObjectCache Cache = nullptr);
fefa20
 
fefa20
 private:
fefa20
   Config Conf;
fefa20
@@ -397,6 +399,7 @@ private:
fefa20
                    const SymbolResolution *&ResI, const SymbolResolution *ResE);
fefa20
 
fefa20
   Error runRegularLTO(AddStreamFn AddStream);
fefa20
+  Error runRegularLTONoThread(AddStreamFn AddStream);
fefa20
   Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache);
fefa20
 
fefa20
   mutable bool CalledGetMaxTasks = false;
fefa20
diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h
fefa20
index d4743f6940f..2d6461fa82e 100644
fefa20
--- a/llvm/include/llvm/LTO/LTOBackend.h
fefa20
+++ b/llvm/include/llvm/LTO/LTOBackend.h
fefa20
@@ -39,6 +39,9 @@ namespace lto {
fefa20
 Error backend(Config &C, AddStreamFn AddStream,
fefa20
               unsigned ParallelCodeGenParallelismLevel,
fefa20
               std::unique_ptr<Module> M, ModuleSummaryIndex &CombinedIndex);
fefa20
+Error backendNoThread(Config &C, AddStreamFn AddStream,
fefa20
+              unsigned ParallelCodeGenParallelismLevel,
fefa20
+              std::unique_ptr<Module> M, ModuleSummaryIndex &CombinedIndex);
fefa20
 
fefa20
 /// Runs a ThinLTO backend.
fefa20
 Error thinBackend(Config &C, unsigned Task, AddStreamFn AddStream, Module &M,
fefa20
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
fefa20
index 4fdbd528b21..60ea5097114 100644
fefa20
--- a/llvm/include/llvm/Support/ThreadPool.h
fefa20
+++ b/llvm/include/llvm/Support/ThreadPool.h
fefa20
@@ -98,4 +98,11 @@ private:
fefa20
 };
fefa20
 }
fefa20
 
fefa20
+#ifndef ThreadPool
fefa20
+#undef LLVM_SUPPORT_THREAD_POOL_H
fefa20
+#define ThreadPool SequentialThreadPool
fefa20
+#include "llvm/Support/ThreadPool.h"
fefa20
+#undef ThreadPool
fefa20
+#endif
fefa20
+
fefa20
 #endif // LLVM_SUPPORT_THREAD_POOL_H
fefa20
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
fefa20
index 3a955060dea..a7b4f45b18c 100644
fefa20
--- a/llvm/lib/LTO/LTO.cpp
fefa20
+++ b/llvm/lib/LTO/LTO.cpp
fefa20
@@ -861,6 +861,58 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
fefa20
   return Result;
fefa20
 }
fefa20
 
fefa20
+Error LTO::runNoThread(AddStreamFn AddStream, NativeObjectCache Cache) {
fefa20
+  // Compute "dead" symbols; we don't want to import/export these!
fefa20
+  DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
fefa20
+  DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
fefa20
+  for (auto &Res : GlobalResolutions) {
fefa20
+    // Normally a resolution has the symbol's IR name. We can do nothing here
fefa20
+    // otherwise. See comments in GlobalResolution struct for more details.
fefa20
+    if (Res.second.IRName.empty())
fefa20
+      continue;
fefa20
+
fefa20
+    GlobalValue::GUID GUID = GlobalValue::getGUID(
fefa20
+        GlobalValue::dropLLVMManglingEscape(Res.second.IRName));
fefa20
+
fefa20
+    if (Res.second.VisibleOutsideSummary && Res.second.Prevailing)
fefa20
+      GUIDPreservedSymbols.insert(GlobalValue::getGUID(
fefa20
+          GlobalValue::dropLLVMManglingEscape(Res.second.IRName)));
fefa20
+
fefa20
+    GUIDPrevailingResolutions[GUID] =
fefa20
+        Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No;
fefa20
+  }
fefa20
+
fefa20
+  auto isPrevailing = [&](GlobalValue::GUID G) {
fefa20
+    auto It = GUIDPrevailingResolutions.find(G);
fefa20
+    if (It == GUIDPrevailingResolutions.end())
fefa20
+      return PrevailingType::Unknown;
fefa20
+    return It->second;
fefa20
+  };
fefa20
+  computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols,
fefa20
+                                  isPrevailing, Conf.OptLevel > 0);
fefa20
+
fefa20
+  // Setup output file to emit statistics.
fefa20
+  std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
fefa20
+  if (!Conf.StatsFile.empty()) {
fefa20
+    EnableStatistics(false);
fefa20
+    std::error_code EC;
fefa20
+    StatsFile =
fefa20
+        llvm::make_unique<ToolOutputFile>(Conf.StatsFile, EC, sys::fs::F_None);
fefa20
+    if (EC)
fefa20
+      return errorCodeToError(EC);
fefa20
+    StatsFile->keep();
fefa20
+  }
fefa20
+
fefa20
+  Error Result = runRegularLTONoThread(AddStream);
fefa20
+  if (!Result)
fefa20
+    Result = runThinLTO(AddStream, Cache);
fefa20
+
fefa20
+  if (StatsFile)
fefa20
+    PrintStatisticsJSON(StatsFile->os());
fefa20
+
fefa20
+  return Result;
fefa20
+}
fefa20
+
fefa20
 Error LTO::runRegularLTO(AddStreamFn AddStream) {
fefa20
   for (auto &M : RegularLTO.ModsWithSummaries)
fefa20
     if (Error Err = linkRegularLTO(std::move(M),
fefa20
@@ -928,6 +980,73 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
fefa20
                  std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex);
fefa20
 }
fefa20
 
fefa20
+Error LTO::runRegularLTONoThread(AddStreamFn AddStream) {
fefa20
+  for (auto &M : RegularLTO.ModsWithSummaries)
fefa20
+    if (Error Err = linkRegularLTO(std::move(M),
fefa20
+                                   /*LivenessFromIndex=*/true))
fefa20
+      return Err;
fefa20
+
fefa20
+  // Make sure commons have the right size/alignment: we kept the largest from
fefa20
+  // all the prevailing when adding the inputs, and we apply it here.
fefa20
+  const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
fefa20
+  for (auto &I : RegularLTO.Commons) {
fefa20
+    if (!I.second.Prevailing)
fefa20
+      // Don't do anything if no instance of this common was prevailing.
fefa20
+      continue;
fefa20
+    GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(I.first);
fefa20
+    if (OldGV && DL.getTypeAllocSize(OldGV->getValueType()) == I.second.Size) {
fefa20
+      // Don't create a new global if the type is already correct, just make
fefa20
+      // sure the alignment is correct.
fefa20
+      OldGV->setAlignment(I.second.Align);
fefa20
+      continue;
fefa20
+    }
fefa20
+    ArrayType *Ty =
fefa20
+        ArrayType::get(Type::getInt8Ty(RegularLTO.Ctx), I.second.Size);
fefa20
+    auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false,
fefa20
+                                  GlobalValue::CommonLinkage,
fefa20
+                                  ConstantAggregateZero::get(Ty), "");
fefa20
+    GV->setAlignment(I.second.Align);
fefa20
+    if (OldGV) {
fefa20
+      OldGV->replaceAllUsesWith(ConstantExpr::getBitCast(GV, OldGV->getType()));
fefa20
+      GV->takeName(OldGV);
fefa20
+      OldGV->eraseFromParent();
fefa20
+    } else {
fefa20
+      GV->setName(I.first);
fefa20
+    }
fefa20
+  }
fefa20
+
fefa20
+  if (Conf.PreOptModuleHook &&
fefa20
+      !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
fefa20
+    return Error::success();
fefa20
+
fefa20
+  if (!Conf.CodeGenOnly) {
fefa20
+    for (const auto &R : GlobalResolutions) {
fefa20
+      if (!R.second.isPrevailingIRSymbol())
fefa20
+        continue;
fefa20
+      if (R.second.Partition != 0 &&
fefa20
+          R.second.Partition != GlobalResolution::External)
fefa20
+        continue;
fefa20
+
fefa20
+      GlobalValue *GV =
fefa20
+          RegularLTO.CombinedModule->getNamedValue(R.second.IRName);
fefa20
+      // Ignore symbols defined in other partitions.
fefa20
+      // Also skip declarations, which are not allowed to have internal linkage.
fefa20
+      if (!GV || GV->hasLocalLinkage() || GV->isDeclaration())
fefa20
+        continue;
fefa20
+      GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
fefa20
+                                              : GlobalValue::UnnamedAddr::None);
fefa20
+      if (EnableLTOInternalization && R.second.Partition == 0)
fefa20
+        GV->setLinkage(GlobalValue::InternalLinkage);
fefa20
+    }
fefa20
+
fefa20
+    if (Conf.PostInternalizeModuleHook &&
fefa20
+        !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
fefa20
+      return Error::success();
fefa20
+  }
fefa20
+  return backendNoThread(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel,
fefa20
+                 std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex);
fefa20
+}
fefa20
+
fefa20
 /// This class defines the interface to the ThinLTO backend.
fefa20
 class lto::ThinBackendProc {
fefa20
 protected:
fefa20
@@ -952,8 +1071,9 @@ public:
fefa20
 };
fefa20
 
fefa20
 namespace {
fefa20
+template<class ThreadEngine>
fefa20
 class InProcessThinBackend : public ThinBackendProc {
fefa20
-  ThreadPool BackendThreadPool;
fefa20
+  ThreadEngine BackendThreadPool;
fefa20
   AddStreamFn AddStream;
fefa20
   NativeObjectCache Cache;
fefa20
   std::set<GlobalValue::GUID> CfiFunctionDefs;
fefa20
@@ -1065,9 +1185,19 @@ ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) {
fefa20
   return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex,
fefa20
              const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
fefa20
              AddStreamFn AddStream, NativeObjectCache Cache) {
fefa20
-    return llvm::make_unique<InProcessThinBackend>(
fefa20
+    return llvm::make_unique<InProcessThinBackend<ThreadPool>>(
fefa20
+        Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries,
fefa20
+        AddStream, Cache);
fefa20
+  };
fefa20
+}
fefa20
+ThinBackend lto::createInProcessThinBackendNoThread(unsigned ParallelismLevel) {
fefa20
+  return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex,
fefa20
+             const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
fefa20
+             AddStreamFn AddStream, NativeObjectCache Cache) {
fefa20
+    ThinBackendProc* Backend = new InProcessThinBackend<SequentialThreadPool>(
fefa20
         Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries,
fefa20
         AddStream, Cache);
fefa20
+    return std::unique_ptr<ThinBackendProc>(Backend);
fefa20
   };
fefa20
 }
fefa20
 
fefa20
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
fefa20
index 926c419e34a..2c263ed86a9 100644
fefa20
--- a/llvm/lib/LTO/LTOBackend.cpp
fefa20
+++ b/llvm/lib/LTO/LTOBackend.cpp
fefa20
@@ -333,10 +333,11 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
fefa20
     DwoOut->keep();
fefa20
 }
fefa20
 
fefa20
-void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
fefa20
+template<class ThreadPoolEngine>
fefa20
+static void splitCodeGen_(Config &C, TargetMachine *TM, AddStreamFn AddStream,
fefa20
                   unsigned ParallelCodeGenParallelismLevel,
fefa20
                   std::unique_ptr<Module> Mod) {
fefa20
-  ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
fefa20
+  ThreadPoolEngine CodegenThreadPool(ParallelCodeGenParallelismLevel);
fefa20
   unsigned ThreadCount = 0;
fefa20
   const Target *T = &TM->getTarget();
fefa20
 
fefa20
@@ -380,6 +381,17 @@ void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
fefa20
   // can leave the function scope.
fefa20
   CodegenThreadPool.wait();
fefa20
 }
fefa20
+void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
fefa20
+                  unsigned ParallelCodeGenParallelismLevel,
fefa20
+		  std::unique_ptr<Module> Mod) {
fefa20
+  splitCodeGen_<ThreadPool>(C, TM, AddStream, ParallelCodeGenParallelismLevel, std::move(Mod));
fefa20
+}
fefa20
+
fefa20
+void splitCodeGenNoThread(Config &C, TargetMachine *TM, AddStreamFn AddStream,
fefa20
+                  unsigned ParallelCodeGenParallelismLevel,
fefa20
+		  std::unique_ptr<Module> Mod) {
fefa20
+  splitCodeGen_<SequentialThreadPool>(C, TM, AddStream, ParallelCodeGenParallelismLevel, std::move(Mod));
fefa20
+}
fefa20
 
fefa20
 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
fefa20
   if (!C.OverrideTriple.empty())
fefa20
@@ -439,6 +451,38 @@ Error lto::backend(Config &C, AddStreamFn AddStream,
fefa20
   return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
fefa20
 }
fefa20
 
fefa20
+Error lto::backendNoThread(Config &C, AddStreamFn AddStream,
fefa20
+                   unsigned ParallelCodeGenParallelismLevel,
fefa20
+                   std::unique_ptr<Module> Mod,
fefa20
+                   ModuleSummaryIndex &CombinedIndex) {
fefa20
+  Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
fefa20
+  if (!TOrErr)
fefa20
+    return TOrErr.takeError();
fefa20
+
fefa20
+  std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod);
fefa20
+
fefa20
+  // Setup optimization remarks.
fefa20
+  auto DiagFileOrErr = lto::setupOptimizationRemarks(
fefa20
+      Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
fefa20
+  if (!DiagFileOrErr)
fefa20
+    return DiagFileOrErr.takeError();
fefa20
+  auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
fefa20
+
fefa20
+  if (!C.CodeGenOnly) {
fefa20
+    if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false,
fefa20
+             /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr))
fefa20
+      return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
fefa20
+  }
fefa20
+
fefa20
+  if (ParallelCodeGenParallelismLevel == 1) {
fefa20
+    codegen(C, TM.get(), AddStream, 0, *Mod);
fefa20
+  } else {
fefa20
+    splitCodeGenNoThread(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel,
fefa20
+                 std::move(Mod));
fefa20
+  }
fefa20
+  return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
fefa20
+}
fefa20
+
fefa20
 static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals,
fefa20
                             const ModuleSummaryIndex &Index) {
fefa20
   std::vector<GlobalValue*> DeadGVs;
fefa20
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
fefa20
index d0212ca1346..3d760edbcb7 100644
fefa20
--- a/llvm/lib/Support/ThreadPool.cpp
fefa20
+++ b/llvm/lib/Support/ThreadPool.cpp
fefa20
@@ -19,8 +19,6 @@
fefa20
 
fefa20
 using namespace llvm;
fefa20
 
fefa20
-#if LLVM_ENABLE_THREADS
fefa20
-
fefa20
 // Default to hardware_concurrency
fefa20
 ThreadPool::ThreadPool() : ThreadPool(hardware_concurrency()) {}
fefa20
 
fefa20
@@ -107,7 +105,7 @@ ThreadPool::~ThreadPool() {
fefa20
     Worker.join();
fefa20
 }
fefa20
 
fefa20
-#else // LLVM_ENABLE_THREADS Disabled
fefa20
+#define ThreadPool SequentialThreadPool
fefa20
 
fefa20
 ThreadPool::ThreadPool() : ThreadPool(0) {}
fefa20
 
fefa20
@@ -142,5 +140,3 @@ std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
fefa20
 ThreadPool::~ThreadPool() {
fefa20
   wait();
fefa20
 }
fefa20
-
fefa20
-#endif
fefa20
diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp
fefa20
index 738cafa6cac..8ef75d2589c 100644
fefa20
--- a/llvm/tools/gold/gold-plugin.cpp
fefa20
+++ b/llvm/tools/gold/gold-plugin.cpp
fefa20
@@ -848,7 +848,7 @@ static std::unique_ptr<LTO> createLTO(IndexWriteCallback OnIndexWrite,
fefa20
   Conf.DisableVerify = options::DisableVerify;
fefa20
   Conf.OptLevel = options::OptLevel;
fefa20
   if (options::Parallelism)
fefa20
-    Backend = createInProcessThinBackend(options::Parallelism);
fefa20
+    Backend = createInProcessThinBackendNoThread(options::Parallelism);
fefa20
   if (options::thinlto_index_only) {
fefa20
     std::string OldPrefix, NewPrefix;
fefa20
     getThinLTOOldAndNewPrefix(OldPrefix, NewPrefix);
fefa20
@@ -856,6 +856,9 @@ static std::unique_ptr<LTO> createLTO(IndexWriteCallback OnIndexWrite,
fefa20
                                             options::thinlto_emit_imports_files,
fefa20
                                             LinkedObjectsFile, OnIndexWrite);
fefa20
   }
fefa20
+  if(!Backend)
fefa20
+    Backend = createInProcessThinBackendNoThread(llvm::heavyweight_hardware_concurrency());
fefa20
+
fefa20
 
fefa20
   Conf.OverrideTriple = options::triple;
fefa20
   Conf.DefaultTriple = sys::getDefaultTargetTriple();
fefa20
@@ -1039,7 +1042,7 @@ static std::vector<std::pair<SmallString<128>, bool>> runLTO() {
fefa20
   if (!options::cache_dir.empty())
fefa20
     Cache = check(localCache(options::cache_dir, AddBuffer));
fefa20
 
fefa20
-  check(Lto->run(AddStream, Cache));
fefa20
+  check(Lto->runNoThread(AddStream, Cache));
fefa20
 
fefa20
   // Write empty output files that may be expected by the distributed build
fefa20
   // system.
fefa20
-- 
fefa20
2.20.1
fefa20