blob: 819748db4ec21e8a052accd854d9b118cc2e8fcf [file] [log] [blame]
//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the Unix specific implementation of Threading functions.
//
//===----------------------------------------------------------------------===//
#include "Unix.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#if defined(__APPLE__)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <pthread/qos.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif
#include <pthread.h>
#if defined(__FreeBSD__) || defined(__OpenBSD__)
#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <errno.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <unistd.h>
#endif
#if defined(__NetBSD__)
#include <lwp.h> // For _lwp_self()
#endif
#if defined(__OpenBSD__)
#include <unistd.h> // For getthrid()
#endif
#if defined(__linux__)
#include <sched.h> // For sched_getaffinity
#include <sys/syscall.h> // For syscall codes
#include <unistd.h> // For syscall()
#endif
namespace llvm {
/// Spawns a new pthread running \p ThreadFunc(\p Arg), optionally with an
/// explicit stack size, and returns its handle. Any pthread failure is fatal.
pthread_t
llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
                            std::optional<unsigned> StackSizeInBytes) {
  // Build a pthread attributes object; torn down on every exit path.
  pthread_attr_t Attr;
  int EC = ::pthread_attr_init(&Attr);
  if (EC != 0)
    ReportErrnumFatal("pthread_attr_init failed", EC);
  auto AttrCleanup = llvm::make_scope_exit([&Attr] {
    int DestroyEC = ::pthread_attr_destroy(&Attr);
    if (DestroyEC != 0)
      ReportErrnumFatal("pthread_attr_destroy failed", DestroyEC);
  });

  // Honor an explicit stack-size request, if one was given.
  if (StackSizeInBytes) {
    EC = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes);
    if (EC != 0)
      ReportErrnumFatal("pthread_attr_setstacksize failed", EC);
  }

  // Launch the thread with the configured attributes.
  pthread_t Thread;
  EC = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg);
  if (EC != 0)
    ReportErrnumFatal("pthread_create failed", EC);
  return Thread;
}
/// Detaches \p Thread so its resources are reclaimed automatically when it
/// exits. A failing pthread_detach is reported as a fatal error.
void llvm_thread_detach_impl(pthread_t Thread) {
  if (int EC = ::pthread_detach(Thread))
    ReportErrnumFatal("pthread_detach failed", EC);
}
/// Blocks until \p Thread terminates; the thread's return value is discarded.
/// A failing pthread_join is reported as a fatal error.
void llvm_thread_join_impl(pthread_t Thread) {
  if (int EC = ::pthread_join(Thread, nullptr))
    ReportErrnumFatal("pthread_join failed", EC);
}
/// On Unix the pthread handle doubles as the thread id, so this is simply the
/// identity function.
pthread_t llvm_thread_get_id_impl(pthread_t Thread) {
  return Thread;
}
/// Returns the calling thread's own pthread handle.
pthread_t llvm_thread_get_current_id_impl() {
  return ::pthread_self();
}
} // namespace llvm
/// Returns a numeric identifier for the calling thread, using the most
/// specific kernel-level thread id each platform exposes.
uint64_t llvm::get_threadid() {
#if defined(__APPLE__)
  // Calling "mach_thread_self()" bumps the reference count on the thread
  // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
  // count.
  thread_port_t Self = mach_thread_self();
  mach_port_deallocate(mach_task_self(), Self);
  return Self;
#elif defined(__FreeBSD__)
  return uint64_t(pthread_getthreadid_np());
#elif defined(__NetBSD__)
  return uint64_t(_lwp_self());
#elif defined(__OpenBSD__)
  return uint64_t(getthrid());
#elif defined(__ANDROID__)
  return uint64_t(gettid());
#elif defined(__linux__)
  // Issue the raw syscall rather than relying on a libc gettid() wrapper.
  return uint64_t(syscall(SYS_gettid));
#else
  // Fallback: the opaque pthread handle. Not guaranteed to be a kernel id.
  return uint64_t(pthread_self());
#endif
}
/// Maximum thread-name length (counting the null terminator, see
/// set_thread_name) accepted by the current platform's thread-naming API;
/// 0 means thread naming is unsupported here.
static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(__NetBSD__)
  return PTHREAD_MAX_NAMELEN_NP;
#elif defined(__APPLE__)
  return 64;
#elif defined(__linux__)
#if HAVE_PTHREAD_SETNAME_NP
  return 16;
#else
  return 0;
#endif
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  return 16;
#elif defined(__OpenBSD__)
  return 32;
#else
  return 0;
#endif
}
/// Public accessor for the platform-specific thread-name length limit.
uint32_t llvm::get_max_thread_name_length() {
  constexpr uint32_t MaxLen = get_max_thread_name_length_impl();
  return MaxLen;
}
/// Sets the name of the calling thread using whichever platform-specific
/// pthread extension is available; a no-op on platforms with none.
void llvm::set_thread_name(const Twine &Name) {
  // Make sure the input is null terminated.
  SmallString<64> Storage;
  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);

  // Truncate from the beginning, not the end, if the specified name is too
  // long. For one, this ensures that the resulting string is still null
  // terminated, but additionally the end of a long thread name will usually
  // be more unique than the beginning, since a common pattern is for similar
  // threads to share a common prefix.
  // Note that the name length includes the null terminator.
  // Hoisted into a local to avoid calling the accessor twice.
  const uint32_t MaxLen = get_max_thread_name_length();
  if (MaxLen > 0)
    NameStr = NameStr.take_back(MaxLen - 1);
  (void)NameStr;
#if defined(__linux__)
#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
#if HAVE_PTHREAD_SETNAME_NP
  ::pthread_setname_np(::pthread_self(), NameStr.data());
#endif
#endif
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
  ::pthread_set_name_np(::pthread_self(), NameStr.data());
#elif defined(__NetBSD__)
  // NetBSD's variant takes a printf-style format plus one argument.
  ::pthread_setname_np(::pthread_self(), "%s",
                       const_cast<char *>(NameStr.data()));
#elif defined(__APPLE__)
  // On Darwin a thread may only name itself, so there is no handle argument.
  ::pthread_setname_np(NameStr.data());
#endif
}
/// Retrieves the current thread's name into \p Name. \p Name is left empty on
/// platforms with no way to read the name back, or on failure.
void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  Name.clear();
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  // Read the thread name out of the kernel's process table via sysctl,
  // requesting one kinfo_proc entry per thread of this process.
  int pid = ::getpid();
  uint64_t tid = get_threadid();
  struct kinfo_proc *kp = nullptr, *nkp;
  size_t len = 0;
  int error;
  int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
                (int)pid};
  while (1) {
    // First pass (kp == nullptr) only sizes the buffer; later passes retry
    // with a larger buffer whenever the kernel reports ENOMEM.
    error = sysctl(ctl, 4, kp, &len, nullptr, 0);
    if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
      // Add extra space in case threads are added before next call.
      len += sizeof(*kp) + len / 10;
      nkp = (struct kinfo_proc *)::realloc(kp, len);
      if (nkp == nullptr) {
        // Out of memory: give up and return an empty name.
        free(kp);
        return;
      }
      kp = nkp;
      continue;
    }
    if (error != 0)
      len = 0;
    break;
  }
  // Scan the returned entries for the record matching this thread's id.
  for (size_t i = 0; i < len / sizeof(*kp); i++) {
    if (kp[i].ki_tid == (lwpid_t)tid) {
      Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
      break;
    }
  }
  free(kp);
  return;
#elif defined(__NetBSD__)
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_getname_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));
#elif defined(__OpenBSD__)
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_get_name_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));
#elif defined(__linux__)
#if HAVE_PTHREAD_GETNAME_NP
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
  if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
    Name.append(Buffer, Buffer + strlen(Buffer));
#endif
#endif
}
/// Adjusts the calling thread's scheduling priority where the platform
/// supports it; returns FAILURE on unsupported platforms or on API failure.
SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
#if defined(__linux__) && defined(SCHED_IDLE)
  // Some *really* old glibcs are missing SCHED_IDLE.
  // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
  // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
  sched_param priority;
  // For each of the above policies, param->sched_priority must be 0.
  priority.sched_priority = 0;
  // SCHED_IDLE for running very low priority background jobs.
  // SCHED_OTHER the standard round-robin time-sharing policy;
  return !pthread_setschedparam(
             pthread_self(),
             // FIXME: consider SCHED_BATCH for Low
             Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
             &priority)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#elif defined(__APPLE__)
  // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
  //
  // Background - Applies to work that isn’t visible to the user and may take
  // significant time to complete. Examples include indexing, backing up, or
  // synchronizing data. This class emphasizes energy efficiency.
  //
  // Utility - Applies to work that takes anywhere from a few seconds to a few
  // minutes to complete. Examples include downloading a document or importing
  // data. This class offers a balance between responsiveness, performance, and
  // energy efficiency.
  //
  // Map our three priorities onto Darwin QoS classes (covers every enumerator,
  // so the lambda's switch needs no default).
  const auto qosClass = [&]() {
    switch (Priority) {
    case ThreadPriority::Background:
      return QOS_CLASS_BACKGROUND;
    case ThreadPriority::Low:
      return QOS_CLASS_UTILITY;
    case ThreadPriority::Default:
      return QOS_CLASS_DEFAULT;
    }
  }();
  return !pthread_set_qos_class_self_np(qosClass, 0)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#endif
  // No priority support on this platform (only reached outside the #if arms).
  return SetThreadPriorityResult::FAILURE;
}
#include <thread>
// Counts the hardware threads available to this process, honoring the CPU
// affinity mask on platforms that expose one.
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  // Respect the calling thread's cpuset rather than the machine-wide count.
  cpuset_t mask;
  CPU_ZERO(&mask);
  if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask),
                         &mask) == 0)
    return CPU_COUNT(&mask);
#elif defined(__linux__)
  // Respect the process affinity mask (e.g. set by taskset/cgroups tooling).
  cpu_set_t Set;
  if (sched_getaffinity(0, sizeof(Set), &Set) == 0)
    return CPU_COUNT(&Set);
#endif
  // Guard against std::thread::hardware_concurrency() returning 0.
  if (unsigned Val = std::thread::hardware_concurrency())
    return Val;
  return 1;
}
// Intentionally a no-op on Unix: this implementation does not pin pool
// threads, so strategy hints are currently ignored here.
void llvm::ThreadPoolStrategy::apply_thread_strategy(
    unsigned ThreadPoolNum) const {}
// Not implemented for Unix yet; calling this aborts via llvm_unreachable.
llvm::BitVector llvm::get_thread_affinity_mask() {
  // FIXME: Implement
  llvm_unreachable("Not implemented!");
}
// NOTE(review): hard-coded to 1 on Unix — presumably a placeholder until
// real CPU-group detection is implemented here; confirm against other
// platform implementations before relying on this value.
unsigned llvm::get_cpus() { return 1; }
#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
static int computeHostNumPhysicalCores() {
  // Enabled represents the number of physical id/core id pairs with at least
  // one processor id enabled by the CPU affinity mask.
  cpu_set_t Affinity, Enabled;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
    return -1;
  CPU_ZERO(&Enabled);

  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
  // mmapped because it appears to have 0 size.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return -1;
  }
  // Parse line by line; relevant "key : value" fields repeat per processor.
  SmallVector<StringRef, 8> strs;
  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
                             /*KeepEmpty=*/false);
  int CurProcessor = -1;
  int CurPhysicalId = -1;
  int CurSiblings = -1;
  int CurCoreId = -1;
  for (StringRef Line : strs) {
    std::pair<StringRef, StringRef> Data = Line.split(':');
    auto Name = Data.first.trim();
    auto Val = Data.second.trim();
    // These fields are available if the kernel is configured with CONFIG_SMP.
    if (Name == "processor")
      Val.getAsInteger(10, CurProcessor);
    else if (Name == "physical id")
      Val.getAsInteger(10, CurPhysicalId);
    else if (Name == "siblings")
      Val.getAsInteger(10, CurSiblings);
    else if (Name == "core id") {
      Val.getAsInteger(10, CurCoreId);
      // The processor id corresponds to an index into cpu_set_t.
      // Mark the (physical id, core id) pair as enabled if any of its
      // processor ids is in our affinity mask.
      if (CPU_ISSET(CurProcessor, &Affinity))
        CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
    }
  }
  return CPU_COUNT(&Enabled);
}
#elif defined(__linux__) && defined(__s390x__)
// On s390x, fall back to the count of online logical processors.
static int computeHostNumPhysicalCores() {
  return sysconf(_SC_NPROCESSORS_ONLN);
}
#elif defined(__linux__) && !defined(__ANDROID__)
// Generic Linux: approximate physical cores by the affinity-mask CPU count.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  cpu_set_t *DynAffinity;
  DynAffinity = CPU_ALLOC(2048);
  if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) {
    int NumCPUs = CPU_COUNT(DynAffinity);
    CPU_FREE(DynAffinity);
    return NumCPUs;
  }
  return -1;
}
#elif defined(__APPLE__)
// Gets the number of *physical cores* on the machine.
static int computeHostNumPhysicalCores() {
  uint32_t count;
  size_t len = sizeof(count);
  sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
  if (count < 1) {
    // hw.physicalcpu failed; retry with the older HW_AVAILCPU sysctl key.
    int nm[2];
    nm[0] = CTL_HW;
    nm[1] = HW_AVAILCPU;
    sysctl(nm, 2, &count, &len, NULL, 0);
    if (count < 1)
      return -1;
  }
  return count;
}
#elif defined(__MVS__)
// z/OS: walk fixed system control blocks to find the online CP count.
static int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
  char *PSA = 0;
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
#else
// On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() { return -1; }
#endif
/// Returns the number of physical cores (-1 when unknown). The value is
/// computed once on first use and cached for the lifetime of the process.
int llvm::get_physical_cores() {
  static const int PhysicalCores = computeHostNumPhysicalCores();
  return PhysicalCores;
}