blob: 819748db4ec21e8a052accd854d9b118cc2e8fcf [file] [log] [blame]
//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the Unix specific implementation of Threading functions.
//
//===----------------------------------------------------------------------===//
#include "Unix.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#if defined(__APPLE__)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <pthread/qos.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif
#include <pthread.h>
#if defined(__FreeBSD__) || defined(__OpenBSD__)
#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <errno.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <unistd.h>
#endif
#if defined(__NetBSD__)
#include <lwp.h> // For _lwp_self()
#endif
#if defined(__OpenBSD__)
#include <unistd.h> // For getthrid()
#endif
#if defined(__linux__)
#include <sched.h> // For sched_getaffinity
#include <sys/syscall.h> // For syscall codes
#include <unistd.h> // For syscall()
#endif
namespace llvm {
/// Spawns a new pthread running \p ThreadFunc(\p Arg), optionally with an
/// explicit stack size, and returns its handle. Any pthread failure is fatal.
pthread_t
llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
                            std::optional<unsigned> StackSizeInBytes) {
  // Build a pthread attributes object; torn down on every exit path.
  pthread_attr_t Attr;
  int EC = ::pthread_attr_init(&Attr);
  if (EC != 0)
    ReportErrnumFatal("pthread_attr_init failed", EC);
  auto AttrCleanup = llvm::make_scope_exit([&Attr] {
    int DestroyEC = ::pthread_attr_destroy(&Attr);
    if (DestroyEC != 0)
      ReportErrnumFatal("pthread_attr_destroy failed", DestroyEC);
  });

  // Honor an explicit stack-size request, if one was given.
  if (StackSizeInBytes) {
    EC = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes);
    if (EC != 0)
      ReportErrnumFatal("pthread_attr_setstacksize failed", EC);
  }

  // Launch the thread with the configured attributes.
  pthread_t Thread;
  EC = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg);
  if (EC != 0)
    ReportErrnumFatal("pthread_create failed", EC);
  return Thread;
}
/// Detaches \p Thread so its resources are reclaimed automatically when it
/// exits. A failing pthread_detach is reported as a fatal error.
void llvm_thread_detach_impl(pthread_t Thread) {
  if (int EC = ::pthread_detach(Thread))
    ReportErrnumFatal("pthread_detach failed", EC);
}
/// Blocks until \p Thread terminates; the thread's return value is discarded.
/// A failing pthread_join is reported as a fatal error.
void llvm_thread_join_impl(pthread_t Thread) {
  if (int EC = ::pthread_join(Thread, nullptr))
    ReportErrnumFatal("pthread_join failed", EC);
}
/// On Unix the pthread handle doubles as the thread id, so this is simply the
/// identity function.
pthread_t llvm_thread_get_id_impl(pthread_t Thread) {
  return Thread;
}
/// Returns the calling thread's own pthread handle.
pthread_t llvm_thread_get_current_id_impl() {
  return ::pthread_self();
}
} // namespace llvm
/// Returns a numeric identifier for the calling thread, using the most
/// specific kernel-level thread id each platform exposes.
uint64_t llvm::get_threadid() {
#if defined(__APPLE__)
  // Calling "mach_thread_self()" bumps the reference count on the thread
  // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
  // count.
  thread_port_t Self = mach_thread_self();
  mach_port_deallocate(mach_task_self(), Self);
  return Self;
#elif defined(__FreeBSD__)
  return uint64_t(pthread_getthreadid_np());
#elif defined(__NetBSD__)
  return uint64_t(_lwp_self());
#elif defined(__OpenBSD__)
  return uint64_t(getthrid());
#elif defined(__ANDROID__)
  return uint64_t(gettid());
#elif defined(__linux__)
  // Issue the raw syscall rather than relying on a libc gettid() wrapper.
  return uint64_t(syscall(SYS_gettid));
#else
  // Fallback: the opaque pthread handle. Not guaranteed to be a kernel id.
  return uint64_t(pthread_self());
#endif
}
/// Maximum thread-name length (counting the null terminator, see
/// set_thread_name) accepted by the current platform's thread-naming API;
/// 0 means thread naming is unsupported here.
static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(__NetBSD__)
  return PTHREAD_MAX_NAMELEN_NP;
#elif defined(__APPLE__)
  return 64;
#elif defined(__linux__)
#if HAVE_PTHREAD_SETNAME_NP
  return 16;
#else
  return 0;
#endif
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  return 16;
#elif defined(__OpenBSD__)
  return 32;
#else
  return 0;
#endif
}
/// Public accessor for the platform-specific thread-name length limit.
uint32_t llvm::get_max_thread_name_length() {
  constexpr uint32_t MaxLen = get_max_thread_name_length_impl();
  return MaxLen;
}
/// Sets the name of the calling thread using whichever platform-specific
/// pthread extension is available; a no-op on platforms with none.
void llvm::set_thread_name(const Twine &Name) {
  // Make sure the input is null terminated.
  SmallString<64> Storage;
  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);

  // Truncate from the beginning, not the end, if the specified name is too
  // long. For one, this ensures that the resulting string is still null
  // terminated, but additionally the end of a long thread name will usually
  // be more unique than the beginning, since a common pattern is for similar
  // threads to share a common prefix.
  // Note that the name length includes the null terminator.
  // Hoisted into a local to avoid calling the accessor twice.
  const uint32_t MaxLen = get_max_thread_name_length();
  if (MaxLen > 0)
    NameStr = NameStr.take_back(MaxLen - 1);
  (void)NameStr;
#if defined(__linux__)
#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
#if HAVE_PTHREAD_SETNAME_NP
  ::pthread_setname_np(::pthread_self(), NameStr.data());
#endif
#endif
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
  ::pthread_set_name_np(::pthread_self(), NameStr.data());
#elif defined(__NetBSD__)
  // NetBSD's variant takes a printf-style format plus one argument.
  ::pthread_setname_np(::pthread_self(), "%s",
                       const_cast<char *>(NameStr.data()));
#elif defined(__APPLE__)
  // On Darwin a thread may only name itself, so there is no handle argument.
  ::pthread_setname_np(NameStr.data());
#endif
}
/// Retrieves the current thread's name into \p Name. \p Name is left empty on
/// platforms with no way to read the name back, or on failure.
void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  Name.clear();
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  // Read the thread name out of the kernel's process table via sysctl,
  // requesting one kinfo_proc entry per thread of this process.
  int pid = ::getpid();
  uint64_t tid = get_threadid();
  struct kinfo_proc *kp = nullptr, *nkp;
  size_t len = 0;
  int error;
  int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
                (int)pid};
  while (1) {
    // First pass (kp == nullptr) only sizes the buffer; later passes retry
    // with a larger buffer whenever the kernel reports ENOMEM.
    error = sysctl(ctl, 4, kp, &len, nullptr, 0);
    if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
      // Add extra space in case threads are added before next call.
      len += sizeof(*kp) + len / 10;
      nkp = (struct kinfo_proc *)::realloc(kp, len);
      if (nkp == nullptr) {
        // Out of memory: give up and return an empty name.
        free(kp);
        return;
      }
      kp = nkp;
      continue;
    }
    if (error != 0)
      len = 0;
    break;
  }
  // Scan the returned entries for the record matching this thread's id.
  for (size_t i = 0; i < len / sizeof(*kp); i++) {
    if (kp[i].ki_tid == (lwpid_t)tid) {
      Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
      break;
    }
  }
  free(kp);
  return;
#elif defined(__NetBSD__)
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_getname_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));
#elif defined(__OpenBSD__)
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_get_name_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));
#elif defined(__linux__)
#if HAVE_PTHREAD_GETNAME_NP
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
  if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
    Name.append(Buffer, Buffer + strlen(Buffer));
#endif
#endif
}
/// Adjusts the calling thread's scheduling priority where the platform
/// supports it; returns FAILURE on unsupported platforms or on API failure.
SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
#if defined(__linux__) && defined(SCHED_IDLE)
  // Some *really* old glibcs are missing SCHED_IDLE.
  // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
  // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
  sched_param priority;
  // For each of the above policies, param->sched_priority must be 0.
  priority.sched_priority = 0;
  // SCHED_IDLE for running very low priority background jobs.
  // SCHED_OTHER the standard round-robin time-sharing policy;
  return !pthread_setschedparam(
             pthread_self(),
             // FIXME: consider SCHED_BATCH for Low
             Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
             &priority)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#elif defined(__APPLE__)
  // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
  //
  // Background - Applies to work that isn’t visible to the user and may take
  // significant time to complete. Examples include indexing, backing up, or
  // synchronizing data. This class emphasizes energy efficiency.
  //
  // Utility - Applies to work that takes anywhere from a few seconds to a few
  // minutes to complete. Examples include downloading a document or importing
  // data. This class offers a balance between responsiveness, performance, and
  // energy efficiency.
  //
  // Map our three priorities onto Darwin QoS classes (covers every enumerator,
  // so the lambda's switch needs no default).
  const auto qosClass = [&]() {
    switch (Priority) {
    case ThreadPriority::Background:
      return QOS_CLASS_BACKGROUND;
    case ThreadPriority::Low:
      return QOS_CLASS_UTILITY;
    case ThreadPriority::Default:
      return QOS_CLASS_DEFAULT;
    }
  }();
  return !pthread_set_qos_class_self_np(qosClass, 0)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#endif
  // No priority support on this platform (only reached outside the #if arms).
  return SetThreadPriorityResult::FAILURE;
}
#include <thread>
// Counts the hardware threads available to this process, honoring the CPU
// affinity mask on platforms that expose one.
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  // Respect the calling thread's cpuset rather than the machine-wide count.
  cpuset_t mask;
  CPU_ZERO(&mask);
  if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask),
                         &mask) == 0)
    return CPU_COUNT(&mask);
#elif defined(__linux__)
  // Respect the process affinity mask (e.g. set by taskset/cgroups tooling).
  cpu_set_t Set;
  if (sched_getaffinity(0, sizeof(Set), &Set) == 0)
    return CPU_COUNT(&Set);
#endif
  // Guard against std::thread::hardware_concurrency() returning 0.
  if (unsigned Val = std::thread::hardware_concurrency())
    return Val;
  return 1;
}
// Intentionally a no-op on Unix: this implementation does not pin pool
// threads, so strategy hints are currently ignored here.
void llvm::ThreadPoolStrategy::apply_thread_strategy(
    unsigned ThreadPoolNum) const {}
// Not implemented for Unix yet; calling this aborts via llvm_unreachable.
llvm::BitVector llvm::get_thread_affinity_mask() {
  // FIXME: Implement
  llvm_unreachable("Not implemented!");
}
// NOTE(review): hard-coded to 1 on Unix — presumably a placeholder until
// real CPU-group detection is implemented here; confirm against other
// platform implementations before relying on this value.
unsigned llvm::get_cpus() { return 1; }
#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
static int computeHostNumPhysicalCores() {
  // Enabled represents the number of physical id/core id pairs with at least
  // one processor id enabled by the CPU affinity mask.
  cpu_set_t Affinity, Enabled;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
    return -1;
  CPU_ZERO(&Enabled);

  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
  // mmapped because it appears to have 0 size.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return -1;
  }
  // Parse line by line; relevant "key : value" fields repeat per processor.
  SmallVector<StringRef, 8> strs;
  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
                             /*KeepEmpty=*/false);
  int CurProcessor = -1;
  int CurPhysicalId = -1;
  int CurSiblings = -1;
  int CurCoreId = -1;
  for (StringRef Line : strs) {
    std::pair<StringRef, StringRef> Data = Line.split(':');
    auto Name = Data.first.trim();
    auto Val = Data.second.trim();
    // These fields are available if the kernel is configured with CONFIG_SMP.
    if (Name == "processor")
      Val.getAsInteger(10, CurProcessor);
    else if (Name == "physical id")
      Val.getAsInteger(10, CurPhysicalId);
    else if (Name == "siblings")
      Val.getAsInteger(10, CurSiblings);
    else if (Name == "core id") {
      Val.getAsInteger(10, CurCoreId);
      // The processor id corresponds to an index into cpu_set_t.
      // Mark the (physical id, core id) pair as enabled if any of its
      // processor ids is in our affinity mask.
      if (CPU_ISSET(CurProcessor, &Affinity))
        CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
    }
  }
  return CPU_COUNT(&Enabled);
}
#elif defined(__linux__) && defined(__s390x__)
// On s390x, fall back to the count of online logical processors.
static int computeHostNumPhysicalCores() {
  return sysconf(_SC_NPROCESSORS_ONLN);
}
#elif defined(__linux__) && !defined(__ANDROID__)
// Generic Linux: approximate physical cores by the affinity-mask CPU count.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  cpu_set_t *DynAffinity;
  DynAffinity = CPU_ALLOC(2048);
  if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) {
    int NumCPUs = CPU_COUNT(DynAffinity);
    CPU_FREE(DynAffinity);
    return NumCPUs;
  }
  return -1;
}
#elif defined(__APPLE__)
// Gets the number of *physical cores* on the machine.
static int computeHostNumPhysicalCores() {
  uint32_t count;
  size_t len = sizeof(count);
  sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
  if (count < 1) {
    // hw.physicalcpu failed; retry with the older HW_AVAILCPU sysctl key.
    int nm[2];
    nm[0] = CTL_HW;
    nm[1] = HW_AVAILCPU;
    sysctl(nm, 2, &count, &len, NULL, 0);
    if (count < 1)
      return -1;
  }
  return count;
}
#elif defined(__MVS__)
// z/OS: walk fixed system control blocks to find the online CP count.
static int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
  char *PSA = 0;
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
#else
// On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() { return -1; }
#endif
/// Returns the number of physical cores (-1 when unknown). The value is
/// computed once on first use and cached for the lifetime of the process.
int llvm::get_physical_cores() {
  static const int PhysicalCores = computeHostNumPhysicalCores();
  return PhysicalCores;
}