//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the Unix specific implementation of Threading functions.
//
//===----------------------------------------------------------------------===//

#include "Unix.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

#if defined(__APPLE__)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <pthread/qos.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif

#include <pthread.h>

#if defined(__FreeBSD__) || defined(__OpenBSD__)
#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
#endif

#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <errno.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <unistd.h>
#endif

#if defined(__NetBSD__)
#include <lwp.h> // For _lwp_self()
#endif

#if defined(__OpenBSD__)
#include <unistd.h> // For getthrid()
#endif

#if defined(__linux__)
#include <sched.h>       // For sched_getaffinity
#include <sys/syscall.h> // For syscall codes
#include <unistd.h>      // For syscall()
#endif

namespace llvm {
pthread_t
llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
                            std::optional<unsigned> StackSizeInBytes) {
  int errnum;

  // Construct the attributes object.
  pthread_attr_t Attr;
  if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
    ReportErrnumFatal("pthread_attr_init failed", errnum);
  }

  auto AttrGuard = llvm::make_scope_exit([&] {
    if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
      ReportErrnumFatal("pthread_attr_destroy failed", errnum);
    }
  });

  // Set the requested stack size, if given.
  if (StackSizeInBytes) {
    if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
      ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
    }
  }

  // Construct and execute the thread.
  pthread_t Thread;
  if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
    ReportErrnumFatal("pthread_create failed", errnum);

  return Thread;
}

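// As with any raw pthread, the handle returned by
// llvm_execute_on_thread_impl() must eventually be passed to
// llvm_thread_join_impl() or llvm_thread_detach_impl() below, or the thread's
// resources are leaked until process exit (standard POSIX behavior).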
void llvm_thread_detach_impl(pthread_t Thread) {
  int errnum;

  if ((errnum = ::pthread_detach(Thread)) != 0) {
    ReportErrnumFatal("pthread_detach failed", errnum);
  }
}

void llvm_thread_join_impl(pthread_t Thread) {
  int errnum;

  if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
    ReportErrnumFatal("pthread_join failed", errnum);
  }
}

pthread_t llvm_thread_get_id_impl(pthread_t Thread) { return Thread; }

pthread_t llvm_thread_get_current_id_impl() { return ::pthread_self(); }

} // namespace llvm

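// Returns the operating system's own identifier for the calling thread (the
// value seen by system calls, debuggers, and logs), which is distinct from
// the opaque pthread_t handle. The final #else branch reuses pthread_self()
// only as a best-effort fallback.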
uint64_t llvm::get_threadid() {
#if defined(__APPLE__)
  // Calling "mach_thread_self()" bumps the reference count on the thread
  // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
  // count.
  thread_port_t Self = mach_thread_self();
  mach_port_deallocate(mach_task_self(), Self);
  return Self;
#elif defined(__FreeBSD__)
  return uint64_t(pthread_getthreadid_np());
#elif defined(__NetBSD__)
  return uint64_t(_lwp_self());
#elif defined(__OpenBSD__)
  return uint64_t(getthrid());
#elif defined(__ANDROID__)
  return uint64_t(gettid());
#elif defined(__linux__)
  return uint64_t(syscall(SYS_gettid));
#else
  return uint64_t(pthread_self());
#endif
}

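// Per-platform limits on thread-name length. The values appear to include the
// terminating NUL (set_thread_name() below reserves one byte for it); e.g.
// Linux's 16 matches the kernel's TASK_COMM_LEN. A return of 0 means no limit
// is known for the platform.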
static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(__NetBSD__)
  return PTHREAD_MAX_NAMELEN_NP;
#elif defined(__APPLE__)
  return 64;
#elif defined(__linux__)
#if HAVE_PTHREAD_SETNAME_NP
  return 16;
#else
  return 0;
#endif
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  return 16;
#elif defined(__OpenBSD__)
  return 32;
#else
  return 0;
#endif
}

uint32_t llvm::get_max_thread_name_length() {
  return get_max_thread_name_length_impl();
}

void llvm::set_thread_name(const Twine &Name) {
  // Make sure the input is null terminated.
  SmallString<64> Storage;
  StringRef NameStr = Name.toNullTerminatedStringRef(Storage);

  // Truncate from the beginning, not the end, if the specified name is too
  // long. For one, this ensures that the resulting string is still null
  // terminated, but additionally the end of a long thread name will usually
  // be more unique than the beginning, since a common pattern is for similar
  // threads to share a common prefix.
  // Note that the name length includes the null terminator.
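  // For example, with Linux's 16-byte limit (including the NUL) a name such
  // as "llvm-worker-thread-1" would be stored as its last 15 characters,
  // "worker-thread-1".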
  if (get_max_thread_name_length() > 0)
    NameStr = NameStr.take_back(get_max_thread_name_length() - 1);
  (void)NameStr;
#if defined(__linux__)
#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
#if HAVE_PTHREAD_SETNAME_NP
  ::pthread_setname_np(::pthread_self(), NameStr.data());
#endif
#endif
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
  ::pthread_set_name_np(::pthread_self(), NameStr.data());
#elif defined(__NetBSD__)
  ::pthread_setname_np(::pthread_self(), "%s",
                       const_cast<char *>(NameStr.data()));
#elif defined(__APPLE__)
  ::pthread_setname_np(NameStr.data());
#endif
}

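// Best effort: on platforms without a supported way to read the current
// thread's name, Name is simply left empty.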
void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  Name.clear();

#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  int pid = ::getpid();
  uint64_t tid = get_threadid();

  struct kinfo_proc *kp = nullptr, *nkp;
  size_t len = 0;
  int error;
  int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
                (int)pid};

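  // sysctl() reports the required buffer size in `len` when called with a
  // null buffer (and fails with ENOMEM when the buffer is too small), so keep
  // growing the allocation until the whole per-thread snapshot fits.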
  while (1) {
    error = sysctl(ctl, 4, kp, &len, nullptr, 0);
    if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
      // Add extra space in case threads are added before next call.
      len += sizeof(*kp) + len / 10;
      nkp = (struct kinfo_proc *)::realloc(kp, len);
      if (nkp == nullptr) {
        free(kp);
        return;
      }
      kp = nkp;
      continue;
    }
    if (error != 0)
      len = 0;
    break;
  }

  for (size_t i = 0; i < len / sizeof(*kp); i++) {
    if (kp[i].ki_tid == (lwpid_t)tid) {
      Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
      break;
    }
  }
  free(kp);
  return;
#elif defined(__NetBSD__)
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_getname_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));
#elif defined(__OpenBSD__)
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char buf[len];
  ::pthread_get_name_np(::pthread_self(), buf, len);

  Name.append(buf, buf + strlen(buf));
#elif defined(__linux__)
#if HAVE_PTHREAD_GETNAME_NP
  constexpr uint32_t len = get_max_thread_name_length_impl();
  char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
  if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
    Name.append(Buffer, Buffer + strlen(Buffer));
#endif
#endif
}

SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
#if defined(__linux__) && defined(SCHED_IDLE)
  // Some *really* old glibcs are missing SCHED_IDLE.
  // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
  // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
  sched_param priority;
  // For each of the above policies, param->sched_priority must be 0.
  priority.sched_priority = 0;
  // SCHED_IDLE: for running very low priority background jobs.
  // SCHED_OTHER: the standard round-robin time-sharing policy.
  return !pthread_setschedparam(
             pthread_self(),
             // FIXME: consider SCHED_BATCH for Low
             Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
             &priority)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#elif defined(__APPLE__)
  // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
  //
  // Background - Applies to work that isn’t visible to the user and may take
  // significant time to complete. Examples include indexing, backing up, or
  // synchronizing data. This class emphasizes energy efficiency.
  //
  // Utility - Applies to work that takes anywhere from a few seconds to a few
  // minutes to complete. Examples include downloading a document or importing
  // data. This class offers a balance between responsiveness, performance, and
  // energy efficiency.
  const auto qosClass = [&]() {
    switch (Priority) {
    case ThreadPriority::Background:
      return QOS_CLASS_BACKGROUND;
    case ThreadPriority::Low:
      return QOS_CLASS_UTILITY;
    case ThreadPriority::Default:
      return QOS_CLASS_DEFAULT;
    }
  }();
  return !pthread_set_qos_class_self_np(qosClass, 0)
             ? SetThreadPriorityResult::SUCCESS
             : SetThreadPriorityResult::FAILURE;
#endif
  return SetThreadPriorityResult::FAILURE;
}

#include <thread>

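// The CPU-affinity mask is consulted before std::thread::hardware_concurrency()
// so that restrictions applied by taskset(1), cpusets, or container runtimes
// are honored rather than the machine's raw logical-CPU count.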
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  cpuset_t mask;
  CPU_ZERO(&mask);
  if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask),
                         &mask) == 0)
    return CPU_COUNT(&mask);
#elif defined(__linux__)
  cpu_set_t Set;
  if (sched_getaffinity(0, sizeof(Set), &Set) == 0)
    return CPU_COUNT(&Set);
#endif
  // Guard against std::thread::hardware_concurrency() returning 0.
  if (unsigned Val = std::thread::hardware_concurrency())
    return Val;
  return 1;
}

void llvm::ThreadPoolStrategy::apply_thread_strategy(
    unsigned ThreadPoolNum) const {}

llvm::BitVector llvm::get_thread_affinity_mask() {
  // FIXME: Implement
  llvm_unreachable("Not implemented!");
}

unsigned llvm::get_cpus() { return 1; }

#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
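// For example, a two-socket machine with four cores per socket and SMT enabled
// lists 16 "processor" entries but only 8 distinct (physical id, core id)
// pairs, so 8 is returned (fewer if the affinity mask excludes some of them).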
static int computeHostNumPhysicalCores() {
  // Enabled represents the number of physical id/core id pairs with at least
  // one processor id enabled by the CPU affinity mask.
  cpu_set_t Affinity, Enabled;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
    return -1;
  CPU_ZERO(&Enabled);

  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
  // mmapped because it appears to have 0 size.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return -1;
  }
  SmallVector<StringRef, 8> strs;
  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
                             /*KeepEmpty=*/false);
  int CurProcessor = -1;
  int CurPhysicalId = -1;
  int CurSiblings = -1;
  int CurCoreId = -1;
  for (StringRef Line : strs) {
    std::pair<StringRef, StringRef> Data = Line.split(':');
    auto Name = Data.first.trim();
    auto Val = Data.second.trim();
    // These fields are available if the kernel is configured with CONFIG_SMP.
    if (Name == "processor")
      Val.getAsInteger(10, CurProcessor);
    else if (Name == "physical id")
      Val.getAsInteger(10, CurPhysicalId);
    else if (Name == "siblings")
      Val.getAsInteger(10, CurSiblings);
    else if (Name == "core id") {
      Val.getAsInteger(10, CurCoreId);
      // The processor id corresponds to an index into cpu_set_t.
      if (CPU_ISSET(CurProcessor, &Affinity))
        CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
    }
  }
  return CPU_COUNT(&Enabled);
}
#elif defined(__linux__) && defined(__s390x__)
static int computeHostNumPhysicalCores() {
  return sysconf(_SC_NPROCESSORS_ONLN);
}
#elif defined(__linux__) && !defined(__ANDROID__)
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPUs on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  cpu_set_t *DynAffinity;
  DynAffinity = CPU_ALLOC(2048);
  if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) {
    int NumCPUs = CPU_COUNT(DynAffinity);
    CPU_FREE(DynAffinity);
    return NumCPUs;
  }
  return -1;
}
#elif defined(__APPLE__)
// Gets the number of *physical cores* on the machine.
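// The "hw.physicalcpu" sysctl excludes hyperthreads. If it fails, the code
// falls back to the HW_AVAILCPU sysctl, which may report logical rather than
// physical CPUs.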
static int computeHostNumPhysicalCores() {
  uint32_t count;
  size_t len = sizeof(count);
  sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
  if (count < 1) {
    int nm[2];
    nm[0] = CTL_HW;
    nm[1] = HW_AVAILCPU;
    sysctl(nm, 2, &count, &len, NULL, 0);
    if (count < 1)
      return -1;
  }
  return count;
}
#elif defined(__MVS__)
static int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
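  // The PSA is mapped at absolute address zero on z/OS, which is why the
  // PSA -> CVT -> CSD chain below can be walked starting from a null pointer.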
  char *PSA = 0;
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
#else
// On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() { return -1; }
#endif

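// The count is computed once and cached via C++11 thread-safe static
// initialization; -1 means the number of physical cores could not be
// determined.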
int llvm::get_physical_cores() {
  static int NumCores = computeHostNumPhysicalCores();
  return NumCores;
}