mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-09 12:29:24 +00:00
Update hwloc for MSVC builds.
This commit is contained in:
parent
bbb19ea2f9
commit
f25e65b5ac
19 changed files with 314 additions and 255 deletions
70
src/3rdparty/hwloc/NEWS
vendored
70
src/3rdparty/hwloc/NEWS
vendored
|
@ -17,6 +17,76 @@ bug fixes (and other actions) for each version of hwloc since version
|
||||||
0.9.
|
0.9.
|
||||||
|
|
||||||
|
|
||||||
|
Version 2.7.0
|
||||||
|
-------------
|
||||||
|
* Backends
|
||||||
|
+ Add support for NUMA nodes and caches with more than 64 PUs across
|
||||||
|
multiple processor groups on Windows 11 and Windows Server 2022.
|
||||||
|
+ Group objects are not created for Windows processor groups anymore,
|
||||||
|
except if HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS=1 in the environment.
|
||||||
|
+ Expose "Cluster" group objects on Linux kernel 5.16+ for CPUs
|
||||||
|
that share some internal cache or bus. This can be equivalent
|
||||||
|
to the L2 Cache level on some platforms (e.g. x86) or a specific
|
||||||
|
level between L2 and L3 on others (e.g. ARM Kunpeng 920).
|
||||||
|
Thanks to Jonathan Cameron for the help.
|
||||||
|
- HWLOC_DONT_MERGE_CLUSTER_GROUPS=1 may be set in the environment
|
||||||
|
to prevent these groups from being merged with identical caches, etc.
|
||||||
|
+ Improve the oneAPI LevelZero backend:
|
||||||
|
- Expose subdevices such as "ze0.1" inside root OS devices ("ze0")
|
||||||
|
when the hardware contains multiple subdevices.
|
||||||
|
- Add many new attributes to describe device type, and the
|
||||||
|
numbers of slices, subslices, execution units and threads.
|
||||||
|
- Expose the memory information as LevelZeroHBM/DDR/MemorySize infos.
|
||||||
|
+ Ignore the max frequencies of cores in Linux cpukinds when the
|
||||||
|
base frequencies are available (to avoid exposing hybrid CPUs
|
||||||
|
when Intel Turbo Boost Max 3.0 gives slightly different max
|
||||||
|
frequencies to CPU cores).
|
||||||
|
- May be reverted by setting HWLOC_CPUKINDS_MAXFREQ=1 in the environment.
|
||||||
|
* Tools
|
||||||
|
+ Add --grey and --palette options to switch lstopo to greyscale or
|
||||||
|
white-background-only graphics, or to tune individual colors.
|
||||||
|
* Build
|
||||||
|
+ Windows CMake builds now support non-MSVC compilers, detect several
|
||||||
|
features at build time, can build/run tests, etc.
|
||||||
|
Thanks to Michael Hirsch and Alexander Neumann.
|
||||||
|
|
||||||
|
|
||||||
|
Version 2.6.0
|
||||||
|
-------------
|
||||||
|
* Backends
|
||||||
|
+ Expose two cpukinds for energy-efficient cores (icestorm) and
|
||||||
|
high-performance cores (firestorm) on Apple M1 on Mac OS X.
|
||||||
|
+ Use sysfs CPU "capacity" to rank hybrid cores by efficiency
|
||||||
|
on Linux when available (mostly on recent ARM platforms for now).
|
||||||
|
+ Improve HWLOC_MEMBIND_BIND (without the STRICT flag) on Linux kernel
|
||||||
|
>= 5.15: If more than one node is given, the kernel may now use all
|
||||||
|
of them instead of only the first one before falling back to others.
|
||||||
|
+ Expose cache os_index when available on Linux, it may be needed
|
||||||
|
when using resctrl to configure cache partitioning, memory bandwidth
|
||||||
|
monitoring, etc.
|
||||||
|
+ Add a "XGMIHops" distances matrix in the RSMI backend for AMD GPUs
|
||||||
|
interconnected through XGMI links.
|
||||||
|
+ Expose AMD GPU memory information (VRAM and GTT) in the RSMI backend.
|
||||||
|
+ Add OS devices such as "bxi0" for Atos/Bull BXI HCAs on Linux.
|
||||||
|
* Tools
|
||||||
|
+ lstopo has a better placement algorithm with respect to I/O
|
||||||
|
objects, see --children-order in the manpage for details.
|
||||||
|
+ hwloc-annotate may now change object subtypes and cache or memory
|
||||||
|
sizes.
|
||||||
|
* Build
|
||||||
|
+ Allow to specify the ROCm installation for building the RSMI backend:
|
||||||
|
- Use a custom installation path if specified with --with-rocm=<dir>.
|
||||||
|
- Use /opt/rocm-<version> if specified with --with-rocm-version=<version>
|
||||||
|
or the ROCM_VERSION environment variable.
|
||||||
|
- Try /opt/rocm if it exists.
|
||||||
|
- See "How do I enable ROCm SMI and select which version to use?"
|
||||||
|
in the FAQ for details.
|
||||||
|
+ Add a CMakeLists for Windows under contrib/windows-cmake/ .
|
||||||
|
* Documentation
|
||||||
|
+ Add FAQ entry "How do I create a custom heterogeneous and
|
||||||
|
asymmetric topology?"
|
||||||
|
|
||||||
|
|
||||||
Version 2.5.0
|
Version 2.5.0
|
||||||
-------------
|
-------------
|
||||||
* API
|
* API
|
||||||
|
|
6
src/3rdparty/hwloc/VERSION
vendored
6
src/3rdparty/hwloc/VERSION
vendored
|
@ -8,7 +8,7 @@
|
||||||
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
|
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
|
||||||
|
|
||||||
major=2
|
major=2
|
||||||
minor=5
|
minor=7
|
||||||
release=0
|
release=0
|
||||||
|
|
||||||
# greek is used for alpha or beta release tags. If it is non-empty,
|
# greek is used for alpha or beta release tags. If it is non-empty,
|
||||||
|
@ -22,7 +22,7 @@ greek=
|
||||||
|
|
||||||
# The date when this release was created
|
# The date when this release was created
|
||||||
|
|
||||||
date="Jun 14, 2021"
|
date="Dec 06, 2021"
|
||||||
|
|
||||||
# If snapshot=1, then use the value from snapshot_version as the
|
# If snapshot=1, then use the value from snapshot_version as the
|
||||||
# entire hwloc version (i.e., ignore major, minor, release, and
|
# entire hwloc version (i.e., ignore major, minor, release, and
|
||||||
|
@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
|
||||||
# 2. Version numbers are described in the Libtool current:revision:age
|
# 2. Version numbers are described in the Libtool current:revision:age
|
||||||
# format.
|
# format.
|
||||||
|
|
||||||
libhwloc_so_version=20:0:5
|
libhwloc_so_version=20:2:5
|
||||||
libnetloc_so_version=0:0:0
|
libnetloc_so_version=0:0:0
|
||||||
|
|
||||||
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
|
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
|
||||||
|
|
45
src/3rdparty/hwloc/include/hwloc.h
vendored
45
src/3rdparty/hwloc/include/hwloc.h
vendored
|
@ -346,7 +346,8 @@ typedef enum hwloc_obj_osdev_type_e {
|
||||||
* For instance the "eth0" interface on Linux. */
|
* For instance the "eth0" interface on Linux. */
|
||||||
HWLOC_OBJ_OSDEV_OPENFABRICS, /**< \brief Operating system openfabrics device.
|
HWLOC_OBJ_OSDEV_OPENFABRICS, /**< \brief Operating system openfabrics device.
|
||||||
* For instance the "mlx4_0" InfiniBand HCA,
|
* For instance the "mlx4_0" InfiniBand HCA,
|
||||||
* or "hfi1_0" Omni-Path interface on Linux. */
|
* "hfi1_0" Omni-Path interface,
|
||||||
|
* or "bxi0" Atos/Bull BXI HCA on Linux. */
|
||||||
HWLOC_OBJ_OSDEV_DMA, /**< \brief Operating system dma engine device.
|
HWLOC_OBJ_OSDEV_DMA, /**< \brief Operating system dma engine device.
|
||||||
* For instance the "dma0chan0" DMA channel on Linux. */
|
* For instance the "dma0chan0" DMA channel on Linux. */
|
||||||
HWLOC_OBJ_OSDEV_COPROC /**< \brief Operating system co-processor device.
|
HWLOC_OBJ_OSDEV_COPROC /**< \brief Operating system co-processor device.
|
||||||
|
@ -1212,8 +1213,9 @@ HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpus
|
||||||
|
|
||||||
/** \brief Get current process or thread binding.
|
/** \brief Get current process or thread binding.
|
||||||
*
|
*
|
||||||
* Writes into \p set the physical cpuset which the process or thread (according to \e
|
* The CPU-set \p set (previously allocated by the caller)
|
||||||
* flags) was last bound to.
|
* is filled with the list of PUs which the process or
|
||||||
|
* thread (according to \e flags) was last bound to.
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
|
HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
|
||||||
|
|
||||||
|
@ -1231,6 +1233,10 @@ HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t s
|
||||||
HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags);
|
HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags);
|
||||||
|
|
||||||
/** \brief Get the current physical binding of process \p pid.
|
/** \brief Get the current physical binding of process \p pid.
|
||||||
|
*
|
||||||
|
* The CPU-set \p set (previously allocated by the caller)
|
||||||
|
* is filled with the list of PUs which the process
|
||||||
|
* was last bound to.
|
||||||
*
|
*
|
||||||
* \note \p hwloc_pid_t is \p pid_t on Unix platforms,
|
* \note \p hwloc_pid_t is \p pid_t on Unix platforms,
|
||||||
* and \p HANDLE on native Windows platforms.
|
* and \p HANDLE on native Windows platforms.
|
||||||
|
@ -1256,6 +1262,10 @@ HWLOC_DECLSPEC int hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thr
|
||||||
|
|
||||||
#ifdef hwloc_thread_t
|
#ifdef hwloc_thread_t
|
||||||
/** \brief Get the current physical binding of thread \p tid.
|
/** \brief Get the current physical binding of thread \p tid.
|
||||||
|
*
|
||||||
|
* The CPU-set \p set (previously allocated by the caller)
|
||||||
|
* is filled with the list of PUs which the thread
|
||||||
|
* was last bound to.
|
||||||
*
|
*
|
||||||
* \note \p hwloc_thread_t is \p pthread_t on Unix platforms,
|
* \note \p hwloc_thread_t is \p pthread_t on Unix platforms,
|
||||||
* and \p HANDLE on native Windows platforms.
|
* and \p HANDLE on native Windows platforms.
|
||||||
|
@ -1266,6 +1276,10 @@ HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thr
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** \brief Get the last physical CPU where the current process or thread ran.
|
/** \brief Get the last physical CPU where the current process or thread ran.
|
||||||
|
*
|
||||||
|
* The CPU-set \p set (previously allocated by the caller)
|
||||||
|
* is filled with the list of PUs which the process or
|
||||||
|
* thread (according to \e flags) last ran on.
|
||||||
*
|
*
|
||||||
* The operating system may move some tasks from one processor
|
* The operating system may move some tasks from one processor
|
||||||
* to another at any time according to their binding,
|
* to another at any time according to their binding,
|
||||||
|
@ -1281,6 +1295,10 @@ HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thr
|
||||||
HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
|
HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
|
||||||
|
|
||||||
/** \brief Get the last physical CPU where a process ran.
|
/** \brief Get the last physical CPU where a process ran.
|
||||||
|
*
|
||||||
|
* The CPU-set \p set (previously allocated by the caller)
|
||||||
|
* is filled with the list of PUs which the process
|
||||||
|
* last ran on.
|
||||||
*
|
*
|
||||||
* The operating system may move some tasks from one processor
|
* The operating system may move some tasks from one processor
|
||||||
* to another at any time according to their binding,
|
* to another at any time according to their binding,
|
||||||
|
@ -1511,6 +1529,9 @@ HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitm
|
||||||
/** \brief Query the default memory binding policy and physical locality of the
|
/** \brief Query the default memory binding policy and physical locality of the
|
||||||
* current process or thread.
|
* current process or thread.
|
||||||
*
|
*
|
||||||
|
* The bitmap \p set (previously allocated by the caller)
|
||||||
|
* is filled with the process or thread memory binding.
|
||||||
|
*
|
||||||
* This function has two output parameters: \p set and \p policy.
|
* This function has two output parameters: \p set and \p policy.
|
||||||
* The values returned in these parameters depend on both the \p flags
|
* The values returned in these parameters depend on both the \p flags
|
||||||
* passed in and the current memory binding policies and nodesets in
|
* passed in and the current memory binding policies and nodesets in
|
||||||
|
@ -1571,6 +1592,9 @@ HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t
|
||||||
/** \brief Query the default memory binding policy and physical locality of the
|
/** \brief Query the default memory binding policy and physical locality of the
|
||||||
* specified process.
|
* specified process.
|
||||||
*
|
*
|
||||||
|
* The bitmap \p set (previously allocated by the caller)
|
||||||
|
* is filled with the process memory binding.
|
||||||
|
*
|
||||||
* This function has two output parameters: \p set and \p policy.
|
* This function has two output parameters: \p set and \p policy.
|
||||||
* The values returned in these parameters depend on both the \p flags
|
* The values returned in these parameters depend on both the \p flags
|
||||||
* passed in and the current memory binding policies and nodesets in
|
* passed in and the current memory binding policies and nodesets in
|
||||||
|
@ -1624,6 +1648,9 @@ HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void
|
||||||
/** \brief Query the CPUs near the physical NUMA node(s) and binding policy of
|
/** \brief Query the CPUs near the physical NUMA node(s) and binding policy of
|
||||||
* the memory identified by (\p addr, \p len ).
|
* the memory identified by (\p addr, \p len ).
|
||||||
*
|
*
|
||||||
|
* The bitmap \p set (previously allocated by the caller)
|
||||||
|
* is filled with the memory area binding.
|
||||||
|
*
|
||||||
* This function has two output parameters: \p set and \p policy.
|
* This function has two output parameters: \p set and \p policy.
|
||||||
* The values returned in these parameters depend on both the \p flags
|
* The values returned in these parameters depend on both the \p flags
|
||||||
* passed in and the memory binding policies and nodesets of the pages
|
* passed in and the memory binding policies and nodesets of the pages
|
||||||
|
@ -1652,7 +1679,8 @@ HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void
|
||||||
|
|
||||||
/** \brief Get the NUMA nodes where memory identified by (\p addr, \p len ) is physically allocated.
|
/** \brief Get the NUMA nodes where memory identified by (\p addr, \p len ) is physically allocated.
|
||||||
*
|
*
|
||||||
* Fills \p set according to the NUMA nodes where the memory area pages
|
* The bitmap \p set (previously allocated by the caller)
|
||||||
|
* is filled according to the NUMA nodes where the memory area pages
|
||||||
* are physically allocated. If no page is actually allocated yet,
|
* are physically allocated. If no page is actually allocated yet,
|
||||||
* \p set may be empty.
|
* \p set may be empty.
|
||||||
*
|
*
|
||||||
|
@ -1698,9 +1726,12 @@ HWLOC_DECLSPEC void *hwloc_alloc_membind(hwloc_topology_t topology, size_t len,
|
||||||
|
|
||||||
/** \brief Allocate some memory on NUMA memory nodes specified by \p set
|
/** \brief Allocate some memory on NUMA memory nodes specified by \p set
|
||||||
*
|
*
|
||||||
* This is similar to hwloc_alloc_membind_nodeset() except that it is allowed to change
|
* First, try to allocate properly with hwloc_alloc_membind().
|
||||||
* the current memory binding policy, thus providing more binding support, at
|
* On failure, the current process or thread memory binding policy
|
||||||
* the expense of changing the current state.
|
* is changed with hwloc_set_membind() before allocating memory.
|
||||||
|
* Thus this function works in more cases, at the expense of changing
|
||||||
|
* the current state (possibly affecting future allocations that
|
||||||
|
* would not specify any policy).
|
||||||
*
|
*
|
||||||
* If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
|
* If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
|
||||||
* Otherwise it's a cpuset.
|
* Otherwise it's a cpuset.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright © 2009 CNRS
|
* Copyright © 2009 CNRS
|
||||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
* Copyright © 2009-2021 Inria. All rights reserved.
|
||||||
* Copyright © 2009-2012 Université Bordeaux
|
* Copyright © 2009-2012 Université Bordeaux
|
||||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
|
@ -11,9 +11,9 @@
|
||||||
#ifndef HWLOC_CONFIG_H
|
#ifndef HWLOC_CONFIG_H
|
||||||
#define HWLOC_CONFIG_H
|
#define HWLOC_CONFIG_H
|
||||||
|
|
||||||
#define HWLOC_VERSION "2.5.0"
|
#define HWLOC_VERSION "2.7.0"
|
||||||
#define HWLOC_VERSION_MAJOR 2
|
#define HWLOC_VERSION_MAJOR 2
|
||||||
#define HWLOC_VERSION_MINOR 5
|
#define HWLOC_VERSION_MINOR 7
|
||||||
#define HWLOC_VERSION_RELEASE 0
|
#define HWLOC_VERSION_RELEASE 0
|
||||||
#define HWLOC_VERSION_GREEK ""
|
#define HWLOC_VERSION_GREEK ""
|
||||||
|
|
||||||
|
|
25
src/3rdparty/hwloc/include/hwloc/cpukinds.h
vendored
25
src/3rdparty/hwloc/include/hwloc/cpukinds.h
vendored
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright © 2020 Inria. All rights reserved.
|
* Copyright © 2020-2021 Inria. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -42,18 +42,23 @@ extern "C" {
|
||||||
* (for instance the "CoreType" and "FrequencyMaxMHz",
|
* (for instance the "CoreType" and "FrequencyMaxMHz",
|
||||||
* see \ref topoattrs_cpukinds).
|
* see \ref topoattrs_cpukinds).
|
||||||
*
|
*
|
||||||
* A higher efficiency value means intrinsic greater performance
|
* A higher efficiency value means greater intrinsic performance
|
||||||
* (and possibly less performance/power efficiency).
|
* (and possibly less performance/power efficiency).
|
||||||
* Kinds with lower efficiency are ranked first:
|
* Kinds with lower efficiency values are ranked first:
|
||||||
* Passing 0 as \p kind_index to hwloc_cpukinds_get_info() will
|
* Passing 0 as \p kind_index to hwloc_cpukinds_get_info() will
|
||||||
* return information about the less efficient CPU kind.
|
* return information about the CPU kind with lower performance
|
||||||
|
* but higher energy-efficiency.
|
||||||
|
* Higher \p kind_index values would rather return information
|
||||||
|
* about power-hungry high-performance cores.
|
||||||
*
|
*
|
||||||
* When available, efficiency values are gathered from the operating
|
* When available, efficiency values are gathered from the operating system.
|
||||||
* system (when \p cpukind_efficiency is set in the
|
* If so, \p cpukind_efficiency is set in the struct hwloc_topology_discovery_support array.
|
||||||
* struct hwloc_topology_discovery_support array, only on Windows 10 for now).
|
* This is currently available on Windows 10, Mac OS X (Darwin),
|
||||||
* Otherwise hwloc tries to compute efficiencies
|
* and on some Linux platforms where core "capacity" is exposed in sysfs.
|
||||||
* by comparing CPU kinds using frequencies (on ARM),
|
*
|
||||||
* or core types and frequencies (on other architectures).
|
* If the operating system does not expose core efficiencies natively,
|
||||||
|
* hwloc tries to compute efficiencies by comparing CPU kinds using
|
||||||
|
* frequencies (on ARM), or core types and frequencies (on other architectures).
|
||||||
* The environment variable HWLOC_CPUKINDS_RANKING may be used
|
* The environment variable HWLOC_CPUKINDS_RANKING may be used
|
||||||
* to change this heuristic, see \ref envvar.
|
* to change this heuristic, see \ref envvar.
|
||||||
*
|
*
|
||||||
|
|
5
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
5
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
|
@ -35,7 +35,8 @@ extern "C" {
|
||||||
* from a core in another node.
|
* from a core in another node.
|
||||||
* The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER.
|
* The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER.
|
||||||
* The name of this distances structure is "NUMALatency".
|
* The name of this distances structure is "NUMALatency".
|
||||||
* Others distance structures include and "XGMIBandwidth" and "NVLinkBandwidth".
|
* Other distance structures include "XGMIBandwidth", "XGMIHops"
|
||||||
|
* and "NVLinkBandwidth".
|
||||||
*
|
*
|
||||||
* The matrix may also contain bandwidths between random sets of objects,
|
* The matrix may also contain bandwidths between random sets of objects,
|
||||||
* possibly provided by the user, as specified in the \p kind attribute.
|
* possibly provided by the user, as specified in the \p kind attribute.
|
||||||
|
@ -159,7 +160,7 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
|
||||||
* Usually only one distances structure may match a given name.
|
* Usually only one distances structure may match a given name.
|
||||||
*
|
*
|
||||||
* The name of the most common structure is "NUMALatency".
|
* The name of the most common structure is "NUMALatency".
|
||||||
* Others include "XGMIBandwidth" and "NVLinkBandwidth".
|
* Others include "XGMIBandwidth", "XGMIHops" and "NVLinkBandwidth".
|
||||||
*/
|
*/
|
||||||
HWLOC_DECLSPEC int
|
HWLOC_DECLSPEC int
|
||||||
hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
|
hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
|
||||||
|
|
9
src/3rdparty/hwloc/include/hwloc/linux.h
vendored
9
src/3rdparty/hwloc/include/hwloc/linux.h
vendored
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright © 2009 CNRS
|
* Copyright © 2009 CNRS
|
||||||
* Copyright © 2009-2016 Inria. All rights reserved.
|
* Copyright © 2009-2021 Inria. All rights reserved.
|
||||||
* Copyright © 2009-2011 Université Bordeaux
|
* Copyright © 2009-2011 Université Bordeaux
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
*/
|
*/
|
||||||
|
@ -44,6 +44,10 @@ extern "C" {
|
||||||
HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set);
|
HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set);
|
||||||
|
|
||||||
/** \brief Get the current binding of thread \p tid
|
/** \brief Get the current binding of thread \p tid
|
||||||
|
*
|
||||||
|
* The CPU-set \p set (previously allocated by the caller)
|
||||||
|
* is filled with the list of PUs which the thread
|
||||||
|
* was last bound to.
|
||||||
*
|
*
|
||||||
* The behavior is exactly the same as the Linux sched_getaffinity system call,
|
* The behavior is exactly the same as the Linux sched_getaffinity system call,
|
||||||
* but uses a hwloc cpuset.
|
* but uses a hwloc cpuset.
|
||||||
|
@ -54,6 +58,9 @@ HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t
|
||||||
HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set);
|
HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set);
|
||||||
|
|
||||||
/** \brief Get the last physical CPU where thread \p tid ran.
|
/** \brief Get the last physical CPU where thread \p tid ran.
|
||||||
|
*
|
||||||
|
* The CPU-set \p set (previously allocated by the caller)
|
||||||
|
* is filled with the PU which the thread last ran on.
|
||||||
*
|
*
|
||||||
* \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with
|
* \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with
|
||||||
* ::HWLOC_CPUBIND_THREAD as flags.
|
* ::HWLOC_CPUBIND_THREAD as flags.
|
||||||
|
|
1
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
1
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
|
@ -497,6 +497,7 @@ hwloc_filter_check_pcidev_subtype_important(unsigned classid)
|
||||||
return (baseclass == 0x03 /* PCI_BASE_CLASS_DISPLAY */
|
return (baseclass == 0x03 /* PCI_BASE_CLASS_DISPLAY */
|
||||||
|| baseclass == 0x02 /* PCI_BASE_CLASS_NETWORK */
|
|| baseclass == 0x02 /* PCI_BASE_CLASS_NETWORK */
|
||||||
|| baseclass == 0x01 /* PCI_BASE_CLASS_STORAGE */
|
|| baseclass == 0x01 /* PCI_BASE_CLASS_STORAGE */
|
||||||
|
|| baseclass == 0x00 /* Unclassified, for Atos/Bull BXI */
|
||||||
|| baseclass == 0x0b /* PCI_BASE_CLASS_PROCESSOR */
|
|| baseclass == 0x0b /* PCI_BASE_CLASS_PROCESSOR */
|
||||||
|| classid == 0x0c04 /* PCI_CLASS_SERIAL_FIBER */
|
|| classid == 0x0c04 /* PCI_CLASS_SERIAL_FIBER */
|
||||||
|| classid == 0x0c06 /* PCI_CLASS_SERIAL_INFINIBAND */
|
|| classid == 0x0c06 /* PCI_CLASS_SERIAL_INFINIBAND */
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright © 2009, 2011, 2012 CNRS. All rights reserved.
|
* Copyright © 2009, 2011, 2012 CNRS. All rights reserved.
|
||||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
* Copyright © 2009-2021 Inria. All rights reserved.
|
||||||
* Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved.
|
* Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved.
|
||||||
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
|
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
|
@ -290,10 +290,6 @@
|
||||||
/* Define to '1' if sysctlbyname is present and usable */
|
/* Define to '1' if sysctlbyname is present and usable */
|
||||||
/* #undef HAVE_SYSCTLBYNAME */
|
/* #undef HAVE_SYSCTLBYNAME */
|
||||||
|
|
||||||
/* Define to 1 if the system has the type
|
|
||||||
`SYSTEM_LOGICAL_PROCESSOR_INFORMATION'. */
|
|
||||||
#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION 1
|
|
||||||
|
|
||||||
/* Define to 1 if the system has the type
|
/* Define to 1 if the system has the type
|
||||||
`SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */
|
`SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */
|
||||||
#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX 1
|
#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX 1
|
||||||
|
|
2
src/3rdparty/hwloc/include/private/misc.h
vendored
2
src/3rdparty/hwloc/include/private/misc.h
vendored
|
@ -504,7 +504,7 @@ hwloc__obj_type_is_icache(hwloc_obj_type_t type)
|
||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
#else /* HAVE_USELOCALE */
|
#else /* HAVE_USELOCALE */
|
||||||
#if __HWLOC_HAVE_ATTRIBUTE_UNUSED
|
#if HWLOC_HAVE_ATTRIBUTE_UNUSED
|
||||||
#define hwloc_localeswitch_declare int __dummy_nolocale __hwloc_attribute_unused
|
#define hwloc_localeswitch_declare int __dummy_nolocale __hwloc_attribute_unused
|
||||||
#define hwloc_localeswitch_init()
|
#define hwloc_localeswitch_init()
|
||||||
#else
|
#else
|
||||||
|
|
1
src/3rdparty/hwloc/include/private/private.h
vendored
1
src/3rdparty/hwloc/include/private/private.h
vendored
|
@ -480,6 +480,7 @@ extern char * hwloc_progname(struct hwloc_topology *topology);
|
||||||
#define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */
|
#define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */
|
||||||
#define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220 /* no subkind */
|
#define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220 /* no subkind */
|
||||||
#define HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN 221 /* no subkind */
|
#define HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN 221 /* no subkind */
|
||||||
|
#define HWLOC_GROUP_KIND_LINUX_CLUSTER 222 /* no subkind */
|
||||||
/* distance groups */
|
/* distance groups */
|
||||||
#define HWLOC_GROUP_KIND_DISTANCE 900 /* subkind is round of adding these groups during distance based grouping */
|
#define HWLOC_GROUP_KIND_DISTANCE 900 /* subkind is round of adding these groups during distance based grouping */
|
||||||
/* finally, hwloc-specific groups required to insert something else, should disappear as soon as possible */
|
/* finally, hwloc-specific groups required to insert something else, should disappear as soon as possible */
|
||||||
|
|
21
src/3rdparty/hwloc/include/private/windows.h
vendored
Normal file
21
src/3rdparty/hwloc/include/private/windows.h
vendored
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2009 Université Bordeaux
|
||||||
|
* Copyright © 2020 Inria. All rights reserved.
|
||||||
|
*
|
||||||
|
* See COPYING in top-level directory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef HWLOC_PRIVATE_WINDOWS_H
|
||||||
|
#define HWLOC_PRIVATE_WINDOWS_H
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#define _ANONYMOUS_UNION __extension__
|
||||||
|
#define _ANONYMOUS_STRUCT __extension__
|
||||||
|
#else
|
||||||
|
#define _ANONYMOUS_UNION
|
||||||
|
#define _ANONYMOUS_STRUCT
|
||||||
|
#endif /* __GNUC__ */
|
||||||
|
#define DUMMYUNIONNAME
|
||||||
|
#define DUMMYSTRUCTNAME
|
||||||
|
|
||||||
|
#endif /* HWLOC_PRIVATE_WINDOWS_H */
|
7
src/3rdparty/hwloc/src/cpukinds.c
vendored
7
src/3rdparty/hwloc/src/cpukinds.c
vendored
|
@ -42,6 +42,9 @@ hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old)
|
||||||
struct hwloc_internal_cpukind_s *kinds;
|
struct hwloc_internal_cpukind_s *kinds;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
if (!old->nr_cpukinds)
|
||||||
|
return 0;
|
||||||
|
|
||||||
kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds));
|
kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds));
|
||||||
if (!kinds)
|
if (!kinds)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -445,7 +448,9 @@ static int hwloc__cpukinds_compare_ranking_values(const void *_a, const void *_b
|
||||||
{
|
{
|
||||||
const struct hwloc_internal_cpukind_s *a = _a;
|
const struct hwloc_internal_cpukind_s *a = _a;
|
||||||
const struct hwloc_internal_cpukind_s *b = _b;
|
const struct hwloc_internal_cpukind_s *b = _b;
|
||||||
return a->ranking_value - b->ranking_value;
|
uint64_t arv = a->ranking_value;
|
||||||
|
uint64_t brv = b->ranking_value;
|
||||||
|
return arv < brv ? -1 : arv > brv ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this function requires ranking values to be unique */
|
/* this function requires ranking values to be unique */
|
||||||
|
|
4
src/3rdparty/hwloc/src/memattrs.c
vendored
4
src/3rdparty/hwloc/src/memattrs.c
vendored
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright © 2020 Inria. All rights reserved.
|
* Copyright © 2020-2021 Inria. All rights reserved.
|
||||||
* See COPYING in top-level directory.
|
* See COPYING in top-level directory.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -127,6 +127,8 @@ hwloc_internal_memattrs_dup(struct hwloc_topology *new, struct hwloc_topology *o
|
||||||
struct hwloc_internal_memattr_s *imattrs;
|
struct hwloc_internal_memattr_s *imattrs;
|
||||||
hwloc_memattr_id_t id;
|
hwloc_memattr_id_t id;
|
||||||
|
|
||||||
|
/* old->nr_memattrs is always > 0 thanks to default memattrs */
|
||||||
|
|
||||||
imattrs = hwloc_tma_malloc(tma, old->nr_memattrs * sizeof(*imattrs));
|
imattrs = hwloc_tma_malloc(tma, old->nr_memattrs * sizeof(*imattrs));
|
||||||
if (!imattrs)
|
if (!imattrs)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
3
src/3rdparty/hwloc/src/pci-common.c
vendored
3
src/3rdparty/hwloc/src/pci-common.c
vendored
|
@ -810,13 +810,14 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config,
|
||||||
* PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane
|
* PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane
|
||||||
* PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane
|
* PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane
|
||||||
* PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane
|
* PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane
|
||||||
|
* PCIe Gen6 = 64 GT/s signal-rate per lane with 128/130 encoding = 8 GB/s data-rate per lane
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* lanespeed in Gbit/s */
|
/* lanespeed in Gbit/s */
|
||||||
if (speed <= 2)
|
if (speed <= 2)
|
||||||
lanespeed = 2.5f * speed * 0.8f;
|
lanespeed = 2.5f * speed * 0.8f;
|
||||||
else
|
else
|
||||||
lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */
|
lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen7 will be 128 GT/s and so on */
|
||||||
|
|
||||||
/* linkspeed in GB/s */
|
/* linkspeed in GB/s */
|
||||||
*linkspeed = lanespeed * width / 8;
|
*linkspeed = lanespeed * width / 8;
|
||||||
|
|
242
src/3rdparty/hwloc/src/topology-windows.c
vendored
242
src/3rdparty/hwloc/src/topology-windows.c
vendored
|
@ -13,6 +13,7 @@
|
||||||
#include "hwloc.h"
|
#include "hwloc.h"
|
||||||
#include "hwloc/windows.h"
|
#include "hwloc/windows.h"
|
||||||
#include "private/private.h"
|
#include "private/private.h"
|
||||||
|
#include "private/windows.h" /* must be before windows.h */
|
||||||
#include "private/debug.h"
|
#include "private/debug.h"
|
||||||
|
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
@ -65,26 +66,6 @@ typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
|
||||||
# endif /* HAVE_RELATIONPROCESSORPACKAGE */
|
# endif /* HAVE_RELATIONPROCESSORPACKAGE */
|
||||||
#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
|
#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
|
||||||
|
|
||||||
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION
|
|
||||||
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION {
|
|
||||||
ULONG_PTR ProcessorMask;
|
|
||||||
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
|
|
||||||
_ANONYMOUS_UNION
|
|
||||||
union {
|
|
||||||
struct {
|
|
||||||
BYTE flags;
|
|
||||||
} ProcessorCore;
|
|
||||||
struct {
|
|
||||||
DWORD NodeNumber;
|
|
||||||
} NumaNode;
|
|
||||||
CACHE_DESCRIPTOR Cache;
|
|
||||||
ULONGLONG Reserved[2];
|
|
||||||
} DUMMYUNIONNAME;
|
|
||||||
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Extended interface, for group support */
|
|
||||||
|
|
||||||
#ifndef HAVE_GROUP_AFFINITY
|
#ifndef HAVE_GROUP_AFFINITY
|
||||||
typedef struct _GROUP_AFFINITY {
|
typedef struct _GROUP_AFFINITY {
|
||||||
KAFFINITY Mask;
|
KAFFINITY Mask;
|
||||||
|
@ -93,35 +74,40 @@ typedef struct _GROUP_AFFINITY {
|
||||||
} GROUP_AFFINITY, *PGROUP_AFFINITY;
|
} GROUP_AFFINITY, *PGROUP_AFFINITY;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef HAVE_PROCESSOR_RELATIONSHIP
|
/* always use our own structure because the EfficiencyClass field didn't exist before Win10 */
|
||||||
typedef struct HWLOC_PROCESSOR_RELATIONSHIP {
|
typedef struct HWLOC_PROCESSOR_RELATIONSHIP {
|
||||||
BYTE Flags;
|
BYTE Flags;
|
||||||
BYTE EfficiencyClass; /* for RelationProcessorCore, higher means greater performance but less efficiency, only available in Win10+ */
|
BYTE EfficiencyClass; /* for RelationProcessorCore, higher means greater performance but less efficiency */
|
||||||
BYTE Reserved[20];
|
BYTE Reserved[20];
|
||||||
WORD GroupCount;
|
WORD GroupCount;
|
||||||
GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
|
GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
|
||||||
} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
|
} HWLOC_PROCESSOR_RELATIONSHIP;
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef HAVE_NUMA_NODE_RELATIONSHIP
|
/* always use our own structure because the GroupCount and GroupMasks fields didn't exist in some Win10 */
|
||||||
typedef struct _NUMA_NODE_RELATIONSHIP {
|
typedef struct HWLOC_NUMA_NODE_RELATIONSHIP {
|
||||||
DWORD NodeNumber;
|
DWORD NodeNumber;
|
||||||
BYTE Reserved[20];
|
BYTE Reserved[18];
|
||||||
|
WORD GroupCount;
|
||||||
|
_ANONYMOUS_UNION
|
||||||
|
union {
|
||||||
GROUP_AFFINITY GroupMask;
|
GROUP_AFFINITY GroupMask;
|
||||||
} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP;
|
GROUP_AFFINITY GroupMasks[ANYSIZE_ARRAY];
|
||||||
#endif
|
} DUMMYUNIONNAME;
|
||||||
|
} HWLOC_NUMA_NODE_RELATIONSHIP;
|
||||||
|
|
||||||
#ifndef HAVE_CACHE_RELATIONSHIP
|
typedef struct HWLOC_CACHE_RELATIONSHIP {
|
||||||
typedef struct _CACHE_RELATIONSHIP {
|
|
||||||
BYTE Level;
|
BYTE Level;
|
||||||
BYTE Associativity;
|
BYTE Associativity;
|
||||||
WORD LineSize;
|
WORD LineSize;
|
||||||
DWORD CacheSize;
|
DWORD CacheSize;
|
||||||
PROCESSOR_CACHE_TYPE Type;
|
PROCESSOR_CACHE_TYPE Type;
|
||||||
BYTE Reserved[20];
|
BYTE Reserved[18];
|
||||||
|
WORD GroupCount;
|
||||||
|
union {
|
||||||
GROUP_AFFINITY GroupMask;
|
GROUP_AFFINITY GroupMask;
|
||||||
} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP;
|
GROUP_AFFINITY GroupMasks[ANYSIZE_ARRAY];
|
||||||
#endif
|
} DUMMYUNIONNAME;
|
||||||
|
} HWLOC_CACHE_RELATIONSHIP;
|
||||||
|
|
||||||
#ifndef HAVE_PROCESSOR_GROUP_INFO
|
#ifndef HAVE_PROCESSOR_GROUP_INFO
|
||||||
typedef struct _PROCESSOR_GROUP_INFO {
|
typedef struct _PROCESSOR_GROUP_INFO {
|
||||||
|
@ -141,20 +127,19 @@ typedef struct _GROUP_RELATIONSHIP {
|
||||||
} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
|
} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
|
/* always use our own structure because we need our own HWLOC_PROCESSOR/CACHE/NUMA_NODE_RELATIONSHIP */
|
||||||
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
|
typedef struct HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
|
||||||
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
|
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
|
||||||
DWORD Size;
|
DWORD Size;
|
||||||
_ANONYMOUS_UNION
|
_ANONYMOUS_UNION
|
||||||
union {
|
union {
|
||||||
PROCESSOR_RELATIONSHIP Processor;
|
HWLOC_PROCESSOR_RELATIONSHIP Processor;
|
||||||
NUMA_NODE_RELATIONSHIP NumaNode;
|
HWLOC_NUMA_NODE_RELATIONSHIP NumaNode;
|
||||||
CACHE_RELATIONSHIP Cache;
|
HWLOC_CACHE_RELATIONSHIP Cache;
|
||||||
GROUP_RELATIONSHIP Group;
|
GROUP_RELATIONSHIP Group;
|
||||||
/* Odd: no member to tell the cpu mask of the package... */
|
/* Odd: no member to tell the cpu mask of the package... */
|
||||||
} DUMMYUNIONNAME;
|
} DUMMYUNIONNAME;
|
||||||
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
|
} HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK
|
#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK
|
||||||
typedef union _PSAPI_WORKING_SET_EX_BLOCK {
|
typedef union _PSAPI_WORKING_SET_EX_BLOCK {
|
||||||
|
@ -200,10 +185,7 @@ static PFN_GETCURRENTPROCESSORNUMBER GetCurrentProcessorNumberProc;
|
||||||
typedef VOID (WINAPI *PFN_GETCURRENTPROCESSORNUMBEREX)(PPROCESSOR_NUMBER);
|
typedef VOID (WINAPI *PFN_GETCURRENTPROCESSORNUMBEREX)(PPROCESSOR_NUMBER);
|
||||||
static PFN_GETCURRENTPROCESSORNUMBEREX GetCurrentProcessorNumberExProc;
|
static PFN_GETCURRENTPROCESSORNUMBEREX GetCurrentProcessorNumberExProc;
|
||||||
|
|
||||||
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATION)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer, PDWORD ReturnLength);
|
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *Buffer, PDWORD ReturnLength);
|
||||||
static PFN_GETLOGICALPROCESSORINFORMATION GetLogicalProcessorInformationProc;
|
|
||||||
|
|
||||||
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnLength);
|
|
||||||
static PFN_GETLOGICALPROCESSORINFORMATIONEX GetLogicalProcessorInformationExProc;
|
static PFN_GETLOGICALPROCESSORINFORMATIONEX GetLogicalProcessorInformationExProc;
|
||||||
|
|
||||||
typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE hThread, const GROUP_AFFINITY *GroupAffinity, PGROUP_AFFINITY PreviousGroupAffinity);
|
typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE hThread, const GROUP_AFFINITY *GroupAffinity, PGROUP_AFFINITY PreviousGroupAffinity);
|
||||||
|
@ -244,8 +226,6 @@ static void hwloc_win_get_function_ptrs(void)
|
||||||
(PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, "GetActiveProcessorGroupCount");
|
(PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, "GetActiveProcessorGroupCount");
|
||||||
GetActiveProcessorCountProc =
|
GetActiveProcessorCountProc =
|
||||||
(PFN_GETACTIVEPROCESSORCOUNT) GetProcAddress(kernel32, "GetActiveProcessorCount");
|
(PFN_GETACTIVEPROCESSORCOUNT) GetProcAddress(kernel32, "GetActiveProcessorCount");
|
||||||
GetLogicalProcessorInformationProc =
|
|
||||||
(PFN_GETLOGICALPROCESSORINFORMATION) GetProcAddress(kernel32, "GetLogicalProcessorInformation");
|
|
||||||
GetCurrentProcessorNumberProc =
|
GetCurrentProcessorNumberProc =
|
||||||
(PFN_GETCURRENTPROCESSORNUMBER) GetProcAddress(kernel32, "GetCurrentProcessorNumber");
|
(PFN_GETCURRENTPROCESSORNUMBER) GetProcAddress(kernel32, "GetCurrentProcessorNumber");
|
||||||
GetCurrentProcessorNumberExProc =
|
GetCurrentProcessorNumberExProc =
|
||||||
|
@ -370,13 +350,13 @@ static hwloc_cpuset_t * processor_group_cpusets = NULL;
|
||||||
static void
|
static void
|
||||||
hwloc_win_get_processor_groups(void)
|
hwloc_win_get_processor_groups(void)
|
||||||
{
|
{
|
||||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo;
|
HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *procInfoTotal, *tmpprocInfoTotal, *procInfo;
|
||||||
DWORD length;
|
DWORD length;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
hwloc_debug("querying windows processor groups\n");
|
hwloc_debug("querying windows processor groups\n");
|
||||||
|
|
||||||
if (!GetActiveProcessorGroupCountProc || !GetLogicalProcessorInformationExProc)
|
if (!GetLogicalProcessorInformationExProc)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
nr_processor_groups = GetActiveProcessorGroupCountProc();
|
nr_processor_groups = GetActiveProcessorGroupCountProc();
|
||||||
|
@ -415,6 +395,8 @@ hwloc_win_get_processor_groups(void)
|
||||||
|
|
||||||
assert(procInfo->Relationship == RelationGroup);
|
assert(procInfo->Relationship == RelationGroup);
|
||||||
|
|
||||||
|
hwloc_debug("Found %u active windows processor groups\n",
|
||||||
|
(unsigned) procInfo->Group.ActiveGroupCount);
|
||||||
for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) {
|
for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) {
|
||||||
KAFFINITY mask;
|
KAFFINITY mask;
|
||||||
hwloc_bitmap_t set;
|
hwloc_bitmap_t set;
|
||||||
|
@ -424,8 +406,8 @@ hwloc_win_get_processor_groups(void)
|
||||||
goto error_with_cpusets;
|
goto error_with_cpusets;
|
||||||
|
|
||||||
mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask;
|
mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask;
|
||||||
hwloc_debug("group %u %d cpus mask %lx\n", id,
|
hwloc_debug("group %u with %u cpus mask 0x%llx\n", id,
|
||||||
procInfo->Group.GroupInfo[id].ActiveProcessorCount, mask);
|
(unsigned) procInfo->Group.GroupInfo[id].ActiveProcessorCount, (unsigned long long) mask);
|
||||||
/* KAFFINITY is ULONG_PTR */
|
/* KAFFINITY is ULONG_PTR */
|
||||||
hwloc_bitmap_set_ith_ULONG_PTR(set, id, mask);
|
hwloc_bitmap_set_ith_ULONG_PTR(set, id, mask);
|
||||||
/* FIXME: what if running 32bits on a 64bits windows with 64-processor groups?
|
/* FIXME: what if running 32bits on a 64bits windows with 64-processor groups?
|
||||||
|
@ -1008,6 +990,8 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||||
unsigned hostname_size = sizeof(hostname);
|
unsigned hostname_size = sizeof(hostname);
|
||||||
int has_efficiencyclass = 0;
|
int has_efficiencyclass = 0;
|
||||||
struct hwloc_win_efficiency_classes eclasses;
|
struct hwloc_win_efficiency_classes eclasses;
|
||||||
|
char *env = getenv("HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS");
|
||||||
|
int keep_pgroup_objs = (env && atoi(env));
|
||||||
|
|
||||||
assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
|
assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
|
||||||
|
|
||||||
|
@ -1038,137 +1022,8 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||||
|
|
||||||
GetSystemInfo(&SystemInfo);
|
GetSystemInfo(&SystemInfo);
|
||||||
|
|
||||||
if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) {
|
|
||||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo, tmpprocInfo;
|
|
||||||
unsigned id;
|
|
||||||
unsigned i;
|
|
||||||
struct hwloc_obj *obj;
|
|
||||||
hwloc_obj_type_t type;
|
|
||||||
|
|
||||||
length = 0;
|
|
||||||
procInfo = NULL;
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
if (GetLogicalProcessorInformationProc(procInfo, &length))
|
|
||||||
break;
|
|
||||||
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
|
|
||||||
return -1;
|
|
||||||
tmpprocInfo = realloc(procInfo, length);
|
|
||||||
if (!tmpprocInfo) {
|
|
||||||
free(procInfo);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
procInfo = tmpprocInfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(!length || procInfo);
|
|
||||||
|
|
||||||
for (i = 0; i < length / sizeof(*procInfo); i++) {
|
|
||||||
|
|
||||||
/* Ignore unknown caches */
|
|
||||||
if (procInfo->Relationship == RelationCache
|
|
||||||
&& procInfo->Cache.Type != CacheUnified
|
|
||||||
&& procInfo->Cache.Type != CacheData
|
|
||||||
&& procInfo->Cache.Type != CacheInstruction)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
id = HWLOC_UNKNOWN_INDEX;
|
|
||||||
switch (procInfo[i].Relationship) {
|
|
||||||
case RelationNumaNode:
|
|
||||||
type = HWLOC_OBJ_NUMANODE;
|
|
||||||
id = procInfo[i].NumaNode.NodeNumber;
|
|
||||||
gotnuma++;
|
|
||||||
if (id > max_numanode_index)
|
|
||||||
max_numanode_index = id;
|
|
||||||
break;
|
|
||||||
case RelationProcessorPackage:
|
|
||||||
type = HWLOC_OBJ_PACKAGE;
|
|
||||||
break;
|
|
||||||
case RelationCache:
|
|
||||||
type = (procInfo[i].Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo[i].Cache.Level - 1;
|
|
||||||
break;
|
|
||||||
case RelationProcessorCore:
|
|
||||||
type = HWLOC_OBJ_CORE;
|
|
||||||
break;
|
|
||||||
case RelationGroup:
|
|
||||||
default:
|
|
||||||
type = HWLOC_OBJ_GROUP;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!hwloc_filter_check_keep_object_type(topology, type))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
obj = hwloc_alloc_setup_object(topology, type, id);
|
|
||||||
obj->cpuset = hwloc_bitmap_alloc();
|
|
||||||
hwloc_debug("%s#%u mask %llx\n", hwloc_obj_type_string(type), id, (unsigned long long) procInfo[i].ProcessorMask);
|
|
||||||
/* ProcessorMask is a ULONG_PTR */
|
|
||||||
hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, 0, procInfo[i].ProcessorMask);
|
|
||||||
hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset);
|
|
||||||
|
|
||||||
switch (type) {
|
|
||||||
case HWLOC_OBJ_NUMANODE:
|
|
||||||
{
|
|
||||||
ULONGLONG avail;
|
|
||||||
obj->nodeset = hwloc_bitmap_alloc();
|
|
||||||
hwloc_bitmap_set(obj->nodeset, id);
|
|
||||||
if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail))
|
|
||||||
|| (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) {
|
|
||||||
obj->attr->numanode.local_memory = avail;
|
|
||||||
gotnumamemory++;
|
|
||||||
}
|
|
||||||
obj->attr->numanode.page_types_len = 2;
|
|
||||||
obj->attr->numanode.page_types = malloc(2 * sizeof(*obj->attr->numanode.page_types));
|
|
||||||
memset(obj->attr->numanode.page_types, 0, 2 * sizeof(*obj->attr->numanode.page_types));
|
|
||||||
obj->attr->numanode.page_types_len = 1;
|
|
||||||
obj->attr->numanode.page_types[0].size = SystemInfo.dwPageSize;
|
|
||||||
#if HAVE_DECL__SC_LARGE_PAGESIZE
|
|
||||||
obj->attr->numanode.page_types_len++;
|
|
||||||
obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case HWLOC_OBJ_L1CACHE:
|
|
||||||
case HWLOC_OBJ_L2CACHE:
|
|
||||||
case HWLOC_OBJ_L3CACHE:
|
|
||||||
case HWLOC_OBJ_L4CACHE:
|
|
||||||
case HWLOC_OBJ_L5CACHE:
|
|
||||||
case HWLOC_OBJ_L1ICACHE:
|
|
||||||
case HWLOC_OBJ_L2ICACHE:
|
|
||||||
case HWLOC_OBJ_L3ICACHE:
|
|
||||||
obj->attr->cache.size = procInfo[i].Cache.Size;
|
|
||||||
obj->attr->cache.associativity = procInfo[i].Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo[i].Cache.Associativity ;
|
|
||||||
obj->attr->cache.linesize = procInfo[i].Cache.LineSize;
|
|
||||||
obj->attr->cache.depth = procInfo[i].Cache.Level;
|
|
||||||
switch (procInfo->Cache.Type) {
|
|
||||||
case CacheUnified:
|
|
||||||
obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
|
|
||||||
break;
|
|
||||||
case CacheData:
|
|
||||||
obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
|
|
||||||
break;
|
|
||||||
case CacheInstruction:
|
|
||||||
obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
hwloc_free_unlinked_object(obj);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case HWLOC_OBJ_GROUP:
|
|
||||||
obj->attr->group.kind = procInfo[i].Relationship == RelationGroup ? HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP : HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation");
|
|
||||||
}
|
|
||||||
|
|
||||||
free(procInfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (GetLogicalProcessorInformationExProc) {
|
if (GetLogicalProcessorInformationExProc) {
|
||||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo;
|
HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *procInfoTotal, *tmpprocInfoTotal, *procInfo;
|
||||||
unsigned id;
|
unsigned id;
|
||||||
struct hwloc_obj *obj;
|
struct hwloc_obj *obj;
|
||||||
hwloc_obj_type_t type;
|
hwloc_obj_type_t type;
|
||||||
|
@ -1207,8 +1062,16 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||||
switch (procInfo->Relationship) {
|
switch (procInfo->Relationship) {
|
||||||
case RelationNumaNode:
|
case RelationNumaNode:
|
||||||
type = HWLOC_OBJ_NUMANODE;
|
type = HWLOC_OBJ_NUMANODE;
|
||||||
|
/* Starting with Windows 11 and Server 2022, the GroupCount field is valid and >=1
|
||||||
|
* and we may read GroupMasks[]. Older releases have GroupCount==0 and we must read GroupMask.
|
||||||
|
*/
|
||||||
|
if (procInfo->NumaNode.GroupCount) {
|
||||||
|
num = procInfo->NumaNode.GroupCount;
|
||||||
|
GroupMask = procInfo->NumaNode.GroupMasks;
|
||||||
|
} else {
|
||||||
num = 1;
|
num = 1;
|
||||||
GroupMask = &procInfo->NumaNode.GroupMask;
|
GroupMask = &procInfo->NumaNode.GroupMask;
|
||||||
|
}
|
||||||
id = procInfo->NumaNode.NodeNumber;
|
id = procInfo->NumaNode.NodeNumber;
|
||||||
gotnuma++;
|
gotnuma++;
|
||||||
if (id > max_numanode_index)
|
if (id > max_numanode_index)
|
||||||
|
@ -1221,18 +1084,20 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||||
break;
|
break;
|
||||||
case RelationCache:
|
case RelationCache:
|
||||||
type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1;
|
type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1;
|
||||||
|
/* GroupCount added approximately with NumaNode.GroupCount above */
|
||||||
|
if (procInfo->Cache.GroupCount) {
|
||||||
|
num = procInfo->Cache.GroupCount;
|
||||||
|
GroupMask = procInfo->Cache.GroupMasks;
|
||||||
|
} else {
|
||||||
num = 1;
|
num = 1;
|
||||||
GroupMask = &procInfo->Cache.GroupMask;
|
GroupMask = &procInfo->Cache.GroupMask;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case RelationProcessorCore:
|
case RelationProcessorCore:
|
||||||
type = HWLOC_OBJ_CORE;
|
type = HWLOC_OBJ_CORE;
|
||||||
num = procInfo->Processor.GroupCount;
|
num = procInfo->Processor.GroupCount;
|
||||||
GroupMask = procInfo->Processor.GroupMask;
|
GroupMask = procInfo->Processor.GroupMask;
|
||||||
if (has_efficiencyclass)
|
efficiency_class = procInfo->Processor.EfficiencyClass;
|
||||||
/* the EfficiencyClass field didn't exist before Windows10 and recent MSVC headers,
|
|
||||||
* so just access it manually instead of trying to detect it.
|
|
||||||
*/
|
|
||||||
efficiency_class = * ((&procInfo->Processor.Flags) + 1);
|
|
||||||
break;
|
break;
|
||||||
case RelationGroup:
|
case RelationGroup:
|
||||||
/* So strange an interface... */
|
/* So strange an interface... */
|
||||||
|
@ -1257,11 +1122,12 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||||
groups_pu_set = hwloc_bitmap_alloc();
|
groups_pu_set = hwloc_bitmap_alloc();
|
||||||
hwloc_bitmap_or(groups_pu_set, groups_pu_set, set);
|
hwloc_bitmap_or(groups_pu_set, groups_pu_set, set);
|
||||||
|
|
||||||
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
|
/* Ignore processor groups unless requested and filtered-in */
|
||||||
|
if (keep_pgroup_objs && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
|
||||||
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id);
|
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id);
|
||||||
obj->cpuset = set;
|
obj->cpuset = set;
|
||||||
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
|
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
|
||||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation:ProcessorGroup");
|
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformationEx:ProcessorGroup");
|
||||||
} else
|
} else
|
||||||
hwloc_bitmap_free(set);
|
hwloc_bitmap_free(set);
|
||||||
}
|
}
|
||||||
|
|
35
src/3rdparty/hwloc/src/topology-x86.c
vendored
35
src/3rdparty/hwloc/src/topology-x86.c
vendored
|
@ -500,7 +500,8 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
|
||||||
nodes_per_proc = ((ecx >> 8) & 7) + 1;
|
nodes_per_proc = ((ecx >> 8) & 7) + 1;
|
||||||
}
|
}
|
||||||
if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
|
if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
|
||||||
|| ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) {
|
|| ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)
|
||||||
|
|| (infos->cpufamilynumber == 0x19 && nodes_per_proc > 1)) {
|
||||||
hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
|
hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -775,13 +776,19 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||||
|
|
||||||
} else if (cpuid_type == amd) {
|
} else if (cpuid_type == amd) {
|
||||||
/* AMD quirks */
|
/* AMD quirks */
|
||||||
if (infos->cpufamilynumber == 0x17
|
if (infos->cpufamilynumber >= 0x17 && cache->level == 3) {
|
||||||
&& cache->level == 3 && cache->nbthreads_sharing == 6) {
|
/* AMD family 0x19 always shares L3 between 16 APIC ids (8 HT cores).
|
||||||
/* AMD family 0x17 always shares L3 between 8 APIC ids,
|
* while Family 0x17 shares between 8 APIC ids (4 HT cores).
|
||||||
* even when only 6 APIC ids are enabled and reported in nbthreads_sharing
|
* But many models have fewer APIC ids enabled and reported in nbthreads_sharing.
|
||||||
* (on 24-core CPUs).
|
* It means we must round-up nbthreads_sharing to the nearest power of 2
|
||||||
|
* before computing cacheid.
|
||||||
*/
|
*/
|
||||||
cache->cacheid = infos->apicid / 8;
|
unsigned nbapics_sharing = cache->nbthreads_sharing;
|
||||||
|
if (nbapics_sharing & (nbapics_sharing-1))
|
||||||
|
/* not a power of two, round-up */
|
||||||
|
nbapics_sharing = 1U<<(1+hwloc_ffsl(nbapics_sharing));
|
||||||
|
|
||||||
|
cache->cacheid = infos->apicid / nbapics_sharing;
|
||||||
|
|
||||||
} else if (infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9
|
} else if (infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9
|
||||||
&& cache->level == 3
|
&& cache->level == 3
|
||||||
|
@ -807,7 +814,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||||
} else if (infos->cpufamilynumber == 0x15
|
} else if (infos->cpufamilynumber == 0x15
|
||||||
&& (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
|
&& (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
|
||||||
&& cache->level == 3 && cache->nbthreads_sharing == 6) {
|
&& cache->level == 3 && cache->nbthreads_sharing == 6) {
|
||||||
/* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours below,
|
/* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours above,
|
||||||
* but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here.
|
* but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here.
|
||||||
*/
|
*/
|
||||||
cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
|
cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
|
||||||
|
@ -1231,6 +1238,18 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cache = hwloc_alloc_setup_object(topology, otype, HWLOC_UNKNOWN_INDEX);
|
cache = hwloc_alloc_setup_object(topology, otype, HWLOC_UNKNOWN_INDEX);
|
||||||
|
/* We don't specify the os_index of caches because we want to be
|
||||||
|
* 100% sure they are identical to what the Linux kernel reports
|
||||||
|
* (so that things like resctrl work).
|
||||||
|
* However, vendor/model-specific quirks in the x86 code above
|
||||||
|
* make this difficult.
|
||||||
|
*
|
||||||
|
* Caveat: if the x86 backend is used on Linux to avoid kernel bugs,
|
||||||
|
* IDs won't be available to resctrl users. But resctrl heavily
|
||||||
|
* relies on the kernel x86 discovery being non-buggy anyway.
|
||||||
|
*
|
||||||
|
* TODO: make this optional? or only disable it on Linux?
|
||||||
|
*/
|
||||||
cache->attr->cache.depth = level;
|
cache->attr->cache.depth = level;
|
||||||
cache->attr->cache.size = infos[i].cache[l].size;
|
cache->attr->cache.size = infos[i].cache[l].size;
|
||||||
cache->attr->cache.linesize = infos[i].cache[l].linesize;
|
cache->attr->cache.linesize = infos[i].cache[l].linesize;
|
||||||
|
|
3
src/3rdparty/hwloc/src/topology-xml.c
vendored
3
src/3rdparty/hwloc/src/topology-xml.c
vendored
|
@ -243,7 +243,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|
||||||
else if (!strcmp(name, "dont_merge")) {
|
else if (!strcmp(name, "dont_merge")) {
|
||||||
unsigned long lvalue = strtoul(value, NULL, 10);
|
unsigned long lvalue = strtoul(value, NULL, 10);
|
||||||
if (obj->type == HWLOC_OBJ_GROUP)
|
if (obj->type == HWLOC_OBJ_GROUP)
|
||||||
obj->attr->group.dont_merge = lvalue;
|
obj->attr->group.dont_merge = (unsigned char) lvalue;
|
||||||
else if (hwloc__xml_verbose())
|
else if (hwloc__xml_verbose())
|
||||||
fprintf(stderr, "%s: ignoring dont_merge attribute for non-group object type\n",
|
fprintf(stderr, "%s: ignoring dont_merge attribute for non-group object type\n",
|
||||||
state->global->msgprefix);
|
state->global->msgprefix);
|
||||||
|
@ -2825,6 +2825,7 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw
|
||||||
/* child has sibling, we must add a Group around those memory children */
|
/* child has sibling, we must add a Group around those memory children */
|
||||||
hwloc_obj_t group = parentstate->global->v1_memory_group;
|
hwloc_obj_t group = parentstate->global->v1_memory_group;
|
||||||
parentstate->new_child(parentstate, &gstate, "object");
|
parentstate->new_child(parentstate, &gstate, "object");
|
||||||
|
group->parent = obj->parent;
|
||||||
group->cpuset = obj->cpuset;
|
group->cpuset = obj->cpuset;
|
||||||
group->complete_cpuset = obj->complete_cpuset;
|
group->complete_cpuset = obj->complete_cpuset;
|
||||||
group->nodeset = obj->nodeset;
|
group->nodeset = obj->nodeset;
|
||||||
|
|
64
src/3rdparty/hwloc/src/topology.c
vendored
64
src/3rdparty/hwloc/src/topology.c
vendored
|
@ -69,7 +69,7 @@
|
||||||
* it will break in cygwin, we'll have to use both putenv() and SetEnvironmentVariable().
|
* it will break in cygwin, we'll have to use both putenv() and SetEnvironmentVariable().
|
||||||
* Hopefully L0 will provide a way to enable Sysman without env vars before it happens.
|
* Hopefully L0 will provide a way to enable Sysman without env vars before it happens.
|
||||||
*/
|
*/
|
||||||
#ifdef HWLOC_HAVE_ATTRIBUTE_CONSTRUCTOR
|
#if HWLOC_HAVE_ATTRIBUTE_CONSTRUCTOR
|
||||||
static void hwloc_constructor(void) __attribute__((constructor));
|
static void hwloc_constructor(void) __attribute__((constructor));
|
||||||
static void hwloc_constructor(void)
|
static void hwloc_constructor(void)
|
||||||
{
|
{
|
||||||
|
@ -1901,6 +1901,9 @@ hwloc_topology_alloc_group_object(struct hwloc_topology *topology)
|
||||||
static void hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root);
|
static void hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root);
|
||||||
static void propagate_total_memory(hwloc_obj_t obj);
|
static void propagate_total_memory(hwloc_obj_t obj);
|
||||||
static void hwloc_set_group_depth(hwloc_topology_t topology);
|
static void hwloc_set_group_depth(hwloc_topology_t topology);
|
||||||
|
static void hwloc_connect_children(hwloc_obj_t parent);
|
||||||
|
static int hwloc_connect_levels(hwloc_topology_t topology);
|
||||||
|
static int hwloc_connect_special_levels(hwloc_topology_t topology);
|
||||||
|
|
||||||
hwloc_obj_t
|
hwloc_obj_t
|
||||||
hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
|
hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
|
||||||
|
@ -2474,13 +2477,26 @@ hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* return > 0 if any level was removed, which means reconnect is needed */
|
/* return > 0 if any level was removed.
|
||||||
static void
|
* performs its own reconnect internally if needed
|
||||||
|
*/
|
||||||
|
static int
|
||||||
hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
|
hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
|
||||||
{
|
{
|
||||||
unsigned i, j;
|
unsigned i, j;
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
|
||||||
|
if (topology->modified) {
|
||||||
|
/* WARNING: hwloc_topology_reconnect() is duplicated partially here
|
||||||
|
* and at the end of this function:
|
||||||
|
* - we need normal levels before merging.
|
||||||
|
* - and we'll need to update special levels after merging.
|
||||||
|
*/
|
||||||
|
hwloc_connect_children(topology->levels[0][0]);
|
||||||
|
if (hwloc_connect_levels(topology) < 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/* start from the bottom since we'll remove intermediate levels */
|
/* start from the bottom since we'll remove intermediate levels */
|
||||||
for(i=topology->nb_levels-1; i>0; i--) {
|
for(i=topology->nb_levels-1; i>0; i--) {
|
||||||
int replacechild = 0, replaceparent = 0;
|
int replacechild = 0, replaceparent = 0;
|
||||||
|
@ -2646,6 +2662,22 @@ hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
|
||||||
topology->type_depth[type] = HWLOC_TYPE_DEPTH_MULTIPLE;
|
topology->type_depth[type] = HWLOC_TYPE_DEPTH_MULTIPLE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (res > 0 || topology-> modified) {
|
||||||
|
/* WARNING: hwloc_topology_reconnect() is duplicated partially here
|
||||||
|
* and at the beginning of this function.
|
||||||
|
* If we merged some levels, some child+parent special children lists
|
||||||
|
* may have been merged, hence special levels might need reordering.
|
||||||
|
* So reconnect special levels only here at the end
|
||||||
|
* (it's not needed at the beginning of this function).
|
||||||
|
*/
|
||||||
|
if (hwloc_connect_special_levels(topology) < 0)
|
||||||
|
return -1;
|
||||||
|
topology->modified = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -2963,9 +2995,9 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Build I/O levels */
|
/* Build Memory, I/O and Misc levels */
|
||||||
static int
|
static int
|
||||||
hwloc_connect_io_misc_levels(hwloc_topology_t topology)
|
hwloc_connect_special_levels(hwloc_topology_t topology)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
@ -3176,6 +3208,10 @@ hwloc_connect_levels(hwloc_topology_t topology)
|
||||||
int
|
int
|
||||||
hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
|
hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
|
||||||
{
|
{
|
||||||
|
/* WARNING: when updating this function, the replicated code must
|
||||||
|
* also be updated inside hwloc_filter_levels_keep_structure()
|
||||||
|
*/
|
||||||
|
|
||||||
if (flags) {
|
if (flags) {
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -3188,7 +3224,7 @@ hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
|
||||||
if (hwloc_connect_levels(topology) < 0)
|
if (hwloc_connect_levels(topology) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
if (hwloc_connect_io_misc_levels(topology) < 0)
|
if (hwloc_connect_special_levels(topology) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
topology->modified = 0;
|
topology->modified = 0;
|
||||||
|
@ -3529,15 +3565,12 @@ hwloc_discover(struct hwloc_topology *topology,
|
||||||
}
|
}
|
||||||
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
||||||
|
|
||||||
/* Reconnect things after all these changes.
|
|
||||||
* Often needed because of Groups inserted for I/Os.
|
|
||||||
* And required for KEEP_STRUCTURE below.
|
|
||||||
*/
|
|
||||||
if (hwloc_topology_reconnect(topology, 0) < 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
hwloc_debug("%s", "\nRemoving levels with HWLOC_TYPE_FILTER_KEEP_STRUCTURE\n");
|
hwloc_debug("%s", "\nRemoving levels with HWLOC_TYPE_FILTER_KEEP_STRUCTURE\n");
|
||||||
hwloc_filter_levels_keep_structure(topology);
|
if (hwloc_filter_levels_keep_structure(topology) < 0)
|
||||||
|
return -1;
|
||||||
|
/* takes care of reconnecting children/levels internally,
|
||||||
|
* because it needs normal levels.
|
||||||
|
* and it's often needed below because of Groups inserted for I/Os anyway */
|
||||||
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
||||||
|
|
||||||
/* accumulate children memory in total_memory fields (only once parent is set) */
|
/* accumulate children memory in total_memory fields (only once parent is set) */
|
||||||
|
@ -4360,14 +4393,13 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se
|
||||||
hwloc_bitmap_free(droppedcpuset);
|
hwloc_bitmap_free(droppedcpuset);
|
||||||
hwloc_bitmap_free(droppednodeset);
|
hwloc_bitmap_free(droppednodeset);
|
||||||
|
|
||||||
if (hwloc_topology_reconnect(topology, 0) < 0)
|
if (hwloc_filter_levels_keep_structure(topology) < 0) /* takes care of reconnecting internally */
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/* some objects may have disappeared, we need to update distances objs arrays */
|
/* some objects may have disappeared, we need to update distances objs arrays */
|
||||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||||
hwloc_internal_memattrs_need_refresh(topology);
|
hwloc_internal_memattrs_need_refresh(topology);
|
||||||
|
|
||||||
hwloc_filter_levels_keep_structure(topology);
|
|
||||||
hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
|
hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
|
||||||
propagate_total_memory(topology->levels[0][0]);
|
propagate_total_memory(topology->levels[0][0]);
|
||||||
hwloc_internal_cpukinds_restrict(topology);
|
hwloc_internal_cpukinds_restrict(topology);
|
||||||
|
|
Loading…
Reference in a new issue