mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-22 03:29:32 +00:00
Update hwloc for MSVC builds.
This commit is contained in:
parent
850b43c079
commit
7a85257ad4
42 changed files with 2554 additions and 1583 deletions
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
project(xmrig)
|
||||
|
||||
option(WITH_HWLOC "Enable hwloc support" ON)
|
||||
|
|
2
src/3rdparty/argon2/CMakeLists.txt
vendored
2
src/3rdparty/argon2/CMakeLists.txt
vendored
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
|
||||
project(argon2 C)
|
||||
set(CMAKE_C_STANDARD 99)
|
||||
|
|
2
src/3rdparty/hwloc/CMakeLists.txt
vendored
2
src/3rdparty/hwloc/CMakeLists.txt
vendored
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
project (hwloc C)
|
||||
|
||||
include_directories(include)
|
||||
|
|
107
src/3rdparty/hwloc/NEWS
vendored
107
src/3rdparty/hwloc/NEWS
vendored
|
@ -1,5 +1,5 @@
|
|||
Copyright © 2009 CNRS
|
||||
Copyright © 2009-2022 Inria. All rights reserved.
|
||||
Copyright © 2009-2023 Inria. All rights reserved.
|
||||
Copyright © 2009-2013 Université Bordeaux
|
||||
Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
Copyright © 2020 Hewlett Packard Enterprise. All rights reserved.
|
||||
|
@ -17,6 +17,103 @@ bug fixes (and other actions) for each version of hwloc since version
|
|||
0.9.
|
||||
|
||||
|
||||
Version 2.10.0
|
||||
--------------
|
||||
* Heterogeneous Memory core improvements
|
||||
+ Better heuristics to identify the subtype of memory such as HBM,
|
||||
DRAM, NVM, CXL-DRAM, etc.
|
||||
+ Build memory tiers, i.e. sets of NUMA nodes with the same subtype
|
||||
and similar performance.
|
||||
- NUMA node tier ranks are exposed in the new MemoryTier info
|
||||
attribute (starts from 0 for highest bandwidth tier).
|
||||
+ See the new Heterogeneous Memory section in the documentation.
|
||||
* API
|
||||
+ Add hwloc_topology_free_group_object() to discard a Group created
|
||||
by hwloc_topology_alloc_group_object().
|
||||
* Linux backend
|
||||
+ Fix cpukinds on NVIDIA Grace to report identical cores even if they
|
||||
actually have very small frequency differences.
|
||||
Thanks to John C. Linford for the report.
|
||||
+ Add CXLDevice attributes to CXL DAX objects and NUMA nodes to show
|
||||
which PCI device implements which window.
|
||||
+ Ignore buggy memory-side caches and memory attributes when fake NUMA
|
||||
emulation is enabled on the Linux kernel command-line.
|
||||
+ Add more info attributes in MemoryModule Misc objects,
|
||||
thanks to Zubiao Xiong for the patch.
|
||||
+ Get CPUModel and CPUFamily info attributes on LoongArch platforms.
|
||||
* x86 backend
|
||||
+ Add support for new AMD CPUID leaf 0x80000026 for better detection
|
||||
of Core Complex and Die on Zen4 processors.
|
||||
+ Improve Zhaoxin CPU topology detection.
|
||||
* Tools
|
||||
+ Input locations and many command-line options (e.g. hwloc-calc -I -N -H,
|
||||
lstopo --only) now accept filters such as "NUMA[HBM]" so that only
|
||||
objects of that type and subtype are considered.
|
||||
- NUMA[tier=1] is also accepted for selecting NUMA nodes depending
|
||||
on their MemoryTier info attribute.
|
||||
+ Add --object-output to hwloc-calc to report the type as a prefix to
|
||||
object indexes, e.g. Core:2 instead of 2 in the output of -I.
|
||||
+ hwloc-info --ancestor and --descendants now accept kinds of objects
|
||||
instead of single types.
|
||||
- The new --first option only shows the first matching object.
|
||||
+ Add --children-of-pid to hwloc-ps to show a hierarchy of processes.
|
||||
Thanks to Antoine Morvan for the suggestion.
|
||||
+ Add --misc-from to lstopo to add Misc objects described in a file.
|
||||
- To be combined with the new hwloc-ps --lstopo-misc for a customizable
|
||||
lstopo --top replacement.
|
||||
* Misc
|
||||
+ lstopo may now configure the layout of memory objects placed above,
|
||||
for instance with --children-order memory:above:vert.
|
||||
+ Fix XML import from memory or stdin when using libxml2 2.12.
|
||||
+ Fix installation failures when configuring with --target,
|
||||
thanks to Clement Foyer for the patch.
|
||||
+ Fix support for 128bit pointer architectures.
|
||||
+ Remove Netloc.
|
||||
|
||||
|
||||
Version 2.9.3
|
||||
-------------
|
||||
* Handle Linux glibc allocation errors in binding routines (CVE-2022-47022).
|
||||
* Fix hwloc-calc when searching objects on heterogeneous memory platforms,
|
||||
thanks to Antoine Morvan for the report.
|
||||
* Fix hwloc_get_next_child() when there are some memory-side caches.
|
||||
* Don't crash if the topology is empty because Linux cgroups are wrong.
|
||||
* Improve some hwloc-bind warnings in case of command-line parsing errors.
|
||||
* Many documentation improvements all over the place, including:
|
||||
+ hwloc_topology_restrict() and hwloc_topology_insert_group() may reorder
|
||||
children, causing the logical indexes of objects to change.
|
||||
|
||||
|
||||
Version 2.9.2
|
||||
-------------
|
||||
* Don't forget L3i when defining filters for multiple levels of caches
|
||||
with hwloc_topology_set_cache/icache_types_filter().
|
||||
* Fix object total_memory after hwloc_topology_insert_group_object().
|
||||
* Fix the (non-yet) exporting in synthetic description for complex memory
|
||||
hierarchies with memory-side caches, etc.
|
||||
* Fix some default size attributes when building synthetic topologies.
|
||||
* Fix size units in hwloc-annotate.
|
||||
* Improve bitmap reallocation error management in many functions.
|
||||
* Documentation improvements:
|
||||
+ Better document return values of functions.
|
||||
+ Add "Error reporting" section (in hwloc.h and in the doxygen doc).
|
||||
+ Add FAQ entry "What may I disable to make hwloc faster?"
|
||||
+ Improve FAQ entries "Why is lstopo slow?" and
|
||||
"I only need ..., why should I use hwloc?"
|
||||
+ Clarify how to deal with cpukinds in hwloc-calc and hwloc-bind
|
||||
manpages.
|
||||
|
||||
|
||||
Version 2.9.1
|
||||
-------------
|
||||
* Don't forget to apply object type filters to "perflevel" caches detected
|
||||
on recent Mac OS X releases, thanks to Michel Lesoinne for the report.
|
||||
* Fix a failed assertion in hwloc_topology_restrict() when some NUMA nodes
|
||||
are removed because of HWLOC_RESTRICT_FLAG_REMOVE_CPULESS but no PUs are.
|
||||
Thanks to Mark Grondona for reporting the issue.
|
||||
* Mark HPE Cray Slingshot NICs with subtype "Slingshot".
|
||||
|
||||
|
||||
Version 2.9.0
|
||||
-------------
|
||||
* Backends
|
||||
|
@ -61,6 +158,14 @@ Version 2.8.0
|
|||
file from the documentation.
|
||||
|
||||
|
||||
Version 2.7.2
|
||||
-------------
|
||||
* Fix a crash when LevelZero devices have multiple subdevices,
|
||||
e.g. on PonteVecchio GPUs, thanks to Jonathan Peyton.
|
||||
* Fix a leak when importing cpukinds from XML,
|
||||
thanks to Hui Zhou.
|
||||
|
||||
|
||||
Version 2.7.1
|
||||
-------------
|
||||
* Workaround crashes when virtual machines report incoherent x86 CPUID
|
||||
|
|
496
src/3rdparty/hwloc/README
vendored
496
src/3rdparty/hwloc/README
vendored
|
@ -1,4 +1,8 @@
|
|||
Introduction
|
||||
This is a truncated and poorly-formatted version of the documentation main page.
|
||||
See https://www.open-mpi.org/projects/hwloc/doc/ for more.
|
||||
|
||||
|
||||
hwloc Overview
|
||||
|
||||
The Hardware Locality (hwloc) software project aims at easing the process of
|
||||
discovering hardware resources in parallel architectures. It offers
|
||||
|
@ -8,66 +12,456 @@ high-performance computing (HPC) applications, but is also applicable to any
|
|||
project seeking to exploit code and/or data locality on modern computing
|
||||
platforms.
|
||||
|
||||
hwloc is actually made of two subprojects distributed together:
|
||||
hwloc provides command line tools and a C API to obtain the hierarchical map of
|
||||
key computing elements within a node, such as: NUMA memory nodes, shared
|
||||
caches, processor packages, dies and cores, processing units (logical
|
||||
processors or "threads") and even I/O devices. hwloc also gathers various
|
||||
attributes such as cache and memory information, and is portable across a
|
||||
variety of different operating systems and platforms.
|
||||
|
||||
* The original hwloc project for describing the internals of computing nodes.
|
||||
It is described in detail starting at section Hardware Locality (hwloc)
|
||||
Introduction.
|
||||
* The network-oriented companion called netloc (Network Locality), described
|
||||
in detail starting with section Network Locality (netloc).
|
||||
hwloc primarily aims at helping high-performance computing (HPC) applications,
|
||||
but is also applicable to any project seeking to exploit code and/or data
|
||||
locality on modern computing platforms.
|
||||
|
||||
See also the Related pages tab above for links to other sections.
|
||||
hwloc supports the following operating systems:
|
||||
|
||||
Netloc may be disabled, but the original hwloc cannot. Both hwloc and netloc
|
||||
APIs are documented after these sections.
|
||||
* Linux (with knowledge of cgroups and cpusets, memory targets/initiators,
|
||||
etc.) on all supported hardware, including Intel Xeon Phi, ScaleMP vSMP,
|
||||
and NumaScale NumaConnect.
|
||||
* Solaris (with support for processor sets and logical domains)
|
||||
* AIX
|
||||
* Darwin / OS X
|
||||
* FreeBSD and its variants (such as kFreeBSD/GNU)
|
||||
* NetBSD
|
||||
* HP-UX
|
||||
* Microsoft Windows
|
||||
* IBM BlueGene/Q Compute Node Kernel (CNK)
|
||||
|
||||
Installation
|
||||
Since it uses standard Operating System information, hwloc's support is mostly
|
||||
independent from the processor type (x86, powerpc, ...) and just relies on the
|
||||
Operating System support. The main exception is BSD operating systems (NetBSD,
|
||||
FreeBSD, etc.) because they do not provide topology information, hence
|
||||
hwloc uses an x86-only CPUID-based backend (which can be used for other OSes
|
||||
too, see the Components and plugins section).
|
||||
|
||||
hwloc (https://www.open-mpi.org/projects/hwloc/) is available under the BSD
|
||||
license. It is hosted as a sub-project of the overall Open MPI project
|
||||
(https://www.open-mpi.org/). Note that hwloc does not require any functionality from
|
||||
Open MPI -- it is a wholly separate (and much smaller!) project and code base.
|
||||
It just happens to be hosted as part of the overall Open MPI project.
|
||||
To check whether hwloc works on a particular machine, just try to build it and
|
||||
run lstopo or lstopo-no-graphics. If some things do not look right (e.g. bogus
|
||||
or missing cache information), see Questions and Bugs.
|
||||
|
||||
Basic Installation
|
||||
hwloc only reports the number of processors on unsupported operating systems;
|
||||
no topology information is available.
|
||||
|
||||
Installation is the fairly common GNU-based process:
|
||||
For development and debugging purposes, hwloc also offers the ability to work
|
||||
on "fake" topologies:
|
||||
|
||||
shell$ ./configure --prefix=...
|
||||
shell$ make
|
||||
shell$ make install
|
||||
* Symmetrical tree of resources generated from a list of level arities, see
|
||||
Synthetic topologies.
|
||||
* Remote machine simulation through the gathering of topology as XML files,
|
||||
see Importing and exporting topologies from/to XML files.
|
||||
|
||||
hwloc- and netloc-specific configure options and requirements are documented in
|
||||
sections hwloc Installation and Netloc Installation respectively.
|
||||
hwloc can display the topology in a human-readable format, either in graphical
|
||||
mode (X11), or by exporting in one of several different formats, including:
|
||||
plain text, LaTeX tikzpicture, PDF, PNG, and FIG (see Command-line Examples
|
||||
below). Note that some of the export formats require additional support
|
||||
libraries.
|
||||
|
||||
Also note that if you install supplemental libraries in non-standard locations,
|
||||
hwloc's configure script may not be able to find them without some help. You
|
||||
may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on
|
||||
the configure command line.
|
||||
hwloc offers a programming interface for manipulating topologies and objects.
|
||||
It also brings a powerful CPU bitmap API that is used to describe topology
|
||||
objects location on physical/logical processors. See the Programming Interface
|
||||
below. It may also be used to bind applications onto certain cores or memory
|
||||
nodes. Several utility programs are also provided to ease command-line
|
||||
manipulation of topology objects, binding of processes, and so on.
|
||||
|
||||
For example, if libpciaccess was installed into /opt/pciaccess, hwloc's
|
||||
configure script may not find it by default. Try adding PKG_CONFIG_PATH to the
|
||||
./configure command line, like this:
|
||||
Bindings for several other languages are available from the project website.
|
||||
|
||||
./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ...
|
||||
Command-line Examples
|
||||
|
||||
Running the "lstopo" tool is a good way to check as a graphical output whether
|
||||
hwloc properly detected the architecture of your node. Netloc command-line
|
||||
tools can be used to display the network topology interconnecting your nodes.
|
||||
On a 4-package 2-core machine with hyper-threading, the lstopo tool may show
|
||||
the following graphical output:
|
||||
|
||||
Installing from a Git clone
|
||||
[dudley]
|
||||
|
||||
Additionally, the code can be directly cloned from Git:
|
||||
Here's the equivalent output in textual form:
|
||||
|
||||
shell$ git clone https://github.com/open-mpi/hwloc.git
|
||||
shell$ cd hwloc
|
||||
shell$ ./autogen.sh
|
||||
Machine
|
||||
NUMANode L#0 (P#0)
|
||||
Package L#0 + L3 L#0 (4096KB)
|
||||
L2 L#0 (1024KB) + L1 L#0 (16KB) + Core L#0
|
||||
PU L#0 (P#0)
|
||||
PU L#1 (P#8)
|
||||
L2 L#1 (1024KB) + L1 L#1 (16KB) + Core L#1
|
||||
PU L#2 (P#4)
|
||||
PU L#3 (P#12)
|
||||
Package L#1 + L3 L#1 (4096KB)
|
||||
L2 L#2 (1024KB) + L1 L#2 (16KB) + Core L#2
|
||||
PU L#4 (P#1)
|
||||
PU L#5 (P#9)
|
||||
L2 L#3 (1024KB) + L1 L#3 (16KB) + Core L#3
|
||||
PU L#6 (P#5)
|
||||
PU L#7 (P#13)
|
||||
Package L#2 + L3 L#2 (4096KB)
|
||||
L2 L#4 (1024KB) + L1 L#4 (16KB) + Core L#4
|
||||
PU L#8 (P#2)
|
||||
PU L#9 (P#10)
|
||||
L2 L#5 (1024KB) + L1 L#5 (16KB) + Core L#5
|
||||
PU L#10 (P#6)
|
||||
PU L#11 (P#14)
|
||||
Package L#3 + L3 L#3 (4096KB)
|
||||
L2 L#6 (1024KB) + L1 L#6 (16KB) + Core L#6
|
||||
PU L#12 (P#3)
|
||||
PU L#13 (P#11)
|
||||
L2 L#7 (1024KB) + L1 L#7 (16KB) + Core L#7
|
||||
PU L#14 (P#7)
|
||||
PU L#15 (P#15)
|
||||
|
||||
Note that GNU Autoconf >=2.63, Automake >=1.11 and Libtool >=2.2.6 are required
|
||||
when building from a Git clone.
|
||||
Note that there is also an equivalent output in XML that is meant for exporting
|
||||
/importing topologies but it is hardly readable to human-beings (see Importing
|
||||
and exporting topologies from/to XML files for details).
|
||||
|
||||
Nightly development snapshots are available on the web site, they can be
|
||||
configured and built without any need for Git or GNU Autotools.
|
||||
On a 4-package 2-core Opteron NUMA machine (with two cores disallowed by
|
||||
the administrator), the lstopo tool may show the following graphical output
|
||||
(with --disallowed for displaying disallowed objects):
|
||||
|
||||
[hagrid]
|
||||
|
||||
Here's the equivalent output in textual form:
|
||||
|
||||
Machine (32GB total)
|
||||
Package L#0
|
||||
NUMANode L#0 (P#0 8190MB)
|
||||
L2 L#0 (1024KB) + L1 L#0 (64KB) + Core L#0 + PU L#0 (P#0)
|
||||
L2 L#1 (1024KB) + L1 L#1 (64KB) + Core L#1 + PU L#1 (P#1)
|
||||
Package L#1
|
||||
NUMANode L#1 (P#1 8192MB)
|
||||
L2 L#2 (1024KB) + L1 L#2 (64KB) + Core L#2 + PU L#2 (P#2)
|
||||
L2 L#3 (1024KB) + L1 L#3 (64KB) + Core L#3 + PU L#3 (P#3)
|
||||
Package L#2
|
||||
NUMANode L#2 (P#2 8192MB)
|
||||
L2 L#4 (1024KB) + L1 L#4 (64KB) + Core L#4 + PU L#4 (P#4)
|
||||
L2 L#5 (1024KB) + L1 L#5 (64KB) + Core L#5 + PU L#5 (P#5)
|
||||
Package L#3
|
||||
NUMANode L#3 (P#3 8192MB)
|
||||
L2 L#6 (1024KB) + L1 L#6 (64KB) + Core L#6 + PU L#6 (P#6)
|
||||
L2 L#7 (1024KB) + L1 L#7 (64KB) + Core L#7 + PU L#7 (P#7)
|
||||
|
||||
On a 2-package quad-core Xeon (pre-Nehalem, with 2 dual-core dies into each
|
||||
package):
|
||||
|
||||
[emmett]
|
||||
|
||||
Here's the same output in textual form:
|
||||
|
||||
Machine (total 16GB)
|
||||
NUMANode L#0 (P#0 16GB)
|
||||
Package L#0
|
||||
L2 L#0 (4096KB)
|
||||
L1 L#0 (32KB) + Core L#0 + PU L#0 (P#0)
|
||||
L1 L#1 (32KB) + Core L#1 + PU L#1 (P#4)
|
||||
L2 L#1 (4096KB)
|
||||
L1 L#2 (32KB) + Core L#2 + PU L#2 (P#2)
|
||||
L1 L#3 (32KB) + Core L#3 + PU L#3 (P#6)
|
||||
Package L#1
|
||||
L2 L#2 (4096KB)
|
||||
L1 L#4 (32KB) + Core L#4 + PU L#4 (P#1)
|
||||
L1 L#5 (32KB) + Core L#5 + PU L#5 (P#5)
|
||||
L2 L#3 (4096KB)
|
||||
L1 L#6 (32KB) + Core L#6 + PU L#6 (P#3)
|
||||
L1 L#7 (32KB) + Core L#7 + PU L#7 (P#7)
|
||||
|
||||
Programming Interface
|
||||
|
||||
The basic interface is available in hwloc.h. Some higher-level functions are
|
||||
available in hwloc/helper.h to reduce the need to manually manipulate objects
|
||||
and follow links between them. Documentation for all these is provided later in
|
||||
this document. Developers may also want to look at hwloc/inlines.h which
|
||||
contains the actual inline code of some hwloc.h routines, and at this document,
|
||||
which provides good higher-level topology traversal examples.
|
||||
|
||||
To precisely define the vocabulary used by hwloc, a Terms and Definitions
|
||||
section is available and should probably be read first.
|
||||
|
||||
Each hwloc object contains a cpuset describing the list of processing units
|
||||
that it contains. These bitmaps may be used for CPU binding and Memory binding.
|
||||
hwloc offers an extensive bitmap manipulation interface in hwloc/bitmap.h.
|
||||
|
||||
Moreover, hwloc also comes with additional helpers for interoperability with
|
||||
several commonly used environments. See the Interoperability With Other
|
||||
Software section for details.
|
||||
|
||||
The complete API documentation is available in a full set of HTML pages, man
|
||||
pages, and self-contained PDF files (formatted for both US letter and A4
|
||||
formats) in the source tarball in doc/doxygen-doc/.
|
||||
|
||||
NOTE: If you are building the documentation from a Git clone, you will need to
|
||||
have Doxygen and pdflatex installed -- the documentation will be built during
|
||||
the normal "make" process. The documentation is installed during "make install"
|
||||
to $prefix/share/doc/hwloc/ and your system's default man page tree (under
|
||||
$prefix, of course).
|
||||
|
||||
Portability
|
||||
|
||||
Operating Systems have varying support for CPU and memory binding, e.g. while
|
||||
some Operating Systems provide interfaces for all kinds of CPU and memory
|
||||
bindings, some others provide only interfaces for a limited number of kinds of
|
||||
CPU and memory binding, and some do not provide any binding interface at all.
|
||||
Hwloc's binding functions would then simply return the ENOSYS error (Function
|
||||
not implemented), meaning that the underlying Operating System does not provide
|
||||
any interface for them. CPU binding and Memory binding provide more information
|
||||
on which hwloc binding functions should be preferred because interfaces for
|
||||
them are usually available on the supported Operating Systems.
|
||||
|
||||
Similarly, the ability of reporting topology information varies from one
|
||||
platform to another. As shown in Command-line Examples, hwloc can obtain
|
||||
information on a wide variety of hardware topologies. However, some platforms
|
||||
and/or operating system versions will only report a subset of this information.
|
||||
For example, on a PPC64-based system with 8 cores (each with 2 hardware
|
||||
threads) running a default 2.6.18-based kernel from RHEL 5.4, hwloc is only
|
||||
able to glean information about NUMA nodes and processor units (PUs). No
|
||||
information about caches, packages, or cores is available.
|
||||
|
||||
Here's the graphical output from lstopo on this platform when Simultaneous
|
||||
Multi-Threading (SMT) is enabled:
|
||||
|
||||
[ppc64-with]
|
||||
|
||||
And here's the graphical output from lstopo on this platform when SMT is
|
||||
disabled:
|
||||
|
||||
[ppc64-without]
|
||||
|
||||
Notice that hwloc only sees half the PUs when SMT is disabled. PU L#6, for
|
||||
example, seems to change location from NUMA node #0 to #1. In reality, no PUs
|
||||
"moved" -- they were simply re-numbered when hwloc only saw half as many (see
|
||||
also Logical index in Indexes and Sets). Hence, PU L#6 in the SMT-disabled
|
||||
picture probably corresponds to PU L#12 in the SMT-enabled picture.
|
||||
|
||||
This same "PUs have disappeared" effect can be seen on other platforms -- even
|
||||
platforms / OSs that provide much more information than the above PPC64 system.
|
||||
This is an unfortunate side-effect of how operating systems report information
|
||||
to hwloc.
|
||||
|
||||
Note that after upgrading the Linux kernel on the same PPC64 system mentioned above
|
||||
to 2.6.34, hwloc is able to discover all the topology information. The
|
||||
following picture shows the entire topology layout when SMT is enabled:
|
||||
|
||||
[ppc64-full]
|
||||
|
||||
Developers using the hwloc API or XML output for portable applications should
|
||||
therefore be extremely careful to not make any assumptions about the structure
|
||||
of data that is returned. For example, per the above reported PPC topology, it
|
||||
is not safe to assume that PUs will always be descendants of cores.
|
||||
|
||||
Additionally, future hardware may insert new topology elements that are not
|
||||
available in this version of hwloc. Long-lived applications that are meant to
|
||||
span multiple different hardware platforms should also be careful about making
|
||||
structure assumptions. For example, a new element may someday exist between a
|
||||
core and a PU.
|
||||
|
||||
API Example
|
||||
|
||||
The following small C example (available in the source tree as ``doc/examples/
|
||||
hwloc-hello.c'') prints the topology of the machine and performs some thread
|
||||
and memory binding. More examples are available in the doc/examples/ directory
|
||||
of the source tree.
|
||||
|
||||
/* Example hwloc API program.
|
||||
*
|
||||
* See other examples under doc/examples/ in the source tree
|
||||
* for more details.
|
||||
*
|
||||
* Copyright (c) 2009-2016 Inria. All rights reserved.
|
||||
* Copyright (c) 2009-2011 Université Bordeaux
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*
|
||||
* hwloc-hello.c
|
||||
*/
|
||||
#include "hwloc.h"
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
static void print_children(hwloc_topology_t topology, hwloc_obj_t obj,
|
||||
int depth)
|
||||
{
|
||||
char type[32], attr[1024];
|
||||
unsigned i;
|
||||
hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
|
||||
printf("%*s%s", 2*depth, "", type);
|
||||
if (obj->os_index != (unsigned) -1)
|
||||
printf("#%u", obj->os_index);
|
||||
hwloc_obj_attr_snprintf(attr, sizeof(attr), obj, " ", 0);
|
||||
if (*attr)
|
||||
printf("(%s)", attr);
|
||||
printf("\n");
|
||||
for (i = 0; i < obj->arity; i++) {
|
||||
print_children(topology, obj->children[i], depth + 1);
|
||||
}
|
||||
}
|
||||
int main(void)
|
||||
{
|
||||
int depth;
|
||||
unsigned i, n;
|
||||
unsigned long size;
|
||||
int levels;
|
||||
char string[128];
|
||||
int topodepth;
|
||||
void *m;
|
||||
hwloc_topology_t topology;
|
||||
hwloc_cpuset_t cpuset;
|
||||
hwloc_obj_t obj;
|
||||
/* Allocate and initialize topology object. */
|
||||
hwloc_topology_init(&topology);
|
||||
/* ... Optionally, put detection configuration here to ignore
|
||||
some objects types, define a synthetic topology, etc....
|
||||
The default is to detect all the objects of the machine that
|
||||
the caller is allowed to access. See Configure Topology
|
||||
Detection. */
|
||||
/* Perform the topology detection. */
|
||||
hwloc_topology_load(topology);
|
||||
/* Optionally, get some additional topology information
|
||||
in case we need the topology depth later. */
|
||||
topodepth = hwloc_topology_get_depth(topology);
|
||||
/*****************************************************************
|
||||
* First example:
|
||||
* Walk the topology with an array style, from level 0 (always
|
||||
* the system level) to the lowest level (always the proc level).
|
||||
*****************************************************************/
|
||||
for (depth = 0; depth < topodepth; depth++) {
|
||||
printf("*** Objects at level %d\n", depth);
|
||||
for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
i++) {
|
||||
hwloc_obj_type_snprintf(string, sizeof(string),
|
||||
hwloc_get_obj_by_depth(topology, depth, i), 0);
|
||||
printf("Index %u: %s\n", i, string);
|
||||
}
|
||||
}
|
||||
/*****************************************************************
|
||||
* Second example:
|
||||
* Walk the topology with a tree style.
|
||||
*****************************************************************/
|
||||
printf("*** Printing overall tree\n");
|
||||
print_children(topology, hwloc_get_root_obj(topology), 0);
|
||||
/*****************************************************************
|
||||
* Third example:
|
||||
* Print the number of packages.
|
||||
*****************************************************************/
|
||||
depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
|
||||
if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
|
||||
printf("*** The number of packages is unknown\n");
|
||||
} else {
|
||||
printf("*** %u package(s)\n",
|
||||
hwloc_get_nbobjs_by_depth(topology, depth));
|
||||
}
|
||||
/*****************************************************************
|
||||
* Fourth example:
|
||||
* Compute the amount of cache that the first logical processor
|
||||
* has above it.
|
||||
*****************************************************************/
|
||||
levels = 0;
|
||||
size = 0;
|
||||
for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
|
||||
obj;
|
||||
obj = obj->parent)
|
||||
if (hwloc_obj_type_is_cache(obj->type)) {
|
||||
levels++;
|
||||
size += obj->attr->cache.size;
|
||||
}
|
||||
printf("*** Logical processor 0 has %d caches totaling %luKB\n",
|
||||
levels, size / 1024);
|
||||
/*****************************************************************
|
||||
* Fifth example:
|
||||
* Bind to only one thread of the last core of the machine.
|
||||
*
|
||||
* First find out where cores are, or else smaller sets of CPUs if
|
||||
* the OS doesn't have the notion of a "core".
|
||||
*****************************************************************/
|
||||
depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE);
|
||||
/* Get last core. */
|
||||
obj = hwloc_get_obj_by_depth(topology, depth,
|
||||
hwloc_get_nbobjs_by_depth(topology, depth) - 1);
|
||||
if (obj) {
|
||||
/* Get a copy of its cpuset that we may modify. */
|
||||
cpuset = hwloc_bitmap_dup(obj->cpuset);
|
||||
/* Get only one logical processor (in case the core is
|
||||
SMT/hyper-threaded). */
|
||||
hwloc_bitmap_singlify(cpuset);
|
||||
/* And try to bind ourself there. */
|
||||
if (hwloc_set_cpubind(topology, cpuset, 0)) {
|
||||
char *str;
|
||||
int error = errno;
|
||||
hwloc_bitmap_asprintf(&str, obj->cpuset);
|
||||
printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error));
|
||||
free(str);
|
||||
}
|
||||
/* Free our cpuset copy */
|
||||
hwloc_bitmap_free(cpuset);
|
||||
}
|
||||
/*****************************************************************
|
||||
* Sixth example:
|
||||
* Allocate some memory on the last NUMA node, bind some existing
|
||||
* memory to the last NUMA node.
|
||||
*****************************************************************/
|
||||
/* Get last node. There's always at least one. */
|
||||
n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
|
||||
obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1);
|
||||
size = 1024*1024;
|
||||
m = hwloc_alloc_membind(topology, size, obj->nodeset,
|
||||
HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
|
||||
hwloc_free(topology, m, size);
|
||||
m = malloc(size);
|
||||
hwloc_set_area_membind(topology, m, size, obj->nodeset,
|
||||
HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
|
||||
free(m);
|
||||
/* Destroy topology object. */
|
||||
hwloc_topology_destroy(topology);
|
||||
return 0;
|
||||
}
|
||||
|
||||
hwloc provides a pkg-config executable to obtain relevant compiler and linker
|
||||
flags. For example, it can be used thusly to compile applications that utilize
|
||||
the hwloc library (assuming GNU Make):
|
||||
|
||||
CFLAGS += $(shell pkg-config --cflags hwloc)
|
||||
LDLIBS += $(shell pkg-config --libs hwloc)
|
||||
|
||||
hwloc-hello: hwloc-hello.c
|
||||
$(CC) hwloc-hello.c $(CFLAGS) -o hwloc-hello $(LDLIBS)
|
||||
|
||||
On a machine with 2 processor packages -- each package of which has two processing
|
||||
cores -- the output from running hwloc-hello could be something like the
|
||||
following:
|
||||
|
||||
shell$ ./hwloc-hello
|
||||
*** Objects at level 0
|
||||
Index 0: Machine
|
||||
*** Objects at level 1
|
||||
Index 0: Package#0
|
||||
Index 1: Package#1
|
||||
*** Objects at level 2
|
||||
Index 0: Core#0
|
||||
Index 1: Core#1
|
||||
Index 2: Core#3
|
||||
Index 3: Core#2
|
||||
*** Objects at level 3
|
||||
Index 0: PU#0
|
||||
Index 1: PU#1
|
||||
Index 2: PU#2
|
||||
Index 3: PU#3
|
||||
*** Printing overall tree
|
||||
Machine
|
||||
Package#0
|
||||
Core#0
|
||||
PU#0
|
||||
Core#1
|
||||
PU#1
|
||||
Package#1
|
||||
Core#3
|
||||
PU#2
|
||||
Core#2
|
||||
PU#3
|
||||
*** 2 package(s)
|
||||
*** Logical processor 0 has 0 caches totaling 0KB
|
||||
shell$
|
||||
|
||||
Questions and Bugs
|
||||
|
||||
|
@ -80,6 +474,20 @@ www.open-mpi.org/community/lists/hwloc.php).
|
|||
|
||||
There is also a #hwloc IRC channel on Libera Chat (irc.libera.chat).
|
||||
|
||||
History / Credits
|
||||
|
||||
hwloc is the evolution and merger of the libtopology project and the Portable
|
||||
Linux Processor Affinity (PLPA) (https://www.open-mpi.org/projects/plpa/)
|
||||
project. Because of functional and ideological overlap, these two code bases
|
||||
and ideas were merged and released under the name "hwloc" as an Open MPI
|
||||
sub-project.
|
||||
|
||||
libtopology was initially developed by the Inria Runtime Team-Project. PLPA was
|
||||
initially developed by the Open MPI development team as a sub-project. Both are
|
||||
now deprecated in favor of hwloc, which is distributed as an Open MPI
|
||||
sub-project.
|
||||
|
||||
|
||||
See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation.
|
||||
|
||||
See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation,
|
||||
actual links to related pages, images, etc.
|
||||
|
|
7
src/3rdparty/hwloc/VERSION
vendored
7
src/3rdparty/hwloc/VERSION
vendored
|
@ -8,7 +8,7 @@
|
|||
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
|
||||
|
||||
major=2
|
||||
minor=9
|
||||
minor=10
|
||||
release=0
|
||||
|
||||
# greek is used for alpha or beta release tags. If it is non-empty,
|
||||
|
@ -22,7 +22,7 @@ greek=
|
|||
|
||||
# The date when this release was created
|
||||
|
||||
date="Dec 14, 2022"
|
||||
date="Dec 04, 2023"
|
||||
|
||||
# If snapshot=1, then use the value from snapshot_version as the
|
||||
# entire hwloc version (i.e., ignore major, minor, release, and
|
||||
|
@ -41,7 +41,6 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
|
|||
# 2. Version numbers are described in the Libtool current:revision:age
|
||||
# format.
|
||||
|
||||
libhwloc_so_version=21:1:6
|
||||
libnetloc_so_version=0:0:0
|
||||
libhwloc_so_version=22:0:7
|
||||
|
||||
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2022 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -11,9 +11,9 @@
|
|||
#ifndef HWLOC_CONFIG_H
|
||||
#define HWLOC_CONFIG_H
|
||||
|
||||
#define HWLOC_VERSION "2.9.0"
|
||||
#define HWLOC_VERSION "2.10.0"
|
||||
#define HWLOC_VERSION_MAJOR 2
|
||||
#define HWLOC_VERSION_MINOR 9
|
||||
#define HWLOC_VERSION_MINOR 10
|
||||
#define HWLOC_VERSION_RELEASE 0
|
||||
#define HWLOC_VERSION_GREEK ""
|
||||
|
||||
|
|
51
src/3rdparty/hwloc/include/hwloc/bitmap.h
vendored
51
src/3rdparty/hwloc/include/hwloc/bitmap.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2022 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -50,9 +50,10 @@ extern "C" {
|
|||
* hwloc_bitmap_free(set);
|
||||
* \endcode
|
||||
*
|
||||
* \note Most functions below return an int that may be negative in case of
|
||||
* error. The usual error case would be an internal failure to realloc/extend
|
||||
* \note Most functions below return 0 on success and -1 on error.
|
||||
* The usual error case would be an internal failure to realloc/extend
|
||||
* the storage of the bitmap (\p errno would be set to \c ENOMEM).
|
||||
* See also \ref hwlocality_api_error_reporting.
|
||||
*
|
||||
* \note Several examples of using the bitmap API are available under the
|
||||
* doc/examples/ directory in the source tree.
|
||||
|
@ -83,7 +84,13 @@ typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t;
|
|||
*/
|
||||
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc;
|
||||
|
||||
/** \brief Allocate a new full bitmap. */
|
||||
/** \brief Allocate a new full bitmap.
|
||||
*
|
||||
* \returns A valid bitmap or \c NULL.
|
||||
*
|
||||
* The bitmap should be freed by a corresponding call to
|
||||
* hwloc_bitmap_free().
|
||||
*/
|
||||
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc;
|
||||
|
||||
/** \brief Free bitmap \p bitmap.
|
||||
|
@ -119,11 +126,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buf
|
|||
|
||||
/** \brief Stringify a bitmap into a newly allocated string.
|
||||
*
|
||||
* \return -1 on error.
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
||||
|
||||
/** \brief Parse a bitmap string and store it in bitmap \p bitmap.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
|
||||
|
||||
|
@ -144,11 +153,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_
|
|||
|
||||
/** \brief Stringify a bitmap into a newly allocated list string.
|
||||
*
|
||||
* \return -1 on error.
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
||||
|
||||
/** \brief Parse a list string and store it in bitmap \p bitmap.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
|
||||
|
||||
|
@ -168,11 +179,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, si
|
|||
|
||||
/** \brief Stringify a bitmap into a newly allocated taskset-specific string.
|
||||
*
|
||||
* \return -1 on error.
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
||||
|
||||
/** \brief Parse a taskset-specific bitmap string and store it in bitmap \p bitmap.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
|
||||
|
||||
|
@ -279,6 +292,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_to_ulongs(hwloc_const_bitmap_t bitmap, unsigned
|
|||
* When called on the output of hwloc_topology_get_topology_cpuset(),
|
||||
* the returned number is large enough for all cpusets of the topology.
|
||||
*
|
||||
* \return the number of unsigned longs required.
|
||||
* \return -1 if \p bitmap is infinite.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
@ -305,21 +319,23 @@ HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attr
|
|||
|
||||
/** \brief Compute the first index (least significant bit) in bitmap \p bitmap
|
||||
*
|
||||
* \return -1 if no index is set in \p bitmap.
|
||||
* \return the first index set in \p bitmap.
|
||||
* \return -1 if \p bitmap is empty.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the next index in bitmap \p bitmap which is after index \p prev
|
||||
*
|
||||
* If \p prev is -1, the first index is returned.
|
||||
*
|
||||
* \return the first index set in \p bitmap if \p prev is \c -1.
|
||||
* \return the next index set in \p bitmap if \p prev is not \c -1.
|
||||
* \return -1 if no index with higher index is set in \p bitmap.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the last index (most significant bit) in bitmap \p bitmap
|
||||
*
|
||||
* \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set.
|
||||
* \return the last index set in \p bitmap.
|
||||
* \return -1 if \p bitmap is empty, or if \p bitmap is infinitely set.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
|
@ -327,28 +343,29 @@ HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attrib
|
|||
* indexes that are in the bitmap).
|
||||
*
|
||||
* \return the number of indexes that are in the bitmap.
|
||||
*
|
||||
* \return -1 if \p bitmap is infinitely set.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the first unset index (least significant bit) in bitmap \p bitmap
|
||||
*
|
||||
* \return -1 if no index is unset in \p bitmap.
|
||||
* \return the first unset index in \p bitmap.
|
||||
* \return -1 if \p bitmap is full.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_first_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the next unset index in bitmap \p bitmap which is after index \p prev
|
||||
*
|
||||
* If \p prev is -1, the first unset index is returned.
|
||||
*
|
||||
* \return the first index unset in \p bitmap if \p prev is \c -1.
|
||||
* \return the next index unset in \p bitmap if \p prev is not \c -1.
|
||||
* \return -1 if no index with higher index is unset in \p bitmap.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_next_unset(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the last unset index (most significant bit) in bitmap \p bitmap
|
||||
*
|
||||
* \return -1 if no index is unset in \p bitmap, or if \p bitmap is infinitely set.
|
||||
* \return the last index unset in \p bitmap.
|
||||
* \return -1 if \p bitmap is full, or if \p bitmap is not infinitely set.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
|
@ -428,6 +445,8 @@ HWLOC_DECLSPEC int hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bi
|
|||
/** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersect.
|
||||
*
|
||||
* \return 1 if bitmaps intersect, 0 otherwise.
|
||||
*
|
||||
* \note The empty bitmap does not intersect any other bitmap.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
|
||||
|
||||
|
|
8
src/3rdparty/hwloc/include/hwloc/cuda.h
vendored
8
src/3rdparty/hwloc/include/hwloc/cuda.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010-2021 Inria. All rights reserved.
|
||||
* Copyright © 2010-2023 Inria. All rights reserved.
|
||||
* Copyright © 2010-2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -42,6 +42,9 @@ extern "C" {
|
|||
/** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
|
||||
*
|
||||
* Device \p cudevice must match the local machine.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
|
@ -87,6 +90,9 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused
|
|||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
|
|
8
src/3rdparty/hwloc/include/hwloc/cudart.h
vendored
8
src/3rdparty/hwloc/include/hwloc/cudart.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010-2021 Inria. All rights reserved.
|
||||
* Copyright © 2010-2023 Inria. All rights reserved.
|
||||
* Copyright © 2010-2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -43,6 +43,9 @@ extern "C" {
|
|||
/** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx.
|
||||
*
|
||||
* Device index \p idx must match the local machine.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
|
@ -84,6 +87,9 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus
|
|||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
|
|
17
src/3rdparty/hwloc/include/hwloc/diff.h
vendored
17
src/3rdparty/hwloc/include/hwloc/diff.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2013-2020 Inria. All rights reserved.
|
||||
* Copyright © 2013-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -222,6 +222,8 @@ enum hwloc_topology_diff_apply_flags_e {
|
|||
HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags);
|
||||
|
||||
/** \brief Destroy a list of topology differences.
|
||||
*
|
||||
* \return 0.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);
|
||||
|
||||
|
@ -233,6 +235,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);
|
|||
* This identifier is usually the name of the other XML file
|
||||
* that contains the reference topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note the pointer returned in refname should later be freed
|
||||
* by the caller.
|
||||
*/
|
||||
|
@ -246,10 +250,17 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(const char *xmlpath, hwloc_topol
|
|||
* This identifier is usually the name of the other XML file
|
||||
* that contains the reference topology.
|
||||
* This attribute is given back when reading the diff from XML.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname, const char *xmlpath);
|
||||
|
||||
/** \brief Load a list of topology differences from an XML buffer.
|
||||
*
|
||||
* Build a list of differences from the XML memory buffer given
|
||||
* at \p xmlbuffer and of length \p buflen (including an ending \0).
|
||||
* This buffer may have been filled earlier with
|
||||
* hwloc_topology_diff_export_xmlbuffer().
|
||||
*
|
||||
* If not \c NULL, \p refname will be filled with the identifier
|
||||
* string of the reference topology for the difference file,
|
||||
|
@ -257,6 +268,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, co
|
|||
* This identifier is usually the name of the other XML file
|
||||
* that contains the reference topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note the pointer returned in refname should later be freed
|
||||
* by the caller.
|
||||
*/
|
||||
|
@ -274,6 +287,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int
|
|||
* The returned buffer ends with a \0 that is included in the returned
|
||||
* length.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note The XML buffer should later be freed with hwloc_free_xmlbuffer().
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen);
|
||||
|
|
37
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
37
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010-2022 Inria. All rights reserved.
|
||||
* Copyright © 2010-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -131,6 +131,8 @@ enum hwloc_distances_kind_e {
|
|||
*
|
||||
* Each distance matrix returned in the \p distances array should be released
|
||||
* by the caller using hwloc_distances_release().
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_distances_get(hwloc_topology_t topology,
|
||||
|
@ -140,6 +142,8 @@ hwloc_distances_get(hwloc_topology_t topology,
|
|||
/** \brief Retrieve distance matrices for object at a specific depth in the topology.
|
||||
*
|
||||
* Identical to hwloc_distances_get() with the additional \p depth filter.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
|
||||
|
@ -149,6 +153,8 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
|
|||
/** \brief Retrieve distance matrices for object of a specific type.
|
||||
*
|
||||
* Identical to hwloc_distances_get() with the additional \p type filter.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
|
||||
|
@ -162,6 +168,8 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
|
|||
* The name of the most common structure is "NUMALatency".
|
||||
* Others include "XGMIBandwidth", "XGMIHops", "XeLinkBandwidth",
|
||||
* and "NVLinkBandwidth".
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
|
||||
|
@ -171,7 +179,12 @@ hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
|
|||
/** \brief Get a description of what a distances structure contains.
|
||||
*
|
||||
* For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT),
|
||||
* or NULL if unknown.
|
||||
* or \c NULL if unknown.
|
||||
*
|
||||
* \return the constant string with the name of the distance structure.
|
||||
*
|
||||
* \note The returned name should not be freed by the caller,
|
||||
* it belongs to the hwloc library.
|
||||
*/
|
||||
HWLOC_DECLSPEC const char *
|
||||
hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances);
|
||||
|
@ -252,6 +265,8 @@ enum hwloc_distances_transform_e {
|
|||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return 0 on success, -1 on error for instance if flags are invalid.
|
||||
*
|
||||
* \note Objects in distances array \p objs may be directly modified
|
||||
* in place without using hwloc_distances_transform().
|
||||
* One may use hwloc_get_obj_with_same_locality() to easily convert
|
||||
|
@ -272,6 +287,7 @@ HWLOC_DECLSPEC int hwloc_distances_transform(hwloc_topology_t topology, struct h
|
|||
|
||||
/** \brief Find the index of an object in a distances structure.
|
||||
*
|
||||
* \return the index of the object in the distances structure if any.
|
||||
* \return -1 if object \p obj is not involved in structure \p distances.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
|
@ -289,6 +305,7 @@ hwloc_distances_obj_index(struct hwloc_distances_s *distances, hwloc_obj_t obj)
|
|||
* The distance from \p obj1 to \p obj2 is stored in the value pointed by
|
||||
* \p value1to2 and reciprocally.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
|
@ -374,8 +391,8 @@ hwloc_distances_add_create(hwloc_topology_t topology,
|
|||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return \c -1 on error.
|
||||
* \return 0 on success.
|
||||
* \return -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology,
|
||||
hwloc_distances_add_handle_t handle,
|
||||
|
@ -411,8 +428,8 @@ enum hwloc_distances_add_flag_e {
|
|||
*
|
||||
* On error, the temporary distances structure and its content are destroyed.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return \c -1 on error.
|
||||
* \return 0 on success.
|
||||
* \return -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology,
|
||||
hwloc_distances_add_handle_t handle,
|
||||
|
@ -433,18 +450,24 @@ HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology,
|
|||
*
|
||||
* If these distances were used to group objects, these additional
|
||||
* Group objects are not removed from the topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology);
|
||||
|
||||
/** \brief Remove distance matrices for objects at a specific depth in the topology.
|
||||
*
|
||||
* Identical to hwloc_distances_remove() but only applies to one level of the topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth);
|
||||
|
||||
/** \brief Remove distance matrices for objects of a specific type in the topology.
|
||||
*
|
||||
* Identical to hwloc_distances_remove() but only applies to one level of the topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
|
||||
|
@ -458,6 +481,8 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
|
|||
/** \brief Release and remove the given distance matrix from the topology.
|
||||
*
|
||||
* This function includes a call to hwloc_distances_release().
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances);
|
||||
|
||||
|
|
16
src/3rdparty/hwloc/include/hwloc/export.h
vendored
16
src/3rdparty/hwloc/include/hwloc/export.h
vendored
|
@ -55,7 +55,7 @@ enum hwloc_topology_export_xml_flags_e {
|
|||
*
|
||||
* \p flags is an OR'ed set of ::hwloc_topology_export_xml_flags_e.
|
||||
*
|
||||
* \return -1 if a failure occured.
|
||||
* \return 0 on success, or -1 on error.
|
||||
*
|
||||
* \note See also hwloc_topology_set_userdata_export_callback()
|
||||
* for exporting application-specific object userdata.
|
||||
|
@ -91,7 +91,7 @@ HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const ch
|
|||
*
|
||||
* \p flags is an OR'ed set of ::hwloc_topology_export_xml_flags_e.
|
||||
*
|
||||
* \return -1 if a failure occured.
|
||||
* \return 0 on success, or -1 on error.
|
||||
*
|
||||
* \note See also hwloc_topology_set_userdata_export_callback()
|
||||
* for exporting application-specific object userdata.
|
||||
|
@ -145,13 +145,15 @@ HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t
|
|||
* that were given to the export callback.
|
||||
*
|
||||
* Only printable characters may be exported to XML string attributes.
|
||||
* If a non-printable character is passed in \p name or \p buffer,
|
||||
* the function returns -1 with errno set to EINVAL.
|
||||
*
|
||||
* If exporting binary data, the application should first encode into
|
||||
* printable characters only (or use hwloc_export_obj_userdata_base64()).
|
||||
* It should also take care of portability issues if the export may
|
||||
* be reimported on a different architecture.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if a non-printable character is
|
||||
passed in \p name or \p buffer.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
|
||||
|
||||
|
@ -165,8 +167,14 @@ HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t to
|
|||
* This function may only be called from within the export() callback passed
|
||||
* to hwloc_topology_set_userdata_export_callback().
|
||||
*
|
||||
* The name must be made of printable characters for export to XML string attributes.
|
||||
*
|
||||
* The function does not take care of portability issues if the export
|
||||
* may be reimported on a different architecture.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if a non-printable character is
|
||||
* passed in \p name.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
|
||||
|
||||
|
|
5
src/3rdparty/hwloc/include/hwloc/gl.h
vendored
5
src/3rdparty/hwloc/include/hwloc/gl.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2012 Blue Brain Project, EPFL. All rights reserved.
|
||||
* Copyright © 2012-2021 Inria. All rights reserved.
|
||||
* Copyright © 2012-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -102,7 +102,8 @@ hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology,
|
|||
* Retrieves the OpenGL display port (server) in \p port and device (screen)
|
||||
* in \p screen that correspond to the given hwloc OS device object.
|
||||
*
|
||||
* \return \c -1 if none could be found.
|
||||
* \return 0 on success.
|
||||
* \return -1 if none could be found.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
|
|
13
src/3rdparty/hwloc/include/hwloc/glibc-sched.h
vendored
13
src/3rdparty/hwloc/include/hwloc/glibc-sched.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -52,6 +52,8 @@ extern "C" {
|
|||
* that takes a cpu_set_t as input parameter.
|
||||
*
|
||||
* \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
|
||||
*
|
||||
* \return 0.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset,
|
||||
|
@ -80,6 +82,9 @@ hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute
|
|||
* that takes a cpu_set_t as input parameter.
|
||||
*
|
||||
* \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset,
|
||||
|
@ -95,7 +100,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu
|
|||
cpu = 0;
|
||||
while (count) {
|
||||
if (CPU_ISSET_S(cpu, schedsetsize, schedset)) {
|
||||
hwloc_bitmap_set(hwlocset, cpu);
|
||||
if (hwloc_bitmap_set(hwlocset, cpu) < 0)
|
||||
return -1;
|
||||
count--;
|
||||
}
|
||||
cpu++;
|
||||
|
@ -107,7 +113,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu
|
|||
assert(schedsetsize == sizeof(cpu_set_t));
|
||||
for(cpu=0; cpu<CPU_SETSIZE; cpu++)
|
||||
if (CPU_ISSET(cpu, schedset))
|
||||
hwloc_bitmap_set(hwlocset, cpu);
|
||||
if (hwloc_bitmap_set(hwlocset, cpu) < 0)
|
||||
return -1;
|
||||
#endif /* !CPU_ZERO_S */
|
||||
return 0;
|
||||
}
|
||||
|
|
1039
src/3rdparty/hwloc/include/hwloc/helper.h
vendored
1039
src/3rdparty/hwloc/include/hwloc/helper.h
vendored
File diff suppressed because it is too large
Load diff
10
src/3rdparty/hwloc/include/hwloc/levelzero.h
vendored
10
src/3rdparty/hwloc/include/hwloc/levelzero.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2021 Inria. All rights reserved.
|
||||
* Copyright © 2021-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -44,8 +44,9 @@ extern "C" {
|
|||
* the Level Zero device \p device.
|
||||
*
|
||||
* Topology \p topology and device \p device must match the local machine.
|
||||
* The Level Zero must have been initialized with Sysman enabled
|
||||
* (ZES_ENABLE_SYSMAN=1 in the environment).
|
||||
* The Level Zero library must have been initialized with Sysman enabled
|
||||
* (by calling zesInit(0) if supported,
|
||||
* or by setting ZES_ENABLE_SYSMAN=1 in the environment).
|
||||
* I/O devices detection and the Level Zero component are not needed in the
|
||||
* topology.
|
||||
*
|
||||
|
@ -55,6 +56,9 @@ extern "C" {
|
|||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
|
|
34
src/3rdparty/hwloc/include/hwloc/linux-libnuma.h
vendored
34
src/3rdparty/hwloc/include/hwloc/linux-libnuma.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2017 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010, 2012 Université Bordeaux
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
@ -50,6 +50,8 @@ extern "C" {
|
|||
* This function may be used before calling set_mempolicy, mbind, migrate_pages
|
||||
* or any other function that takes an array of unsigned long and a maximal
|
||||
* node number as input parameter.
|
||||
*
|
||||
* \return 0.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset,
|
||||
|
@ -84,6 +86,8 @@ hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpus
|
|||
* This function may be used before calling set_mempolicy, mbind, migrate_pages
|
||||
* or any other function that takes an array of unsigned long and a maximal
|
||||
* node number as input parameter.
|
||||
*
|
||||
* \return 0.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset,
|
||||
|
@ -119,6 +123,9 @@ hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nod
|
|||
* This function may be used after calling get_mempolicy or any other function
|
||||
* that takes an array of unsigned long as output parameter (and possibly
|
||||
* a maximal node number as input parameter).
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if failing an internal reallocation.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
|
||||
|
@ -130,7 +137,8 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t
|
|||
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
|
||||
if (node->os_index < maxnode
|
||||
&& (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
|
||||
hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
|
||||
if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -142,6 +150,9 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t
|
|||
* This function may be used after calling get_mempolicy or any other function
|
||||
* that takes an array of unsigned long as output parameter (and possibly
|
||||
* a maximal node number as input parameter).
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
|
||||
|
@ -153,7 +164,8 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset
|
|||
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
|
||||
if (node->os_index < maxnode
|
||||
&& (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
|
||||
hwloc_bitmap_set(nodeset, node->os_index);
|
||||
if (hwloc_bitmap_set(nodeset, node->os_index) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -184,7 +196,7 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset
|
|||
* This function may be used before calling many numa_ functions
|
||||
* that use a struct bitmask as an input parameter.
|
||||
*
|
||||
* \return newly allocated struct bitmask.
|
||||
* \return newly allocated struct bitmask, or \c NULL on error.
|
||||
*/
|
||||
static __hwloc_inline struct bitmask *
|
||||
hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc;
|
||||
|
@ -209,7 +221,7 @@ hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpu
|
|||
* This function may be used before calling many numa_ functions
|
||||
* that use a struct bitmask as an input parameter.
|
||||
*
|
||||
* \return newly allocated struct bitmask.
|
||||
* \return newly allocated struct bitmask, or \c NULL on error.
|
||||
*/
|
||||
static __hwloc_inline struct bitmask *
|
||||
hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc;
|
||||
|
@ -231,6 +243,9 @@ hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_no
|
|||
*
|
||||
* This function may be used after calling many numa_ functions
|
||||
* that use a struct bitmask as an output parameter.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
|
||||
|
@ -241,7 +256,8 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_
|
|||
hwloc_bitmap_zero(cpuset);
|
||||
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
|
||||
if (numa_bitmask_isbitset(bitmask, node->os_index))
|
||||
hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
|
||||
if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -249,6 +265,9 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_
|
|||
*
|
||||
* This function may be used after calling many numa_ functions
|
||||
* that use a struct bitmask as an output parameter.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
|
||||
|
@ -259,7 +278,8 @@ hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodese
|
|||
hwloc_bitmap_zero(nodeset);
|
||||
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
|
||||
if (numa_bitmask_isbitset(bitmask, node->os_index))
|
||||
hwloc_bitmap_set(nodeset, node->os_index);
|
||||
if (hwloc_bitmap_set(nodeset, node->os_index) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
10
src/3rdparty/hwloc/include/hwloc/linux.h
vendored
10
src/3rdparty/hwloc/include/hwloc/linux.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2021 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
@ -38,6 +38,8 @@ extern "C" {
|
|||
* The behavior is exactly the same as the Linux sched_setaffinity system call,
|
||||
* but uses a hwloc cpuset.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note This is equivalent to calling hwloc_set_proc_cpubind() with
|
||||
* HWLOC_CPUBIND_THREAD as flags.
|
||||
*/
|
||||
|
@ -52,6 +54,8 @@ HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t
|
|||
* The behavior is exactly the same as the Linux sched_getaffinity system call,
|
||||
* but uses a hwloc cpuset.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note This is equivalent to calling hwloc_get_proc_cpubind() with
|
||||
* ::HWLOC_CPUBIND_THREAD as flags.
|
||||
*/
|
||||
|
@ -62,6 +66,8 @@ HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t
|
|||
* The CPU-set \p set (previously allocated by the caller)
|
||||
* is filled with the PU which the thread last ran on.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with
|
||||
* ::HWLOC_CPUBIND_THREAD as flags.
|
||||
*/
|
||||
|
@ -72,6 +78,8 @@ HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topolo
|
|||
* Might be used when reading CPU set from sysfs attributes such as topology
|
||||
* and caches for processors, or local_cpus for devices.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note This function ignores the HWLOC_FSROOT environment variable.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_linux_read_path_as_cpumask(const char *path, hwloc_bitmap_t set);
|
||||
|
|
44
src/3rdparty/hwloc/include/hwloc/memattrs.h
vendored
44
src/3rdparty/hwloc/include/hwloc/memattrs.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2019-2022 Inria. All rights reserved.
|
||||
* Copyright © 2019-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -54,6 +54,10 @@ extern "C" {
|
|||
* Attribute values for these nodes, if any, may then be obtained with
|
||||
* hwloc_memattr_get_value() and manually compared with the desired criteria.
|
||||
*
|
||||
* Memory attributes are also used internally to build Memory Tiers which provide
|
||||
* an easy way to distinguish NUMA nodes of different kinds, as explained
|
||||
* in \ref heteromem.
|
||||
*
|
||||
* \sa An example is available in doc/examples/memory-attributes.c in the source tree.
|
||||
*
|
||||
* \note The API also supports specific objects as initiator,
|
||||
|
@ -178,6 +182,9 @@ enum hwloc_memattr_id_e {
|
|||
typedef unsigned hwloc_memattr_id_t;
|
||||
|
||||
/** \brief Return the identifier of the memory attribute with the given name.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if no such attribute exists.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_by_name(hwloc_topology_t topology,
|
||||
|
@ -247,6 +254,8 @@ enum hwloc_local_numanode_flag_e {
|
|||
* or the number of nodes that would have been stored if there were
|
||||
* enough room.
|
||||
*
|
||||
* \return 0 on success or -1 on error.
|
||||
*
|
||||
* \note Some of these NUMA nodes may not have any memory attribute
|
||||
* values and hence not be reported as actual targets in other functions.
|
||||
*
|
||||
|
@ -276,6 +285,10 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
|
|||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance with errno set to \c EINVAL if flags
|
||||
* are invalid or no such attribute exists.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when referring to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
|
@ -307,7 +320,10 @@ hwloc_memattr_get_value(hwloc_topology_t topology,
|
|||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* If there are no matching targets, \c -1 is returned with \p errno set to \c ENOENT;
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOENT if there are no matching targets.
|
||||
* \return -1 with errno set to \c EINVAL if flags are invalid,
|
||||
* or no such attribute exists.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when referring to accesses performed by CPU cores.
|
||||
|
@ -323,10 +339,6 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
|
|||
hwloc_obj_t *best_target, hwloc_uint64_t *value);
|
||||
|
||||
/** \brief Return the best initiator for the given attribute and target NUMA node.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* \c -1 is returned and \p errno is set to \c EINVAL.
|
||||
*
|
||||
* If \p value is non \c NULL, the corresponding value is returned there.
|
||||
*
|
||||
|
@ -342,7 +354,10 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
|
|||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* If there are no matching initiators, \c -1 is returned with \p errno set to \c ENOENT;
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOENT if there are no matching initiators.
|
||||
* \return -1 with errno set to \c EINVAL if the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR).
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
|
||||
|
@ -359,6 +374,9 @@ hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
|
|||
*/
|
||||
|
||||
/** \brief Return the name of a memory attribute.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if the attribute does not exist.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_name(hwloc_topology_t topology,
|
||||
|
@ -368,6 +386,9 @@ hwloc_memattr_get_name(hwloc_topology_t topology,
|
|||
/** \brief Return the flags of the given attribute.
|
||||
*
|
||||
* Flags are an OR'ed set of ::hwloc_memattr_flag_e.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if the attribute does not exist.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_flags(hwloc_topology_t topology,
|
||||
|
@ -397,6 +418,9 @@ enum hwloc_memattr_flag_e {
|
|||
* Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e.
|
||||
* Flags are an OR'ed set of ::hwloc_memattr_flag_e. It must contain at least
|
||||
* one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EBUSY if another attribute already uses this name.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_register(hwloc_topology_t topology,
|
||||
|
@ -421,6 +445,8 @@ hwloc_memattr_register(hwloc_topology_t topology,
|
|||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
* but users may for instance use it to provide custom information about
|
||||
* host memory accesses performed by GPUs.
|
||||
*
|
||||
* \return 0 on success or -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_set_value(hwloc_topology_t topology,
|
||||
|
@ -460,6 +486,8 @@ hwloc_memattr_set_value(hwloc_topology_t topology,
|
|||
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
|
||||
* values.
|
||||
*
|
||||
* \return 0 on success or -1 on error.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when referring to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
|
@ -497,6 +525,8 @@ hwloc_memattr_get_targets(hwloc_topology_t topology,
|
|||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* no initiator is returned.
|
||||
*
|
||||
* \return 0 on success or -1 on error.
|
||||
*
|
||||
* \note This function is meant for tools and debugging (listing internal information)
|
||||
* rather than for application queries. Applications should rather select useful
|
||||
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
|
||||
|
|
5
src/3rdparty/hwloc/include/hwloc/nvml.h
vendored
5
src/3rdparty/hwloc/include/hwloc/nvml.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2012-2021 Inria. All rights reserved.
|
||||
* Copyright © 2012-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -51,6 +51,9 @@ extern "C" {
|
|||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
|
|
8
src/3rdparty/hwloc/include/hwloc/opencl.h
vendored
8
src/3rdparty/hwloc/include/hwloc/opencl.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2012-2021 Inria. All rights reserved.
|
||||
* Copyright © 2012-2023 Inria. All rights reserved.
|
||||
* Copyright © 2013, 2018 Université Bordeaux. All right reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
@ -69,6 +69,9 @@ typedef union {
|
|||
/** \brief Return the domain, bus and device IDs of the OpenCL device \p device.
|
||||
*
|
||||
* Device \p device must match the local machine.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_opencl_get_device_pci_busid(cl_device_id device,
|
||||
|
@ -126,6 +129,9 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device,
|
|||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply
|
||||
* get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if the device could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2021 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -57,6 +57,9 @@ extern "C" {
|
|||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
|
|
2
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
2
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
|
@ -164,7 +164,7 @@ struct hwloc_disc_status {
|
|||
*/
|
||||
unsigned excluded_phases;
|
||||
|
||||
/** \brief OR'ed set of hwloc_disc_status_flag_e */
|
||||
/** \brief OR'ed set of ::hwloc_disc_status_flag_e */
|
||||
unsigned long flags;
|
||||
};
|
||||
|
||||
|
|
1
src/3rdparty/hwloc/include/hwloc/rename.h
vendored
1
src/3rdparty/hwloc/include/hwloc/rename.h
vendored
|
@ -176,6 +176,7 @@ extern "C" {
|
|||
|
||||
#define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object)
|
||||
#define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object)
|
||||
#define hwloc_topology_free_group_object HWLOC_NAME(topology_free_group_object)
|
||||
#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object)
|
||||
#define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets)
|
||||
#define hwloc_topology_refresh HWLOC_NAME(topology_refresh)
|
||||
|
|
5
src/3rdparty/hwloc/include/hwloc/rsmi.h
vendored
5
src/3rdparty/hwloc/include/hwloc/rsmi.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2012-2021 Inria. All rights reserved.
|
||||
* Copyright © 2012-2023 Inria. All rights reserved.
|
||||
* Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Written by Advanced Micro Devices,
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -55,6 +55,9 @@ extern "C" {
|
|||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_rsmi_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
|
|
17
src/3rdparty/hwloc/include/hwloc/shmem.h
vendored
17
src/3rdparty/hwloc/include/hwloc/shmem.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2013-2018 Inria. All rights reserved.
|
||||
* Copyright © 2013-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -48,6 +48,8 @@ extern "C" {
|
|||
* This length (in bytes) must be used in hwloc_shmem_topology_write()
|
||||
* and hwloc_shmem_topology_adopt() later.
|
||||
*
|
||||
* \return the length, or -1 on error, for instance if flags are invalid.
|
||||
*
|
||||
* \note Flags \p flags are currently unused, must be 0.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology,
|
||||
|
@ -74,9 +76,10 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology,
|
|||
* is not. However the caller may also allocate it manually in shared memory
|
||||
* to share it as well.
|
||||
*
|
||||
* \return -1 with errno set to EBUSY if the virtual memory mapping defined
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EBUSY if the virtual memory mapping defined
|
||||
* by \p mmap_address and \p length isn't available in the process.
|
||||
* \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address
|
||||
* \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address
|
||||
* or \p length aren't page-aligned.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology,
|
||||
|
@ -112,14 +115,16 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology,
|
|||
*
|
||||
* \note This function takes care of calling hwloc_topology_abi_check().
|
||||
*
|
||||
* \return -1 with errno set to EBUSY if the virtual memory mapping defined
|
||||
* \return 0 on success.
|
||||
*
|
||||
* \return -1 with errno set to \c EBUSY if the virtual memory mapping defined
|
||||
* by \p mmap_address and \p length isn't available in the process.
|
||||
*
|
||||
* \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address
|
||||
* \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address
|
||||
* or \p length aren't page-aligned, or do not match what was given to
|
||||
* hwloc_shmem_topology_write() earlier.
|
||||
*
|
||||
* \return -1 with errno set to EINVAL if the layout of the topology structure
|
||||
* \return -1 with errno set to \c EINVAL if the layout of the topology structure
|
||||
* is different between the writer process and the adopter process.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
||||
|
|
578
src/3rdparty/hwloc/include/private/netloc.h
vendored
578
src/3rdparty/hwloc/include/private/netloc.h
vendored
|
@ -1,578 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright © 2013-2014 University of Wisconsin-La Crosse.
|
||||
* All rights reserved.
|
||||
* Copyright © 2015-2017 Inria. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
* See COPYING in top-level directory.
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _NETLOC_PRIVATE_H_
|
||||
#define _NETLOC_PRIVATE_H_
|
||||
|
||||
#include <hwloc.h>
|
||||
#include <netloc.h>
|
||||
#include <netloc/uthash.h>
|
||||
#include <netloc/utarray.h>
|
||||
#include <private/autogen/config.h>
|
||||
|
||||
#define NETLOCFILE_VERSION 1
|
||||
|
||||
#ifdef NETLOC_SCOTCH
|
||||
#include <stdint.h>
|
||||
#include <scotch.h>
|
||||
#define NETLOC_int SCOTCH_Num
|
||||
#else
|
||||
#define NETLOC_int int
|
||||
#endif
|
||||
|
||||
/*
|
||||
* "Import" a few things from hwloc
|
||||
*/
|
||||
#define __netloc_attribute_unused __hwloc_attribute_unused
|
||||
#define __netloc_attribute_malloc __hwloc_attribute_malloc
|
||||
#define __netloc_attribute_const __hwloc_attribute_const
|
||||
#define __netloc_attribute_pure __hwloc_attribute_pure
|
||||
#define __netloc_attribute_deprecated __hwloc_attribute_deprecated
|
||||
#define __netloc_attribute_may_alias __hwloc_attribute_may_alias
|
||||
#define NETLOC_DECLSPEC HWLOC_DECLSPEC
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* Types
|
||||
**********************************************************************/
|
||||
|
||||
/**
|
||||
* Definitions for Comparators
|
||||
* \sa These are the return values from the following functions:
|
||||
* netloc_network_compare, netloc_dt_edge_t_compare, netloc_dt_node_t_compare
|
||||
*/
|
||||
typedef enum {
|
||||
NETLOC_CMP_SAME = 0, /**< Compared as the Same */
|
||||
NETLOC_CMP_SIMILAR = -1, /**< Compared as Similar, but not the Same */
|
||||
NETLOC_CMP_DIFF = -2 /**< Compared as Different */
|
||||
} netloc_compare_type_t;
|
||||
|
||||
/**
|
||||
* Enumerated type for the various types of supported networks
|
||||
*/
|
||||
typedef enum {
|
||||
NETLOC_NETWORK_TYPE_ETHERNET = 1, /**< Ethernet network */
|
||||
NETLOC_NETWORK_TYPE_INFINIBAND = 2, /**< InfiniBand network */
|
||||
NETLOC_NETWORK_TYPE_INVALID = 3 /**< Invalid network */
|
||||
} netloc_network_type_t;
|
||||
|
||||
/**
|
||||
* Enumerated type for the various types of supported topologies
|
||||
*/
|
||||
typedef enum {
|
||||
NETLOC_TOPOLOGY_TYPE_INVALID = -1, /**< Invalid */
|
||||
NETLOC_TOPOLOGY_TYPE_TREE = 1, /**< Tree */
|
||||
} netloc_topology_type_t;
|
||||
|
||||
/**
|
||||
* Enumerated type for the various types of nodes
|
||||
*/
|
||||
typedef enum {
|
||||
NETLOC_NODE_TYPE_HOST = 0, /**< Host (a.k.a., network addressable endpoint - e.g., MAC Address) node */
|
||||
NETLOC_NODE_TYPE_SWITCH = 1, /**< Switch node */
|
||||
NETLOC_NODE_TYPE_INVALID = 2 /**< Invalid node */
|
||||
} netloc_node_type_t;
|
||||
|
||||
typedef enum {
|
||||
NETLOC_ARCH_TREE = 0, /* Fat tree */
|
||||
} netloc_arch_type_t;
|
||||
|
||||
|
||||
/* Pre declarations to avoid inter dependency problems */
|
||||
/** \cond IGNORE */
|
||||
struct netloc_topology_t;
|
||||
typedef struct netloc_topology_t netloc_topology_t;
|
||||
struct netloc_node_t;
|
||||
typedef struct netloc_node_t netloc_node_t;
|
||||
struct netloc_edge_t;
|
||||
typedef struct netloc_edge_t netloc_edge_t;
|
||||
struct netloc_physical_link_t;
|
||||
typedef struct netloc_physical_link_t netloc_physical_link_t;
|
||||
struct netloc_path_t;
|
||||
typedef struct netloc_path_t netloc_path_t;
|
||||
|
||||
struct netloc_arch_tree_t;
|
||||
typedef struct netloc_arch_tree_t netloc_arch_tree_t;
|
||||
struct netloc_arch_node_t;
|
||||
typedef struct netloc_arch_node_t netloc_arch_node_t;
|
||||
struct netloc_arch_node_slot_t;
|
||||
typedef struct netloc_arch_node_slot_t netloc_arch_node_slot_t;
|
||||
struct netloc_arch_t;
|
||||
typedef struct netloc_arch_t netloc_arch_t;
|
||||
/** \endcond */
|
||||
|
||||
/**
|
||||
* \struct netloc_topology_t
|
||||
* \brief Netloc Topology Context
|
||||
*
|
||||
* An opaque data structure used to reference a network topology.
|
||||
*
|
||||
* \note Must be initialized with \ref netloc_topology_construct()
|
||||
*/
|
||||
struct netloc_topology_t {
|
||||
/** Topology path */
|
||||
char *topopath;
|
||||
/** Subnet ID */
|
||||
char *subnet_id;
|
||||
|
||||
/** Node List */
|
||||
netloc_node_t *nodes; /* Hash table of nodes by physical_id */
|
||||
netloc_node_t *nodesByHostname; /* Hash table of nodes by hostname */
|
||||
|
||||
netloc_physical_link_t *physical_links; /* Hash table with physical links */
|
||||
|
||||
/** Partition List */
|
||||
UT_array *partitions;
|
||||
|
||||
/** Hwloc topology List */
|
||||
char *hwlocpath;
|
||||
UT_array *topos;
|
||||
hwloc_topology_t *hwloc_topos;
|
||||
|
||||
/** Type of the graph */
|
||||
netloc_topology_type_t type;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Netloc Node Type
|
||||
*
|
||||
* Represents the concept of a node (a.k.a., vertex, endpoint) within a network
|
||||
* graph. This could be a server or a network switch. The \ref node_type parameter
|
||||
* will distinguish the exact type of node this represents in the graph.
|
||||
*/
|
||||
struct netloc_node_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable with physical_id */
|
||||
UT_hash_handle hh2; /* makes this structure hashable with hostname */
|
||||
|
||||
/** Physical ID of the node */
|
||||
char physical_id[20];
|
||||
|
||||
/** Logical ID of the node (if any) */
|
||||
int logical_id;
|
||||
|
||||
/** Type of the node */
|
||||
netloc_node_type_t type;
|
||||
|
||||
/* Pointer to physical_links */
|
||||
UT_array *physical_links;
|
||||
|
||||
/** Description information from discovery (if any) */
|
||||
char *description;
|
||||
|
||||
/**
|
||||
* Application-given private data pointer.
|
||||
* Initialized to NULL, and not used by the netloc library.
|
||||
*/
|
||||
void * userdata;
|
||||
|
||||
/** Outgoing edges from this node */
|
||||
netloc_edge_t *edges;
|
||||
|
||||
UT_array *subnodes; /* the group of nodes for the virtual nodes */
|
||||
|
||||
netloc_path_t *paths;
|
||||
|
||||
char *hostname;
|
||||
|
||||
UT_array *partitions; /* index in the list from the topology */
|
||||
|
||||
hwloc_topology_t hwlocTopo;
|
||||
int hwlocTopoIdx;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Netloc Edge Type
|
||||
*
|
||||
* Represents the concept of a directed edge within a network graph.
|
||||
*
|
||||
* \note We do not point to the netloc_node_t structure directly to
|
||||
* simplify the representation, and allow the information to more easily
|
||||
* be entered into the data store without circular references.
|
||||
* \todo JJH Is the note above still true?
|
||||
*/
|
||||
struct netloc_edge_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
|
||||
netloc_node_t *dest;
|
||||
|
||||
int id;
|
||||
|
||||
/** Pointers to the parent node */
|
||||
netloc_node_t *node;
|
||||
|
||||
/* Pointer to physical_links */
|
||||
UT_array *physical_links;
|
||||
|
||||
/** total gbits of the links */
|
||||
float total_gbits;
|
||||
|
||||
UT_array *partitions; /* index in the list from the topology */
|
||||
|
||||
UT_array *subnode_edges; /* for edges going to virtual nodes */
|
||||
|
||||
struct netloc_edge_t *other_way;
|
||||
|
||||
/**
|
||||
* Application-given private data pointer.
|
||||
* Initialized to NULL, and not used by the netloc library.
|
||||
*/
|
||||
void * userdata;
|
||||
};
|
||||
|
||||
|
||||
struct netloc_physical_link_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
|
||||
int id; // TODO long long
|
||||
netloc_node_t *src;
|
||||
netloc_node_t *dest;
|
||||
int ports[2];
|
||||
char *width;
|
||||
char *speed;
|
||||
|
||||
netloc_edge_t *edge;
|
||||
|
||||
int other_way_id;
|
||||
struct netloc_physical_link_t *other_way;
|
||||
|
||||
UT_array *partitions; /* index in the list from the topology */
|
||||
|
||||
/** gbits of the link from speed and width */
|
||||
float gbits;
|
||||
|
||||
/** Description information from discovery (if any) */
|
||||
char *description;
|
||||
};
|
||||
|
||||
struct netloc_path_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
char dest_id[20];
|
||||
UT_array *links;
|
||||
};
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* Architecture structures
|
||||
**********************************************************************/
|
||||
struct netloc_arch_tree_t {
|
||||
NETLOC_int num_levels;
|
||||
NETLOC_int *degrees;
|
||||
NETLOC_int *cost;
|
||||
};
|
||||
|
||||
struct netloc_arch_node_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
char *name; /* Hash key */
|
||||
netloc_node_t *node; /* Corresponding node */
|
||||
int idx_in_topo; /* idx with ghost hosts to have complete topo */
|
||||
int num_slots; /* it is not the real number of slots but the maximum slot idx */
|
||||
int *slot_idx; /* corresponding idx in slot_tree */
|
||||
int *slot_os_idx; /* corresponding os index for each leaf in tree */
|
||||
netloc_arch_tree_t *slot_tree; /* Tree built from hwloc */
|
||||
int num_current_slots; /* Number of PUs */
|
||||
NETLOC_int *current_slots; /* indices in the complete tree */
|
||||
int *slot_ranks; /* corresponding MPI rank for each leaf in tree */
|
||||
};
|
||||
|
||||
struct netloc_arch_node_slot_t {
|
||||
netloc_arch_node_t *node;
|
||||
int slot;
|
||||
};
|
||||
|
||||
struct netloc_arch_t {
|
||||
netloc_topology_t *topology;
|
||||
int has_slots; /* if slots are included in the architecture */
|
||||
netloc_arch_type_t type;
|
||||
union {
|
||||
netloc_arch_tree_t *node_tree;
|
||||
netloc_arch_tree_t *global_tree;
|
||||
} arch;
|
||||
netloc_arch_node_t *nodes_by_name;
|
||||
netloc_arch_node_slot_t *node_slot_by_idx; /* node_slot by index in complete topo */
|
||||
NETLOC_int num_current_hosts; /* if has_slots, host is a slot, else host is a node */
|
||||
NETLOC_int *current_hosts; /* indices in the complete topology */
|
||||
};
|
||||
|
||||
/**********************************************************************
|
||||
* Topology Functions
|
||||
**********************************************************************/
|
||||
/**
|
||||
* Allocate a topology handle.
|
||||
*
|
||||
* User is responsible for calling \ref netloc_detach on the topology handle.
|
||||
* The network parameter information is deep copied into the topology handle, so the
|
||||
* user may destruct the network handle after calling this function and/or reuse
|
||||
* the network handle.
|
||||
*
|
||||
* \returns NETLOC_SUCCESS on success
|
||||
* \returns NETLOC_ERROR upon an error.
|
||||
*/
|
||||
netloc_topology_t *netloc_topology_construct(char *path);
|
||||
|
||||
/**
|
||||
* Destruct a topology handle
|
||||
*
|
||||
* \param topology A valid pointer to a \ref netloc_topology_t handle created
|
||||
* from a prior call to \ref netloc_topology_construct.
|
||||
*
|
||||
* \returns NETLOC_SUCCESS on success
|
||||
* \returns NETLOC_ERROR upon an error.
|
||||
*/
|
||||
int netloc_topology_destruct(netloc_topology_t *topology);
|
||||
|
||||
int netloc_topology_find_partition_idx(netloc_topology_t *topology, char *partition_name);
|
||||
|
||||
int netloc_topology_read_hwloc(netloc_topology_t *topology, int num_nodes,
|
||||
netloc_node_t **node_list);
|
||||
|
||||
/*
 * Topology helper macros.
 *
 * Every macro argument that is dereferenced is wrapped in parentheses so that
 * callers may pass arbitrary pointer expressions (e.g. `&topo`, `arr[i]`,
 * `cond ? a : b`) without operator-precedence surprises.
 */

/* Walk the topology->partitions UT_array; `partition` is used as a `char **` cursor
 * and is NULL once the loop is over. */
#define netloc_topology_iter_partitions(topology,partition) \
    for ((partition) = (char **)utarray_front((topology)->partitions); \
            (partition) != NULL; \
            (partition) = (char **)utarray_next((topology)->partitions, partition))

/* Walk the topology->topos UT_array; `hwloctopo` is used as a `char **` cursor
 * and is NULL once the loop is over. */
#define netloc_topology_iter_hwloctopos(topology,hwloctopo) \
    for ((hwloctopo) = (char **)utarray_front((topology)->topos); \
            (hwloctopo) != NULL; \
            (hwloctopo) = (char **)utarray_next((topology)->topos, hwloctopo))

/* Look up `node_id` (a string key) in the topology->nodes hash; the result is stored in `node`. */
#define netloc_topology_find_node(topology,node_id,node) \
    HASH_FIND_STR((topology)->nodes, node_id, node)

/* Iterate over the topology->nodes hash; `_tmp` is the scratch cursor HASH_ITER requires. */
#define netloc_topology_iter_nodes(topology,node,_tmp) \
    HASH_ITER(hh, (topology)->nodes, node, _tmp)

/* Number of entries in the topology->nodes hash. */
#define netloc_topology_num_nodes(topology) \
    HASH_COUNT((topology)->nodes)
|
||||
|
||||
/*************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for netloc_node_t
|
||||
*
|
||||
* User is responsible for calling the destructor on the handle.
|
||||
*
|
||||
* Returns
|
||||
* A newly allocated pointer to the network information.
|
||||
*/
|
||||
netloc_node_t *netloc_node_construct(void);
|
||||
|
||||
/**
|
||||
* Destructor for netloc_node_t
|
||||
*
|
||||
* \param node A valid node handle
|
||||
*
|
||||
* Returns
|
||||
* NETLOC_SUCCESS on success
|
||||
* NETLOC_ERROR on error
|
||||
*/
|
||||
int netloc_node_destruct(netloc_node_t *node);
|
||||
|
||||
char *netloc_node_pretty_print(netloc_node_t* node);
|
||||
|
||||
#define netloc_node_get_num_subnodes(node) \
|
||||
utarray_len((node)->subnodes)
|
||||
|
||||
#define netloc_node_get_subnode(node,i) \
|
||||
(*(netloc_node_t **)utarray_eltptr((node)->subnodes, (i)))
|
||||
|
||||
#define netloc_node_get_num_edges(node) \
|
||||
utarray_len((node)->edges)
|
||||
|
||||
#define netloc_node_get_edge(node,i) \
|
||||
(*(netloc_edge_t **)utarray_eltptr((node)->edges, (i)))
|
||||
|
||||
/*
 * Node helper macros.
 *
 * The `node` argument is parenthesized before being dereferenced so that any
 * pointer expression (e.g. `nodes + 1`, `*pnode`) expands correctly.
 */

/* Iterate over the node->edges hash; `_tmp` is the scratch cursor HASH_ITER requires. */
#define netloc_node_iter_edges(node,edge,_tmp) \
    HASH_ITER(hh, (node)->edges, edge, _tmp)

/* Iterate over the node->paths hash; `_tmp` is the scratch cursor HASH_ITER requires.
 * (This macro used to be defined twice with the same body; one definition is enough.) */
#define netloc_node_iter_paths(node,path,_tmp) \
    HASH_ITER(hh, (node)->paths, path, _tmp)

/* Non-zero when the node type is NETLOC_NODE_TYPE_HOST. */
#define netloc_node_is_host(node) \
    ((node)->type == NETLOC_NODE_TYPE_HOST)

/* Non-zero when the node type is NETLOC_NODE_TYPE_SWITCH. */
#define netloc_node_is_switch(node) \
    ((node)->type == NETLOC_NODE_TYPE_SWITCH)
|
||||
|
||||
int netloc_node_is_in_partition(netloc_node_t *node, int partition);
|
||||
|
||||
/*************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for netloc_edge_t
|
||||
*
|
||||
* User is responsible for calling the destructor on the handle.
|
||||
*
|
||||
* Returns
|
||||
* A newly allocated pointer to the edge information.
|
||||
*/
|
||||
netloc_edge_t *netloc_edge_construct(void);
|
||||
|
||||
/**
|
||||
* Destructor for netloc_edge_t
|
||||
*
|
||||
* \param edge A valid edge handle
|
||||
*
|
||||
* Returns
|
||||
* NETLOC_SUCCESS on success
|
||||
* NETLOC_ERROR on error
|
||||
*/
|
||||
int netloc_edge_destruct(netloc_edge_t *edge);
|
||||
|
||||
char * netloc_edge_pretty_print(netloc_edge_t* edge);
|
||||
|
||||
void netloc_edge_reset_uid(void);
|
||||
|
||||
int netloc_edge_is_in_partition(netloc_edge_t *edge, int partition);
|
||||
|
||||
#define netloc_edge_get_num_links(edge) \
|
||||
utarray_len((edge)->physical_links)
|
||||
|
||||
#define netloc_edge_get_link(edge,i) \
|
||||
(*(netloc_physical_link_t **)utarray_eltptr((edge)->physical_links, (i)))
|
||||
|
||||
#define netloc_edge_get_num_subedges(edge) \
|
||||
utarray_len((edge)->subnode_edges)
|
||||
|
||||
#define netloc_edge_get_subedge(edge,i) \
|
||||
(*(netloc_edge_t **)utarray_eltptr((edge)->subnode_edges, (i)))
|
||||
|
||||
/*************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for netloc_physical_link_t
|
||||
*
|
||||
* User is responsible for calling the destructor on the handle.
|
||||
*
|
||||
* Returns
|
||||
* A newly allocated pointer to the physical link information.
|
||||
*/
|
||||
netloc_physical_link_t * netloc_physical_link_construct(void);
|
||||
|
||||
/**
|
||||
* Destructor for netloc_physical_link_t
|
||||
*
|
||||
* Returns
|
||||
* NETLOC_SUCCESS on success
|
||||
* NETLOC_ERROR on error
|
||||
*/
|
||||
int netloc_physical_link_destruct(netloc_physical_link_t *link);
|
||||
|
||||
char * netloc_link_pretty_print(netloc_physical_link_t* link);
|
||||
|
||||
/*************************************************/
|
||||
|
||||
|
||||
netloc_path_t *netloc_path_construct(void);
|
||||
int netloc_path_destruct(netloc_path_t *path);
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* Architecture functions
|
||||
**********************************************************************/
|
||||
|
||||
netloc_arch_t * netloc_arch_construct(void);
|
||||
|
||||
int netloc_arch_destruct(netloc_arch_t *arch);
|
||||
|
||||
int netloc_arch_build(netloc_arch_t *arch, int add_slots);
|
||||
|
||||
int netloc_arch_set_current_resources(netloc_arch_t *arch);
|
||||
|
||||
int netloc_arch_set_global_resources(netloc_arch_t *arch);
|
||||
|
||||
int netloc_arch_node_get_hwloc_info(netloc_arch_node_t *arch);
|
||||
|
||||
void netloc_arch_tree_complete(netloc_arch_tree_t *tree, UT_array **down_degrees_by_level,
|
||||
int num_hosts, int **parch_idx);
|
||||
|
||||
NETLOC_int netloc_arch_tree_num_leaves(netloc_arch_tree_t *tree);
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* Access functions of various elements of the topology
|
||||
**********************************************************************/
|
||||
|
||||
#define netloc_get_num_partitions(object) \
|
||||
utarray_len((object)->partitions)
|
||||
|
||||
#define netloc_get_partition(object,i) \
|
||||
(*(int *)utarray_eltptr((object)->partitions, (i)))
|
||||
|
||||
|
||||
/* Walk the path->links UT_array; `link` is used as a `netloc_physical_link_t **`
 * cursor and is NULL once the loop is over.
 * `path` is parenthesized so that any pointer expression may be passed. */
#define netloc_path_iter_links(path,link) \
    for ((link) = (netloc_physical_link_t **)utarray_front((path)->links); \
            (link) != NULL; \
            (link) = (netloc_physical_link_t **)utarray_next((path)->links, link))
|
||||
|
||||
/**********************************************************************
|
||||
* Misc functions
|
||||
**********************************************************************/
|
||||
|
||||
/**
|
||||
* Decode the network type
|
||||
*
|
||||
* \param net_type A valid member of the \ref netloc_network_type_t type
|
||||
*
|
||||
* \returns NULL if the type is invalid
|
||||
* \returns A string for that \ref netloc_network_type_t type
|
||||
*/
|
||||
static inline const char * netloc_network_type_decode(netloc_network_type_t net_type) {
    /* Map each known network type to its short label; anything else is invalid. */
    switch (net_type) {
    case NETLOC_NETWORK_TYPE_ETHERNET:
        return "ETH";
    case NETLOC_NETWORK_TYPE_INFINIBAND:
        return "IB";
    default:
        return NULL;
    }
}
|
||||
|
||||
/**
|
||||
* Decode the node type
|
||||
*
|
||||
* \param node_type A valid member of the \ref netloc_node_type_t type
|
||||
*
|
||||
* \returns NULL if the type is invalid
|
||||
* \returns A string for that \ref netloc_node_type_t type
|
||||
*/
|
||||
static inline const char * netloc_node_type_decode(netloc_node_type_t node_type) {
    /* Map each known node type to its short label; anything else is invalid. */
    switch (node_type) {
    case NETLOC_NODE_TYPE_SWITCH:
        return "SW";
    case NETLOC_NODE_TYPE_HOST:
        return "CA";
    default:
        return NULL;
    }
}
|
||||
|
||||
ssize_t netloc_line_get(char **lineptr, size_t *n, FILE *stream);
|
||||
|
||||
char *netloc_line_get_next_token(char **string, char c);
|
||||
|
||||
int netloc_build_comm_mat(char *filename, int *pn, double ***pmat);
|
||||
|
||||
#define STRDUP_IF_NOT_NULL(str) (NULL == str ? NULL : strdup(str))
|
||||
#define STR_EMPTY_IF_NULL(str) (NULL == str ? "" : str)
|
||||
|
||||
|
||||
#endif // _NETLOC_PRIVATE_H_
|
11
src/3rdparty/hwloc/include/private/private.h
vendored
11
src/3rdparty/hwloc/include/private/private.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2022 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
|
@ -245,6 +245,12 @@ struct hwloc_topology {
|
|||
* temporary variables during discovery
|
||||
*/
|
||||
|
||||
/* set to 1 at the beginning of load() if the filter of any cpu cache type (L1 to L3i) is not NONE,
|
||||
* may be checked by backends before querying caches
|
||||
* (when they don't know the level of caches they are querying).
|
||||
*/
|
||||
int want_some_cpu_caches;
|
||||
|
||||
/* machine-wide memory.
|
||||
* temporarily stored there by OSes that only provide this without NUMA information,
|
||||
* and actually used later by the core.
|
||||
|
@ -420,7 +426,7 @@ extern void hwloc_internal_memattrs_need_refresh(hwloc_topology_t topology);
|
|||
extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old);
|
||||
extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value);
|
||||
extern int hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype);
|
||||
|
||||
extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology);
|
||||
|
@ -477,6 +483,7 @@ extern char * hwloc_progname(struct hwloc_topology *topology);
|
|||
#define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */
|
||||
#define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */
|
||||
#define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */
|
||||
#define HWLOC_GROUP_KIND_AMD_COMPLEX 121 /* no subkind */
|
||||
/* then, OS-specific groups */
|
||||
#define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */
|
||||
#define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */
|
||||
|
|
8
src/3rdparty/hwloc/include/private/xml.h
vendored
8
src/3rdparty/hwloc/include/private/xml.h
vendored
|
@ -19,13 +19,14 @@ HWLOC_DECLSPEC int hwloc__xml_verbose(void);
|
|||
typedef struct hwloc__xml_import_state_s {
|
||||
struct hwloc__xml_import_state_s *parent;
|
||||
|
||||
/* globals shared because the entire stack of states during import */
|
||||
/* globals shared between the entire stack of states during import */
|
||||
struct hwloc_xml_backend_data_s *global;
|
||||
|
||||
/* opaque data used to store backend-specific data.
|
||||
* statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
|
||||
* libxml is 3 ptrs. nolibxml is 3 ptr + one int.
|
||||
*/
|
||||
char data[32];
|
||||
char data[4 * SIZEOF_VOID_P];
|
||||
} * hwloc__xml_import_state_t;
|
||||
|
||||
struct hwloc__xml_imported_v1distances_s {
|
||||
|
@ -74,8 +75,9 @@ typedef struct hwloc__xml_export_state_s {
|
|||
|
||||
/* opaque data used to store backend-specific data.
|
||||
* statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
|
||||
* libxml is 1 ptr. nolibxml is 1 ptr + 2 size_t + 3 ints.
|
||||
*/
|
||||
char data[40];
|
||||
char data[6 * SIZEOF_VOID_P];
|
||||
} * hwloc__xml_export_state_t;
|
||||
|
||||
HWLOC_DECLSPEC void hwloc__xml_export_topology(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, unsigned long flags);
|
||||
|
|
3
src/3rdparty/hwloc/src/components.c
vendored
3
src/3rdparty/hwloc/src/components.c
vendored
|
@ -94,8 +94,7 @@ static hwloc_dlhandle hwloc_dlopenext(const char *_filename)
|
|||
{
|
||||
hwloc_dlhandle handle;
|
||||
char *filename = NULL;
|
||||
(void) asprintf(&filename, "%s.so", _filename);
|
||||
if (!filename)
|
||||
if (asprintf(&filename, "%s.so", _filename) < 0)
|
||||
return NULL;
|
||||
handle = dlopen(filename, RTLD_NOW|RTLD_LOCAL);
|
||||
free(filename);
|
||||
|
|
26
src/3rdparty/hwloc/src/diff.c
vendored
26
src/3rdparty/hwloc/src/diff.c
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2013-2022 Inria. All rights reserved.
|
||||
* Copyright © 2013-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -411,6 +411,30 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
|
|||
}
|
||||
}
|
||||
|
||||
if (!err) {
|
||||
/* cpukinds */
|
||||
if (topo1->nr_cpukinds != topo2->nr_cpukinds)
|
||||
goto roottoocomplex;
|
||||
for(i=0; i<topo1->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *ic1 = &topo1->cpukinds[i];
|
||||
struct hwloc_internal_cpukind_s *ic2 = &topo2->cpukinds[i];
|
||||
unsigned j;
|
||||
if (!hwloc_bitmap_isequal(ic1->cpuset, ic2->cpuset)
|
||||
|| ic1->efficiency != ic2->efficiency
|
||||
|| ic1->forced_efficiency != ic2->forced_efficiency
|
||||
|| ic1->ranking_value != ic2->ranking_value
|
||||
|| ic1->nr_infos != ic2->nr_infos)
|
||||
goto roottoocomplex;
|
||||
for(j=0; j<ic1->nr_infos; j++) {
|
||||
struct hwloc_info_s *info1 = &ic1->infos[j], *info2 = &ic2->infos[j];
|
||||
if (strcmp(info1->name, info2->name)
|
||||
|| strcmp(info1->value, info2->value)) {
|
||||
goto roottoocomplex;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
|
||||
roottoocomplex:
|
||||
|
|
758
src/3rdparty/hwloc/src/memattrs.c
vendored
758
src/3rdparty/hwloc/src/memattrs.c
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2020-2022 Inria. All rights reserved.
|
||||
* Copyright © 2020-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -1219,24 +1219,82 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
|
|||
* Using memattrs to identify HBM/DRAM
|
||||
*/
|
||||
|
||||
/* Memory tier types, one bit each so that the CXL bit can be OR'ed with a base type. */
enum hwloc_memory_tier_type_e {
  /* WARNING: keep higher BW types first for compare_tiers_by_bw_and_type() when BW info is missing */
  HWLOC_MEMORY_TIER_HBM  = 0x01UL,
  HWLOC_MEMORY_TIER_DRAM = 0x02UL,
  HWLOC_MEMORY_TIER_GPU  = 0x04UL,
  HWLOC_MEMORY_TIER_SPM  = 0x08UL, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm or force */
  HWLOC_MEMORY_TIER_NVM  = 0x10UL,
  HWLOC_MEMORY_TIER_CXL  = 0x20UL
};

/* Holds a single enum value or an OR'ed combination of them. */
typedef unsigned long hwloc_memory_tier_type_t;

/* No bit set: the tier type is not known. */
#define HWLOC_MEMORY_TIER_UNKNOWN 0UL
|
||||
|
||||
/* Return the subtype string for a tier type (possibly combined with the CXL bit),
 * or NULL when the value is not one of the recognized combinations.
 */
static const char * hwloc_memory_tier_type_snprintf(hwloc_memory_tier_type_t type)
{
  const hwloc_memory_tier_type_t cxl = HWLOC_MEMORY_TIER_CXL;

  /* plain (non-CXL) types */
  if (type == HWLOC_MEMORY_TIER_DRAM)
    return "DRAM";
  if (type == HWLOC_MEMORY_TIER_HBM)
    return "HBM";
  if (type == HWLOC_MEMORY_TIER_GPU)
    return "GPUMemory";
  if (type == HWLOC_MEMORY_TIER_SPM)
    return "SPM";
  if (type == HWLOC_MEMORY_TIER_NVM)
    return "NVM";

  /* CXL alone is reported like CXL-attached DRAM */
  if (type == cxl || type == (cxl|HWLOC_MEMORY_TIER_DRAM))
    return "CXL-DRAM";
  if (type == (cxl|HWLOC_MEMORY_TIER_HBM))
    return "CXL-HBM";
  if (type == (cxl|HWLOC_MEMORY_TIER_GPU))
    return "CXL-GPUMemory";
  if (type == (cxl|HWLOC_MEMORY_TIER_SPM))
    return "CXL-SPM";
  if (type == (cxl|HWLOC_MEMORY_TIER_NVM))
    return "CXL-NVM";

  return NULL;
}
|
||||
|
||||
static hwloc_memory_tier_type_t hwloc_memory_tier_type_sscanf(const char *name)
|
||||
{
|
||||
if (!strcasecmp(name, "DRAM"))
|
||||
return HWLOC_MEMORY_TIER_DRAM;
|
||||
if (!strcasecmp(name, "HBM"))
|
||||
return HWLOC_MEMORY_TIER_HBM;
|
||||
if (!strcasecmp(name, "GPUMemory"))
|
||||
return HWLOC_MEMORY_TIER_GPU;
|
||||
if (!strcasecmp(name, "SPM"))
|
||||
return HWLOC_MEMORY_TIER_SPM;
|
||||
if (!strcasecmp(name, "NVM"))
|
||||
return HWLOC_MEMORY_TIER_NVM;
|
||||
if (!strcasecmp(name, "CXL-DRAM"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM;
|
||||
if (!strcasecmp(name, "CXL-HBM"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM;
|
||||
if (!strcasecmp(name, "CXL-GPUMemory"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU;
|
||||
if (!strcasecmp(name, "CXL-SPM"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM;
|
||||
if (!strcasecmp(name, "CXL-NVM"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* factorized tier, grouping multiple nodes */
|
||||
struct hwloc_memory_tier_s {
|
||||
hwloc_obj_t node;
|
||||
uint64_t local_bw;
|
||||
enum hwloc_memory_tier_type_e {
|
||||
/* warning the order is important for guess_memory_tiers() after qsort() */
|
||||
HWLOC_MEMORY_TIER_UNKNOWN,
|
||||
HWLOC_MEMORY_TIER_DRAM,
|
||||
HWLOC_MEMORY_TIER_HBM,
|
||||
HWLOC_MEMORY_TIER_SPM, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm */
|
||||
HWLOC_MEMORY_TIER_NVM,
|
||||
HWLOC_MEMORY_TIER_GPU,
|
||||
} type;
|
||||
hwloc_nodeset_t nodeset;
|
||||
uint64_t local_bw_min, local_bw_max;
|
||||
uint64_t local_lat_min, local_lat_max;
|
||||
hwloc_memory_tier_type_t type;
|
||||
};
|
||||
|
||||
static int compare_tiers(const void *_a, const void *_b)
|
||||
/* early tier discovery, one entry per node */
|
||||
struct hwloc_memory_node_info_s {
|
||||
hwloc_obj_t node;
|
||||
uint64_t local_bw;
|
||||
uint64_t local_lat;
|
||||
hwloc_memory_tier_type_t type;
|
||||
unsigned rank;
|
||||
};
|
||||
|
||||
static int compare_node_infos_by_type_and_bw(const void *_a, const void *_b)
|
||||
{
|
||||
const struct hwloc_memory_tier_s *a = _a, *b = _b;
|
||||
/* sort by type of tier first */
|
||||
const struct hwloc_memory_node_info_s *a = _a, *b = _b;
|
||||
/* sort by type of node first */
|
||||
if (a->type != b->type)
|
||||
return a->type - b->type;
|
||||
/* then by bandwidth */
|
||||
|
@ -1247,180 +1305,560 @@ static int compare_tiers(const void *_a, const void *_b)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology)
|
||||
static int compare_tiers_by_bw_and_type(const void *_a, const void *_b)
|
||||
{
|
||||
struct hwloc_internal_memattr_s *imattr;
|
||||
struct hwloc_memory_tier_s *tiers;
|
||||
unsigned i, j, n;
|
||||
const char *env;
|
||||
int spm_is_hbm = -1; /* -1 will guess from BW, 0 no, 1 forced */
|
||||
int mark_dram = 1;
|
||||
unsigned first_spm, first_nvm;
|
||||
hwloc_uint64_t max_unknown_bw, min_spm_bw;
|
||||
|
||||
env = getenv("HWLOC_MEMTIERS_GUESS");
|
||||
if (env) {
|
||||
if (!strcmp(env, "none")) {
|
||||
return 0;
|
||||
} else if (!strcmp(env, "default")) {
|
||||
/* nothing */
|
||||
} else if (!strcmp(env, "spm_is_hbm")) {
|
||||
hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n");
|
||||
spm_is_hbm = 1;
|
||||
} else if (HWLOC_SHOW_CRITICAL_ERRORS()) {
|
||||
fprintf(stderr, "hwloc: Failed to recognize HWLOC_MEMTIERS_GUESS value %s\n", env);
|
||||
}
|
||||
const struct hwloc_memory_tier_s *a = _a, *b = _b;
|
||||
/* sort by (average) BW first */
|
||||
if (a->local_bw_min && b->local_bw_min) {
|
||||
if (a->local_bw_min + a->local_bw_max > b->local_bw_min + b->local_bw_max)
|
||||
return -1;
|
||||
else if (a->local_bw_min + a->local_bw_max < b->local_bw_min + b->local_bw_max)
|
||||
return 1;
|
||||
}
|
||||
/* then by tier type */
|
||||
if (a->type != b->type)
|
||||
return a->type - b->type;
|
||||
return 0;
|
||||
}
|
||||
|
||||
imattr = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH];
|
||||
|
||||
if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
||||
hwloc__imattr_refresh(topology, imattr);
|
||||
static struct hwloc_memory_tier_s *
|
||||
hwloc__group_memory_tiers(hwloc_topology_t topology,
|
||||
unsigned *nr_tiers_p)
|
||||
{
|
||||
struct hwloc_internal_memattr_s *imattr_bw, *imattr_lat;
|
||||
struct hwloc_memory_node_info_s *nodeinfos;
|
||||
struct hwloc_memory_tier_s *tiers;
|
||||
unsigned nr_tiers;
|
||||
float bw_threshold = 0.1;
|
||||
float lat_threshold = 0.1;
|
||||
const char *env;
|
||||
unsigned i, j, n;
|
||||
|
||||
n = hwloc_get_nbobjs_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE);
|
||||
assert(n);
|
||||
|
||||
tiers = malloc(n * sizeof(*tiers));
|
||||
if (!tiers)
|
||||
return -1;
|
||||
env = getenv("HWLOC_MEMTIERS_BANDWIDTH_THRESHOLD");
|
||||
if (env)
|
||||
bw_threshold = atof(env);
|
||||
|
||||
env = getenv("HWLOC_MEMTIERS_LATENCY_THRESHOLD");
|
||||
if (env)
|
||||
lat_threshold = atof(env);
|
||||
|
||||
imattr_bw = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH];
|
||||
imattr_lat = &topology->memattrs[HWLOC_MEMATTR_ID_LATENCY];
|
||||
|
||||
if (!(imattr_bw->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
||||
hwloc__imattr_refresh(topology, imattr_bw);
|
||||
if (!(imattr_lat->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
||||
hwloc__imattr_refresh(topology, imattr_lat);
|
||||
|
||||
nodeinfos = malloc(n * sizeof(*nodeinfos));
|
||||
if (!nodeinfos)
|
||||
return NULL;
|
||||
|
||||
for(i=0; i<n; i++) {
|
||||
hwloc_obj_t node;
|
||||
const char *daxtype;
|
||||
struct hwloc_internal_location_s iloc;
|
||||
struct hwloc_internal_memattr_target_s *imtg = NULL;
|
||||
struct hwloc_internal_memattr_initiator_s *imi;
|
||||
struct hwloc_internal_memattr_target_s *imtg;
|
||||
|
||||
node = hwloc_get_obj_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE, i);
|
||||
assert(node);
|
||||
tiers[i].node = node;
|
||||
nodeinfos[i].node = node;
|
||||
|
||||
/* defaults */
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
|
||||
tiers[i].local_bw = 0; /* unknown */
|
||||
/* defaults to unknown */
|
||||
nodeinfos[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
|
||||
nodeinfos[i].local_bw = 0;
|
||||
nodeinfos[i].local_lat = 0;
|
||||
|
||||
daxtype = hwloc_obj_get_info_by_name(node, "DAXType");
|
||||
/* mark NVM, SPM and GPU nodes */
|
||||
if (daxtype && !strcmp(daxtype, "NVM"))
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_NVM;
|
||||
if (daxtype && !strcmp(daxtype, "SPM"))
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_SPM;
|
||||
if (node->subtype && !strcmp(node->subtype, "GPUMemory"))
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_GPU;
|
||||
nodeinfos[i].type = HWLOC_MEMORY_TIER_GPU;
|
||||
else if (daxtype && !strcmp(daxtype, "NVM"))
|
||||
nodeinfos[i].type = HWLOC_MEMORY_TIER_NVM;
|
||||
else if (daxtype && !strcmp(daxtype, "SPM"))
|
||||
nodeinfos[i].type = HWLOC_MEMORY_TIER_SPM;
|
||||
/* add CXL flag */
|
||||
if (hwloc_obj_get_info_by_name(node, "CXLDevice") != NULL) {
|
||||
/* CXL is always SPM for now. HBM and DRAM not possible here yet.
|
||||
* Hence remove all but NVM first.
|
||||
*/
|
||||
nodeinfos[i].type &= HWLOC_MEMORY_TIER_NVM;
|
||||
nodeinfos[i].type |= HWLOC_MEMORY_TIER_CXL;
|
||||
}
|
||||
|
||||
if (spm_is_hbm == -1) {
|
||||
for(j=0; j<imattr->nr_targets; j++)
|
||||
if (imattr->targets[j].obj == node) {
|
||||
imtg = &imattr->targets[j];
|
||||
break;
|
||||
}
|
||||
if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
|
||||
iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
|
||||
iloc.location.cpuset = node->cpuset;
|
||||
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
|
||||
if (imi)
|
||||
tiers[i].local_bw = imi->value;
|
||||
/* get local bandwidth */
|
||||
imtg = NULL;
|
||||
for(j=0; j<imattr_bw->nr_targets; j++)
|
||||
if (imattr_bw->targets[j].obj == node) {
|
||||
imtg = &imattr_bw->targets[j];
|
||||
break;
|
||||
}
|
||||
if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
|
||||
struct hwloc_internal_memattr_initiator_s *imi;
|
||||
iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
|
||||
iloc.location.cpuset = node->cpuset;
|
||||
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
|
||||
if (imi)
|
||||
nodeinfos[i].local_bw = imi->value;
|
||||
}
|
||||
/* get local latency */
|
||||
imtg = NULL;
|
||||
for(j=0; j<imattr_lat->nr_targets; j++)
|
||||
if (imattr_lat->targets[j].obj == node) {
|
||||
imtg = &imattr_lat->targets[j];
|
||||
break;
|
||||
}
|
||||
if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
|
||||
struct hwloc_internal_memattr_initiator_s *imi;
|
||||
iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
|
||||
iloc.location.cpuset = node->cpuset;
|
||||
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
|
||||
if (imi)
|
||||
nodeinfos[i].local_lat = imi->value;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort nodes.
|
||||
* We could also sort by the existing subtype.
|
||||
* KNL is the only case where subtypes are set in backends, but we set memattrs as well there.
|
||||
* Also HWLOC_MEMTIERS_REFRESH would be a special value to ignore existing subtypes.
|
||||
*/
|
||||
hwloc_debug("Sorting memory node infos...\n");
|
||||
qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw);
|
||||
#ifdef HWLOC_DEBUG
|
||||
for(i=0; i<n; i++)
|
||||
hwloc_debug(" node info %u = node L#%u P#%u with info type %lx and local BW %llu lat %llu\n",
|
||||
i,
|
||||
nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index,
|
||||
nodeinfos[i].type,
|
||||
(unsigned long long) nodeinfos[i].local_bw,
|
||||
(unsigned long long) nodeinfos[i].local_lat);
|
||||
#endif
|
||||
/* now we have UNKNOWN nodes (sorted by BW only), then known ones */
|
||||
|
||||
/* iterate among them and add a rank value.
|
||||
* start from rank 0 and switch to next rank when the type changes or when the BW or latency difference is > threshold */
|
||||
hwloc_debug("Starting memory tier #0 and iterating over nodes...\n");
|
||||
nodeinfos[0].rank = 0;
|
||||
for(i=1; i<n; i++) {
|
||||
/* reuse the same rank by default */
|
||||
nodeinfos[i].rank = nodeinfos[i-1].rank;
|
||||
/* comparing type */
|
||||
if (nodeinfos[i].type != nodeinfos[i-1].type) {
|
||||
hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of type\n",
|
||||
nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
|
||||
nodeinfos[i].rank++;
|
||||
continue;
|
||||
}
|
||||
/* comparing bandwidth */
|
||||
if (nodeinfos[i].local_bw && nodeinfos[i-1].local_bw) {
|
||||
float bw_ratio = (float)nodeinfos[i].local_bw/(float)nodeinfos[i-1].local_bw;
|
||||
if (bw_ratio < 1.)
|
||||
bw_ratio = 1./bw_ratio;
|
||||
if (bw_ratio > 1.0 + bw_threshold) {
|
||||
nodeinfos[i].rank++;
|
||||
hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of bandwidth\n",
|
||||
nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* comparing latency */
|
||||
if (nodeinfos[i].local_lat && nodeinfos[i-1].local_lat) {
|
||||
float lat_ratio = (float)nodeinfos[i].local_lat/(float)nodeinfos[i-1].local_lat;
|
||||
if (lat_ratio < 1.)
|
||||
lat_ratio = 1./lat_ratio;
|
||||
if (lat_ratio > 1.0 + lat_threshold) {
|
||||
hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of latency\n",
|
||||
nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
|
||||
nodeinfos[i].rank++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* FIXME: if there are cpuset-intersecting nodes in same tier, split again? */
|
||||
hwloc_debug(" Found %u tiers total\n", nodeinfos[n-1].rank + 1);
|
||||
|
||||
/* sort tiers */
|
||||
qsort(tiers, n, sizeof(*tiers), compare_tiers);
|
||||
hwloc_debug("Sorting memory tiers...\n");
|
||||
for(i=0; i<n; i++)
|
||||
hwloc_debug(" tier %u = node L#%u P#%u with tier type %d and local BW #%llu\n",
|
||||
i,
|
||||
tiers[i].node->logical_index, tiers[i].node->os_index,
|
||||
tiers[i].type, (unsigned long long) tiers[i].local_bw);
|
||||
|
||||
/* now we have UNKNOWN tiers (sorted by BW), then SPM tiers (sorted by BW), then NVM, then GPU */
|
||||
|
||||
/* iterate over UNKNOWN tiers, and find their BW */
|
||||
/* now group nodeinfos into factorized tiers */
|
||||
nr_tiers = nodeinfos[n-1].rank + 1;
|
||||
tiers = calloc(nr_tiers, sizeof(*tiers));
|
||||
if (!tiers)
|
||||
goto out_with_nodeinfos;
|
||||
for(i=0; i<nr_tiers; i++) {
|
||||
tiers[i].nodeset = hwloc_bitmap_alloc();
|
||||
if (!tiers[i].nodeset)
|
||||
goto out_with_tiers;
|
||||
tiers[i].local_bw_min = tiers[i].local_bw_max = 0;
|
||||
tiers[i].local_lat_min = tiers[i].local_lat_max = 0;
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
|
||||
}
|
||||
for(i=0; i<n; i++) {
|
||||
if (tiers[i].type > HWLOC_MEMORY_TIER_UNKNOWN)
|
||||
break;
|
||||
}
|
||||
first_spm = i;
|
||||
/* get max BW from first */
|
||||
if (first_spm > 0)
|
||||
max_unknown_bw = tiers[0].local_bw;
|
||||
else
|
||||
max_unknown_bw = 0;
|
||||
|
||||
/* there are no DRAM or HBM tiers yet */
|
||||
|
||||
/* iterate over SPM tiers, and find their BW */
|
||||
for(i=first_spm; i<n; i++) {
|
||||
if (tiers[i].type > HWLOC_MEMORY_TIER_SPM)
|
||||
break;
|
||||
}
|
||||
first_nvm = i;
|
||||
/* get min BW from last */
|
||||
if (first_nvm > first_spm)
|
||||
min_spm_bw = tiers[first_nvm-1].local_bw;
|
||||
else
|
||||
min_spm_bw = 0;
|
||||
|
||||
/* FIXME: if there's more than 10% between some sets of nodes inside a tier, split it? */
|
||||
/* FIXME: if there are cpuset-intersecting nodes in same tier, abort? */
|
||||
|
||||
if (spm_is_hbm == -1) {
|
||||
/* if we have BW for all SPM and UNKNOWN
|
||||
* and all SPM BW are 2x superior to all UNKNOWN BW
|
||||
*/
|
||||
hwloc_debug("UNKNOWN-memory-tier max bandwidth %llu\n", (unsigned long long) max_unknown_bw);
|
||||
hwloc_debug("SPM-memory-tier min bandwidth %llu\n", (unsigned long long) min_spm_bw);
|
||||
if (max_unknown_bw > 0 && min_spm_bw > 0 && max_unknown_bw*2 < min_spm_bw) {
|
||||
hwloc_debug("assuming SPM means HBM and !SPM means DRAM since bandwidths are very different\n");
|
||||
spm_is_hbm = 1;
|
||||
} else {
|
||||
hwloc_debug("cannot assume SPM means HBM\n");
|
||||
spm_is_hbm = 0;
|
||||
}
|
||||
unsigned rank = nodeinfos[i].rank;
|
||||
assert(rank < nr_tiers);
|
||||
hwloc_bitmap_set(tiers[rank].nodeset, nodeinfos[i].node->os_index);
|
||||
assert(tiers[rank].type == HWLOC_MEMORY_TIER_UNKNOWN
|
||||
|| tiers[rank].type == nodeinfos[i].type);
|
||||
tiers[rank].type = nodeinfos[i].type;
|
||||
/* nodeinfos are sorted in BW order, no need to compare */
|
||||
if (!tiers[rank].local_bw_min)
|
||||
tiers[rank].local_bw_min = nodeinfos[i].local_bw;
|
||||
tiers[rank].local_bw_max = nodeinfos[i].local_bw;
|
||||
/* compare latencies to update min/max */
|
||||
if (!tiers[rank].local_lat_min || nodeinfos[i].local_lat < tiers[rank].local_lat_min)
|
||||
tiers[rank].local_lat_min = nodeinfos[i].local_lat;
|
||||
if (!tiers[rank].local_lat_max || nodeinfos[i].local_lat > tiers[rank].local_lat_max)
|
||||
tiers[rank].local_lat_max = nodeinfos[i].local_lat;
|
||||
}
|
||||
|
||||
if (spm_is_hbm) {
|
||||
for(i=0; i<first_spm; i++)
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_DRAM;
|
||||
for(i=first_spm; i<first_nvm; i++)
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_HBM;
|
||||
}
|
||||
|
||||
if (first_spm == n)
|
||||
mark_dram = 0;
|
||||
|
||||
/* now apply subtypes */
|
||||
for(i=0; i<n; i++) {
|
||||
const char *type = NULL;
|
||||
if (tiers[i].node->subtype) /* don't overwrite the existing subtype */
|
||||
continue;
|
||||
switch (tiers[i].type) {
|
||||
case HWLOC_MEMORY_TIER_DRAM:
|
||||
if (mark_dram)
|
||||
type = "DRAM";
|
||||
break;
|
||||
case HWLOC_MEMORY_TIER_HBM:
|
||||
type = "HBM";
|
||||
break;
|
||||
case HWLOC_MEMORY_TIER_SPM:
|
||||
type = "SPM";
|
||||
break;
|
||||
case HWLOC_MEMORY_TIER_NVM:
|
||||
type = "NVM";
|
||||
break;
|
||||
default:
|
||||
/* GPU memory is already marked with subtype="GPUMemory",
|
||||
* UNKNOWN doesn't deserve any subtype
|
||||
*/
|
||||
break;
|
||||
}
|
||||
if (type) {
|
||||
hwloc_debug("Marking node L#%u P#%u as %s\n", tiers[i].node->logical_index, tiers[i].node->os_index, type);
|
||||
tiers[i].node->subtype = strdup(type);
|
||||
}
|
||||
}
|
||||
free(nodeinfos);
|
||||
*nr_tiers_p = nr_tiers;
|
||||
return tiers;
|
||||
|
||||
out_with_tiers:
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
hwloc_bitmap_free(tiers[i].nodeset);
|
||||
free(tiers);
|
||||
out_with_nodeinfos:
|
||||
free(nodeinfos);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Bit flags controlling which memory-tier guesses are allowed. */
enum hwloc_guess_memtiers_flag {
  /* when set, hwloc__guess_dram_hbm_tiers() rejects a guess that would place NUMA node 0 in the HBM tier */
  HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM = 0x1,
  /* presumably lets SPM tiers be treated as HBM -- its use is outside this view, TODO confirm */
  HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM = 0x2
};
|
||||
|
||||
static int
|
||||
hwloc__guess_dram_hbm_tiers(struct hwloc_memory_tier_s *tier1,
|
||||
struct hwloc_memory_tier_s *tier2,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct hwloc_memory_tier_s *tmp;
|
||||
|
||||
if (!tier1->local_bw_min || !tier2->local_bw_min) {
|
||||
hwloc_debug(" Missing BW info\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* reorder tiers by BW */
|
||||
if (tier1->local_bw_min > tier2->local_bw_min) {
|
||||
tmp = tier1; tier1 = tier2; tier2 = tmp;
|
||||
}
|
||||
/* tier1 < tier2 */
|
||||
|
||||
hwloc_debug(" tier1 BW %llu-%llu vs tier2 BW %llu-%llu\n",
|
||||
(unsigned long long) tier1->local_bw_min,
|
||||
(unsigned long long) tier1->local_bw_max,
|
||||
(unsigned long long) tier2->local_bw_min,
|
||||
(unsigned long long) tier2->local_bw_max);
|
||||
if (tier2->local_bw_min <= tier1->local_bw_max * 2) {
|
||||
/* tier2 BW isn't 2x tier1, we cannot guess HBM */
|
||||
hwloc_debug(" BW difference isn't >2x\n");
|
||||
return -1;
|
||||
}
|
||||
/* tier2 BW is >2x tier1 */
|
||||
|
||||
if ((flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM)
|
||||
&& hwloc_bitmap_isset(tier2->nodeset, 0)) {
|
||||
/* node0 is not DRAM, and we assume that's not possible */
|
||||
hwloc_debug(" node0 shouldn't have HBM BW\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* assume tier1 == DRAM and tier2 == HBM */
|
||||
tier1->type = HWLOC_MEMORY_TIER_DRAM;
|
||||
tier2->type = HWLOC_MEMORY_TIER_HBM;
|
||||
hwloc_debug(" Success\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__guess_memory_tiers_types(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
unsigned nr_tiers,
|
||||
struct hwloc_memory_tier_s *tiers)
|
||||
{
|
||||
unsigned long flags;
|
||||
const char *env;
|
||||
unsigned nr_unknown, nr_spm;
|
||||
struct hwloc_memory_tier_s *unknown_tier[2], *spm_tier;
|
||||
unsigned i;
|
||||
|
||||
flags = 0;
|
||||
env = getenv("HWLOC_MEMTIERS_GUESS");
|
||||
if (env) {
|
||||
if (!strcmp(env, "none"))
|
||||
return 0;
|
||||
/* by default, we don't guess anything unsure */
|
||||
if (!strcmp(env, "all"))
|
||||
/* enable all typical cases */
|
||||
flags = ~0UL;
|
||||
if (strstr(env, "spm_is_hbm")) {
|
||||
hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n");
|
||||
flags |= HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM;
|
||||
}
|
||||
if (strstr(env, "node0_is_dram")) {
|
||||
hwloc_debug("Assuming node0 is DRAM\n");
|
||||
flags |= HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM;
|
||||
}
|
||||
}
|
||||
|
||||
if (nr_tiers == 1)
|
||||
/* Likely DRAM only, but could also be HBM-only in non-SPM mode.
|
||||
* We cannot be sure, but it doesn't matter since there's a single tier.
|
||||
*/
|
||||
return 0;
|
||||
|
||||
nr_unknown = nr_spm = 0;
|
||||
unknown_tier[0] = unknown_tier[1] = spm_tier = NULL;
|
||||
for(i=0; i<nr_tiers; i++) {
|
||||
switch (tiers[i].type) {
|
||||
case HWLOC_MEMORY_TIER_UNKNOWN:
|
||||
if (nr_unknown < 2)
|
||||
unknown_tier[nr_unknown] = &tiers[i];
|
||||
nr_unknown++;
|
||||
break;
|
||||
case HWLOC_MEMORY_TIER_SPM:
|
||||
spm_tier = &tiers[i];
|
||||
nr_spm++;
|
||||
break;
|
||||
case HWLOC_MEMORY_TIER_DRAM:
|
||||
case HWLOC_MEMORY_TIER_HBM:
|
||||
/* not possible */
|
||||
abort();
|
||||
default:
|
||||
/* ignore HBM, NVM, ... */
|
||||
break;
|
||||
}
|
||||
}
|
||||
hwloc_debug("Found %u unknown memory tiers and %u SPM\n",
|
||||
nr_unknown, nr_spm);
|
||||
|
||||
/* Try to guess DRAM + HBM common cases.
|
||||
* Other things we'd like to detect:
|
||||
* single unknown => DRAM or HBM? HBM won't be SPM on HBM-only CPUs
|
||||
* unknown + CXL DRAM => DRAM or HBM?
|
||||
*/
|
||||
if (nr_unknown == 2 && !nr_spm) {
|
||||
/* 2 unknown, could be DRAM + non-SPM HBM */
|
||||
hwloc_debug(" Trying to guess 2 unknown tiers using BW\n");
|
||||
hwloc__guess_dram_hbm_tiers(unknown_tier[0], unknown_tier[1], flags);
|
||||
} else if (nr_unknown == 1 && nr_spm == 1) {
|
||||
/* 1 unknown + 1 SPM, could be DRAM + SPM HBM */
|
||||
hwloc_debug(" Trying to guess 1 unknown + 1 SPM tiers using BW\n");
|
||||
hwloc__guess_dram_hbm_tiers(unknown_tier[0], spm_tier, flags);
|
||||
}
|
||||
|
||||
if (flags & HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM) {
|
||||
/* force mark SPM as HBM */
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
if (tiers[i].type == HWLOC_MEMORY_TIER_SPM) {
|
||||
hwloc_debug("Forcing SPM tier to HBM");
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_HBM;
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM) {
|
||||
/* force mark node0's tier as DRAM if we couldn't guess it */
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
if (hwloc_bitmap_isset(tiers[i].nodeset, 0)
|
||||
&& tiers[i].type == HWLOC_MEMORY_TIER_UNKNOWN) {
|
||||
hwloc_debug("Forcing node0 tier to DRAM");
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_DRAM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* parses something like 0xf=HBM;0x0f=DRAM;0x00f=CXL-DRAM */
|
||||
static struct hwloc_memory_tier_s *
|
||||
hwloc__force_memory_tiers(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
unsigned *nr_tiers_p,
|
||||
const char *_env)
|
||||
{
|
||||
struct hwloc_memory_tier_s *tiers = NULL;
|
||||
unsigned nr_tiers, i;
|
||||
hwloc_bitmap_t nodeset = NULL;
|
||||
char *env;
|
||||
const char *tmp;
|
||||
|
||||
env = strdup(_env);
|
||||
if (!env) {
|
||||
fprintf(stderr, "[hwloc/memtiers] failed to duplicate HWLOC_MEMTIERS envvar\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
tmp = env;
|
||||
nr_tiers = 1;
|
||||
while (1) {
|
||||
tmp = strchr(tmp, ';');
|
||||
if (!tmp)
|
||||
break;
|
||||
tmp++;
|
||||
nr_tiers++;
|
||||
}
|
||||
|
||||
nodeset = hwloc_bitmap_alloc();
|
||||
if (!nodeset) {
|
||||
fprintf(stderr, "[hwloc/memtiers] failed to allocated forced tiers' nodeset\n");
|
||||
goto out_with_envvar;
|
||||
}
|
||||
|
||||
tiers = calloc(nr_tiers, sizeof(*tiers));
|
||||
if (!tiers) {
|
||||
fprintf(stderr, "[hwloc/memtiers] failed to allocated forced tiers\n");
|
||||
goto out_with_nodeset;
|
||||
}
|
||||
nr_tiers = 0;
|
||||
|
||||
tmp = env;
|
||||
while (1) {
|
||||
char *end;
|
||||
char *equal;
|
||||
hwloc_memory_tier_type_t type;
|
||||
|
||||
end = strchr(tmp, ';');
|
||||
if (end)
|
||||
*end = '\0';
|
||||
|
||||
equal = strchr(tmp, '=');
|
||||
if (!equal) {
|
||||
fprintf(stderr, "[hwloc/memtiers] missing `=' before end of forced tier description at `%s'\n", tmp);
|
||||
goto out_with_tiers;
|
||||
}
|
||||
*equal = '\0';
|
||||
|
||||
hwloc_bitmap_sscanf(nodeset, tmp);
|
||||
if (hwloc_bitmap_iszero(nodeset)) {
|
||||
fprintf(stderr, "[hwloc/memtiers] empty forced tier nodeset `%s', aborting\n", tmp);
|
||||
goto out_with_tiers;
|
||||
}
|
||||
type = hwloc_memory_tier_type_sscanf(equal+1);
|
||||
if (!type)
|
||||
hwloc_debug("failed to recognize forced tier type `%s'\n", equal+1);
|
||||
tiers[nr_tiers].nodeset = hwloc_bitmap_dup(nodeset);
|
||||
tiers[nr_tiers].type = type;
|
||||
tiers[nr_tiers].local_bw_min = tiers[nr_tiers].local_bw_max = 0;
|
||||
tiers[nr_tiers].local_lat_min = tiers[nr_tiers].local_lat_max = 0;
|
||||
nr_tiers++;
|
||||
if (!end)
|
||||
break;
|
||||
tmp = end+1;
|
||||
}
|
||||
|
||||
free(env);
|
||||
hwloc_bitmap_free(nodeset);
|
||||
hwloc_debug("Forcing %u memory tiers\n", nr_tiers);
|
||||
#ifdef HWLOC_DEBUG
|
||||
for(i=0; i<nr_tiers; i++) {
|
||||
char *s;
|
||||
hwloc_bitmap_asprintf(&s, tiers[i].nodeset);
|
||||
hwloc_debug(" tier #%u type %lx nodeset %s\n", i, tiers[i].type, s);
|
||||
free(s);
|
||||
}
|
||||
#endif
|
||||
*nr_tiers_p = nr_tiers;
|
||||
return tiers;
|
||||
|
||||
out_with_tiers:
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
hwloc_bitmap_free(tiers[i].nodeset);
|
||||
free(tiers);
|
||||
out_with_nodeset:
|
||||
hwloc_bitmap_free(nodeset);
|
||||
out_with_envvar:
|
||||
free(env);
|
||||
out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc__apply_memory_tiers_subtypes(hwloc_topology_t topology,
|
||||
unsigned nr_tiers,
|
||||
struct hwloc_memory_tier_s *tiers,
|
||||
int force)
|
||||
{
|
||||
hwloc_obj_t node = NULL;
|
||||
hwloc_debug("Marking node tiers\n");
|
||||
while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) {
|
||||
unsigned j;
|
||||
for(j=0; j<nr_tiers; j++) {
|
||||
if (hwloc_bitmap_isset(tiers[j].nodeset, node->os_index)) {
|
||||
const char *subtype = hwloc_memory_tier_type_snprintf(tiers[j].type);
|
||||
if (!node->subtype || force) { /* don't overwrite the existing subtype unless forced */
|
||||
if (subtype) { /* don't set a subtype for unknown tiers */
|
||||
hwloc_debug(" marking node L#%u P#%u as %s (was %s)\n", node->logical_index, node->os_index, subtype, node->subtype);
|
||||
free(node->subtype);
|
||||
node->subtype = strdup(subtype);
|
||||
}
|
||||
} else
|
||||
hwloc_debug(" node L#%u P#%u already marked as %s, not setting %s\n",
|
||||
node->logical_index, node->os_index, node->subtype, subtype);
|
||||
if (nr_tiers > 1) {
|
||||
char tmp[20];
|
||||
snprintf(tmp, sizeof(tmp), "%u", j);
|
||||
hwloc__add_info_nodup(&node->infos, &node->infos_count, "MemoryTier", tmp, 1);
|
||||
}
|
||||
break; /* each node is in a single tier */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype)
|
||||
{
|
||||
struct hwloc_memory_tier_s *tiers;
|
||||
unsigned nr_tiers;
|
||||
unsigned i;
|
||||
const char *env;
|
||||
|
||||
env = getenv("HWLOC_MEMTIERS");
|
||||
if (env) {
|
||||
if (!strcmp(env, "none"))
|
||||
goto out;
|
||||
tiers = hwloc__force_memory_tiers(topology, &nr_tiers, env);
|
||||
if (tiers) {
|
||||
assert(nr_tiers > 0);
|
||||
force_subtype = 1;
|
||||
goto ready;
|
||||
}
|
||||
}
|
||||
|
||||
tiers = hwloc__group_memory_tiers(topology, &nr_tiers);
|
||||
if (!tiers)
|
||||
goto out;
|
||||
|
||||
hwloc__guess_memory_tiers_types(topology, nr_tiers, tiers);
|
||||
|
||||
/* sort tiers by BW first, then by type */
|
||||
hwloc_debug("Sorting memory tiers...\n");
|
||||
qsort(tiers, nr_tiers, sizeof(*tiers), compare_tiers_by_bw_and_type);
|
||||
|
||||
ready:
|
||||
#ifdef HWLOC_DEBUG
|
||||
for(i=0; i<nr_tiers; i++) {
|
||||
char *s;
|
||||
hwloc_bitmap_asprintf(&s, tiers[i].nodeset);
|
||||
hwloc_debug(" tier %u = nodes %s with type %lx and local BW %llu-%llu lat %llu-%llu\n",
|
||||
i,
|
||||
s, tiers[i].type,
|
||||
(unsigned long long) tiers[i].local_bw_min,
|
||||
(unsigned long long) tiers[i].local_bw_max,
|
||||
(unsigned long long) tiers[i].local_lat_min,
|
||||
(unsigned long long) tiers[i].local_lat_max);
|
||||
free(s);
|
||||
}
|
||||
#endif
|
||||
|
||||
hwloc__apply_memory_tiers_subtypes(topology, nr_tiers, tiers, force_subtype);
|
||||
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
hwloc_bitmap_free(tiers[i].nodeset);
|
||||
free(tiers);
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
|
11
src/3rdparty/hwloc/src/shmem.c
vendored
11
src/3rdparty/hwloc/src/shmem.c
vendored
|
@ -23,6 +23,7 @@ struct hwloc_shmem_header {
|
|||
uint32_t header_length; /* where the actual topology starts in the file/mapping */
|
||||
uint64_t mmap_address; /* virtual address to pass to mmap */
|
||||
uint64_t mmap_length; /* length to pass to mmap (includes the header) */
|
||||
/* we will pad the end to a multiple of pointer size so that the topology is well aligned */
|
||||
};
|
||||
|
||||
#define HWLOC_SHMEM_MALLOC_ALIGN 8UL
|
||||
|
@ -85,6 +86,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
|
|||
hwloc_topology_t new;
|
||||
struct hwloc_tma tma;
|
||||
struct hwloc_shmem_header header;
|
||||
uint32_t header_length = (sizeof(header) + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* pad to a multiple of pointer size */
|
||||
void *mmap_res;
|
||||
int err;
|
||||
|
||||
|
@ -100,7 +102,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
|
|||
hwloc_internal_memattrs_refresh(topology);
|
||||
|
||||
header.header_version = HWLOC_SHMEM_HEADER_VERSION;
|
||||
header.header_length = sizeof(header);
|
||||
header.header_length = header_length;
|
||||
header.mmap_address = (uintptr_t) mmap_address;
|
||||
header.mmap_length = length;
|
||||
|
||||
|
@ -127,7 +129,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
|
|||
|
||||
tma.malloc = tma_shmem_malloc;
|
||||
tma.dontfree = 1;
|
||||
tma.data = (char *)mmap_res + sizeof(header);
|
||||
tma.data = (char *)mmap_res + header_length;
|
||||
err = hwloc__topology_dup(&new, topology, &tma);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
@ -154,6 +156,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
|||
{
|
||||
hwloc_topology_t new, old;
|
||||
struct hwloc_shmem_header header;
|
||||
uint32_t header_length = (sizeof(header) + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* pad to a multiple of pointer size */
|
||||
void *mmap_res;
|
||||
int err;
|
||||
|
||||
|
@ -171,7 +174,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
|||
return -1;
|
||||
|
||||
if (header.header_version != HWLOC_SHMEM_HEADER_VERSION
|
||||
|| header.header_length != sizeof(header)
|
||||
|| header.header_length != header_length
|
||||
|| header.mmap_address != (uintptr_t) mmap_address
|
||||
|| header.mmap_length != length) {
|
||||
errno = EINVAL;
|
||||
|
@ -186,7 +189,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
|||
goto out_with_mmap;
|
||||
}
|
||||
|
||||
old = (hwloc_topology_t)((char*)mmap_address + sizeof(header));
|
||||
old = (hwloc_topology_t)((char*)mmap_address + header_length);
|
||||
if (hwloc_topology_abi_check(old) < 0) {
|
||||
errno = EINVAL;
|
||||
goto out_with_mmap;
|
||||
|
|
154
src/3rdparty/hwloc/src/topology-synthetic.c
vendored
154
src/3rdparty/hwloc/src/topology-synthetic.c
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2022 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -23,6 +23,7 @@ struct hwloc_synthetic_attr_s {
|
|||
unsigned depth; /* For caches/groups */
|
||||
hwloc_obj_cache_type_t cachetype; /* For caches */
|
||||
hwloc_uint64_t memorysize; /* For caches/memory */
|
||||
hwloc_uint64_t memorysidecachesize; /* Single level of memory-side-cache in-front of a NUMA node */
|
||||
};
|
||||
|
||||
struct hwloc_synthetic_indexes_s {
|
||||
|
@ -380,6 +381,9 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
|
|||
} else if (!iscache && !strncmp("memory=", attrs, 7)) {
|
||||
memorysize = hwloc_synthetic_parse_memory_attr(attrs+7, &attrs);
|
||||
|
||||
} else if (!strncmp("memorysidecachesize=", attrs, 20)) {
|
||||
sattr->memorysidecachesize = hwloc_synthetic_parse_memory_attr(attrs+20, &attrs);
|
||||
|
||||
} else if (!strncmp("indexes=", attrs, 8)) {
|
||||
index_string = attrs+8;
|
||||
attrs += 8;
|
||||
|
@ -387,10 +391,9 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
|
|||
attrs += index_string_length;
|
||||
|
||||
} else {
|
||||
if (verbose)
|
||||
fprintf(stderr, "Unknown attribute at '%s'\n", attrs);
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
size_t length = strcspn(attrs, " )");
|
||||
fprintf(stderr, "hwloc/synthetic: Ignoring unknown attribute at '%s'\n", attrs);
|
||||
attrs += length;
|
||||
}
|
||||
|
||||
if (' ' == *attrs)
|
||||
|
@ -416,6 +419,32 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_synthetic_set_default_attrs(struct hwloc_synthetic_attr_s *sattr,
|
||||
int *type_count)
|
||||
{
|
||||
hwloc_obj_type_t type = sattr->type;
|
||||
|
||||
if (type == HWLOC_OBJ_GROUP) {
|
||||
if (sattr->depth == (unsigned)-1)
|
||||
sattr->depth = type_count[HWLOC_OBJ_GROUP]--;
|
||||
|
||||
} else if (hwloc__obj_type_is_cache(type)) {
|
||||
if (!sattr->memorysize) {
|
||||
if (1 == sattr->depth)
|
||||
/* 32KiB in L1 */
|
||||
sattr->memorysize = 32*1024;
|
||||
else
|
||||
/* *4 at each level, starting from 1MiB for L2, unified */
|
||||
sattr->memorysize = 256ULL*1024 << (2*sattr->depth);
|
||||
}
|
||||
|
||||
} else if (type == HWLOC_OBJ_NUMANODE && !sattr->memorysize) {
|
||||
/* 1GiB in memory nodes. */
|
||||
sattr->memorysize = 1024*1024*1024;
|
||||
}
|
||||
}
|
||||
|
||||
/* frees level until arity = 0 */
|
||||
static void
|
||||
hwloc_synthetic_free_levels(struct hwloc_synthetic_backend_data_s *data)
|
||||
|
@ -465,6 +494,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
|||
data->level[0].indexes.string = NULL;
|
||||
data->level[0].indexes.array = NULL;
|
||||
data->level[0].attr.memorysize = 0;
|
||||
data->level[0].attr.memorysidecachesize = 0;
|
||||
data->level[0].attached = NULL;
|
||||
type_count[HWLOC_OBJ_MACHINE] = 1;
|
||||
if (*description == '(') {
|
||||
|
@ -514,6 +544,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
|||
if (attached) {
|
||||
attached->attr.type = type;
|
||||
attached->attr.memorysize = 0;
|
||||
attached->attr.memorysidecachesize = 0;
|
||||
/* attached->attr.depth and .cachetype unused */
|
||||
attached->next = NULL;
|
||||
pprev = &data->level[count-1].attached;
|
||||
|
@ -601,7 +632,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
|||
}
|
||||
if (!item) {
|
||||
if (verbose)
|
||||
fprintf(stderr,"Synthetic string with disallow 0 number of objects at '%s'\n", pos);
|
||||
fprintf(stderr,"Synthetic string with disallowed 0 number of objects at '%s'\n", pos);
|
||||
errno = EINVAL;
|
||||
goto error;
|
||||
}
|
||||
|
@ -611,6 +642,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
|||
data->level[count].indexes.string = NULL;
|
||||
data->level[count].indexes.array = NULL;
|
||||
data->level[count].attr.memorysize = 0;
|
||||
data->level[count].attr.memorysidecachesize = 0;
|
||||
if (*next_pos == '(') {
|
||||
err = hwloc_synthetic_parse_attrs(next_pos+1, &next_pos, &data->level[count].attr, &data->level[count].indexes, verbose);
|
||||
if (err < 0)
|
||||
|
@ -796,6 +828,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
|||
data->level[1].indexes.string = NULL;
|
||||
data->level[1].indexes.array = NULL;
|
||||
data->level[1].attr.memorysize = 0;
|
||||
data->level[1].attr.memorysidecachesize = 0;
|
||||
data->level[1].totalwidth = data->level[0].totalwidth;
|
||||
/* update arity to insert a single NUMA node per parent */
|
||||
data->level[1].arity = data->level[0].arity;
|
||||
|
@ -803,30 +836,14 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
|||
count++;
|
||||
}
|
||||
|
||||
/* set default attributes that depend on the depth/hierarchy of levels */
|
||||
for (i=0; i<count; i++) {
|
||||
struct hwloc_synthetic_attached_s *attached;
|
||||
struct hwloc_synthetic_level_data_s *curlevel = &data->level[i];
|
||||
hwloc_obj_type_t type = curlevel->attr.type;
|
||||
|
||||
if (type == HWLOC_OBJ_GROUP) {
|
||||
if (curlevel->attr.depth == (unsigned)-1)
|
||||
curlevel->attr.depth = type_count[HWLOC_OBJ_GROUP]--;
|
||||
|
||||
} else if (hwloc__obj_type_is_cache(type)) {
|
||||
if (!curlevel->attr.memorysize) {
|
||||
if (1 == curlevel->attr.depth)
|
||||
/* 32KiB in L1 */
|
||||
curlevel->attr.memorysize = 32*1024;
|
||||
else
|
||||
/* *4 at each level, starting from 1MiB for L2, unified */
|
||||
curlevel->attr.memorysize = 256ULL*1024 << (2*curlevel->attr.depth);
|
||||
}
|
||||
|
||||
} else if (type == HWLOC_OBJ_NUMANODE && !curlevel->attr.memorysize) {
|
||||
/* 1GiB in memory nodes. */
|
||||
curlevel->attr.memorysize = 1024*1024*1024;
|
||||
}
|
||||
|
||||
hwloc_synthetic_process_indexes(data, &data->level[i].indexes, data->level[i].totalwidth, verbose);
|
||||
hwloc_synthetic_set_default_attrs(&curlevel->attr, type_count);
|
||||
for(attached = curlevel->attached; attached != NULL; attached = attached->next)
|
||||
hwloc_synthetic_set_default_attrs(&attached->attr, type_count);
|
||||
hwloc_synthetic_process_indexes(data, &curlevel->indexes, curlevel->totalwidth, verbose);
|
||||
}
|
||||
|
||||
hwloc_synthetic_process_indexes(data, &data->numa_attached_indexes, data->numa_attached_nr, verbose);
|
||||
|
@ -859,6 +876,12 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr,
|
|||
obj->attr->numanode.page_types[0].size = 4096;
|
||||
obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096;
|
||||
break;
|
||||
case HWLOC_OBJ_MEMCACHE:
|
||||
obj->attr->cache.depth = 1;
|
||||
obj->attr->cache.linesize = 64;
|
||||
obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
|
||||
obj->attr->cache.size = sattr->memorysidecachesize;
|
||||
break;
|
||||
case HWLOC_OBJ_PACKAGE:
|
||||
case HWLOC_OBJ_DIE:
|
||||
break;
|
||||
|
@ -926,6 +949,14 @@ hwloc_synthetic_insert_attached(struct hwloc_topology *topology,
|
|||
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, child, "synthetic:attached");
|
||||
|
||||
if (attached->attr.memorysidecachesize) {
|
||||
hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX);
|
||||
mscachechild->cpuset = hwloc_bitmap_dup(set);
|
||||
mscachechild->nodeset = hwloc_bitmap_dup(child->nodeset);
|
||||
hwloc_synthetic_set_attr(&attached->attr, mscachechild);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:attached:mscache");
|
||||
}
|
||||
|
||||
hwloc_synthetic_insert_attached(topology, data, attached->next, set);
|
||||
}
|
||||
|
||||
|
@ -977,6 +1008,14 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
|
|||
hwloc_synthetic_set_attr(&curlevel->attr, obj);
|
||||
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "synthetic");
|
||||
|
||||
if (type == HWLOC_OBJ_NUMANODE && curlevel->attr.memorysidecachesize) {
|
||||
hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX);
|
||||
mscachechild->cpuset = hwloc_bitmap_dup(set);
|
||||
mscachechild->nodeset = hwloc_bitmap_dup(obj->nodeset);
|
||||
hwloc_synthetic_set_attr(&curlevel->attr, mscachechild);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:mscache");
|
||||
}
|
||||
}
|
||||
|
||||
hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set);
|
||||
|
@ -1217,6 +1256,7 @@ hwloc__export_synthetic_indexes(hwloc_obj_t *level, unsigned total,
|
|||
|
||||
static int
|
||||
hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
||||
unsigned long flags,
|
||||
hwloc_obj_t obj,
|
||||
char *buffer, size_t buflen)
|
||||
{
|
||||
|
@ -1224,6 +1264,7 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
|||
const char * prefix = "(";
|
||||
char cachesize[64] = "";
|
||||
char memsize[64] = "";
|
||||
char memorysidecachesize[64] = "";
|
||||
int needindexes = 0;
|
||||
|
||||
if (hwloc__obj_type_is_cache(obj->type) && obj->attr->cache.size) {
|
||||
|
@ -1236,6 +1277,19 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
|||
prefix, (unsigned long long) obj->attr->numanode.local_memory);
|
||||
prefix = separator;
|
||||
}
|
||||
if (obj->type == HWLOC_OBJ_NUMANODE && !(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1)) {
|
||||
hwloc_obj_t memorysidecache = obj->parent;
|
||||
hwloc_uint64_t size = 0;
|
||||
while (memorysidecache && memorysidecache->type == HWLOC_OBJ_MEMCACHE) {
|
||||
size += memorysidecache->attr->cache.size;
|
||||
memorysidecache = memorysidecache->parent;
|
||||
}
|
||||
if (size) {
|
||||
snprintf(memorysidecachesize, sizeof(memorysidecachesize), "%smemorysidecachesize=%llu",
|
||||
prefix, (unsigned long long) size);
|
||||
prefix = separator;
|
||||
}
|
||||
}
|
||||
if (!obj->logical_index /* only display indexes once per level (not for non-first NUMA children, etc.) */
|
||||
&& (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE)) {
|
||||
hwloc_obj_t cur = obj;
|
||||
|
@ -1247,12 +1301,12 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
|||
cur = cur->next_cousin;
|
||||
}
|
||||
}
|
||||
if (*cachesize || *memsize || needindexes) {
|
||||
if (*cachesize || *memsize || *memorysidecachesize || needindexes) {
|
||||
ssize_t tmplen = buflen;
|
||||
char *tmp = buffer;
|
||||
int res, ret = 0;
|
||||
|
||||
res = hwloc_snprintf(tmp, tmplen, "%s%s%s", cachesize, memsize, needindexes ? "" : ")");
|
||||
res = hwloc_snprintf(tmp, tmplen, "%s%s%s%s", cachesize, memsize, memorysidecachesize, needindexes ? "" : ")");
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
return -1;
|
||||
|
||||
|
@ -1326,7 +1380,7 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag
|
|||
|
||||
if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
|
||||
/* obj attributes */
|
||||
res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen);
|
||||
res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen);
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
@ -1351,7 +1405,7 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
|
|||
|
||||
if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) {
|
||||
/* v1: export a single NUMA child */
|
||||
if (parent->memory_arity > 1 || mchild->type != HWLOC_OBJ_NUMANODE) {
|
||||
if (parent->memory_arity > 1) {
|
||||
/* not supported */
|
||||
if (verbose)
|
||||
fprintf(stderr, "Cannot export to synthetic v1 if multiple memory children are attached to the same location.\n");
|
||||
|
@ -1362,6 +1416,9 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
|
|||
if (needprefix)
|
||||
hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' ');
|
||||
|
||||
/* ignore memcaches and export the NUMA node */
|
||||
while (mchild->type != HWLOC_OBJ_NUMANODE)
|
||||
mchild = mchild->memory_first_child;
|
||||
res = hwloc__export_synthetic_obj(topology, flags, mchild, 1, tmp, tmplen);
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
return -1;
|
||||
|
@ -1369,16 +1426,25 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
|
|||
}
|
||||
|
||||
while (mchild) {
|
||||
/* FIXME: really recurse to export memcaches and numanode,
|
||||
/* The core doesn't support shared memcache for now (because ACPI and Linux don't).
|
||||
* So, for each mchild here, recurse only in the first children at each level.
|
||||
*
|
||||
* FIXME: whenever supported by the core, really recurse to export memcaches and numanode,
|
||||
* but it requires clever parsing of [ memcache [numa] [numa] ] during import,
|
||||
* better attaching of things to describe the hierarchy.
|
||||
*/
|
||||
hwloc_obj_t numanode = mchild;
|
||||
/* only export the first NUMA node leaf of each memory child
|
||||
* FIXME: This assumes mscache aren't shared between nodes, that's true in current platforms
|
||||
/* Only export the first NUMA node leaf of each memory child.
|
||||
* Memcaches are ignored here, they will be summed and exported as a single attribute
|
||||
* of the NUMA node in hwloc__export_synthetic_obj().
|
||||
*/
|
||||
while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) {
|
||||
assert(numanode->arity == 1);
|
||||
if (verbose && numanode->memory_arity > 1) {
|
||||
static int warned = 0;
|
||||
if (!warned)
|
||||
fprintf(stderr, "Ignoring non-first memory children at non-first level of memory hierarchy.\n");
|
||||
warned = 1;
|
||||
}
|
||||
numanode = numanode->memory_first_child;
|
||||
}
|
||||
assert(numanode); /* there's always a numanode at the bottom of the memory tree */
|
||||
|
@ -1511,17 +1577,21 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology,
|
|||
|
||||
if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) {
|
||||
/* v1 requires all NUMA at the same level */
|
||||
hwloc_obj_t node;
|
||||
hwloc_obj_t node, parent;
|
||||
signed pdepth;
|
||||
|
||||
node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
|
||||
assert(node);
|
||||
assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */
|
||||
pdepth = node->parent->depth;
|
||||
parent = node->parent;
|
||||
while (!hwloc__obj_type_is_normal(parent->type))
|
||||
parent = parent->parent;
|
||||
pdepth = parent->depth;
|
||||
|
||||
while ((node = node->next_cousin) != NULL) {
|
||||
assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */
|
||||
if (node->parent->depth != pdepth) {
|
||||
parent = node->parent;
|
||||
while (!hwloc__obj_type_is_normal(parent->type))
|
||||
parent = parent->parent;
|
||||
if (parent->depth != pdepth) {
|
||||
if (verbose)
|
||||
fprintf(stderr, "Cannot export to synthetic v1 if memory is attached to parents at different depths.\n");
|
||||
errno = EINVAL;
|
||||
|
@ -1534,7 +1604,7 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology,
|
|||
|
||||
if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
|
||||
/* obj attributes */
|
||||
res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen);
|
||||
res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen);
|
||||
if (res > 0)
|
||||
needprefix = 1;
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
|
|
26
src/3rdparty/hwloc/src/topology-windows.c
vendored
26
src/3rdparty/hwloc/src/topology-windows.c
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2022 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -367,7 +367,7 @@ hwloc_win_get_processor_groups(void)
|
|||
|
||||
if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) {
|
||||
if (HWLOC_SHOW_ALL_ERRORS())
|
||||
fprintf(stderr, "hwloc: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n");
|
||||
fprintf(stderr, "hwloc/windows: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n");
|
||||
}
|
||||
|
||||
length = 0;
|
||||
|
@ -987,7 +987,11 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
OSVERSIONINFOEX osvi;
|
||||
char versionstr[20];
|
||||
char hostname[122] = "";
|
||||
unsigned hostname_size = sizeof(hostname);
|
||||
#if !defined(__CYGWIN__)
|
||||
DWORD hostname_size = sizeof(hostname);
|
||||
#else
|
||||
size_t hostname_size = sizeof(hostname);
|
||||
#endif
|
||||
int has_efficiencyclass = 0;
|
||||
struct hwloc_win_efficiency_classes eclasses;
|
||||
char *env = getenv("HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS");
|
||||
|
@ -1051,12 +1055,16 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
unsigned efficiency_class = 0;
|
||||
GROUP_AFFINITY *GroupMask;
|
||||
|
||||
/* Ignore unknown caches */
|
||||
if (procInfo->Relationship == RelationCache
|
||||
&& procInfo->Cache.Type != CacheUnified
|
||||
&& procInfo->Cache.Type != CacheData
|
||||
&& procInfo->Cache.Type != CacheInstruction)
|
||||
continue;
|
||||
if (procInfo->Relationship == RelationCache) {
|
||||
if (!topology->want_some_cpu_caches)
|
||||
/* TODO: check if RelationAll&~RelationCache works? */
|
||||
continue;
|
||||
if (procInfo->Cache.Type != CacheUnified
|
||||
&& procInfo->Cache.Type != CacheData
|
||||
&& procInfo->Cache.Type != CacheInstruction)
|
||||
/* Ignore unknown caches */
|
||||
continue;
|
||||
}
|
||||
|
||||
id = HWLOC_UNKNOWN_INDEX;
|
||||
switch (procInfo->Relationship) {
|
||||
|
|
308
src/3rdparty/hwloc/src/topology-x86.c
vendored
308
src/3rdparty/hwloc/src/topology-x86.c
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010-2022 Inria. All rights reserved.
|
||||
* Copyright © 2010-2023 Inria. All rights reserved.
|
||||
* Copyright © 2010-2013 Université Bordeaux
|
||||
* Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -38,6 +38,12 @@ struct hwloc_x86_backend_data_s {
|
|||
int apicid_unique;
|
||||
char *src_cpuiddump_path;
|
||||
int is_knl;
|
||||
int is_hybrid;
|
||||
int found_die_ids;
|
||||
int found_complex_ids;
|
||||
int found_unit_ids;
|
||||
int found_module_ids;
|
||||
int found_tile_ids;
|
||||
};
|
||||
|
||||
/************************************
|
||||
|
@ -80,7 +86,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
|
|||
|
||||
cpuiddump = malloc(sizeof(*cpuiddump));
|
||||
if (!cpuiddump) {
|
||||
fprintf(stderr, "Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx);
|
||||
fprintf(stderr, "hwloc/x86: Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -91,7 +97,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
|
|||
snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx);
|
||||
file = fopen(filename, "r");
|
||||
if (!file) {
|
||||
fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename);
|
||||
fprintf(stderr, "hwloc/x86: Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename);
|
||||
goto out_with_filename;
|
||||
}
|
||||
|
||||
|
@ -100,7 +106,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
|
|||
nr++;
|
||||
cpuiddump->entries = malloc(nr * sizeof(struct cpuiddump_entry));
|
||||
if (!cpuiddump->entries) {
|
||||
fprintf(stderr, "Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx);
|
||||
fprintf(stderr, "hwloc/x86: Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx);
|
||||
goto out_with_file;
|
||||
}
|
||||
|
||||
|
@ -156,7 +162,7 @@ cpuiddump_find_by_input(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *e
|
|||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n",
|
||||
fprintf(stderr, "hwloc/x86: Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n",
|
||||
*eax, *ebx, *ecx, *edx);
|
||||
*eax = 0;
|
||||
*ebx = 0;
|
||||
|
@ -210,7 +216,8 @@ struct procinfo {
|
|||
#define TILE 4
|
||||
#define MODULE 5
|
||||
#define DIE 6
|
||||
#define HWLOC_X86_PROCINFO_ID_NR 7
|
||||
#define COMPLEX 7
|
||||
#define HWLOC_X86_PROCINFO_ID_NR 8
|
||||
unsigned ids[HWLOC_X86_PROCINFO_ID_NR];
|
||||
unsigned *otherids;
|
||||
unsigned levels;
|
||||
|
@ -314,7 +321,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr
|
|||
/* the code below doesn't want any other cache yet */
|
||||
assert(!infos->numcaches);
|
||||
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
|
||||
eax = 0x8000001d;
|
||||
ecx = cachenum;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
|
@ -325,7 +332,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr
|
|||
|
||||
cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
|
||||
if (cache) {
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
|
||||
unsigned long linesize, linepart, ways, sets;
|
||||
eax = 0x8000001d;
|
||||
ecx = cachenum;
|
||||
|
@ -378,7 +385,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc
|
|||
unsigned cachenum;
|
||||
struct cacheinfo *cache;
|
||||
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
|
||||
eax = 0x04;
|
||||
ecx = cachenum;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
|
@ -400,7 +407,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc
|
|||
infos->cache = tmpcaches;
|
||||
cache = &infos->cache[oldnumcaches];
|
||||
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
|
||||
unsigned long linesize, linepart, ways, sets;
|
||||
eax = 0x04;
|
||||
ecx = cachenum;
|
||||
|
@ -480,7 +487,7 @@ static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_
|
|||
}
|
||||
|
||||
/* AMD unit/node from CPUID 0x8000001e leaf (topoext) */
|
||||
static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump)
|
||||
static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump)
|
||||
{
|
||||
unsigned apic_id, nodes_per_proc = 0;
|
||||
unsigned eax, ebx, ecx, edx;
|
||||
|
@ -510,6 +517,7 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
|
|||
unsigned cores_per_unit;
|
||||
/* coreid was obtained from read_amd_cores_legacy() earlier */
|
||||
infos->ids[UNIT] = ebx & 0xff;
|
||||
data->found_unit_ids = 1;
|
||||
cores_per_unit = ((ebx >> 8) & 0xff) + 1;
|
||||
hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]);
|
||||
/* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor).
|
||||
|
@ -524,20 +532,35 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
|
|||
}
|
||||
}
|
||||
|
||||
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */
|
||||
static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump)
|
||||
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration)
|
||||
* or AMD complex/ccd from CPUID 0x80000026 (extended CPU topology)
|
||||
*/
|
||||
static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned leaf, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump)
|
||||
{
|
||||
unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
|
||||
unsigned level, apic_nextshift, apic_type, apic_id = 0, apic_shift = 0, id;
|
||||
unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */
|
||||
unsigned eax, ebx, ecx = 0, edx;
|
||||
int apic_packageshift = 0;
|
||||
|
||||
for (level = 0; ; level++) {
|
||||
for (level = 0; level<32 /* guard */; level++) {
|
||||
ecx = level;
|
||||
eax = leaf;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
if (!eax && !ebx)
|
||||
break;
|
||||
/* Intel specifies that the 0x0b/0x1f loop should stop when we get "invalid domain" (0 in ecx[8:15])
|
||||
* (if so, we also get 0 in eax/ebx for invalid subleaves).
|
||||
* However AMD rather says that the 0x80000026/0x0b loop should stop when we get "no thread at this level" (0 in ebx[0:15]).
|
||||
* Zhaoxin follows the Intel specs but also returns "no thread at this level" for the last *valid* level (at least on KH-4000).
|
||||
* From the Linux kernel code, it's very likely that AMD also returns "invalid domain"
|
||||
* (because detect_extended_topology() uses that for all x86 CPUs)
|
||||
* but keep with the official doc until AMD can clarify that (see #593).
|
||||
*/
|
||||
if (cpuid_type == amd) {
|
||||
if (!(ebx & 0xffff))
|
||||
break;
|
||||
} else {
|
||||
if (!(ecx & 0xff00))
|
||||
break;
|
||||
}
|
||||
apic_packageshift = eax & 0x1f;
|
||||
}
|
||||
|
||||
|
@ -545,47 +568,73 @@ static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf,
|
|||
infos->otherids = malloc(level * sizeof(*infos->otherids));
|
||||
if (infos->otherids) {
|
||||
infos->levels = level;
|
||||
for (level = 0; ; level++) {
|
||||
for (level = 0; level<32 /* guard */; level++) {
|
||||
ecx = level;
|
||||
eax = leaf;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
if (!eax && !ebx)
|
||||
break;
|
||||
if (cpuid_type == amd) {
|
||||
if (!(ebx & 0xffff))
|
||||
break;
|
||||
} else {
|
||||
if (!(ecx & 0xff00))
|
||||
break;
|
||||
}
|
||||
apic_nextshift = eax & 0x1f;
|
||||
apic_number = ebx & 0xffff;
|
||||
apic_type = (ecx & 0xff00) >> 8;
|
||||
apic_id = edx;
|
||||
id = (apic_id >> apic_shift) & ((1 << (apic_packageshift - apic_shift)) - 1);
|
||||
hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
|
||||
hwloc_debug("x2APIC %08x %u: nextshift %u nextnumber %2u type %u id %2u\n",
|
||||
apic_id,
|
||||
level,
|
||||
apic_nextshift,
|
||||
ebx & 0xffff /* number of threads in next level */,
|
||||
apic_type,
|
||||
id);
|
||||
infos->apicid = apic_id;
|
||||
infos->otherids[level] = UINT_MAX;
|
||||
switch (apic_type) {
|
||||
case 1:
|
||||
threadid = id;
|
||||
/* apic_number is the actual number of threads per core */
|
||||
break;
|
||||
case 2:
|
||||
infos->ids[CORE] = id;
|
||||
/* apic_number is the actual number of threads per die */
|
||||
break;
|
||||
case 3:
|
||||
infos->ids[MODULE] = id;
|
||||
/* apic_number is the actual number of threads per tile */
|
||||
break;
|
||||
case 4:
|
||||
infos->ids[TILE] = id;
|
||||
/* apic_number is the actual number of threads per die */
|
||||
break;
|
||||
case 5:
|
||||
infos->ids[DIE] = id;
|
||||
/* apic_number is the actual number of threads per package */
|
||||
break;
|
||||
default:
|
||||
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
|
||||
infos->otherids[level] = apic_id >> apic_shift;
|
||||
break;
|
||||
}
|
||||
apic_shift = apic_nextshift;
|
||||
switch (apic_type) {
|
||||
case 1:
|
||||
threadid = id;
|
||||
break;
|
||||
case 2:
|
||||
infos->ids[CORE] = id;
|
||||
break;
|
||||
case 3:
|
||||
if (leaf == 0x80000026) {
|
||||
data->found_complex_ids = 1;
|
||||
infos->ids[COMPLEX] = id;
|
||||
} else {
|
||||
data->found_module_ids = 1;
|
||||
infos->ids[MODULE] = id;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (leaf == 0x80000026) {
|
||||
data->found_die_ids = 1;
|
||||
infos->ids[DIE] = id;
|
||||
} else {
|
||||
data->found_tile_ids = 1;
|
||||
infos->ids[TILE] = id;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
if (leaf == 0x80000026) {
|
||||
goto unknown_type;
|
||||
} else {
|
||||
data->found_die_ids = 1;
|
||||
infos->ids[DIE] = id;
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
/* TODO: "DieGrp" on Intel */
|
||||
/* fallthrough */
|
||||
default:
|
||||
unknown_type:
|
||||
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
|
||||
infos->otherids[level] = apic_id >> apic_shift;
|
||||
break;
|
||||
}
|
||||
apic_shift = apic_nextshift;
|
||||
}
|
||||
infos->apicid = apic_id;
|
||||
infos->ids[PKG] = apic_id >> apic_shift;
|
||||
|
@ -704,12 +753,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
}
|
||||
|
||||
if (highest_cpuid >= 0x1a && has_hybrid(features)) {
|
||||
/* Get hybrid cpu information from cpuid 0x1a */
|
||||
/* Get hybrid cpu information from cpuid 0x1a on Intel */
|
||||
eax = 0x1a;
|
||||
ecx = 0;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
infos->hybridcoretype = eax >> 24;
|
||||
infos->hybridnativemodel = eax & 0xffffff;
|
||||
data->is_hybrid = 1;
|
||||
}
|
||||
|
||||
/*********************************************************************************
|
||||
|
@ -731,23 +781,30 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
*
|
||||
* Only needed when x2apic supported if NUMA nodes are needed.
|
||||
*/
|
||||
read_amd_cores_topoext(infos, flags, src_cpuiddump);
|
||||
read_amd_cores_topoext(data, infos, flags, src_cpuiddump);
|
||||
}
|
||||
|
||||
if ((cpuid_type == intel) && highest_cpuid >= 0x1f) {
|
||||
if ((cpuid_type == amd) && highest_ext_cpuid >= 0x80000026) {
|
||||
/* Get socket/die/complex/core/thread information from cpuid 0x80000026
|
||||
* (AMD Extended CPU Topology)
|
||||
*/
|
||||
read_extended_topo(data, infos, 0x80000026, cpuid_type, src_cpuiddump);
|
||||
|
||||
} else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x1f) {
|
||||
/* Get package/die/module/tile/core/thread information from cpuid 0x1f
|
||||
* (Intel v2 Extended Topology Enumeration)
|
||||
*/
|
||||
read_intel_cores_exttopoenum(infos, 0x1f, src_cpuiddump);
|
||||
read_extended_topo(data, infos, 0x1f, cpuid_type, src_cpuiddump);
|
||||
|
||||
} else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin)
|
||||
&& highest_cpuid >= 0x0b && has_x2apic(features)) {
|
||||
/* Get package/core/thread information from cpuid 0x0b
|
||||
* (Intel v1 Extended Topology Enumeration)
|
||||
*/
|
||||
read_intel_cores_exttopoenum(infos, 0x0b, src_cpuiddump);
|
||||
read_extended_topo(data, infos, 0x0b, cpuid_type, src_cpuiddump);
|
||||
}
|
||||
|
||||
if (backend->topology->want_some_cpu_caches) {
|
||||
/**************************************
|
||||
* Get caches from CPU-specific leaves
|
||||
*/
|
||||
|
@ -845,6 +902,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
|
||||
data->apicid_unique = 0;
|
||||
|
@ -1046,21 +1104,34 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
|
||||
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
|
||||
if (fulldiscovery) {
|
||||
/* Look for AMD Compute units inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
UNIT, "Compute Unit",
|
||||
HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
|
||||
/* Look for Intel Modules inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
MODULE, "Module",
|
||||
HWLOC_GROUP_KIND_INTEL_MODULE, 0);
|
||||
/* Look for Intel Tiles inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
TILE, "Tile",
|
||||
HWLOC_GROUP_KIND_INTEL_TILE, 0);
|
||||
if (data->found_unit_ids) {
|
||||
/* Look for AMD Complex inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
COMPLEX, "Complex",
|
||||
HWLOC_GROUP_KIND_AMD_COMPLEX, 0);
|
||||
}
|
||||
if (data->found_unit_ids) {
|
||||
/* Look for AMD Compute units inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
UNIT, "Compute Unit",
|
||||
HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
|
||||
}
|
||||
if (data->found_module_ids) {
|
||||
/* Look for Intel Modules inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
MODULE, "Module",
|
||||
HWLOC_GROUP_KIND_INTEL_MODULE, 0);
|
||||
}
|
||||
if (data->found_tile_ids) {
|
||||
/* Look for Intel Tiles inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
TILE, "Tile",
|
||||
HWLOC_GROUP_KIND_INTEL_TILE, 0);
|
||||
}
|
||||
|
||||
/* Look for unknown objects */
|
||||
if (infos[one].otherids) {
|
||||
|
@ -1094,7 +1165,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
}
|
||||
}
|
||||
|
||||
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
|
||||
if (data->found_die_ids
|
||||
&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
|
||||
/* Look for Intel Dies inside packages */
|
||||
if (fulldiscovery) {
|
||||
hwloc_bitmap_t die_cpuset;
|
||||
|
@ -1349,40 +1421,45 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
|
|||
if (data->apicid_unique) {
|
||||
summarize(backend, infos, flags);
|
||||
|
||||
if (has_hybrid(features) && !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
|
||||
if (data->is_hybrid
|
||||
&& !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
|
||||
/* use hybrid info for cpukinds */
|
||||
hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
|
||||
for(i=0; i<nbprocs; i++) {
|
||||
if (infos[i].hybridcoretype == 0x20)
|
||||
hwloc_bitmap_set(atomset, i);
|
||||
else if (infos[i].hybridcoretype == 0x40)
|
||||
hwloc_bitmap_set(coreset, i);
|
||||
}
|
||||
/* register IntelAtom set if any */
|
||||
if (!hwloc_bitmap_iszero(atomset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelAtom";
|
||||
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(atomset);
|
||||
}
|
||||
/* register IntelCore set if any */
|
||||
if (!hwloc_bitmap_iszero(coreset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelCore";
|
||||
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(coreset);
|
||||
if (cpuid_type == intel) {
|
||||
/* Hybrid Intel */
|
||||
hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
|
||||
for(i=0; i<nbprocs; i++) {
|
||||
if (infos[i].hybridcoretype == 0x20)
|
||||
hwloc_bitmap_set(atomset, i);
|
||||
else if (infos[i].hybridcoretype == 0x40)
|
||||
hwloc_bitmap_set(coreset, i);
|
||||
}
|
||||
/* register IntelAtom set if any */
|
||||
if (!hwloc_bitmap_iszero(atomset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelAtom";
|
||||
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(atomset);
|
||||
}
|
||||
/* register IntelCore set if any */
|
||||
if (!hwloc_bitmap_iszero(coreset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelCore";
|
||||
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(coreset);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
hwloc_debug("x86 APIC IDs aren't unique, x86 discovery ignored.\n");
|
||||
/* do nothing and return success, so that the caller does nothing either */
|
||||
}
|
||||
/* if !data->apicid_unique, do nothing and return success, so that the caller does nothing either */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1459,7 +1536,15 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
|||
unsigned i;
|
||||
unsigned highest_cpuid;
|
||||
unsigned highest_ext_cpuid;
|
||||
/* This stores cpuid features with the same indexing as Linux */
|
||||
/* This stores cpuid features with the same indexing as Linux:
|
||||
* [0] = 0x1 edx
|
||||
* [1] = 0x80000001 edx
|
||||
* [4] = 0x1 ecx
|
||||
* [6] = 0x80000001 ecx
|
||||
* [9] = 0x7/0 ebx
|
||||
* [16] = 0x7/0 ecx
|
||||
* [18] = 0x7/0 edx
|
||||
*/
|
||||
unsigned features[19] = { 0 };
|
||||
struct procinfo *infos = NULL;
|
||||
enum cpuid_type cpuid_type = unknown;
|
||||
|
@ -1579,6 +1664,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
|||
ecx = 0;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
features[9] = ebx;
|
||||
features[16] = ecx;
|
||||
features[18] = edx;
|
||||
}
|
||||
|
||||
|
@ -1730,17 +1816,17 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
|
|||
sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path);
|
||||
file = fopen(path, "r");
|
||||
if (!file) {
|
||||
fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path);
|
||||
fprintf(stderr, "hwloc/x86: Couldn't open dumped cpuid summary %s\n", path);
|
||||
goto out_with_path;
|
||||
}
|
||||
if (!fgets(line, sizeof(line), file)) {
|
||||
fprintf(stderr, "Found read dumped cpuid summary in %s\n", path);
|
||||
fprintf(stderr, "hwloc/x86: Found read dumped cpuid summary in %s\n", path);
|
||||
fclose(file);
|
||||
goto out_with_path;
|
||||
}
|
||||
fclose(file);
|
||||
if (strcmp(line, "Architecture: x86\n")) {
|
||||
fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
|
||||
fprintf(stderr, "hwloc/x86: Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
|
||||
goto out_with_path;
|
||||
}
|
||||
free(path);
|
||||
|
@ -1752,19 +1838,19 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
|
|||
if (!*end)
|
||||
hwloc_bitmap_set(set, idx);
|
||||
else
|
||||
fprintf(stderr, "Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n",
|
||||
fprintf(stderr, "hwloc/x86: Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n",
|
||||
dirent->d_name, src_cpuiddump_path);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
|
||||
if (hwloc_bitmap_iszero(set)) {
|
||||
fprintf(stderr, "Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n",
|
||||
fprintf(stderr, "hwloc/x86: Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n",
|
||||
src_cpuiddump_path);
|
||||
return -1;
|
||||
} else if (hwloc_bitmap_last(set) != hwloc_bitmap_weight(set) - 1) {
|
||||
/* The x86 backend enforces a contiguous set of PUs starting at 0 so far */
|
||||
fprintf(stderr, "Found non-contigous pu%%u range in dumped cpuid directory `%s'\n",
|
||||
fprintf(stderr, "hwloc/x86: Found non-contigous pu%%u range in dumped cpuid directory `%s'\n",
|
||||
src_cpuiddump_path);
|
||||
return -1;
|
||||
}
|
||||
|
@ -1816,9 +1902,15 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
|
|||
|
||||
/* default values */
|
||||
data->is_knl = 0;
|
||||
data->is_hybrid = 0;
|
||||
data->apicid_set = hwloc_bitmap_alloc();
|
||||
data->apicid_unique = 1;
|
||||
data->src_cpuiddump_path = NULL;
|
||||
data->found_die_ids = 0;
|
||||
data->found_complex_ids = 0;
|
||||
data->found_unit_ids = 0;
|
||||
data->found_module_ids = 0;
|
||||
data->found_tile_ids = 0;
|
||||
|
||||
src_cpuiddump_path = getenv("HWLOC_CPUID_PATH");
|
||||
if (src_cpuiddump_path) {
|
||||
|
@ -1829,7 +1921,7 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
|
|||
assert(!hwloc_bitmap_iszero(set)); /* enforced by hwloc_x86_check_cpuiddump_input() */
|
||||
data->nbprocs = hwloc_bitmap_weight(set);
|
||||
} else {
|
||||
fprintf(stderr, "Ignoring dumped cpuid directory.\n");
|
||||
fprintf(stderr, "hwloc/x86: Ignoring dumped cpuid directory.\n");
|
||||
}
|
||||
hwloc_bitmap_free(set);
|
||||
}
|
||||
|
|
|
@ -411,12 +411,12 @@ hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata,
|
|||
bdata->data = nbdata;
|
||||
|
||||
if (xmlbuffer) {
|
||||
nbdata->buffer = malloc(xmlbuflen+1);
|
||||
nbdata->buffer = malloc(xmlbuflen);
|
||||
if (!nbdata->buffer)
|
||||
goto out_with_nbdata;
|
||||
nbdata->buflen = xmlbuflen+1;
|
||||
nbdata->buflen = xmlbuflen;
|
||||
memcpy(nbdata->buffer, xmlbuffer, xmlbuflen);
|
||||
nbdata->buffer[xmlbuflen] = '\0';
|
||||
nbdata->buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */
|
||||
|
||||
} else {
|
||||
int err = hwloc_nolibxml_read_file(xmlpath, &nbdata->buffer, &nbdata->buflen);
|
||||
|
@ -453,8 +453,9 @@ hwloc_nolibxml_import_diff(struct hwloc__xml_import_state_s *state,
|
|||
buffer = malloc(xmlbuflen);
|
||||
if (!buffer)
|
||||
goto out;
|
||||
memcpy(buffer, xmlbuffer, xmlbuflen);
|
||||
buflen = xmlbuflen;
|
||||
memcpy(buffer, xmlbuffer, xmlbuflen);
|
||||
buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */
|
||||
|
||||
} else {
|
||||
ret = hwloc_nolibxml_read_file(xmlpath, &buffer, &buflen);
|
||||
|
|
77
src/3rdparty/hwloc/src/topology-xml.c
vendored
77
src/3rdparty/hwloc/src/topology-xml.c
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2022 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -562,7 +562,13 @@ hwloc__xml_import_pagetype(hwloc_topology_t topology __hwloc_attribute_unused, s
|
|||
char *attrname, *attrvalue;
|
||||
if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
|
||||
break;
|
||||
if (!strcmp(attrname, "size"))
|
||||
if (!strcmp(attrname, "info")) {
|
||||
char *infoname, *infovalue;
|
||||
int ret = hwloc___xml_import_info(&infoname, &infovalue, state);
|
||||
if (ret < 0)
|
||||
return -1;
|
||||
/* ignored */
|
||||
} else if (!strcmp(attrname, "size"))
|
||||
size = strtoull(attrvalue, NULL, 10);
|
||||
else if (!strcmp(attrname, "count"))
|
||||
count = strtoull(attrvalue, NULL, 10);
|
||||
|
@ -1160,6 +1166,48 @@ hwloc__xml_import_object(hwloc_topology_t topology,
|
|||
data->last_numanode = obj;
|
||||
}
|
||||
|
||||
/* 3.0 forward compatibility */
|
||||
if (data->version_major >= 3 && obj->type == HWLOC_OBJ_OS_DEVICE) {
|
||||
/* osdev.type changed into a bitmask in 3.0 */
|
||||
if (obj->attr->osdev.type & 3 /* STORAGE|MEMORY for BLOCK */) {
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_BLOCK;
|
||||
} else if (obj->attr->osdev.type & 8 /* COPROC for COPROC and rsmi/nvml GPUs */) {
|
||||
if (obj->subtype && (!strcmp(obj->subtype, "RSMI") || !strcmp(obj->subtype, "NVML")))
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
|
||||
else
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;
|
||||
} else if (obj->attr->osdev.type & 4 /* GPU for non-COPROC GPUs */) {
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
|
||||
} else if (obj->attr->osdev.type & 32 /* OFED */) {
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS;
|
||||
} else if (obj->attr->osdev.type & 16 /* NET for NET and BXI v2-fake-OFED */) {
|
||||
if (obj->subtype && !strcmp(obj->subtype, "BXI"))
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS;
|
||||
else
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_NETWORK;
|
||||
} else if (obj->attr->osdev.type & 64 /* DMA */) {
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_DMA;
|
||||
} else { /* none or unknown */
|
||||
obj->attr->osdev.type = (hwloc_obj_osdev_type_t) -1;
|
||||
}
|
||||
/* Backend info only in root */
|
||||
if (obj->subtype && !hwloc_obj_get_info_by_name(obj, "Backend")) {
|
||||
if (!strcmp(obj->subtype, "CUDA")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "CUDA");
|
||||
} else if (!strcmp(obj->subtype, "NVML")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "NVML");
|
||||
} else if (!strcmp(obj->subtype, "OpenCL")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "OpenCL");
|
||||
} else if (!strcmp(obj->subtype, "RSMI")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "RSMI");
|
||||
} else if (!strcmp(obj->subtype, "LevelZero")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "LevelZero");
|
||||
} else if (!strcmp(obj->subtype, "Display")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "GL");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!hwloc_filter_check_keep_object(topology, obj)) {
|
||||
/* Ignore this object instead of inserting it.
|
||||
*
|
||||
|
@ -1433,7 +1481,14 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
|
|||
if (ret <= 0)
|
||||
break;
|
||||
|
||||
if (!strcmp(tag, "indexes"))
|
||||
if (!strcmp(tag, "info")) {
|
||||
char *infoname, *infovalue;
|
||||
ret = hwloc___xml_import_info(&infoname, &infovalue, state);
|
||||
if (ret < 0)
|
||||
goto out_with_arrays;
|
||||
/* ignored */
|
||||
continue;
|
||||
} else if (!strcmp(tag, "indexes"))
|
||||
is_index = 1;
|
||||
else if (!strcmp(tag, "u64values"))
|
||||
is_u64values = 1;
|
||||
|
@ -1766,6 +1821,10 @@ hwloc__xml_import_memattr(hwloc_topology_t topology,
|
|||
|
||||
if (!strcmp(tag, "memattr_value")) {
|
||||
ret = hwloc__xml_import_memattr_value(topology, id, flags, &childstate);
|
||||
} else if (!strcmp(tag, "info")) {
|
||||
char *infoname, *infovalue;
|
||||
ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate);
|
||||
/* ignored */
|
||||
} else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: memattr with unrecognized child %s\n",
|
||||
|
@ -2094,9 +2153,10 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
|
|||
if (ret < 0)
|
||||
goto failed;
|
||||
|
||||
if (data->version_major > 2) {
|
||||
if (data->version_major > 3
|
||||
|| (data->version_major == 3 && data->version_minor > 0)) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: cannot import XML version %u.%u > 2\n",
|
||||
fprintf(stderr, "%s: cannot import XML version %u.%u > 3.0\n",
|
||||
data->msgprefix, data->version_major, data->version_minor);
|
||||
goto err;
|
||||
}
|
||||
|
@ -2144,6 +2204,13 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
|
|||
ret = hwloc__xml_import_cpukind(topology, &childstate);
|
||||
if (ret < 0)
|
||||
goto failed;
|
||||
} else if (!strcmp(tag, "info")) {
|
||||
char *infoname, *infovalue;
|
||||
ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate);
|
||||
if (ret < 0)
|
||||
goto failed;
|
||||
/* move 3.x topology info back to the root object */
|
||||
hwloc_obj_add_info(topology->levels[0][0], infoname, infovalue);
|
||||
} else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring unknown tag `%s' after root object.\n",
|
||||
|
|
207
src/3rdparty/hwloc/src/topology.c
vendored
207
src/3rdparty/hwloc/src/topology.c
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2022 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright © 2022 IBM Corporation. All rights reserved.
|
||||
|
@ -146,21 +146,24 @@ report_insert_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj)
|
|||
char typestr[64];
|
||||
char *cpusetstr;
|
||||
char *nodesetstr = NULL;
|
||||
char indexstr[64] = "";
|
||||
char groupstr[64] = "";
|
||||
|
||||
hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0);
|
||||
hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset);
|
||||
if (obj->os_index != HWLOC_UNKNOWN_INDEX)
|
||||
snprintf(indexstr, sizeof(indexstr), "P#%u ", obj->os_index);
|
||||
if (obj->type == HWLOC_OBJ_GROUP)
|
||||
snprintf(groupstr, sizeof(groupstr), "groupkind %u-%u ", obj->attr->group.kind, obj->attr->group.subkind);
|
||||
if (obj->nodeset) /* may be missing during insert */
|
||||
hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset);
|
||||
if (obj->os_index != HWLOC_UNKNOWN_INDEX)
|
||||
snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)",
|
||||
typestr, obj->os_index, cpusetstr,
|
||||
nodesetstr ? " nodeset " : "",
|
||||
nodesetstr ? nodesetstr : "");
|
||||
else
|
||||
snprintf(buf, buflen, "%s (cpuset %s%s%s)",
|
||||
typestr, cpusetstr,
|
||||
nodesetstr ? " nodeset " : "",
|
||||
nodesetstr ? nodesetstr : "");
|
||||
snprintf(buf, buflen, "%s (%s%s%s%s%scpuset %s%s%s)",
|
||||
typestr,
|
||||
indexstr,
|
||||
obj->subtype ? "subtype " : "", obj->subtype ? obj->subtype : "", obj->subtype ? " " : "",
|
||||
groupstr,
|
||||
cpusetstr,
|
||||
nodesetstr ? " nodeset " : "", nodesetstr ? nodesetstr : "");
|
||||
free(cpusetstr);
|
||||
free(nodesetstr);
|
||||
}
|
||||
|
@ -178,8 +181,9 @@ static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *ms
|
|||
fprintf(stderr, "****************************************************************************\n");
|
||||
fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* Failed with: %s\n", msg);
|
||||
fprintf(stderr, "* while inserting %s at %s\n", newstr, oldstr);
|
||||
fprintf(stderr, "* Failed with error: %s\n", msg);
|
||||
fprintf(stderr, "* while inserting %s\n", newstr);
|
||||
fprintf(stderr, "* at %s\n", oldstr);
|
||||
fprintf(stderr, "* coming from: %s\n", reason);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* The following FAQ entry in the hwloc documentation may help:\n");
|
||||
|
@ -679,7 +683,8 @@ unlink_and_free_object_and_children(hwloc_obj_t *pobj)
|
|||
void
|
||||
hwloc_free_object_and_children(hwloc_obj_t obj)
|
||||
{
|
||||
unlink_and_free_object_and_children(&obj);
|
||||
if (obj)
|
||||
unlink_and_free_object_and_children(&obj);
|
||||
}
|
||||
|
||||
/* Free an object, its next siblings and their children without unlinking from parent.
|
||||
|
@ -1925,6 +1930,22 @@ hwloc_topology_alloc_group_object(struct hwloc_topology *topology)
|
|||
return hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX);
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_topology_free_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
|
||||
{
|
||||
if (!topology->is_loaded) {
|
||||
/* this could actually work, see insert() below */
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
if (topology->adopted_shmem_addr) {
|
||||
errno = EPERM;
|
||||
return -1;
|
||||
}
|
||||
hwloc_free_unlinked_object(obj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root);
|
||||
static void propagate_total_memory(hwloc_obj_t obj);
|
||||
static void hwloc_set_group_depth(hwloc_topology_t topology);
|
||||
|
@ -1935,7 +1956,7 @@ static int hwloc_connect_special_levels(hwloc_topology_t topology);
|
|||
hwloc_obj_t
|
||||
hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
|
||||
{
|
||||
hwloc_obj_t res, root;
|
||||
hwloc_obj_t res, root, child;
|
||||
int cmp;
|
||||
|
||||
if (!topology->is_loaded) {
|
||||
|
@ -1945,6 +1966,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
|
|||
return NULL;
|
||||
}
|
||||
if (topology->adopted_shmem_addr) {
|
||||
hwloc_free_unlinked_object(obj);
|
||||
errno = EPERM;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1998,6 +2020,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
|
|||
res = hwloc__insert_object_by_cpuset(topology, NULL, obj, NULL /* do not show errors on stdout */);
|
||||
} else {
|
||||
/* just merge root */
|
||||
hwloc_free_unlinked_object(obj);
|
||||
res = root;
|
||||
}
|
||||
|
||||
|
@ -2024,6 +2047,13 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
|
|||
if (hwloc_topology_reconnect(topology, 0) < 0)
|
||||
return NULL;
|
||||
|
||||
/* Compute group total_memory. */
|
||||
res->total_memory = 0;
|
||||
for_each_child(child, res)
|
||||
res->total_memory += child->total_memory;
|
||||
for_each_memory_child(child, res)
|
||||
res->total_memory += child->total_memory;
|
||||
|
||||
hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
|
||||
hwloc_set_group_depth(topology);
|
||||
|
||||
|
@ -2254,11 +2284,13 @@ fixup_sets(hwloc_obj_t obj)
|
|||
int
|
||||
hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src)
|
||||
{
|
||||
#define ADD_OTHER_OBJ_SET(_dst, _src, _set) \
|
||||
if ((_src)->_set) { \
|
||||
if (!(_dst)->_set) \
|
||||
(_dst)->_set = hwloc_bitmap_alloc(); \
|
||||
hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set); \
|
||||
#define ADD_OTHER_OBJ_SET(_dst, _src, _set) \
|
||||
if ((_src)->_set) { \
|
||||
if (!(_dst)->_set) \
|
||||
(_dst)->_set = hwloc_bitmap_alloc(); \
|
||||
if (!(_dst)->_set \
|
||||
|| hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set) < 0) \
|
||||
return -1; \
|
||||
}
|
||||
ADD_OTHER_OBJ_SET(dst, src, cpuset);
|
||||
ADD_OTHER_OBJ_SET(dst, src, complete_cpuset);
|
||||
|
@ -3730,6 +3762,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp,
|
|||
|
||||
hwloc__topology_filter_init(topology);
|
||||
|
||||
/* always initialize since we don't know flags to disable those yet */
|
||||
hwloc_internal_distances_init(topology);
|
||||
hwloc_internal_memattrs_init(topology);
|
||||
hwloc_internal_cpukinds_init(topology);
|
||||
|
@ -3942,8 +3975,12 @@ int
|
|||
hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=HWLOC_OBJ_L1CACHE; i<HWLOC_OBJ_L3ICACHE; i++)
|
||||
hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
|
||||
if (topology->is_loaded) {
|
||||
errno = EBUSY;
|
||||
return -1;
|
||||
}
|
||||
for(i=HWLOC_OBJ_L1CACHE; i<=HWLOC_OBJ_L3ICACHE; i++)
|
||||
hwloc__topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3951,17 +3988,25 @@ int
|
|||
hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=HWLOC_OBJ_L1ICACHE; i<HWLOC_OBJ_L3ICACHE; i++)
|
||||
hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
|
||||
if (topology->is_loaded) {
|
||||
errno = EBUSY;
|
||||
return -1;
|
||||
}
|
||||
for(i=HWLOC_OBJ_L1ICACHE; i<=HWLOC_OBJ_L3ICACHE; i++)
|
||||
hwloc__topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_topology_set_io_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
|
||||
{
|
||||
hwloc_topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter);
|
||||
hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter);
|
||||
hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter);
|
||||
if (topology->is_loaded) {
|
||||
errno = EBUSY;
|
||||
return -1;
|
||||
}
|
||||
hwloc__topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter);
|
||||
hwloc__topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter);
|
||||
hwloc__topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3982,9 +4027,12 @@ hwloc_topology_clear (struct hwloc_topology *topology)
|
|||
{
|
||||
/* no need to set to NULL after free() since callers will call setup_defaults() or just destroy the rest of the topology */
|
||||
unsigned l;
|
||||
|
||||
/* always destroy cpukinds/distances/memattrs since there are always initialized during init() */
|
||||
hwloc_internal_cpukinds_destroy(topology);
|
||||
hwloc_internal_distances_destroy(topology);
|
||||
hwloc_internal_memattrs_destroy(topology);
|
||||
|
||||
hwloc_free_object_and_children(topology->levels[0][0]);
|
||||
hwloc_bitmap_free(topology->allowed_cpuset);
|
||||
hwloc_bitmap_free(topology->allowed_nodeset);
|
||||
|
@ -4024,6 +4072,7 @@ hwloc_topology_load (struct hwloc_topology *topology)
|
|||
{
|
||||
struct hwloc_disc_status dstatus;
|
||||
const char *env;
|
||||
unsigned i;
|
||||
int err;
|
||||
|
||||
if (topology->is_loaded) {
|
||||
|
@ -4032,8 +4081,18 @@ hwloc_topology_load (struct hwloc_topology *topology)
|
|||
}
|
||||
|
||||
/* initialize envvar-related things */
|
||||
hwloc_internal_distances_prepare(topology);
|
||||
hwloc_internal_memattrs_prepare(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES))
|
||||
hwloc_internal_distances_prepare(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS))
|
||||
hwloc_internal_memattrs_prepare(topology);
|
||||
|
||||
/* check if any cpu cache filter is not NONE */
|
||||
topology->want_some_cpu_caches = 0;
|
||||
for(i=HWLOC_OBJ_L1CACHE; i<=HWLOC_OBJ_L3ICACHE; i++)
|
||||
if (topology->type_filter[i] != HWLOC_TYPE_FILTER_KEEP_NONE) {
|
||||
topology->want_some_cpu_caches = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (getenv("HWLOC_XML_USERDATA_NOT_DECODED"))
|
||||
topology->userdata_not_decoded = 1;
|
||||
|
@ -4110,23 +4169,32 @@ hwloc_topology_load (struct hwloc_topology *topology)
|
|||
#endif
|
||||
hwloc_topology_check(topology);
|
||||
|
||||
/* Rank cpukinds */
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
|
||||
/* Rank cpukinds */
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
}
|
||||
|
||||
/* Mark distances objs arrays as invalid since we may have removed objects
|
||||
* from the topology after adding the distances (remove_empty, etc).
|
||||
* It would be hard to actually verify whether it's needed.
|
||||
*/
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
/* And refresh distances so that multithreaded concurrent distances_get()
|
||||
* don't refresh() concurrently (disallowed).
|
||||
*/
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) {
|
||||
/* Mark distances objs arrays as invalid since we may have removed objects
|
||||
* from the topology after adding the distances (remove_empty, etc).
|
||||
* It would be hard to actually verify whether it's needed.
|
||||
*/
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
/* And refresh distances so that multithreaded concurrent distances_get()
|
||||
* don't refresh() concurrently (disallowed).
|
||||
*/
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
}
|
||||
|
||||
/* Same for memattrs */
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
hwloc_internal_memattrs_guess_memory_tiers(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) {
|
||||
int force_memtiers = (getenv("HWLOC_MEMTIERS_REFRESH") != NULL);
|
||||
/* Same for memattrs */
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
/* update memtiers unless XML */
|
||||
if (force_memtiers || strcmp(topology->backends->component->name, "xml"))
|
||||
hwloc_internal_memattrs_guess_memory_tiers(topology, force_memtiers);
|
||||
}
|
||||
|
||||
topology->is_loaded = 1;
|
||||
|
||||
|
@ -4185,20 +4253,11 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_
|
|||
hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset);
|
||||
hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset);
|
||||
modified = 1;
|
||||
} else {
|
||||
if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS)
|
||||
&& hwloc_bitmap_iszero(obj->complete_cpuset)) {
|
||||
/* we're empty, there's a NUMAnode below us, it'll be removed this time */
|
||||
modified = 1;
|
||||
}
|
||||
/* nodeset cannot intersect unless cpuset intersects or is empty */
|
||||
if (droppednodeset)
|
||||
assert(!hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)
|
||||
|| hwloc_bitmap_iszero(obj->complete_cpuset));
|
||||
}
|
||||
if (droppednodeset) {
|
||||
if (droppednodeset && hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)) {
|
||||
hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset);
|
||||
hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset);
|
||||
modified = 1;
|
||||
}
|
||||
|
||||
if (modified) {
|
||||
|
@ -4251,20 +4310,11 @@ restrict_object_by_nodeset(hwloc_topology_t topology, unsigned long flags, hwloc
|
|||
hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset);
|
||||
hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset);
|
||||
modified = 1;
|
||||
} else {
|
||||
if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS)
|
||||
&& hwloc_bitmap_iszero(obj->complete_nodeset)) {
|
||||
/* we're empty, there's a PU below us, it'll be removed this time */
|
||||
modified = 1;
|
||||
}
|
||||
/* cpuset cannot intersect unless nodeset intersects or is empty */
|
||||
if (droppedcpuset)
|
||||
assert(!hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset)
|
||||
|| hwloc_bitmap_iszero(obj->complete_nodeset));
|
||||
}
|
||||
if (droppedcpuset) {
|
||||
if (droppedcpuset && hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset)) {
|
||||
hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset);
|
||||
hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset);
|
||||
modified = 1;
|
||||
}
|
||||
|
||||
if (modified) {
|
||||
|
@ -4433,13 +4483,18 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se
|
|||
if (hwloc_filter_levels_keep_structure(topology) < 0) /* takes care of reconnecting internally */
|
||||
goto out;
|
||||
|
||||
/* some objects may have disappeared, we need to update distances objs arrays */
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
/* some objects may have disappeared and sets were modified,
|
||||
* we need to update distances, etc */
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES))
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS))
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS))
|
||||
hwloc_internal_cpukinds_restrict(topology);
|
||||
|
||||
|
||||
hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
|
||||
propagate_total_memory(topology->levels[0][0]);
|
||||
hwloc_internal_cpukinds_restrict(topology);
|
||||
|
||||
#ifndef HWLOC_DEBUG
|
||||
if (getenv("HWLOC_DEBUG_CHECK"))
|
||||
|
@ -4527,9 +4582,12 @@ hwloc_topology_allow(struct hwloc_topology *topology,
|
|||
int
|
||||
hwloc_topology_refresh(struct hwloc_topology *topology)
|
||||
{
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS))
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES))
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS))
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -5081,6 +5139,9 @@ hwloc_topology_check(struct hwloc_topology *topology)
|
|||
for(i=HWLOC_OBJ_TYPE_MIN; i<HWLOC_OBJ_TYPE_MAX; i++)
|
||||
assert(obj_type_order[obj_order_type[i]] == i);
|
||||
|
||||
if (!topology->is_loaded)
|
||||
return;
|
||||
|
||||
depth = hwloc_topology_get_depth(topology);
|
||||
|
||||
assert(!topology->modified);
|
||||
|
|
2
src/3rdparty/libethash/CMakeLists.txt
vendored
2
src/3rdparty/libethash/CMakeLists.txt
vendored
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
project (ethash C)
|
||||
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
project(GhostRider)
|
||||
|
||||
set(HEADERS
|
||||
|
|
Loading…
Reference in a new issue