Merge branch 'dev'

This commit is contained in:
XMRig 2019-12-11 14:06:21 +07:00
commit a0046e325c
No known key found for this signature in database
GPG key ID: 446A53638BE94409
122 changed files with 4694 additions and 2073 deletions

View file

@ -1,3 +1,15 @@
# v5.2.0
- **[#1388](https://github.com/xmrig/xmrig/pull/1388) Added [1GB huge pages support](https://xmrig.com/docs/miner/hugepages#onegb-huge-pages) for Linux.**
- Added new option `1gb-pages` in `randomx` object with command line equivalent `--randomx-1gb-pages`.
- Added automatic huge pages configuration on Linux when the miner is run with root privileges.
- **Added [automatic Intel prefetchers configuration](https://xmrig.com/docs/miner/randomx-optimization-guide#intel-specific-optimizations) on Linux.**
- Added new option `wrmsr` in `randomx` object with command line equivalent `--randomx-wrmsr`.
- [#1396](https://github.com/xmrig/xmrig/pull/1396) [#1401](https://github.com/xmrig/xmrig/pull/1401) New performance optimizations for Ryzen CPUs.
- [#1385](https://github.com/xmrig/xmrig/issues/1385) Added `max-threads-hint` option support for RandomX dataset initialization threads.
- [#1386](https://github.com/xmrig/xmrig/issues/1386) Added `priority` option support for RandomX dataset initialization threads.
- For official builds all dependencies (libuv, hwloc, openssl) updated to recent versions.
- Windows `msvc` builds now use Visual Studio 2019 instead of 2017.
# v5.1.1 # v5.1.1
- [#1365](https://github.com/xmrig/xmrig/issues/1365) Fixed various system response/stability issues. - [#1365](https://github.com/xmrig/xmrig/issues/1365) Fixed various system response/stability issues.
- Added new CPU option `yield` and command line equivalent `--cpu-no-yield`. - Added new CPU option `yield` and command line equivalent `--cpu-no-yield`.

View file

@ -30,6 +30,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
include (CheckIncludeFile) include (CheckIncludeFile)
include (cmake/cpu.cmake) include (cmake/cpu.cmake)
include (cmake/os.cmake)
include (src/base/base.cmake) include (src/base/base.cmake)
include (src/backend/backend.cmake) include (src/backend/backend.cmake)
@ -75,6 +76,7 @@ set(HEADERS_CRYPTO
src/crypto/cn/soft_aes.h src/crypto/cn/soft_aes.h
src/crypto/common/Algorithm.h src/crypto/common/Algorithm.h
src/crypto/common/Coin.h src/crypto/common/Coin.h
src/crypto/common/HugePagesInfo.h
src/crypto/common/keccak.h src/crypto/common/keccak.h
src/crypto/common/MemoryPool.h src/crypto/common/MemoryPool.h
src/crypto/common/Nonce.h src/crypto/common/Nonce.h
@ -114,6 +116,7 @@ set(SOURCES_CRYPTO
src/crypto/cn/CnHash.cpp src/crypto/cn/CnHash.cpp
src/crypto/common/Algorithm.cpp src/crypto/common/Algorithm.cpp
src/crypto/common/Coin.cpp src/crypto/common/Coin.cpp
src/crypto/common/HugePagesInfo.cpp
src/crypto/common/keccak.cpp src/crypto/common/keccak.cpp
src/crypto/common/MemoryPool.cpp src/crypto/common/MemoryPool.cpp
src/crypto/common/Nonce.cpp src/crypto/common/Nonce.cpp
@ -131,40 +134,36 @@ if (WITH_HWLOC)
) )
endif() endif()
if (WIN32) if (XMRIG_OS_WIN)
set(SOURCES_OS list(APPEND SOURCES_OS
"${SOURCES_OS}"
res/app.rc res/app.rc
src/App_win.cpp src/App_win.cpp
src/crypto/common/VirtualMemory_win.cpp src/crypto/common/VirtualMemory_win.cpp
) )
add_definitions(/DWIN32)
set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv) set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv)
elseif (APPLE) elseif (XMRIG_OS_APPLE)
set(SOURCES_OS list(APPEND SOURCES_OS
"${SOURCES_OS}"
src/App_unix.cpp src/App_unix.cpp
src/crypto/common/VirtualMemory_unix.cpp src/crypto/common/VirtualMemory_unix.cpp
) )
else() else()
set(SOURCES_OS list(APPEND SOURCES_OS
"${SOURCES_OS}"
src/App_unix.cpp src/App_unix.cpp
src/crypto/common/VirtualMemory_unix.cpp src/crypto/common/VirtualMemory_unix.cpp
) )
if (CMAKE_SYSTEM_NAME STREQUAL FreeBSD) if (XMRIG_OS_ANDROID)
set(EXTRA_LIBS kvm pthread) set(EXTRA_LIBS pthread rt dl log)
else() elseif (XMRIG_OS_LINUX)
set(EXTRA_LIBS pthread rt dl) list(APPEND SOURCES_OS
endif() src/crypto/common/LinuxMemory.h
endif() src/crypto/common/LinuxMemory.cpp
)
if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android") set(EXTRA_LIBS pthread rt dl)
EXECUTE_PROCESS(COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM) elseif (XMRIG_OS_FREEBSD)
if (OPERATING_SYSTEM MATCHES "Android") set(EXTRA_LIBS kvm pthread)
set(EXTRA_LIBS ${EXTRA_LIBS} log)
endif() endif()
endif() endif()

View file

@ -16,7 +16,7 @@ XMRig High performance, open source, cross platform RandomX, CryptoNight and Arg
- **OpenCL** for AMD GPUs. - **OpenCL** for AMD GPUs.
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda). - **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
<img src="doc/screenshot.png" width="808" > <img src="doc/screenshot_v5_2_0.png" width="833" >
## Download ## Download
* Binary releases: https://github.com/xmrig/xmrig/releases * Binary releases: https://github.com/xmrig/xmrig/releases
@ -65,6 +65,8 @@ CPU backend:
--randomx-init=N threads count to initialize RandomX dataset --randomx-init=N threads count to initialize RandomX dataset
--randomx-no-numa disable NUMA support for RandomX --randomx-no-numa disable NUMA support for RandomX
--randomx-mode=MODE RandomX mode: auto, fast, light --randomx-mode=MODE RandomX mode: auto, fast, light
--randomx-1gb-pages use 1GB hugepages for dataset (Linux only)
--randomx-wrmsr=N write value (0-15) to Intel MSR register 0x1a4 or do nothing (-1) (Linux only)
API: API:
--api-worker-id=ID custom worker-id for API --api-worker-id=ID custom worker-id for API

View file

@ -57,9 +57,9 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE) add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE)
elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL")
set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL")
add_definitions(/D_CRT_SECURE_NO_WARNINGS) add_definitions(/D_CRT_SECURE_NO_WARNINGS)
add_definitions(/D_CRT_NONSTDC_NO_WARNINGS) add_definitions(/D_CRT_NONSTDC_NO_WARNINGS)
add_definitions(/DNOMINMAX) add_definitions(/DNOMINMAX)

45
cmake/os.cmake Normal file
View file

@ -0,0 +1,45 @@
# Target platform detection.
#
# Stage 1 turns CMake's own platform indicators (WIN32, APPLE, IOS, ANDROID,
# CMAKE_SYSTEM_NAME) into XMRIG_OS_* switches that other CMake files can test.
# Stage 2 mirrors whichever switches are set as preprocessor definitions of
# the same name.

# --- Stage 1: XMRIG_OS_* CMake switches --------------------------------------
if (WIN32)
    set(XMRIG_OS_WIN ON)
elseif (APPLE)
    set(XMRIG_OS_APPLE ON)

    # Apple platforms are split into iOS and everything else (treated as macOS).
    if (IOS OR CMAKE_SYSTEM_NAME STREQUAL iOS)
        set(XMRIG_OS_IOS ON)
    else()
        set(XMRIG_OS_MACOS ON)
    endif()
else()
    # Everything that is neither Windows nor Apple is handled as generic Unix.
    set(XMRIG_OS_UNIX ON)

    # Android is tested before Linux; a system matching none of the three
    # checks below keeps only XMRIG_OS_UNIX.
    if (ANDROID OR CMAKE_SYSTEM_NAME MATCHES "Android")
        set(XMRIG_OS_ANDROID ON)
    elseif (CMAKE_SYSTEM_NAME MATCHES "Linux")
        set(XMRIG_OS_LINUX ON)
    elseif (CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
        set(XMRIG_OS_FREEBSD ON)
    endif()
endif()

# --- Stage 2: matching preprocessor definitions ------------------------------
if (XMRIG_OS_WIN)
    # WIN32 is defined explicitly here in addition to the XMRIG_OS_WIN marker.
    add_definitions(/DWIN32 /DXMRIG_OS_WIN)
elseif (XMRIG_OS_APPLE)
    if (XMRIG_OS_IOS)
        add_definitions(/DXMRIG_OS_APPLE /DXMRIG_OS_IOS)
    else()
        add_definitions(/DXMRIG_OS_APPLE /DXMRIG_OS_MACOS)
    endif()
elseif (XMRIG_OS_UNIX)
    add_definitions(/DXMRIG_OS_UNIX)

    # At most one flavour-specific definition is added on top of the Unix one.
    if (XMRIG_OS_ANDROID)
        add_definitions(/DXMRIG_OS_ANDROID)
    elseif (XMRIG_OS_LINUX)
        add_definitions(/DXMRIG_OS_LINUX)
    elseif (XMRIG_OS_FREEBSD)
        add_definitions(/DXMRIG_OS_FREEBSD)
    endif()
endif()

View file

@ -75,13 +75,12 @@ if (WITH_RANDOMX)
) )
list(APPEND SOURCES_CRYPTO list(APPEND SOURCES_CRYPTO
src/crypto/rx/RxConfig_hwloc.cpp
src/crypto/rx/RxNUMAStorage.cpp src/crypto/rx/RxNUMAStorage.cpp
) )
else() endif()
list(APPEND SOURCES_CRYPTO
src/crypto/rx/RxConfig_basic.cpp if (XMRIG_OS_LINUX)
) list(APPEND SOURCES_CRYPTO src/crypto/rx/Rx_linux.cpp)
endif() endif()
else() else()
remove_definitions(/DXMRIG_ALGO_RANDOMX) remove_definitions(/DXMRIG_ALGO_RANDOMX)

BIN
doc/screenshot_v5_2_0.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

View file

@ -1,6 +1,6 @@
#!/bin/bash -e #!/bin/bash -e
UV_VERSION="1.33.1" UV_VERSION="1.34.0"
OPENSSL_VERSION="1.1.1d" OPENSSL_VERSION="1.1.1d"
HWLOC_VERSION="2.1.0" HWLOC_VERSION="2.1.0"

View file

@ -21,6 +21,7 @@ Nathalie Furmento CNRS
Bryon Gloden Bryon Gloden
Brice Goglin Inria Brice Goglin Inria
Gilles Gouaillardet RIST Gilles Gouaillardet RIST
Valentin Hoyet Inria
Joshua Hursey UWL Joshua Hursey UWL
Alexey Kardashevskiy IBM Alexey Kardashevskiy IBM
Rob Latham ANL Rob Latham ANL

View file

@ -5,7 +5,7 @@ include_directories(include)
include_directories(src) include_directories(src)
add_definitions(/D_CRT_SECURE_NO_WARNINGS) add_definitions(/D_CRT_SECURE_NO_WARNINGS)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT") set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Ob2 /DNDEBUG")
set(HEADERS set(HEADERS
include/hwloc.h include/hwloc.h

View file

@ -13,8 +13,96 @@ $HEADER$
This file contains the main features as well as overviews of specific This file contains the main features as well as overviews of specific
bug fixes (and other actions) for each version of hwloc since version bug fixes (and other actions) for each version of hwloc since version
0.9 (as initially released as "libtopology", then re-branded to "hwloc" 0.9.
in v0.9.1).
Version 2.1.0
-------------
* API
+ Add a new "Die" object (HWLOC_OBJ_DIE) for upcoming x86 processors
with multiple dies per package, in the x86 and Linux backends.
+ Add the new HWLOC_OBJ_MEMCACHE object type for memory-side caches.
- They are filtered-out by default, except in command-line tools.
- They are only available on very recent platforms running Linux 5.2+
and up-to-date ACPI tables.
- The KNL MCDRAM in cache mode is still exposed as a L3 unless
HWLOC_KNL_MSCACHE_L3=0 in the environment.
+ Add HWLOC_RESTRICT_FLAG_BYNODESET and _REMOVE_MEMLESS for restricting
topologies based on some memory nodes.
+ Add hwloc_topology_set_components() for blacklisting some components
from being enabled in a topology.
+ Add hwloc_bitmap_nr_ulongs() and hwloc_bitmap_from/to_ulongs(),
thanks to Junchao Zhang for the suggestion.
+ Improve the API for dealing with disallowed resources
- HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM is replaced with FLAG_INCLUDE_DISALLOWED
and --whole-system command-line options with --disallowed.
. Former names are still accepted for backward compatibility.
- Add hwloc_topology_allow() for changing allowed sets after load().
- Add the HWLOC_ALLOW=all environment variable to totally ignore
administrative restrictions such as Linux Cgroups.
- Add disallowed_pu and disallowed_numa bits to the discovery support
structure.
+ Group objects have a new "dont_merge" attribute to prevent them from
being automatically merged with identical parent or children.
+ Add more distances-related features:
- Add hwloc_distances_get_name() to retrieve a string describing
what a distances structure contains.
- Add hwloc_distances_get_by_name() to retrieve distances structures
based on their name.
- Add hwloc_distances_release_remove()
- Distances may now cover objects of different types with new kind
HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES.
* Backends
+ Add support for Linux 5.3 new sysfs cpu topology files with Die information.
+ Add support for Intel v2 Extended Topology Enumeration in the x86 backend.
+ Improve memory locality on Linux by using HMAT initiators (exposed
since Linux 5.2+), and NUMA distances for CPU-less NUMA nodes.
+ The x86 backend now properly handles offline CPUs.
+ Detect the locality of NVIDIA GPU OpenCL devices.
+ Ignore NUMA nodes that correspond to NVIDIA GPU by default.
- They may be unignored if HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=1 in the environment.
- Fix their CPU locality and add info attributes to identify them.
Thanks to Max Katz and Edgar Leon for the help.
+ Add support for IBM S/390 drawers.
+ Rework the heuristics for discovering KNL Cluster and Memory modes
to stop assuming all CPUs are online (required for mOS support).
Thanks to Sharath K Bhat for testing patches.
+ Ignore NUMA node information from AMD topoext in the x86 backend,
unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment.
+ Expose Linux DAX devices as hwloc Block OS devices.
+ Remove support for /proc/cpuinfo-only topology discovery in Linux
kernel prior to 2.6.16.
+ Disable POWER device-tree-based topology on Linux by default.
- It may be reenabled by setting HWLOC_USE_DT=1 in the environment.
+ Discovery components are now divided in phases that may be individually
blacklisted.
- The linuxio component has been merged back into the linux component.
* Tools
+ lstopo
- lstopo factorizes objects by default in the graphical output when
there are more than 4 identical children.
. New options --no-factorize and --factorize may be used to configure this.
. Hit the 'f' key to disable factorizing in interactive outputs.
- Both logical and OS/physical indexes are now displayed by default
for PU and NUMA nodes.
- The X11 and Windows interactive outputs support many keyboard
shortcuts to dynamically customize the attributes, legend, etc.
- Add --linespacing and change default margins and linespacing.
- Add --allow for changing allowed sets.
- Add a native SVG backend. Its graphical output may be slightly less
pretty than Cairo (still used by default if available) but the SVG
code provides attributes to manipulate objects from HTML/JS.
See dynamic_SVG_example.html for an example.
+ Add --nodeset options to hwloc-calc for converting between cpusets and
nodesets.
+ Add --no-smt to lstopo, hwloc-bind and hwloc-calc to ignore multiple
PU in SMT cores.
+ hwloc-annotate may annotate multiple locations at once.
+ Add a HTML/JS version of hwloc-ps. See contrib/hwloc-ps.www/README.
+ Add bash completions.
* Misc
+ Add several FAQ entries in "Compatibility between hwloc versions"
about API version, ABI, XML, Synthetic strings, and shmem topologies.
Version 2.0.4 (also included in 1.11.13 when appropriate) Version 2.0.4 (also included in 1.11.13 when appropriate)
@ -214,6 +302,54 @@ Version 2.0.0
+ hwloc now requires a C99 compliant compiler. + hwloc now requires a C99 compliant compiler.
Version 1.11.13 (also included in 2.0.4)
---------------
* Add support for Linux 5.3 new sysfs cpu topology files with Die information.
* Add support for Intel v2 Extended Topology Enumeration in the x86 backend.
* Tiles, Modules and Dies are exposed as Groups for now.
+ HWLOC_DONT_MERGE_DIE_GROUPS=1 may be set in the environment to prevent
Die groups from being automatically merged with identical parent or children.
* Ignore NUMA node information from AMD topoext in the x86 backend,
unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment.
* Group objects have a new "dont_merge" attribute to prevent them from
being automatically merged with identical parent or children.
Version 1.11.12 (also included in 2.0.3)
---------------
* Fix a corner case of hwloc_topology_restrict() where children would
become out-of-order.
* Fix the return length of export_xmlbuffer() functions to always
include the ending \0.
Version 1.11.11 (also included in 2.0.2)
---------------
* Add support for Hygon Dhyana processors in the x86 backend,
thanks to Pu Wen for the patch.
* Fix symbol renaming to also rename internal components,
thanks to Evan Ramos for the patch.
* Fix build on HP-UX, thanks to Richard Lloyd for reporting the issues.
* Detect PCI link speed without being root on Linux >= 4.13.
Version 1.11.10 (also included in 2.0.1)
---------------
* Fix detection of cores and hyperthreads on Mac OS X.
* Serialize pciaccess discovery to fix concurrent topology loads in
multiple threads.
* Fix first touch area memory binding on Linux when thread memory
binding is different.
* Some minor fixes to memory binding.
* Fix hwloc-dump-hwdata to only process SMBIOS information that corresponds
to the KNL and KNM configuration.
* Add a heuristic for guessing KNL/KNM memory and cluster modes when
hwloc-dump-hwdata could not run as root earlier.
* Fix discovery of NVMe OS devices on Linux >= 4.0.
* Add get_area_memlocation() on Windows.
* Add CPUVendor, Model, ... attributes on Mac OS X.
Version 1.11.9 Version 1.11.9
-------------- --------------
* Add support for Zhaoxin ZX-C and ZX-D processors in the x86 backend, * Add support for Zhaoxin ZX-C and ZX-D processors in the x86 backend,
@ -941,7 +1077,7 @@ Version 1.6.0
+ Add a section about Synthetic topologies in the documentation. + Add a section about Synthetic topologies in the documentation.
Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6) Version 1.5.2 (some of these changes are in 1.6.2 but not in 1.6)
------------- -------------
* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. * Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
pciutils/libpci is only used if --enable-libpci is given to configure pciutils/libpci is only used if --enable-libpci is given to configure
@ -1076,9 +1212,8 @@ Version 1.4.2
for most of them. for most of them.
Version 1.4.1 Version 1.4.1 (contains all 1.3.2 changes)
------------- -------------
* This release contains all changes from v1.3.2.
* Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue. * Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue.
* Fix memory leaks in some get_membind() functions. * Fix memory leaks in some get_membind() functions.
* Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h) * Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h)
@ -1091,7 +1226,7 @@ Version 1.4.1
issues. issues.
Version 1.4.0 (does not contain all v1.3.2 changes) Version 1.4.0 (does not contain all 1.3.2 changes)
------------- -------------
* Major features * Major features
+ Add "custom" interface and "assembler" tools to build multi-node + Add "custom" interface and "assembler" tools to build multi-node
@ -1536,7 +1671,7 @@ Version 1.0.0
Version 0.9.4 (unreleased) Version 0.9.4 (unreleased)
-------------------------- -------------
* Fix reseting colors to normal in lstopo -.txt output. * Fix reseting colors to normal in lstopo -.txt output.
* Fix Linux pthread_t binding error report. * Fix Linux pthread_t binding error report.
@ -1593,7 +1728,7 @@ Version 0.9.1
the physical location of IB devices. the physical location of IB devices.
Version 0.9 (libtopology) Version 0.9 (formerly named "libtopology")
------------------------- -----------
* First release. * First release.

View file

@ -8,8 +8,8 @@
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
major=2 major=2
minor=0 minor=1
release=4 release=0
# greek is used for alpha or beta release tags. If it is non-empty, # greek is used for alpha or beta release tags. If it is non-empty,
# it will be appended to the version number. It does not have to be # it will be appended to the version number. It does not have to be
@ -22,7 +22,7 @@ greek=
# The date when this release was created # The date when this release was created
date="Jun 03, 2019" date="Sep 30, 2019"
# If snapshot=1, then use the value from snapshot_version as the # If snapshot=1, then use the value from snapshot_version as the
# entire hwloc version (i.e., ignore major, minor, release, and # entire hwloc version (i.e., ignore major, minor, release, and
@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
# 2. Version numbers are described in the Libtool current:revision:age # 2. Version numbers are described in the Libtool current:revision:age
# format. # format.
libhwloc_so_version=15:3:0 libhwloc_so_version=16:0:1
libnetloc_so_version=0:0:0 libnetloc_so_version=0:0:0
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj # Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj

View file

@ -53,7 +53,8 @@
#ifndef HWLOC_H #ifndef HWLOC_H
#define HWLOC_H #define HWLOC_H
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#include <sys/types.h> #include <sys/types.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
@ -62,13 +63,13 @@
/* /*
* Symbol transforms * Symbol transforms
*/ */
#include <hwloc/rename.h> #include "hwloc/rename.h"
/* /*
* Bitmap definitions * Bitmap definitions
*/ */
#include <hwloc/bitmap.h> #include "hwloc/bitmap.h"
#ifdef __cplusplus #ifdef __cplusplus
@ -86,13 +87,13 @@ extern "C" {
* actually modifies the API. * actually modifies the API.
* *
* Users may check for available features at build time using this number * Users may check for available features at build time using this number
* (see \ref faq_upgrade). * (see \ref faq_version_api).
* *
* \note This should not be confused with HWLOC_VERSION, the library version. * \note This should not be confused with HWLOC_VERSION, the library version.
* Two stable releases of the same series usually have the same ::HWLOC_API_VERSION * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION
* even if their HWLOC_VERSION are different. * even if their HWLOC_VERSION are different.
*/ */
#define HWLOC_API_VERSION 0x00020000 #define HWLOC_API_VERSION 0x00020100
/** \brief Indicate at runtime which hwloc API version was used at build time. /** \brief Indicate at runtime which hwloc API version was used at build time.
* *
@ -101,7 +102,7 @@ extern "C" {
HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); HWLOC_DECLSPEC unsigned hwloc_get_api_version(void);
/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */ /** \brief Current component and plugin ABI version (see hwloc/plugins.h) */
#define HWLOC_COMPONENT_ABI 5 #define HWLOC_COMPONENT_ABI 6
/** @} */ /** @} */
@ -186,7 +187,8 @@ typedef enum {
HWLOC_OBJ_PACKAGE, /**< \brief Physical package. HWLOC_OBJ_PACKAGE, /**< \brief Physical package.
* The physical package that usually gets inserted * The physical package that usually gets inserted
* into a socket on the motherboard. * into a socket on the motherboard.
* A processor package usually contains multiple cores. * A processor package usually contains multiple cores,
* and possibly some dies.
*/ */
HWLOC_OBJ_CORE, /**< \brief Core. HWLOC_OBJ_CORE, /**< \brief Core.
* A computation unit (may be shared by several * A computation unit (may be shared by several
@ -233,6 +235,10 @@ typedef enum {
* It is usually close to some cores (the corresponding objects * It is usually close to some cores (the corresponding objects
* are descendants of the NUMA node object in the hwloc tree). * are descendants of the NUMA node object in the hwloc tree).
* *
* This is the smallest object representing Memory resources,
* it cannot have any child except Misc objects.
* However it may have Memory-side cache parents.
*
* There is always at least one such object in the topology * There is always at least one such object in the topology
* even if the machine is not NUMA. * even if the machine is not NUMA.
* *
@ -279,6 +285,24 @@ typedef enum {
* Misc objects have NULL CPU and node sets. * Misc objects have NULL CPU and node sets.
*/ */
HWLOC_OBJ_MEMCACHE, /**< \brief Memory-side cache (filtered out by default).
* A cache in front of a specific NUMA node.
*
* This object always has at least one NUMA node as a memory child.
*
* Memory objects are not listed in the main children list,
* but rather in the dedicated Memory children list.
*
* Memory-side caches have a special depth ::HWLOC_TYPE_DEPTH_MEMCACHE
* instead of a normal depth just like other objects in the
* main tree.
*/
HWLOC_OBJ_DIE, /**< \brief Die within a physical package.
* A subpart of the physical package, that contains multiple cores.
* \hideinitializer
*/
HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */
} hwloc_obj_type_t; } hwloc_obj_type_t;
@ -297,8 +321,8 @@ typedef enum hwloc_obj_bridge_type_e {
/** \brief Type of a OS device. */ /** \brief Type of a OS device. */
typedef enum hwloc_obj_osdev_type_e { typedef enum hwloc_obj_osdev_type_e {
HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device. HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device, or non-volatile memory device.
* For instance "sda" on Linux. */ * For instance "sda" or "dax2.0" on Linux. */
HWLOC_OBJ_OSDEV_GPU, /**< \brief Operating system GPU device. HWLOC_OBJ_OSDEV_GPU, /**< \brief Operating system GPU device.
* For instance ":0.0" for a GL display, * For instance ":0.0" for a GL display,
* "card0" for a Linux DRM device. */ * "card0" for a Linux DRM device. */
@ -434,9 +458,15 @@ struct hwloc_obj {
* These children are listed in \p memory_first_child. * These children are listed in \p memory_first_child.
*/ */
struct hwloc_obj *memory_first_child; /**< \brief First Memory child. struct hwloc_obj *memory_first_child; /**< \brief First Memory child.
* NUMA nodes are listed here (\p memory_arity and \p memory_first_child) * NUMA nodes and Memory-side caches are listed here
* (\p memory_arity and \p memory_first_child)
* instead of in the normal children list. * instead of in the normal children list.
* See also hwloc_obj_type_is_memory(). * See also hwloc_obj_type_is_memory().
*
* A memory hierarchy starts from a normal CPU-side object
* (e.g. Package) and ends with NUMA nodes as leaves.
* There might exist some memory-side caches between them
* in the middle of the memory subtree.
*/ */
/**@}*/ /**@}*/
@ -471,7 +501,7 @@ struct hwloc_obj {
* object and known how (the children path between this object and the PU * object and known how (the children path between this object and the PU
* objects). * objects).
* *
* If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set,
* some of these CPUs may not be allowed for binding, * some of these CPUs may not be allowed for binding,
* see hwloc_topology_get_allowed_cpuset(). * see hwloc_topology_get_allowed_cpuset().
* *
@ -483,7 +513,7 @@ struct hwloc_obj {
* *
* This may include not only the same as the cpuset field, but also some CPUs for * This may include not only the same as the cpuset field, but also some CPUs for
* which topology information is unknown or incomplete, some offlines CPUs, and * which topology information is unknown or incomplete, some offlines CPUs, and
* the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag * the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED flag
* is not set. * is not set.
* Thus no corresponding PU object may be found in the topology, because the * Thus no corresponding PU object may be found in the topology, because the
* precise position is undefined. It is however known that it would be somewhere * precise position is undefined. It is however known that it would be somewhere
@ -501,7 +531,7 @@ struct hwloc_obj {
* *
* In the end, these nodes are those that are close to the current object. * In the end, these nodes are those that are close to the current object.
* *
* If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set,
* some of these nodes may not be allowed for allocation, * some of these nodes may not be allowed for allocation,
* see hwloc_topology_get_allowed_nodeset(). * see hwloc_topology_get_allowed_nodeset().
* *
@ -516,7 +546,7 @@ struct hwloc_obj {
* *
* This may include not only the same as the nodeset field, but also some NUMA * This may include not only the same as the nodeset field, but also some NUMA
* nodes for which topology information is unknown or incomplete, some offlines * nodes for which topology information is unknown or incomplete, some offlines
* nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM * nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED
* flag is not set. * flag is not set.
* Thus no corresponding NUMA node object may be found in the topology, because the * Thus no corresponding NUMA node object may be found in the topology, because the
* precise position is undefined. It is however known that it would be * precise position is undefined. It is however known that it would be
@ -770,7 +800,8 @@ enum hwloc_get_type_depth_e {
HWLOC_TYPE_DEPTH_BRIDGE = -4, /**< \brief Virtual depth for bridge object level. \hideinitializer */ HWLOC_TYPE_DEPTH_BRIDGE = -4, /**< \brief Virtual depth for bridge object level. \hideinitializer */
HWLOC_TYPE_DEPTH_PCI_DEVICE = -5, /**< \brief Virtual depth for PCI device object level. \hideinitializer */ HWLOC_TYPE_DEPTH_PCI_DEVICE = -5, /**< \brief Virtual depth for PCI device object level. \hideinitializer */
HWLOC_TYPE_DEPTH_OS_DEVICE = -6, /**< \brief Virtual depth for software device object level. \hideinitializer */ HWLOC_TYPE_DEPTH_OS_DEVICE = -6, /**< \brief Virtual depth for software device object level. \hideinitializer */
HWLOC_TYPE_DEPTH_MISC = -7 /**< \brief Virtual depth for Misc object. \hideinitializer */ HWLOC_TYPE_DEPTH_MISC = -7, /**< \brief Virtual depth for Misc object. \hideinitializer */
HWLOC_TYPE_DEPTH_MEMCACHE = -8 /**< \brief Virtual depth for MemCache object. \hideinitializer */
}; };
/** \brief Return the depth of parents where memory objects are attached. /** \brief Return the depth of parents where memory objects are attached.
@ -1781,6 +1812,31 @@ HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topo
*/ */
HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size); HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size);
/** \brief Flags to be passed to hwloc_topology_set_components()
*/
enum hwloc_topology_components_flag_e {
/** \brief Blacklist the target component from being used.
* \hideinitializer
*/
HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST = (1UL<<0)
};
/** \brief Prevent a discovery component from being used for a topology.
*
* \p name is the name of the discovery component that should not be used
* when loading topology \p topology. The name is a string such as "cuda".
*
* For components with multiple phases, it may also be suffixed with the name
* of a phase, for instance "linux:io".
*
* \p flags should be ::HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST.
*
* This may be used to avoid expensive parts of the discovery process.
* For instance, CUDA-specific discovery may be expensive and unneeded
* while generic I/O discovery could still be useful.
*/
HWLOC_DECLSPEC int hwloc_topology_set_components(hwloc_topology_t __hwloc_restrict topology, unsigned long flags, const char * __hwloc_restrict name);
/** @} */ /** @} */
@ -1800,28 +1856,27 @@ HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restric
* They may also be returned by hwloc_topology_get_flags(). * They may also be returned by hwloc_topology_get_flags().
*/ */
enum hwloc_topology_flags_e { enum hwloc_topology_flags_e {
/** \brief Detect the whole system, ignore reservations. /** \brief Detect the whole system, ignore reservations, include disallowed objects.
* *
* Gather all resources, even if some were disabled by the administrator. * Gather all resources, even if some were disabled by the administrator.
* For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes. * For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes.
* *
* When this flag is not set, PUs and NUMA nodes that are disallowed are not added to the topology. * When this flag is not set, PUs and NUMA nodes that are disallowed are not added to the topology.
* Parent objects (package, core, cache, etc.) are added only if some of their children are allowed. * Parent objects (package, core, cache, etc.) are added only if some of their children are allowed.
* All existing PUs and NUMA nodes in the topology are allowed.
* hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset()
* are equal to the root object cpuset and nodeset.
* *
* When this flag is set, the actual sets of allowed PUs and NUMA nodes are given * When this flag is set, the actual sets of allowed PUs and NUMA nodes are given
* by hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset(). * by hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset().
* They may be smaller than the root object cpuset and nodeset. * They may be smaller than the root object cpuset and nodeset.
* *
* When this flag is not set, all existing PUs and NUMA nodes in the topology
* are allowed. hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset()
* are equal to the root object cpuset and nodeset.
*
* If the current topology is exported to XML and reimported later, this flag * If the current topology is exported to XML and reimported later, this flag
* should be set again in the reimported topology so that disallowed resources * should be set again in the reimported topology so that disallowed resources
* are reimported as well. * are reimported as well.
* \hideinitializer * \hideinitializer
*/ */
HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0), HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED = (1UL<<0),
/** \brief Assume that the selected backend provides the topology for the /** \brief Assume that the selected backend provides the topology for the
* system on which we are running. * system on which we are running.
@ -1901,6 +1956,10 @@ struct hwloc_topology_discovery_support {
unsigned char numa; unsigned char numa;
/** \brief Detecting the amount of memory in NUMA nodes is supported. */ /** \brief Detecting the amount of memory in NUMA nodes is supported. */
unsigned char numa_memory; unsigned char numa_memory;
/** \brief Detecting and identifying PU objects that are not available to the current process is supported. */
unsigned char disallowed_pu;
/** \brief Detecting and identifying NUMA nodes that are not available to the current process is supported. */
unsigned char disallowed_numa;
}; };
/** \brief Flags describing actual PU binding support for this topology. /** \brief Flags describing actual PU binding support for this topology.
@ -1998,7 +2057,7 @@ HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(h
* *
* By default, most objects are kept (::HWLOC_TYPE_FILTER_KEEP_ALL). * By default, most objects are kept (::HWLOC_TYPE_FILTER_KEEP_ALL).
* Instruction caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE). * Instruction caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE).
* Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). * Die and Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE).
* *
* Note that group objects are also ignored individually (without the entire level) * Note that group objects are also ignored individually (without the entire level)
* when they do not bring structure. * when they do not bring structure.
@ -2063,11 +2122,15 @@ HWLOC_DECLSPEC int hwloc_topology_get_type_filter(hwloc_topology_t topology, hwl
*/ */
HWLOC_DECLSPEC int hwloc_topology_set_all_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); HWLOC_DECLSPEC int hwloc_topology_set_all_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
/** \brief Set the filtering for all cache object types. /** \brief Set the filtering for all CPU cache object types.
*
* Memory-side caches are not involved since they are not CPU caches.
*/ */
HWLOC_DECLSPEC int hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); HWLOC_DECLSPEC int hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
/** \brief Set the filtering for all instruction cache object types. /** \brief Set the filtering for all CPU instruction cache object types.
*
* Memory-side caches are not involved since they are not CPU caches.
*/ */
HWLOC_DECLSPEC int hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); HWLOC_DECLSPEC int hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
@ -2110,6 +2173,19 @@ enum hwloc_restrict_flags_e {
*/ */
HWLOC_RESTRICT_FLAG_REMOVE_CPULESS = (1UL<<0), HWLOC_RESTRICT_FLAG_REMOVE_CPULESS = (1UL<<0),
/** \brief Restrict by nodeset instead of CPU set.
* Only keep objects whose nodeset is included or partially included in the given set.
* This flag may not be used with ::HWLOC_RESTRICT_FLAG_REMOVE_CPULESS.
*/
HWLOC_RESTRICT_FLAG_BYNODESET = (1UL<<3),
/** \brief Remove all objects that became Memory-less.
* By default, only objects that contain no PU and no memory are removed.
* This flag may only be used with ::HWLOC_RESTRICT_FLAG_BYNODESET.
* \hideinitializer
*/
HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS = (1UL<<4),
/** \brief Move Misc objects to ancestors if their parents are removed during restriction. /** \brief Move Misc objects to ancestors if their parents are removed during restriction.
* If this flag is not set, Misc objects are removed when their parents are removed. * If this flag is not set, Misc objects are removed when their parents are removed.
* \hideinitializer * \hideinitializer
@ -2123,28 +2199,70 @@ enum hwloc_restrict_flags_e {
HWLOC_RESTRICT_FLAG_ADAPT_IO = (1UL<<2) HWLOC_RESTRICT_FLAG_ADAPT_IO = (1UL<<2)
}; };
/** \brief Restrict the topology to the given CPU set. /** \brief Restrict the topology to the given CPU set or nodeset.
* *
* Topology \p topology is modified so as to remove all objects that * Topology \p topology is modified so as to remove all objects that
* are not included (or partially included) in the CPU set \p cpuset. * are not included (or partially included) in the CPU set \p set.
* All objects CPU and node sets are restricted accordingly. * All objects CPU and node sets are restricted accordingly.
* *
* If ::HWLOC_RESTRICT_FLAG_BYNODESET is passed in \p flags,
* \p set is considered a nodeset instead of a CPU set.
*
* \p flags is a OR'ed set of ::hwloc_restrict_flags_e. * \p flags is a OR'ed set of ::hwloc_restrict_flags_e.
* *
* \note This call may not be reverted by restricting back to a larger * \note This call may not be reverted by restricting back to a larger
* cpuset. Once dropped during restriction, objects may not be brought * set. Once dropped during restriction, objects may not be brought
* back, except by loading another topology with hwloc_topology_load(). * back, except by loading another topology with hwloc_topology_load().
* *
* \return 0 on success. * \return 0 on success.
* *
* \return -1 with errno set to EINVAL if the input cpuset is invalid. * \return -1 with errno set to EINVAL if the input set is invalid.
* The topology is not modified in this case. * The topology is not modified in this case.
* *
* \return -1 with errno set to ENOMEM on failure to allocate internal data. * \return -1 with errno set to ENOMEM on failure to allocate internal data.
* The topology is reinitialized in this case. It should be either * The topology is reinitialized in this case. It should be either
* destroyed with hwloc_topology_destroy() or configured and loaded again. * destroyed with hwloc_topology_destroy() or configured and loaded again.
*/ */
HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags); HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_bitmap_t set, unsigned long flags);
/** \brief Flags to be given to hwloc_topology_allow().
*
* Exactly one of these flags must be given to hwloc_topology_allow().
*/
enum hwloc_allow_flags_e {
/** \brief Mark all objects as allowed in the topology.
*
* \p cpuset and \p nodeset given to hwloc_topology_allow() must be \c NULL.
* \hideinitializer */
HWLOC_ALLOW_FLAG_ALL = (1UL<<0),
/** \brief Only allow objects that are available to the current process.
*
* The topology must have ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM so that the set
* of available resources can actually be retrieved from the operating system.
*
* \p cpuset and \p nodeset given to hwloc_topology_allow() must be \c NULL.
* \hideinitializer */
HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS = (1UL<<1),
/** \brief Allow a custom set of objects, given to hwloc_topology_allow() as \p cpuset and/or \p nodeset parameters.
* \hideinitializer */
HWLOC_ALLOW_FLAG_CUSTOM = (1UL<<2)
};
/** \brief Change the sets of allowed PUs and NUMA nodes in the topology.
*
* This function only works if the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED
* was set on the topology. It does not modify any object, it only changes
* the sets returned by hwloc_topology_get_allowed_cpuset() and
* hwloc_topology_get_allowed_nodeset().
*
* It is notably useful when importing a topology from another process
* running in a different Linux Cgroup.
*
* \p flags must be set to one flag among ::hwloc_allow_flags_e.
*
* \note Removing objects from a topology should rather be performed with
* hwloc_topology_restrict().
*/
HWLOC_DECLSPEC int hwloc_topology_allow(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, hwloc_const_nodeset_t nodeset, unsigned long flags);
/** \brief Add a MISC object as a leaf of the topology /** \brief Add a MISC object as a leaf of the topology
* *
@ -2250,21 +2368,21 @@ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src
/* high-level helpers */ /* high-level helpers */
#include <hwloc/helper.h> #include "hwloc/helper.h"
/* inline code of some functions above */ /* inline code of some functions above */
#include <hwloc/inlines.h> #include "hwloc/inlines.h"
/* exporting to XML or synthetic */ /* exporting to XML or synthetic */
#include <hwloc/export.h> #include "hwloc/export.h"
/* distances */ /* distances */
#include <hwloc/distances.h> #include "hwloc/distances.h"
/* topology diffs */ /* topology diffs */
#include <hwloc/diff.h> #include "hwloc/diff.h"
/* deprecated headers */ /* deprecated headers */
#include <hwloc/deprecated.h> #include "hwloc/deprecated.h"
#endif /* HWLOC_H */ #endif /* HWLOC_H */

View file

@ -11,10 +11,10 @@
#ifndef HWLOC_CONFIG_H #ifndef HWLOC_CONFIG_H
#define HWLOC_CONFIG_H #define HWLOC_CONFIG_H
#define HWLOC_VERSION "2.0.4" #define HWLOC_VERSION "2.1.0"
#define HWLOC_VERSION_MAJOR 2 #define HWLOC_VERSION_MAJOR 2
#define HWLOC_VERSION_MINOR 0 #define HWLOC_VERSION_MINOR 1
#define HWLOC_VERSION_RELEASE 4 #define HWLOC_VERSION_RELEASE 0
#define HWLOC_VERSION_GREEK "" #define HWLOC_VERSION_GREEK ""
#define __hwloc_restrict #define __hwloc_restrict

View file

@ -13,7 +13,8 @@
#ifndef HWLOC_BITMAP_H #ifndef HWLOC_BITMAP_H
#define HWLOC_BITMAP_H #define HWLOC_BITMAP_H
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#include <assert.h> #include <assert.h>
@ -198,6 +199,9 @@ HWLOC_DECLSPEC int hwloc_bitmap_from_ulong(hwloc_bitmap_t bitmap, unsigned long
/** \brief Setup bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */ /** \brief Setup bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */
HWLOC_DECLSPEC int hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask); HWLOC_DECLSPEC int hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);
/** \brief Setup bitmap \p bitmap from unsigned longs \p masks used as first \p nr subsets */
HWLOC_DECLSPEC int hwloc_bitmap_from_ulongs(hwloc_bitmap_t bitmap, unsigned nr, const unsigned long *masks);
/* /*
* Modifying bitmaps. * Modifying bitmaps.
@ -256,6 +260,29 @@ HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ulong(hwloc_const_bitmap_t bitmap)
/** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */ /** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */
HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure; HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure;
/** \brief Convert the first \p nr subsets of bitmap \p bitmap into the array of \p nr unsigned long \p masks
*
* \p nr may be determined earlier with hwloc_bitmap_nr_ulongs().
*
* \return 0
*/
HWLOC_DECLSPEC int hwloc_bitmap_to_ulongs(hwloc_const_bitmap_t bitmap, unsigned nr, unsigned long *masks);
/** \brief Return the number of unsigned longs required for storing bitmap \p bitmap entirely
*
* This is the number of contiguous unsigned longs from the very first bit of the bitmap
* (even if unset) up to the last set bit.
* This is useful for knowing the \p nr parameter to pass to hwloc_bitmap_to_ulongs()
* (or which calls to hwloc_bitmap_to_ith_ulong() are needed)
* to entirely convert a bitmap into multiple unsigned longs.
*
* When called on the output of hwloc_topology_get_topology_cpuset(),
* the returned number is large enough for all cpusets of the topology.
*
* \return -1 if \p bitmap is infinite.
*/
HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
/** \brief Test whether index \p id is part of bitmap \p bitmap. /** \brief Test whether index \p id is part of bitmap \p bitmap.
* *
* \return 1 if the bit at index \p id is set in bitmap \p bitmap, 0 otherwise. * \return 1 if the bit at index \p id is set in bitmap \p bitmap, 0 otherwise.

View file

@ -16,11 +16,11 @@
#ifndef HWLOC_CUDA_H #ifndef HWLOC_CUDA_H
#define HWLOC_CUDA_H #define HWLOC_CUDA_H
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#include <hwloc/helper.h> #include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS #ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h> #include "hwloc/linux.h"
#endif #endif
#include <cuda.h> #include <cuda.h>

View file

@ -16,11 +16,11 @@
#ifndef HWLOC_CUDART_H #ifndef HWLOC_CUDART_H
#define HWLOC_CUDART_H #define HWLOC_CUDART_H
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#include <hwloc/helper.h> #include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS #ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h> #include "hwloc/linux.h"
#endif #endif
#include <cuda.h> /* for CUDA_VERSION */ #include <cuda.h> /* for CUDA_VERSION */

View file

@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2018 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@ -21,6 +21,8 @@
extern "C" { extern "C" {
#endif #endif
/* backward compat with v2.0 before WHOLE_SYSTEM renaming */
#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED
/* backward compat with v1.11 before System removal */ /* backward compat with v1.11 before System removal */
#define HWLOC_OBJ_SYSTEM HWLOC_OBJ_MACHINE #define HWLOC_OBJ_SYSTEM HWLOC_OBJ_MACHINE
/* backward compat with v1.10 before Socket->Package renaming */ /* backward compat with v1.10 before Socket->Package renaming */

View file

@ -87,7 +87,12 @@ enum hwloc_distances_kind_e {
* Such values are currently ignored for distance-based grouping. * Such values are currently ignored for distance-based grouping.
* \hideinitializer * \hideinitializer
*/ */
HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3) HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3),
/** \brief This distances structure covers objects of different types.
* \hideinitializer
*/
HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4)
}; };
/** \brief Retrieve distance matrices. /** \brief Retrieve distance matrices.
@ -131,20 +136,32 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
* *
* Identical to hwloc_distances_get() with the additional \p type filter. * Identical to hwloc_distances_get() with the additional \p type filter.
*/ */
static __hwloc_inline int HWLOC_DECLSPEC int
hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
unsigned *nr, struct hwloc_distances_s **distances, unsigned *nr, struct hwloc_distances_s **distances,
unsigned long kind, unsigned long flags) unsigned long kind, unsigned long flags);
{
int depth = hwloc_get_type_depth(topology, type);
if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) {
*nr = 0;
return 0;
}
return hwloc_distances_get_by_depth(topology, depth, nr, distances, kind, flags);
}
/** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). */ /** \brief Retrieve a distance matrix with the given name.
*
* Usually only one distances structure may match a given name.
*/
HWLOC_DECLSPEC int
hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
unsigned *nr, struct hwloc_distances_s **distances,
unsigned long flags);
/** \brief Get a description of what a distances structure contains.
*
* For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT),
* or NULL if unknown.
*/
HWLOC_DECLSPEC const char *
hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances);
/** \brief Release a distance matrix structure previously returned by hwloc_distances_get().
*
* \note This function is not required if the structure is removed with hwloc_distances_release_remove().
*/
HWLOC_DECLSPEC void HWLOC_DECLSPEC void
hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances); hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances);
@ -221,11 +238,11 @@ enum hwloc_distances_add_flag_e {
* The distance from object i to object j is in slot i*nbobjs+j. * The distance from object i to object j is in slot i*nbobjs+j.
* *
* \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e. * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e.
* Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically added
* if objects of different types are given.
* *
* \p flags configures the behavior of the function using an optional OR'ed set of * \p flags configures the behavior of the function using an optional OR'ed set of
* ::hwloc_distances_add_flag_e. * ::hwloc_distances_add_flag_e.
*
* Objects must be of the same type. They cannot be of type Group.
*/ */
HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology, HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
@ -237,7 +254,7 @@ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
* gathered through the OS. * gathered through the OS.
* *
* If these distances were used to group objects, these additional * If these distances were used to group objects, these additional
*Group objects are not removed from the topology. * Group objects are not removed from the topology.
*/ */
HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology); HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology);
@ -260,6 +277,12 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
return hwloc_distances_remove_by_depth(topology, depth); return hwloc_distances_remove_by_depth(topology, depth);
} }
/** \brief Release and remove the given distance matrix from the topology.
*
* This function includes a call to hwloc_distances_release().
*/
HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances);
/** @} */ /** @} */

View file

@ -14,7 +14,7 @@
#ifndef HWLOC_GL_H #ifndef HWLOC_GL_H
#define HWLOC_GL_H #define HWLOC_GL_H
#include <hwloc.h> #include "hwloc.h"
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>

View file

@ -17,8 +17,9 @@
#ifndef HWLOC_GLIBC_SCHED_H #ifndef HWLOC_GLIBC_SCHED_H
#define HWLOC_GLIBC_SCHED_H #define HWLOC_GLIBC_SCHED_H
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/helper.h> #include "hwloc/helper.h"
#include <assert.h> #include <assert.h>
#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority) #if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority)

View file

@ -527,30 +527,36 @@ hwloc_obj_type_is_io(hwloc_obj_type_t type);
* *
* Memory objects are objects attached to their parents * Memory objects are objects attached to their parents
* in the Memory children list. * in the Memory children list.
* This current only includes NUMA nodes. * This current includes NUMA nodes and Memory-side caches.
* *
* \return 1 if an object of type \p type is a Memory object, 0 otherwise. * \return 1 if an object of type \p type is a Memory object, 0 otherwise.
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_obj_type_is_memory(hwloc_obj_type_t type); hwloc_obj_type_is_memory(hwloc_obj_type_t type);
/** \brief Check whether an object type is a Cache (Data, Unified or Instruction). /** \brief Check whether an object type is a CPU Cache (Data, Unified or Instruction).
*
* Memory-side caches are not CPU caches.
* *
* \return 1 if an object of type \p type is a Cache, 0 otherwise. * \return 1 if an object of type \p type is a Cache, 0 otherwise.
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_obj_type_is_cache(hwloc_obj_type_t type); hwloc_obj_type_is_cache(hwloc_obj_type_t type);
/** \brief Check whether an object type is a Data or Unified Cache. /** \brief Check whether an object type is a CPU Data or Unified Cache.
* *
* \return 1 if an object of type \p type is a Data or Unified Cache, 0 otherwise. * Memory-side caches are not CPU caches.
*
* \return 1 if an object of type \p type is a CPU Data or Unified Cache, 0 otherwise.
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_obj_type_is_dcache(hwloc_obj_type_t type); hwloc_obj_type_is_dcache(hwloc_obj_type_t type);
/** \brief Check whether an object type is a Instruction Cache, /** \brief Check whether an object type is a CPU Instruction Cache,
* *
* \return 1 if an object of type \p type is a Instruction Cache, 0 otherwise. * Memory-side caches are not CPU caches.
*
* \return 1 if an object of type \p type is a CPU Instruction Cache, 0 otherwise.
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_obj_type_is_icache(hwloc_obj_type_t type); hwloc_obj_type_is_icache(hwloc_obj_type_t type);
@ -914,7 +920,7 @@ hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_
* \note The returned cpuset is not newly allocated and should thus not be * \note The returned cpuset is not newly allocated and should thus not be
* changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
* *
* \note This is equivalent to retrieving the root object complete CPU-set. * \note This is equivalent to retrieving the root object CPU-set.
*/ */
HWLOC_DECLSPEC hwloc_const_cpuset_t HWLOC_DECLSPEC hwloc_const_cpuset_t
hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
@ -923,11 +929,11 @@ hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_
* *
* \return the CPU set of allowed logical processors of the system. * \return the CPU set of allowed logical processors of the system.
* *
* \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set, * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set,
* this is identical to hwloc_topology_get_topology_cpuset(), which means * this is identical to hwloc_topology_get_topology_cpuset(), which means
* all PUs are allowed. * all PUs are allowed.
* *
* \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying * \note If ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was set, applying
* hwloc_bitmap_intersects() on the result of this function and on an object * hwloc_bitmap_intersects() on the result of this function and on an object
* cpuset checks whether there are allowed PUs inside that object. * cpuset checks whether there are allowed PUs inside that object.
* Applying hwloc_bitmap_and() returns the list of these allowed PUs. * Applying hwloc_bitmap_and() returns the list of these allowed PUs.
@ -945,7 +951,7 @@ hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_p
* \note The returned nodeset is not newly allocated and should thus not be * \note The returned nodeset is not newly allocated and should thus not be
* changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
* *
* \note This is equivalent to retrieving the root object complete CPU-set. * \note This is equivalent to retrieving the root object complete nodeset.
*/ */
HWLOC_DECLSPEC hwloc_const_nodeset_t HWLOC_DECLSPEC hwloc_const_nodeset_t
hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
@ -959,7 +965,7 @@ hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute
* \note The returned nodeset is not newly allocated and should thus not be * \note The returned nodeset is not newly allocated and should thus not be
* changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
* *
* \note This is equivalent to retrieving the root object complete CPU-set. * \note This is equivalent to retrieving the root object nodeset.
*/ */
HWLOC_DECLSPEC hwloc_const_nodeset_t HWLOC_DECLSPEC hwloc_const_nodeset_t
hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
@ -968,11 +974,11 @@ hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute
* *
* \return the node set of allowed memory of the system. * \return the node set of allowed memory of the system.
* *
* \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set, * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set,
* this is identical to hwloc_topology_get_topology_nodeset(), which means * this is identical to hwloc_topology_get_topology_nodeset(), which means
* all NUMA nodes are allowed. * all NUMA nodes are allowed.
* *
* \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying * \note If ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was set, applying
* hwloc_bitmap_intersects() on the result of this function and on an object * hwloc_bitmap_intersects() on the result of this function and on an object
* nodeset checks whether there are allowed NUMA nodes inside that object. * nodeset checks whether there are allowed NUMA nodes inside that object.
* Applying hwloc_bitmap_and() returns the list of these allowed NUMA nodes. * Applying hwloc_bitmap_and() returns the list of these allowed NUMA nodes.

View file

@ -13,11 +13,13 @@
#ifndef HWLOC_INTEL_MIC_H #ifndef HWLOC_INTEL_MIC_H
#define HWLOC_INTEL_MIC_H #define HWLOC_INTEL_MIC_H
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#include <hwloc/helper.h> #include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS #ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h> #include "hwloc/linux.h"
#include <dirent.h> #include <dirent.h>
#include <string.h> #include <string.h>
#endif #endif

View file

@ -15,7 +15,8 @@
#ifndef HWLOC_LINUX_LIBNUMA_H #ifndef HWLOC_LINUX_LIBNUMA_H
#define HWLOC_LINUX_LIBNUMA_H #define HWLOC_LINUX_LIBNUMA_H
#include <hwloc.h> #include "hwloc.h"
#include <numa.h> #include <numa.h>

View file

@ -15,7 +15,8 @@
#ifndef HWLOC_LINUX_H #ifndef HWLOC_LINUX_H
#define HWLOC_LINUX_H #define HWLOC_LINUX_H
#include <hwloc.h> #include "hwloc.h"
#include <stdio.h> #include <stdio.h>

View file

@ -13,11 +13,11 @@
#ifndef HWLOC_NVML_H #ifndef HWLOC_NVML_H
#define HWLOC_NVML_H #define HWLOC_NVML_H
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#include <hwloc/helper.h> #include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS #ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h> #include "hwloc/linux.h"
#endif #endif
#include <nvml.h> #include <nvml.h>

View file

@ -14,19 +14,17 @@
#ifndef HWLOC_OPENCL_H #ifndef HWLOC_OPENCL_H
#define HWLOC_OPENCL_H #define HWLOC_OPENCL_H
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#include <hwloc/helper.h> #include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS #ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h> #include "hwloc/linux.h"
#endif #endif
#ifdef __APPLE__ #ifdef __APPLE__
#include <OpenCL/cl.h> #include <OpenCL/cl.h>
#include <OpenCL/cl_ext.h>
#else #else
#include <CL/cl.h> #include <CL/cl.h>
#include <CL/cl_ext.h>
#endif #endif
#include <stdio.h> #include <stdio.h>
@ -37,17 +35,75 @@ extern "C" {
#endif #endif
/* OpenCL extensions aren't always shipped with default headers, and
* they don't always reflect what the installed implementations support.
* Try everything and let the implementation return errors when non supported.
*/
/* Copyright (c) 2008-2018 The Khronos Group Inc. */
/* needs "cl_amd_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */
#define HWLOC_CL_DEVICE_TOPOLOGY_AMD 0x4037
typedef union {
struct { cl_uint type; cl_uint data[5]; } raw;
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} hwloc_cl_device_topology_amd;
#define HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
/* needs "cl_nv_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */
#define HWLOC_CL_DEVICE_PCI_BUS_ID_NV 0x4008
#define HWLOC_CL_DEVICE_PCI_SLOT_ID_NV 0x4009
/** \defgroup hwlocality_opencl Interoperability with OpenCL /** \defgroup hwlocality_opencl Interoperability with OpenCL
* *
* This interface offers ways to retrieve topology information about * This interface offers ways to retrieve topology information about
* OpenCL devices. * OpenCL devices.
* *
* Only the AMD OpenCL interface currently offers useful locality information * Only AMD and NVIDIA OpenCL implementations currently offer useful locality
* about its devices. * information about their devices.
* *
* @{ * @{
*/ */
/** \brief Return the PCI domain, bus, device and function IDs of the OpenCL device \p device.
 *
 * Device \p device must match the local machine.
 *
 * The AMD device-topology query is tried first; if it does not report a
 * PCIe topology, the NVIDIA bus and slot queries are tried next.
 *
 * \param device OpenCL device to look up.
 * \param domain Filled with the PCI domain (always 0 on the AMD path).
 * \param bus Filled with the PCI bus number.
 * \param dev Filled with the PCI device number.
 * \param func Filled with the PCI function number.
 *
 * \return 0 on success.
 * \return -1 if neither query succeeds; the output parameters are not
 * written in this case.
 */
static __hwloc_inline int
hwloc_opencl_get_device_pci_busid(cl_device_id device,
                                  unsigned *domain, unsigned *bus, unsigned *dev, unsigned *func)
{
  hwloc_cl_device_topology_amd amdtopo;
  cl_uint nvbus, nvslot;
  cl_int clret;

  clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
  if (CL_SUCCESS == clret
      && HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD == amdtopo.raw.type) {
    *domain = 0; /* can't do anything better */
    /* The pcie fields are cl_char, a signed 8-bit type in the Khronos headers.
     * Convert through unsigned char first so that values >= 0x80 are not
     * sign-extended into huge unsigned numbers.
     */
    *bus = (unsigned) (unsigned char) amdtopo.pcie.bus;
    *dev = (unsigned) (unsigned char) amdtopo.pcie.device;
    *func = (unsigned) (unsigned char) amdtopo.pcie.function;
    return 0;
  }

  clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_BUS_ID_NV, sizeof(nvbus), &nvbus, NULL);
  if (CL_SUCCESS == clret) {
    clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_SLOT_ID_NV, sizeof(nvslot), &nvslot, NULL);
    if (CL_SUCCESS == clret) {
      /* FIXME: PCI bus only uses 8bit, assume nvidia hardcodes the domain in higher bits */
      *domain = nvbus >> 8;
      *bus = nvbus & 0xff;
      /* non-documented but used in many other projects:
       * the slot value packs the device number above a 3-bit function number.
       */
      *dev = nvslot >> 3;
      *func = nvslot & 0x7;
      return 0;
    }
  }

  return -1;
}
/** \brief Get the CPU set of logical processors that are physically /** \brief Get the CPU set of logical processors that are physically
* close to OpenCL device \p device. * close to OpenCL device \p device.
* *
@ -62,7 +118,7 @@ extern "C" {
* and hwloc_opencl_get_device_osdev_by_index(). * and hwloc_opencl_get_device_osdev_by_index().
* *
* This function is currently only implemented in a meaningful way for * This function is currently only implemented in a meaningful way for
* Linux with the AMD OpenCL implementation; other systems will simply * Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply
* get a full cpuset. * get a full cpuset.
*/ */
static __hwloc_inline int static __hwloc_inline int
@ -70,35 +126,28 @@ hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse
cl_device_id device __hwloc_attribute_unused, cl_device_id device __hwloc_attribute_unused,
hwloc_cpuset_t set) hwloc_cpuset_t set)
{ {
#if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD) #if (defined HWLOC_LINUX_SYS)
/* If we're on Linux + AMD OpenCL, use the AMD extension + the sysfs mechanism to get the local cpus */ /* If we're on Linux, try AMD/NVIDIA extensions + the sysfs mechanism to get the local cpus */
#define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128 #define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128
char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX]; char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX];
cl_device_topology_amd amdtopo; unsigned pcidomain, pcibus, pcidev, pcifunc;
cl_int clret;
if (!hwloc_topology_is_thissystem(topology)) { if (!hwloc_topology_is_thissystem(topology)) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); if (hwloc_opencl_get_device_pci_busid(device, &pcidomain, &pcibus, &pcidev, &pcifunc) < 0) {
if (CL_SUCCESS != clret) {
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
return 0;
}
if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
return 0; return 0;
} }
sprintf(path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus", sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus", pcidomain, pcibus, pcidev, pcifunc);
(unsigned) amdtopo.pcie.bus, (unsigned) amdtopo.pcie.device, (unsigned) amdtopo.pcie.function);
if (hwloc_linux_read_path_as_cpumask(path, set) < 0 if (hwloc_linux_read_path_as_cpumask(path, set) < 0
|| hwloc_bitmap_iszero(set)) || hwloc_bitmap_iszero(set))
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#else #else
/* Non-Linux + AMD OpenCL systems simply get a full cpuset */ /* Non-Linux systems simply get a full cpuset */
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#endif #endif
return 0; return 0;
@ -140,8 +189,8 @@ hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology,
* Use OpenCL device attributes to find the corresponding hwloc OS device object. * Use OpenCL device attributes to find the corresponding hwloc OS device object.
* Return NULL if there is none or if useful attributes are not available. * Return NULL if there is none or if useful attributes are not available.
* *
* This function currently only works on AMD OpenCL devices that support * This function currently only works on AMD and NVIDIA OpenCL devices that support
* the CL_DEVICE_TOPOLOGY_AMD extension. hwloc_opencl_get_device_osdev_by_index() * relevant OpenCL extensions. hwloc_opencl_get_device_osdev_by_index()
* should be preferred whenever possible, i.e. when platform and device index * should be preferred whenever possible, i.e. when platform and device index
* are known. * are known.
* *
@ -159,17 +208,10 @@ static __hwloc_inline hwloc_obj_t
hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused,
cl_device_id device __hwloc_attribute_unused) cl_device_id device __hwloc_attribute_unused)
{ {
#ifdef CL_DEVICE_TOPOLOGY_AMD
hwloc_obj_t osdev; hwloc_obj_t osdev;
cl_device_topology_amd amdtopo; unsigned pcidomain, pcibus, pcidevice, pcifunc;
cl_int clret;
clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); if (hwloc_opencl_get_device_pci_busid(device, &pcidomain, &pcibus, &pcidevice, &pcifunc) < 0) {
if (CL_SUCCESS != clret) {
errno = EINVAL;
return NULL;
}
if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
errno = EINVAL; errno = EINVAL;
return NULL; return NULL;
} }
@ -181,18 +223,15 @@ hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused
continue; continue;
if (pcidev if (pcidev
&& pcidev->type == HWLOC_OBJ_PCI_DEVICE && pcidev->type == HWLOC_OBJ_PCI_DEVICE
&& pcidev->attr->pcidev.domain == 0 && pcidev->attr->pcidev.domain == pcidomain
&& pcidev->attr->pcidev.bus == amdtopo.pcie.bus && pcidev->attr->pcidev.bus == pcibus
&& pcidev->attr->pcidev.dev == amdtopo.pcie.device && pcidev->attr->pcidev.dev == pcidevice
&& pcidev->attr->pcidev.func == amdtopo.pcie.function) && pcidev->attr->pcidev.func == pcifunc)
return osdev; return osdev;
/* if PCI are filtered out, we need a info attr to match on */ /* if PCI are filtered out, we need a info attr to match on */
} }
return NULL; return NULL;
#else
return NULL;
#endif
} }
/** @} */ /** @} */

View file

@ -19,10 +19,10 @@
#ifndef HWLOC_OPENFABRICS_VERBS_H #ifndef HWLOC_OPENFABRICS_VERBS_H
#define HWLOC_OPENFABRICS_VERBS_H #define HWLOC_OPENFABRICS_VERBS_H
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#ifdef HWLOC_LINUX_SYS #ifdef HWLOC_LINUX_SYS
#include <hwloc/linux.h> #include "hwloc/linux.h"
#endif #endif
#include <infiniband/verbs.h> #include <infiniband/verbs.h>

View file

@ -1,5 +1,5 @@
/* /*
* Copyright © 2013-2017 Inria. All rights reserved. * Copyright © 2013-2019 Inria. All rights reserved.
* Copyright © 2016 Cisco Systems, Inc. All rights reserved. * Copyright © 2016 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@ -13,7 +13,8 @@
struct hwloc_backend; struct hwloc_backend;
#include <hwloc.h> #include "hwloc.h"
#ifdef HWLOC_INSIDE_PLUGIN #ifdef HWLOC_INSIDE_PLUGIN
/* needed for hwloc_plugin_check_namespace() */ /* needed for hwloc_plugin_check_namespace() */
#include <ltdl.h> #include <ltdl.h>
@ -25,52 +26,36 @@ struct hwloc_backend;
* @{ * @{
*/ */
/** \brief Discovery component type */
typedef enum hwloc_disc_component_type_e {
/** \brief CPU-only discovery through the OS, or generic no-OS support.
* \hideinitializer */
HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0),
/** \brief xml or synthetic,
* platform-specific components such as bgq.
* Anything the discovers CPU and everything else.
* No misc backend is expected to complement a global component.
* \hideinitializer */
HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1),
/** \brief OpenCL, Cuda, etc.
* \hideinitializer */
HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2)
} hwloc_disc_component_type_t;
/** \brief Discovery component structure /** \brief Discovery component structure
* *
* This is the major kind of components, taking care of the discovery. * This is the major kind of components, taking care of the discovery.
* They are registered by generic components, either statically-built or as plugins. * They are registered by generic components, either statically-built or as plugins.
*/ */
struct hwloc_disc_component { struct hwloc_disc_component {
/** \brief Discovery component type */
hwloc_disc_component_type_t type;
/** \brief Name. /** \brief Name.
* If this component is built as a plugin, this name does not have to match the plugin filename. * If this component is built as a plugin, this name does not have to match the plugin filename.
*/ */
const char *name; const char *name;
/** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e. /** \brief Discovery phases performed by this component.
* OR'ed set of ::hwloc_disc_phase_t
*/
unsigned phases;
/** \brief Component phases to exclude, as an OR'ed set of ::hwloc_disc_phase_t.
* *
* For a GLOBAL component, this usually includes all other types (~0). * For a GLOBAL component, this usually includes all other phases (\c ~UL).
* *
* Other components only exclude types that may bring conflicting * Other components only exclude types that may bring conflicting
* topology information. MISC components should likely not be excluded * topology information. MISC components should likely not be excluded
* since they usually bring non-primary additional information. * since they usually bring non-primary additional information.
*/ */
unsigned excludes; unsigned excluded_phases;
/** \brief Instantiate callback to create a backend from the component. /** \brief Instantiate callback to create a backend from the component.
* Parameters data1, data2, data3 are NULL except for components * Parameters data1, data2, data3 are NULL except for components
* that have special enabling routines such as hwloc_topology_set_xml(). */ * that have special enabling routines such as hwloc_topology_set_xml(). */
struct hwloc_backend * (*instantiate)(struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3); struct hwloc_backend * (*instantiate)(struct hwloc_topology *topology, struct hwloc_disc_component *component, unsigned excluded_phases, const void *data1, const void *data2, const void *data3);
/** \brief Component priority. /** \brief Component priority.
* Used to sort topology->components, higher priority first. * Used to sort topology->components, higher priority first.
@ -107,6 +92,72 @@ struct hwloc_disc_component {
* @{ * @{
*/ */
/** \brief Discovery phase
 *
 * Each phase is a distinct bit so that phases can be OR'ed into a set.
 */
typedef enum hwloc_disc_phase_e {
/** \brief xml or synthetic, platform-specific components such as bgq.
* Discovers everything including CPU, memory, I/O and everything else.
* A component with a Global phase usually excludes all other phases.
* \hideinitializer */
HWLOC_DISC_PHASE_GLOBAL = (1U<<0),
/** \brief CPU discovery.
* \hideinitializer */
HWLOC_DISC_PHASE_CPU = (1U<<1),
/** \brief Attach memory to existing CPU objects.
* \hideinitializer */
HWLOC_DISC_PHASE_MEMORY = (1U<<2),
/** \brief Attach PCI devices and bridges to existing CPU objects.
* \hideinitializer */
HWLOC_DISC_PHASE_PCI = (1U<<3),
/** \brief I/O discovery that requires PCI devices (OS devices such as OpenCL, CUDA, etc.).
* \hideinitializer */
HWLOC_DISC_PHASE_IO = (1U<<4),
/** \brief Misc objects that get added below anything else.
* \hideinitializer */
HWLOC_DISC_PHASE_MISC = (1U<<5),
/** \brief Annotating existing objects, adding distances, etc.
* \hideinitializer */
HWLOC_DISC_PHASE_ANNOTATE = (1U<<6),
/** \brief Final tweaks to a ready-to-use topology.
* This phase runs once the topology is loaded, before it is returned to the caller.
* Hence it may only use the main hwloc API for modifying the topology,
* for instance by restricting it, adding info attributes, etc.
* \hideinitializer */
HWLOC_DISC_PHASE_TWEAK = (1U<<7)
} hwloc_disc_phase_t;
/** \brief Discovery status flags
 *
 * Values are OR'ed into the flags field of struct hwloc_disc_status.
 */
enum hwloc_disc_status_flag_e {
/** \brief The sets of allowed resources were already retrieved \hideinitializer */
HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES = (1UL<<1)
};
/** \brief Discovery status structure
*
* Used by the core and backends to inform about what has been/is being done
* during the discovery process.
*/
struct hwloc_disc_status {
/** \brief The current discovery phase that is performed.
* Must match one of the phases in the component phases field.
*/
hwloc_disc_phase_t phase;
/** \brief Dynamically excluded phases.
* Records phases that a component decided, during discovery, are no longer needed.
*/
unsigned excluded_phases;
/** \brief OR'ed set of hwloc_disc_status_flag_e */
unsigned long flags;
};
/** \brief Discovery backend structure /** \brief Discovery backend structure
* *
* A backend is the instantiation of a discovery component. * A backend is the instantiation of a discovery component.
@ -116,6 +167,14 @@ struct hwloc_disc_component {
* hwloc_backend_alloc() initializes all fields to default values * hwloc_backend_alloc() initializes all fields to default values
* that the component may change (except "component" and "next") * that the component may change (except "component" and "next")
* before enabling the backend with hwloc_backend_enable(). * before enabling the backend with hwloc_backend_enable().
*
* Most backends assume that the topology is_thissystem flag is
* set because they talk to the underlying operating system.
* However they may still be used in topologies without the
* is_thissystem flag for debugging reasons.
* In practice, they are usually auto-disabled in such cases
* (excluded by xml or synthetic backends, or by environment
* variables when changing the Linux fsroot or the x86 cpuid path).
*/ */
struct hwloc_backend { struct hwloc_backend {
/** \private Reserved for the core, set by hwloc_backend_alloc() */ /** \private Reserved for the core, set by hwloc_backend_alloc() */
@ -127,12 +186,20 @@ struct hwloc_backend {
/** \private Reserved for the core. Used internally to list backends topology->backends. */ /** \private Reserved for the core. Used internally to list backends topology->backends. */
struct hwloc_backend * next; struct hwloc_backend * next;
/** \brief Discovery phases performed by this component, possibly without some of them if excluded by other components.
* OR'ed set of ::hwloc_disc_phase_t
*/
unsigned phases;
/** \brief Backend flags, currently always 0. */ /** \brief Backend flags, currently always 0. */
unsigned long flags; unsigned long flags;
/** \brief Backend-specific 'is_thissystem' property. /** \brief Backend-specific 'is_thissystem' property.
* Set to 0 or 1 if the backend should enforce the thissystem flag when it gets enabled. * Set to 0 if the backend disables the thissystem flag for this topology
* Set to -1 if the backend doesn't care (default). */ * (e.g. loading from xml or synthetic string,
* or using a different fsroot on Linux, or a x86 CPUID dump).
* Set to -1 if the backend doesn't care (default).
*/
int is_thissystem; int is_thissystem;
/** \brief Backend private data, or NULL if none. */ /** \brief Backend private data, or NULL if none. */
@ -147,20 +214,22 @@ struct hwloc_backend {
* or because of an actual discovery/gathering failure. * or because of an actual discovery/gathering failure.
* May be NULL. * May be NULL.
*/ */
int (*discover)(struct hwloc_backend *backend); int (*discover)(struct hwloc_backend *backend, struct hwloc_disc_status *status);
/** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend. /** \brief Callback to retrieve the locality of a PCI object.
* May be NULL. */ * Called by the PCI core when attaching PCI hierarchy to CPU objects.
* May be NULL.
*/
int (*get_pci_busid_cpuset)(struct hwloc_backend *backend, struct hwloc_pcidev_attr_s *busid, hwloc_bitmap_t cpuset); int (*get_pci_busid_cpuset)(struct hwloc_backend *backend, struct hwloc_pcidev_attr_s *busid, hwloc_bitmap_t cpuset);
}; };
/** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc. /** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc.
* The caller will then modify whatever needed, and call hwloc_backend_enable(). * The caller will then modify whatever needed, and call hwloc_backend_enable().
*/ */
HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_disc_component *component); HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_topology *topology, struct hwloc_disc_component *component);
/** \brief Enable a previously allocated and setup backend. */ /** \brief Enable a previously allocated and setup backend. */
HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend); HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_backend *backend);
/** @} */ /** @} */
@ -480,7 +549,9 @@ HWLOC_DECLSPEC hwloc_obj_type_t hwloc_pcidisc_check_bridge_type(unsigned device_
* *
* Returns -1 and destroys /p obj if bridge fields are invalid. * Returns -1 and destroys /p obj if bridge fields are invalid.
*/ */
HWLOC_DECLSPEC int hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, const unsigned char *config); HWLOC_DECLSPEC int hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func,
unsigned *secondary_busp, unsigned *subordinate_busp,
const unsigned char *config);
/** \brief Insert a PCI object in the given PCI tree by looking at PCI bus IDs. /** \brief Insert a PCI object in the given PCI tree by looking at PCI bus IDs.
* *
@ -490,10 +561,7 @@ HWLOC_DECLSPEC void hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep,
/** \brief Add some hostbridges on top of the given tree of PCI objects and attach them to the topology. /** \brief Add some hostbridges on top of the given tree of PCI objects and attach them to the topology.
* *
* For now, they will be attached to the root object. The core will move them to their actual PCI * Other backends may lookup PCI objects or localities (for instance to attach OS devices)
* locality using hwloc_pci_belowroot_apply_locality() at the end of the discovery.
*
* In the meantime, other backends lookup PCI objects or localities (for instance to attach OS devices)
* by using hwloc_pcidisc_find_by_busid() or hwloc_pcidisc_find_busid_parent(). * by using hwloc_pcidisc_find_by_busid() or hwloc_pcidisc_find_busid_parent().
*/ */
HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree); HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree);
@ -507,32 +575,14 @@ HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, st
* @{ * @{
*/ */
/** \brief Find the PCI object that matches the bus ID.
*
* To be used after a PCI backend added PCI devices with hwloc_pcidisc_tree_attach()
* and before the core moves them to their actual location with hwloc_pci_belowroot_apply_locality().
*
* If no exactly matching object is found, return the container bridge if any, or NULL.
*
* On failure, it may be possible to find the PCI locality (instead of the PCI device)
* by calling hwloc_pcidisc_find_busid_parent().
*
* \note This is semantically identical to hwloc_get_pcidev_by_busid() which only works
* after the topology is fully loaded.
*/
HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
/** \brief Find the normal parent of a PCI bus ID. /** \brief Find the normal parent of a PCI bus ID.
* *
* Look at PCI affinity to find out where the given PCI bus ID should be attached. * Look at PCI affinity to find out where the given PCI bus ID should be attached.
* *
* This function should be used to attach an I/O device directly under a normal * This function should be used to attach an I/O device under the corresponding
* (non-I/O) object, instead of below a PCI object. * PCI object (if any), or under a normal (non-I/O) object with same locality.
* It is usually used by backends when hwloc_pcidisc_find_by_busid() failed
* to find the hwloc object corresponding to this bus ID, for instance because
* PCI discovery is not supported on this platform.
*/ */
HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
/** @} */ /** @} */

View file

@ -1,13 +1,13 @@
/* /*
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* Copyright © 2010-2018 Inria. All rights reserved. * Copyright © 2010-2019 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#ifndef HWLOC_RENAME_H #ifndef HWLOC_RENAME_H
#define HWLOC_RENAME_H #define HWLOC_RENAME_H
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#ifdef __cplusplus #ifdef __cplusplus
@ -49,7 +49,9 @@ extern "C" {
#define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE) #define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE)
#define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE) #define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE)
#define HWLOC_OBJ_MEMCACHE HWLOC_NAME_CAPS(OBJ_MEMCACHE)
#define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE) #define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE)
#define HWLOC_OBJ_DIE HWLOC_NAME_CAPS(OBJ_DIE)
#define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE) #define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE)
#define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU) #define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU)
#define HWLOC_OBJ_L1CACHE HWLOC_NAME_CAPS(OBJ_L1CACHE) #define HWLOC_OBJ_L1CACHE HWLOC_NAME_CAPS(OBJ_L1CACHE)
@ -116,7 +118,7 @@ extern "C" {
#define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e) #define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e)
#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM) #define HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WITH_DISALLOWED)
#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) #define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM)
#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) #define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)
@ -124,6 +126,9 @@ extern "C" {
#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) #define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic)
#define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml) #define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml)
#define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer) #define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer)
#define hwloc_topology_components_flag_e HWLOC_NAME(hwloc_topology_components_flag_e)
#define HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST HWLOC_NAME_CAPS(TOPOLOGY_COMPONENTS_FLAG_BLACKLIST)
#define hwloc_topology_set_components HWLOC_NAME(topology_set_components)
#define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags) #define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags)
#define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem) #define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem)
@ -151,10 +156,18 @@ extern "C" {
#define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e) #define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e)
#define HWLOC_RESTRICT_FLAG_REMOVE_CPULESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_CPULESS) #define HWLOC_RESTRICT_FLAG_REMOVE_CPULESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_CPULESS)
#define HWLOC_RESTRICT_FLAG_BYNODESET HWLOC_NAME_CAPS(RESTRICT_FLAG_BYNODESET)
#define HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_MEMLESS)
#define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC) #define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC)
#define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO) #define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO)
#define hwloc_topology_restrict HWLOC_NAME(topology_restrict) #define hwloc_topology_restrict HWLOC_NAME(topology_restrict)
#define hwloc_allow_flags_e HWLOC_NAME(allow_flags_e)
#define HWLOC_ALLOW_FLAG_ALL HWLOC_NAME_CAPS(ALLOW_FLAG_ALL)
#define HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS HWLOC_NAME_CAPS(ALLOW_FLAG_LOCAL_RESTRICTIONS)
#define HWLOC_ALLOW_FLAG_CUSTOM HWLOC_NAME_CAPS(ALLOW_FLAG_CUSTOM)
#define hwloc_topology_allow HWLOC_NAME(topology_allow)
#define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object) #define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object)
#define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object) #define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object)
#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object) #define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object)
@ -172,6 +185,7 @@ extern "C" {
#define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE) #define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE)
#define HWLOC_TYPE_DEPTH_MISC HWLOC_NAME_CAPS(TYPE_DEPTH_MISC) #define HWLOC_TYPE_DEPTH_MISC HWLOC_NAME_CAPS(TYPE_DEPTH_MISC)
#define HWLOC_TYPE_DEPTH_NUMANODE HWLOC_NAME_CAPS(TYPE_DEPTH_NUMANODE) #define HWLOC_TYPE_DEPTH_NUMANODE HWLOC_NAME_CAPS(TYPE_DEPTH_NUMANODE)
#define HWLOC_TYPE_DEPTH_MEMCACHE HWLOC_NAME_CAPS(TYPE_DEPTH_MEMCACHE)
#define hwloc_get_depth_type HWLOC_NAME(get_depth_type) #define hwloc_get_depth_type HWLOC_NAME(get_depth_type)
#define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth) #define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth)
@ -266,10 +280,12 @@ extern "C" {
#define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero) #define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero)
#define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill) #define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill)
#define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong) #define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong)
#define hwloc_bitmap_from_ulongs HWLOC_NAME(bitmap_from_ulongs)
#define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong) #define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong)
#define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong) #define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong)
#define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong) #define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong)
#define hwloc_bitmap_to_ulongs HWLOC_NAME(bitmap_to_ulongs)
#define hwloc_bitmap_nr_ulongs HWLOC_NAME(bitmap_nr_ulongs)
#define hwloc_bitmap_only HWLOC_NAME(bitmap_only) #define hwloc_bitmap_only HWLOC_NAME(bitmap_only)
#define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut) #define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut)
#define hwloc_bitmap_set HWLOC_NAME(bitmap_set) #define hwloc_bitmap_set HWLOC_NAME(bitmap_set)
@ -380,10 +396,13 @@ extern "C" {
#define HWLOC_DISTANCES_KIND_FROM_USER HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_USER) #define HWLOC_DISTANCES_KIND_FROM_USER HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_USER)
#define HWLOC_DISTANCES_KIND_MEANS_LATENCY HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_LATENCY) #define HWLOC_DISTANCES_KIND_MEANS_LATENCY HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_LATENCY)
#define HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_BANDWIDTH) #define HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_BANDWIDTH)
#define HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES HWLOC_NAME_CAPS(DISTANCES_KIND_HETEROGENEOUS_TYPES)
#define hwloc_distances_get HWLOC_NAME(distances_get) #define hwloc_distances_get HWLOC_NAME(distances_get)
#define hwloc_distances_get_by_depth HWLOC_NAME(distances_get_by_depth) #define hwloc_distances_get_by_depth HWLOC_NAME(distances_get_by_depth)
#define hwloc_distances_get_by_type HWLOC_NAME(distances_get_by_type) #define hwloc_distances_get_by_type HWLOC_NAME(distances_get_by_type)
#define hwloc_distances_get_by_name HWLOC_NAME(distances_get_by_name)
#define hwloc_distances_get_name HWLOC_NAME(distances_get_name)
#define hwloc_distances_release HWLOC_NAME(distances_release) #define hwloc_distances_release HWLOC_NAME(distances_release)
#define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index) #define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index)
#define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values) #define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values)
@ -396,6 +415,7 @@ extern "C" {
#define hwloc_distances_remove HWLOC_NAME(distances_remove) #define hwloc_distances_remove HWLOC_NAME(distances_remove)
#define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth) #define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth)
#define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type) #define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type)
#define hwloc_distances_release_remove HWLOC_NAME(distances_release_remove)
/* diff.h */ /* diff.h */
@ -469,6 +489,8 @@ extern "C" {
/* opencl.h */ /* opencl.h */
#define hwloc_cl_device_topology_amd HWLOC_NAME(cl_device_topology_amd)
#define hwloc_opencl_get_device_pci_busid HWLOC_NAME(opencl_get_device_pci_ids)
#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset) #define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset)
#define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev) #define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev)
#define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index) #define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index)
@ -502,13 +524,22 @@ extern "C" {
/* hwloc/plugins.h */ /* hwloc/plugins.h */
#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e) #define hwloc_disc_phase_e HWLOC_NAME(disc_phase_e)
#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU) #define HWLOC_DISC_PHASE_GLOBAL HWLOC_NAME_CAPS(DISC_PHASE_GLOBAL)
#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL) #define HWLOC_DISC_PHASE_CPU HWLOC_NAME_CAPS(DISC_PHASE_CPU)
#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC) #define HWLOC_DISC_PHASE_MEMORY HWLOC_NAME_CAPS(DISC_PHASE_MEMORY)
#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t) #define HWLOC_DISC_PHASE_PCI HWLOC_NAME_CAPS(DISC_PHASE_PCI)
#define HWLOC_DISC_PHASE_IO HWLOC_NAME_CAPS(DISC_PHASE_IO)
#define HWLOC_DISC_PHASE_MISC HWLOC_NAME_CAPS(DISC_PHASE_MISC)
#define HWLOC_DISC_PHASE_ANNOTATE HWLOC_NAME_CAPS(DISC_PHASE_ANNOTATE)
#define HWLOC_DISC_PHASE_TWEAK HWLOC_NAME_CAPS(DISC_PHASE_TWEAK)
#define hwloc_disc_phase_t HWLOC_NAME(disc_phase_t)
#define hwloc_disc_component HWLOC_NAME(disc_component) #define hwloc_disc_component HWLOC_NAME(disc_component)
#define hwloc_disc_status_flag_e HWLOC_NAME(disc_status_flag_e)
#define HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES HWLOC_NAME_CAPS(DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES)
#define hwloc_disc_status HWLOC_NAME(disc_status)
#define hwloc_backend HWLOC_NAME(backend) #define hwloc_backend HWLOC_NAME(backend)
#define hwloc_backend_alloc HWLOC_NAME(backend_alloc) #define hwloc_backend_alloc HWLOC_NAME(backend_alloc)
@ -540,12 +571,11 @@ extern "C" {
#define hwloc_pcidisc_find_cap HWLOC_NAME(pcidisc_find_cap) #define hwloc_pcidisc_find_cap HWLOC_NAME(pcidisc_find_cap)
#define hwloc_pcidisc_find_linkspeed HWLOC_NAME(pcidisc_find_linkspeed) #define hwloc_pcidisc_find_linkspeed HWLOC_NAME(pcidisc_find_linkspeed)
#define hwloc_pcidisc_check_bridge_type HWLOC_NAME(pcidisc_check_bridge_type) #define hwloc_pcidisc_check_bridge_type HWLOC_NAME(pcidisc_check_bridge_type)
#define hwloc_pcidisc_setup_bridge_attr HWLOC_NAME(pcidisc_setup_bridge_attr) #define hwloc_pcidisc_find_bridge_buses HWLOC_NAME(pcidisc_find_bridge_buses)
#define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid) #define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid)
#define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach) #define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach)
#define hwloc_pcidisc_find_by_busid HWLOC_NAME(pcidisc_find_by_busid) #define hwloc_pci_find_parent_by_busid HWLOC_NAME(pcidisc_find_busid_parent)
#define hwloc_pcidisc_find_busid_parent HWLOC_NAME(pcidisc_find_busid_parent)
/* hwloc/deprecated.h */ /* hwloc/deprecated.h */
@ -571,8 +601,9 @@ extern "C" {
/* private/misc.h */ /* private/misc.h */
#ifndef HWLOC_HAVE_CORRECT_SNPRINTF
#define hwloc_snprintf HWLOC_NAME(snprintf) #define hwloc_snprintf HWLOC_NAME(snprintf)
#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp) #endif
#define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual) #define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual)
#define hwloc_ffs32 HWLOC_NAME(ffs32) #define hwloc_ffs32 HWLOC_NAME(ffs32)
#define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32) #define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32)
@ -631,8 +662,9 @@ extern "C" {
#define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem) #define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem)
#define hwloc_backends_find_callbacks HWLOC_NAME(backends_find_callbacks) #define hwloc_backends_find_callbacks HWLOC_NAME(backends_find_callbacks)
#define hwloc_backends_init HWLOC_NAME(backends_init) #define hwloc_topology_components_init HWLOC_NAME(topology_components_init)
#define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all) #define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all)
#define hwloc_topology_components_fini HWLOC_NAME(topology_components_fini)
#define hwloc_components_init HWLOC_NAME(components_init) #define hwloc_components_init HWLOC_NAME(components_init)
#define hwloc_components_fini HWLOC_NAME(components_fini) #define hwloc_components_fini HWLOC_NAME(components_fini)
@ -656,7 +688,6 @@ extern "C" {
#define hwloc_cuda_component HWLOC_NAME(cuda_component) #define hwloc_cuda_component HWLOC_NAME(cuda_component)
#define hwloc_gl_component HWLOC_NAME(gl_component) #define hwloc_gl_component HWLOC_NAME(gl_component)
#define hwloc_linuxio_component HWLOC_NAME(linuxio_component)
#define hwloc_nvml_component HWLOC_NAME(nvml_component) #define hwloc_nvml_component HWLOC_NAME(nvml_component)
#define hwloc_opencl_component HWLOC_NAME(opencl_component) #define hwloc_opencl_component HWLOC_NAME(opencl_component)
#define hwloc_pci_component HWLOC_NAME(pci_component) #define hwloc_pci_component HWLOC_NAME(pci_component)
@ -669,6 +700,9 @@ extern "C" {
#define hwloc_special_level_s HWLOC_NAME(special_level_s) #define hwloc_special_level_s HWLOC_NAME(special_level_s)
#define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s) #define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s)
#define hwloc_pci_locality_s HWLOC_NAME(pci_locality_s)
#define hwloc_topology_forced_component_s HWLOC_NAME(topology_forced_component)
#define hwloc_alloc_root_sets HWLOC_NAME(alloc_root_sets) #define hwloc_alloc_root_sets HWLOC_NAME(alloc_root_sets)
#define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level) #define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level)
@ -687,8 +721,8 @@ extern "C" {
#define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init) #define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init)
#define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare) #define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare)
#define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit) #define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit)
#define hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid)
#define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset) #define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset)
#define hwloc_pci_belowroot_apply_locality HWLOC_NAME(pci_belowroot_apply_locality)
#define hwloc__add_info HWLOC_NAME(_add_info) #define hwloc__add_info HWLOC_NAME(_add_info)
#define hwloc__add_info_nodup HWLOC_NAME(_add_info_nodup) #define hwloc__add_info_nodup HWLOC_NAME(_add_info_nodup)

View file

@ -10,7 +10,7 @@
#ifndef HWLOC_SHMEM_H #ifndef HWLOC_SHMEM_H
#define HWLOC_SHMEM_H #define HWLOC_SHMEM_H
#include <hwloc.h> #include "hwloc.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {

View file

@ -1,5 +1,5 @@
/* /*
* Copyright © 2012-2015 Inria. All rights reserved. * Copyright © 2012-2019 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@ -16,13 +16,13 @@
#ifndef PRIVATE_COMPONENTS_H #ifndef PRIVATE_COMPONENTS_H
#define PRIVATE_COMPONENTS_H 1 #define PRIVATE_COMPONENTS_H 1
#include <hwloc/plugins.h> #include "hwloc/plugins.h"
struct hwloc_topology; struct hwloc_topology;
extern int hwloc_disc_component_force_enable(struct hwloc_topology *topology, extern int hwloc_disc_component_force_enable(struct hwloc_topology *topology,
int envvar_forced, /* 1 if forced through envvar, 0 if forced through API */ int envvar_forced, /* 1 if forced through envvar, 0 if forced through API */
int type, const char *name, const char *name,
const void *data1, const void *data2, const void *data3); const void *data1, const void *data2, const void *data3);
extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology); extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology);
@ -30,10 +30,12 @@ extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology)
extern void hwloc_backends_is_thissystem(struct hwloc_topology *topology); extern void hwloc_backends_is_thissystem(struct hwloc_topology *topology);
extern void hwloc_backends_find_callbacks(struct hwloc_topology *topology); extern void hwloc_backends_find_callbacks(struct hwloc_topology *topology);
/* Initialize the list of backends used by a topology */ /* Initialize the lists of components and backends used by a topology */
extern void hwloc_backends_init(struct hwloc_topology *topology); extern void hwloc_topology_components_init(struct hwloc_topology *topology);
/* Disable and destroy all backends used by a topology */ /* Disable and destroy all backends used by a topology */
extern void hwloc_backends_disable_all(struct hwloc_topology *topology); extern void hwloc_backends_disable_all(struct hwloc_topology *topology);
/* Cleanup the lists of components used by a topology */
extern void hwloc_topology_components_fini(struct hwloc_topology *topology);
/* Used by the core to setup/destroy the list of components */ /* Used by the core to setup/destroy the list of components */
extern void hwloc_components_init(void); /* increases components refcount, should be called exactly once per topology (during init) */ extern void hwloc_components_init(void); /* increases components refcount, should be called exactly once per topology (during init) */

View file

@ -11,8 +11,8 @@
#ifndef HWLOC_DEBUG_H #ifndef HWLOC_DEBUG_H
#define HWLOC_DEBUG_H #define HWLOC_DEBUG_H
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <private/misc.h> #include "private/misc.h"
#ifdef HWLOC_DEBUG #ifdef HWLOC_DEBUG
#include <stdarg.h> #include <stdarg.h>

View file

@ -1,5 +1,5 @@
/* /*
* Copyright © 2018 Inria. All rights reserved. * Copyright © 2018-2019 Inria. All rights reserved.
* *
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@ -29,7 +29,6 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component;
/* I/O discovery */ /* I/O discovery */
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_linuxio_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;

View file

@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@ -11,9 +11,9 @@
#ifndef HWLOC_PRIVATE_MISC_H #ifndef HWLOC_PRIVATE_MISC_H
#define HWLOC_PRIVATE_MISC_H #define HWLOC_PRIVATE_MISC_H
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#ifdef HWLOC_HAVE_DECL_STRNCASECMP #ifdef HWLOC_HAVE_DECL_STRNCASECMP
#ifdef HAVE_STRINGS_H #ifdef HAVE_STRINGS_H
@ -439,14 +439,14 @@ hwloc_linux_pci_link_speed_from_string(const char *string)
static __hwloc_inline int hwloc__obj_type_is_normal (hwloc_obj_type_t type) static __hwloc_inline int hwloc__obj_type_is_normal (hwloc_obj_type_t type)
{ {
/* type contiguity is asserted in topology_check() */ /* type contiguity is asserted in topology_check() */
return type <= HWLOC_OBJ_GROUP; return type <= HWLOC_OBJ_GROUP || type == HWLOC_OBJ_DIE;
} }
/* Any object attached to memory children, currently only NUMA nodes */ /* Any object attached to memory children, currently NUMA nodes or Memory-side caches */
static __hwloc_inline int hwloc__obj_type_is_memory (hwloc_obj_type_t type) static __hwloc_inline int hwloc__obj_type_is_memory (hwloc_obj_type_t type)
{ {
/* type contiguity is asserted in topology_check() */ /* type contiguity is asserted in topology_check() */
return type == HWLOC_OBJ_NUMANODE; return type == HWLOC_OBJ_NUMANODE || type == HWLOC_OBJ_MEMCACHE;
} }
/* I/O or Misc object, without cpusets or nodesets. */ /* I/O or Misc object, without cpusets or nodesets. */
@ -463,6 +463,7 @@ static __hwloc_inline int hwloc__obj_type_is_io (hwloc_obj_type_t type)
return type >= HWLOC_OBJ_BRIDGE && type <= HWLOC_OBJ_OS_DEVICE; return type >= HWLOC_OBJ_BRIDGE && type <= HWLOC_OBJ_OS_DEVICE;
} }
/* Any CPU caches (not Memory-side caches) */
static __hwloc_inline int static __hwloc_inline int
hwloc__obj_type_is_cache(hwloc_obj_type_t type) hwloc__obj_type_is_cache(hwloc_obj_type_t type)
{ {
@ -572,12 +573,4 @@ typedef SSIZE_T ssize_t;
# endif # endif
#endif #endif
#if defined HWLOC_WIN_SYS && !defined __MINGW32__ && !defined(__CYGWIN__)
/* MSVC doesn't support C99 variable-length array */
#include <malloc.h>
#define HWLOC_VLA(_type, _name, _nb) _type *_name = (_type*) _alloca((_nb)*sizeof(_type))
#else
#define HWLOC_VLA(_type, _name, _nb) _type _name[_nb]
#endif
#endif /* HWLOC_PRIVATE_MISC_H */ #endif /* HWLOC_PRIVATE_MISC_H */

View file

@ -22,11 +22,12 @@
#ifndef HWLOC_PRIVATE_H #ifndef HWLOC_PRIVATE_H
#define HWLOC_PRIVATE_H #define HWLOC_PRIVATE_H
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/bitmap.h> #include "hwloc/bitmap.h"
#include <private/components.h> #include "private/components.h"
#include <private/misc.h> #include "private/misc.h"
#include <sys/types.h> #include <sys/types.h>
#ifdef HAVE_UNISTD_H #ifdef HAVE_UNISTD_H
#include <unistd.h> #include <unistd.h>
@ -39,7 +40,7 @@
#endif #endif
#include <string.h> #include <string.h>
#define HWLOC_TOPOLOGY_ABI 0x20000 /* version of the layout of struct topology */ #define HWLOC_TOPOLOGY_ABI 0x20100 /* version of the layout of struct topology */
/***************************************************** /*****************************************************
* WARNING: * WARNING:
@ -67,12 +68,13 @@ struct hwloc_topology {
void *adopted_shmem_addr; void *adopted_shmem_addr;
size_t adopted_shmem_length; size_t adopted_shmem_length;
#define HWLOC_NR_SLEVELS 5 #define HWLOC_NR_SLEVELS 6
#define HWLOC_SLEVEL_NUMANODE 0 #define HWLOC_SLEVEL_NUMANODE 0
#define HWLOC_SLEVEL_BRIDGE 1 #define HWLOC_SLEVEL_BRIDGE 1
#define HWLOC_SLEVEL_PCIDEV 2 #define HWLOC_SLEVEL_PCIDEV 2
#define HWLOC_SLEVEL_OSDEV 3 #define HWLOC_SLEVEL_OSDEV 3
#define HWLOC_SLEVEL_MISC 4 #define HWLOC_SLEVEL_MISC 4
#define HWLOC_SLEVEL_MEMCACHE 5
/* order must match negative depth, it's asserted in setup_defaults() */ /* order must match negative depth, it's asserted in setup_defaults() */
#define HWLOC_SLEVEL_FROM_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x)) #define HWLOC_SLEVEL_FROM_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x))
#define HWLOC_SLEVEL_TO_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x)) #define HWLOC_SLEVEL_TO_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x))
@ -86,6 +88,7 @@ struct hwloc_topology {
hwloc_bitmap_t allowed_nodeset; hwloc_bitmap_t allowed_nodeset;
struct hwloc_binding_hooks { struct hwloc_binding_hooks {
/* These are actually rather OS hooks since some of them are not about binding */
int (*set_thisproc_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); int (*set_thisproc_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
int (*get_thisproc_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); int (*get_thisproc_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
int (*set_thisthread_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); int (*set_thisthread_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
@ -127,20 +130,35 @@ struct hwloc_topology {
int userdata_not_decoded; int userdata_not_decoded;
struct hwloc_internal_distances_s { struct hwloc_internal_distances_s {
hwloc_obj_type_t type; char *name; /* FIXME: needs an API to set it from user */
unsigned id; /* to match the container id field of public distances structure
* not exported to XML, regenerated during _add()
*/
/* if all objects have the same type, different_types is NULL and unique_type is valid.
* otherwise unique_type is HWLOC_OBJ_TYPE_NONE and different_types contains individual objects types.
*/
hwloc_obj_type_t unique_type;
hwloc_obj_type_t *different_types;
/* add union hwloc_obj_attr_u if we ever support groups */ /* add union hwloc_obj_attr_u if we ever support groups */
unsigned nbobjs; unsigned nbobjs;
uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs. */ uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs.
* OS indexes for distances covering only PUs or only NUMAnodes.
*/
/* Non-zero when _type is HWLOC_OBJ_PU or HWLOC_OBJ_NUMANODE.
 * Every use of the argument is parenthesized so that an expression argument
 * (e.g. "a & b") is compared as a whole.
 */
#define HWLOC_DIST_TYPE_USE_OS_INDEX(_type) ((_type) == HWLOC_OBJ_PU || (_type) == HWLOC_OBJ_NUMANODE)
uint64_t *values; /* distance matrices, ordered according to the above indexes/objs array. uint64_t *values; /* distance matrices, ordered according to the above indexes/objs array.
* distance from i to j is stored in slot i*nbnodes+j. * distance from i to j is stored in slot i*nbnodes+j.
*/ */
unsigned long kind; unsigned long kind;
#define HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID (1U<<0) /* if the objs array is valid below */
unsigned iflags;
/* objects are currently stored in physical_index order */ /* objects are currently stored in physical_index order */
hwloc_obj_t *objs; /* array of objects */ hwloc_obj_t *objs; /* array of objects */
int objs_are_valid; /* set to 1 if the array objs is still valid, 0 if needs refresh */
unsigned id; /* to match the container id field of public distances structure */
struct hwloc_internal_distances_s *prev, *next; struct hwloc_internal_distances_s *prev, *next;
} *first_dist, *last_dist; } *first_dist, *last_dist;
unsigned next_dist_id; unsigned next_dist_id;
@ -153,8 +171,9 @@ struct hwloc_topology {
/* list of enabled backends. */ /* list of enabled backends. */
struct hwloc_backend * backends; struct hwloc_backend * backends;
struct hwloc_backend * get_pci_busid_cpuset_backend; struct hwloc_backend * get_pci_busid_cpuset_backend; /* first backend that provides get_pci_busid_cpuset() callback */
unsigned backend_excludes; unsigned backend_phases;
unsigned backend_excluded_phases;
/* memory allocator for topology objects */ /* memory allocator for topology objects */
struct hwloc_tma * tma; struct hwloc_tma * tma;
@ -176,7 +195,6 @@ struct hwloc_topology {
struct hwloc_numanode_attr_s machine_memory; struct hwloc_numanode_attr_s machine_memory;
/* pci stuff */ /* pci stuff */
int need_pci_belowroot_apply_locality;
int pci_has_forced_locality; int pci_has_forced_locality;
unsigned pci_forced_locality_nr; unsigned pci_forced_locality_nr;
struct hwloc_pci_forced_locality_s { struct hwloc_pci_forced_locality_s {
@ -185,13 +203,32 @@ struct hwloc_topology {
hwloc_bitmap_t cpuset; hwloc_bitmap_t cpuset;
} * pci_forced_locality; } * pci_forced_locality;
/* component blacklisting */
unsigned nr_blacklisted_components;
struct hwloc_topology_forced_component_s {
struct hwloc_disc_component *component;
unsigned phases;
} *blacklisted_components;
/* FIXME: keep until topo destroy and reuse for finding specific buses */
struct hwloc_pci_locality_s {
unsigned domain;
unsigned bus_min;
unsigned bus_max;
hwloc_bitmap_t cpuset;
hwloc_obj_t parent;
struct hwloc_pci_locality_s *prev, *next;
} *first_pci_locality, *last_pci_locality;
}; };
extern void hwloc_alloc_root_sets(hwloc_obj_t root); extern void hwloc_alloc_root_sets(hwloc_obj_t root);
extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus); extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus);
extern int hwloc_get_sysctlbyname(const char *name, int64_t *n); extern int hwloc_get_sysctlbyname(const char *name, int64_t *n);
extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n); extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n);
extern int hwloc_fallback_nbprocessors(struct hwloc_topology *topology);
/* returns the number of CPU from the OS (only valid if thissystem) */
#define HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE 1 /* by default we try to get only the online CPUs */
extern int hwloc_fallback_nbprocessors(unsigned flags);
extern int hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2); extern int hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2);
extern void hwloc__reorder_children(hwloc_obj_t parent); extern void hwloc__reorder_children(hwloc_obj_t parent);
@ -208,19 +245,17 @@ extern void hwloc_pci_discovery_init(struct hwloc_topology *topology);
extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology); extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology);
extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology); extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology);
/* Look for an object matching the given domain/bus/func,
* either exactly or return the smallest container bridge
*/
extern struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
/* Look for an object matching complete cpuset exactly, or insert one. /* Look for an object matching complete cpuset exactly, or insert one.
* Return NULL on failure. * Return NULL on failure.
* Return a good fallback (object above) on failure to insert. * Return a good fallback (object above) on failure to insert.
*/ */
extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_cpuset_t cpuset); extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_cpuset_t cpuset);
/* Move PCI objects currently attached to the root object ot their actual location.
* Called by the core at the end of hwloc_topology_load().
* Prior to this call, all PCI objects may be found below the root object.
* After this call and a reconnect of levels, all PCI objects are available through levels.
*/
extern int hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology);
extern int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value); extern int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value);
extern int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value, int replace); extern int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value, int replace);
extern int hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s **src_infosp, unsigned *src_countp); extern int hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s **src_infosp, unsigned *src_countp);
@ -313,8 +348,8 @@ extern void hwloc_internal_distances_prepare(hwloc_topology_t topology);
extern void hwloc_internal_distances_destroy(hwloc_topology_t topology); extern void hwloc_internal_distances_destroy(hwloc_topology_t topology);
extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old); extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old);
extern void hwloc_internal_distances_refresh(hwloc_topology_t topology); extern void hwloc_internal_distances_refresh(hwloc_topology_t topology);
extern int hwloc_internal_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags); extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags);
extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology); extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology);
/* encode src buffer into target buffer. /* encode src buffer into target buffer.
@ -330,13 +365,19 @@ extern int hwloc_encode_to_base64(const char *src, size_t srclength, char *targe
*/ */
extern int hwloc_decode_from_base64(char const *src, char *target, size_t targsize); extern int hwloc_decode_from_base64(char const *src, char *target, size_t targsize);
/* Check whether needle matches the beginning of haystack, at least n, and up
* to a colon or \0 */
extern int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n);
/* On some systems, snprintf returns the size of written data, not the actually /* On some systems, snprintf returns the size of written data, not the actually
* required size. hwloc_snprintf always report the actually required size. */ * required size. Sometimes it returns -1 on truncation too.
* And sometimes it doesn't like NULL output buffers.
* http://www.gnu.org/software/gnulib/manual/html_node/snprintf.html
*
* hwloc_snprintf behaves properly, but it's a bit overkill on the vast majority
* of platforms, so don't enable it unless really needed.
*/
#ifdef HWLOC_HAVE_CORRECT_SNPRINTF
#define hwloc_snprintf snprintf
#else
extern int hwloc_snprintf(char *str, size_t size, const char *format, ...) __hwloc_attribute_format(printf, 3, 4); extern int hwloc_snprintf(char *str, size_t size, const char *format, ...) __hwloc_attribute_format(printf, 3, 4);
#endif
/* Return the name of the currently running program, if supported. /* Return the name of the currently running program, if supported.
* If not NULL, must be freed by the caller. * If not NULL, must be freed by the caller.
@ -356,7 +397,7 @@ extern char * hwloc_progname(struct hwloc_topology *topology);
#define HWLOC_GROUP_KIND_INTEL_MODULE 102 /* no subkind */ #define HWLOC_GROUP_KIND_INTEL_MODULE 102 /* no subkind */
#define HWLOC_GROUP_KIND_INTEL_TILE 103 /* no subkind */ #define HWLOC_GROUP_KIND_INTEL_TILE 103 /* no subkind */
#define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */ #define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */
#define HWLOC_GROUP_KIND_S390_BOOK 110 /* no subkind */ #define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */
#define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */ #define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */
/* then, OS-specific groups */ /* then, OS-specific groups */
#define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */ #define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */

View file

@ -1,12 +1,12 @@
/* /*
* Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2017 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#ifndef PRIVATE_XML_H #ifndef PRIVATE_XML_H
#define PRIVATE_XML_H 1 #define PRIVATE_XML_H 1
#include <hwloc.h> #include "hwloc.h"
#include <sys/types.h> #include <sys/types.h>
@ -54,7 +54,6 @@ struct hwloc_xml_backend_data_s {
unsigned nbnumanodes; unsigned nbnumanodes;
hwloc_obj_t first_numanode, last_numanode; /* temporary cousin-list for handling v1distances */ hwloc_obj_t first_numanode, last_numanode; /* temporary cousin-list for handling v1distances */
struct hwloc__xml_imported_v1distances_s *first_v1dist, *last_v1dist; struct hwloc__xml_imported_v1distances_s *first_v1dist, *last_v1dist;
int dont_merge_die_groups;
}; };
/************** /**************

View file

@ -11,7 +11,7 @@
/* include hwloc's config before anything else /* include hwloc's config before anything else
* so that extensions and features are properly enabled * so that extensions and features are properly enabled
*/ */
#include <private/private.h> #include "private/private.h"
/* $OpenBSD: base64.c,v 1.5 2006/10/21 09:55:03 otto Exp $ */ /* $OpenBSD: base64.c,v 1.5 2006/10/21 09:55:03 otto Exp $ */

View file

@ -1,15 +1,16 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2010, 2012 Université Bordeaux * Copyright © 2009-2010, 2012 Université Bordeaux
* Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved. * Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <private/private.h> #include "private/private.h"
#include <hwloc/helper.h> #include "hwloc/helper.h"
#ifdef HAVE_SYS_MMAN_H #ifdef HAVE_SYS_MMAN_H
# include <sys/mman.h> # include <sys/mman.h>
#endif #endif
@ -885,6 +886,8 @@ hwloc_set_binding_hooks(struct hwloc_topology *topology)
} else { } else {
/* not this system, use dummy binding hooks that do nothing (but don't return ENOSYS) */ /* not this system, use dummy binding hooks that do nothing (but don't return ENOSYS) */
hwloc_set_dummy_hooks(&topology->binding_hooks, &topology->support); hwloc_set_dummy_hooks(&topology->binding_hooks, &topology->support);
/* Linux has some hooks that also work in this case, but they are not strictly needed yet. */
} }
/* if not is_thissystem, set_cpubind is fake /* if not is_thissystem, set_cpubind is fake

View file

@ -1,18 +1,18 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2018 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc/autogen/config.h> #include "hwloc/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <private/misc.h> #include "private/misc.h"
#include <private/private.h> #include "private/private.h"
#include <private/debug.h> #include "private/debug.h"
#include <hwloc/bitmap.h> #include "hwloc/bitmap.h"
#include <stdarg.h> #include <stdarg.h>
#include <stdio.h> #include <stdio.h>
@ -766,6 +766,21 @@ int hwloc_bitmap_from_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned
return 0; return 0;
} }
/* Fill bitmap `set` from the first `nr` entries of the `masks` array.
 *
 * masks[k] is copied into the k-th unsigned long of the bitmap and the
 * infinite flag is cleared.
 * Returns 0 on success, or -1 if hwloc_bitmap_reset_by_ulongs() fails
 * (presumably an allocation failure -- confirm against its definition).
 */
int hwloc_bitmap_from_ulongs(struct hwloc_bitmap_s *set, unsigned nr, const unsigned long *masks)
{
  unsigned idx = 0;

  HWLOC__BITMAP_CHECK(set);

  /* prepare the bitmap storage for nr words before copying into it */
  if (hwloc_bitmap_reset_by_ulongs(set, nr) < 0)
    return -1;

  while (idx < nr) {
    set->ulongs[idx] = masks[idx];
    idx++;
  }

  /* the result is exactly what the caller provided, never infinitely set */
  set->infinite = 0;
  return 0;
}
unsigned long hwloc_bitmap_to_ulong(const struct hwloc_bitmap_s *set) unsigned long hwloc_bitmap_to_ulong(const struct hwloc_bitmap_s *set)
{ {
HWLOC__BITMAP_CHECK(set); HWLOC__BITMAP_CHECK(set);
@ -780,6 +795,30 @@ unsigned long hwloc_bitmap_to_ith_ulong(const struct hwloc_bitmap_s *set, unsign
return HWLOC_SUBBITMAP_READULONG(set, i); return HWLOC_SUBBITMAP_READULONG(set, i);
} }
/* Export the first `nr` unsigned longs of bitmap `set` into `masks`.
 *
 * masks[k] receives HWLOC_SUBBITMAP_READULONG(set, k) for every k < nr;
 * the caller must provide room for at least `nr` entries.
 * Always returns 0.
 */
int hwloc_bitmap_to_ulongs(const struct hwloc_bitmap_s *set, unsigned nr, unsigned long *masks)
{
  unsigned idx;

  HWLOC__BITMAP_CHECK(set);

  idx = 0;
  while (idx < nr) {
    masks[idx] = HWLOC_SUBBITMAP_READULONG(set, idx);
    idx++;
  }

  return 0;
}
/* Return the number of unsigned longs required to store bitmap `set` entirely.
 *
 * Returns -1 if the bitmap is infinitely set (no finite count exists),
 * 0 if the bitmap is empty, and otherwise the index of the word that
 * contains the last set bit, plus one.
 */
int hwloc_bitmap_nr_ulongs(const struct hwloc_bitmap_s *set)
{
  int last;

  HWLOC__BITMAP_CHECK(set);

  if (set->infinite)
    return -1;

  last = hwloc_bitmap_last(set);
  if (last < 0)
    /* empty bitmap: hwloc_bitmap_last() reports -1, nothing to store */
    return 0;

  /* Bit index `last` lives in word last/HWLOC_BITS_PER_LONG, so that many
   * words plus one are needed. Rounding `last` up instead would be off by
   * one whenever `last` is a multiple of the word size (e.g. only bit 0 set).
   */
  return (int) (last / HWLOC_BITS_PER_LONG) + 1;
}
int hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu) int hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu)
{ {
unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu); unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);

View file

@ -1,18 +1,19 @@
/* /*
* Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2012 Université Bordeaux * Copyright © 2012 Université Bordeaux
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <private/private.h> #include "private/private.h"
#include <private/xml.h> #include "private/xml.h"
#include <private/misc.h> #include "private/misc.h"
#define HWLOC_COMPONENT_STOP_NAME "stop" #define HWLOC_COMPONENT_STOP_NAME "stop"
#define HWLOC_COMPONENT_EXCLUDE_CHAR '-' #define HWLOC_COMPONENT_EXCLUDE_CHAR '-'
#define HWLOC_COMPONENT_SEPS "," #define HWLOC_COMPONENT_SEPS ","
#define HWLOC_COMPONENT_PHASESEP_CHAR ':'
/* list of all registered discovery components, sorted by priority, higher priority first. /* list of all registered discovery components, sorted by priority, higher priority first.
* noos is last because its priority is 0. * noos is last because its priority is 0.
@ -232,17 +233,6 @@ hwloc_plugins_init(void)
#endif /* HWLOC_HAVE_PLUGINS */ #endif /* HWLOC_HAVE_PLUGINS */
static const char *
hwloc_disc_component_type_string(hwloc_disc_component_type_t type)
{
switch (type) {
case HWLOC_DISC_COMPONENT_TYPE_CPU: return "cpu";
case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: return "global";
case HWLOC_DISC_COMPONENT_TYPE_MISC: return "misc";
default: return "**unknown**";
}
}
static int static int
hwloc_disc_component_register(struct hwloc_disc_component *component, hwloc_disc_component_register(struct hwloc_disc_component *component,
const char *filename) const char *filename)
@ -256,21 +246,26 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
return -1; return -1;
} }
if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR) if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR)
|| strchr(component->name, HWLOC_COMPONENT_PHASESEP_CHAR)
|| strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) { || strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) {
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n", fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n",
component->name, HWLOC_COMPONENT_EXCLUDE_CHAR); component->name, HWLOC_COMPONENT_EXCLUDE_CHAR);
return -1; return -1;
} }
/* check that the component type is valid */
switch ((unsigned) component->type) { /* check that the component phases are valid */
case HWLOC_DISC_COMPONENT_TYPE_CPU: if (!component->phases
case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: || (component->phases != HWLOC_DISC_PHASE_GLOBAL
case HWLOC_DISC_COMPONENT_TYPE_MISC: && component->phases & ~(HWLOC_DISC_PHASE_CPU
break; |HWLOC_DISC_PHASE_MEMORY
default: |HWLOC_DISC_PHASE_PCI
fprintf(stderr, "Cannot register discovery component `%s' with unknown type %u\n", |HWLOC_DISC_PHASE_IO
component->name, (unsigned) component->type); |HWLOC_DISC_PHASE_MISC
|HWLOC_DISC_PHASE_ANNOTATE
|HWLOC_DISC_PHASE_TWEAK))) {
fprintf(stderr, "Cannot register discovery component `%s' with invalid phases 0x%x\n",
component->name, component->phases);
return -1; return -1;
} }
@ -295,8 +290,8 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
prev = &((*prev)->next); prev = &((*prev)->next);
} }
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Registered %s discovery component `%s' with priority %u (%s%s)\n", fprintf(stderr, "Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n",
hwloc_disc_component_type_string(component->type), component->name, component->priority, component->name, component->phases, component->priority,
filename ? "from plugin " : "statically build", filename ? filename : ""); filename ? "from plugin " : "statically build", filename ? filename : "");
prev = &hwloc_disc_components; prev = &hwloc_disc_components;
@ -310,7 +305,7 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
return 0; return 0;
} }
#include <static-components.h> #include "static-components.h"
static void (**hwloc_component_finalize_cbs)(unsigned long); static void (**hwloc_component_finalize_cbs)(unsigned long);
static unsigned hwloc_component_finalize_cb_count; static unsigned hwloc_component_finalize_cb_count;
@ -415,31 +410,152 @@ hwloc_components_init(void)
} }
void void
hwloc_backends_init(struct hwloc_topology *topology) hwloc_topology_components_init(struct hwloc_topology *topology)
{ {
topology->nr_blacklisted_components = 0;
topology->blacklisted_components = NULL;
topology->backends = NULL; topology->backends = NULL;
topology->backend_excludes = 0; topology->backend_phases = 0;
topology->backend_excluded_phases = 0;
} }
/* look for name among components, ignoring things after `:' */
static struct hwloc_disc_component * static struct hwloc_disc_component *
hwloc_disc_component_find(int type /* hwloc_disc_component_type_t or -1 if any */, hwloc_disc_component_find(const char *name, const char **endp)
const char *name /* name of NULL if any */)
{ {
struct hwloc_disc_component *comp = hwloc_disc_components; struct hwloc_disc_component *comp;
size_t length;
const char *end = strchr(name, HWLOC_COMPONENT_PHASESEP_CHAR);
if (end) {
length = end-name;
if (endp)
*endp = end+1;
} else {
length = strlen(name);
if (endp)
*endp = NULL;
}
comp = hwloc_disc_components;
while (NULL != comp) { while (NULL != comp) {
if ((-1 == type || type == (int) comp->type) if (!strncmp(name, comp->name, length))
&& (NULL == name || !strcmp(name, comp->name)))
return comp; return comp;
comp = comp->next; comp = comp->next;
} }
return NULL; return NULL;
} }
/*
 * Convert a phase specification string into a phase bitmask.
 *
 *  - NULL means "no phase given" and selects every phase (~0U).
 *  - A string starting with a decimal digit is parsed as a number by
 *    strtoul() with base 0 (so 0x... and 0... prefixes are honoured).
 *  - Anything else is compared case-insensitively against the known
 *    phase names; an unknown name yields 0.
 */
static unsigned
hwloc_phases_from_string(const char *s)
{
  static const struct {
    const char *name;
    unsigned phase;
  } known_phases[] = {
    { "global",   HWLOC_DISC_PHASE_GLOBAL },
    { "cpu",      HWLOC_DISC_PHASE_CPU },
    { "memory",   HWLOC_DISC_PHASE_MEMORY },
    { "pci",      HWLOC_DISC_PHASE_PCI },
    { "io",       HWLOC_DISC_PHASE_IO },
    { "misc",     HWLOC_DISC_PHASE_MISC },
    { "annotate", HWLOC_DISC_PHASE_ANNOTATE },
    { "tweak",    HWLOC_DISC_PHASE_TWEAK }
  };
  unsigned k;

  if (!s)
    return ~0U;

  /* numeric form */
  if (s[0] >= '0' && s[0] <= '9')
    return (unsigned) strtoul(s, NULL, 0);

  /* symbolic form */
  for(k = 0; k < sizeof(known_phases)/sizeof(known_phases[0]); k++)
    if (!strcasecmp(s, known_phases[k].name))
      return known_phases[k].phase;

  return 0;
}
static int
hwloc_disc_component_blacklist_one(struct hwloc_topology *topology,
const char *name)
{
struct hwloc_topology_forced_component_s *blacklisted;
struct hwloc_disc_component *comp;
unsigned phases;
unsigned i;
if (!strcmp(name, "linuxpci") || !strcmp(name, "linuxio")) {
/* replace linuxpci and linuxio with linux (with IO phases)
* for backward compatibility with pre-v2.0 and v2.0 respectively */
if (hwloc_components_verbose)
fprintf(stderr, "Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name);
comp = hwloc_disc_component_find("linux", NULL);
phases = HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE;
} else {
/* normal lookup */
const char *end;
comp = hwloc_disc_component_find(name, &end);
phases = hwloc_phases_from_string(end);
}
if (!comp) {
errno = EINVAL;
return -1;
}
if (hwloc_components_verbose)
fprintf(stderr, "Blacklisting component `%s` phases 0x%x\n", comp->name, phases);
for(i=0; i<topology->nr_blacklisted_components; i++) {
if (topology->blacklisted_components[i].component == comp) {
topology->blacklisted_components[i].phases |= phases;
return 0;
}
}
blacklisted = realloc(topology->blacklisted_components, (topology->nr_blacklisted_components+1)*sizeof(*blacklisted));
if (!blacklisted)
return -1;
blacklisted[topology->nr_blacklisted_components].component = comp;
blacklisted[topology->nr_blacklisted_components].phases = phases;
topology->blacklisted_components = blacklisted;
topology->nr_blacklisted_components++;
return 0;
}
/*
 * Public entry point for tuning which discovery components may be used.
 *
 * Must be called before the topology is loaded (EBUSY otherwise).
 * `flags' must currently be exactly HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST
 * (EINVAL otherwise).
 *
 * A `name' of the form "all:<phases>" replaces the topology-wide set of
 * excluded phases; any other name is handed to
 * hwloc_disc_component_blacklist_one().
 *
 * Returns 0 on success, -1 on error.
 */
int
hwloc_topology_set_components(struct hwloc_topology *topology,
                              unsigned long flags,
                              const char *name)
{
  static const char all_keyword[] = "all";
  const size_t all_len = sizeof(all_keyword) - 1;

  if (topology->is_loaded) {
    errno = EBUSY;
    return -1;
  }

  /* reject unknown flags */
  if ((flags & ~HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST) != 0) {
    errno = EINVAL;
    return -1;
  }

  /* this flag is strictly required for now */
  if (HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST != flags) {
    errno = EINVAL;
    return -1;
  }

  /* "all:<phases>" applies to every component at once */
  if (0 == strncmp(name, all_keyword, all_len)
      && HWLOC_COMPONENT_PHASESEP_CHAR == name[all_len]) {
    topology->backend_excluded_phases = hwloc_phases_from_string(name + all_len + 1);
    return 0;
  }

  return hwloc_disc_component_blacklist_one(topology, name);
}
/* used by set_xml(), set_synthetic(), ... environment variables, ... to force the first backend */ /* used by set_xml(), set_synthetic(), ... environment variables, ... to force the first backend */
int int
hwloc_disc_component_force_enable(struct hwloc_topology *topology, hwloc_disc_component_force_enable(struct hwloc_topology *topology,
int envvar_forced, int envvar_forced,
int type, const char *name, const char *name,
const void *data1, const void *data2, const void *data3) const void *data1, const void *data2, const void *data3)
{ {
struct hwloc_disc_component *comp; struct hwloc_disc_component *comp;
@ -450,18 +566,28 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology,
return -1; return -1;
} }
comp = hwloc_disc_component_find(type, name); comp = hwloc_disc_component_find(name, NULL);
if (!comp) { if (!comp) {
errno = ENOSYS; errno = ENOSYS;
return -1; return -1;
} }
backend = comp->instantiate(comp, data1, data2, data3); backend = comp->instantiate(topology, comp, 0U /* force-enabled don't get any phase blacklisting */,
data1, data2, data3);
if (backend) { if (backend) {
int err;
backend->envvar_forced = envvar_forced; backend->envvar_forced = envvar_forced;
if (topology->backends) if (topology->backends)
hwloc_backends_disable_all(topology); hwloc_backends_disable_all(topology);
return hwloc_backend_enable(topology, backend); err = hwloc_backend_enable(backend);
if (comp->phases == HWLOC_DISC_PHASE_GLOBAL) {
char *env = getenv("HWLOC_ANNOTATE_GLOBAL_COMPONENTS");
if (env && atoi(env))
topology->backend_excluded_phases &= ~HWLOC_DISC_PHASE_ANNOTATE;
}
return err;
} else } else
return -1; return -1;
} }
@ -469,29 +595,32 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology,
static int static int
hwloc_disc_component_try_enable(struct hwloc_topology *topology, hwloc_disc_component_try_enable(struct hwloc_topology *topology,
struct hwloc_disc_component *comp, struct hwloc_disc_component *comp,
const char *comparg, int envvar_forced,
int envvar_forced) unsigned blacklisted_phases)
{ {
struct hwloc_backend *backend; struct hwloc_backend *backend;
if (topology->backend_excludes & comp->type) { if (!(comp->phases & ~(topology->backend_excluded_phases | blacklisted_phases))) {
/* all this backend phases are already excluded, exclude the backend entirely */
if (hwloc_components_verbose) if (hwloc_components_verbose)
/* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc. /* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc.
*/ */
fprintf(stderr, "Excluding %s discovery component `%s', conflicts with excludes 0x%x\n", fprintf(stderr, "Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n",
hwloc_disc_component_type_string(comp->type), comp->name, topology->backend_excludes); comp->name, comp->phases, topology->backend_excluded_phases);
return -1; return -1;
} }
backend = comp->instantiate(comp, comparg, NULL, NULL); backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases,
NULL, NULL, NULL);
if (!backend) { if (!backend) {
if (hwloc_components_verbose || envvar_forced) if (hwloc_components_verbose || envvar_forced)
fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name); fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name);
return -1; return -1;
} }
backend->phases &= ~blacklisted_phases;
backend->envvar_forced = envvar_forced; backend->envvar_forced = envvar_forced;
return hwloc_backend_enable(topology, backend); return hwloc_backend_enable(backend);
} }
void void
@ -502,11 +631,12 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
int tryall = 1; int tryall = 1;
const char *_env; const char *_env;
char *env; /* we'll to modify the env value, so duplicate it */ char *env; /* we'll to modify the env value, so duplicate it */
unsigned i;
_env = getenv("HWLOC_COMPONENTS"); _env = getenv("HWLOC_COMPONENTS");
env = _env ? strdup(_env) : NULL; env = _env ? strdup(_env) : NULL;
/* enable explicitly listed components */ /* blacklist disabled components */
if (env) { if (env) {
char *curenv = env; char *curenv = env;
size_t s; size_t s;
@ -516,21 +646,41 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
if (s) { if (s) {
char c; char c;
/* replace linuxpci with linuxio for backward compatibility with pre-v2.0 */ if (curenv[0] != HWLOC_COMPONENT_EXCLUDE_CHAR)
if (!strncmp(curenv, "linuxpci", 8) && s == 8) {
curenv[5] = 'i';
curenv[6] = 'o';
curenv[7] = *HWLOC_COMPONENT_SEPS;
} else if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, "linuxpci", 8) && s == 9) {
curenv[6] = 'i';
curenv[7] = 'o';
curenv[8] = *HWLOC_COMPONENT_SEPS;
/* skip this name, it's a negated one */
goto nextname; goto nextname;
}
if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR) /* save the last char and replace with \0 */
goto nextname; c = curenv[s];
curenv[s] = '\0';
/* blacklist it, and just ignore failures to allocate */
hwloc_disc_component_blacklist_one(topology, curenv+1);
/* remove that blacklisted name from the string */
for(i=0; i<s; i++)
curenv[i] = *HWLOC_COMPONENT_SEPS;
/* restore chars (the second loop below needs env to be unmodified) */
curenv[s] = c;
}
nextname:
curenv += s;
if (*curenv)
/* Skip comma */
curenv++;
}
}
/* enable explicitly listed components */
if (env) {
char *curenv = env;
size_t s;
while (*curenv) {
s = strcspn(curenv, HWLOC_COMPONENT_SEPS);
if (s) {
char c, *name;
if (!strncmp(curenv, HWLOC_COMPONENT_STOP_NAME, s)) { if (!strncmp(curenv, HWLOC_COMPONENT_STOP_NAME, s)) {
tryall = 0; tryall = 0;
@ -541,18 +691,31 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
c = curenv[s]; c = curenv[s];
curenv[s] = '\0'; curenv[s] = '\0';
comp = hwloc_disc_component_find(-1, curenv); name = curenv;
if (!strcmp(name, "linuxpci") || !strcmp(name, "linuxio")) {
if (hwloc_components_verbose)
fprintf(stderr, "Replacing deprecated component `%s' with `linux' in envvar forcing\n", name);
name = "linux";
}
comp = hwloc_disc_component_find(name, NULL /* we enable the entire component, phases must be blacklisted separately */);
if (comp) { if (comp) {
hwloc_disc_component_try_enable(topology, comp, NULL, 1 /* envvar forced */); unsigned blacklisted_phases = 0U;
for(i=0; i<topology->nr_blacklisted_components; i++)
if (comp == topology->blacklisted_components[i].component) {
blacklisted_phases = topology->blacklisted_components[i].phases;
break;
}
if (comp->phases & ~blacklisted_phases)
hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases);
} else { } else {
fprintf(stderr, "Cannot find discovery component `%s'\n", curenv); fprintf(stderr, "Cannot find discovery component `%s'\n", name);
} }
/* restore chars (the second loop below needs env to be unmodified) */ /* restore chars (the second loop below needs env to be unmodified) */
curenv[s] = c; curenv[s] = c;
} }
nextname:
curenv += s; curenv += s;
if (*curenv) if (*curenv)
/* Skip comma */ /* Skip comma */
@ -566,26 +729,24 @@ nextname:
if (tryall) { if (tryall) {
comp = hwloc_disc_components; comp = hwloc_disc_components;
while (NULL != comp) { while (NULL != comp) {
unsigned blacklisted_phases = 0U;
if (!comp->enabled_by_default) if (!comp->enabled_by_default)
goto nextcomp; goto nextcomp;
/* check if this component was explicitly excluded in env */ /* check if this component was blacklisted by the application */
if (env) { for(i=0; i<topology->nr_blacklisted_components; i++)
char *curenv = env; if (comp == topology->blacklisted_components[i].component) {
while (*curenv) { blacklisted_phases = topology->blacklisted_components[i].phases;
size_t s = strcspn(curenv, HWLOC_COMPONENT_SEPS); break;
if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, comp->name, s-1) && strlen(comp->name) == s-1) {
if (hwloc_components_verbose)
fprintf(stderr, "Excluding %s discovery component `%s' because of HWLOC_COMPONENTS environment variable\n",
hwloc_disc_component_type_string(comp->type), comp->name);
goto nextcomp;
}
curenv += s;
if (*curenv)
/* Skip comma */
curenv++;
} }
if (!(comp->phases & ~blacklisted_phases)) {
if (hwloc_components_verbose)
fprintf(stderr, "Excluding blacklisted discovery component `%s' phases 0x%x\n",
comp->name, comp->phases);
goto nextcomp;
} }
hwloc_disc_component_try_enable(topology, comp, NULL, 0 /* defaults, not envvar forced */);
hwloc_disc_component_try_enable(topology, comp, 0 /* defaults, not envvar forced */, blacklisted_phases);
nextcomp: nextcomp:
comp = comp->next; comp = comp->next;
} }
@ -597,7 +758,7 @@ nextcomp:
backend = topology->backends; backend = topology->backends;
fprintf(stderr, "Final list of enabled discovery components: "); fprintf(stderr, "Final list of enabled discovery components: ");
while (backend != NULL) { while (backend != NULL) {
fprintf(stderr, "%s%s", first ? "" : ",", backend->component->name); fprintf(stderr, "%s%s(0x%x)", first ? "" : ",", backend->component->name, backend->phases);
backend = backend->next; backend = backend->next;
first = 0; first = 0;
} }
@ -638,7 +799,8 @@ hwloc_components_fini(void)
} }
struct hwloc_backend * struct hwloc_backend *
hwloc_backend_alloc(struct hwloc_disc_component *component) hwloc_backend_alloc(struct hwloc_topology *topology,
struct hwloc_disc_component *component)
{ {
struct hwloc_backend * backend = malloc(sizeof(*backend)); struct hwloc_backend * backend = malloc(sizeof(*backend));
if (!backend) { if (!backend) {
@ -646,6 +808,12 @@ hwloc_backend_alloc(struct hwloc_disc_component *component)
return NULL; return NULL;
} }
backend->component = component; backend->component = component;
backend->topology = topology;
/* filter-out component phases that are excluded */
backend->phases = component->phases & ~topology->backend_excluded_phases;
if (backend->phases != component->phases && hwloc_components_verbose)
fprintf(stderr, "Trying discovery component `%s' with phases 0x%x instead of 0x%x\n",
component->name, backend->phases, component->phases);
backend->flags = 0; backend->flags = 0;
backend->discover = NULL; backend->discover = NULL;
backend->get_pci_busid_cpuset = NULL; backend->get_pci_busid_cpuset = NULL;
@ -665,14 +833,15 @@ hwloc_backend_disable(struct hwloc_backend *backend)
} }
int int
hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend) hwloc_backend_enable(struct hwloc_backend *backend)
{ {
struct hwloc_topology *topology = backend->topology;
struct hwloc_backend **pprev; struct hwloc_backend **pprev;
/* check backend flags */ /* check backend flags */
if (backend->flags) { if (backend->flags) {
fprintf(stderr, "Cannot enable %s discovery component `%s' with unknown flags %lx\n", fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n",
hwloc_disc_component_type_string(backend->component->type), backend->component->name, backend->flags); backend->component->name, backend->component->phases, backend->flags);
return -1; return -1;
} }
@ -681,8 +850,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back
while (NULL != *pprev) { while (NULL != *pprev) {
if ((*pprev)->component == backend->component) { if ((*pprev)->component == backend->component) {
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Cannot enable %s discovery component `%s' twice\n", fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x twice\n",
hwloc_disc_component_type_string(backend->component->type), backend->component->name); backend->component->name, backend->component->phases);
hwloc_backend_disable(backend); hwloc_backend_disable(backend);
errno = EBUSY; errno = EBUSY;
return -1; return -1;
@ -691,8 +860,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back
} }
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Enabling %s discovery component `%s'\n", fprintf(stderr, "Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n",
hwloc_disc_component_type_string(backend->component->type), backend->component->name); backend->component->name, backend->phases, backend->component->phases);
/* enqueue at the end */ /* enqueue at the end */
pprev = &topology->backends; pprev = &topology->backends;
@ -701,8 +870,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back
backend->next = *pprev; backend->next = *pprev;
*pprev = backend; *pprev = backend;
backend->topology = topology; topology->backend_phases |= backend->component->phases;
topology->backend_excludes |= backend->component->excludes; topology->backend_excluded_phases |= backend->component->excluded_phases;
return 0; return 0;
} }
@ -712,7 +881,7 @@ hwloc_backends_is_thissystem(struct hwloc_topology *topology)
struct hwloc_backend *backend; struct hwloc_backend *backend;
const char *local_env; const char *local_env;
/* Apply is_thissystem topology flag before we enforce envvar backends. /*
* If the application changed the backend with set_foo(), * If the application changed the backend with set_foo(),
* it may use set_flags() update the is_thissystem flag here. * it may use set_flags() update the is_thissystem flag here.
* If it changes the backend with environment variables below, * If it changes the backend with environment variables below,
@ -775,11 +944,20 @@ hwloc_backends_disable_all(struct hwloc_topology *topology)
while (NULL != (backend = topology->backends)) { while (NULL != (backend = topology->backends)) {
struct hwloc_backend *next = backend->next; struct hwloc_backend *next = backend->next;
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Disabling %s discovery component `%s'\n", fprintf(stderr, "Disabling discovery component `%s'\n",
hwloc_disc_component_type_string(backend->component->type), backend->component->name); backend->component->name);
hwloc_backend_disable(backend); hwloc_backend_disable(backend);
topology->backends = next; topology->backends = next;
} }
topology->backends = NULL; topology->backends = NULL;
topology->backend_excludes = 0; topology->backend_excluded_phases = 0;
}
void
hwloc_topology_components_fini(struct hwloc_topology *topology)
{
/* hwloc_backends_disable_all() must have been called earlier */
assert(!topology->backends);
free(topology->blacklisted_components);
} }

View file

@ -1,11 +1,11 @@
/* /*
* Copyright © 2013-2018 Inria. All rights reserved. * Copyright © 2013-2019 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <private/private.h> #include "private/private.h"
#include <private/misc.h> #include "private/misc.h"
int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff) int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff)
{ {
@ -351,7 +351,8 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
err = 1; err = 1;
break; break;
} }
if (dist1->type != dist2->type if (dist1->unique_type != dist2->unique_type
|| dist1->different_types || dist2->different_types /* too lazy to support this case */
|| dist1->nbobjs != dist2->nbobjs || dist1->nbobjs != dist2->nbobjs
|| dist1->kind != dist2->kind || dist1->kind != dist2->kind
|| memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) { || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) {
@ -463,6 +464,10 @@ int hwloc_topology_diff_apply(hwloc_topology_t topology,
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
if (topology->adopted_shmem_addr) {
errno = EPERM;
return -1;
}
if (flags & ~HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE) { if (flags & ~HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE) {
errno = EINVAL; errno = EINVAL;

View file

@ -1,19 +1,22 @@
/* /*
* Copyright © 2010-2018 Inria. All rights reserved. * Copyright © 2010-2019 Inria. All rights reserved.
* Copyright © 2011-2012 Université Bordeaux * Copyright © 2011-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <private/private.h> #include "private/private.h"
#include <private/debug.h> #include "private/debug.h"
#include <private/misc.h> #include "private/misc.h"
#include <float.h> #include <float.h>
#include <math.h> #include <math.h>
static struct hwloc_internal_distances_s *
hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances);
/****************************************************** /******************************************************
* Global init, prepare, destroy, dup * Global init, prepare, destroy, dup
*/ */
@ -70,6 +73,8 @@ void hwloc_internal_distances_prepare(struct hwloc_topology *topology)
static void hwloc_internal_distances_free(struct hwloc_internal_distances_s *dist) static void hwloc_internal_distances_free(struct hwloc_internal_distances_s *dist)
{ {
free(dist->name);
free(dist->different_types);
free(dist->indexes); free(dist->indexes);
free(dist->objs); free(dist->objs);
free(dist->values); free(dist->values);
@ -96,15 +101,35 @@ static int hwloc_internal_distances_dup_one(struct hwloc_topology *new, struct h
newdist = hwloc_tma_malloc(tma, sizeof(*newdist)); newdist = hwloc_tma_malloc(tma, sizeof(*newdist));
if (!newdist) if (!newdist)
return -1; return -1;
if (olddist->name) {
newdist->name = hwloc_tma_strdup(tma, olddist->name);
if (!newdist->name) {
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
hwloc_internal_distances_free(newdist);
return -1;
}
} else {
newdist->name = NULL;
}
newdist->type = olddist->type; if (olddist->different_types) {
newdist->different_types = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->different_types));
if (!newdist->different_types) {
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
hwloc_internal_distances_free(newdist);
return -1;
}
memcpy(newdist->different_types, olddist->different_types, nbobjs * sizeof(*newdist->different_types));
} else
newdist->different_types = NULL;
newdist->unique_type = olddist->unique_type;
newdist->nbobjs = nbobjs; newdist->nbobjs = nbobjs;
newdist->kind = olddist->kind; newdist->kind = olddist->kind;
newdist->id = olddist->id; newdist->id = olddist->id;
newdist->indexes = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->indexes)); newdist->indexes = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->indexes));
newdist->objs = hwloc_tma_calloc(tma, nbobjs * sizeof(*newdist->objs)); newdist->objs = hwloc_tma_calloc(tma, nbobjs * sizeof(*newdist->objs));
newdist->objs_are_valid = 0; newdist->iflags = olddist->iflags & ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; /* must be revalidated after dup() */
newdist->values = hwloc_tma_malloc(tma, nbobjs*nbobjs * sizeof(*newdist->values)); newdist->values = hwloc_tma_malloc(tma, nbobjs*nbobjs * sizeof(*newdist->values));
if (!newdist->indexes || !newdist->objs || !newdist->values) { if (!newdist->indexes || !newdist->objs || !newdist->values) {
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
@ -150,6 +175,10 @@ int hwloc_distances_remove(hwloc_topology_t topology)
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
if (topology->adopted_shmem_addr) {
errno = EPERM;
return -1;
}
hwloc_internal_distances_destroy(topology); hwloc_internal_distances_destroy(topology);
return 0; return 0;
} }
@ -163,6 +192,10 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
if (topology->adopted_shmem_addr) {
errno = EPERM;
return -1;
}
/* switch back to types since we don't support groups for now */ /* switch back to types since we don't support groups for now */
type = hwloc_get_depth_type(topology, depth); type = hwloc_get_depth_type(topology, depth);
@ -174,7 +207,7 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
next = topology->first_dist; next = topology->first_dist;
while ((dist = next) != NULL) { while ((dist = next) != NULL) {
next = dist->next; next = dist->next;
if (dist->type == type) { if (dist->unique_type == type) {
if (next) if (next)
next->prev = dist->prev; next->prev = dist->prev;
else else
@ -190,6 +223,27 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
return 0; return 0;
} }
/*
 * Remove from the topology the internal distances matching the public
 * `distances' structure, then release that public structure.
 *
 * Returns 0 on success. Returns -1 with errno set to:
 *  - EPERM  if the topology was adopted from shared memory; the other
 *           distance-removal functions in this file refuse to modify such
 *           topologies, and this one must not free their contents either;
 *  - EINVAL if `distances' does not match any internal distances of this
 *           topology.
 * On failure nothing is removed and `distances' is NOT released, so the
 * caller still owns it.
 */
int hwloc_distances_release_remove(hwloc_topology_t topology,
                                   struct hwloc_distances_s *distances)
{
  struct hwloc_internal_distances_s *dist;

  if (topology->adopted_shmem_addr) {
    errno = EPERM;
    return -1;
  }

  dist = hwloc__internal_distances_from_public(topology, distances);
  if (!dist) {
    errno = EINVAL;
    return -1;
  }

  /* unlink from the doubly-linked list, fixing the list head/tail if needed */
  if (dist->prev)
    dist->prev->next = dist->next;
  else
    topology->first_dist = dist->next;
  if (dist->next)
    dist->next->prev = dist->prev;
  else
    topology->last_dist = dist->prev;

  hwloc_internal_distances_free(dist);
  hwloc_distances_release(topology, distances);
  return 0;
}
/****************************************************** /******************************************************
* Add distances to the topology * Add distances to the topology
*/ */
@ -201,17 +255,34 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, str
* the caller gives us the distances and objs pointers, we'll free them later. * the caller gives us the distances and objs pointers, we'll free them later.
*/ */
static int static int
hwloc_internal_distances__add(hwloc_topology_t topology, hwloc_internal_distances__add(hwloc_topology_t topology, const char *name,
hwloc_obj_type_t type, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types,
unsigned long kind) unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values,
unsigned long kind, unsigned iflags)
{ {
struct hwloc_internal_distances_s *dist = calloc(1, sizeof(*dist)); struct hwloc_internal_distances_s *dist;
if (different_types) {
kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; /* the user isn't forced to give it */
} else if (kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) {
errno = EINVAL;
goto err;
}
dist = calloc(1, sizeof(*dist));
if (!dist) if (!dist)
goto err; goto err;
dist->type = type; if (name)
dist->name = strdup(name); /* ignore failure */
dist->unique_type = unique_type;
dist->different_types = different_types;
dist->nbobjs = nbobjs; dist->nbobjs = nbobjs;
dist->kind = kind; dist->kind = kind;
dist->iflags = iflags;
assert(!!(iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) == !!objs);
if (!objs) { if (!objs) {
assert(indexes); assert(indexes);
@ -220,18 +291,16 @@ hwloc_internal_distances__add(hwloc_topology_t topology,
dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t)); dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t));
if (!dist->objs) if (!dist->objs)
goto err_with_dist; goto err_with_dist;
dist->objs_are_valid = 0;
} else { } else {
unsigned i; unsigned i;
assert(!indexes); assert(!indexes);
/* we only have objs, generate the indexes arrays so that we can refresh objs later */ /* we only have objs, generate the indexes arrays so that we can refresh objs later */
dist->objs = objs; dist->objs = objs;
dist->objs_are_valid = 1;
dist->indexes = malloc(nbobjs * sizeof(*dist->indexes)); dist->indexes = malloc(nbobjs * sizeof(*dist->indexes));
if (!dist->indexes) if (!dist->indexes)
goto err_with_dist; goto err_with_dist;
if (dist->type == HWLOC_OBJ_PU || dist->type == HWLOC_OBJ_NUMANODE) { if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) {
for(i=0; i<nbobjs; i++) for(i=0; i<nbobjs; i++)
dist->indexes[i] = objs[i]->os_index; dist->indexes[i] = objs[i]->os_index;
} else { } else {
@ -256,16 +325,19 @@ hwloc_internal_distances__add(hwloc_topology_t topology,
err_with_dist: err_with_dist:
free(dist); free(dist);
err: err:
free(different_types);
free(objs); free(objs);
free(indexes); free(indexes);
free(values); free(values);
return -1; return -1;
} }
int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name,
hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
unsigned long kind, unsigned long flags) unsigned long kind, unsigned long flags)
{ {
unsigned iflags = 0; /* objs not valid */
if (nbobjs < 2) { if (nbobjs < 2) {
errno = EINVAL; errno = EINVAL;
goto err; goto err;
@ -279,24 +351,71 @@ int hwloc_internal_distances_add_by_index(hwloc_topology_t topology,
goto err; goto err;
} }
return hwloc_internal_distances__add(topology, type, nbobjs, NULL, indexes, values, kind); return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, NULL, indexes, values, kind, iflags);
err: err:
free(indexes); free(indexes);
free(values); free(values);
free(different_types);
return -1; return -1;
} }
int hwloc_internal_distances_add(hwloc_topology_t topology, static void
hwloc_internal_distances_restrict(hwloc_obj_t *objs,
uint64_t *indexes,
uint64_t *values,
unsigned nbobjs, unsigned disappeared);
int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
unsigned long kind, unsigned long flags) unsigned long kind, unsigned long flags)
{ {
hwloc_obj_type_t unique_type, *different_types;
unsigned i, disappeared = 0;
unsigned iflags = HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
if (nbobjs < 2) { if (nbobjs < 2) {
errno = EINVAL; errno = EINVAL;
goto err; goto err;
} }
if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP)) { /* is there any NULL object? (useful in case of problem during insert in backends) */
for(i=0; i<nbobjs; i++)
if (!objs[i])
disappeared++;
if (disappeared) {
/* some objects are NULL */
if (disappeared == nbobjs) {
/* nothing left, drop the matrix */
free(objs);
free(values);
return 0;
}
/* restrict the matrix */
hwloc_internal_distances_restrict(objs, NULL, values, nbobjs, disappeared);
nbobjs -= disappeared;
}
unique_type = objs[0]->type;
for(i=1; i<nbobjs; i++)
if (objs[i]->type != unique_type) {
unique_type = HWLOC_OBJ_TYPE_NONE;
break;
}
if (unique_type == HWLOC_OBJ_TYPE_NONE) {
/* heterogeneous types */
different_types = malloc(nbobjs * sizeof(*different_types));
if (!different_types)
goto err;
for(i=0; i<nbobjs; i++)
different_types[i] = objs[i]->type;
} else {
/* homogeneous types */
different_types = NULL;
}
if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !different_types) {
float full_accuracy = 0.f; float full_accuracy = 0.f;
float *accuracies; float *accuracies;
unsigned nbaccuracies; unsigned nbaccuracies;
@ -310,8 +429,8 @@ int hwloc_internal_distances_add(hwloc_topology_t topology,
} }
if (topology->grouping_verbose) { if (topology->grouping_verbose) {
unsigned i, j; unsigned j;
int gp = (objs[0]->type != HWLOC_OBJ_NUMANODE && objs[0]->type != HWLOC_OBJ_PU); int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(unique_type);
fprintf(stderr, "Trying to group objects using distance matrix:\n"); fprintf(stderr, "Trying to group objects using distance matrix:\n");
fprintf(stderr, "%s", gp ? "gp_index" : "os_index"); fprintf(stderr, "%s", gp ? "gp_index" : "os_index");
for(j=0; j<nbobjs; j++) for(j=0; j<nbobjs; j++)
@ -329,7 +448,7 @@ int hwloc_internal_distances_add(hwloc_topology_t topology,
kind, nbaccuracies, accuracies, 1 /* check the first matrice */); kind, nbaccuracies, accuracies, 1 /* check the first matrice */);
} }
return hwloc_internal_distances__add(topology, objs[0]->type, nbobjs, objs, NULL, values, kind); return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, objs, NULL, values, kind, iflags);
err: err:
free(objs); free(objs);
@ -348,7 +467,6 @@ int hwloc_distances_add(hwloc_topology_t topology,
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
unsigned long kind, unsigned long flags) unsigned long kind, unsigned long flags)
{ {
hwloc_obj_type_t type;
unsigned i; unsigned i;
uint64_t *_values; uint64_t *_values;
hwloc_obj_t *_objs; hwloc_obj_t *_objs;
@ -358,6 +476,10 @@ int hwloc_distances_add(hwloc_topology_t topology,
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
if (topology->adopted_shmem_addr) {
errno = EPERM;
return -1;
}
if ((kind & ~HWLOC_DISTANCES_KIND_ALL) if ((kind & ~HWLOC_DISTANCES_KIND_ALL)
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1 || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1 || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1
@ -368,15 +490,8 @@ int hwloc_distances_add(hwloc_topology_t topology,
/* no strict need to check for duplicates, things shouldn't break */ /* no strict need to check for duplicates, things shouldn't break */
type = objs[0]->type;
if (type == HWLOC_OBJ_GROUP) {
/* not supported yet, would require we save the subkind together with the type. */
errno = EINVAL;
return -1;
}
for(i=1; i<nbobjs; i++) for(i=1; i<nbobjs; i++)
if (!objs[i] || objs[i]->type != type) { if (!objs[i]) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
@ -389,7 +504,7 @@ int hwloc_distances_add(hwloc_topology_t topology,
memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t)); memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t));
memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values)); memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values));
err = hwloc_internal_distances_add(topology, nbobjs, _objs, _values, kind, flags); err = hwloc_internal_distances_add(topology, NULL, nbobjs, _objs, _values, kind, flags);
if (err < 0) if (err < 0)
goto out; /* _objs and _values freed in hwloc_internal_distances_add() */ goto out; /* _objs and _values freed in hwloc_internal_distances_add() */
@ -409,9 +524,9 @@ int hwloc_distances_add(hwloc_topology_t topology,
* Refresh objects in distances * Refresh objects in distances
*/ */
static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) static hwloc_obj_t hwloc_find_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index)
{ {
hwloc_obj_t obj = hwloc_get_obj_by_type(topology, type, 0); hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0);
while (obj) { while (obj) {
if (obj->gp_index == gp_index) if (obj->gp_index == gp_index)
return obj; return obj;
@ -420,12 +535,31 @@ static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology
return NULL; return NULL;
} }
static void static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index)
hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist, {
hwloc_obj_t *objs, int depth = hwloc_get_type_depth(topology, type);
unsigned disappeared) if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
return NULL;
if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) {
int topodepth = hwloc_topology_get_depth(topology);
for(depth=0; depth<topodepth; depth++) {
if (hwloc_get_depth_type(topology, depth) == type) {
hwloc_obj_t obj = hwloc_find_obj_by_depth_and_gp_index(topology, depth, gp_index);
if (obj)
return obj;
}
}
return NULL;
}
return hwloc_find_obj_by_depth_and_gp_index(topology, depth, gp_index);
}
static void
hwloc_internal_distances_restrict(hwloc_obj_t *objs,
uint64_t *indexes,
uint64_t *values,
unsigned nbobjs, unsigned disappeared)
{ {
unsigned nbobjs = dist->nbobjs;
unsigned i, newi; unsigned i, newi;
unsigned j, newj; unsigned j, newj;
@ -433,7 +567,7 @@ hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist,
if (objs[i]) { if (objs[i]) {
for(j=0, newj=0; j<nbobjs; j++) for(j=0, newj=0; j<nbobjs; j++)
if (objs[j]) { if (objs[j]) {
dist->values[newi*(nbobjs-disappeared)+newj] = dist->values[i*nbobjs+j]; values[newi*(nbobjs-disappeared)+newj] = values[i*nbobjs+j];
newj++; newj++;
} }
newi++; newi++;
@ -442,25 +576,25 @@ hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist,
for(i=0, newi=0; i<nbobjs; i++) for(i=0, newi=0; i<nbobjs; i++)
if (objs[i]) { if (objs[i]) {
objs[newi] = objs[i]; objs[newi] = objs[i];
dist->indexes[newi] = dist->indexes[i]; if (indexes)
indexes[newi] = indexes[i];
newi++; newi++;
} }
dist->nbobjs -= disappeared;
} }
static int static int
hwloc_internal_distances_refresh_one(hwloc_topology_t topology, hwloc_internal_distances_refresh_one(hwloc_topology_t topology,
struct hwloc_internal_distances_s *dist) struct hwloc_internal_distances_s *dist)
{ {
hwloc_obj_type_t type = dist->type; hwloc_obj_type_t unique_type = dist->unique_type;
hwloc_obj_type_t *different_types = dist->different_types;
unsigned nbobjs = dist->nbobjs; unsigned nbobjs = dist->nbobjs;
hwloc_obj_t *objs = dist->objs; hwloc_obj_t *objs = dist->objs;
uint64_t *indexes = dist->indexes; uint64_t *indexes = dist->indexes;
unsigned disappeared = 0; unsigned disappeared = 0;
unsigned i; unsigned i;
if (dist->objs_are_valid) if (dist->iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID)
return 0; return 0;
for(i=0; i<nbobjs; i++) { for(i=0; i<nbobjs; i++) {
@ -468,12 +602,16 @@ hwloc_internal_distances_refresh_one(hwloc_topology_t topology,
/* TODO use cpuset/nodeset to find pus/numas from the root? /* TODO use cpuset/nodeset to find pus/numas from the root?
* faster than traversing the entire level? * faster than traversing the entire level?
*/ */
if (type == HWLOC_OBJ_PU) if (HWLOC_DIST_TYPE_USE_OS_INDEX(unique_type)) {
obj = hwloc_get_pu_obj_by_os_index(topology, (unsigned) indexes[i]); if (unique_type == HWLOC_OBJ_PU)
else if (type == HWLOC_OBJ_NUMANODE) obj = hwloc_get_pu_obj_by_os_index(topology, (unsigned) indexes[i]);
obj = hwloc_get_numanode_obj_by_os_index(topology, (unsigned) indexes[i]); else if (unique_type == HWLOC_OBJ_NUMANODE)
else obj = hwloc_get_numanode_obj_by_os_index(topology, (unsigned) indexes[i]);
obj = hwloc_find_obj_by_type_and_gp_index(topology, type, indexes[i]); else
abort();
} else {
obj = hwloc_find_obj_by_type_and_gp_index(topology, different_types ? different_types[i] : unique_type, indexes[i]);
}
objs[i] = obj; objs[i] = obj;
if (!obj) if (!obj)
disappeared++; disappeared++;
@ -483,10 +621,12 @@ hwloc_internal_distances_refresh_one(hwloc_topology_t topology,
/* became useless, drop */ /* became useless, drop */
return -1; return -1;
if (disappeared) if (disappeared) {
hwloc_internal_distances_restrict(dist, objs, disappeared); hwloc_internal_distances_restrict(objs, dist->indexes, dist->values, nbobjs, disappeared);
dist->nbobjs -= disappeared;
}
dist->objs_are_valid = 1; dist->iflags |= HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
return 0; return 0;
} }
@ -520,32 +660,64 @@ hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology)
{ {
struct hwloc_internal_distances_s *dist; struct hwloc_internal_distances_s *dist;
for(dist = topology->first_dist; dist; dist = dist->next) for(dist = topology->first_dist; dist; dist = dist->next)
dist->objs_are_valid = 0; dist->iflags &= ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
} }
/****************************************************** /******************************************************
* User API for getting distances * User API for getting distances
*/ */
/* What we actually allocate for user queries, even if we only
 * return the distances part of it.
 *
 * The id field is copied from the internal distances structure that the
 * public copy was built from, so that the internal structure can be looked
 * up again from the public pointer.
 */
struct hwloc_distances_container_s {
  unsigned id;
  struct hwloc_distances_s distances;
};

/* Byte offset of the public "distances" member inside its container. */
#define HWLOC_DISTANCES_CONTAINER_OFFSET ((char*)&((struct hwloc_distances_container_s*)NULL)->distances - (char*)NULL)

/* Convert a pointer to the public "distances" member back into a pointer to
 * the enclosing container.
 * Both the argument and the whole expansion are parenthesized so that the
 * macro can be given any pointer expression and can be followed by "->".
 */
#define HWLOC_DISTANCES_CONTAINER(_d) ((struct hwloc_distances_container_s *) ( ((char*)(_d)) - HWLOC_DISTANCES_CONTAINER_OFFSET ))
/* Find the internal distances structure that a public distances structure
 * was built from, by matching the id stored in the public container against
 * the ids in the topology's list of internal distances.
 *
 * Returns NULL if no internal structure carries that id.
 */
static struct hwloc_internal_distances_s *
hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances)
{
  struct hwloc_distances_container_s *container = HWLOC_DISTANCES_CONTAINER(distances);
  struct hwloc_internal_distances_s *cur = topology->first_dist;

  /* walk the list until the ids match or the list is exhausted */
  while (cur && cur->id != container->id)
    cur = cur->next;

  return cur;
}
void void
hwloc_distances_release(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_distances_release(hwloc_topology_t topology __hwloc_attribute_unused,
struct hwloc_distances_s *distances) struct hwloc_distances_s *distances)
{ {
struct hwloc_distances_container_s *cont = HWLOC_DISTANCES_CONTAINER(distances);
free(distances->values); free(distances->values);
free(distances->objs); free(distances->objs);
free(distances); free(cont);
}
const char *
hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances)
{
struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
return dist ? dist->name : NULL;
} }
static struct hwloc_distances_s * static struct hwloc_distances_s *
hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused,
struct hwloc_internal_distances_s *dist) struct hwloc_internal_distances_s *dist)
{ {
struct hwloc_distances_container_s *cont;
struct hwloc_distances_s *distances; struct hwloc_distances_s *distances;
unsigned nbobjs; unsigned nbobjs;
distances = malloc(sizeof(*distances)); cont = malloc(sizeof(*cont));
if (!distances) if (!cont)
return NULL; return NULL;
distances = &cont->distances;
nbobjs = distances->nbobjs = dist->nbobjs; nbobjs = distances->nbobjs = dist->nbobjs;
@ -560,18 +732,20 @@ hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused,
memcpy(distances->values, dist->values, nbobjs*nbobjs*sizeof(*distances->values)); memcpy(distances->values, dist->values, nbobjs*nbobjs*sizeof(*distances->values));
distances->kind = dist->kind; distances->kind = dist->kind;
cont->id = dist->id;
return distances; return distances;
out_with_objs: out_with_objs:
free(distances->objs); free(distances->objs);
out: out:
free(distances); free(cont);
return NULL; return NULL;
} }
static int static int
hwloc__distances_get(hwloc_topology_t topology, hwloc__distances_get(hwloc_topology_t topology,
hwloc_obj_type_t type, const char *name, hwloc_obj_type_t type,
unsigned *nrp, struct hwloc_distances_s **distancesp, unsigned *nrp, struct hwloc_distances_s **distancesp,
unsigned long kind, unsigned long flags __hwloc_attribute_unused) unsigned long kind, unsigned long flags __hwloc_attribute_unused)
{ {
@ -602,7 +776,10 @@ hwloc__distances_get(hwloc_topology_t topology,
unsigned long kind_from = kind & HWLOC_DISTANCES_KIND_FROM_ALL; unsigned long kind_from = kind & HWLOC_DISTANCES_KIND_FROM_ALL;
unsigned long kind_means = kind & HWLOC_DISTANCES_KIND_MEANS_ALL; unsigned long kind_means = kind & HWLOC_DISTANCES_KIND_MEANS_ALL;
if (type != HWLOC_OBJ_TYPE_NONE && type != dist->type) if (name && (!dist->name || strcmp(name, dist->name)))
continue;
if (type != HWLOC_OBJ_TYPE_NONE && type != dist->unique_type)
continue; continue;
if (kind_from && !(kind_from & dist->kind)) if (kind_from && !(kind_from & dist->kind))
@ -640,7 +817,7 @@ hwloc_distances_get(hwloc_topology_t topology,
return -1; return -1;
} }
return hwloc__distances_get(topology, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags); return hwloc__distances_get(topology, NULL, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags);
} }
int int
@ -655,14 +832,40 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
return -1; return -1;
} }
/* switch back to types since we don't support groups for now */ /* FIXME: passing the depth of a group level may return group distances at a different depth */
type = hwloc_get_depth_type(topology, depth); type = hwloc_get_depth_type(topology, depth);
if (type == (hwloc_obj_type_t)-1) { if (type == (hwloc_obj_type_t)-1) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
return hwloc__distances_get(topology, type, nrp, distancesp, kind, flags); return hwloc__distances_get(topology, NULL, type, nrp, distancesp, kind, flags);
}
/* Retrieve the distances structures whose name matches the given name.
 *
 * No flag is accepted here and the topology must be loaded, otherwise
 * errno is set to EINVAL and -1 is returned.
 * The lookup is delegated to hwloc__distances_get() with no type
 * restriction and with all kinds enabled.
 */
int
hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
			    unsigned *nrp, struct hwloc_distances_s **distancesp,
			    unsigned long flags)
{
  if (!flags && topology->is_loaded)
    return hwloc__distances_get(topology, name, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, HWLOC_DISTANCES_KIND_ALL, flags);

  errno = EINVAL;
  return -1;
}
int
hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
unsigned *nrp, struct hwloc_distances_s **distancesp,
unsigned long kind, unsigned long flags)
{
if (flags || !topology->is_loaded) {
errno = EINVAL;
return -1;
}
return hwloc__distances_get(topology, NULL, type, nrp, distancesp, kind, flags);
} }
/****************************************************** /******************************************************
@ -823,10 +1026,14 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
float *accuracies, float *accuracies,
int needcheck) int needcheck)
{ {
HWLOC_VLA(unsigned, groupids, nbobjs); unsigned *groupids;
unsigned nbgroups = 0; unsigned nbgroups = 0;
unsigned i,j; unsigned i,j;
int verbose = topology->grouping_verbose; int verbose = topology->grouping_verbose;
hwloc_obj_t *groupobjs;
unsigned * groupsizes;
uint64_t *groupvalues;
unsigned failed = 0;
if (nbobjs <= 2) if (nbobjs <= 2)
return; return;
@ -836,6 +1043,10 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
/* TODO hwloc__find_groups_by_max_distance() for bandwidth */ /* TODO hwloc__find_groups_by_max_distance() for bandwidth */
return; return;
groupids = malloc(nbobjs * sizeof(*groupids));
if (!groupids)
return;
for(i=0; i<nbaccuracies; i++) { for(i=0; i<nbaccuracies; i++) {
if (verbose) if (verbose)
fprintf(stderr, "Trying to group %u %s objects according to physical distances with accuracy %f\n", fprintf(stderr, "Trying to group %u %s objects according to physical distances with accuracy %f\n",
@ -847,13 +1058,13 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
break; break;
} }
if (!nbgroups) if (!nbgroups)
return; goto out_with_groupids;
{ groupobjs = malloc(nbgroups * sizeof(*groupobjs));
HWLOC_VLA(hwloc_obj_t, groupobjs, nbgroups); groupsizes = malloc(nbgroups * sizeof(*groupsizes));
HWLOC_VLA(unsigned, groupsizes, nbgroups); groupvalues = malloc(nbgroups * nbgroups * sizeof(*groupvalues));
HWLOC_VLA(uint64_t, groupvalues, nbgroups*nbgroups); if (!groupobjs || !groupsizes || !groupvalues)
unsigned failed = 0; goto out_with_groups;
/* create new Group objects and record their size */ /* create new Group objects and record their size */
memset(&(groupsizes[0]), 0, sizeof(groupsizes[0]) * nbgroups); memset(&(groupsizes[0]), 0, sizeof(groupsizes[0]) * nbgroups);
@ -884,7 +1095,7 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
if (failed) if (failed)
/* don't try to group above if we got a NULL group here, just keep this incomplete level */ /* don't try to group above if we got a NULL group here, just keep this incomplete level */
return; goto out_with_groups;
/* factorize values */ /* factorize values */
memset(&(groupvalues[0]), 0, sizeof(groupvalues[0]) * nbgroups * nbgroups); memset(&(groupvalues[0]), 0, sizeof(groupvalues[0]) * nbgroups * nbgroups);
@ -916,5 +1127,11 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
#endif #endif
hwloc__groups_by_distances(topology, nbgroups, groupobjs, groupvalues, kind, nbaccuracies, accuracies, 0 /* no need to check generated matrix */); hwloc__groups_by_distances(topology, nbgroups, groupobjs, groupvalues, kind, nbaccuracies, accuracies, 0 /* no need to check generated matrix */);
}
out_with_groups:
free(groupobjs);
free(groupsizes);
free(groupvalues);
out_with_groupids:
free(groupids);
} }

View file

@ -1,14 +1,14 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2015 Inria. All rights reserved. * Copyright © 2009-2018 Inria. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2010 Université Bordeaux
* Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <private/private.h> #include "private/private.h"
#include <private/misc.h> #include "private/misc.h"
#include <stdarg.h> #include <stdarg.h>
#ifdef HAVE_SYS_UTSNAME_H #ifdef HAVE_SYS_UTSNAME_H
@ -28,6 +28,7 @@ extern char *program_invocation_name;
extern char *__progname; extern char *__progname;
#endif #endif
#ifndef HWLOC_HAVE_CORRECT_SNPRINTF
int hwloc_snprintf(char *str, size_t size, const char *format, ...) int hwloc_snprintf(char *str, size_t size, const char *format, ...)
{ {
int ret; int ret;
@ -77,21 +78,7 @@ int hwloc_snprintf(char *str, size_t size, const char *format, ...)
return ret; return ret;
} }
#endif
/* Case-insensitively compare the part of haystack that precedes its first
 * ':' (or its end, if there is no ':') with the beginning of needle.
 *
 * Returns 0 when every compared character matches and at least n characters
 * were compared, and non-zero otherwise.
 */
int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n)
{
  size_t i = 0;
  while (*haystack && *haystack != ':') {
    /* tolower() is only defined for EOF and values representable as
     * unsigned char, so convert before calling it: a plain char may be
     * negative for non-ASCII bytes.
     */
    int low_h = tolower((unsigned char) *haystack++);
    int low_n = tolower((unsigned char) *needle++);
    /* a needle shorter than the haystack prefix mismatches on its
     * terminating '\0', so needle is never read past its end.
     */
    if (low_h != low_n)
      return 1;
    i++;
  }
  return i < n;
}
void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unused, void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unused,
void *cached_uname __hwloc_attribute_unused) void *cached_uname __hwloc_attribute_unused)

View file

@ -1,14 +1,14 @@
/* /*
* Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2019 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/plugins.h> #include "hwloc/plugins.h"
#include <private/private.h> #include "private/private.h"
#include <private/debug.h> #include "private/debug.h"
#include <private/misc.h> #include "private/misc.h"
#include <fcntl.h> #include <fcntl.h>
#ifdef HAVE_UNISTD_H #ifdef HAVE_UNISTD_H
@ -23,6 +23,11 @@
#define close _close #define close _close
#endif #endif
/**************************************
* Init/Exit and Forced PCI localities
*/
static void static void
hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology, hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology,
const char *string /* must contain a ' ' */, const char *string /* must contain a ' ' */,
@ -109,11 +114,11 @@ hwloc_pci_forced_locality_parse(struct hwloc_topology *topology, const char *_en
void void
hwloc_pci_discovery_init(struct hwloc_topology *topology) hwloc_pci_discovery_init(struct hwloc_topology *topology)
{ {
topology->need_pci_belowroot_apply_locality = 0;
topology->pci_has_forced_locality = 0; topology->pci_has_forced_locality = 0;
topology->pci_forced_locality_nr = 0; topology->pci_forced_locality_nr = 0;
topology->pci_forced_locality = NULL; topology->pci_forced_locality = NULL;
topology->first_pci_locality = topology->last_pci_locality = NULL;
} }
void void
@ -135,7 +140,7 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
if (!err) { if (!err) {
if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */ if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */
buffer = malloc(st.st_size+1); buffer = malloc(st.st_size+1);
if (read(fd, buffer, st.st_size) == st.st_size) { if (buffer && read(fd, buffer, st.st_size) == st.st_size) {
buffer[st.st_size] = '\0'; buffer[st.st_size] = '\0';
hwloc_pci_forced_locality_parse(topology, buffer); hwloc_pci_forced_locality_parse(topology, buffer);
} }
@ -152,16 +157,31 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
} }
void void
hwloc_pci_discovery_exit(struct hwloc_topology *topology __hwloc_attribute_unused) hwloc_pci_discovery_exit(struct hwloc_topology *topology)
{ {
struct hwloc_pci_locality_s *cur;
unsigned i; unsigned i;
for(i=0; i<topology->pci_forced_locality_nr; i++) for(i=0; i<topology->pci_forced_locality_nr; i++)
hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset); hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset);
free(topology->pci_forced_locality); free(topology->pci_forced_locality);
cur = topology->first_pci_locality;
while (cur) {
struct hwloc_pci_locality_s *next = cur->next;
hwloc_bitmap_free(cur->cpuset);
free(cur);
cur = next;
}
hwloc_pci_discovery_init(topology); hwloc_pci_discovery_init(topology);
} }
/******************************
* Inserting in Tree by Bus ID
*/
#ifdef HWLOC_DEBUG #ifdef HWLOC_DEBUG
static void static void
hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused, hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
@ -324,32 +344,16 @@ hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep,
hwloc_pci_add_object(NULL /* no parent on top of tree */, treep, obj); hwloc_pci_add_object(NULL /* no parent on top of tree */, treep, obj);
} }
int
hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old_tree) /**********************
* Attaching PCI Trees
*/
static struct hwloc_obj *
hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology,
struct hwloc_obj *old_tree)
{ {
struct hwloc_obj **next_hb_p; struct hwloc_obj * new = NULL, **newp = &new;
enum hwloc_type_filter_e bfilter;
if (!old_tree)
/* found nothing, exit */
return 0;
#ifdef HWLOC_DEBUG
hwloc_debug("%s", "\nPCI hierarchy:\n");
hwloc_pci_traverse(NULL, old_tree, hwloc_pci_traverse_print_cb);
hwloc_debug("%s", "\n");
#endif
next_hb_p = &hwloc_get_root_obj(topology)->io_first_child;
while (*next_hb_p)
next_hb_p = &((*next_hb_p)->next_sibling);
bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE];
if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE) {
*next_hb_p = old_tree;
topology->modified = 1;
goto done;
}
/* /*
* tree points to all objects connected to any upstream bus in the machine. * tree points to all objects connected to any upstream bus in the machine.
@ -358,15 +362,29 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old
*/ */
while (old_tree) { while (old_tree) {
/* start a new host bridge */ /* start a new host bridge */
struct hwloc_obj *hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX); struct hwloc_obj *hostbridge;
struct hwloc_obj **dstnextp = &hostbridge->io_first_child; struct hwloc_obj **dstnextp;
struct hwloc_obj **srcnextp = &old_tree; struct hwloc_obj **srcnextp;
struct hwloc_obj *child = *srcnextp; struct hwloc_obj *child;
unsigned short current_domain = child->attr->pcidev.domain; unsigned short current_domain;
unsigned char current_bus = child->attr->pcidev.bus; unsigned char current_bus;
unsigned char current_subordinate = current_bus; unsigned char current_subordinate;
hwloc_debug("Starting new PCI hostbridge %04x:%02x\n", current_domain, current_bus); hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX);
if (!hostbridge) {
/* just queue remaining things without hostbridges and return */
*newp = old_tree;
return new;
}
dstnextp = &hostbridge->io_first_child;
srcnextp = &old_tree;
child = *srcnextp;
current_domain = child->attr->pcidev.domain;
current_bus = child->attr->pcidev.bus;
current_subordinate = current_bus;
hwloc_debug("Adding new PCI hostbridge %04x:%02x\n", current_domain, current_bus);
next_child: next_child:
/* remove next child from tree */ /* remove next child from tree */
@ -395,19 +413,14 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old
hostbridge->attr->bridge.downstream.pci.domain = current_domain; hostbridge->attr->bridge.downstream.pci.domain = current_domain;
hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus; hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus;
hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate; hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate;
hwloc_debug("New PCI hostbridge %04x:[%02x-%02x]\n", hwloc_debug(" new PCI hostbridge covers %04x:[%02x-%02x]\n",
current_domain, current_bus, current_subordinate); current_domain, current_bus, current_subordinate);
*next_hb_p = hostbridge; *newp = hostbridge;
next_hb_p = &hostbridge->next_sibling; newp = &hostbridge->next_sibling;
topology->modified = 1; /* needed in case somebody reconnects levels before the core calls hwloc_pci_belowroot_apply_locality()
* or if hwloc_pci_belowroot_apply_locality() keeps hostbridges below root.
*/
} }
done: return new;
topology->need_pci_belowroot_apply_locality = 1;
return 0;
} }
static struct hwloc_obj * static struct hwloc_obj *
@ -458,6 +471,9 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
unsigned i; unsigned i;
int err; int err;
hwloc_debug("Looking for parent of PCI busid %04x:%02x:%02x.%01x\n",
busid->domain, busid->bus, busid->dev, busid->func);
/* try to match a forced locality */ /* try to match a forced locality */
if (topology->pci_has_forced_locality) { if (topology->pci_has_forced_locality) {
for(i=0; i<topology->pci_forced_locality_nr; i++) { for(i=0; i<topology->pci_forced_locality_nr; i++) {
@ -489,7 +505,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
} }
if (*env) { if (*env) {
/* force the cpuset */ /* force the cpuset */
hwloc_debug("Overriding localcpus using %s in the environment\n", envname); hwloc_debug("Overriding PCI locality using %s in the environment\n", envname);
hwloc_bitmap_sscanf(cpuset, env); hwloc_bitmap_sscanf(cpuset, env);
forced = 1; forced = 1;
} }
@ -499,7 +515,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
} }
if (!forced) { if (!forced) {
/* get the cpuset by asking the OS backend. */ /* get the cpuset by asking the backend that provides the relevant hook, if any. */
struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend; struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend;
if (backend) if (backend)
err = backend->get_pci_busid_cpuset(backend, busid, cpuset); err = backend->get_pci_busid_cpuset(backend, busid, cpuset);
@ -510,7 +526,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology)); hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology));
} }
hwloc_debug_bitmap("Attaching PCI tree to cpuset %s\n", cpuset); hwloc_debug_bitmap(" will attach PCI bus to cpuset %s\n", cpuset);
parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset); parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
if (parent) { if (parent) {
@ -526,11 +542,129 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
return parent; return parent;
} }
int
hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree)
{
enum hwloc_type_filter_e bfilter;
if (!tree)
/* found nothing, exit */
return 0;
#ifdef HWLOC_DEBUG
hwloc_debug("%s", "\nPCI hierarchy:\n");
hwloc_pci_traverse(NULL, tree, hwloc_pci_traverse_print_cb);
hwloc_debug("%s", "\n");
#endif
bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE];
if (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE) {
tree = hwloc_pcidisc_add_hostbridges(topology, tree);
}
while (tree) {
struct hwloc_obj *obj, *pciobj;
struct hwloc_obj *parent;
struct hwloc_pci_locality_s *loc;
unsigned domain, bus_min, bus_max;
obj = tree;
/* hostbridges don't have a PCI busid for looking up locality, use their first child */
if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST)
pciobj = obj->io_first_child;
else
pciobj = obj;
/* now we have a pci device or a pci bridge */
assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE
|| (pciobj->type == HWLOC_OBJ_BRIDGE && pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI));
if (obj->type == HWLOC_OBJ_BRIDGE) {
domain = obj->attr->bridge.downstream.pci.domain;
bus_min = obj->attr->bridge.downstream.pci.secondary_bus;
bus_max = obj->attr->bridge.downstream.pci.subordinate_bus;
} else {
domain = pciobj->attr->pcidev.domain;
bus_min = pciobj->attr->pcidev.bus;
bus_max = pciobj->attr->pcidev.bus;
}
/* find where to attach that PCI bus */
parent = hwloc__pci_find_busid_parent(topology, &pciobj->attr->pcidev);
/* reuse the previous locality if possible */
if (topology->last_pci_locality
&& parent == topology->last_pci_locality->parent
&& domain == topology->last_pci_locality->domain
&& (bus_min == topology->last_pci_locality->bus_max
|| bus_min == topology->last_pci_locality->bus_max+1)) {
hwloc_debug(" Reusing PCI locality up to bus %04x:%02x\n",
domain, bus_max);
topology->last_pci_locality->bus_max = bus_max;
goto done;
}
loc = malloc(sizeof(*loc));
if (!loc) {
/* fallback to attaching to root */
parent = hwloc_get_root_obj(topology);
goto done;
}
loc->domain = domain;
loc->bus_min = bus_min;
loc->bus_max = bus_max;
loc->parent = parent;
loc->cpuset = hwloc_bitmap_dup(parent->cpuset);
if (!loc->cpuset) {
/* fallback to attaching to root */
free(loc);
parent = hwloc_get_root_obj(topology);
goto done;
}
hwloc_debug("Adding PCI locality %s P#%u for bus %04x:[%02x:%02x]\n",
hwloc_obj_type_string(parent->type), parent->os_index, loc->domain, loc->bus_min, loc->bus_max);
if (topology->last_pci_locality) {
loc->prev = topology->last_pci_locality;
loc->next = NULL;
topology->last_pci_locality->next = loc;
topology->last_pci_locality = loc;
} else {
loc->prev = NULL;
loc->next = NULL;
topology->first_pci_locality = loc;
topology->last_pci_locality = loc;
}
done:
/* dequeue this object */
tree = obj->next_sibling;
obj->next_sibling = NULL;
hwloc_insert_object_by_parent(topology, parent, obj);
}
return 0;
}
/*********************************
* Finding PCI objects or parents
*/
struct hwloc_obj * struct hwloc_obj *
hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology,
unsigned domain, unsigned bus, unsigned dev, unsigned func) unsigned domain, unsigned bus, unsigned dev, unsigned func)
{ {
struct hwloc_pcidev_attr_s busid; struct hwloc_pcidev_attr_s busid;
hwloc_obj_t parent;
/* try to find that exact busid */
parent = hwloc_pci_find_by_busid(topology, domain, bus, dev, func);
if (parent)
return parent;
/* try to find the locality of that bus instead */
busid.domain = domain; busid.domain = domain;
busid.bus = bus; busid.bus = bus;
busid.dev = dev; busid.dev = dev;
@ -538,66 +672,10 @@ hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology,
return hwloc__pci_find_busid_parent(topology, &busid); return hwloc__pci_find_busid_parent(topology, &busid);
} }
/*
 * Move the PCI hierarchies that are queued below the root object to the
 * parent matching their PCI locality.
 *
 * Does nothing unless topology->need_pci_belowroot_apply_locality is set;
 * the flag is cleared before processing. Each I/O child of the root that is
 * a PCI device or a bridge with a PCI side is looked up with
 * hwloc__pci_find_busid_parent(). Objects whose locality is the root itself
 * stay in place, others are unlinked and re-inserted below the parent found.
 * Non-PCI objects, and host bridges without a PCI first child, are left
 * below the root.
 *
 * Always returns 0.
 */
int /* return the smallest object that contains the desired busid */
hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology)
{
  struct hwloc_obj *root = hwloc_get_root_obj(topology);
  struct hwloc_obj **listp, *obj;

  if (!topology->need_pci_belowroot_apply_locality)
    return 0;
  topology->need_pci_belowroot_apply_locality = 0;

  /* root->io_first_child contains some PCI hierarchies, and maybe some non-PCI things.
   * insert the PCI trees according to their PCI-locality.
   */
  listp = &root->io_first_child;
  while ((obj = *listp) != NULL) {
    struct hwloc_pcidev_attr_s *busid;
    struct hwloc_obj *parent;

    /* skip non-PCI objects */
    if (obj->type != HWLOC_OBJ_PCI_DEVICE
	&& !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)
	&& !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
      listp = &obj->next_sibling;
      continue;
    }

    if (obj->type == HWLOC_OBJ_PCI_DEVICE
	|| (obj->type == HWLOC_OBJ_BRIDGE
	    && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))
      busid = &obj->attr->pcidev;
    else {
      /* hostbridges don't have a PCI busid for looking up locality, use their first child if PCI */
      hwloc_obj_t child = obj->io_first_child;
      if (child && (child->type == HWLOC_OBJ_PCI_DEVICE
		    || (child->type == HWLOC_OBJ_BRIDGE
			&& child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)))
	busid = &child->attr->pcidev;
      else {
	/* no busid available: keep this object below root.
	 * listp must move to the next sibling here, otherwise *listp would
	 * yield the same object again and the loop would never terminate.
	 */
	listp = &obj->next_sibling;
	continue;
      }
    }

    /* attach the object (and children) where it belongs */
    parent = hwloc__pci_find_busid_parent(topology, busid);
    if (parent == root) {
      /* keep this object here */
      listp = &obj->next_sibling;
    } else {
      /* dequeue this object */
      *listp = obj->next_sibling;
      obj->next_sibling = NULL;
      hwloc_insert_object_by_parent(topology, parent, obj);
    }
  }

  return 0;
}
static struct hwloc_obj * static struct hwloc_obj *
hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent, hwloc__pci_find_by_busid(hwloc_obj_t parent,
unsigned domain, unsigned bus, unsigned dev, unsigned func) unsigned domain, unsigned bus, unsigned dev, unsigned func)
{ {
hwloc_obj_t child; hwloc_obj_t child;
@ -622,7 +700,7 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
&& child->attr->bridge.downstream.pci.secondary_bus <= bus && child->attr->bridge.downstream.pci.secondary_bus <= bus
&& child->attr->bridge.downstream.pci.subordinate_bus >= bus) && child->attr->bridge.downstream.pci.subordinate_bus >= bus)
/* not the right bus id, but it's included in the bus below that bridge */ /* not the right bus id, but it's included in the bus below that bridge */
return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func); return hwloc__pci_find_by_busid(child, domain, bus, dev, func);
} else if (child->type == HWLOC_OBJ_BRIDGE } else if (child->type == HWLOC_OBJ_BRIDGE
&& child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI && child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI
@ -632,7 +710,7 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
&& child->attr->bridge.downstream.pci.secondary_bus <= bus && child->attr->bridge.downstream.pci.secondary_bus <= bus
&& child->attr->bridge.downstream.pci.subordinate_bus >= bus) { && child->attr->bridge.downstream.pci.subordinate_bus >= bus) {
/* contains our bus, recurse */ /* contains our bus, recurse */
return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func); return hwloc__pci_find_by_busid(child, domain, bus, dev, func);
} }
} }
/* didn't find anything, return parent */ /* didn't find anything, return parent */
@ -640,17 +718,54 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
} }
struct hwloc_obj * struct hwloc_obj *
hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology, hwloc_pci_find_by_busid(struct hwloc_topology *topology,
unsigned domain, unsigned bus, unsigned dev, unsigned func) unsigned domain, unsigned bus, unsigned dev, unsigned func)
{ {
struct hwloc_pci_locality_s *loc;
hwloc_obj_t root = hwloc_get_root_obj(topology); hwloc_obj_t root = hwloc_get_root_obj(topology);
hwloc_obj_t parent = hwloc__pci_belowroot_find_by_busid(root, domain, bus, dev, func); hwloc_obj_t parent = NULL;
if (parent == root)
hwloc_debug("pcidisc looking for bus id %04x:%02x:%02x.%01x\n", domain, bus, dev, func);
loc = topology->first_pci_locality;
while (loc) {
if (loc->domain == domain && loc->bus_min <= bus && loc->bus_max >= bus) {
parent = loc->parent;
assert(parent);
hwloc_debug(" found pci locality for %04x:[%02x:%02x]\n",
loc->domain, loc->bus_min, loc->bus_max);
break;
}
loc = loc->next;
}
/* if we failed to insert localities, look at root too */
if (!parent)
parent = root;
hwloc_debug(" looking for bus %04x:%02x:%02x.%01x below %s P#%u\n",
domain, bus, dev, func,
hwloc_obj_type_string(parent->type), parent->os_index);
parent = hwloc__pci_find_by_busid(parent, domain, bus, dev, func);
if (parent == root) {
hwloc_debug(" found nothing better than root object, ignoring\n");
return NULL; return NULL;
else } else {
if (parent->type == HWLOC_OBJ_PCI_DEVICE
|| (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))
hwloc_debug(" found busid %04x:%02x:%02x.%01x\n",
parent->attr->pcidev.domain, parent->attr->pcidev.bus,
parent->attr->pcidev.dev, parent->attr->pcidev.func);
else
hwloc_debug(" found parent %s P#%u\n",
hwloc_obj_type_string(parent->type), parent->os_index);
return parent; return parent;
}
} }
/*******************************
* Parsing the PCI Config Space
*/
#define HWLOC_PCI_STATUS 0x06 #define HWLOC_PCI_STATUS 0x06
#define HWLOC_PCI_STATUS_CAP_LIST 0x10 #define HWLOC_PCI_STATUS_CAP_LIST 0x10
#define HWLOC_PCI_CAPABILITY_LIST 0x34 #define HWLOC_PCI_CAPABILITY_LIST 0x34
@ -703,13 +818,14 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config,
* PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane
* PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane
* PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane
* PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane
*/ */
/* lanespeed in Gbit/s */ /* lanespeed in Gbit/s */
if (speed <= 2) if (speed <= 2)
lanespeed = 2.5f * speed * 0.8f; lanespeed = 2.5f * speed * 0.8f;
else else
lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen5 will be 32 GT/s and so on */ lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */
/* linkspeed in GB/s */ /* linkspeed in GB/s */
*linkspeed = lanespeed * width / 8; *linkspeed = lanespeed * width / 8;
@ -738,30 +854,27 @@ hwloc_pcidisc_check_bridge_type(unsigned device_class, const unsigned char *conf
#define HWLOC_PCI_SUBORDINATE_BUS 0x1a #define HWLOC_PCI_SUBORDINATE_BUS 0x1a
int int
hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func,
unsigned *secondary_busp, unsigned *subordinate_busp,
const unsigned char *config) const unsigned char *config)
{ {
struct hwloc_bridge_attr_s *battr = &obj->attr->bridge; unsigned secondary_bus, subordinate_bus;
struct hwloc_pcidev_attr_s *pattr = &battr->upstream.pci;
if (config[HWLOC_PCI_PRIMARY_BUS] != pattr->bus) { if (config[HWLOC_PCI_PRIMARY_BUS] != bus) {
/* Sometimes the config space contains 00 instead of the actual primary bus number. /* Sometimes the config space contains 00 instead of the actual primary bus number.
* Always trust the bus ID because it was built by the system which has more information * Always trust the bus ID because it was built by the system which has more information
* to workaround such problems (e.g. ACPI information about PCI parent/children). * to workaround such problems (e.g. ACPI information about PCI parent/children).
*/ */
hwloc_debug(" %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n", hwloc_debug(" %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n",
pattr->domain, pattr->bus, pattr->dev, pattr->func, config[HWLOC_PCI_PRIMARY_BUS]); domain, bus, dev, func, config[HWLOC_PCI_PRIMARY_BUS]);
} }
battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI; secondary_bus = config[HWLOC_PCI_SECONDARY_BUS];
battr->downstream_type = HWLOC_OBJ_BRIDGE_PCI; subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS];
battr->downstream.pci.domain = pattr->domain;
battr->downstream.pci.secondary_bus = config[HWLOC_PCI_SECONDARY_BUS];
battr->downstream.pci.subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS];
if (battr->downstream.pci.secondary_bus <= pattr->bus if (secondary_bus <= bus
|| battr->downstream.pci.subordinate_bus <= pattr->bus || subordinate_bus <= bus
|| battr->downstream.pci.secondary_bus > battr->downstream.pci.subordinate_bus) { || secondary_bus > subordinate_bus) {
/* This should catch most cases of invalid bridge information /* This should catch most cases of invalid bridge information
* (e.g. 00 for secondary and subordinate). * (e.g. 00 for secondary and subordinate).
* Ideally we would also check that [secondary-subordinate] is included * Ideally we would also check that [secondary-subordinate] is included
@ -769,15 +882,21 @@ hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj,
* because objects may be discovered out of order (especially in the fsroot case). * because objects may be discovered out of order (especially in the fsroot case).
*/ */
hwloc_debug(" %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n", hwloc_debug(" %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n",
pattr->domain, pattr->bus, pattr->dev, pattr->func, domain, bus, dev, func,
battr->downstream.pci.secondary_bus, battr->downstream.pci.subordinate_bus); secondary_bus, subordinate_bus);
hwloc_free_unlinked_object(obj);
return -1; return -1;
} }
*secondary_busp = secondary_bus;
*subordinate_busp = subordinate_bus;
return 0; return 0;
} }
/****************
* Class Strings
*/
const char * const char *
hwloc_pci_class_string(unsigned short class_id) hwloc_pci_class_string(unsigned short class_id)
{ {

View file

@ -1,12 +1,12 @@
/* /*
* Copyright © 2017-2018 Inria. All rights reserved. * Copyright © 2017-2019 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/shmem.h> #include "hwloc/shmem.h"
#include <private/private.h> #include "private/private.h"
#ifndef HWLOC_WIN_SYS #ifndef HWLOC_WIN_SYS
@ -214,6 +214,8 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
new->support.discovery = malloc(sizeof(*new->support.discovery)); new->support.discovery = malloc(sizeof(*new->support.discovery));
new->support.cpubind = malloc(sizeof(*new->support.cpubind)); new->support.cpubind = malloc(sizeof(*new->support.cpubind));
new->support.membind = malloc(sizeof(*new->support.membind)); new->support.membind = malloc(sizeof(*new->support.membind));
if (!new->support.discovery || !new->support.cpubind || !new->support.membind)
goto out_with_support;
memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery)); memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery));
memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind)); memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind));
memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind)); memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind));
@ -230,6 +232,11 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
*topologyp = new; *topologyp = new;
return 0; return 0;
out_with_support:
free(new->support.discovery);
free(new->support.cpubind);
free(new->support.membind);
free(new);
out_with_components: out_with_components:
hwloc_components_fini(); hwloc_components_fini();
out_with_mmap: out_with_mmap:

View file

@ -1,26 +1,34 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <private/private.h> #include "private/private.h"
static int static int
hwloc_look_noos(struct hwloc_backend *backend) hwloc_look_noos(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
{ {
/*
* This backend uses the underlying OS.
* However we don't enforce topology->is_thissystem so that
* we may still force use this backend when debugging with !thissystem.
*/
struct hwloc_topology *topology = backend->topology; struct hwloc_topology *topology = backend->topology;
int nbprocs; int nbprocs;
assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
if (topology->levels[0][0]->cpuset) if (topology->levels[0][0]->cpuset)
/* somebody discovered things */ /* somebody discovered things */
return -1; return -1;
nbprocs = hwloc_fallback_nbprocessors(topology); nbprocs = hwloc_fallback_nbprocessors(0);
if (nbprocs >= 1) if (nbprocs >= 1)
topology->support.discovery->pu = 1; topology->support.discovery->pu = 1;
else else
@ -33,13 +41,15 @@ hwloc_look_noos(struct hwloc_backend *backend)
} }
static struct hwloc_backend * static struct hwloc_backend *
hwloc_noos_component_instantiate(struct hwloc_disc_component *component, hwloc_noos_component_instantiate(struct hwloc_topology *topology,
struct hwloc_disc_component *component,
unsigned excluded_phases __hwloc_attribute_unused,
const void *_data1 __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused,
const void *_data2 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused,
const void *_data3 __hwloc_attribute_unused) const void *_data3 __hwloc_attribute_unused)
{ {
struct hwloc_backend *backend; struct hwloc_backend *backend;
backend = hwloc_backend_alloc(component); backend = hwloc_backend_alloc(topology, component);
if (!backend) if (!backend)
return NULL; return NULL;
backend->discover = hwloc_look_noos; backend->discover = hwloc_look_noos;
@ -47,9 +57,9 @@ hwloc_noos_component_instantiate(struct hwloc_disc_component *component,
} }
static struct hwloc_disc_component hwloc_noos_disc_component = { static struct hwloc_disc_component hwloc_noos_disc_component = {
HWLOC_DISC_COMPONENT_TYPE_CPU,
"no_os", "no_os",
HWLOC_DISC_COMPONENT_TYPE_GLOBAL, HWLOC_DISC_PHASE_CPU,
HWLOC_DISC_PHASE_GLOBAL,
hwloc_noos_component_instantiate, hwloc_noos_component_instantiate,
40, /* lower than native OS component, higher than globals */ 40, /* lower than native OS component, higher than globals */
1, 1,

View file

@ -6,11 +6,11 @@
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <private/private.h> #include "private/private.h"
#include <private/misc.h> #include "private/misc.h"
#include <private/debug.h> #include "private/debug.h"
#include <limits.h> #include <limits.h>
#include <assert.h> #include <assert.h>
@ -122,6 +122,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
unsigned long nbs = 1; unsigned long nbs = 1;
unsigned j, mul; unsigned j, mul;
const char *tmp; const char *tmp;
struct hwloc_synthetic_intlv_loop_s *loops;
tmp = attr; tmp = attr;
while (tmp) { while (tmp) {
@ -132,9 +133,10 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
tmp++; tmp++;
} }
{
/* nr_loops colon-separated fields, but we may need one more at the end */ /* nr_loops colon-separated fields, but we may need one more at the end */
HWLOC_VLA(struct hwloc_synthetic_intlv_loop_s, loops, nr_loops+1); loops = malloc((nr_loops+1) * sizeof(*loops));
if (!loops)
goto out_with_array;
if (*attr >= '0' && *attr <= '9') { if (*attr >= '0' && *attr <= '9') {
/* interleaving as x*y:z*t:... */ /* interleaving as x*y:z*t:... */
@ -148,11 +150,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (tmp2 == tmp || *tmp2 != '*') { if (tmp2 == tmp || *tmp2 != '*') {
if (verbose) if (verbose)
fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number before '*'\n", tmp); fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number before '*'\n", tmp);
free(loops);
goto out_with_array; goto out_with_array;
} }
if (!step) { if (!step) {
if (verbose) if (verbose)
fprintf(stderr, "Invalid interleaving loop with step 0 at '%s'\n", tmp); fprintf(stderr, "Invalid interleaving loop with step 0 at '%s'\n", tmp);
free(loops);
goto out_with_array; goto out_with_array;
} }
tmp2++; tmp2++;
@ -160,11 +164,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (tmp3 == tmp2 || (*tmp3 && *tmp3 != ':' && *tmp3 != ')' && *tmp3 != ' ')) { if (tmp3 == tmp2 || (*tmp3 && *tmp3 != ':' && *tmp3 != ')' && *tmp3 != ' ')) {
if (verbose) if (verbose)
fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number between '*' and ':'\n", tmp); fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number between '*' and ':'\n", tmp);
free(loops);
goto out_with_array; goto out_with_array;
} }
if (!nb) { if (!nb) {
if (verbose) if (verbose)
fprintf(stderr, "Invalid interleaving loop with number 0 at '%s'\n", tmp2); fprintf(stderr, "Invalid interleaving loop with number 0 at '%s'\n", tmp2);
free(loops);
goto out_with_array; goto out_with_array;
} }
loops[cur_loop].step = step; loops[cur_loop].step = step;
@ -192,11 +198,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (err < 0) { if (err < 0) {
if (verbose) if (verbose)
fprintf(stderr, "Failed to read synthetic index interleaving loop type '%s'\n", tmp); fprintf(stderr, "Failed to read synthetic index interleaving loop type '%s'\n", tmp);
free(loops);
goto out_with_array; goto out_with_array;
} }
if (type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) { if (type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) {
if (verbose) if (verbose)
fprintf(stderr, "Misc object type disallowed in synthetic index interleaving loop type '%s'\n", tmp); fprintf(stderr, "Misc object type disallowed in synthetic index interleaving loop type '%s'\n", tmp);
free(loops);
goto out_with_array; goto out_with_array;
} }
for(i=0; ; i++) { for(i=0; ; i++) {
@ -217,6 +225,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (verbose) if (verbose)
fprintf(stderr, "Failed to find level for synthetic index interleaving loop type '%s'\n", fprintf(stderr, "Failed to find level for synthetic index interleaving loop type '%s'\n",
tmp); tmp);
free(loops);
goto out_with_array; goto out_with_array;
} }
tmp = strchr(tmp, ':'); tmp = strchr(tmp, ':');
@ -235,6 +244,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
if (loops[i].level_depth == mydepth && i != cur_loop) { if (loops[i].level_depth == mydepth && i != cur_loop) {
if (verbose) if (verbose)
fprintf(stderr, "Invalid duplicate interleaving loop type in synthetic index '%s'\n", attr); fprintf(stderr, "Invalid duplicate interleaving loop type in synthetic index '%s'\n", attr);
free(loops);
goto out_with_array; goto out_with_array;
} }
if (loops[i].level_depth < mydepth if (loops[i].level_depth < mydepth
@ -264,6 +274,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
} else { } else {
if (verbose) if (verbose)
fprintf(stderr, "Invalid index interleaving total width %lu instead of %lu\n", nbs, total); fprintf(stderr, "Invalid index interleaving total width %lu instead of %lu\n", nbs, total);
free(loops);
goto out_with_array; goto out_with_array;
} }
} }
@ -278,6 +289,8 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
mul *= nb; mul *= nb;
} }
free(loops);
/* check that we have the right values (cannot pass total, cannot give duplicate 0) */ /* check that we have the right values (cannot pass total, cannot give duplicate 0) */
for(j=0; j<total; j++) { for(j=0; j<total; j++) {
if (array[j] >= total) { if (array[j] >= total) {
@ -293,7 +306,6 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data,
} }
indexes->array = array; indexes->array = array;
}
} }
return; return;
@ -527,7 +539,8 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
if (*pos < '0' || *pos > '9') { if (*pos < '0' || *pos > '9') {
if (hwloc_type_sscanf(pos, &type, &attrs, sizeof(attrs)) < 0) { if (hwloc_type_sscanf(pos, &type, &attrs, sizeof(attrs)) < 0) {
if (!strncmp(pos, "Die", 3) || !strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) { if (!strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) {
/* possible future types */
type = HWLOC_OBJ_GROUP; type = HWLOC_OBJ_GROUP;
} else { } else {
/* FIXME: allow generic "Cache" string? would require to deal with possibly duplicate cache levels */ /* FIXME: allow generic "Cache" string? would require to deal with possibly duplicate cache levels */
@ -645,6 +658,12 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
if (type_count[HWLOC_OBJ_DIE] > 1) {
if (verbose)
fprintf(stderr, "Synthetic string cannot have several die levels\n");
errno = EINVAL;
return -1;
}
if (type_count[HWLOC_OBJ_NUMANODE] > 1) { if (type_count[HWLOC_OBJ_NUMANODE] > 1) {
if (verbose) if (verbose)
fprintf(stderr, "Synthetic string cannot have several NUMA node levels\n"); fprintf(stderr, "Synthetic string cannot have several NUMA node levels\n");
@ -829,6 +848,7 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr,
obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096; obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096;
break; break;
case HWLOC_OBJ_PACKAGE: case HWLOC_OBJ_PACKAGE:
case HWLOC_OBJ_DIE:
break; break;
case HWLOC_OBJ_L1CACHE: case HWLOC_OBJ_L1CACHE:
case HWLOC_OBJ_L2CACHE: case HWLOC_OBJ_L2CACHE:
@ -953,13 +973,19 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
} }
static int static int
hwloc_look_synthetic(struct hwloc_backend *backend) hwloc_look_synthetic(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
{ {
/*
* This backend enforces !topology->is_thissystem by default.
*/
struct hwloc_topology *topology = backend->topology; struct hwloc_topology *topology = backend->topology;
struct hwloc_synthetic_backend_data_s *data = backend->private_data; struct hwloc_synthetic_backend_data_s *data = backend->private_data;
hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
unsigned i; unsigned i;
assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL);
assert(!topology->levels[0][0]->cpuset); assert(!topology->levels[0][0]->cpuset);
hwloc_alloc_root_sets(topology->levels[0][0]); hwloc_alloc_root_sets(topology->levels[0][0]);
@ -1001,7 +1027,9 @@ hwloc_synthetic_backend_disable(struct hwloc_backend *backend)
} }
static struct hwloc_backend * static struct hwloc_backend *
hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component, hwloc_synthetic_component_instantiate(struct hwloc_topology *topology,
struct hwloc_disc_component *component,
unsigned excluded_phases __hwloc_attribute_unused,
const void *_data1, const void *_data1,
const void *_data2 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused,
const void *_data3 __hwloc_attribute_unused) const void *_data3 __hwloc_attribute_unused)
@ -1021,7 +1049,7 @@ hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component,
} }
} }
backend = hwloc_backend_alloc(component); backend = hwloc_backend_alloc(topology, component);
if (!backend) if (!backend)
goto out; goto out;
@ -1051,8 +1079,8 @@ hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component,
} }
static struct hwloc_disc_component hwloc_synthetic_disc_component = { static struct hwloc_disc_component hwloc_synthetic_disc_component = {
HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
"synthetic", "synthetic",
HWLOC_DISC_PHASE_GLOBAL,
~0, ~0,
hwloc_synthetic_component_instantiate, hwloc_synthetic_component_instantiate,
30, 30,
@ -1267,6 +1295,12 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag
/* if exporting to v1 or without extended-types, use all-v1-compatible Socket name */ /* if exporting to v1 or without extended-types, use all-v1-compatible Socket name */
res = hwloc_snprintf(tmp, tmplen, "Socket%s", aritys); res = hwloc_snprintf(tmp, tmplen, "Socket%s", aritys);
} else if (obj->type == HWLOC_OBJ_DIE
&& (flags & (HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES
|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1))) {
/* if exporting to v1 or without extended-types, use all-v1-compatible Group name */
res = hwloc_snprintf(tmp, tmplen, "Group%s", aritys);
} else if (obj->type == HWLOC_OBJ_GROUP /* don't export group depth */ } else if (obj->type == HWLOC_OBJ_GROUP /* don't export group depth */
|| flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) { || flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) {
res = hwloc_snprintf(tmp, tmplen, "%s%s", hwloc_obj_type_string(obj->type), aritys); res = hwloc_snprintf(tmp, tmplen, "%s%s", hwloc_obj_type_string(obj->type), aritys);
@ -1323,16 +1357,26 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
} }
while (mchild) { while (mchild) {
/* v2: export all NUMA children */ /* FIXME: really recurse to export memcaches and numanode,
* but it requires clever parsing of [ memcache [numa] [numa] ] during import,
assert(mchild->type == HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */ * better attaching of things to describe the hierarchy.
*/
hwloc_obj_t numanode = mchild;
/* only export the first NUMA node leaf of each memory child
* FIXME: This assumes mscache aren't shared between nodes, that's true in current platforms
*/
while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) {
assert(numanode->arity == 1);
numanode = numanode->memory_first_child;
}
assert(numanode); /* there's always a numanode at the bottom of the memory tree */
if (needprefix) if (needprefix)
hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' '); hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' ');
hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, '['); hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, '[');
res = hwloc__export_synthetic_obj(topology, flags, mchild, (unsigned)-1, tmp, tmplen); res = hwloc__export_synthetic_obj(topology, flags, numanode, (unsigned)-1, tmp, tmplen);
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
return -1; return -1;
@ -1366,9 +1410,8 @@ hwloc_check_memory_symmetric(struct hwloc_topology * topology)
assert(node); assert(node);
first_parent = node->parent; first_parent = node->parent;
assert(hwloc__obj_type_is_normal(first_parent->type)); /* only depth-1 memory children for now */
/* check whether all object on parent's level have same number of NUMA children */ /* check whether all object on parent's level have same number of NUMA bits */
for(i=0; i<hwloc_get_nbobjs_by_depth(topology, first_parent->depth); i++) { for(i=0; i<hwloc_get_nbobjs_by_depth(topology, first_parent->depth); i++) {
hwloc_obj_t parent, mchild; hwloc_obj_t parent, mchild;
@ -1379,10 +1422,9 @@ hwloc_check_memory_symmetric(struct hwloc_topology * topology)
if (parent->memory_arity != first_parent->memory_arity) if (parent->memory_arity != first_parent->memory_arity)
goto out_with_bitmap; goto out_with_bitmap;
/* clear these NUMA children from remaining_nodes */ /* clear children NUMA bits from remaining_nodes */
mchild = parent->memory_first_child; mchild = parent->memory_first_child;
while (mchild) { while (mchild) {
assert(mchild->type == HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */
hwloc_bitmap_clr(remaining_nodes, mchild->os_index); /* cannot use parent->nodeset, some normal children may have other NUMA nodes */ hwloc_bitmap_clr(remaining_nodes, mchild->os_index); /* cannot use parent->nodeset, some normal children may have other NUMA nodes */
mchild = mchild->next_sibling; mchild = mchild->next_sibling;
} }

View file

@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@ -9,10 +9,10 @@
/* To try to get all declarations duplicated below. */ /* To try to get all declarations duplicated below. */
#define _WIN32_WINNT 0x0601 #define _WIN32_WINNT 0x0601
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <private/private.h> #include "private/private.h"
#include <private/debug.h> #include "private/debug.h"
#include <windows.h> #include <windows.h>
@ -731,8 +731,14 @@ hwloc_win_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unuse
*/ */
static int static int
hwloc_look_windows(struct hwloc_backend *backend) hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
{ {
/*
* This backend uses the underlying OS.
* However we don't enforce topology->is_thissystem so that
* we may still force use this backend when debugging with !thissystem.
*/
struct hwloc_topology *topology = backend->topology; struct hwloc_topology *topology = backend->topology;
hwloc_bitmap_t groups_pu_set = NULL; hwloc_bitmap_t groups_pu_set = NULL;
SYSTEM_INFO SystemInfo; SYSTEM_INFO SystemInfo;
@ -740,6 +746,8 @@ hwloc_look_windows(struct hwloc_backend *backend)
int gotnuma = 0; int gotnuma = 0;
int gotnumamemory = 0; int gotnumamemory = 0;
assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
if (topology->levels[0][0]->cpuset) if (topology->levels[0][0]->cpuset)
/* somebody discovered things */ /* somebody discovered things */
return -1; return -1;
@ -1136,13 +1144,15 @@ static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribu
} }
static struct hwloc_backend * static struct hwloc_backend *
hwloc_windows_component_instantiate(struct hwloc_disc_component *component, hwloc_windows_component_instantiate(struct hwloc_topology *topology,
struct hwloc_disc_component *component,
unsigned excluded_phases __hwloc_attribute_unused,
const void *_data1 __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused,
const void *_data2 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused,
const void *_data3 __hwloc_attribute_unused) const void *_data3 __hwloc_attribute_unused)
{ {
struct hwloc_backend *backend; struct hwloc_backend *backend;
backend = hwloc_backend_alloc(component); backend = hwloc_backend_alloc(topology, component);
if (!backend) if (!backend)
return NULL; return NULL;
backend->discover = hwloc_look_windows; backend->discover = hwloc_look_windows;
@ -1150,9 +1160,9 @@ hwloc_windows_component_instantiate(struct hwloc_disc_component *component,
} }
static struct hwloc_disc_component hwloc_windows_disc_component = { static struct hwloc_disc_component hwloc_windows_disc_component = {
HWLOC_DISC_COMPONENT_TYPE_CPU,
"windows", "windows",
HWLOC_DISC_COMPONENT_TYPE_GLOBAL, HWLOC_DISC_PHASE_CPU,
HWLOC_DISC_PHASE_GLOBAL,
hwloc_windows_component_instantiate, hwloc_windows_component_instantiate,
50, 50,
1, 1,
@ -1168,10 +1178,12 @@ const struct hwloc_component hwloc_windows_component = {
}; };
int int
hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) { hwloc_fallback_nbprocessors(unsigned flags __hwloc_attribute_unused) {
int n; int n;
SYSTEM_INFO sysinfo; SYSTEM_INFO sysinfo;
/* TODO handle flags & HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE */
/* by default, ignore groups (return only the number in the current group) */ /* by default, ignore groups (return only the number in the current group) */
GetSystemInfo(&sysinfo); GetSystemInfo(&sysinfo);
n = sysinfo.dwNumberOfProcessors; /* FIXME could be non-contiguous, rather return a mask from dwActiveProcessorMask? */ n = sysinfo.dwNumberOfProcessors; /* FIXME could be non-contiguous, rather return a mask from dwActiveProcessorMask? */

File diff suppressed because it is too large Load diff

View file

@ -1,18 +1,18 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <hwloc/plugins.h> #include "hwloc/plugins.h"
#include <private/private.h> #include "private/private.h"
#include <private/misc.h> #include "private/misc.h"
#include <private/xml.h> #include "private/xml.h"
#include <private/debug.h> #include "private/debug.h"
#include <string.h> #include <string.h>
#include <assert.h> #include <assert.h>
@ -27,9 +27,8 @@
*******************/ *******************/
struct hwloc__nolibxml_backend_data_s { struct hwloc__nolibxml_backend_data_s {
size_t buflen; /* size of both buffer and copy buffers, set during backend_init() */ size_t buflen; /* size of both buffer, set during backend_init() */
char *buffer; /* allocated and filled during backend_init() */ char *buffer; /* allocated and filled during backend_init() */
char *copy; /* allocated during backend_init(), used later during actual parsing */
}; };
typedef struct hwloc__nolibxml_import_state_data_s { typedef struct hwloc__nolibxml_import_state_data_s {
@ -260,14 +259,11 @@ hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata,
struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data; struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data;
unsigned major, minor; unsigned major, minor;
char *end; char *end;
char *buffer; char *buffer = nbdata->buffer;
char *tagname;
HWLOC_BUILD_ASSERT(sizeof(*nstate) <= sizeof(state->data)); HWLOC_BUILD_ASSERT(sizeof(*nstate) <= sizeof(state->data));
/* use a copy in the temporary buffer, we may modify during parsing */
buffer = nbdata->copy;
memcpy(buffer, nbdata->buffer, nbdata->buflen);
/* skip headers */ /* skip headers */
while (!strncmp(buffer, "<?xml ", 6) || !strncmp(buffer, "<!DOCTYPE ", 10)) { while (!strncmp(buffer, "<?xml ", 6) || !strncmp(buffer, "<!DOCTYPE ", 10)) {
buffer = strchr(buffer, '\n'); buffer = strchr(buffer, '\n');
@ -281,14 +277,17 @@ hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata,
bdata->version_major = major; bdata->version_major = major;
bdata->version_minor = minor; bdata->version_minor = minor;
end = strchr(buffer, '>') + 1; end = strchr(buffer, '>') + 1;
tagname = "topology";
} else if (!strncmp(buffer, "<topology>", 10)) { } else if (!strncmp(buffer, "<topology>", 10)) {
bdata->version_major = 1; bdata->version_major = 1;
bdata->version_minor = 0; bdata->version_minor = 0;
end = buffer + 10; end = buffer + 10;
tagname = "topology";
} else if (!strncmp(buffer, "<root>", 6)) { } else if (!strncmp(buffer, "<root>", 6)) {
bdata->version_major = 0; bdata->version_major = 0;
bdata->version_minor = 9; bdata->version_minor = 9;
end = buffer + 6; end = buffer + 6;
tagname = "root";
} else } else
goto failed; goto failed;
@ -301,7 +300,7 @@ hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata,
state->parent = NULL; state->parent = NULL;
nstate->closed = 0; nstate->closed = 0;
nstate->tagbuffer = end; nstate->tagbuffer = end;
nstate->tagname = (char *) "topology"; nstate->tagname = tagname;
nstate->attrbuffer = NULL; nstate->attrbuffer = NULL;
return 0; /* success */ return 0; /* success */
@ -320,10 +319,6 @@ hwloc_nolibxml_free_buffers(struct hwloc_xml_backend_data_s *bdata)
free(nbdata->buffer); free(nbdata->buffer);
nbdata->buffer = NULL; nbdata->buffer = NULL;
} }
if (nbdata->copy) {
free(nbdata->copy);
nbdata->copy = NULL;
}
} }
static void static void
@ -429,19 +424,11 @@ hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata,
goto out_with_nbdata; goto out_with_nbdata;
} }
/* allocate a temporary copy buffer that we may modify during parsing */
nbdata->copy = malloc(nbdata->buflen+1);
if (!nbdata->copy)
goto out_with_buffer;
nbdata->copy[nbdata->buflen] = '\0';
bdata->look_init = hwloc_nolibxml_look_init; bdata->look_init = hwloc_nolibxml_look_init;
bdata->look_done = hwloc_nolibxml_look_done; bdata->look_done = hwloc_nolibxml_look_done;
bdata->backend_exit = hwloc_nolibxml_backend_exit; bdata->backend_exit = hwloc_nolibxml_backend_exit;
return 0; return 0;
out_with_buffer:
free(nbdata->buffer);
out_with_nbdata: out_with_nbdata:
free(nbdata); free(nbdata);
out: out:
@ -666,7 +653,7 @@ hwloc__nolibxml_export_end_object(hwloc__xml_export_state_t state, const char *n
} }
static void static void
hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length) hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length __hwloc_attribute_unused)
{ {
hwloc__nolibxml_export_state_data_t ndata = (void *) state->data; hwloc__nolibxml_export_state_data_t ndata = (void *) state->data;
int res; int res;
@ -678,7 +665,7 @@ hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *
} }
ndata->has_content = 1; ndata->has_content = 1;
res = hwloc_snprintf(ndata->buffer, ndata->remaining, buffer, length); res = hwloc_snprintf(ndata->buffer, ndata->remaining, "%s", buffer);
hwloc__nolibxml_export_update_buffer(ndata, res); hwloc__nolibxml_export_update_buffer(ndata, res);
} }
@ -799,6 +786,7 @@ hwloc___nolibxml_prepare_export_diff(hwloc_topology_diff_t diff, const char *ref
state.new_prop = hwloc__nolibxml_export_new_prop; state.new_prop = hwloc__nolibxml_export_new_prop;
state.add_content = hwloc__nolibxml_export_add_content; state.add_content = hwloc__nolibxml_export_add_content;
state.end_object = hwloc__nolibxml_export_end_object; state.end_object = hwloc__nolibxml_export_end_object;
state.global = NULL;
ndata->indent = 0; ndata->indent = 0;
ndata->written = 0; ndata->written = 0;

View file

@ -6,12 +6,12 @@
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <private/xml.h> #include "private/xml.h"
#include <private/private.h> #include "private/private.h"
#include <private/misc.h> #include "private/misc.h"
#include <private/debug.h> #include "private/debug.h"
#include <math.h> #include <math.h>
@ -158,7 +158,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "cache_size")) { else if (!strcmp(name, "cache_size")) {
unsigned long long lvalue = strtoull(value, NULL, 10); unsigned long long lvalue = strtoull(value, NULL, 10);
if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE)
obj->attr->cache.size = lvalue; obj->attr->cache.size = lvalue;
else if (hwloc__xml_verbose()) else if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring cache_size attribute for non-cache object type\n", fprintf(stderr, "%s: ignoring cache_size attribute for non-cache object type\n",
@ -167,7 +167,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "cache_linesize")) { else if (!strcmp(name, "cache_linesize")) {
unsigned long lvalue = strtoul(value, NULL, 10); unsigned long lvalue = strtoul(value, NULL, 10);
if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE)
obj->attr->cache.linesize = lvalue; obj->attr->cache.linesize = lvalue;
else if (hwloc__xml_verbose()) else if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring cache_linesize attribute for non-cache object type\n", fprintf(stderr, "%s: ignoring cache_linesize attribute for non-cache object type\n",
@ -176,7 +176,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "cache_associativity")) { else if (!strcmp(name, "cache_associativity")) {
int lvalue = atoi(value); int lvalue = atoi(value);
if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE)
obj->attr->cache.associativity = lvalue; obj->attr->cache.associativity = lvalue;
else if (hwloc__xml_verbose()) else if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring cache_associativity attribute for non-cache object type\n", fprintf(stderr, "%s: ignoring cache_associativity attribute for non-cache object type\n",
@ -185,7 +185,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "cache_type")) { else if (!strcmp(name, "cache_type")) {
unsigned long lvalue = strtoul(value, NULL, 10); unsigned long lvalue = strtoul(value, NULL, 10);
if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) { if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) {
if (lvalue == HWLOC_OBJ_CACHE_UNIFIED if (lvalue == HWLOC_OBJ_CACHE_UNIFIED
|| lvalue == HWLOC_OBJ_CACHE_DATA || lvalue == HWLOC_OBJ_CACHE_DATA
|| lvalue == HWLOC_OBJ_CACHE_INSTRUCTION) || lvalue == HWLOC_OBJ_CACHE_INSTRUCTION)
@ -211,7 +211,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "depth")) { else if (!strcmp(name, "depth")) {
unsigned long lvalue = strtoul(value, NULL, 10); unsigned long lvalue = strtoul(value, NULL, 10);
if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) { if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) {
obj->attr->cache.depth = lvalue; obj->attr->cache.depth = lvalue;
} else if (obj->type == HWLOC_OBJ_GROUP || obj->type == HWLOC_OBJ_BRIDGE) { } else if (obj->type == HWLOC_OBJ_GROUP || obj->type == HWLOC_OBJ_BRIDGE) {
/* will be overwritten by the core */ /* will be overwritten by the core */
@ -805,21 +805,13 @@ hwloc__xml_import_object(hwloc_topology_t topology,
state->global->msgprefix); state->global->msgprefix);
goto error_with_object; goto error_with_object;
} }
} else if (!strcasecmp(attrvalue, "Die")) {
/* deal with possible future type */
obj->type = HWLOC_OBJ_GROUP;
obj->subtype = strdup("Die");
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_DIE;
obj->attr->group.dont_merge = data->dont_merge_die_groups;
} else if (!strcasecmp(attrvalue, "Tile")) { } else if (!strcasecmp(attrvalue, "Tile")) {
/* deal with possible future type */ /* deal with possible future type */
obj->type = HWLOC_OBJ_GROUP; obj->type = HWLOC_OBJ_GROUP;
obj->subtype = strdup("Tile");
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_TILE; obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_TILE;
} else if (!strcasecmp(attrvalue, "Module")) { } else if (!strcasecmp(attrvalue, "Module")) {
/* deal with possible future type */ /* deal with possible future type */
obj->type = HWLOC_OBJ_GROUP; obj->type = HWLOC_OBJ_GROUP;
obj->subtype = strdup("Module");
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE; obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE;
} else if (!strcasecmp(attrvalue, "MemCache")) { } else if (!strcasecmp(attrvalue, "MemCache")) {
/* ignore possible future type */ /* ignore possible future type */
@ -1053,6 +1045,13 @@ hwloc__xml_import_object(hwloc_topology_t topology,
/* end of 1.x specific checks */ /* end of 1.x specific checks */
} }
/* 2.0 backward compatibility */
if (obj->type == HWLOC_OBJ_GROUP) {
if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE
|| (obj->subtype && !strcmp(obj->subtype, "Die")))
obj->type = HWLOC_OBJ_DIE;
}
/* check that cache attributes are coherent with the actual type */ /* check that cache attributes are coherent with the actual type */
if (hwloc__obj_type_is_cache(obj->type) if (hwloc__obj_type_is_cache(obj->type)
&& obj->type != hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type)) { && obj->type != hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type)) {
@ -1212,19 +1211,24 @@ hwloc__xml_import_object(hwloc_topology_t topology,
static int static int
hwloc__xml_v2import_distances(hwloc_topology_t topology, hwloc__xml_v2import_distances(hwloc_topology_t topology,
hwloc__xml_import_state_t state) hwloc__xml_import_state_t state,
int heterotypes)
{ {
hwloc_obj_type_t type = HWLOC_OBJ_TYPE_NONE; hwloc_obj_type_t unique_type = HWLOC_OBJ_TYPE_NONE;
hwloc_obj_type_t *different_types = NULL;
unsigned nbobjs = 0; unsigned nbobjs = 0;
int indexing = 0; int indexing = heterotypes;
int os_indexing = 0; int os_indexing = 0;
int gp_indexing = 0; int gp_indexing = heterotypes;
char *name = NULL;
unsigned long kind = 0; unsigned long kind = 0;
unsigned nr_indexes, nr_u64values; unsigned nr_indexes, nr_u64values;
uint64_t *indexes; uint64_t *indexes;
uint64_t *u64values; uint64_t *u64values;
int ret; int ret;
#define _TAG_NAME (heterotypes ? "distances2hetero" : "distances2")
/* process attributes */ /* process attributes */
while (1) { while (1) {
char *attrname, *attrvalue; char *attrname, *attrvalue;
@ -1233,8 +1237,12 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
if (!strcmp(attrname, "nbobjs")) if (!strcmp(attrname, "nbobjs"))
nbobjs = strtoul(attrvalue, NULL, 10); nbobjs = strtoul(attrvalue, NULL, 10);
else if (!strcmp(attrname, "type")) { else if (!strcmp(attrname, "type")) {
if (hwloc_type_sscanf(attrvalue, &type, NULL, 0) < 0) if (hwloc_type_sscanf(attrvalue, &unique_type, NULL, 0) < 0) {
if (hwloc__xml_verbose())
fprintf(stderr, "%s: unrecognized %s type %s\n",
state->global->msgprefix, _TAG_NAME, attrvalue);
goto out; goto out;
}
} }
else if (!strcmp(attrname, "indexing")) { else if (!strcmp(attrname, "indexing")) {
indexing = 1; indexing = 1;
@ -1246,27 +1254,32 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
else if (!strcmp(attrname, "kind")) { else if (!strcmp(attrname, "kind")) {
kind = strtoul(attrvalue, NULL, 10); kind = strtoul(attrvalue, NULL, 10);
} }
else if (!strcmp(attrname, "name")) {
name = attrvalue;
}
else { else {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring unknown distance attribute %s\n", fprintf(stderr, "%s: ignoring unknown %s attribute %s\n",
state->global->msgprefix, attrname); state->global->msgprefix, _TAG_NAME, attrname);
} }
} }
/* abort if missing attribute */ /* abort if missing attribute */
if (!nbobjs || type == HWLOC_OBJ_TYPE_NONE || !indexing || !kind) { if (!nbobjs || (!heterotypes && unique_type == HWLOC_OBJ_TYPE_NONE) || !indexing || !kind) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: distance2 missing some attributes\n", fprintf(stderr, "%s: %s missing some attributes\n",
state->global->msgprefix); state->global->msgprefix, _TAG_NAME);
goto out; goto out;
} }
indexes = malloc(nbobjs*sizeof(*indexes)); indexes = malloc(nbobjs*sizeof(*indexes));
u64values = malloc(nbobjs*nbobjs*sizeof(*u64values)); u64values = malloc(nbobjs*nbobjs*sizeof(*u64values));
if (!indexes || !u64values) { if (heterotypes)
different_types = malloc(nbobjs*sizeof(*different_types));
if (!indexes || !u64values || (heterotypes && !different_types)) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: failed to allocate distances arrays for %u objects\n", fprintf(stderr, "%s: failed to allocate %s arrays for %u objects\n",
state->global->msgprefix, nbobjs); state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_with_arrays; goto out_with_arrays;
} }
@ -1290,16 +1303,16 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
is_u64values = 1; is_u64values = 1;
if (!is_index && !is_u64values) { if (!is_index && !is_u64values) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: distance2 with unrecognized child %s\n", fprintf(stderr, "%s: %s with unrecognized child %s\n",
state->global->msgprefix, tag); state->global->msgprefix, _TAG_NAME, tag);
goto out_with_arrays; goto out_with_arrays;
} }
if (state->global->next_attr(&childstate, &attrname, &attrvalue) < 0 if (state->global->next_attr(&childstate, &attrname, &attrvalue) < 0
|| strcmp(attrname, "length")) { || strcmp(attrname, "length")) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: distance2 child must have length attribute\n", fprintf(stderr, "%s: %s child must have length attribute\n",
state->global->msgprefix); state->global->msgprefix, _TAG_NAME);
goto out_with_arrays; goto out_with_arrays;
} }
length = atoi(attrvalue); length = atoi(attrvalue);
@ -1307,24 +1320,43 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
ret = state->global->get_content(&childstate, &buffer, length); ret = state->global->get_content(&childstate, &buffer, length);
if (ret < 0) { if (ret < 0) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: distance2 child needs content of length %d\n", fprintf(stderr, "%s: %s child needs content of length %d\n",
state->global->msgprefix, length); state->global->msgprefix, _TAG_NAME, length);
goto out_with_arrays; goto out_with_arrays;
} }
if (is_index) { if (is_index) {
/* get indexes */ /* get indexes */
char *tmp; char *tmp, *tmp2;
if (nr_indexes >= nbobjs) { if (nr_indexes >= nbobjs) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: distance2 with more than %u indexes\n", fprintf(stderr, "%s: %s with more than %u indexes\n",
state->global->msgprefix, nbobjs); state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_with_arrays; goto out_with_arrays;
} }
tmp = buffer; tmp = buffer;
while (1) { while (1) {
char *next; char *next;
unsigned long long u = strtoull(tmp, &next, 0); unsigned long long u;
if (heterotypes) {
hwloc_obj_type_t t = HWLOC_OBJ_TYPE_NONE;
if (hwloc_type_sscanf(tmp, &t, NULL, 0) < 0) {
if (hwloc__xml_verbose())
fprintf(stderr, "%s: %s with unrecognized heterogeneous type %s\n",
state->global->msgprefix, _TAG_NAME, tmp);
goto out_with_arrays;
}
tmp2 = strchr(tmp, ':');
if (!tmp2) {
if (hwloc__xml_verbose())
fprintf(stderr, "%s: %s with missing colon after heterogeneous type %s\n",
state->global->msgprefix, _TAG_NAME, tmp);
goto out_with_arrays;
}
tmp = tmp2+1;
different_types[nr_indexes] = t;
}
u = strtoull(tmp, &next, 0);
if (next == tmp) if (next == tmp)
break; break;
indexes[nr_indexes++] = u; indexes[nr_indexes++] = u;
@ -1340,8 +1372,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
char *tmp; char *tmp;
if (nr_u64values >= nbobjs*nbobjs) { if (nr_u64values >= nbobjs*nbobjs) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: distance2 with more than %u u64values\n", fprintf(stderr, "%s: %s with more than %u u64values\n",
state->global->msgprefix, nbobjs*nbobjs); state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs);
goto out_with_arrays; goto out_with_arrays;
} }
tmp = buffer; tmp = buffer;
@ -1364,8 +1396,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
ret = state->global->close_tag(&childstate); ret = state->global->close_tag(&childstate);
if (ret < 0) { if (ret < 0) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: distance2 with more than %u indexes\n", fprintf(stderr, "%s: %s with more than %u indexes\n",
state->global->msgprefix, nbobjs); state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_with_arrays; goto out_with_arrays;
} }
@ -1374,56 +1406,60 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
if (nr_indexes != nbobjs) { if (nr_indexes != nbobjs) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: distance2 with less than %u indexes\n", fprintf(stderr, "%s: %s with less than %u indexes\n",
state->global->msgprefix, nbobjs); state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_with_arrays; goto out_with_arrays;
} }
if (nr_u64values != nbobjs*nbobjs) { if (nr_u64values != nbobjs*nbobjs) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: distance2 with less than %u u64values\n", fprintf(stderr, "%s: %s with less than %u u64values\n",
state->global->msgprefix, nbobjs*nbobjs); state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs);
goto out_with_arrays; goto out_with_arrays;
} }
if (nbobjs < 2) { if (nbobjs < 2) {
/* distances with a single object are useless, even if the XML isn't invalid */ /* distances with a single object are useless, even if the XML isn't invalid */
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring distances2 with only %u objects\n", fprintf(stderr, "%s: ignoring %s with only %u objects\n",
state->global->msgprefix, nbobjs); state->global->msgprefix, _TAG_NAME, nbobjs);
goto out_ignore; goto out_ignore;
} }
if (type == HWLOC_OBJ_PU || type == HWLOC_OBJ_NUMANODE) { if (unique_type == HWLOC_OBJ_PU || unique_type == HWLOC_OBJ_NUMANODE) {
if (!os_indexing) { if (!os_indexing) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring PU or NUMA distances2 without os_indexing\n", fprintf(stderr, "%s: ignoring PU or NUMA %s without os_indexing\n",
state->global->msgprefix); state->global->msgprefix, _TAG_NAME);
goto out_ignore; goto out_ignore;
} }
} else { } else {
if (!gp_indexing) { if (!gp_indexing) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring !PU or !NUMA distances2 without gp_indexing\n", fprintf(stderr, "%s: ignoring !PU or !NUMA %s without gp_indexing\n",
state->global->msgprefix); state->global->msgprefix, _TAG_NAME);
goto out_ignore; goto out_ignore;
} }
} }
hwloc_internal_distances_add_by_index(topology, type, nbobjs, indexes, u64values, kind, 0); hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0);
/* prevent freeing below */ /* prevent freeing below */
indexes = NULL; indexes = NULL;
u64values = NULL; u64values = NULL;
different_types = NULL;
out_ignore: out_ignore:
free(different_types);
free(indexes); free(indexes);
free(u64values); free(u64values);
return state->global->close_tag(state); return state->global->close_tag(state);
out_with_arrays: out_with_arrays:
free(different_types);
free(indexes); free(indexes);
free(u64values); free(u64values);
out: out:
return -1; return -1;
#undef _TAG_NAME
} }
static int static int
@ -1625,8 +1661,12 @@ hwloc_convert_from_v1dist_floats(hwloc_topology_t topology, unsigned nbobjs, flo
/* this canNOT be the first XML call */ /* this canNOT be the first XML call */
static int static int
hwloc_look_xml(struct hwloc_backend *backend) hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
{ {
/*
* This backend enforces !topology->is_thissystem by default.
*/
struct hwloc_topology *topology = backend->topology; struct hwloc_topology *topology = backend->topology;
struct hwloc_xml_backend_data_s *data = backend->private_data; struct hwloc_xml_backend_data_s *data = backend->private_data;
struct hwloc__xml_import_state_s state, childstate; struct hwloc__xml_import_state_s state, childstate;
@ -1634,9 +1674,10 @@ hwloc_look_xml(struct hwloc_backend *backend)
char *tag; char *tag;
int gotignored = 0; int gotignored = 0;
hwloc_localeswitch_declare; hwloc_localeswitch_declare;
char *env;
int ret; int ret;
assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL);
state.global = data; state.global = data;
assert(!root->cpuset); assert(!root->cpuset);
@ -1647,9 +1688,6 @@ hwloc_look_xml(struct hwloc_backend *backend)
data->first_numanode = data->last_numanode = NULL; data->first_numanode = data->last_numanode = NULL;
data->first_v1dist = data->last_v1dist = NULL; data->first_v1dist = data->last_v1dist = NULL;
env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS");
data->dont_merge_die_groups = env && atoi(env);
ret = data->look_init(data, &state); ret = data->look_init(data, &state);
if (ret < 0) if (ret < 0)
goto failed; goto failed;
@ -1684,15 +1722,20 @@ hwloc_look_xml(struct hwloc_backend *backend)
goto failed; goto failed;
if (!ret) if (!ret)
break; break;
if (strcmp(tag, "distances2")) { if (!strcmp(tag, "distances2")) {
ret = hwloc__xml_v2import_distances(topology, &childstate, 0);
if (ret < 0)
goto failed;
} else if (!strcmp(tag, "distances2hetero")) {
ret = hwloc__xml_v2import_distances(topology, &childstate, 1);
if (ret < 0)
goto failed;
} else {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring unknown tag `%s' after root object, expected `distances2'\n", fprintf(stderr, "%s: ignoring unknown tag `%s' after root object, expected `distances2'\n",
data->msgprefix, tag); data->msgprefix, tag);
goto done; goto done;
} }
ret = hwloc__xml_v2import_distances(topology, &childstate);
if (ret < 0)
goto failed;
state.global->close_child(&childstate); state.global->close_child(&childstate);
} }
} }
@ -1742,8 +1785,8 @@ done:
i<nbobjs; i<nbobjs;
i++, node = node->next_cousin) i++, node = node->next_cousin)
objs[i] = node; objs[i] = node;
hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values);
hwloc_internal_distances_add(topology, nbobjs, objs, values, v1dist->kind, 0); hwloc_internal_distances_add(topology, NULL, nbobjs, objs, values, v1dist->kind, 0);
} else { } else {
free(objs); free(objs);
free(values); free(values);
@ -1791,9 +1834,11 @@ hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values);
/* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */ /* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */
topology->support.discovery->pu = 1; topology->support.discovery->pu = 1;
topology->support.discovery->disallowed_pu = 1;
if (data->nbnumanodes) { if (data->nbnumanodes) {
topology->support.discovery->numa = 1; topology->support.discovery->numa = 1;
topology->support.discovery->numa_memory = 1; // FIXME topology->support.discovery->numa_memory = 1; // FIXME
topology->support.discovery->disallowed_numa = 1;
} }
if (data->look_done) if (data->look_done)
@ -1936,6 +1981,9 @@ hwloc__xml_export_safestrdup(const char *old)
char *new = malloc(strlen(old)+1); char *new = malloc(strlen(old)+1);
char *dst = new; char *dst = new;
const char *src = old; const char *src = old;
if (!new)
return NULL;
while (*src) { while (*src) {
if (HWLOC_XML_CHAR_VALID(*src)) if (HWLOC_XML_CHAR_VALID(*src))
*(dst++) = *src; *(dst++) = *src;
@ -1955,6 +2003,8 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
if (v1export && obj->type == HWLOC_OBJ_PACKAGE) if (v1export && obj->type == HWLOC_OBJ_PACKAGE)
state->new_prop(state, "type", "Socket"); state->new_prop(state, "type", "Socket");
else if (v1export && obj->type == HWLOC_OBJ_DIE)
state->new_prop(state, "type", "Group");
else if (v1export && hwloc__obj_type_is_cache(obj->type)) else if (v1export && hwloc__obj_type_is_cache(obj->type))
state->new_prop(state, "type", "Cache"); state->new_prop(state, "type", "Cache");
else else
@ -1966,8 +2016,23 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
} }
if (obj->cpuset) { if (obj->cpuset) {
if (v1export && obj->type == HWLOC_OBJ_NUMANODE && obj->sibling_rank > 0) { int empty_cpusets = 0;
/* v1 non-first NUMA nodes have empty cpusets */
if (v1export && obj->type == HWLOC_OBJ_NUMANODE) {
/* walk up this memory hierarchy to find-out if we are the first numa node.
* v1 non-first NUMA nodes have empty cpusets.
*/
hwloc_obj_t parent = obj;
while (!hwloc_obj_type_is_normal(parent->type)) {
if (parent->sibling_rank > 0) {
empty_cpusets = 1;
break;
}
parent = parent->parent;
}
}
if (empty_cpusets) {
state->new_prop(state, "cpuset", "0x0"); state->new_prop(state, "cpuset", "0x0");
state->new_prop(state, "online_cpuset", "0x0"); state->new_prop(state, "online_cpuset", "0x0");
state->new_prop(state, "complete_cpuset", "0x0"); state->new_prop(state, "complete_cpuset", "0x0");
@ -2024,13 +2089,17 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
if (obj->name) { if (obj->name) {
char *name = hwloc__xml_export_safestrdup(obj->name); char *name = hwloc__xml_export_safestrdup(obj->name);
state->new_prop(state, "name", name); if (name) {
free(name); state->new_prop(state, "name", name);
free(name);
}
} }
if (!v1export && obj->subtype) { if (!v1export && obj->subtype) {
char *subtype = hwloc__xml_export_safestrdup(obj->subtype); char *subtype = hwloc__xml_export_safestrdup(obj->subtype);
state->new_prop(state, "subtype", subtype); if (subtype) {
free(subtype); state->new_prop(state, "subtype", subtype);
free(subtype);
}
} }
switch (obj->type) { switch (obj->type) {
@ -2057,6 +2126,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
case HWLOC_OBJ_L1ICACHE: case HWLOC_OBJ_L1ICACHE:
case HWLOC_OBJ_L2ICACHE: case HWLOC_OBJ_L2ICACHE:
case HWLOC_OBJ_L3ICACHE: case HWLOC_OBJ_L3ICACHE:
case HWLOC_OBJ_MEMCACHE:
sprintf(tmp, "%llu", (unsigned long long) obj->attr->cache.size); sprintf(tmp, "%llu", (unsigned long long) obj->attr->cache.size);
state->new_prop(state, "cache_size", tmp); state->new_prop(state, "cache_size", tmp);
sprintf(tmp, "%u", obj->attr->cache.depth); sprintf(tmp, "%u", obj->attr->cache.depth);
@ -2125,23 +2195,34 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
for(i=0; i<obj->infos_count; i++) { for(i=0; i<obj->infos_count; i++) {
char *name = hwloc__xml_export_safestrdup(obj->infos[i].name); char *name = hwloc__xml_export_safestrdup(obj->infos[i].name);
char *value = hwloc__xml_export_safestrdup(obj->infos[i].value); char *value = hwloc__xml_export_safestrdup(obj->infos[i].value);
struct hwloc__xml_export_state_s childstate; if (name && value) {
state->new_child(state, &childstate, "info"); struct hwloc__xml_export_state_s childstate;
childstate.new_prop(&childstate, "name", name); state->new_child(state, &childstate, "info");
childstate.new_prop(&childstate, "value", value); childstate.new_prop(&childstate, "name", name);
childstate.end_object(&childstate, "info"); childstate.new_prop(&childstate, "value", value);
childstate.end_object(&childstate, "info");
}
free(name); free(name);
free(value); free(value);
} }
if (v1export && obj->subtype) { if (v1export && obj->subtype) {
char *subtype = hwloc__xml_export_safestrdup(obj->subtype); char *subtype = hwloc__xml_export_safestrdup(obj->subtype);
if (subtype) {
struct hwloc__xml_export_state_s childstate;
int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC);
state->new_child(state, &childstate, "info");
childstate.new_prop(&childstate, "name", is_coproctype ? "CoProcType" : "Type");
childstate.new_prop(&childstate, "value", subtype);
childstate.end_object(&childstate, "info");
free(subtype);
}
}
if (v1export && obj->type == HWLOC_OBJ_DIE) {
struct hwloc__xml_export_state_s childstate; struct hwloc__xml_export_state_s childstate;
int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC);
state->new_child(state, &childstate, "info"); state->new_child(state, &childstate, "info");
childstate.new_prop(&childstate, "name", is_coproctype ? "CoProcType" : "Type"); childstate.new_prop(&childstate, "name", "Type");
childstate.new_prop(&childstate, "value", subtype); childstate.new_prop(&childstate, "value", "Die");
childstate.end_object(&childstate, "info"); childstate.end_object(&childstate, "info");
free(subtype);
} }
if (v1export && !obj->parent) { if (v1export && !obj->parent) {
@ -2152,19 +2233,27 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
for(dist = topology->first_dist; dist; dist = dist->next) { for(dist = topology->first_dist; dist; dist = dist->next) {
struct hwloc__xml_export_state_s childstate; struct hwloc__xml_export_state_s childstate;
unsigned nbobjs = dist->nbobjs; unsigned nbobjs = dist->nbobjs;
unsigned *logical_to_v2array;
int depth; int depth;
if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->type)) if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->unique_type))
continue; continue;
if (!(dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY)) if (!(dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY))
continue; continue;
{ if (dist->kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES)
HWLOC_VLA(unsigned, logical_to_v2array, nbobjs); continue;
logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array));
if (!logical_to_v2array) {
fprintf(stderr, "xml/export/v1: failed to allocated logical_to_v2array\n");
continue;
}
for(i=0; i<nbobjs; i++) for(i=0; i<nbobjs; i++)
logical_to_v2array[dist->objs[i]->logical_index] = i; logical_to_v2array[dist->objs[i]->logical_index] = i;
/* compute the relative depth */ /* compute the relative depth */
if (dist->type == HWLOC_OBJ_NUMANODE) { if (dist->unique_type == HWLOC_OBJ_NUMANODE) {
/* for NUMA nodes, use the highest normal-parent depth + 1 */ /* for NUMA nodes, use the highest normal-parent depth + 1 */
depth = -1; depth = -1;
for(i=0; i<nbobjs; i++) { for(i=0; i<nbobjs; i++) {
@ -2188,7 +2277,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
} }
} }
done: done:
depth = hwloc_get_type_depth(topology, dist->type) + parent_with_memory; depth = hwloc_get_type_depth(topology, dist->unique_type) + parent_with_memory;
} }
state->new_child(state, &childstate, "distances"); state->new_child(state, &childstate, "distances");
@ -2210,7 +2299,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
} }
} }
childstate.end_object(&childstate, "distances"); childstate.end_object(&childstate, "distances");
} free(logical_to_v2array);
} }
} }
@ -2243,13 +2332,90 @@ hwloc__xml_v2export_object (hwloc__xml_export_state_t parentstate, hwloc_topolog
static void static void
hwloc__xml_v1export_object (hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags); hwloc__xml_v1export_object (hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags);
/* Iterate over the NUMA nodes of the memory hierarchy attached to obj.
 *
 * If cur is NULL, return the first NUMA node, i.e. the one reached by
 * following memory_first_child links down from obj.
 * Otherwise return the NUMA node that follows cur in that hierarchy,
 * or NULL once the upward walk reaches obj without finding a next sibling.
 *
 * The caller must make sure obj has at least one memory child before
 * starting the iteration with cur == NULL.
 */
static hwloc_obj_t
hwloc__xml_v1export_object_next_numanode(hwloc_obj_t obj, hwloc_obj_t cur)
{
  hwloc_obj_t parent;

  if (!cur) {
    /* first numa node is on the very bottom left */
    cur = obj->memory_first_child;
    goto find_first;
  }

  /* walk-up until there's a next sibling */
  parent = cur;
  while (1) {
    if (parent->next_sibling) {
      /* found a next sibling, we'll walk down-left from there */
      cur = parent->next_sibling;
      break;
    }
    parent = parent->parent;
    if (parent == obj)
      return NULL;
  }

 find_first:
  /* Walk down-left until we reach a NUMA node.
   * The pointer is checked before every dereference: asserting only after
   * the loop would be useless since the loop condition already read cur->type.
   */
  while (1) {
    assert(cur);
    if (cur->type == HWLOC_OBJ_NUMANODE)
      break;
    cur = cur->memory_first_child;
  }
  return cur;
}
/* Collect the NUMA nodes of the memory hierarchy attached to obj.
 *
 * On return, *first_p is the first NUMA node (or NULL if obj has no memory
 * child) and *nodes_p is a calloc'ed array holding all of them, to be freed
 * by the caller. If the array cannot be allocated, *nodes_p is NULL and only
 * the first node is reported.
 *
 * Returns the number of NUMA nodes reported.
 */
static unsigned
hwloc__xml_v1export_object_list_numanodes(hwloc_obj_t obj, hwloc_obj_t *first_p, hwloc_obj_t **nodes_p)
{
  hwloc_obj_t *array, node;
  int count;

  if (!obj->memory_first_child) {
    /* nothing attached here */
    *first_p = NULL;
    *nodes_p = NULL;
    return 0;
  }

  /* there is a memory child, hence at least one numa node in the nodeset */
  count = hwloc_bitmap_weight(obj->nodeset);
  assert(count > 0);

  /* the nodeset covers all local nodes, some of which may be attached to
   * an ancestor instead of here, so this is an upper bound for the array size
   */
  array = calloc(count, sizeof(*array));
  if (!array) {
    /* degraded mode: only report the first node */
    node = hwloc__xml_v1export_object_next_numanode(obj, NULL);
    assert(node);
    *first_p = node;
    *nodes_p = NULL;
    return 1;
  }

  /* fill the array in iteration order and count what was actually found */
  count = 0;
  for(node = hwloc__xml_v1export_object_next_numanode(obj, NULL);
      node != NULL;
      node = hwloc__xml_v1export_object_next_numanode(obj, node))
    array[count++] = node;

  *first_p = array[0];
  *nodes_p = array;
  return count;
}
static void static void
hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags) hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags)
{ {
struct hwloc__xml_export_state_s gstate, mstate, ostate, *state = parentstate; struct hwloc__xml_export_state_s gstate, mstate, ostate, *state = parentstate;
hwloc_obj_t child; hwloc_obj_t child;
unsigned nr_numanodes;
hwloc_obj_t *numanodes, first_numanode;
unsigned i;
if (obj->parent->arity > 1 && obj->memory_arity > 1 && parentstate->global->v1_memory_group) { nr_numanodes = hwloc__xml_v1export_object_list_numanodes(obj, &first_numanode, &numanodes);
if (obj->parent->arity > 1 && nr_numanodes > 1 && parentstate->global->v1_memory_group) {
/* child has sibling, we must add a Group around those memory children */ /* child has sibling, we must add a Group around those memory children */
hwloc_obj_t group = parentstate->global->v1_memory_group; hwloc_obj_t group = parentstate->global->v1_memory_group;
parentstate->new_child(parentstate, &gstate, "object"); parentstate->new_child(parentstate, &gstate, "object");
@ -2266,10 +2432,8 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw
} }
/* export first memory child */ /* export first memory child */
child = obj->memory_first_child;
assert(child->type == HWLOC_OBJ_NUMANODE);
state->new_child(state, &mstate, "object"); state->new_child(state, &mstate, "object");
hwloc__xml_export_object_contents (&mstate, topology, child, flags); hwloc__xml_export_object_contents (&mstate, topology, first_numanode, flags);
/* then the actual object */ /* then the actual object */
mstate.new_child(&mstate, &ostate, "object"); mstate.new_child(&mstate, &ostate, "object");
@ -2288,9 +2452,10 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw
mstate.end_object(&mstate, "object"); mstate.end_object(&mstate, "object");
/* now other memory children */ /* now other memory children */
for_each_memory_child(child, obj) for(i=1; i<nr_numanodes; i++)
if (child->sibling_rank > 0) hwloc__xml_v1export_object (state, topology, numanodes[i], flags);
hwloc__xml_v1export_object (state, topology, child, flags);
free(numanodes);
if (state == &gstate) { if (state == &gstate) {
/* close group if any */ /* close group if any */
@ -2346,30 +2511,74 @@ hwloc__xml_v1export_object (hwloc__xml_export_state_t parentstate, hwloc_topolog
} \ } \
} while (0) } while (0)
/* Export the nr objects of array objs as a sequence of <tagname> child
 * elements of state, with at most maxperline objects per element.
 *
 * Each object is written as "<type string>:<gp_index> " (note the trailing
 * space), and each element gets a "length" property holding the number of
 * bytes of its content.
 *
 * NOTE(review): _tmp is a fixed 255-byte buffer filled with unbounded
 * sprintf() calls, so maxperline must stay small enough for the longest
 * "type:gp_index " entries to fit -- confirm before raising it above the
 * value used by the callers.
 */
#define EXPORT_TYPE_GPINDEX_ARRAY(state, nr, objs, tagname, maxperline) do { \
unsigned _i = 0; \
while (_i<(nr)) { \
char _tmp[255]; /* enough for (snprintf(type+index)+space) x maxperline */ \
char _tmp2[16]; \
size_t _len = 0; \
unsigned _j; \
struct hwloc__xml_export_state_s _childstate; \
(state)->new_child(state, &_childstate, tagname); \
for(_j=0; \
_i+_j<(nr) && _j<maxperline; \
_j++) \
_len += sprintf(_tmp+_len, "%s:%llu ", hwloc_obj_type_string((objs)[_i+_j]->type), (unsigned long long) (objs)[_i+_j]->gp_index); \
_i += _j; \
sprintf(_tmp2, "%lu", (unsigned long) _len); \
_childstate.new_prop(&_childstate, "length", _tmp2); \
_childstate.add_content(&_childstate, _tmp, _len); \
_childstate.end_object(&_childstate, tagname); \
} \
} while (0)
/* Export a single distances structure as a child element of parentstate.
 *
 * Distances between objects of different types go into a "distances2hetero"
 * element whose indexes are "type:gp_index" pairs. Other distances go into a
 * "distances2" element carrying the unique object type, the indexing mode
 * ("os" or "gp") and plain numeric indexes.
 */
static void
hwloc___xml_v2export_distances(hwloc__xml_export_state_t parentstate, struct hwloc_internal_distances_s *dist)
{
  const int hetero = dist->different_types ? 1 : 0;
  const char *element = hetero ? "distances2hetero" : "distances2";
  struct hwloc__xml_export_state_s state;
  unsigned nbobjs = dist->nbobjs;
  char tmp[255];

  /* open the element, the type attribute only makes sense when it is unique */
  parentstate->new_child(parentstate, &state, element);
  if (!hetero)
    state.new_prop(&state, "type", hwloc_obj_type_string(dist->unique_type));

  /* attributes common to both kinds of elements */
  sprintf(tmp, "%u", nbobjs);
  state.new_prop(&state, "nbobjs", tmp);
  sprintf(tmp, "%lu", dist->kind);
  state.new_prop(&state, "kind", tmp);
  if (dist->name)
    state.new_prop(&state, "name", dist->name);

  /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */
  if (hetero) {
    EXPORT_TYPE_GPINDEX_ARRAY(&state, nbobjs, dist->objs, "indexes", 10);
  } else {
    state.new_prop(&state, "indexing",
                   HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type) ? "os" : "gp");
    EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10);
  }

  /* the nbobjs x nbobjs matrix of values */
  EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10);

  state.end_object(&state, element);
}
static void static void
hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology) hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology)
{ {
struct hwloc_internal_distances_s *dist; struct hwloc_internal_distances_s *dist;
for(dist = topology->first_dist; dist; dist = dist->next) { for(dist = topology->first_dist; dist; dist = dist->next)
char tmp[255]; if (!dist->different_types)
unsigned nbobjs = dist->nbobjs; hwloc___xml_v2export_distances(parentstate, dist);
struct hwloc__xml_export_state_s state; /* export homogeneous distances first in case the importer doesn't support heterogeneous and stops there */
for(dist = topology->first_dist; dist; dist = dist->next)
parentstate->new_child(parentstate, &state, "distances2"); if (dist->different_types)
hwloc___xml_v2export_distances(parentstate, dist);
state.new_prop(&state, "type", hwloc_obj_type_string(dist->type));
sprintf(tmp, "%u", nbobjs);
state.new_prop(&state, "nbobjs", tmp);
sprintf(tmp, "%lu", dist->kind);
state.new_prop(&state, "kind", tmp);
state.new_prop(&state, "indexing",
(dist->type == HWLOC_OBJ_NUMANODE || dist->type == HWLOC_OBJ_PU) ? "os" : "gp");
/* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */
EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10);
EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10);
state.end_object(&state, "distances2");
}
} }
void void
@ -2378,18 +2587,22 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top
hwloc_obj_t root = hwloc_get_root_obj(topology); hwloc_obj_t root = hwloc_get_root_obj(topology);
if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) { if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) {
if (root->memory_first_child) { hwloc_obj_t *numanodes, first_numanode;
unsigned nr_numanodes;
nr_numanodes = hwloc__xml_v1export_object_list_numanodes(root, &first_numanode, &numanodes);
if (nr_numanodes) {
/* we don't use hwloc__xml_v1export_object_with_memory() because we want/can keep root above the numa node */ /* we don't use hwloc__xml_v1export_object_with_memory() because we want/can keep root above the numa node */
struct hwloc__xml_export_state_s rstate, mstate; struct hwloc__xml_export_state_s rstate, mstate;
hwloc_obj_t child; hwloc_obj_t child;
unsigned i;
/* export the root */ /* export the root */
state->new_child(state, &rstate, "object"); state->new_child(state, &rstate, "object");
hwloc__xml_export_object_contents (&rstate, topology, root, flags); hwloc__xml_export_object_contents (&rstate, topology, root, flags);
/* export first memory child */ /* export first memory child */
child = root->memory_first_child;
assert(child->type == HWLOC_OBJ_NUMANODE);
rstate.new_child(&rstate, &mstate, "object"); rstate.new_child(&rstate, &mstate, "object");
hwloc__xml_export_object_contents (&mstate, topology, child, flags); hwloc__xml_export_object_contents (&mstate, topology, first_numanode, flags);
/* then its normal/io/misc children */ /* then its normal/io/misc children */
for_each_child(child, root) for_each_child(child, root)
hwloc__xml_v1export_object (&mstate, topology, child, flags); hwloc__xml_v1export_object (&mstate, topology, child, flags);
@ -2400,15 +2613,16 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top
/* close first memory child */ /* close first memory child */
mstate.end_object(&mstate, "object"); mstate.end_object(&mstate, "object");
/* now other memory children */ /* now other memory children */
for_each_memory_child(child, root) for(i=1; i<nr_numanodes; i++)
if (child->sibling_rank > 0) hwloc__xml_v1export_object (&rstate, topology, numanodes[i], flags);
hwloc__xml_v1export_object (&rstate, topology, child, flags);
/* close the root */ /* close the root */
rstate.end_object(&rstate, "object"); rstate.end_object(&rstate, "object");
} else { } else {
hwloc__xml_v1export_object(state, topology, root, flags); hwloc__xml_v1export_object(state, topology, root, flags);
} }
free(numanodes);
} else { } else {
hwloc__xml_v2export_object (state, topology, root, flags); hwloc__xml_v2export_object (state, topology, root, flags);
hwloc__xml_v2export_distances (state, topology); hwloc__xml_v2export_distances (state, topology);
@ -2788,7 +3002,9 @@ hwloc_xml_backend_disable(struct hwloc_backend *backend)
} }
static struct hwloc_backend * static struct hwloc_backend *
hwloc_xml_component_instantiate(struct hwloc_disc_component *component, hwloc_xml_component_instantiate(struct hwloc_topology *topology,
struct hwloc_disc_component *component,
unsigned excluded_phases __hwloc_attribute_unused,
const void *_data1, const void *_data1,
const void *_data2, const void *_data2,
const void *_data3) const void *_data3)
@ -2816,7 +3032,7 @@ hwloc_xml_component_instantiate(struct hwloc_disc_component *component,
} }
} }
backend = hwloc_backend_alloc(component); backend = hwloc_backend_alloc(topology, component);
if (!backend) if (!backend)
goto out; goto out;
@ -2868,8 +3084,8 @@ retry:
} }
static struct hwloc_disc_component hwloc_xml_disc_component = { static struct hwloc_disc_component hwloc_xml_disc_component = {
HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
"xml", "xml",
HWLOC_DISC_PHASE_GLOBAL,
~0, ~0,
hwloc_xml_component_instantiate, hwloc_xml_component_instantiate,
30, 30,

File diff suppressed because it is too large Load diff

View file

@ -1,16 +1,17 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2019 Inria. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2010 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include <private/autogen/config.h> #include "private/autogen/config.h"
#include <hwloc.h> #include "hwloc.h"
#include <private/private.h> #include "private/private.h"
#include <private/misc.h> #include "private/misc.h"
#include <private/debug.h> #include "private/debug.h"
#ifdef HAVE_STRINGS_H #ifdef HAVE_STRINGS_H
#include <strings.h> #include <strings.h>
#endif /* HAVE_STRINGS_H */ #endif /* HAVE_STRINGS_H */
@ -40,6 +41,8 @@ hwloc_get_depth_type (hwloc_topology_t topology, int depth)
return HWLOC_OBJ_OS_DEVICE; return HWLOC_OBJ_OS_DEVICE;
case HWLOC_TYPE_DEPTH_MISC: case HWLOC_TYPE_DEPTH_MISC:
return HWLOC_OBJ_MISC; return HWLOC_OBJ_MISC;
case HWLOC_TYPE_DEPTH_MEMCACHE:
return HWLOC_OBJ_MEMCACHE;
default: default:
return HWLOC_OBJ_TYPE_NONE; return HWLOC_OBJ_TYPE_NONE;
} }
@ -237,8 +240,10 @@ hwloc_obj_type_string (hwloc_obj_type_t obj)
case HWLOC_OBJ_MACHINE: return "Machine"; case HWLOC_OBJ_MACHINE: return "Machine";
case HWLOC_OBJ_MISC: return "Misc"; case HWLOC_OBJ_MISC: return "Misc";
case HWLOC_OBJ_GROUP: return "Group"; case HWLOC_OBJ_GROUP: return "Group";
case HWLOC_OBJ_MEMCACHE: return "MemCache";
case HWLOC_OBJ_NUMANODE: return "NUMANode"; case HWLOC_OBJ_NUMANODE: return "NUMANode";
case HWLOC_OBJ_PACKAGE: return "Package"; case HWLOC_OBJ_PACKAGE: return "Package";
case HWLOC_OBJ_DIE: return "Die";
case HWLOC_OBJ_L1CACHE: return "L1Cache"; case HWLOC_OBJ_L1CACHE: return "L1Cache";
case HWLOC_OBJ_L2CACHE: return "L2Cache"; case HWLOC_OBJ_L2CACHE: return "L2Cache";
case HWLOC_OBJ_L3CACHE: return "L3Cache"; case HWLOC_OBJ_L3CACHE: return "L3Cache";
@ -256,6 +261,41 @@ hwloc_obj_type_string (hwloc_obj_type_t obj)
} }
} }
/* Check if string matches the given type at least on minmatch chars.
 *
 * The comparison is case-insensitive on letters: type must be given in
 * lowercase and string may use either case. Matching stops at the end of
 * string, at the end of type, or at the first character of string that
 * differs from type.
 *
 * On success, return the address where matching stopped, either pointing
 * to \0 or to a suffix (digits, colon, etc).
 * On error, return NULL. This happens when fewer than minmatch characters
 * matched, or when matching stopped on a letter or dash (string is then
 * another type name rather than this type followed by a suffix).
 */
static __hwloc_inline const char *
hwloc__type_match(const char *string,
                  const char *type, /* type must be lowercase */
                  size_t minmatch)
{
  const char *s, *t;
  unsigned i;

  for(i=0, s=string, t=type; ; i++, s++, t++) {
    if (!*s) {
      /* string ends before or together with type */
      return i < minmatch ? NULL : s;
    }

    /* Never match beyond the end of type, and only fold case when the type
     * character is a lowercase letter. Blindly comparing against
     * *t + 'A' - 'a' would let the terminating \0 of type (or a dash or
     * space in it) match unrelated bytes of string, and could then read
     * past the end of type.
     */
    if (*t
        && (*s == *t
            || (*t >= 'a' && *t <= 'z' && *s == *t + 'A' - 'a')))
      continue;

    /* string is different */
    if ((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || *s == '-')
      /* valid character that doesn't match */
      return NULL;

    /* invalid character, we reached the end of the type name in string, stop matching here */
    return i < minmatch ? NULL : s;
  }
}
int int
hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep, hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep,
union hwloc_obj_attr_u *attrp, size_t attrsize) union hwloc_obj_attr_u *attrp, size_t attrsize)
@ -267,86 +307,102 @@ hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep,
hwloc_obj_osdev_type_t ostype = (hwloc_obj_osdev_type_t) -1; hwloc_obj_osdev_type_t ostype = (hwloc_obj_osdev_type_t) -1;
char *end; char *end;
/* never match the ending \0 since we want to match things like core:2 too. /* Never match the ending \0 since we want to match things like core:2 too.
* just use hwloc_strncasecmp() everywhere. * We'll only compare the beginning substring only made of letters and dash.
*/ */
/* types without a custom depth */ /* types without a custom depth */
/* osdev subtype first to avoid conflicts coproc/core etc */ /* osdev subtype first to avoid conflicts coproc/core etc */
if (!hwloc_strncasecmp(string, "os", 2)) { if (hwloc__type_match(string, "osdev", 2)) {
type = HWLOC_OBJ_OS_DEVICE; type = HWLOC_OBJ_OS_DEVICE;
} else if (!hwloc_strncasecmp(string, "bloc", 4)) { } else if (hwloc__type_match(string, "block", 4)) {
type = HWLOC_OBJ_OS_DEVICE; type = HWLOC_OBJ_OS_DEVICE;
ostype = HWLOC_OBJ_OSDEV_BLOCK; ostype = HWLOC_OBJ_OSDEV_BLOCK;
} else if (!hwloc_strncasecmp(string, "net", 3)) { } else if (hwloc__type_match(string, "network", 3)) {
type = HWLOC_OBJ_OS_DEVICE; type = HWLOC_OBJ_OS_DEVICE;
ostype = HWLOC_OBJ_OSDEV_NETWORK; ostype = HWLOC_OBJ_OSDEV_NETWORK;
} else if (!hwloc_strncasecmp(string, "openfab", 7)) { } else if (hwloc__type_match(string, "openfabrics", 7)) {
type = HWLOC_OBJ_OS_DEVICE; type = HWLOC_OBJ_OS_DEVICE;
ostype = HWLOC_OBJ_OSDEV_OPENFABRICS; ostype = HWLOC_OBJ_OSDEV_OPENFABRICS;
} else if (!hwloc_strncasecmp(string, "dma", 3)) { } else if (hwloc__type_match(string, "dma", 3)) {
type = HWLOC_OBJ_OS_DEVICE; type = HWLOC_OBJ_OS_DEVICE;
ostype = HWLOC_OBJ_OSDEV_DMA; ostype = HWLOC_OBJ_OSDEV_DMA;
} else if (!hwloc_strncasecmp(string, "gpu", 3)) { } else if (hwloc__type_match(string, "gpu", 3)) {
type = HWLOC_OBJ_OS_DEVICE; type = HWLOC_OBJ_OS_DEVICE;
ostype = HWLOC_OBJ_OSDEV_GPU; ostype = HWLOC_OBJ_OSDEV_GPU;
} else if (!hwloc_strncasecmp(string, "copro", 5) } else if (hwloc__type_match(string, "coproc", 5)
|| !hwloc_strncasecmp(string, "co-pro", 6)) { || hwloc__type_match(string, "co-processor", 6)) {
type = HWLOC_OBJ_OS_DEVICE; type = HWLOC_OBJ_OS_DEVICE;
ostype = HWLOC_OBJ_OSDEV_COPROC; ostype = HWLOC_OBJ_OSDEV_COPROC;
} else if (!hwloc_strncasecmp(string, "machine", 2)) { } else if (hwloc__type_match(string, "machine", 2)) {
type = HWLOC_OBJ_MACHINE; type = HWLOC_OBJ_MACHINE;
} else if (!hwloc_strncasecmp(string, "node", 2) } else if (hwloc__type_match(string, "numanode", 2)
|| !hwloc_strncasecmp(string, "numa", 2)) { /* matches node and numanode */ || hwloc__type_match(string, "node", 2)) { /* for convenience */
type = HWLOC_OBJ_NUMANODE; type = HWLOC_OBJ_NUMANODE;
} else if (!hwloc_strncasecmp(string, "package", 2) } else if (hwloc__type_match(string, "memcache", 5)
|| !hwloc_strncasecmp(string, "socket", 2)) { /* backward compat with v1.10 */ || hwloc__type_match(string, "memory-side cache", 8)) {
type = HWLOC_OBJ_MEMCACHE;
} else if (hwloc__type_match(string, "package", 2)
|| hwloc__type_match(string, "socket", 2)) { /* backward compat with v1.10 */
type = HWLOC_OBJ_PACKAGE; type = HWLOC_OBJ_PACKAGE;
} else if (!hwloc_strncasecmp(string, "core", 2)) { } else if (hwloc__type_match(string, "die", 2)) {
type = HWLOC_OBJ_DIE;
} else if (hwloc__type_match(string, "core", 2)) {
type = HWLOC_OBJ_CORE; type = HWLOC_OBJ_CORE;
} else if (!hwloc_strncasecmp(string, "pu", 2)) { } else if (hwloc__type_match(string, "pu", 2)) {
type = HWLOC_OBJ_PU; type = HWLOC_OBJ_PU;
} else if (!hwloc_strncasecmp(string, "misc", 4)) { } else if (hwloc__type_match(string, "misc", 4)) {
type = HWLOC_OBJ_MISC; type = HWLOC_OBJ_MISC;
} else if (!hwloc_strncasecmp(string, "bridge", 4)) { } else if (hwloc__type_match(string, "bridge", 4)) {
type = HWLOC_OBJ_BRIDGE; type = HWLOC_OBJ_BRIDGE;
} else if (!hwloc_strncasecmp(string, "hostbridge", 6)) { } else if (hwloc__type_match(string, "hostbridge", 6)) {
type = HWLOC_OBJ_BRIDGE; type = HWLOC_OBJ_BRIDGE;
ubtype = HWLOC_OBJ_BRIDGE_HOST; ubtype = HWLOC_OBJ_BRIDGE_HOST;
} else if (!hwloc_strncasecmp(string, "pcibridge", 5)) { } else if (hwloc__type_match(string, "pcibridge", 5)) {
type = HWLOC_OBJ_BRIDGE; type = HWLOC_OBJ_BRIDGE;
ubtype = HWLOC_OBJ_BRIDGE_PCI; ubtype = HWLOC_OBJ_BRIDGE_PCI;
} else if (!hwloc_strncasecmp(string, "pci", 3)) { } else if (hwloc__type_match(string, "pcidev", 3)) {
type = HWLOC_OBJ_PCI_DEVICE; type = HWLOC_OBJ_PCI_DEVICE;
/* types with depthattr */ /* types with depthattr */
} else if ((string[0] == 'l' || string[0] == 'L') && string[1] >= '0' && string[1] <= '9') { } else if ((string[0] == 'l' || string[0] == 'L') && string[1] >= '0' && string[1] <= '9') {
char *suffix;
depthattr = strtol(string+1, &end, 10); depthattr = strtol(string+1, &end, 10);
if (*end == 'i') { if (*end == 'i' || *end == 'I') {
if (depthattr >= 1 && depthattr <= 3) { if (depthattr >= 1 && depthattr <= 3) {
type = HWLOC_OBJ_L1ICACHE + depthattr-1; type = HWLOC_OBJ_L1ICACHE + depthattr-1;
cachetypeattr = HWLOC_OBJ_CACHE_INSTRUCTION; cachetypeattr = HWLOC_OBJ_CACHE_INSTRUCTION;
suffix = end+1;
} else } else
return -1; return -1;
} else { } else {
if (depthattr >= 1 && depthattr <= 5) { if (depthattr >= 1 && depthattr <= 5) {
type = HWLOC_OBJ_L1CACHE + depthattr-1; type = HWLOC_OBJ_L1CACHE + depthattr-1;
cachetypeattr = *end == 'd' ? HWLOC_OBJ_CACHE_DATA : HWLOC_OBJ_CACHE_UNIFIED; if (*end == 'd' || *end == 'D') {
cachetypeattr = HWLOC_OBJ_CACHE_DATA;
suffix = end+1;
} else if (*end == 'u' || *end == 'U') {
cachetypeattr = HWLOC_OBJ_CACHE_UNIFIED;
suffix = end+1;
} else {
cachetypeattr = HWLOC_OBJ_CACHE_UNIFIED;
suffix = end;
}
} else } else
return -1; return -1;
} }
/* check whether the optional suffix matches "cache" */
if (!hwloc__type_match(suffix, "cache", 0))
return -1;
} else if (!hwloc_strncasecmp(string, "group", 2)) { } else if ((end = (char *) hwloc__type_match(string, "group", 2)) != NULL) {
size_t length;
type = HWLOC_OBJ_GROUP; type = HWLOC_OBJ_GROUP;
length = strcspn(string, "0123456789"); if (*end >= '0' && *end <= '9') {
if (length <= 5 && !hwloc_strncasecmp(string, "group", length) depthattr = strtol(end, &end, 10);
&& string[length] >= '0' && string[length] <= '9') {
depthattr = strtol(string+length, &end, 10);
} }
} else } else
@ -421,7 +477,9 @@ hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t
case HWLOC_OBJ_MISC: case HWLOC_OBJ_MISC:
case HWLOC_OBJ_MACHINE: case HWLOC_OBJ_MACHINE:
case HWLOC_OBJ_NUMANODE: case HWLOC_OBJ_NUMANODE:
case HWLOC_OBJ_MEMCACHE:
case HWLOC_OBJ_PACKAGE: case HWLOC_OBJ_PACKAGE:
case HWLOC_OBJ_DIE:
case HWLOC_OBJ_CORE: case HWLOC_OBJ_CORE:
case HWLOC_OBJ_PU: case HWLOC_OBJ_PU:
return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type)); return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type));
@ -523,6 +581,7 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t
case HWLOC_OBJ_L1ICACHE: case HWLOC_OBJ_L1ICACHE:
case HWLOC_OBJ_L2ICACHE: case HWLOC_OBJ_L2ICACHE:
case HWLOC_OBJ_L3ICACHE: case HWLOC_OBJ_L3ICACHE:
case HWLOC_OBJ_MEMCACHE:
if (verbose) { if (verbose) {
char assoc[32]; char assoc[32];
if (obj->attr->cache.associativity == -1) if (obj->attr->cache.associativity == -1)

View file

@ -50,6 +50,8 @@ xmrig::App::App(Process *process)
xmrig::App::~App() xmrig::App::~App()
{ {
Cpu::release();
delete m_signals; delete m_signals;
delete m_console; delete m_console;
delete m_controller; delete m_controller;

View file

@ -39,6 +39,11 @@
#include "version.h" #include "version.h"
#ifdef XMRIG_ALGO_RANDOMX
# include "crypto/rx/RxConfig.h"
#endif
namespace xmrig { namespace xmrig {
@ -59,24 +64,36 @@ inline static const char *asmName(Assembly::Id assembly)
#endif #endif
static void print_memory(Config *config) { static void print_memory(Config *config)
# ifdef _WIN32 {
# ifdef XMRIG_OS_WIN
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s",
"HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? GREEN_BOLD("permission granted") : RED_BOLD("unavailable")) : RED_BOLD("disabled")); "HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? GREEN_BOLD("permission granted") : RED_BOLD("unavailable")) : RED_BOLD("disabled"));
# else
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? GREEN_BOLD("supported") : RED_BOLD("disabled"));
# endif
# ifdef XMRIG_ALGO_RANDOMX
# ifdef XMRIG_OS_LINUX
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s",
"1GB PAGES", (VirtualMemory::isOneGbPagesAvailable() ? (config->rx().isOneGbPages() ? GREEN_BOLD("supported") : YELLOW_BOLD("disabled")) : YELLOW_BOLD("unavailable")));
# else
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "1GB PAGES", YELLOW_BOLD("unavailable"));
# endif
# endif # endif
} }
static void print_cpu(Config *) static void print_cpu(Config *)
{ {
const ICpuInfo *info = Cpu::info(); const auto info = Cpu::info();
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %sx64 %sAES", Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %sx64 %sAES",
"CPU", "CPU",
info->brand(), info->brand(),
info->packages(), info->packages(),
info->isX64() ? GREEN_BOLD_S : RED_BOLD_S "-", info->isX64() ? GREEN_BOLD_S : RED_BOLD_S "-",
info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-" info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-"
); );
# if defined(XMRIG_FEATURE_LIBCPUID) || defined (XMRIG_FEATURE_HWLOC) # if defined(XMRIG_FEATURE_LIBCPUID) || defined (XMRIG_FEATURE_HWLOC)
Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("L2:") WHITE_BOLD("%.1f MB") BLACK_BOLD(" L3:") WHITE_BOLD("%.1f MB") Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("L2:") WHITE_BOLD("%.1f MB") BLACK_BOLD(" L3:") WHITE_BOLD("%.1f MB")

View file

@ -25,6 +25,7 @@
#include "crypto/rx/RxConfig.h" #include "crypto/rx/RxConfig.h"
#include "crypto/common/HugePagesInfo.h"
#include <cstdint> #include <cstdint>
@ -44,9 +45,9 @@ class IRxStorage
public: public:
virtual ~IRxStorage() = default; virtual ~IRxStorage() = default;
virtual RxDataset *dataset(const Job &job, uint32_t nodeId) const = 0; virtual HugePagesInfo hugePages() const = 0;
virtual std::pair<uint32_t, uint32_t> hugePages() const = 0; virtual RxDataset *dataset(const Job &job, uint32_t nodeId) const = 0;
virtual void init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode mode) = 0; virtual void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) = 0;
}; };

View file

@ -68,17 +68,15 @@ static std::mutex mutex;
struct CpuLaunchStatus struct CpuLaunchStatus
{ {
public: public:
inline size_t hugePages() const { return m_hugePages; } inline const HugePagesInfo &hugePages() const { return m_hugePages; }
inline size_t memory() const { return m_ways * m_memory; } inline size_t memory() const { return m_ways * m_memory; }
inline size_t pages() const { return m_pages; } inline size_t threads() const { return m_threads; }
inline size_t threads() const { return m_threads; } inline size_t ways() const { return m_ways; }
inline size_t ways() const { return m_ways; }
inline void start(const std::vector<CpuLaunchData> &threads, size_t memory) inline void start(const std::vector<CpuLaunchData> &threads, size_t memory)
{ {
m_hugePages = 0; m_hugePages.reset();
m_memory = memory; m_memory = memory;
m_pages = 0;
m_started = 0; m_started = 0;
m_errors = 0; m_errors = 0;
m_threads = threads.size(); m_threads = threads.size();
@ -89,11 +87,9 @@ public:
inline bool started(IWorker *worker, bool ready) inline bool started(IWorker *worker, bool ready)
{ {
if (ready) { if (ready) {
auto hugePages = worker->memory()->hugePages();
m_started++; m_started++;
m_hugePages += hugePages.first;
m_pages += hugePages.second; m_hugePages += worker->memory()->hugePages();
m_ways += worker->intensity(); m_ways += worker->intensity();
} }
else { else {
@ -115,19 +111,18 @@ public:
tag, tag,
m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S, m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S,
m_started, m_threads, m_ways, m_started, m_threads, m_ways,
(m_hugePages == m_pages ? GREEN_BOLD_S : (m_hugePages == 0 ? RED_BOLD_S : YELLOW_BOLD_S)), (m_hugePages.isFullyAllocated() ? GREEN_BOLD_S : (m_hugePages.allocated == 0 ? RED_BOLD_S : YELLOW_BOLD_S)),
m_hugePages == 0 ? 0.0 : static_cast<double>(m_hugePages) / m_pages * 100.0, m_hugePages.percent(),
m_hugePages, m_pages, m_hugePages.allocated, m_hugePages.total,
memory() / 1024, memory() / 1024,
Chrono::steadyMSecs() - m_ts Chrono::steadyMSecs() - m_ts
); );
} }
private: private:
HugePagesInfo m_hugePages;
size_t m_errors = 0; size_t m_errors = 0;
size_t m_hugePages = 0;
size_t m_memory = 0; size_t m_memory = 0;
size_t m_pages = 0;
size_t m_started = 0; size_t m_started = 0;
size_t m_threads = 0; size_t m_threads = 0;
size_t m_ways = 0; size_t m_ways = 0;
@ -169,18 +164,17 @@ public:
rapidjson::Value hugePages(int version, rapidjson::Document &doc) rapidjson::Value hugePages(int version, rapidjson::Document &doc)
{ {
std::pair<unsigned, unsigned> pages(0, 0); HugePagesInfo pages;
# ifdef XMRIG_ALGO_RANDOMX # ifdef XMRIG_ALGO_RANDOMX
if (algo.family() == Algorithm::RANDOM_X) { if (algo.family() == Algorithm::RANDOM_X) {
pages = Rx::hugePages(); pages += Rx::hugePages();
} }
# endif # endif
mutex.lock(); mutex.lock();
pages.first += status.hugePages(); pages += status.hugePages();
pages.second += status.pages();
mutex.unlock(); mutex.unlock();
@ -188,11 +182,11 @@ public:
if (version > 1) { if (version > 1) {
hugepages.SetArray(); hugepages.SetArray();
hugepages.PushBack(pages.first, doc.GetAllocator()); hugepages.PushBack(static_cast<uint64_t>(pages.allocated), doc.GetAllocator());
hugepages.PushBack(pages.second, doc.GetAllocator()); hugepages.PushBack(static_cast<uint64_t>(pages.total), doc.GetAllocator());
} }
else { else {
hugepages = pages.first == pages.second; hugepages = pages.isFullyAllocated();
} }
return hugepages; return hugepages;

View file

@ -119,10 +119,10 @@ std::vector<xmrig::CpuLaunchData> xmrig::CpuConfig::get(const Miner *miner, cons
void xmrig::CpuConfig::read(const rapidjson::Value &value) void xmrig::CpuConfig::read(const rapidjson::Value &value)
{ {
if (value.IsObject()) { if (value.IsObject()) {
m_enabled = Json::getBool(value, kEnabled, m_enabled); m_enabled = Json::getBool(value, kEnabled, m_enabled);
m_hugePages = Json::getBool(value, kHugePages, m_hugePages); m_hugePages = Json::getBool(value, kHugePages, m_hugePages);
m_limit = Json::getUint(value, kMaxThreadsHint, m_limit); m_limit = Json::getUint(value, kMaxThreadsHint, m_limit);
m_yield = Json::getBool(value, kYield, m_yield); m_yield = Json::getBool(value, kYield, m_yield);
setAesMode(Json::getValue(value, kHwAes)); setAesMode(Json::getValue(value, kHwAes));
setPriority(Json::getInt(value, kPriority, -1)); setPriority(Json::getInt(value, kPriority, -1));

View file

@ -60,6 +60,7 @@ public:
inline const String &argon2Impl() const { return m_argon2Impl; } inline const String &argon2Impl() const { return m_argon2Impl; }
inline const Threads<CpuThreads> &threads() const { return m_threads; } inline const Threads<CpuThreads> &threads() const { return m_threads; }
inline int priority() const { return m_priority; } inline int priority() const { return m_priority; }
inline uint32_t limit() const { return m_limit; }
private: private:
void generate(); void generate();

View file

@ -63,7 +63,7 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
m_miner(data.miner), m_miner(data.miner),
m_ctx() m_ctx()
{ {
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, true, m_node); m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, m_node);
} }
@ -96,7 +96,7 @@ void xmrig::CpuWorker<N>::allocateRandomX_VM()
} }
if (!m_vm) { if (!m_vm) {
m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES); m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly);
} }
} }
#endif #endif

View file

@ -37,6 +37,12 @@ namespace xmrig {
class ICpuInfo class ICpuInfo
{ {
public: public:
enum Vendor {
VENDOR_UNKNOWN,
VENDOR_INTEL,
VENDOR_AMD
};
virtual ~ICpuInfo() = default; virtual ~ICpuInfo() = default;
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) # if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__)
@ -48,6 +54,7 @@ public:
virtual Assembly::Id assembly() const = 0; virtual Assembly::Id assembly() const = 0;
virtual bool hasAES() const = 0; virtual bool hasAES() const = 0;
virtual bool hasAVX2() const = 0; virtual bool hasAVX2() const = 0;
virtual bool hasOneGbPages() const = 0;
virtual const char *backend() const = 0; virtual const char *backend() const = 0;
virtual const char *brand() const = 0; virtual const char *brand() const = 0;
virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0; virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0;
@ -57,6 +64,7 @@ public:
virtual size_t nodes() const = 0; virtual size_t nodes() const = 0;
virtual size_t packages() const = 0; virtual size_t packages() const = 0;
virtual size_t threads() const = 0; virtual size_t threads() const = 0;
virtual Vendor vendor() const = 0;
}; };

View file

@ -22,6 +22,17 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "backend/cpu/platform/AdvancedCpuInfo.h"
#include "3rdparty/libcpuid/libcpuid.h"
#ifdef _MSC_VER
# include <intrin.h>
#else
# include <cpuid.h>
#endif
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
@ -29,10 +40,6 @@
#include <cstring> #include <cstring>
#include "3rdparty/libcpuid/libcpuid.h"
#include "backend/cpu/platform/AdvancedCpuInfo.h"
namespace xmrig { namespace xmrig {
@ -54,11 +61,38 @@ static inline void cpu_brand_string(char out[64], const char *in) {
} }
// Executes the CPUID instruction for the requested leaf and stores the four
// result registers in output[0..3].
//
// level  - CPUID leaf to query.
// output - caller-provided array of four 32-bit values; it is zeroed first so
//          that it holds defined values even before the intrinsic runs.
static inline void cpuid(uint32_t level, int32_t output[4])
{
memset(output, 0, sizeof(int32_t) * 4);
# ifdef _MSC_VER
// MSVC intrinsic from <intrin.h>: fills the whole array in one call.
__cpuid(output, static_cast<int>(level));
# else
// <cpuid.h> macro: the sub-leaf argument is fixed to 0.
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
# endif
}
// Queries CPUID leaf `level` and reports whether any bit selected by the mask
// `bit` is set in result register number `reg` (index into the 4-entry
// result array filled by cpuid()).
static inline bool has_feature(uint32_t level, uint32_t reg, int32_t bit)
{
    int32_t registers[4] = { 0 };
    cpuid(level, registers);

    const int32_t masked = registers[reg] & bit;

    return masked != 0;
}
// Tests bit 26 of result register 3 for CPUID leaf 0x80000001; the result
// is stored in m_pdpe1gb and exposed through hasOneGbPages().
static inline bool has_pdpe1gb()
{
    constexpr uint32_t extendedInfoLeaf = 0x80000001;
    constexpr uint32_t registerIndex    = 3;
    constexpr int32_t pdpe1gbMask       = 1 << 26;

    return has_feature(extendedInfoLeaf, registerIndex, pdpe1gbMask);
}
} // namespace xmrig } // namespace xmrig
xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
m_brand() m_pdpe1gb(has_pdpe1gb())
{ {
struct cpu_raw_data_t raw = {}; struct cpu_raw_data_t raw = {};
struct cpu_id_t data = {}; struct cpu_id_t data = {};
@ -69,21 +103,28 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
cpu_brand_string(m_brand, data.brand_str); cpu_brand_string(m_brand, data.brand_str);
snprintf(m_backend, sizeof m_backend, "libcpuid/%s", cpuid_lib_version()); snprintf(m_backend, sizeof m_backend, "libcpuid/%s", cpuid_lib_version());
if (data.vendor == ::VENDOR_INTEL) {
m_vendor = VENDOR_INTEL;
}
else if (data.vendor == ::VENDOR_AMD) {
m_vendor = VENDOR_AMD;
}
m_threads = static_cast<size_t>(data.total_logical_cpus); m_threads = static_cast<size_t>(data.total_logical_cpus);
m_packages = std::max<size_t>(threads() / static_cast<size_t>(data.num_logical_cpus), 1); m_packages = std::max<size_t>(threads() / static_cast<size_t>(data.num_logical_cpus), 1);
m_cores = static_cast<size_t>(data.num_cores) * m_packages; m_cores = static_cast<size_t>(data.num_cores) * m_packages;
m_L3 = data.l3_cache > 0 ? static_cast<size_t>(data.l3_cache) * m_packages : 0; m_L3 = data.l3_cache > 0 ? static_cast<size_t>(data.l3_cache) * m_packages : 0;
const size_t l2 = static_cast<size_t>(data.l2_cache); const auto l2 = static_cast<size_t>(data.l2_cache);
// Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97 // Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97
if (data.vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) { if (m_vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) {
m_L2 = l2 * (cores() / 2) * m_packages; m_L2 = l2 * (cores() / 2) * m_packages;
m_L2_exclusive = true; m_L2_exclusive = true;
} }
// Workaround for Intel Pentium Dual-Core, Core Duo, Core 2 Duo, Core 2 Quad and their Xeon homologue // Workaround for Intel Pentium Dual-Core, Core Duo, Core 2 Duo, Core 2 Quad and their Xeon homologue
// These processors have L2 cache shared by 2 cores. // These processors have L2 cache shared by 2 cores.
else if (data.vendor == VENDOR_INTEL && data.ext_family == 0x06 && (data.ext_model == 0x0E || data.ext_model == 0x0F || data.ext_model == 0x17)) { else if (m_vendor == VENDOR_INTEL && data.ext_family == 0x06 && (data.ext_model == 0x0E || data.ext_model == 0x0F || data.ext_model == 0x17)) {
size_t l2_count_per_socket = cores() > 1 ? cores() / 2 : 1; size_t l2_count_per_socket = cores() > 1 ? cores() / 2 : 1;
m_L2 = data.l2_cache > 0 ? l2 * l2_count_per_socket * m_packages : 0; m_L2 = data.l2_cache > 0 ? l2 * l2_count_per_socket * m_packages : 0;
} }
@ -97,10 +138,10 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
if (data.flags[CPU_FEATURE_AES]) { if (data.flags[CPU_FEATURE_AES]) {
m_aes = true; m_aes = true;
if (data.vendor == VENDOR_AMD) { if (m_vendor == VENDOR_AMD) {
m_assembly = (data.ext_family >= 23) ? Assembly::RYZEN : Assembly::BULLDOZER; m_assembly = (data.ext_family >= 23) ? Assembly::RYZEN : Assembly::BULLDOZER;
} }
else if (data.vendor == VENDOR_INTEL) { else if (m_vendor == VENDOR_INTEL) {
m_assembly = Assembly::INTEL; m_assembly = Assembly::INTEL;
} }
} }

View file

@ -43,6 +43,7 @@ protected:
inline Assembly::Id assembly() const override { return m_assembly; } inline Assembly::Id assembly() const override { return m_assembly; }
inline bool hasAES() const override { return m_aes; } inline bool hasAES() const override { return m_aes; }
inline bool hasAVX2() const override { return m_avx2; } inline bool hasAVX2() const override { return m_avx2; }
inline bool hasOneGbPages() const override { return m_pdpe1gb; }
inline const char *backend() const override { return m_backend; } inline const char *backend() const override { return m_backend; }
inline const char *brand() const override { return m_brand; } inline const char *brand() const override { return m_brand; }
inline size_t cores() const override { return m_cores; } inline size_t cores() const override { return m_cores; }
@ -51,19 +52,22 @@ protected:
inline size_t nodes() const override { return 0; } inline size_t nodes() const override { return 0; }
inline size_t packages() const override { return m_packages; } inline size_t packages() const override { return m_packages; }
inline size_t threads() const override { return m_threads; } inline size_t threads() const override { return m_threads; }
inline Vendor vendor() const override { return m_vendor; }
private: private:
Assembly m_assembly; Assembly m_assembly;
bool m_aes = false; bool m_aes = false;
bool m_avx2 = false; bool m_avx2 = false;
bool m_L2_exclusive = false; bool m_L2_exclusive = false;
char m_backend[32]; char m_backend[32]{};
char m_brand[64 + 5]; char m_brand[64 + 5]{};
const bool m_pdpe1gb = false;
size_t m_cores = 0; size_t m_cores = 0;
size_t m_L2 = 0; size_t m_L2 = 0;
size_t m_L3 = 0; size_t m_L3 = 0;
size_t m_packages = 1; size_t m_packages = 1;
size_t m_threads = 0; size_t m_threads = 0;
Vendor m_vendor = VENDOR_UNKNOWN;
}; };

View file

@ -23,7 +23,7 @@
*/ */
#include <algorithm> #include <algorithm>
#include <string.h> #include <cstring>
#include <thread> #include <thread>
@ -45,6 +45,10 @@
# define bit_AVX2 (1 << 5) # define bit_AVX2 (1 << 5)
#endif #endif
#ifndef bit_PDPE1GB
# define bit_PDPE1GB (1 << 26)
#endif
#include "backend/cpu/platform/BasicCpuInfo.h" #include "backend/cpu/platform/BasicCpuInfo.h"
#include "crypto/common/Assembly.h" #include "crypto/common/Assembly.h"
@ -53,6 +57,7 @@
#define VENDOR_ID (0) #define VENDOR_ID (0)
#define PROCESSOR_INFO (1) #define PROCESSOR_INFO (1)
#define EXTENDED_FEATURES (7) #define EXTENDED_FEATURES (7)
#define PROCESSOR_EXT_INFO (0x80000001)
#define PROCESSOR_BRAND_STRING_1 (0x80000002) #define PROCESSOR_BRAND_STRING_1 (0x80000002)
#define PROCESSOR_BRAND_STRING_2 (0x80000003) #define PROCESSOR_BRAND_STRING_2 (0x80000003)
#define PROCESSOR_BRAND_STRING_3 (0x80000004) #define PROCESSOR_BRAND_STRING_3 (0x80000004)
@ -108,7 +113,7 @@ static void cpu_brand_string(char out[64 + 6]) {
} }
static bool has_feature(uint32_t level, uint32_t reg, int32_t bit) static inline bool has_feature(uint32_t level, uint32_t reg, int32_t bit)
{ {
int32_t cpu_info[4] = { 0 }; int32_t cpu_info[4] = { 0 };
cpuid(level, cpu_info); cpuid(level, cpu_info);
@ -136,15 +141,20 @@ static inline bool has_avx2()
} }
// Checks the bit_PDPE1GB flag in the EDX register of the PROCESSOR_EXT_INFO
// CPUID leaf; the result backs hasOneGbPages().
static inline bool has_pdpe1gb()
{
    const bool supported = has_feature(PROCESSOR_EXT_INFO, EDX_Reg, bit_PDPE1GB);

    return supported;
}
} // namespace xmrig } // namespace xmrig
xmrig::BasicCpuInfo::BasicCpuInfo() : xmrig::BasicCpuInfo::BasicCpuInfo() :
m_brand(),
m_threads(std::thread::hardware_concurrency()), m_threads(std::thread::hardware_concurrency()),
m_assembly(Assembly::NONE),
m_aes(has_aes_ni()), m_aes(has_aes_ni()),
m_avx2(has_avx2()) m_avx2(has_avx2()),
m_pdpe1gb(has_pdpe1gb())
{ {
cpu_brand_string(m_brand); cpu_brand_string(m_brand);
@ -160,12 +170,15 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
memcpy(vendor + 8, &data[2], 4); memcpy(vendor + 8, &data[2], 4);
if (memcmp(vendor, "AuthenticAMD", 12) == 0) { if (memcmp(vendor, "AuthenticAMD", 12) == 0) {
m_vendor = VENDOR_AMD;
cpuid(PROCESSOR_INFO, data); cpuid(PROCESSOR_INFO, data);
const int32_t family = get_masked(data[EAX_Reg], 12, 8) + get_masked(data[EAX_Reg], 28, 20); const int32_t family = get_masked(data[EAX_Reg], 12, 8) + get_masked(data[EAX_Reg], 28, 20);
m_assembly = family >= 23 ? Assembly::RYZEN : Assembly::BULLDOZER; m_assembly = family >= 23 ? Assembly::RYZEN : Assembly::BULLDOZER;
} }
else { else if (memcmp(vendor, "GenuineIntel", 12) == 0) {
m_vendor = VENDOR_INTEL;
m_assembly = Assembly::INTEL; m_assembly = Assembly::INTEL;
} }
} }
@ -179,7 +192,7 @@ const char *xmrig::BasicCpuInfo::backend() const
} }
xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const
{ {
const size_t count = std::thread::hardware_concurrency(); const size_t count = std::thread::hardware_concurrency();

View file

@ -44,6 +44,7 @@ protected:
inline Assembly::Id assembly() const override { return m_assembly; } inline Assembly::Id assembly() const override { return m_assembly; }
inline bool hasAES() const override { return m_aes; } inline bool hasAES() const override { return m_aes; }
inline bool hasAVX2() const override { return m_avx2; } inline bool hasAVX2() const override { return m_avx2; }
inline bool hasOneGbPages() const override { return m_pdpe1gb; }
inline const char *brand() const override { return m_brand; } inline const char *brand() const override { return m_brand; }
inline size_t cores() const override { return 0; } inline size_t cores() const override { return 0; }
inline size_t L2() const override { return 0; } inline size_t L2() const override { return 0; }
@ -51,15 +52,18 @@ protected:
inline size_t nodes() const override { return 0; } inline size_t nodes() const override { return 0; }
inline size_t packages() const override { return 1; } inline size_t packages() const override { return 1; }
inline size_t threads() const override { return m_threads; } inline size_t threads() const override { return m_threads; }
inline Vendor vendor() const override { return m_vendor; }
protected: protected:
char m_brand[64 + 6]; char m_brand[64 + 6]{};
size_t m_threads; size_t m_threads;
private: private:
Assembly m_assembly; Assembly m_assembly = Assembly::NONE;
bool m_aes; bool m_aes = false;
const bool m_avx2; const bool m_avx2 = false;
const bool m_pdpe1gb = false;
Vendor m_vendor = VENDOR_UNKNOWN;
}; };

View file

@ -22,7 +22,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <string.h> #include <cstring>
#include <thread> #include <thread>
@ -36,10 +36,7 @@
xmrig::BasicCpuInfo::BasicCpuInfo() : xmrig::BasicCpuInfo::BasicCpuInfo() :
m_brand(), m_threads(std::thread::hardware_concurrency())
m_threads(std::thread::hardware_concurrency()),
m_aes(false),
m_avx2(false)
{ {
# ifdef XMRIG_ARMv8 # ifdef XMRIG_ARMv8
memcpy(m_brand, "ARMv8", 5); memcpy(m_brand, "ARMv8", 5);

View file

@ -262,7 +262,7 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint3
void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const
{ {
constexpr size_t oneMiB = 1024u * 1024u; constexpr size_t oneMiB = 1024U * 1024U;
size_t PUs = countByType(cache, HWLOC_OBJ_PU); size_t PUs = countByType(cache, HWLOC_OBJ_PU);
if (PUs == 0) { if (PUs == 0) {

View file

@ -24,13 +24,14 @@
*/ */
#include <string.h>
#include <uv.h>
#include "base/io/log/backends/FileLog.h" #include "base/io/log/backends/FileLog.h"
#include <cassert>
#include <cstring>
#include <uv.h>
xmrig::FileLog::FileLog(const char *fileName) xmrig::FileLog::FileLog(const char *fileName)
{ {
uv_fs_t req; uv_fs_t req;
@ -45,13 +46,12 @@ void xmrig::FileLog::print(int, const char *line, size_t, size_t size, bool colo
return; return;
} }
# ifdef _WIN32 assert(strlen(line) == size);
uv_buf_t buf = uv_buf_init(strdup(line), static_cast<unsigned int>(size));
# else
uv_buf_t buf = uv_buf_init(strdup(line), size);
# endif
uv_fs_t *req = new uv_fs_t; uv_buf_t buf = uv_buf_init(new char[size], size);
memcpy(buf.base, line, size);
auto req = new uv_fs_t;
req->data = buf.base; req->data = buf.base;
uv_fs_write(uv_default_loop(), req, m_file, &buf, 1, -1, FileLog::onWrite); uv_fs_write(uv_default_loop(), req, m_file, &buf, 1, -1, FileLog::onWrite);

View file

@ -90,6 +90,8 @@ public:
RandomXInitKey = 1022, RandomXInitKey = 1022,
RandomXNumaKey = 1023, RandomXNumaKey = 1023,
RandomXModeKey = 1029, RandomXModeKey = 1029,
RandomX1GbPagesKey = 1031,
RandomXWrmsrKey = 1032,
CPUMaxThreadsKey = 1026, CPUMaxThreadsKey = 1026,
MemoryPoolKey = 1027, MemoryPoolKey = 1027,
YieldKey = 1030, YieldKey = 1030,

View file

@ -338,6 +338,10 @@ bool xmrig::Client::isCriticalError(const char *message)
return true; return true;
} }
if (strncasecmp(message, "Invalid job id", 14) == 0) {
return true;
}
return false; return false;
} }
@ -558,7 +562,7 @@ void xmrig::Client::connect(sockaddr *addr)
{ {
setState(ConnectingState); setState(ConnectingState);
uv_connect_t *req = new uv_connect_t; auto req = new uv_connect_t;
req->data = m_storage.ptr(m_key); req->data = m_storage.ptr(m_key);
m_socket = new uv_tcp_t; m_socket = new uv_tcp_t;
@ -799,7 +803,7 @@ void xmrig::Client::ping()
void xmrig::Client::read(ssize_t nread) void xmrig::Client::read(ssize_t nread)
{ {
const size_t size = static_cast<size_t>(nread); const auto size = static_cast<size_t>(nread);
if (nread > 0 && size > m_recvBuf.available()) { if (nread > 0 && size > m_recvBuf.available()) {
nread = UV_ENOBUFS; nread = UV_ENOBUFS;
@ -859,7 +863,7 @@ void xmrig::Client::reconnect()
void xmrig::Client::setState(SocketState state) void xmrig::Client::setState(SocketState state)
{ {
LOG_DEBUG("[%s] state: \"%s\"", url(), states[state]); LOG_DEBUG("[%s] state: \"%s\" -> \"%s\"", url(), states[m_state], states[state]);
if (m_state == state) { if (m_state == state) {
return; return;
@ -956,6 +960,12 @@ void xmrig::Client::onConnect(uv_connect_t *req, int status)
return; return;
} }
// Ignore a connect callback that arrives while the client is already in the
// connected state instead of re-initializing the stream.
if (client->state() == ConnectedState) {
    // The "[%s]" placeholder must be given the pool URL; logging it without
    // an argument is undefined behaviour in printf-style formatting.
    LOG_ERR("[%s] already connected", client->url());
    return;
}
client->m_stream = static_cast<uv_stream_t*>(req->handle); client->m_stream = static_cast<uv_stream_t*>(req->handle);
client->m_stream->data = req->data; client->m_stream->data = req->data;
client->setState(ConnectedState); client->setState(ConnectedState);

View file

@ -11,12 +11,13 @@
"restricted": true "restricted": true
}, },
"autosave": true, "autosave": true,
"version": 1,
"background": false, "background": false,
"colors": true, "colors": true,
"randomx": { "randomx": {
"init": -1, "init": -1,
"mode": "auto", "mode": "auto",
"1gb-pages": false,
"wrmsr": 6,
"numa": true "numa": true
}, },
"cpu": { "cpu": {

View file

@ -234,10 +234,7 @@ public:
# ifdef XMRIG_ALGO_RANDOMX # ifdef XMRIG_ALGO_RANDOMX
inline bool initRX() inline bool initRX() { return Rx::init(job, controller->config()->rx(), controller->config()->cpu()); }
{
return Rx::init(job, controller->config()->rx(), controller->config()->cpu().isHugePages());
}
# endif # endif

View file

@ -165,6 +165,12 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const
case IConfig::RandomXModeKey: /* --randomx-mode */ case IConfig::RandomXModeKey: /* --randomx-mode */
return set(doc, kRandomX, "mode", arg); return set(doc, kRandomX, "mode", arg);
case IConfig::RandomX1GbPagesKey: /* --randomx-1gb-pages */
return set(doc, kRandomX, "1gb-pages", true);
case IConfig::RandomXWrmsrKey: /* --randomx-wrmsr */
return set(doc, kRandomX, "wrmsr", static_cast<int64_t>(strtol(arg, nullptr, 10)));
# endif # endif
# ifdef XMRIG_FEATURE_OPENCL # ifdef XMRIG_FEATURE_OPENCL

View file

@ -98,6 +98,9 @@ static const option options[] = {
{ "randomx-init", 1, nullptr, IConfig::RandomXInitKey }, { "randomx-init", 1, nullptr, IConfig::RandomXInitKey },
{ "randomx-no-numa", 0, nullptr, IConfig::RandomXNumaKey }, { "randomx-no-numa", 0, nullptr, IConfig::RandomXNumaKey },
{ "randomx-mode", 1, nullptr, IConfig::RandomXModeKey }, { "randomx-mode", 1, nullptr, IConfig::RandomXModeKey },
{ "randomx-1gb-pages", 0, nullptr, IConfig::RandomX1GbPagesKey },
{ "1gb-pages", 0, nullptr, IConfig::RandomX1GbPagesKey },
{ "randomx-wrmsr", 1, nullptr, IConfig::RandomXWrmsrKey },
# endif # endif
# ifdef XMRIG_FEATURE_OPENCL # ifdef XMRIG_FEATURE_OPENCL
{ "opencl", 0, nullptr, IConfig::OclKey }, { "opencl", 0, nullptr, IConfig::OclKey },

View file

@ -88,6 +88,8 @@ static inline const std::string &usage()
u += " --randomx-init=N threads count to initialize RandomX dataset\n"; u += " --randomx-init=N threads count to initialize RandomX dataset\n";
u += " --randomx-no-numa disable NUMA support for RandomX\n"; u += " --randomx-no-numa disable NUMA support for RandomX\n";
u += " --randomx-mode=MODE RandomX mode: auto, fast, light\n"; u += " --randomx-mode=MODE RandomX mode: auto, fast, light\n";
u += " --randomx-1gb-pages use 1GB hugepages for dataset (Linux only)\n";
u += " --randomx-wrmsr=N write value (0-15) to Intel MSR register 0x1a4 or do nothing (-1) (Linux only)\n";
# endif # endif
# ifdef XMRIG_FEATURE_HTTP # ifdef XMRIG_FEATURE_HTTP

View file

@ -23,40 +23,28 @@
*/ */
#include "crypto/rx/RxConfig.h" #include "crypto/common/HugePagesInfo.h"
#include "base/io/json/Json.h" #include "crypto/common/VirtualMemory.h"
#include "rapidjson/document.h"
namespace xmrig { namespace xmrig {
static const char *kInit = "init"; constexpr size_t twoMiB = 2U * 1024U * 1024U;
static const char *kMode = "mode"; constexpr size_t oneGiB = 1024U * 1024U * 1024U;
} } // namespace xmrig
rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const xmrig::HugePagesInfo::HugePagesInfo(const VirtualMemory *memory)
{ {
using namespace rapidjson; if (memory->isOneGbPages()) {
auto &allocator = doc.GetAllocator(); size = VirtualMemory::align(memory->size(), oneGiB);
total = size / oneGiB;
Value obj(kObjectType); allocated = size / oneGiB;
obj.AddMember(StringRef(kInit), m_threads, allocator); }
obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator); else {
size = memory->size();
return obj; total = size / twoMiB;
} allocated = memory->isHugePages() ? total : 0;
bool xmrig::RxConfig::read(const rapidjson::Value &value)
{
if (value.IsObject()) {
m_threads = Json::getInt(value, kInit, m_threads);
m_mode = readMode(Json::getValue(value, kMode));
return true;
} }
return false;
} }

View file

@ -0,0 +1,67 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_HUGEPAGESINFO_H
#define XMRIG_HUGEPAGESINFO_H
#include <cstdint>
#include <cstddef>
namespace xmrig {


class VirtualMemory;


// Accumulates huge page statistics: how many pages were obtained as huge
// pages (allocated), how many pages were wanted in total (total) and the
// summed memory size in bytes (size). Instances can be added together with
// operator+= to aggregate several allocations.
class HugePagesInfo
{
public:
    HugePagesInfo() = default;
    HugePagesInfo(const VirtualMemory *memory);

    size_t allocated = 0;
    size_t total     = 0;
    size_t size      = 0;

    // True when every requested page was obtained (also true when both
    // counters are zero).
    inline bool isFullyAllocated() const { return total == allocated; }

    // Share of allocated pages in percent; 0.0 when nothing was allocated.
    inline double percent() const
    {
        if (allocated == 0) {
            return 0.0;
        }

        return static_cast<double>(allocated) / total * 100.0;
    }

    // Clears all counters back to zero.
    inline void reset()
    {
        size      = 0;
        total     = 0;
        allocated = 0;
    }

    // Adds the counters of another instance to this one.
    inline HugePagesInfo &operator+=(const HugePagesInfo &other)
    {
        size      += other.size;
        total     += other.total;
        allocated += other.allocated;

        return *this;
    }
};


} /* namespace xmrig */
#endif /* XMRIG_HUGEPAGESINFO_H */

View file

@ -0,0 +1,103 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//#include <iostream>
#include "crypto/common/LinuxMemory.h"
#include "base/io/log/Log.h"
#include "crypto/common/VirtualMemory.h"
#include "backend/cpu/Cpu.h"
#include <algorithm>
#include <fstream>
#include <string>
#include <mutex>
namespace xmrig {


// Serializes reserve() calls so concurrent callers do not race on sysfs.
static std::mutex mutex;

constexpr size_t twoMiB = 2U * 1024U * 1024U;
constexpr size_t oneGiB = 1024U * 1024U * 1024U;


// Builds the per-NUMA-node sysfs path of a huge page counter.
//   node       - NUMA node index.
//   oneGbPages - select the 1 GB page directory instead of the 2 MB one.
//   nr         - true for "nr_hugepages", false for "free_hugepages".
static inline std::string sysfs_path(uint32_t node, bool oneGbPages, bool nr)
{
    std::string path = "/sys/devices/system/node/node";

    path += std::to_string(node);
    path += "/hugepages/hugepages-";
    path += oneGbPages ? "1048576" : "2048";
    path += "kB/";
    path += nr ? "nr" : "free";
    path += "_hugepages";

    return path;
}


// Writes a new value to the node's nr_hugepages counter.
static inline bool write_nr_hugepages(uint32_t node, bool oneGbPages, uint64_t count)
{
    const std::string path = sysfs_path(node, oneGbPages, true);

    return LinuxMemory::write(path.c_str(), count);
}


// Reads the node's free_hugepages counter (negative on failure).
static inline int64_t free_hugepages(uint32_t node, bool oneGbPages)
{
    const std::string path = sysfs_path(node, oneGbPages, false);

    return LinuxMemory::read(path.c_str());
}


// Reads the node's nr_hugepages counter (negative on failure).
static inline int64_t nr_hugepages(uint32_t node, bool oneGbPages)
{
    const std::string path = sysfs_path(node, oneGbPages, true);

    return LinuxMemory::read(path.c_str());
}


} // namespace xmrig
// Tries to make enough huge pages available on a NUMA node for an allocation
// of `size` bytes by raising the node's nr_hugepages counter.
//
//   size       - requested allocation size in bytes (rounded up to whole pages).
//   node       - NUMA node index.
//   oneGbPages - use 1 GB pages instead of 2 MB pages.
//
// Returns false when the free page count cannot be read or when enough free
// pages already exist (nothing is written in either case); otherwise returns
// the result of writing the new counter value.
bool xmrig::LinuxMemory::reserve(size_t size, uint32_t node, bool oneGbPages)
{
    std::lock_guard<std::mutex> lock(mutex);

    const size_t pageSize = oneGbPages ? oneGiB : twoMiB;
    const size_t required = VirtualMemory::align(size, pageSize) / pageSize;

    const auto available = free_hugepages(node, oneGbPages);
    if (available < 0 || static_cast<size_t>(available) >= required) {
        return false;
    }

    // Clamp in the signed domain: nr_hugepages() reports failure as -1, and
    // converting that to size_t before clamping would yield SIZE_MAX and make
    // the sum below wrap around.
    const int64_t current = std::max<int64_t>(nr_hugepages(node, oneGbPages), 0);
    const size_t missing  = required - static_cast<size_t>(available);

    return write_nr_hugepages(node, oneGbPages, static_cast<uint64_t>(current) + missing);
}
// Writes an unsigned integer as decimal text to the given file, truncating
// any previous content.
//
// Returns false if the file cannot be opened or if writing/flushing the value
// fails (for example when the target rejects the value); true otherwise.
bool xmrig::LinuxMemory::write(const char *path, uint64_t value)
{
    std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc);
    if (!file.is_open()) {
        return false;
    }

    file << value;
    file.flush();

    // The flush is what actually hands the data to the OS, so the stream
    // state after it tells whether the write was accepted.
    return !file.fail();
}
// Reads a single non-negative integer from the given file.
//
// Returns the parsed value, or -1 if the file cannot be opened or does not
// start with a valid non-negative number that fits into int64_t.
int64_t xmrig::LinuxMemory::read(const char *path)
{
    std::ifstream file(path);
    if (!file.is_open()) {
        return -1;
    }

    // Parse directly as signed so out-of-range input sets failbit instead of
    // silently turning into a negative number through a narrowing conversion.
    int64_t value = 0;
    if (!(file >> value) || value < 0) {
        return -1;
    }

    return value;
}

View file

@ -0,0 +1,49 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_LINUXMEMORY_H
#define XMRIG_LINUXMEMORY_H
#include <cstdint>
#include <cstddef>
namespace xmrig {
// Static helpers for inspecting and resizing the Linux huge page counters
// exposed under /sys/devices/system/node/.
class LinuxMemory
{
public:
// Enlarges the nr_hugepages counter of NUMA node `node` when fewer pages are
// free than `size` bytes require (1 GiB pages if oneGbPages, else 2 MiB).
// Returns false when the free-page counter cannot be read or when enough
// pages are already free; otherwise the result of writing the new counter.
static bool reserve(size_t size, uint32_t node, bool oneGbPages = false);
// Writes `value` as decimal text to the file at `path`.
// Returns false when the file cannot be opened.
static bool write(const char *path, uint64_t value);
// Reads an integer from the file at `path`.
// Returns -1 when the file cannot be opened.
static int64_t read(const char *path);
};
} /* namespace xmrig */
#endif /* XMRIG_LINUXMEMORY_H */

View file

@ -47,7 +47,7 @@ xmrig::MemoryPool::MemoryPool(size_t size, bool hugePages, uint32_t node)
return; return;
} }
m_memory = new VirtualMemory(size * pageSize, hugePages, false, node); m_memory = new VirtualMemory(size * pageSize, hugePages, false, false, node);
} }

View file

@ -46,10 +46,13 @@ namespace xmrig {
static IMemoryPool *pool = nullptr; static IMemoryPool *pool = nullptr;
static std::mutex mutex; static std::mutex mutex;
constexpr size_t twoMiB = 2U * 1024U * 1024U;
constexpr size_t oneGiB = 1024U * 1024U * 1024U;
} // namespace xmrig } // namespace xmrig
xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool usePool, uint32_t node, size_t alignSize) : xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages, bool usePool, uint32_t node, size_t alignSize) :
m_size(align(size)), m_size(align(size)),
m_node(node) m_node(node)
{ {
@ -68,6 +71,10 @@ xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool usePool, u
} }
} }
if (oneGbPages && allocateOneGbPagesMemory()) {
return;
}
if (hugePages && allocateLargePagesMemory()) { if (hugePages && allocateLargePagesMemory()) {
return; return;
} }
@ -86,7 +93,7 @@ xmrig::VirtualMemory::~VirtualMemory()
std::lock_guard<std::mutex> lock(mutex); std::lock_guard<std::mutex> lock(mutex);
pool->release(m_node); pool->release(m_node);
} }
else if (isHugePages()) { else if (isHugePages() || isOneGbPages()) {
freeLargePagesMemory(); freeLargePagesMemory();
} }
else { else {
@ -95,6 +102,12 @@ xmrig::VirtualMemory::~VirtualMemory()
} }
// Returns a HugePagesInfo constructed from this VirtualMemory instance.
// NOTE(review): which fields HugePagesInfo derives from the pointer is defined
// in HugePagesInfo itself, which is not visible here.
xmrig::HugePagesInfo xmrig::VirtualMemory::hugePages() const
{
return { this };
}
#ifndef XMRIG_FEATURE_HWLOC #ifndef XMRIG_FEATURE_HWLOC
uint32_t xmrig::VirtualMemory::bindToNUMANode(int64_t) uint32_t xmrig::VirtualMemory::bindToNUMANode(int64_t)
{ {

View file

@ -29,6 +29,7 @@
#include "base/tools/Object.h" #include "base/tools/Object.h"
#include "crypto/common/HugePagesInfo.h"
#include <bitset> #include <bitset>
@ -45,22 +46,23 @@ class VirtualMemory
public: public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(VirtualMemory) XMRIG_DISABLE_COPY_MOVE_DEFAULT(VirtualMemory)
VirtualMemory(size_t size, bool hugePages, bool usePool, uint32_t node = 0, size_t alignSize = 64); VirtualMemory(size_t size, bool hugePages, bool oneGbPages, bool usePool, uint32_t node = 0, size_t alignSize = 64);
~VirtualMemory(); ~VirtualMemory();
inline bool isHugePages() const { return m_flags.test(FLAG_HUGEPAGES); } inline bool isHugePages() const { return m_flags.test(FLAG_HUGEPAGES); }
inline bool isOneGbPages() const { return m_flags.test(FLAG_1GB_PAGES); }
inline size_t size() const { return m_size; } inline size_t size() const { return m_size; }
inline uint8_t *raw() const { return m_scratchpad; }
inline uint8_t *scratchpad() const { return m_scratchpad; } inline uint8_t *scratchpad() const { return m_scratchpad; }
inline std::pair<size_t, size_t> hugePages() const HugePagesInfo hugePages() const;
{
return { isHugePages() ? (align(size()) / 2097152) : 0, align(size()) / 2097152 };
}
static bool isHugepagesAvailable(); static bool isHugepagesAvailable();
static bool isOneGbPagesAvailable();
static uint32_t bindToNUMANode(int64_t affinity); static uint32_t bindToNUMANode(int64_t affinity);
static void *allocateExecutableMemory(size_t size); static void *allocateExecutableMemory(size_t size);
static void *allocateLargePagesMemory(size_t size); static void *allocateLargePagesMemory(size_t size);
static void *allocateOneGbPagesMemory(size_t size);
static void destroy(); static void destroy();
static void flushInstructionCache(void *p, size_t size); static void flushInstructionCache(void *p, size_t size);
static void freeLargePagesMemory(void *p, size_t size); static void freeLargePagesMemory(void *p, size_t size);
@ -73,6 +75,7 @@ public:
private: private:
enum Flags { enum Flags {
FLAG_HUGEPAGES, FLAG_HUGEPAGES,
FLAG_1GB_PAGES,
FLAG_LOCK, FLAG_LOCK,
FLAG_EXTERNAL, FLAG_EXTERNAL,
FLAG_MAX FLAG_MAX
@ -81,6 +84,7 @@ private:
static void osInit(bool hugePages); static void osInit(bool hugePages);
bool allocateLargePagesMemory(); bool allocateLargePagesMemory();
bool allocateOneGbPagesMemory();
void freeLargePagesMemory(); void freeLargePagesMemory();
const size_t m_size; const size_t m_size;

View file

@ -29,6 +29,7 @@
#include <sys/mman.h> #include <sys/mman.h>
#include "backend/cpu/Cpu.h"
#include "crypto/common/portable/mm_malloc.h" #include "crypto/common/portable/mm_malloc.h"
#include "crypto/common/VirtualMemory.h" #include "crypto/common/VirtualMemory.h"
@ -38,12 +39,30 @@
#endif #endif
#if defined(XMRIG_OS_LINUX)
# if (defined(MAP_HUGE_1GB) || defined(MAP_HUGE_SHIFT))
# define XMRIG_HAS_1GB_PAGES
# endif
# include "crypto/common/LinuxMemory.h"
#endif
bool xmrig::VirtualMemory::isHugepagesAvailable() bool xmrig::VirtualMemory::isHugepagesAvailable()
{ {
return true; return true;
} }
// Reports whether 1 GB huge pages can be used: always false when the build
// has no XMRIG_HAS_1GB_PAGES, otherwise whatever the CPU info reports.
bool xmrig::VirtualMemory::isOneGbPagesAvailable()
{
#   ifndef XMRIG_HAS_1GB_PAGES
    return false;
#   else
    return Cpu::info()->hasOneGbPages();
#   endif
}
void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size) void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size)
{ {
# if defined(__APPLE__) # if defined(__APPLE__)
@ -70,6 +89,28 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
} }
// Maps `size` bytes of anonymous, pre-populated memory backed by 1 GB huge
// pages. Returns nullptr when the build lacks XMRIG_HAS_1GB_PAGES, when
// isOneGbPagesAvailable() is false, or when mmap() fails.
void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t size)
{
#   ifdef XMRIG_HAS_1GB_PAGES
    if (!isOneGbPagesAvailable()) {
        return nullptr;
    }

    // Page-size selector for MAP_HUGETLB: use the ready-made constant when the
    // headers provide it, otherwise encode log2(1 GiB) = 30 by hand.
#   if defined(MAP_HUGE_1GB)
    constexpr int oneGbFlag = MAP_HUGE_1GB;
#   elif defined(MAP_HUGE_SHIFT)
    constexpr int oneGbFlag = (30 << MAP_HUGE_SHIFT);
#   else
    constexpr int oneGbFlag = 0;
#   endif

    const int mapFlags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | oneGbFlag;

    void *region = mmap(0, size, PROT_READ | PROT_WRITE, mapFlags, 0, 0);
    if (region == MAP_FAILED) {
        return nullptr;
    }

    return region;
#   else
    return nullptr;
#   endif
}
void xmrig::VirtualMemory::flushInstructionCache(void *p, size_t size) void xmrig::VirtualMemory::flushInstructionCache(void *p, size_t size)
{ {
# ifdef HAVE_BUILTIN_CLEAR_CACHE # ifdef HAVE_BUILTIN_CLEAR_CACHE
@ -103,6 +144,10 @@ void xmrig::VirtualMemory::osInit(bool)
bool xmrig::VirtualMemory::allocateLargePagesMemory() bool xmrig::VirtualMemory::allocateLargePagesMemory()
{ {
# if defined(XMRIG_OS_LINUX)
LinuxMemory::reserve(m_size, m_node);
# endif
m_scratchpad = static_cast<uint8_t*>(allocateLargePagesMemory(m_size)); m_scratchpad = static_cast<uint8_t*>(allocateLargePagesMemory(m_size));
if (m_scratchpad) { if (m_scratchpad) {
m_flags.set(FLAG_HUGEPAGES, true); m_flags.set(FLAG_HUGEPAGES, true);
@ -120,6 +165,29 @@ bool xmrig::VirtualMemory::allocateLargePagesMemory()
} }
// Tries to back this object's memory (m_size bytes) with 1 GB huge pages.
//
// On success stores the mapping in m_scratchpad, sets FLAG_1GB_PAGES, applies
// access hints, attempts to lock the memory (setting FLAG_LOCK if that works)
// and returns true. Returns false when the allocation fails.
bool xmrig::VirtualMemory::allocateOneGbPagesMemory()
{
#   if defined(XMRIG_HAS_1GB_PAGES)
    // Best effort: the allocation below is attempted regardless of whether
    // the reservation succeeded, so the result is deliberately not checked.
    LinuxMemory::reserve(m_size, m_node, true);
#   endif

    m_scratchpad = static_cast<uint8_t*>(allocateOneGbPagesMemory(m_size));
    if (!m_scratchpad) {
        return false;
    }

    m_flags.set(FLAG_1GB_PAGES, true);

    // madvise() advice values are an enumeration, not a bit mask: OR-ing two
    // of them yields a single, different advice value instead of applying
    // both. Issue one call per hint. Failures are ignored, as hints are
    // optional.
    madvise(m_scratchpad, m_size, MADV_RANDOM);
    madvise(m_scratchpad, m_size, MADV_WILLNEED);

    if (mlock(m_scratchpad, m_size) == 0) {
        m_flags.set(FLAG_LOCK, true);
    }

    return true;
}
void xmrig::VirtualMemory::freeLargePagesMemory() void xmrig::VirtualMemory::freeLargePagesMemory()
{ {
if (m_flags.test(FLAG_LOCK)) { if (m_flags.test(FLAG_LOCK)) {

View file

@ -156,6 +156,12 @@ bool xmrig::VirtualMemory::isHugepagesAvailable()
} }
// This backend (the one built on VirtualAlloc) has no 1 GB page support:
// always reports false.
bool xmrig::VirtualMemory::isOneGbPagesAvailable()
{
return false;
}
void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size) void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size)
{ {
return VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); return VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
@ -175,6 +181,12 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
} }
// Stub for the backend without 1 GB page support: the requested size is
// ignored and no memory is ever allocated (always returns nullptr).
void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t)
{
return nullptr;
}
void xmrig::VirtualMemory::flushInstructionCache(void *p, size_t size) void xmrig::VirtualMemory::flushInstructionCache(void *p, size_t size)
{ {
::FlushInstructionCache(GetCurrentProcess(), p, size); ::FlushInstructionCache(GetCurrentProcess(), p, size);
@ -221,6 +233,12 @@ bool xmrig::VirtualMemory::allocateLargePagesMemory()
return false; return false;
} }
// Stub for the backend without 1 GB page support: clears m_scratchpad and
// always reports failure.
bool xmrig::VirtualMemory::allocateOneGbPagesMemory()
{
m_scratchpad = nullptr;
return false;
}
void xmrig::VirtualMemory::freeLargePagesMemory() void xmrig::VirtualMemory::freeLargePagesMemory()
{ {

View file

@ -43,7 +43,7 @@ namespace randomx {
} }
template<size_t alignment> template<size_t alignment>
void AlignedAllocator<alignment>::freeMemory(void* ptr, size_t count) { void AlignedAllocator<alignment>::freeMemory(void* ptr, size_t) {
rx_aligned_free(ptr); rx_aligned_free(ptr);
} }

View file

@ -43,4 +43,9 @@ namespace randomx {
static void freeMemory(void*, size_t); static void freeMemory(void*, size_t);
}; };
// Allocator exposing a static allocMemory/freeMemory pair.
// NOTE(review): presumably backed by 1 GB huge pages, as the name suggests;
// the definitions are not visible here - confirm in the implementation file.
struct OneGbPageAllocator {
// Takes the number of bytes to allocate and returns a pointer to the memory.
static void* allocMemory(size_t);
// Takes a pointer and a size; presumably the values used with allocMemory - TODO confirm.
static void freeMemory(void*, size_t);
};
} }

View file

@ -0,0 +1,18 @@
;# Dataset read step (Ryzen variant).
;# rbp packs two 32-bit offsets: "ma" in the upper half and "mx" in the lower half.
;# The 64 bytes at [rdi + (ma & mask)] are XORed into r8-r15, the line at
;# [rdi + (mx & mask)] is prefetched, and the two halves of rbp are swapped.
;# NOTE(review): rdi is presumably the dataset base pointer and rax the value
;# mixed into "mx" by the preceding code - neither is set in this fragment.
;# Do not add, remove or reorder instructions without checking the code that
;# copies this fragment as raw bytes.
mov rcx, rbp ;# ecx = ma
shr rcx, 32
and ecx, RANDOMX_DATASET_BASE_MASK
xor rbp, rax ;# modify "mx"
;# rax is free after the XOR above; reuse it for the first qword of the "ma" item
mov rax, qword ptr [rdi+rcx]
mov edx, ebp ;# edx = mx
and edx, RANDOMX_DATASET_BASE_MASK
;# non-temporal prefetch of the item addressed by the updated "mx"
prefetchnta byte ptr [rdi+rdx]
ror rbp, 32 ;# swap "ma" and "mx"
;# fold the 64 bytes at [rdi+rcx] into r8-r15 (first qword was already loaded into rax)
xor r8, rax
xor r9, qword ptr [rdi+rcx+8]
xor r10, qword ptr [rdi+rcx+16]
xor r11, qword ptr [rdi+rcx+24]
xor r12, qword ptr [rdi+rcx+32]
xor r13, qword ptr [rdi+rcx+40]
xor r14, qword ptr [rdi+rcx+48]
xor r15, qword ptr [rdi+rcx+56]

View file

@ -167,7 +167,5 @@ namespace randomx {
typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
typedef void(DatasetDeallocFunc)(randomx_dataset*);
typedef void(CacheDeallocFunc)(randomx_cache*);
typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t); typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t);
} }

View file

@ -38,13 +38,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Global scope for C binding */ /* Global scope for C binding */
struct randomx_dataset { struct randomx_dataset {
uint8_t* memory = nullptr; uint8_t* memory = nullptr;
randomx::DatasetDeallocFunc* dealloc;
}; };
/* Global scope for C binding */ /* Global scope for C binding */
struct randomx_cache { struct randomx_cache {
uint8_t* memory = nullptr; uint8_t* memory = nullptr;
randomx::CacheDeallocFunc* dealloc;
randomx::JitCompiler* jit; randomx::JitCompiler* jit;
randomx::CacheInitializeFunc* initialize; randomx::CacheInitializeFunc* initialize;
randomx::DatasetInitFunc* datasetInit; randomx::DatasetInitFunc* datasetInit;

View file

@ -118,7 +118,7 @@ static void clear_code_cache(char* p1, char* p2)
# endif # endif
} }
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config) void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config, uint32_t)
{ {
uint32_t codePos = MainLoopBegin + 4; uint32_t codePos = MainLoopBegin + 4;

View file

@ -49,7 +49,7 @@ namespace randomx {
JitCompilerA64(); JitCompilerA64();
~JitCompilerA64(); ~JitCompilerA64();
void generateProgram(Program&, ProgramConfiguration&); void generateProgram(Program&, ProgramConfiguration&, uint32_t);
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
template<size_t N> template<size_t N>

View file

@ -44,7 +44,7 @@ namespace randomx {
JitCompilerFallback() { JitCompilerFallback() {
throw std::runtime_error("JIT compilation is not supported on this platform"); throw std::runtime_error("JIT compilation is not supported on this platform");
} }
void generateProgram(Program&, ProgramConfiguration&) { void generateProgram(Program&, ProgramConfiguration&, uint32_t) {
} }
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) { void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {

View file

@ -89,7 +89,6 @@ namespace randomx {
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin; const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load; const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start; const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init; const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin; const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init; const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
@ -105,7 +104,6 @@ namespace randomx {
const int32_t prefetchScratchpadSize = codePrefetchScratchpadEnd - codePrefetchScratchpad; const int32_t prefetchScratchpadSize = codePrefetchScratchpadEnd - codePrefetchScratchpad;
const int32_t prologueSize = codeLoopBegin - codePrologue; const int32_t prologueSize = codeLoopBegin - codePrologue;
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad; const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
@ -171,6 +169,7 @@ namespace randomx {
static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e }; static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e };
static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 }; static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC }; static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC };
static const uint8_t AND_OR_MOV_LDMXCSR_RYZEN[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x3B, 0x44, 0x24, 0xFC, 0x74, 0x09, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC };
static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 }; static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 }; static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 }; static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
@ -301,10 +300,24 @@ namespace randomx {
freePagedMemory(allocatedCode, CodeSize); freePagedMemory(allocatedCode, CodeSize);
} }
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) { void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
vm_flags = flags;
generateProgramPrologue(prog, pcfg); generateProgramPrologue(prog, pcfg);
memcpy(code + codePos, RandomX_CurrentConfig.codeReadDatasetTweaked, readDatasetSize);
codePos += readDatasetSize; uint8_t* p;
uint32_t n;
if (flags & RANDOMX_FLAG_RYZEN) {
p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked;
n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize;
}
else {
p = RandomX_CurrentConfig.codeReadDatasetTweaked;
n = RandomX_CurrentConfig.codeReadDatasetTweakedSize;
}
memcpy(code + codePos, p, n);
codePos += n;
generateProgramEpilogue(prog, pcfg); generateProgramEpilogue(prog, pcfg);
} }
@ -1000,7 +1013,12 @@ namespace randomx {
emit(ROL_RAX, p, pos); emit(ROL_RAX, p, pos);
emitByte(rotate, p, pos); emitByte(rotate, p, pos);
} }
emit(AND_OR_MOV_LDMXCSR, p, pos); if (vm_flags & RANDOMX_FLAG_RYZEN) {
emit(AND_OR_MOV_LDMXCSR_RYZEN, p, pos);
}
else {
emit(AND_OR_MOV_LDMXCSR, p, pos);
}
codePos = pos; codePos = pos;
} }

View file

@ -49,7 +49,7 @@ namespace randomx {
public: public:
JitCompilerX86(); JitCompilerX86();
~JitCompilerX86(); ~JitCompilerX86();
void generateProgram(Program&, ProgramConfiguration&); void generateProgram(Program&, ProgramConfiguration&, uint32_t);
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
template<size_t N> template<size_t N>
void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &); void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &);
@ -70,6 +70,7 @@ namespace randomx {
uint8_t* allocatedCode; uint8_t* allocatedCode;
uint8_t* code; uint8_t* code;
int32_t codePos; int32_t codePos;
uint32_t vm_flags;
static bool BranchesWithin32B; static bool BranchesWithin32B;

View file

@ -45,6 +45,7 @@
.global DECL(randomx_program_loop_load) .global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start) .global DECL(randomx_program_start)
.global DECL(randomx_program_read_dataset) .global DECL(randomx_program_read_dataset)
.global DECL(randomx_program_read_dataset_ryzen)
.global DECL(randomx_program_read_dataset_sshash_init) .global DECL(randomx_program_read_dataset_sshash_init)
.global DECL(randomx_program_read_dataset_sshash_fin) .global DECL(randomx_program_read_dataset_sshash_fin)
.global DECL(randomx_program_loop_store) .global DECL(randomx_program_loop_store)
@ -92,6 +93,7 @@ DECL(randomx_program_prologue_first_load):
and eax, RANDOMX_SCRATCHPAD_MASK and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32 ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK and edx, RANDOMX_SCRATCHPAD_MASK
stmxcsr dword ptr [rsp-20]
jmp DECL(randomx_program_loop_begin) jmp DECL(randomx_program_loop_begin)
.balign 64 .balign 64
@ -110,6 +112,9 @@ DECL(randomx_program_start):
DECL(randomx_program_read_dataset): DECL(randomx_program_read_dataset):
#include "asm/program_read_dataset.inc" #include "asm/program_read_dataset.inc"
DECL(randomx_program_read_dataset_ryzen):
#include "asm/program_read_dataset_ryzen.inc"
DECL(randomx_program_read_dataset_sshash_init): DECL(randomx_program_read_dataset_sshash_init):
#include "asm/program_read_dataset_sshash_init.inc" #include "asm/program_read_dataset_sshash_init.inc"

View file

@ -36,6 +36,7 @@ PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start PUBLIC randomx_program_start
PUBLIC randomx_program_read_dataset PUBLIC randomx_program_read_dataset
PUBLIC randomx_program_read_dataset_ryzen
PUBLIC randomx_program_read_dataset_sshash_init PUBLIC randomx_program_read_dataset_sshash_init
PUBLIC randomx_program_read_dataset_sshash_fin PUBLIC randomx_program_read_dataset_sshash_fin
PUBLIC randomx_dataset_init PUBLIC randomx_dataset_init
@ -80,6 +81,7 @@ randomx_program_prologue_first_load PROC
and eax, RANDOMX_SCRATCHPAD_MASK and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32 ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK and edx, RANDOMX_SCRATCHPAD_MASK
stmxcsr dword ptr [rsp-20]
jmp randomx_program_loop_begin jmp randomx_program_loop_begin
randomx_program_prologue_first_load ENDP randomx_program_prologue_first_load ENDP
@ -103,6 +105,10 @@ randomx_program_read_dataset PROC
include asm/program_read_dataset.inc include asm/program_read_dataset.inc
randomx_program_read_dataset ENDP randomx_program_read_dataset ENDP
randomx_program_read_dataset_ryzen PROC
include asm/program_read_dataset_ryzen.inc
randomx_program_read_dataset_ryzen ENDP
randomx_program_read_dataset_sshash_init PROC randomx_program_read_dataset_sshash_init PROC
include asm/program_read_dataset_sshash_init.inc include asm/program_read_dataset_sshash_init.inc
randomx_program_read_dataset_sshash_init ENDP randomx_program_read_dataset_sshash_init ENDP

View file

@ -37,6 +37,7 @@ extern "C" {
void randomx_program_loop_load(); void randomx_program_loop_load();
void randomx_program_start(); void randomx_program_start();
void randomx_program_read_dataset(); void randomx_program_read_dataset();
void randomx_program_read_dataset_ryzen();
void randomx_program_read_dataset_sshash_init(); void randomx_program_read_dataset_sshash_init();
void randomx_program_read_dataset_sshash_fin(); void randomx_program_read_dataset_sshash_fin();
void randomx_program_loop_store(); void randomx_program_loop_store();

View file

@ -157,8 +157,15 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
} }
{ {
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset; const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset;
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init; const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_ryzen;
memcpy(codeReadDatasetTweaked, a, b - a); memcpy(codeReadDatasetTweaked, a, b - a);
codeReadDatasetTweakedSize = b - a;
}
{
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_ryzen;
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
memcpy(codeReadDatasetRyzenTweaked, a, b - a);
codeReadDatasetRyzenTweakedSize = b - a;
} }
{ {
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init; const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
@ -191,10 +198,11 @@ void RandomX_ConfigurationBase::Apply()
#if defined(_M_X64) || defined(__x86_64__) #if defined(_M_X64) || defined(__x86_64__)
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1; *(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE; // Not needed right now because all variants use default dataset base size
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask; //const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask; //*(uint32_t*)(codeReadDatasetTweaked + 9) = DatasetBaseMask;
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask; //*(uint32_t*)(codeReadDatasetTweaked + 24) = DatasetBaseMask;
//*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated; *(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated; *(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
@ -264,42 +272,24 @@ RandomX_ConfigurationBase RandomX_CurrentConfig;
extern "C" { extern "C" {
randomx_cache *randomx_alloc_cache(randomx_flags flags) { randomx_cache *randomx_create_cache(randomx_flags flags, uint8_t *memory) {
randomx_cache *cache = nullptr; randomx_cache *cache = nullptr;
try { try {
cache = new randomx_cache(); cache = new randomx_cache();
switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) { switch (flags & RANDOMX_FLAG_JIT) {
case RANDOMX_FLAG_DEFAULT: case RANDOMX_FLAG_DEFAULT:
cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>; cache->jit = nullptr;
cache->jit = nullptr; cache->initialize = &randomx::initCache;
cache->initialize = &randomx::initCache; cache->datasetInit = &randomx::initDataset;
cache->datasetInit = &randomx::initDataset; cache->memory = memory;
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break; break;
case RANDOMX_FLAG_JIT: case RANDOMX_FLAG_JIT:
cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>; cache->jit = new randomx::JitCompiler();
cache->jit = new randomx::JitCompiler(); cache->initialize = &randomx::initCacheCompile;
cache->initialize = &randomx::initCacheCompile; cache->datasetInit = cache->jit->getDatasetInitFunc();
cache->datasetInit = cache->jit->getDatasetInitFunc(); cache->memory = memory;
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break;
case RANDOMX_FLAG_LARGE_PAGES:
cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
cache->jit = nullptr;
cache->initialize = &randomx::initCache;
cache->datasetInit = &randomx::initDataset;
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break;
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
cache->jit = new randomx::JitCompiler();
cache->initialize = &randomx::initCacheCompile;
cache->datasetInit = cache->jit->getDatasetInitFunc();
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
break; break;
default: default:
@ -323,31 +313,12 @@ extern "C" {
} }
void randomx_release_cache(randomx_cache* cache) { void randomx_release_cache(randomx_cache* cache) {
assert(cache != nullptr);
cache->dealloc(cache);
delete cache; delete cache;
} }
randomx_dataset *randomx_alloc_dataset(randomx_flags flags) { randomx_dataset *randomx_create_dataset(uint8_t *memory) {
randomx_dataset *dataset = nullptr; auto dataset = new randomx_dataset();
dataset->memory = memory;
try {
dataset = new randomx_dataset();
if (flags & RANDOMX_FLAG_LARGE_PAGES) {
dataset->dealloc = &randomx::deallocDataset<randomx::LargePageAllocator>;
dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
}
else {
dataset->dealloc = &randomx::deallocDataset<randomx::DefaultAllocator>;
dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
}
}
catch (std::exception &ex) {
if (dataset != nullptr) {
randomx_release_dataset(dataset);
dataset = nullptr;
}
}
return dataset; return dataset;
} }
@ -372,8 +343,6 @@ extern "C" {
} }
void randomx_release_dataset(randomx_dataset *dataset) { void randomx_release_dataset(randomx_dataset *dataset) {
assert(dataset != nullptr);
dataset->dealloc(dataset);
delete dataset; delete dataset;
} }
@ -431,6 +400,7 @@ extern "C" {
} }
vm->setScratchpad(scratchpad); vm->setScratchpad(scratchpad);
vm->setFlags(flags);
} }
catch (std::exception &ex) { catch (std::exception &ex) {
delete vm; delete vm;

View file

@ -48,6 +48,8 @@ enum randomx_flags {
RANDOMX_FLAG_HARD_AES = 2, RANDOMX_FLAG_HARD_AES = 2,
RANDOMX_FLAG_FULL_MEM = 4, RANDOMX_FLAG_FULL_MEM = 4,
RANDOMX_FLAG_JIT = 8, RANDOMX_FLAG_JIT = 8,
RANDOMX_FLAG_1GB_PAGES = 16,
RANDOMX_FLAG_RYZEN = 64,
}; };
@ -117,7 +119,10 @@ struct RandomX_ConfigurationBase
rx_vec_i128 fillAes4Rx4_Key[8]; rx_vec_i128 fillAes4Rx4_Key[8];
uint8_t codeShhPrefetchTweaked[20]; uint8_t codeShhPrefetchTweaked[20];
uint8_t codeReadDatasetTweaked[64]; uint8_t codeReadDatasetTweaked[256];
uint32_t codeReadDatasetTweakedSize;
uint8_t codeReadDatasetRyzenTweaked[256];
uint32_t codeReadDatasetRyzenTweakedSize;
uint8_t codeReadDatasetLightSshInitTweaked[68]; uint8_t codeReadDatasetLightSshInitTweaked[68];
uint8_t codePrefetchScratchpadTweaked[32]; uint8_t codePrefetchScratchpadTweaked[32];
@ -210,7 +215,7 @@ extern "C" {
* NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT * NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT
* is set and JIT compilation is not supported on the current platform. * is set and JIT compilation is not supported on the current platform.
*/ */
RANDOMX_EXPORT randomx_cache *randomx_alloc_cache(randomx_flags flags); RANDOMX_EXPORT randomx_cache *randomx_create_cache(randomx_flags flags, uint8_t *memory);
/** /**
* Initializes the cache memory and SuperscalarHash using the provided key value. * Initializes the cache memory and SuperscalarHash using the provided key value.
@ -237,7 +242,7 @@ RANDOMX_EXPORT void randomx_release_cache(randomx_cache* cache);
* @return Pointer to an allocated randomx_dataset structure. * @return Pointer to an allocated randomx_dataset structure.
* NULL is returned if memory allocation fails. * NULL is returned if memory allocation fails.
*/ */
RANDOMX_EXPORT randomx_dataset *randomx_alloc_dataset(randomx_flags flags); RANDOMX_EXPORT randomx_dataset *randomx_create_dataset(uint8_t *memory);
/** /**
* Gets the number of items contained in the dataset. * Gets the number of items contained in the dataset.

Some files were not shown because too many files have changed in this diff Show more