From 450b9ec19a77eaaf1332ebe8c0a5e231bdf2f245 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 3 Dec 2019 20:25:51 +0100 Subject: [PATCH 01/31] Update VirtualMemory_unix.cpp --- src/crypto/common/VirtualMemory_unix.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/crypto/common/VirtualMemory_unix.cpp b/src/crypto/common/VirtualMemory_unix.cpp index 3d099c761..ffa4b137d 100644 --- a/src/crypto/common/VirtualMemory_unix.cpp +++ b/src/crypto/common/VirtualMemory_unix.cpp @@ -58,12 +58,24 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size) void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size) { + int flag_1gb = 0; + # if defined(__APPLE__) void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0); # elif defined(__FreeBSD__) void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0); # else - void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0); + +# if defined(MAP_HUGE_1GB) + flag_1gb = (size > (1UL << 30)) ? MAP_HUGE_1GB : 0; +# elif defined(MAP_HUGE_SHIFT) + flag_1gb = (size > (1UL << 30)) ? (30 << MAP_HUGE_SHIFT) : 0; +# endif + + void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_1gb, 0, 0); + if (mem == MAP_FAILED) { + mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0); + } # endif return mem == MAP_FAILED ? nullptr : mem; From 99d995fdabccd575e619704251b873e1bcec26f4 Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 5 Dec 2019 12:16:05 +0700 Subject: [PATCH 02/31] v5.1.2-dev --- src/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.h b/src/version.h index 086378ebc..2cb9fef43 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "5.1.1" +#define APP_VERSION "5.1.2-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" @@ -36,7 +36,7 @@ #define APP_VER_MAJOR 5 #define APP_VER_MINOR 1 -#define APP_VER_PATCH 1 +#define APP_VER_PATCH 2 #ifdef _MSC_VER # if (_MSC_VER >= 1920) From a066f9a49c8ad4dd25a4fd369341249d35250aab Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 5 Dec 2019 12:47:31 +0700 Subject: [PATCH 03/31] hwloc for MSVC updated to v2.1.0. 
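The allocator change in PATCH 01 above follows a common Linux idiom: opportunistically request a 1 GB huge page for allocations larger than 1 GB, then fall back to the default huge-page size (typically 2 MB) when none is available. A minimal standalone sketch of that pattern; alloc_large_pages() is a hypothetical name, not the XMRig function, and it passes the portable fd of -1 where the patch passes 0 (Linux ignores the fd when MAP_ANONYMOUS is set):

    #include <stddef.h>
    #include <sys/mman.h>

    static void *alloc_large_pages(size_t size)
    {
        int flag_1gb = 0;

    #if defined(MAP_HUGE_1GB)
        flag_1gb = (size > (1UL << 30)) ? MAP_HUGE_1GB : 0;
    #elif defined(MAP_HUGE_SHIFT)
        flag_1gb = (size > (1UL << 30)) ? (30 << MAP_HUGE_SHIFT) : 0; /* 1 GB = 2^30 */
    #endif

        void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_1gb,
                         -1, 0);
        if (mem == MAP_FAILED && flag_1gb) {
            /* No 1 GB pages reserved or supported: retry with the default
             * huge-page size. The patch retries unconditionally; guarding on
             * flag_1gb merely avoids repeating an identical call. */
            mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE,
                       -1, 0);
        }
        return mem == MAP_FAILED ? NULL : mem;
    }

Note that 1 GB pages must have been reserved by the administrator (e.g. hugepagesz=1G hugepages=N on the kernel command line) before the first mmap() can succeed.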
--- cmake/flags.cmake | 4 +- src/3rdparty/hwloc/AUTHORS | 1 + src/3rdparty/hwloc/CMakeLists.txt | 2 +- src/3rdparty/hwloc/NEWS | 153 ++- src/3rdparty/hwloc/VERSION | 8 +- src/3rdparty/hwloc/include/hwloc.h | 188 ++- .../hwloc/include/hwloc/autogen/config.h | 6 +- src/3rdparty/hwloc/include/hwloc/bitmap.h | 29 +- src/3rdparty/hwloc/include/hwloc/cuda.h | 8 +- src/3rdparty/hwloc/include/hwloc/cudart.h | 8 +- src/3rdparty/hwloc/include/hwloc/deprecated.h | 4 +- src/3rdparty/hwloc/include/hwloc/distances.h | 53 +- src/3rdparty/hwloc/include/hwloc/gl.h | 2 +- .../hwloc/include/hwloc/glibc-sched.h | 5 +- src/3rdparty/hwloc/include/hwloc/helper.h | 32 +- src/3rdparty/hwloc/include/hwloc/intel-mic.h | 10 +- .../hwloc/include/hwloc/linux-libnuma.h | 3 +- src/3rdparty/hwloc/include/hwloc/linux.h | 3 +- src/3rdparty/hwloc/include/hwloc/nvml.h | 8 +- src/3rdparty/hwloc/include/hwloc/opencl.h | 119 +- .../hwloc/include/hwloc/openfabrics-verbs.h | 6 +- src/3rdparty/hwloc/include/hwloc/plugins.h | 170 ++- src/3rdparty/hwloc/include/hwloc/rename.h | 66 +- src/3rdparty/hwloc/include/hwloc/shmem.h | 2 +- .../hwloc/include/private/components.h | 12 +- src/3rdparty/hwloc/include/private/debug.h | 4 +- .../include/private/internal-components.h | 3 +- src/3rdparty/hwloc/include/private/misc.h | 23 +- src/3rdparty/hwloc/include/private/private.h | 101 +- src/3rdparty/hwloc/include/private/xml.h | 5 +- src/3rdparty/hwloc/src/base64.c | 2 +- src/3rdparty/hwloc/src/bind.c | 13 +- src/3rdparty/hwloc/src/bitmap.c | 55 +- src/3rdparty/hwloc/src/components.c | 370 ++++-- src/3rdparty/hwloc/src/diff.c | 15 +- src/3rdparty/hwloc/src/distances.c | 369 ++++-- src/3rdparty/hwloc/src/misc.c | 25 +- src/3rdparty/hwloc/src/pci-common.c | 401 ++++--- src/3rdparty/hwloc/src/shmem.c | 17 +- src/3rdparty/hwloc/src/topology-noos.c | 30 +- src/3rdparty/hwloc/src/topology-synthetic.c | 84 +- src/3rdparty/hwloc/src/topology-windows.c | 34 +- src/3rdparty/hwloc/src/topology-x86.c | 905 +++++++++------ .../hwloc/src/topology-xml-nolibxml.c | 48 +- src/3rdparty/hwloc/src/topology-xml.c | 470 ++++++-- src/3rdparty/hwloc/src/topology.c | 1026 ++++++++++++----- src/3rdparty/hwloc/src/traversal.c | 131 ++- 47 files changed, 3481 insertions(+), 1552 deletions(-) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index e9533eed5..d2bc70d05 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -57,9 +57,9 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU) add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE) elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) + set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL") + set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL") add_definitions(/D_CRT_SECURE_NO_WARNINGS) add_definitions(/D_CRT_NONSTDC_NO_WARNINGS) add_definitions(/DNOMINMAX) diff --git a/src/3rdparty/hwloc/AUTHORS b/src/3rdparty/hwloc/AUTHORS index 7187a723d..b4809d159 100644 --- a/src/3rdparty/hwloc/AUTHORS +++ b/src/3rdparty/hwloc/AUTHORS @@ -21,6 +21,7 @@ Nathalie Furmento CNRS Bryon Gloden Brice Goglin Inria Gilles Gouaillardet RIST +Valentin Hoyet Inria Joshua Hursey UWL Alexey Kardashevskiy IBM Rob Latham ANL diff --git a/src/3rdparty/hwloc/CMakeLists.txt b/src/3rdparty/hwloc/CMakeLists.txt index 431c11eb3..0e56b6fcd 100644 --- a/src/3rdparty/hwloc/CMakeLists.txt +++ b/src/3rdparty/hwloc/CMakeLists.txt @@ -5,7 +5,7 @@ include_directories(include) include_directories(src) 
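# (Context for the hunk below: /MT selects the static multithreaded CRT,
#  /O2 optimizes for speed, /Ob2 raises the inline-expansion level, and
#  /DNDEBUG disables assert(); the new line pins hwloc's Release flags
#  outright instead of appending to CMake's defaults.)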
add_definitions(/D_CRT_SECURE_NO_WARNINGS) -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT") +set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Ob2 /DNDEBUG") set(HEADERS include/hwloc.h diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS index 664c8d55c..99809e6a3 100644 --- a/src/3rdparty/hwloc/NEWS +++ b/src/3rdparty/hwloc/NEWS @@ -13,8 +13,96 @@ $HEADER$ This file contains the main features as well as overviews of specific bug fixes (and other actions) for each version of hwloc since version -0.9 (as initially released as "libtopology", then re-branded to "hwloc" -in v0.9.1). +0.9. + + +Version 2.1.0 +------------- +* API + + Add a new "Die" object (HWLOC_OBJ_DIE) for upcoming x86 processors + with multiple dies per package, in the x86 and Linux backends. + + Add the new HWLOC_OBJ_MEMCACHE object type for memory-side caches. + - They are filtered-out by default, except in command-line tools. + - They are only available on very recent platforms running Linux 5.2+ + and uptodate ACPI tables. + - The KNL MCDRAM in cache mode is still exposed as a L3 unless + HWLOC_KNL_MSCACHE_L3=0 in the environment. + + Add HWLOC_RESTRICT_FLAG_BYNODESET and _REMOVE_MEMLESS for restricting + topologies based on some memory nodes. + + Add hwloc_topology_set_components() for blacklisting some components + from being enabled in a topology. + + Add hwloc_bitmap_nr_ulongs() and hwloc_bitmap_from/to_ulongs(), + thanks to Junchao Zhang for the suggestion. + + Improve the API for dealing with disallowed resources + - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM is replaced with FLAG_INCLUDE_DISALLOWED + and --whole-system command-line options with --disallowed. + . Former names are still accepted for backward compatibility. + - Add hwloc_topology_allow() for changing allowed sets after load(). + - Add the HWLOC_ALLOW=all environment variable to totally ignore + administrative restrictions such as Linux Cgroups. + - Add disallowed_pu and disallowed_numa bits to the discovery support + structure. + + Group objects have a new "dont_merge" attribute to prevent them from + being automatically merged with identical parent or children. + + Add more distances-related features: + - Add hwloc_distances_get_name() to retrieve a string describing + what a distances structure contain. + - Add hwloc_distances_get_by_name() to retrieve distances structures + based on their name. + - Add hwloc_distances_release_remove() + - Distances may now cover objects of different types with new kind + HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES. +* Backends + + Add support for Linux 5.3 new sysfs cpu topology files with Die information. + + Add support for Intel v2 Extended Topology Enumeration in the x86 backend. + + Improve memory locality on Linux by using HMAT initiators (exposed + since Linux 5.2+), and NUMA distances for CPU-less NUMA nodes. + + The x86 backend now properly handles offline CPUs. + + Detect the locality of NVIDIA GPU OpenCL devices. + + Ignore NUMA nodes that correspond to NVIDIA GPU by default. + - They may be unignored if HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=1 in the environment. + - Fix their CPU locality and add info attributes to identify them. + Thanks to Max Katz and Edgar Leon for the help. + + Add support for IBM S/390 drawers. + + Rework the heuristics for discovering KNL Cluster and Memory modes + to stop assuming all CPUs are online (required for mOS support). + Thanks to Sharath K Bhat for testing patches. 
+ + Ignore NUMA node information from AMD topoext in the x86 backend, + unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment. + + Expose Linux DAX devices as hwloc Block OS devices. + + Remove support for /proc/cpuinfo-only topology discovery in Linux + kernel prior to 2.6.16. + + Disable POWER device-tree-based topology on Linux by default. + - It may be reenabled by setting HWLOC_USE_DT=1 in the environment. + + Discovery components are now divided in phases that may be individually + blacklisted. + - The linuxio component has been merged back into the linux component. +* Tools + + lstopo + - lstopo factorizes objects by default in the graphical output when + there are more than 4 identical children. + . New options --no-factorize and --factorize may be used to configure this. + . Hit the 'f' key to disable factorizing in interactive outputs. + - Both logical and OS/physical indexes are now displayed by default + for PU and NUMA nodes. + - The X11 and Windows interactive outputs support many keyboard + shortcuts to dynamically customize the attributes, legend, etc. + - Add --linespacing and change default margins and linespacing. + - Add --allow for changing allowed sets. + - Add a native SVG backend. Its graphical output may be slightly less + pretty than Cairo (still used by default if available) but the SVG + code provides attributes to manipulate objects from HTML/JS. + See dynamic_SVG_example.html for an example. + + Add --nodeset options to hwloc-calc for converting between cpusets and + nodesets. + + Add --no-smt to lstopo, hwloc-bind and hwloc-calc to ignore multiple + PU in SMT cores. + + hwloc-annotate may annotate multiple locations at once. + + Add a HTML/JS version of hwloc-ps. See contrib/hwloc-ps.www/README. + + Add bash completions. +* Misc + + Add several FAQ entries in "Compatibility between hwloc versions" + about API version, ABI, XML, Synthetic strings, and shmem topologies. Version 2.0.4 (also included in 1.11.13 when appropriate) @@ -214,6 +302,54 @@ Version 2.0.0 + hwloc now requires a C99 compliant compiler. +Version 1.11.13 (also included in 2.0.4) +--------------- +* Add support for Linux 5.3 new sysfs cpu topology files with Die information. +* Add support for Intel v2 Extended Topology Enumeration in the x86 backend. +* Tiles, Modules and Dies are exposed as Groups for now. + + HWLOC_DONT_MERGE_DIE_GROUPS=1 may be set in the environment to prevent + Die groups from being automatically merged with identical parent or children. +* Ignore NUMA node information from AMD topoext in the x86 backend, + unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment. +* Group objects have a new "dont_merge" attribute to prevent them from + being automatically merged with identical parent or children. + + +Version 1.11.12 (also included in 2.0.3) +--------------- +* Fix a corner case of hwloc_topology_restrict() where children would + become out-of-order. +* Fix the return length of export_xmlbuffer() functions to always + include the ending \0. + + +Version 1.11.11 (also included in 2.0.2) +--------------- +* Add support for Hygon Dhyana processors in the x86 backend, + thanks to Pu Wen for the patch. +* Fix symbol renaming to also rename internal components, + thanks to Evan Ramos for the patch. +* Fix build on HP-UX, thanks to Richard Lloyd for reporting the issues. +* Detect PCI link speed without being root on Linux >= 4.13. + + +Version 1.11.10 (also included in 2.0.1) +--------------- +* Fix detection of cores and hyperthreads on Mac OS X. 
+* Serialize pciaccess discovery to fix concurrent topology loads in + multiple threads. +* Fix first touch area memory binding on Linux when thread memory + binding is different. +* Some minor fixes to memory binding. +* Fix hwloc-dump-hwdata to only process SMBIOS information that correspond + to the KNL and KNM configuration. +* Add a heuristic for guessing KNL/KNM memory and cluster modes when + hwloc-dump-hwdata could not run as root earlier. +* Fix discovery of NVMe OS devices on Linux >= 4.0. +* Add get_area_memlocation() on Windows. +* Add CPUVendor, Model, ... attributes on Mac OS X. + + Version 1.11.9 -------------- * Add support for Zhaoxin ZX-C and ZX-D processors in the x86 backend, @@ -941,7 +1077,7 @@ Version 1.6.0 + Add a section about Synthetic topologies in the documentation. -Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6) +Version 1.5.2 (some of these changes are in 1.6.2 but not in 1.6) ------------- * Use libpciaccess instead of pciutils/libpci by default for I/O discovery. pciutils/libpci is only used if --enable-libpci is given to configure @@ -1076,9 +1212,8 @@ Version 1.4.2 for most of them. -Version 1.4.1 +Version 1.4.1 (contains all 1.3.2 changes) ------------- -* This release contains all changes from v1.3.2. * Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue. * Fix memory leaks in some get_membind() functions. * Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h) @@ -1091,7 +1226,7 @@ Version 1.4.1 issues. -Version 1.4.0 (does not contain all v1.3.2 changes) +Version 1.4.0 (does not contain all 1.3.2 changes) ------------- * Major features + Add "custom" interface and "assembler" tools to build multi-node @@ -1536,7 +1671,7 @@ Version 1.0.0 Version 0.9.4 (unreleased) --------------------------- +------------- * Fix reseting colors to normal in lstopo -.txt output. * Fix Linux pthread_t binding error report. @@ -1593,7 +1728,7 @@ Version 0.9.1 the physical location of IB devices. -Version 0.9 (libtopology) -------------------------- +Version 0.9 (formerly named "libtopology") +----------- * First release. diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION index 5ebc6bb47..9035ed9cc 100644 --- a/src/3rdparty/hwloc/VERSION +++ b/src/3rdparty/hwloc/VERSION @@ -8,8 +8,8 @@ # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. major=2 -minor=0 -release=4 +minor=1 +release=0 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Jun 03, 2019" +date="Sep 30, 2019" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. 
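# (Context for the bump below: the Libtool triple "current:revision:age"
#  encodes ABI compatibility. "current" numbers the newest interface,
#  "revision" is the patch level of that interface, and "age" counts how
#  many previous interface numbers remain supported. Moving from 15:3:0 to
#  16:0:1 therefore says hwloc 2.1.0 adds interfaces while staying
#  compatible with the previous ABI rather than breaking it.)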
-libhwloc_so_version=15:3:0 +libhwloc_so_version=16:0:1 libnetloc_so_version=0:0:0 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h index ee6da6fd1..e106e9cc0 100644 --- a/src/3rdparty/hwloc/include/hwloc.h +++ b/src/3rdparty/hwloc/include/hwloc.h @@ -53,7 +53,8 @@ #ifndef HWLOC_H #define HWLOC_H -#include +#include "hwloc/autogen/config.h" + #include #include #include @@ -62,13 +63,13 @@ /* * Symbol transforms */ -#include +#include "hwloc/rename.h" /* * Bitmap definitions */ -#include +#include "hwloc/bitmap.h" #ifdef __cplusplus @@ -86,13 +87,13 @@ extern "C" { * actually modifies the API. * * Users may check for available features at build time using this number - * (see \ref faq_upgrade). + * (see \ref faq_version_api). * * \note This should not be confused with HWLOC_VERSION, the library version. * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION * even if their HWLOC_VERSION are different. */ -#define HWLOC_API_VERSION 0x00020000 +#define HWLOC_API_VERSION 0x00020100 /** \brief Indicate at runtime which hwloc API version was used at build time. * @@ -101,7 +102,7 @@ extern "C" { HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); /** \brief Current component and plugin ABI version (see hwloc/plugins.h) */ -#define HWLOC_COMPONENT_ABI 5 +#define HWLOC_COMPONENT_ABI 6 /** @} */ @@ -186,7 +187,8 @@ typedef enum { HWLOC_OBJ_PACKAGE, /**< \brief Physical package. * The physical package that usually gets inserted * into a socket on the motherboard. - * A processor package usually contains multiple cores. + * A processor package usually contains multiple cores, + * and possibly some dies. */ HWLOC_OBJ_CORE, /**< \brief Core. * A computation unit (may be shared by several @@ -233,6 +235,10 @@ typedef enum { * It is usually close to some cores (the corresponding objects * are descendants of the NUMA node object in the hwloc tree). * + * This is the smallest object representing Memory resources, + * it cannot have any child except Misc objects. + * However it may have Memory-side cache parents. + * * There is always at least one such object in the topology * even if the machine is not NUMA. * @@ -279,6 +285,24 @@ typedef enum { * Misc objects have NULL CPU and node sets. */ + HWLOC_OBJ_MEMCACHE, /**< \brief Memory-side cache (filtered out by default). + * A cache in front of a specific NUMA node. + * + * This object always has at least one NUMA node as a memory child. + * + * Memory objects are not listed in the main children list, + * but rather in the dedicated Memory children list. + * + * Memory-side cache have a special depth ::HWLOC_TYPE_DEPTH_MEMCACHE + * instead of a normal depth just like other objects in the + * main tree. + */ + + HWLOC_OBJ_DIE, /**< \brief Die within a physical package. + * A subpart of the physical package, that contains multiple cores. + * \hideinitializer + */ + HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ } hwloc_obj_type_t; @@ -297,8 +321,8 @@ typedef enum hwloc_obj_bridge_type_e { /** \brief Type of a OS device. */ typedef enum hwloc_obj_osdev_type_e { - HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device. - * For instance "sda" on Linux. */ + HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device, or non-volatile memory device. + * For instance "sda" or "dax2.0" on Linux. */ HWLOC_OBJ_OSDEV_GPU, /**< \brief Operating system GPU device. 
* For instance ":0.0" for a GL display, * "card0" for a Linux DRM device. */ @@ -434,9 +458,15 @@ struct hwloc_obj { * These children are listed in \p memory_first_child. */ struct hwloc_obj *memory_first_child; /**< \brief First Memory child. - * NUMA nodes are listed here (\p memory_arity and \p memory_first_child) + * NUMA nodes and Memory-side caches are listed here + * (\p memory_arity and \p memory_first_child) * instead of in the normal children list. * See also hwloc_obj_type_is_memory(). + * + * A memory hierarchy starts from a normal CPU-side object + * (e.g. Package) and ends with NUMA nodes as leaves. + * There might exist some memory-side caches between them + * in the middle of the memory subtree. */ /**@}*/ @@ -471,7 +501,7 @@ struct hwloc_obj { * object and known how (the children path between this object and the PU * objects). * - * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, + * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set, * some of these CPUs may not be allowed for binding, * see hwloc_topology_get_allowed_cpuset(). * @@ -483,7 +513,7 @@ struct hwloc_obj { * * This may include not only the same as the cpuset field, but also some CPUs for * which topology information is unknown or incomplete, some offlines CPUs, and - * the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag + * the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED flag * is not set. * Thus no corresponding PU object may be found in the topology, because the * precise position is undefined. It is however known that it would be somewhere @@ -501,7 +531,7 @@ struct hwloc_obj { * * In the end, these nodes are those that are close to the current object. * - * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, + * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set, * some of these nodes may not be allowed for allocation, * see hwloc_topology_get_allowed_nodeset(). * @@ -516,7 +546,7 @@ struct hwloc_obj { * * This may include not only the same as the nodeset field, but also some NUMA * nodes for which topology information is unknown or incomplete, some offlines - * nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM + * nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED * flag is not set. * Thus no corresponding NUMA node object may be found in the topology, because the * precise position is undefined. It is however known that it would be @@ -770,7 +800,8 @@ enum hwloc_get_type_depth_e { HWLOC_TYPE_DEPTH_BRIDGE = -4, /**< \brief Virtual depth for bridge object level. \hideinitializer */ HWLOC_TYPE_DEPTH_PCI_DEVICE = -5, /**< \brief Virtual depth for PCI device object level. \hideinitializer */ HWLOC_TYPE_DEPTH_OS_DEVICE = -6, /**< \brief Virtual depth for software device object level. \hideinitializer */ - HWLOC_TYPE_DEPTH_MISC = -7 /**< \brief Virtual depth for Misc object. \hideinitializer */ + HWLOC_TYPE_DEPTH_MISC = -7, /**< \brief Virtual depth for Misc object. \hideinitializer */ + HWLOC_TYPE_DEPTH_MEMCACHE = -8 /**< \brief Virtual depth for MemCache object. \hideinitializer */ }; /** \brief Return the depth of parents where memory objects are attached. 
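The new Die and MemCache object types above, together with the HWLOC_TYPE_DEPTH_MEMCACHE virtual depth, can be exercised in a few lines of the hwloc 2.1 API. A minimal sketch, not part of the patch (MemCache objects are filtered out by default, so the type filter must be relaxed first):

    #include <hwloc.h>
    #include <stdio.h>

    int main(void)
    {
        hwloc_topology_t topo;

        hwloc_topology_init(&topo);
        /* MemCache is filtered out by default; keep it for this run. */
        hwloc_topology_set_type_filter(topo, HWLOC_OBJ_MEMCACHE, HWLOC_TYPE_FILTER_KEEP_ALL);
        hwloc_topology_load(topo);

        /* Both types work through the type-based helpers even though MemCache
         * sits at a virtual depth (HWLOC_TYPE_DEPTH_MEMCACHE) rather than a
         * normal level of the main tree. */
        printf("dies: %d, memory-side caches: %d\n",
               hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_DIE),
               hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_MEMCACHE));

        hwloc_topology_destroy(topo);
        return 0;
    }

On pre-2.1 hwloc neither enum value exists, so real code would guard this behind a check that HWLOC_API_VERSION >= 0x00020100.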
@@ -1781,6 +1812,31 @@ HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topo */ HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size); +/** \brief Flags to be passed to hwloc_topology_set_components() + */ +enum hwloc_topology_components_flag_e { + /** \brief Blacklist the target component from being used. + * \hideinitializer + */ + HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST = (1UL<<0) +}; + +/** \brief Prevent a discovery component from being used for a topology. + * + * \p name is the name of the discovery component that should not be used + * when loading topology \p topology. The name is a string such as "cuda". + * + * For components with multiple phases, it may also be suffixed with the name + * of a phase, for instance "linux:io". + * + * \p flags should be ::HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST. + * + * This may be used to avoid expensive parts of the discovery process. + * For instance, CUDA-specific discovery may be expensive and unneeded + * while generic I/O discovery could still be useful. + */ +HWLOC_DECLSPEC int hwloc_topology_set_components(hwloc_topology_t __hwloc_restrict topology, unsigned long flags, const char * __hwloc_restrict name); + /** @} */ @@ -1800,28 +1856,27 @@ HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restric * They may also be returned by hwloc_topology_get_flags(). */ enum hwloc_topology_flags_e { - /** \brief Detect the whole system, ignore reservations. + /** \brief Detect the whole system, ignore reservations, include disallowed objects. * * Gather all resources, even if some were disabled by the administrator. * For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes. * * When this flag is not set, PUs and NUMA nodes that are disallowed are not added to the topology. * Parent objects (package, core, cache, etc.) are added only if some of their children are allowed. + * All existing PUs and NUMA nodes in the topology are allowed. + * hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset() + * are equal to the root object cpuset and nodeset. * * When this flag is set, the actual sets of allowed PUs and NUMA nodes are given * by hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset(). * They may be smaller than the root object cpuset and nodeset. * - * When this flag is not set, all existing PUs and NUMA nodes in the topology - * are allowed. hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset() - * are equal to the root object cpuset and nodeset. - * * If the current topology is exported to XML and reimported later, this flag * should be set again in the reimported topology so that disallowed resources * are reimported as well. * \hideinitializer */ - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0), + HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED = (1UL<<0), /** \brief Assume that the selected backend provides the topology for the * system on which we are running. @@ -1901,6 +1956,10 @@ struct hwloc_topology_discovery_support { unsigned char numa; /** \brief Detecting the amount of memory in NUMA nodes is supported. */ unsigned char numa_memory; + /** \brief Detecting and identifying PU objects that are not available to the current process is supported. */ + unsigned char disallowed_pu; + /** \brief Detecting and identifying NUMA nodes that are not available to the current process is supported. 
*/ + unsigned char disallowed_numa; }; /** \brief Flags describing actual PU binding support for this topology. @@ -1998,7 +2057,7 @@ HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(h * * By default, most objects are kept (::HWLOC_TYPE_FILTER_KEEP_ALL). * Instruction caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE). - * Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). + * Die and Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). * * Note that group objects are also ignored individually (without the entire level) * when they do not bring structure. @@ -2063,11 +2122,15 @@ HWLOC_DECLSPEC int hwloc_topology_get_type_filter(hwloc_topology_t topology, hwl */ HWLOC_DECLSPEC int hwloc_topology_set_all_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); -/** \brief Set the filtering for all cache object types. +/** \brief Set the filtering for all CPU cache object types. + * + * Memory-side caches are not involved since they are not CPU caches. */ HWLOC_DECLSPEC int hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); -/** \brief Set the filtering for all instruction cache object types. +/** \brief Set the filtering for all CPU instruction cache object types. + * + * Memory-side caches are not involved since they are not CPU caches. */ HWLOC_DECLSPEC int hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); @@ -2110,6 +2173,19 @@ enum hwloc_restrict_flags_e { */ HWLOC_RESTRICT_FLAG_REMOVE_CPULESS = (1UL<<0), + /** \brief Restrict by nodeset instead of CPU set. + * Only keep objects whose nodeset is included or partially included in the given set. + * This flag may not be used with ::HWLOC_RESTRICT_FLAG_BYNODESET. + */ + HWLOC_RESTRICT_FLAG_BYNODESET = (1UL<<3), + + /** \brief Remove all objects that became Memory-less. + * By default, only objects that contain no PU and no memory are removed. + * This flag may only be used with ::HWLOC_RESTRICT_FLAG_BYNODESET. + * \hideinitializer + */ + HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS = (1UL<<4), + /** \brief Move Misc objects to ancestors if their parents are removed during restriction. * If this flag is not set, Misc objects are removed when their parents are removed. * \hideinitializer @@ -2123,28 +2199,70 @@ enum hwloc_restrict_flags_e { HWLOC_RESTRICT_FLAG_ADAPT_IO = (1UL<<2) }; -/** \brief Restrict the topology to the given CPU set. +/** \brief Restrict the topology to the given CPU set or nodeset. * * Topology \p topology is modified so as to remove all objects that - * are not included (or partially included) in the CPU set \p cpuset. + * are not included (or partially included) in the CPU set \p set. * All objects CPU and node sets are restricted accordingly. * + * If ::HWLOC_RESTRICT_FLAG_BYNODESET is passed in \p flags, + * \p set is considered a nodeset instead of a CPU set. + * * \p flags is a OR'ed set of ::hwloc_restrict_flags_e. * * \note This call may not be reverted by restricting back to a larger - * cpuset. Once dropped during restriction, objects may not be brought + * set. Once dropped during restriction, objects may not be brought * back, except by loading another topology with hwloc_topology_load(). * * \return 0 on success. * - * \return -1 with errno set to EINVAL if the input cpuset is invalid. + * \return -1 with errno set to EINVAL if the input set is invalid. 
 * The topology is not modified in this case.
 *
 * \return -1 with errno set to ENOMEM on failure to allocate internal data.
 * The topology is reinitialized in this case. It should be either
 * destroyed with hwloc_topology_destroy() or configured and loaded again.
 */
-HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags);
+HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_bitmap_t set, unsigned long flags);
+
+/** \brief Flags to be given to hwloc_topology_allow(). */
+enum hwloc_allow_flags_e {
+  /** \brief Mark all objects as allowed in the topology.
+   *
+   * \p cpuset and \p nodeset given to hwloc_topology_allow() must be \c NULL.
+   * \hideinitializer */
+  HWLOC_ALLOW_FLAG_ALL = (1UL<<0),
+
+  /** \brief Only allow objects that are available to the current process.
+   *
+   * The topology must have ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM so that the set
+   * of available resources can actually be retrieved from the operating system.
+   *
+   * \p cpuset and \p nodeset given to hwloc_topology_allow() must be \c NULL.
+   * \hideinitializer */
+  HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS = (1UL<<1),
+
+  /** \brief Allow a custom set of objects, given to hwloc_topology_allow() as \p cpuset and/or \p nodeset parameters.
+   * \hideinitializer */
+  HWLOC_ALLOW_FLAG_CUSTOM = (1UL<<2)
+};
+
+/** \brief Change the sets of allowed PUs and NUMA nodes in the topology.
+ *
+ * This function only works if the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED
+ * was set on the topology. It does not modify any object, it only changes
+ * the sets returned by hwloc_topology_get_allowed_cpuset() and
+ * hwloc_topology_get_allowed_nodeset().
+ *
+ * It is notably useful when importing a topology from another process
+ * running in a different Linux Cgroup.
+ *
+ * \p flags must be set to one flag among ::hwloc_allow_flags_e.
+ *
+ * \note Removing objects from a topology should rather be performed with
+ * hwloc_topology_restrict().
+ */ +HWLOC_DECLSPEC int hwloc_topology_allow(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, hwloc_const_nodeset_t nodeset, unsigned long flags); /** \brief Add a MISC object as a leaf of the topology * @@ -2250,21 +2368,21 @@ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src /* high-level helpers */ -#include +#include "hwloc/helper.h" /* inline code of some functions above */ -#include +#include "hwloc/inlines.h" /* exporting to XML or synthetic */ -#include +#include "hwloc/export.h" /* distances */ -#include +#include "hwloc/distances.h" /* topology diffs */ -#include +#include "hwloc/diff.h" /* deprecated headers */ -#include +#include "hwloc/deprecated.h" #endif /* HWLOC_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index 14d4481d2..36669de55 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -11,10 +11,10 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.0.4" +#define HWLOC_VERSION "2.1.0" #define HWLOC_VERSION_MAJOR 2 -#define HWLOC_VERSION_MINOR 0 -#define HWLOC_VERSION_RELEASE 4 +#define HWLOC_VERSION_MINOR 1 +#define HWLOC_VERSION_RELEASE 0 #define HWLOC_VERSION_GREEK "" #define __hwloc_restrict diff --git a/src/3rdparty/hwloc/include/hwloc/bitmap.h b/src/3rdparty/hwloc/include/hwloc/bitmap.h index bae623c8c..d5b0ea020 100644 --- a/src/3rdparty/hwloc/include/hwloc/bitmap.h +++ b/src/3rdparty/hwloc/include/hwloc/bitmap.h @@ -13,7 +13,8 @@ #ifndef HWLOC_BITMAP_H #define HWLOC_BITMAP_H -#include +#include "hwloc/autogen/config.h" + #include @@ -198,6 +199,9 @@ HWLOC_DECLSPEC int hwloc_bitmap_from_ulong(hwloc_bitmap_t bitmap, unsigned long /** \brief Setup bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */ HWLOC_DECLSPEC int hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask); +/** \brief Setup bitmap \p bitmap from unsigned longs \p masks used as first \p nr subsets */ +HWLOC_DECLSPEC int hwloc_bitmap_from_ulongs(hwloc_bitmap_t bitmap, unsigned nr, const unsigned long *masks); + /* * Modifying bitmaps. @@ -256,6 +260,29 @@ HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ulong(hwloc_const_bitmap_t bitmap) /** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */ HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure; +/** \brief Convert the first \p nr subsets of bitmap \p bitmap into the array of \p nr unsigned long \p masks + * + * \p nr may be determined earlier with hwloc_bitmap_nr_ulongs(). + * + * \return 0 + */ +HWLOC_DECLSPEC int hwloc_bitmap_to_ulongs(hwloc_const_bitmap_t bitmap, unsigned nr, unsigned long *masks); + +/** \brief Return the number of unsigned longs required for storing bitmap \p bitmap entirely + * + * This is the number of contiguous unsigned longs from the very first bit of the bitmap + * (even if unset) up to the last set bit. + * This is useful for knowing the \p nr parameter to pass to hwloc_bitmap_to_ulongs() + * (or which calls to hwloc_bitmap_to_ith_ulong() are needed) + * to entirely convert a bitmap into multiple unsigned longs. + * + * When called on the output of hwloc_topology_get_topology_cpuset(), + * the returned number is large enough for all cpusets of the topology. + * + * \return -1 if \p bitmap is infinite. 
+ */ +HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + /** \brief Test whether index \p id is part of bitmap \p bitmap. * * \return 1 if the bit at index \p id is set in bitmap \p bitmap, 0 otherwise. diff --git a/src/3rdparty/hwloc/include/hwloc/cuda.h b/src/3rdparty/hwloc/include/hwloc/cuda.h index 77c8473e6..6f0cda4cd 100644 --- a/src/3rdparty/hwloc/include/hwloc/cuda.h +++ b/src/3rdparty/hwloc/include/hwloc/cuda.h @@ -16,11 +16,11 @@ #ifndef HWLOC_CUDA_H #define HWLOC_CUDA_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #include diff --git a/src/3rdparty/hwloc/include/hwloc/cudart.h b/src/3rdparty/hwloc/include/hwloc/cudart.h index 63c7f59c6..688b8421e 100644 --- a/src/3rdparty/hwloc/include/hwloc/cudart.h +++ b/src/3rdparty/hwloc/include/hwloc/cudart.h @@ -16,11 +16,11 @@ #ifndef HWLOC_CUDART_H #define HWLOC_CUDART_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #include /* for CUDA_VERSION */ diff --git a/src/3rdparty/hwloc/include/hwloc/deprecated.h b/src/3rdparty/hwloc/include/hwloc/deprecated.h index 8f3b1459a..4a231f507 100644 --- a/src/3rdparty/hwloc/include/hwloc/deprecated.h +++ b/src/3rdparty/hwloc/include/hwloc/deprecated.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -21,6 +21,8 @@ extern "C" { #endif +/* backward compat with v2.0 before WHOLE_SYSTEM renaming */ +#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED /* backward compat with v1.11 before System removal */ #define HWLOC_OBJ_SYSTEM HWLOC_OBJ_MACHINE /* backward compat with v1.10 before Socket->Package renaming */ diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h index d523f29fc..b7baed8a4 100644 --- a/src/3rdparty/hwloc/include/hwloc/distances.h +++ b/src/3rdparty/hwloc/include/hwloc/distances.h @@ -87,7 +87,12 @@ enum hwloc_distances_kind_e { * Such values are currently ignored for distance-based grouping. * \hideinitializer */ - HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3) + HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3), + + /** \brief This distances structure covers objects of different types. + * \hideinitializer + */ + HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4) }; /** \brief Retrieve distance matrices. @@ -131,20 +136,32 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, * * Identical to hwloc_distances_get() with the additional \p type filter. 
*/ -static __hwloc_inline int +HWLOC_DECLSPEC int hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned *nr, struct hwloc_distances_s **distances, - unsigned long kind, unsigned long flags) -{ - int depth = hwloc_get_type_depth(topology, type); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) { - *nr = 0; - return 0; - } - return hwloc_distances_get_by_depth(topology, depth, nr, distances, kind, flags); -} + unsigned long kind, unsigned long flags); -/** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). */ +/** \brief Retrieve a distance matrix with the given name. + * + * Usually only one distances structure may match a given name. + */ +HWLOC_DECLSPEC int +hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, + unsigned *nr, struct hwloc_distances_s **distances, + unsigned long flags); + +/** \brief Get a description of what a distances structure contains. + * + * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT), + * or NULL if unknown. + */ +HWLOC_DECLSPEC const char * +hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances); + +/** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). + * + * \note This function is not required if the structure is removed with hwloc_distances_release_remove(). + */ HWLOC_DECLSPEC void hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances); @@ -221,11 +238,11 @@ enum hwloc_distances_add_flag_e { * The distance from object i to object j is in slot i*nbobjs+j. * * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e. + * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically added + * if objects of different types are given. * * \p flags configures the behavior of the function using an optional OR'ed set of * ::hwloc_distances_add_flag_e. - * - * Objects must be of the same type. They cannot be of type Group. */ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, @@ -237,7 +254,7 @@ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology, * gathered through the OS. * * If these distances were used to group objects, these additional - *Group objects are not removed from the topology. + * Group objects are not removed from the topology. */ HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology); @@ -260,6 +277,12 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type) return hwloc_distances_remove_by_depth(topology, depth); } +/** \brief Release and remove the given distance matrice from the topology. + * + * This function includes a call to hwloc_distances_release(). 
+ */ +HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances); + /** @} */ diff --git a/src/3rdparty/hwloc/include/hwloc/gl.h b/src/3rdparty/hwloc/include/hwloc/gl.h index 3e643fa9a..897ef784b 100644 --- a/src/3rdparty/hwloc/include/hwloc/gl.h +++ b/src/3rdparty/hwloc/include/hwloc/gl.h @@ -14,7 +14,7 @@ #ifndef HWLOC_GL_H #define HWLOC_GL_H -#include +#include "hwloc.h" #include #include diff --git a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h index 1f9ba7cdd..99659e03c 100644 --- a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h +++ b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h @@ -17,8 +17,9 @@ #ifndef HWLOC_GLIBC_SCHED_H #define HWLOC_GLIBC_SCHED_H -#include -#include +#include "hwloc.h" +#include "hwloc/helper.h" + #include #if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority) diff --git a/src/3rdparty/hwloc/include/hwloc/helper.h b/src/3rdparty/hwloc/include/hwloc/helper.h index d48df15f3..bc27be591 100644 --- a/src/3rdparty/hwloc/include/hwloc/helper.h +++ b/src/3rdparty/hwloc/include/hwloc/helper.h @@ -527,30 +527,36 @@ hwloc_obj_type_is_io(hwloc_obj_type_t type); * * Memory objects are objects attached to their parents * in the Memory children list. - * This current only includes NUMA nodes. + * This current includes NUMA nodes and Memory-side caches. * * \return 1 if an object of type \p type is a Memory object, 0 otherwise. */ HWLOC_DECLSPEC int hwloc_obj_type_is_memory(hwloc_obj_type_t type); -/** \brief Check whether an object type is a Cache (Data, Unified or Instruction). +/** \brief Check whether an object type is a CPU Cache (Data, Unified or Instruction). + * + * Memory-side caches are not CPU caches. * * \return 1 if an object of type \p type is a Cache, 0 otherwise. */ HWLOC_DECLSPEC int hwloc_obj_type_is_cache(hwloc_obj_type_t type); -/** \brief Check whether an object type is a Data or Unified Cache. +/** \brief Check whether an object type is a CPU Data or Unified Cache. * - * \return 1 if an object of type \p type is a Data or Unified Cache, 0 otherwise. + * Memory-side caches are not CPU caches. + * + * \return 1 if an object of type \p type is a CPU Data or Unified Cache, 0 otherwise. */ HWLOC_DECLSPEC int hwloc_obj_type_is_dcache(hwloc_obj_type_t type); -/** \brief Check whether an object type is a Instruction Cache, +/** \brief Check whether an object type is a CPU Instruction Cache, * - * \return 1 if an object of type \p type is a Instruction Cache, 0 otherwise. + * Memory-side caches are not CPU caches. + * + * \return 1 if an object of type \p type is a CPU Instruction Cache, 0 otherwise. */ HWLOC_DECLSPEC int hwloc_obj_type_is_icache(hwloc_obj_type_t type); @@ -914,7 +920,7 @@ hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_ * \note The returned cpuset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * - * \note This is equivalent to retrieving the root object complete CPU-set. + * \note This is equivalent to retrieving the root object CPU-set. */ HWLOC_DECLSPEC hwloc_const_cpuset_t hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; @@ -923,11 +929,11 @@ hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_ * * \return the CPU set of allowed logical processors of the system. 
* - * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set, + * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set, * this is identical to hwloc_topology_get_topology_cpuset(), which means * all PUs are allowed. * - * \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying + * \note If ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was set, applying * hwloc_bitmap_intersects() on the result of this function and on an object * cpuset checks whether there are allowed PUs inside that object. * Applying hwloc_bitmap_and() returns the list of these allowed PUs. @@ -945,7 +951,7 @@ hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_p * \note The returned nodeset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * - * \note This is equivalent to retrieving the root object complete CPU-set. + * \note This is equivalent to retrieving the root object complete nodeset. */ HWLOC_DECLSPEC hwloc_const_nodeset_t hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; @@ -959,7 +965,7 @@ hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute * \note The returned nodeset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * - * \note This is equivalent to retrieving the root object complete CPU-set. + * \note This is equivalent to retrieving the root object nodeset. */ HWLOC_DECLSPEC hwloc_const_nodeset_t hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; @@ -968,11 +974,11 @@ hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute * * \return the node set of allowed memory of the system. * - * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set, + * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set, * this is identical to hwloc_topology_get_topology_nodeset(), which means * all NUMA nodes are allowed. * - * \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying + * \note If ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was set, applying * hwloc_bitmap_intersects() on the result of this function and on an object * nodeset checks whether there are allowed NUMA nodes inside that object. * Applying hwloc_bitmap_and() returns the list of these allowed NUMA nodes. 
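The allowed-set helpers documented above translate directly into code. A short sketch (hwloc 2.1 names as declared above, not part of the patch) that keeps disallowed PUs in the topology and then extracts the allowed PUs of the first package:

    #include <hwloc.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        hwloc_topology_t topo;
        hwloc_bitmap_t allowed = hwloc_bitmap_alloc();
        hwloc_obj_t pkg;
        char *s;

        hwloc_topology_init(&topo);
        /* Keep disallowed PUs/NUMA nodes (e.g. outside our Linux Cgroup) in the tree. */
        hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED);
        hwloc_topology_load(topo);

        pkg = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, 0);
        if (pkg) {
            /* As the notes above describe: AND an object cpuset with the
             * allowed set to get the PUs that binding may actually use. */
            hwloc_bitmap_and(allowed, pkg->cpuset,
                             hwloc_topology_get_allowed_cpuset(topo));
            hwloc_bitmap_asprintf(&s, allowed);
            printf("allowed PUs in package 0: %s\n", s);
            free(s);
        }

        hwloc_bitmap_free(allowed);
        hwloc_topology_destroy(topo);
        return 0;
    }

Without HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED the intersection is a no-op, since the allowed set then equals the root cpuset, as the flag documentation earlier in this patch explains.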
diff --git a/src/3rdparty/hwloc/include/hwloc/intel-mic.h b/src/3rdparty/hwloc/include/hwloc/intel-mic.h index 6f6f9d1b3..c504cd7e0 100644 --- a/src/3rdparty/hwloc/include/hwloc/intel-mic.h +++ b/src/3rdparty/hwloc/include/hwloc/intel-mic.h @@ -13,11 +13,13 @@ #ifndef HWLOC_INTEL_MIC_H #define HWLOC_INTEL_MIC_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" + #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" + #include #include #endif diff --git a/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h index 7cea4166b..0e2cc19f7 100644 --- a/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h +++ b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h @@ -15,7 +15,8 @@ #ifndef HWLOC_LINUX_LIBNUMA_H #define HWLOC_LINUX_LIBNUMA_H -#include +#include "hwloc.h" + #include diff --git a/src/3rdparty/hwloc/include/hwloc/linux.h b/src/3rdparty/hwloc/include/hwloc/linux.h index c409e1c2a..ecc86be3d 100644 --- a/src/3rdparty/hwloc/include/hwloc/linux.h +++ b/src/3rdparty/hwloc/include/hwloc/linux.h @@ -15,7 +15,8 @@ #ifndef HWLOC_LINUX_H #define HWLOC_LINUX_H -#include +#include "hwloc.h" + #include diff --git a/src/3rdparty/hwloc/include/hwloc/nvml.h b/src/3rdparty/hwloc/include/hwloc/nvml.h index 197108660..1bc2599f6 100644 --- a/src/3rdparty/hwloc/include/hwloc/nvml.h +++ b/src/3rdparty/hwloc/include/hwloc/nvml.h @@ -13,11 +13,11 @@ #ifndef HWLOC_NVML_H #define HWLOC_NVML_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #include diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h index 058968d74..ebf09848f 100644 --- a/src/3rdparty/hwloc/include/hwloc/opencl.h +++ b/src/3rdparty/hwloc/include/hwloc/opencl.h @@ -14,19 +14,17 @@ #ifndef HWLOC_OPENCL_H #define HWLOC_OPENCL_H -#include -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #ifdef __APPLE__ #include -#include #else #include -#include #endif #include @@ -37,17 +35,75 @@ extern "C" { #endif +/* OpenCL extensions aren't always shipped with default headers, and + * they don't always reflect what the installed implementations support. + * Try everything and let the implementation return errors when non supported. + */ +/* Copyright (c) 2008-2018 The Khronos Group Inc. */ + +/* needs "cl_amd_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */ +#define HWLOC_CL_DEVICE_TOPOLOGY_AMD 0x4037 +typedef union { + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} hwloc_cl_device_topology_amd; +#define HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 + +/* needs "cl_nv_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */ +#define HWLOC_CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#define HWLOC_CL_DEVICE_PCI_SLOT_ID_NV 0x4009 + + /** \defgroup hwlocality_opencl Interoperability with OpenCL * * This interface offers ways to retrieve topology information about * OpenCL devices. * - * Only the AMD OpenCL interface currently offers useful locality information - * about its devices. + * Only AMD and NVIDIA OpenCL implementations currently offer useful locality + * information about their devices. 
* * @{ */ +/** \brief Return the domain, bus and device IDs of the OpenCL device \p device. + * + * Device \p device must match the local machine. + */ +static __hwloc_inline int +hwloc_opencl_get_device_pci_busid(cl_device_id device, + unsigned *domain, unsigned *bus, unsigned *dev, unsigned *func) +{ + hwloc_cl_device_topology_amd amdtopo; + cl_uint nvbus, nvslot; + cl_int clret; + + clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); + if (CL_SUCCESS == clret + && HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD == amdtopo.raw.type) { + *domain = 0; /* can't do anything better */ + *bus = (unsigned) amdtopo.pcie.bus; + *dev = (unsigned) amdtopo.pcie.device; + *func = (unsigned) amdtopo.pcie.function; + return 0; + } + + clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_BUS_ID_NV, sizeof(nvbus), &nvbus, NULL); + if (CL_SUCCESS == clret) { + clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_SLOT_ID_NV, sizeof(nvslot), &nvslot, NULL); + if (CL_SUCCESS == clret) { + /* FIXME: PCI bus only uses 8bit, assume nvidia hardcodes the domain in higher bits */ + *domain = nvbus >> 8; + *bus = nvbus & 0xff; + /* non-documented but used in many other projects */ + *dev = nvslot >> 3; + *func = nvslot & 0x7; + return 0; + } + } + + return -1; +} + /** \brief Get the CPU set of logical processors that are physically * close to OpenCL device \p device. * @@ -62,7 +118,7 @@ extern "C" { * and hwloc_opencl_get_device_osdev_by_index(). * * This function is currently only implemented in a meaningful way for - * Linux with the AMD OpenCL implementation; other systems will simply + * Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply * get a full cpuset. */ static __hwloc_inline int @@ -70,35 +126,28 @@ hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse cl_device_id device __hwloc_attribute_unused, hwloc_cpuset_t set) { -#if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD) - /* If we're on Linux + AMD OpenCL, use the AMD extension + the sysfs mechanism to get the local cpus */ +#if (defined HWLOC_LINUX_SYS) + /* If we're on Linux, try AMD/NVIDIA extensions + the sysfs mechanism to get the local cpus */ #define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128 char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX]; - cl_device_topology_amd amdtopo; - cl_int clret; + unsigned pcidomain, pcibus, pcidev, pcifunc; if (!hwloc_topology_is_thissystem(topology)) { errno = EINVAL; return -1; } - clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); - if (CL_SUCCESS != clret) { - hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); - return 0; - } - if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { + if (hwloc_opencl_get_device_pci_busid(device, &pcidomain, &pcibus, &pcidev, &pcifunc) < 0) { hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); return 0; } - sprintf(path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus", - (unsigned) amdtopo.pcie.bus, (unsigned) amdtopo.pcie.device, (unsigned) amdtopo.pcie.function); + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus", pcidomain, pcibus, pcidev, pcifunc); if (hwloc_linux_read_path_as_cpumask(path, set) < 0 || hwloc_bitmap_iszero(set)) hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); #else - /* Non-Linux + AMD OpenCL systems simply get a full cpuset */ + /* Non-Linux systems simply get a full cpuset */ hwloc_bitmap_copy(set, 
hwloc_topology_get_complete_cpuset(topology)); #endif return 0; @@ -140,8 +189,8 @@ hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology, * Use OpenCL device attributes to find the corresponding hwloc OS device object. * Return NULL if there is none or if useful attributes are not available. * - * This function currently only works on AMD OpenCL devices that support - * the CL_DEVICE_TOPOLOGY_AMD extension. hwloc_opencl_get_device_osdev_by_index() + * This function currently only works on AMD and NVIDIA OpenCL devices that support + * relevant OpenCL extensions. hwloc_opencl_get_device_osdev_by_index() * should be preferred whenever possible, i.e. when platform and device index * are known. * @@ -159,17 +208,10 @@ static __hwloc_inline hwloc_obj_t hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused, cl_device_id device __hwloc_attribute_unused) { -#ifdef CL_DEVICE_TOPOLOGY_AMD hwloc_obj_t osdev; - cl_device_topology_amd amdtopo; - cl_int clret; + unsigned pcidomain, pcibus, pcidevice, pcifunc; - clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); - if (CL_SUCCESS != clret) { - errno = EINVAL; - return NULL; - } - if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { + if (hwloc_opencl_get_device_pci_busid(device, &pcidomain, &pcibus, &pcidevice, &pcifunc) < 0) { errno = EINVAL; return NULL; } @@ -181,18 +223,15 @@ hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused continue; if (pcidev && pcidev->type == HWLOC_OBJ_PCI_DEVICE - && pcidev->attr->pcidev.domain == 0 - && pcidev->attr->pcidev.bus == amdtopo.pcie.bus - && pcidev->attr->pcidev.dev == amdtopo.pcie.device - && pcidev->attr->pcidev.func == amdtopo.pcie.function) + && pcidev->attr->pcidev.domain == pcidomain + && pcidev->attr->pcidev.bus == pcibus + && pcidev->attr->pcidev.dev == pcidevice + && pcidev->attr->pcidev.func == pcifunc) return osdev; /* if PCI are filtered out, we need a info attr to match on */ } return NULL; -#else - return NULL; -#endif } /** @} */ diff --git a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h index 174ab4a57..d247a8b1c 100644 --- a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h +++ b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h @@ -19,10 +19,10 @@ #ifndef HWLOC_OPENFABRICS_VERBS_H #define HWLOC_OPENFABRICS_VERBS_H -#include -#include +#include "hwloc.h" +#include "hwloc/autogen/config.h" #ifdef HWLOC_LINUX_SYS -#include +#include "hwloc/linux.h" #endif #include diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h index cb22000d4..0f53ac4d4 100644 --- a/src/3rdparty/hwloc/include/hwloc/plugins.h +++ b/src/3rdparty/hwloc/include/hwloc/plugins.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2017 Inria. All rights reserved. + * Copyright © 2013-2019 Inria. All rights reserved. * Copyright © 2016 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -13,7 +13,8 @@ struct hwloc_backend; -#include +#include "hwloc.h" + #ifdef HWLOC_INSIDE_PLUGIN /* needed for hwloc_plugin_check_namespace() */ #include @@ -25,52 +26,36 @@ struct hwloc_backend; * @{ */ -/** \brief Discovery component type */ -typedef enum hwloc_disc_component_type_e { - /** \brief CPU-only discovery through the OS, or generic no-OS support. - * \hideinitializer */ - HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0), - - /** \brief xml or synthetic, - * platform-specific components such as bgq. 
-   * Anything the discovers CPU and everything else.
-   * No misc backend is expected to complement a global component.
-   * \hideinitializer */
-  HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1),
-
-  /** \brief OpenCL, Cuda, etc.
-   * \hideinitializer */
-  HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2)
-} hwloc_disc_component_type_t;
-
 /** \brief Discovery component structure
  *
  * This is the major kind of components, taking care of the discovery.
  * They are registered by generic components, either statically-built or as plugins.
  */
 struct hwloc_disc_component {
-  /** \brief Discovery component type */
-  hwloc_disc_component_type_t type;
-
   /** \brief Name.
    * If this component is built as a plugin, this name does not have to match the plugin filename.
    */
   const char *name;
 
-  /** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e.
+  /** \brief Discovery phases performed by this component.
+   * OR'ed set of ::hwloc_disc_phase_t
+   */
+  unsigned phases;
+
+  /** \brief Component phases to exclude, as an OR'ed set of ::hwloc_disc_phase_t.
    *
-   * For a GLOBAL component, this usually includes all other types (~0).
+   * For a GLOBAL component, this usually includes all other phases (\c ~0UL).
    *
    * Other components only exclude types that may bring conflicting
    * topology information. MISC components should likely not be excluded
    * since they usually bring non-primary additional information. */
-  unsigned excludes;
+  unsigned excluded_phases;
 
   /** \brief Instantiate callback to create a backend from the component.
    * Parameters data1, data2, data3 are NULL except for components
    * that have special enabling routines such as hwloc_topology_set_xml(). */
-  struct hwloc_backend * (*instantiate)(struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3);
+  struct hwloc_backend * (*instantiate)(struct hwloc_topology *topology, struct hwloc_disc_component *component, unsigned excluded_phases, const void *data1, const void *data2, const void *data3);
 
   /** \brief Component priority.
    * Used to sort topology->components, higher priority first.
@@ -107,6 +92,72 @@ struct hwloc_disc_component {
  * @{
  */
 
+/** \brief Discovery phase */
+typedef enum hwloc_disc_phase_e {
+  /** \brief xml or synthetic, platform-specific components such as bgq.
+   * Discovers everything including CPU, memory, I/O and everything else.
+   * A component with a Global phase usually excludes all other phases.
+   * \hideinitializer */
+  HWLOC_DISC_PHASE_GLOBAL = (1U<<0),
+
+  /** \brief CPU discovery.
+   * \hideinitializer */
+  HWLOC_DISC_PHASE_CPU = (1U<<1),
+
+  /** \brief Attach memory to existing CPU objects.
+   * \hideinitializer */
+  HWLOC_DISC_PHASE_MEMORY = (1U<<2),
+
+  /** \brief Attach PCI devices and bridges to existing CPU objects.
+   * \hideinitializer */
+  HWLOC_DISC_PHASE_PCI = (1U<<3),
+
+  /** \brief I/O discovery that requires PCI devices (OS devices such as OpenCL, CUDA, etc.).
+   * \hideinitializer */
+  HWLOC_DISC_PHASE_IO = (1U<<4),
+
+  /** \brief Misc objects that get added below anything else.
+   * \hideinitializer */
+  HWLOC_DISC_PHASE_MISC = (1U<<5),
+
+  /** \brief Annotating existing objects, adding distances, etc.
+   * \hideinitializer */
+  HWLOC_DISC_PHASE_ANNOTATE = (1U<<6),
+
+  /** \brief Final tweaks to a ready-to-use topology.
+   * This phase runs once the topology is loaded, before it is returned to the user.
+   * Hence it may only use the main hwloc API for modifying the topology,
+   * for instance by restricting it, adding info attributes, etc.
+ * \hideinitializer */ + HWLOC_DISC_PHASE_TWEAK = (1U<<7) +} hwloc_disc_phase_t; + +/** \brief Discovery status flags */ +enum hwloc_disc_status_flag_e { + /** \brief The sets of allowed resources were already retrieved \hideinitializer */ + HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES = (1UL<<1) +}; + +/** \brief Discovery status structure + * + * Used by the core and backends to inform about what has been/is being done + * during the discovery process. + */ +struct hwloc_disc_status { + /** \brief The current discovery phase that is performed. + * Must match one of the phases in the component phases field. + */ + hwloc_disc_phase_t phase; + + /** \brief Dynamically excluded phases. + * If a component decides during discovery that some phases are no longer needed. + */ + unsigned excluded_phases; + + /** \brief OR'ed set of hwloc_disc_status_flag_e */ + unsigned long flags; +}; + /** \brief Discovery backend structure * * A backend is the instantiation of a discovery component. @@ -116,6 +167,14 @@ struct hwloc_disc_component { * hwloc_backend_alloc() initializes all fields to default values * that the component may change (except "component" and "next") * before enabling the backend with hwloc_backend_enable(). + * + * Most backends assume that the topology is_thissystem flag is + * set because they talk to the underlying operating system. + * However they may still be used in topologies without the + * is_thissystem flag for debugging reasons. + * In practice, they are usually auto-disabled in such cases + * (excluded by xml or synthetic backends, or by environment + * variables when changing the Linux fsroot or the x86 cpuid path). */ struct hwloc_backend { /** \private Reserved for the core, set by hwloc_backend_alloc() */ @@ -127,12 +186,20 @@ struct hwloc_backend { /** \private Reserved for the core. Used internally to list backends topology->backends. */ struct hwloc_backend * next; + /** \brief Discovery phases performed by this component, possibly without some of them if excluded by other components. + * OR'ed set of ::hwloc_disc_phase_t + */ + unsigned phases; + /** \brief Backend flags, currently always 0. */ unsigned long flags; /** \brief Backend-specific 'is_thissystem' property. - * Set to 0 or 1 if the backend should enforce the thissystem flag when it gets enabled. - * Set to -1 if the backend doesn't care (default). */ + * Set to 0 if the backend disables the thissystem flag for this topology + * (e.g. loading from xml or synthetic string, + * or using a different fsroot on Linux, or a x86 CPUID dump). + * Set to -1 if the backend doesn't care (default). + */ int is_thissystem; /** \brief Backend private data, or NULL if none. */ @@ -147,20 +214,22 @@ struct hwloc_backend { * or because of an actual discovery/gathering failure. * May be NULL. */ - int (*discover)(struct hwloc_backend *backend); + int (*discover)(struct hwloc_backend *backend, struct hwloc_disc_status *status); - /** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend. - * May be NULL. */ + /** \brief Callback to retrieve the locality of a PCI object. + * Called by the PCI core when attaching PCI hierarchy to CPU objects. + * May be NULL. + */ int (*get_pci_busid_cpuset)(struct hwloc_backend *backend, struct hwloc_pcidev_attr_s *busid, hwloc_bitmap_t cpuset); }; /** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc. 
* The caller will then modify whatever needed, and call hwloc_backend_enable(). */ -HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_disc_component *component); +HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_topology *topology, struct hwloc_disc_component *component); /** \brief Enable a previously allocated and setup backend. */ -HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend); +HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_backend *backend); /** @} */ @@ -480,7 +549,9 @@ HWLOC_DECLSPEC hwloc_obj_type_t hwloc_pcidisc_check_bridge_type(unsigned device_ * * Returns -1 and destroys /p obj if bridge fields are invalid. */ -HWLOC_DECLSPEC int hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, const unsigned char *config); +HWLOC_DECLSPEC int hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func, + unsigned *secondary_busp, unsigned *subordinate_busp, + const unsigned char *config); /** \brief Insert a PCI object in the given PCI tree by looking at PCI bus IDs. * @@ -490,10 +561,7 @@ HWLOC_DECLSPEC void hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep, /** \brief Add some hostbridges on top of the given tree of PCI objects and attach them to the topology. * - * For now, they will be attached to the root object. The core will move them to their actual PCI - * locality using hwloc_pci_belowroot_apply_locality() at the end of the discovery. - * - * In the meantime, other backends lookup PCI objects or localities (for instance to attach OS devices) + * Other backends may lookup PCI objects or localities (for instance to attach OS devices) * by using hwloc_pcidisc_find_by_busid() or hwloc_pcidisc_find_busid_parent(). */ HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree); @@ -507,32 +575,14 @@ HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, st * @{ */ -/** \brief Find the PCI object that matches the bus ID. - * - * To be used after a PCI backend added PCI devices with hwloc_pcidisc_tree_attach() - * and before the core moves them to their actual location with hwloc_pci_belowroot_apply_locality(). - * - * If no exactly matching object is found, return the container bridge if any, or NULL. - * - * On failure, it may be possible to find the PCI locality (instead of the PCI device) - * by calling hwloc_pcidisc_find_busid_parent(). - * - * \note This is semantically identical to hwloc_get_pcidev_by_busid() which only works - * after the topology is fully loaded. - */ -HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); - /** \brief Find the normal parent of a PCI bus ID. * * Look at PCI affinity to find out where the given PCI bus ID should be attached. * - * This function should be used to attach an I/O device directly under a normal - * (non-I/O) object, instead of below a PCI object. - * It is usually used by backends when hwloc_pcidisc_find_by_busid() failed - * to find the hwloc object corresponding to this bus ID, for instance because - * PCI discovery is not supported on this platform. + * This function should be used to attach an I/O device under the corresponding + * PCI object (if any), or under a normal (non-I/O) object with same locality. 
*/ -HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); +HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); /** @} */ diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h index 7cef1b2e8..a23738d0d 100644 --- a/src/3rdparty/hwloc/include/hwloc/rename.h +++ b/src/3rdparty/hwloc/include/hwloc/rename.h @@ -1,13 +1,13 @@ /* * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright © 2010-2018 Inria. All rights reserved. + * Copyright © 2010-2019 Inria. All rights reserved. * See COPYING in top-level directory. */ #ifndef HWLOC_RENAME_H #define HWLOC_RENAME_H -#include +#include "hwloc/autogen/config.h" #ifdef __cplusplus @@ -49,7 +49,9 @@ extern "C" { #define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE) #define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE) +#define HWLOC_OBJ_MEMCACHE HWLOC_NAME_CAPS(OBJ_MEMCACHE) #define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE) +#define HWLOC_OBJ_DIE HWLOC_NAME_CAPS(OBJ_DIE) #define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE) #define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU) #define HWLOC_OBJ_L1CACHE HWLOC_NAME_CAPS(OBJ_L1CACHE) @@ -116,7 +118,7 @@ extern "C" { #define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e) -#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM) +#define HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WITH_DISALLOWED) #define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) #define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) @@ -124,6 +126,9 @@ extern "C" { #define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) #define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml) #define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer) +#define hwloc_topology_components_flag_e HWLOC_NAME(hwloc_topology_components_flag_e) +#define HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST HWLOC_NAME_CAPS(TOPOLOGY_COMPONENTS_FLAG_BLACKLIST) +#define hwloc_topology_set_components HWLOC_NAME(topology_set_components) #define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags) #define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem) @@ -151,10 +156,18 @@ extern "C" { #define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e) #define HWLOC_RESTRICT_FLAG_REMOVE_CPULESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_CPULESS) +#define HWLOC_RESTRICT_FLAG_BYNODESET HWLOC_NAME_CAPS(RESTRICT_FLAG_BYNODESET) +#define HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_MEMLESS) #define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC) #define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO) #define hwloc_topology_restrict HWLOC_NAME(topology_restrict) +#define hwloc_allow_flags_e HWLOC_NAME(allow_flags_e) +#define HWLOC_ALLOW_FLAG_ALL HWLOC_NAME_CAPS(ALLOW_FLAG_ALL) +#define HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS HWLOC_NAME_CAPS(ALLOW_FLAG_LOCAL_RESTRICTIONS) +#define HWLOC_ALLOW_FLAG_CUSTOM HWLOC_NAME_CAPS(ALLOW_FLAG_CUSTOM) +#define hwloc_topology_allow HWLOC_NAME(topology_allow) + #define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object) #define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object) 
#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object) @@ -172,6 +185,7 @@ extern "C" { #define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE) #define HWLOC_TYPE_DEPTH_MISC HWLOC_NAME_CAPS(TYPE_DEPTH_MISC) #define HWLOC_TYPE_DEPTH_NUMANODE HWLOC_NAME_CAPS(TYPE_DEPTH_NUMANODE) +#define HWLOC_TYPE_DEPTH_MEMCACHE HWLOC_NAME_CAPS(TYPE_DEPTH_MEMCACHE) #define hwloc_get_depth_type HWLOC_NAME(get_depth_type) #define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth) @@ -266,10 +280,12 @@ extern "C" { #define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero) #define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill) #define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong) - +#define hwloc_bitmap_from_ulongs HWLOC_NAME(bitmap_from_ulongs) #define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong) #define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong) #define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong) +#define hwloc_bitmap_to_ulongs HWLOC_NAME(bitmap_to_ulongs) +#define hwloc_bitmap_nr_ulongs HWLOC_NAME(bitmap_nr_ulongs) #define hwloc_bitmap_only HWLOC_NAME(bitmap_only) #define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut) #define hwloc_bitmap_set HWLOC_NAME(bitmap_set) @@ -380,10 +396,13 @@ extern "C" { #define HWLOC_DISTANCES_KIND_FROM_USER HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_USER) #define HWLOC_DISTANCES_KIND_MEANS_LATENCY HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_LATENCY) #define HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_BANDWIDTH) +#define HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES HWLOC_NAME_CAPS(DISTANCES_KIND_HETEROGENEOUS_TYPES) #define hwloc_distances_get HWLOC_NAME(distances_get) #define hwloc_distances_get_by_depth HWLOC_NAME(distances_get_by_depth) #define hwloc_distances_get_by_type HWLOC_NAME(distances_get_by_type) +#define hwloc_distances_get_by_name HWLOC_NAME(distances_get_by_name) +#define hwloc_distances_get_name HWLOC_NAME(distances_get_name) #define hwloc_distances_release HWLOC_NAME(distances_release) #define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index) #define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values) @@ -396,6 +415,7 @@ extern "C" { #define hwloc_distances_remove HWLOC_NAME(distances_remove) #define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth) #define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type) +#define hwloc_distances_release_remove HWLOC_NAME(distances_release_remove) /* diff.h */ @@ -469,6 +489,8 @@ extern "C" { /* opencl.h */ +#define hwloc_cl_device_topology_amd HWLOC_NAME(cl_device_topology_amd) +#define hwloc_opencl_get_device_pci_busid HWLOC_NAME(opencl_get_device_pci_ids) #define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset) #define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev) #define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index) @@ -502,13 +524,22 @@ extern "C" { /* hwloc/plugins.h */ -#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e) -#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU) -#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL) -#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC) -#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t) +#define hwloc_disc_phase_e HWLOC_NAME(disc_phase_e) +#define HWLOC_DISC_PHASE_GLOBAL HWLOC_NAME_CAPS(DISC_PHASE_GLOBAL) +#define 
HWLOC_DISC_PHASE_CPU HWLOC_NAME_CAPS(DISC_PHASE_CPU) +#define HWLOC_DISC_PHASE_MEMORY HWLOC_NAME_CAPS(DISC_PHASE_MEMORY) +#define HWLOC_DISC_PHASE_PCI HWLOC_NAME_CAPS(DISC_PHASE_PCI) +#define HWLOC_DISC_PHASE_IO HWLOC_NAME_CAPS(DISC_PHASE_IO) +#define HWLOC_DISC_PHASE_MISC HWLOC_NAME_CAPS(DISC_PHASE_MISC) +#define HWLOC_DISC_PHASE_ANNOTATE HWLOC_NAME_CAPS(DISC_PHASE_ANNOTATE) +#define HWLOC_DISC_PHASE_TWEAK HWLOC_NAME_CAPS(DISC_PHASE_TWEAK) +#define hwloc_disc_phase_t HWLOC_NAME(disc_phase_t) #define hwloc_disc_component HWLOC_NAME(disc_component) +#define hwloc_disc_status_flag_e HWLOC_NAME(disc_status_flag_e) +#define HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES HWLOC_NAME_CAPS(DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES) +#define hwloc_disc_status HWLOC_NAME(disc_status) + #define hwloc_backend HWLOC_NAME(backend) #define hwloc_backend_alloc HWLOC_NAME(backend_alloc) @@ -540,12 +571,11 @@ extern "C" { #define hwloc_pcidisc_find_cap HWLOC_NAME(pcidisc_find_cap) #define hwloc_pcidisc_find_linkspeed HWLOC_NAME(pcidisc_find_linkspeed) #define hwloc_pcidisc_check_bridge_type HWLOC_NAME(pcidisc_check_bridge_type) -#define hwloc_pcidisc_setup_bridge_attr HWLOC_NAME(pcidisc_setup_bridge_attr) +#define hwloc_pcidisc_find_bridge_buses HWLOC_NAME(pcidisc_find_bridge_buses) #define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid) #define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach) -#define hwloc_pcidisc_find_by_busid HWLOC_NAME(pcidisc_find_by_busid) -#define hwloc_pcidisc_find_busid_parent HWLOC_NAME(pcidisc_find_busid_parent) +#define hwloc_pci_find_parent_by_busid HWLOC_NAME(pcidisc_find_busid_parent) /* hwloc/deprecated.h */ @@ -571,8 +601,9 @@ extern "C" { /* private/misc.h */ +#ifndef HWLOC_HAVE_CORRECT_SNPRINTF #define hwloc_snprintf HWLOC_NAME(snprintf) -#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp) +#endif #define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual) #define hwloc_ffs32 HWLOC_NAME(ffs32) #define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32) @@ -631,8 +662,9 @@ extern "C" { #define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem) #define hwloc_backends_find_callbacks HWLOC_NAME(backends_find_callbacks) -#define hwloc_backends_init HWLOC_NAME(backends_init) +#define hwloc_topology_components_init HWLOC_NAME(topology_components_init) #define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all) +#define hwloc_topology_components_fini HWLOC_NAME(topology_components_fini) #define hwloc_components_init HWLOC_NAME(components_init) #define hwloc_components_fini HWLOC_NAME(components_fini) @@ -656,7 +688,6 @@ extern "C" { #define hwloc_cuda_component HWLOC_NAME(cuda_component) #define hwloc_gl_component HWLOC_NAME(gl_component) -#define hwloc_linuxio_component HWLOC_NAME(linuxio_component) #define hwloc_nvml_component HWLOC_NAME(nvml_component) #define hwloc_opencl_component HWLOC_NAME(opencl_component) #define hwloc_pci_component HWLOC_NAME(pci_component) @@ -669,6 +700,9 @@ extern "C" { #define hwloc_special_level_s HWLOC_NAME(special_level_s) #define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s) +#define hwloc_pci_locality_s HWLOC_NAME(pci_locality_s) + +#define hwloc_topology_forced_component_s HWLOC_NAME(topology_forced_component) #define hwloc_alloc_root_sets HWLOC_NAME(alloc_root_sets) #define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level) @@ -687,8 +721,8 @@ extern "C" { #define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init) #define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare) 
#define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit) +#define hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid) #define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset) -#define hwloc_pci_belowroot_apply_locality HWLOC_NAME(pci_belowroot_apply_locality) #define hwloc__add_info HWLOC_NAME(_add_info) #define hwloc__add_info_nodup HWLOC_NAME(_add_info_nodup) diff --git a/src/3rdparty/hwloc/include/hwloc/shmem.h b/src/3rdparty/hwloc/include/hwloc/shmem.h index 222494630..86f57b4f6 100644 --- a/src/3rdparty/hwloc/include/hwloc/shmem.h +++ b/src/3rdparty/hwloc/include/hwloc/shmem.h @@ -10,7 +10,7 @@ #ifndef HWLOC_SHMEM_H #define HWLOC_SHMEM_H -#include +#include "hwloc.h" #ifdef __cplusplus extern "C" { diff --git a/src/3rdparty/hwloc/include/private/components.h b/src/3rdparty/hwloc/include/private/components.h index 8525bbe46..e28c00b1d 100644 --- a/src/3rdparty/hwloc/include/private/components.h +++ b/src/3rdparty/hwloc/include/private/components.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2015 Inria. All rights reserved. + * Copyright © 2012-2019 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -16,13 +16,13 @@ #ifndef PRIVATE_COMPONENTS_H #define PRIVATE_COMPONENTS_H 1 -#include +#include "hwloc/plugins.h" struct hwloc_topology; extern int hwloc_disc_component_force_enable(struct hwloc_topology *topology, int envvar_forced, /* 1 if forced through envvar, 0 if forced through API */ - int type, const char *name, + const char *name, const void *data1, const void *data2, const void *data3); extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology); @@ -30,10 +30,12 @@ extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology) extern void hwloc_backends_is_thissystem(struct hwloc_topology *topology); extern void hwloc_backends_find_callbacks(struct hwloc_topology *topology); -/* Initialize the list of backends used by a topology */ -extern void hwloc_backends_init(struct hwloc_topology *topology); +/* Initialize the lists of components and backends used by a topology */ +extern void hwloc_topology_components_init(struct hwloc_topology *topology); /* Disable and destroy all backends used by a topology */ extern void hwloc_backends_disable_all(struct hwloc_topology *topology); +/* Cleanup the lists of components used by a topology */ +extern void hwloc_topology_components_fini(struct hwloc_topology *topology); /* Used by the core to setup/destroy the list of components */ extern void hwloc_components_init(void); /* increases components refcount, should be called exactly once per topology (during init) */ diff --git a/src/3rdparty/hwloc/include/private/debug.h b/src/3rdparty/hwloc/include/private/debug.h index 74b697db4..637e0141e 100644 --- a/src/3rdparty/hwloc/include/private/debug.h +++ b/src/3rdparty/hwloc/include/private/debug.h @@ -11,8 +11,8 @@ #ifndef HWLOC_DEBUG_H #define HWLOC_DEBUG_H -#include -#include +#include "private/autogen/config.h" +#include "private/misc.h" #ifdef HWLOC_DEBUG #include diff --git a/src/3rdparty/hwloc/include/private/internal-components.h b/src/3rdparty/hwloc/include/private/internal-components.h index b138a0eb9..d3c897836 100644 --- a/src/3rdparty/hwloc/include/private/internal-components.h +++ b/src/3rdparty/hwloc/include/private/internal-components.h @@ -1,5 +1,5 @@ /* - * Copyright © 2018 Inria. All rights reserved. + * Copyright © 2018-2019 Inria. All rights reserved. * * See COPYING in top-level directory. 
*/ @@ -29,7 +29,6 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component; /* I/O discovery */ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component; -HWLOC_DECLSPEC extern const struct hwloc_component hwloc_linuxio_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component; diff --git a/src/3rdparty/hwloc/include/private/misc.h b/src/3rdparty/hwloc/include/private/misc.h index 66608bc79..6c02d793b 100644 --- a/src/3rdparty/hwloc/include/private/misc.h +++ b/src/3rdparty/hwloc/include/private/misc.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -11,9 +11,9 @@ #ifndef HWLOC_PRIVATE_MISC_H #define HWLOC_PRIVATE_MISC_H -#include -#include -#include +#include "hwloc/autogen/config.h" +#include "private/autogen/config.h" +#include "hwloc.h" #ifdef HWLOC_HAVE_DECL_STRNCASECMP #ifdef HAVE_STRINGS_H @@ -439,14 +439,14 @@ hwloc_linux_pci_link_speed_from_string(const char *string) static __hwloc_inline int hwloc__obj_type_is_normal (hwloc_obj_type_t type) { /* type contiguity is asserted in topology_check() */ - return type <= HWLOC_OBJ_GROUP; + return type <= HWLOC_OBJ_GROUP || type == HWLOC_OBJ_DIE; } -/* Any object attached to memory children, currently only NUMA nodes */ +/* Any object attached to memory children, currently NUMA nodes or Memory-side caches */ static __hwloc_inline int hwloc__obj_type_is_memory (hwloc_obj_type_t type) { /* type contiguity is asserted in topology_check() */ - return type == HWLOC_OBJ_NUMANODE; + return type == HWLOC_OBJ_NUMANODE || type == HWLOC_OBJ_MEMCACHE; } /* I/O or Misc object, without cpusets or nodesets. 
*/ @@ -463,6 +463,7 @@ static __hwloc_inline int hwloc__obj_type_is_io (hwloc_obj_type_t type) return type >= HWLOC_OBJ_BRIDGE && type <= HWLOC_OBJ_OS_DEVICE; } +/* Any CPU caches (not Memory-side caches) */ static __hwloc_inline int hwloc__obj_type_is_cache(hwloc_obj_type_t type) { @@ -572,12 +573,4 @@ typedef SSIZE_T ssize_t; # endif #endif -#if defined HWLOC_WIN_SYS && !defined __MINGW32__ && !defined(__CYGWIN__) -/* MSVC doesn't support C99 variable-length array */ -#include -#define HWLOC_VLA(_type, _name, _nb) _type *_name = (_type*) _alloca((_nb)*sizeof(_type)) -#else -#define HWLOC_VLA(_type, _name, _nb) _type _name[_nb] -#endif - #endif /* HWLOC_PRIVATE_MISC_H */ diff --git a/src/3rdparty/hwloc/include/private/private.h b/src/3rdparty/hwloc/include/private/private.h index 8e3964ab2..5f8789376 100644 --- a/src/3rdparty/hwloc/include/private/private.h +++ b/src/3rdparty/hwloc/include/private/private.h @@ -22,11 +22,12 @@ #ifndef HWLOC_PRIVATE_H #define HWLOC_PRIVATE_H -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "hwloc/bitmap.h" +#include "private/components.h" +#include "private/misc.h" + #include #ifdef HAVE_UNISTD_H #include @@ -39,7 +40,7 @@ #endif #include -#define HWLOC_TOPOLOGY_ABI 0x20000 /* version of the layout of struct topology */ +#define HWLOC_TOPOLOGY_ABI 0x20100 /* version of the layout of struct topology */ /***************************************************** * WARNING: @@ -67,12 +68,13 @@ struct hwloc_topology { void *adopted_shmem_addr; size_t adopted_shmem_length; -#define HWLOC_NR_SLEVELS 5 +#define HWLOC_NR_SLEVELS 6 #define HWLOC_SLEVEL_NUMANODE 0 #define HWLOC_SLEVEL_BRIDGE 1 #define HWLOC_SLEVEL_PCIDEV 2 #define HWLOC_SLEVEL_OSDEV 3 #define HWLOC_SLEVEL_MISC 4 +#define HWLOC_SLEVEL_MEMCACHE 5 /* order must match negative depth, it's asserted in setup_defaults() */ #define HWLOC_SLEVEL_FROM_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x)) #define HWLOC_SLEVEL_TO_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x)) @@ -86,6 +88,7 @@ struct hwloc_topology { hwloc_bitmap_t allowed_nodeset; struct hwloc_binding_hooks { + /* These are actually rather OS hooks since some of them are not about binding */ int (*set_thisproc_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); int (*get_thisproc_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); int (*set_thisthread_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); @@ -127,20 +130,35 @@ struct hwloc_topology { int userdata_not_decoded; struct hwloc_internal_distances_s { - hwloc_obj_type_t type; + char *name; /* FIXME: needs an API to set it from user */ + + unsigned id; /* to match the container id field of public distances structure + * not exported to XML, regenerated during _add() + */ + + /* if all objects have the same type, different_types is NULL and unique_type is valid. + * otherwise unique_type is HWLOC_OBJ_TYPE_NONE and different_types contains individual objects types. + */ + hwloc_obj_type_t unique_type; + hwloc_obj_type_t *different_types; + /* add union hwloc_obj_attr_u if we ever support groups */ unsigned nbobjs; - uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs. */ + uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs. + * OS indexes for distances covering only PUs or only NUMAnodes. 
+ */ +#define HWLOC_DIST_TYPE_USE_OS_INDEX(_type) ((_type) == HWLOC_OBJ_PU || (_type == HWLOC_OBJ_NUMANODE)) uint64_t *values; /* distance matrices, ordered according to the above indexes/objs array. * distance from i to j is stored in slot i*nbnodes+j. */ unsigned long kind; +#define HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID (1U<<0) /* if the objs array is valid below */ + unsigned iflags; + /* objects are currently stored in physical_index order */ hwloc_obj_t *objs; /* array of objects */ - int objs_are_valid; /* set to 1 if the array objs is still valid, 0 if needs refresh */ - unsigned id; /* to match the container id field of public distances structure */ struct hwloc_internal_distances_s *prev, *next; } *first_dist, *last_dist; unsigned next_dist_id; @@ -153,8 +171,9 @@ struct hwloc_topology { /* list of enabled backends. */ struct hwloc_backend * backends; - struct hwloc_backend * get_pci_busid_cpuset_backend; - unsigned backend_excludes; + struct hwloc_backend * get_pci_busid_cpuset_backend; /* first backend that provides get_pci_busid_cpuset() callback */ + unsigned backend_phases; + unsigned backend_excluded_phases; /* memory allocator for topology objects */ struct hwloc_tma * tma; @@ -176,7 +195,6 @@ struct hwloc_topology { struct hwloc_numanode_attr_s machine_memory; /* pci stuff */ - int need_pci_belowroot_apply_locality; int pci_has_forced_locality; unsigned pci_forced_locality_nr; struct hwloc_pci_forced_locality_s { @@ -185,13 +203,32 @@ struct hwloc_topology { hwloc_bitmap_t cpuset; } * pci_forced_locality; + /* component blacklisting */ + unsigned nr_blacklisted_components; + struct hwloc_topology_forced_component_s { + struct hwloc_disc_component *component; + unsigned phases; + } *blacklisted_components; + + /* FIXME: keep until topo destroy and reuse for finding specific buses */ + struct hwloc_pci_locality_s { + unsigned domain; + unsigned bus_min; + unsigned bus_max; + hwloc_bitmap_t cpuset; + hwloc_obj_t parent; + struct hwloc_pci_locality_s *prev, *next; + } *first_pci_locality, *last_pci_locality; }; extern void hwloc_alloc_root_sets(hwloc_obj_t root); extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus); extern int hwloc_get_sysctlbyname(const char *name, int64_t *n); extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n); -extern int hwloc_fallback_nbprocessors(struct hwloc_topology *topology); + +/* returns the number of CPU from the OS (only valid if thissystem) */ +#define HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE 1 /* by default we try to get only the online CPUs */ +extern int hwloc_fallback_nbprocessors(unsigned flags); extern int hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2); extern void hwloc__reorder_children(hwloc_obj_t parent); @@ -208,19 +245,17 @@ extern void hwloc_pci_discovery_init(struct hwloc_topology *topology); extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology); extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology); +/* Look for an object matching the given domain/bus/func, + * either exactly or return the smallest container bridge + */ +extern struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); + /* Look for an object matching complete cpuset exactly, or insert one. * Return NULL on failure. * Return a good fallback (object above) on failure to insert. 
*/ extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_cpuset_t cpuset); -/* Move PCI objects currently attached to the root object ot their actual location. - * Called by the core at the end of hwloc_topology_load(). - * Prior to this call, all PCI objects may be found below the root object. - * After this call and a reconnect of levels, all PCI objects are available through levels. - */ -extern int hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology); - extern int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value); extern int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value, int replace); extern int hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s **src_infosp, unsigned *src_countp); @@ -313,8 +348,8 @@ extern void hwloc_internal_distances_prepare(hwloc_topology_t topology); extern void hwloc_internal_distances_destroy(hwloc_topology_t topology); extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old); extern void hwloc_internal_distances_refresh(hwloc_topology_t topology); -extern int hwloc_internal_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags); -extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); +extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags); +extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology); /* encode src buffer into target buffer. @@ -330,13 +365,19 @@ extern int hwloc_encode_to_base64(const char *src, size_t srclength, char *targe */ extern int hwloc_decode_from_base64(char const *src, char *target, size_t targsize); -/* Check whether needle matches the beginning of haystack, at least n, and up - * to a colon or \0 */ -extern int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n); - /* On some systems, snprintf returns the size of written data, not the actually - * required size. hwloc_snprintf always report the actually required size. */ + * required size. Sometimes it returns -1 on truncation too. + * And sometimes it doesn't like NULL output buffers. + * http://www.gnu.org/software/gnulib/manual/html_node/snprintf.html + * + * hwloc_snprintf behaves properly, but it's a bit overkill on the vast majority + * of platforms, so don't enable it unless really needed. + */ +#ifdef HWLOC_HAVE_CORRECT_SNPRINTF +#define hwloc_snprintf snprintf +#else extern int hwloc_snprintf(char *str, size_t size, const char *format, ...) __hwloc_attribute_format(printf, 3, 4); +#endif /* Return the name of the currently running program, if supported. * If not NULL, must be freed by the caller. 
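The HWLOC_HAVE_CORRECT_SNPRINTF switch above only pays off when the system snprintf() is C99-conforming. As a quick illustration of the guarantee being relied on (a sketch for this note only, not code from the patch; the helper name is made up):

#include <stdio.h>

/* A conforming snprintf() must return the full required length even when
 * the output is truncated, and must accept a NULL buffer with size 0;
 * platforms where this check fails keep the hwloc_snprintf() fallback. */
static int snprintf_is_conforming(void)
{
  char buf[4];
  int full  = snprintf(NULL, 0, "%s", "truncated");          /* expect 9 */
  int trunc = snprintf(buf, sizeof(buf), "%s", "truncated"); /* expect 9, buf = "tru" */
  return full == 9 && trunc == 9;
}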
@@ -356,7 +397,7 @@ extern char * hwloc_progname(struct hwloc_topology *topology); #define HWLOC_GROUP_KIND_INTEL_MODULE 102 /* no subkind */ #define HWLOC_GROUP_KIND_INTEL_TILE 103 /* no subkind */ #define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */ -#define HWLOC_GROUP_KIND_S390_BOOK 110 /* no subkind */ +#define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */ #define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */ /* then, OS-specific groups */ #define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */ diff --git a/src/3rdparty/hwloc/include/private/xml.h b/src/3rdparty/hwloc/include/private/xml.h index 7c73384d9..f59fca1ff 100644 --- a/src/3rdparty/hwloc/include/private/xml.h +++ b/src/3rdparty/hwloc/include/private/xml.h @@ -1,12 +1,12 @@ /* - * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2017 Inria. All rights reserved. * See COPYING in top-level directory. */ #ifndef PRIVATE_XML_H #define PRIVATE_XML_H 1 -#include +#include "hwloc.h" #include @@ -54,7 +54,6 @@ struct hwloc_xml_backend_data_s { unsigned nbnumanodes; hwloc_obj_t first_numanode, last_numanode; /* temporary cousin-list for handling v1distances */ struct hwloc__xml_imported_v1distances_s *first_v1dist, *last_v1dist; - int dont_merge_die_groups; }; /************** diff --git a/src/3rdparty/hwloc/src/base64.c b/src/3rdparty/hwloc/src/base64.c index 7b3e12101..4df67bf97 100644 --- a/src/3rdparty/hwloc/src/base64.c +++ b/src/3rdparty/hwloc/src/base64.c @@ -11,7 +11,7 @@ /* include hwloc's config before anything else * so that extensions and features are properly enabled */ -#include +#include "private/private.h" /* $OpenBSD: base64.c,v 1.5 2006/10/21 09:55:03 otto Exp $ */ diff --git a/src/3rdparty/hwloc/src/bind.c b/src/3rdparty/hwloc/src/bind.c index b3457bc76..0bd85e258 100644 --- a/src/3rdparty/hwloc/src/bind.c +++ b/src/3rdparty/hwloc/src/bind.c @@ -1,15 +1,16 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2010, 2012 Université Bordeaux * Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "hwloc/helper.h" + #ifdef HAVE_SYS_MMAN_H # include #endif @@ -885,6 +886,8 @@ hwloc_set_binding_hooks(struct hwloc_topology *topology) } else { /* not this system, use dummy binding hooks that do nothing (but don't return ENOSYS) */ hwloc_set_dummy_hooks(&topology->binding_hooks, &topology->support); + + /* Linux has some hooks that also work in this case, but they are not strictly needed yet. */ } /* if not is_thissystem, set_cpubind is fake diff --git a/src/3rdparty/hwloc/src/bitmap.c b/src/3rdparty/hwloc/src/bitmap.c index ea1264afc..5fb9cd356 100644 --- a/src/3rdparty/hwloc/src/bitmap.c +++ b/src/3rdparty/hwloc/src/bitmap.c @@ -1,18 +1,18 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
 */
 
-#include <private/autogen/config.h>
-#include <hwloc/autogen/config.h>
-#include <hwloc.h>
-#include <private/misc.h>
-#include <private/private.h>
-#include <private/debug.h>
-#include <hwloc/bitmap.h>
+#include "private/autogen/config.h"
+#include "hwloc/autogen/config.h"
+#include "hwloc.h"
+#include "private/misc.h"
+#include "private/private.h"
+#include "private/debug.h"
+#include "hwloc/bitmap.h"
 
 #include <stdarg.h>
 #include <stdio.h>
@@ -766,6 +766,21 @@ int hwloc_bitmap_from_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned long mask)
   return 0;
 }
 
+int hwloc_bitmap_from_ulongs(struct hwloc_bitmap_s *set, unsigned nr, const unsigned long *masks)
+{
+  unsigned j;
+
+  HWLOC__BITMAP_CHECK(set);
+
+  if (hwloc_bitmap_reset_by_ulongs(set, nr) < 0)
+    return -1;
+
+  for(j=0; j<nr; j++)
+    set->ulongs[j] = masks[j];
+  set->infinite = 0;
+  return 0;
+}
+
 unsigned long hwloc_bitmap_to_ulong(const struct hwloc_bitmap_s *set)
 {
   HWLOC__BITMAP_CHECK(set);
@@ -780,6 +795,30 @@ unsigned long hwloc_bitmap_to_ith_ulong(const struct hwloc_bitmap_s *set, unsigned i)
   return HWLOC_SUBBITMAP_READULONG(set, i);
 }
 
+int hwloc_bitmap_to_ulongs(const struct hwloc_bitmap_s *set, unsigned nr, unsigned long *masks)
+{
+  unsigned j;
+
+  HWLOC__BITMAP_CHECK(set);
+
+  for(j=0; j<nr; j++)
+    masks[j] = HWLOC_SUBBITMAP_READULONG(set, j);
+  return 0;
+}
+
+int hwloc_bitmap_nr_ulongs(const struct hwloc_bitmap_s *set)
+{
+  int last;
+
+  HWLOC__BITMAP_CHECK(set);
+
+  if (set->infinite)
+    return -1;
+
+  last = hwloc_bitmap_last(set);
+  return (last + HWLOC_BITS_PER_LONG-1)/HWLOC_BITS_PER_LONG;
+}
+
 int hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu)
 {
   unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
diff --git a/src/3rdparty/hwloc/src/components.c b/src/3rdparty/hwloc/src/components.c
index bd7c00e36..5c2879b64 100644
--- a/src/3rdparty/hwloc/src/components.c
+++ b/src/3rdparty/hwloc/src/components.c
@@ -1,18 +1,19 @@
 /*
- * Copyright © 2009-2017 Inria. All rights reserved.
+ * Copyright © 2009-2019 Inria. All rights reserved.
  * Copyright © 2012 Université Bordeaux
  * See COPYING in top-level directory.
  */
 
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <private/private.h>
-#include <private/xml.h>
-#include <private/misc.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/xml.h"
+#include "private/misc.h"
 
 #define HWLOC_COMPONENT_STOP_NAME "stop"
 #define HWLOC_COMPONENT_EXCLUDE_CHAR '-'
 #define HWLOC_COMPONENT_SEPS ","
+#define HWLOC_COMPONENT_PHASESEP_CHAR ':'
 
 /* list of all registered discovery components, sorted by priority, higher priority first.
  * noos is last because its priority is 0.
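For orientation while reading the next hunks: the blacklisting machinery added to this file is driven by the new public hwloc_topology_set_components() entry point (implemented further down in this patch) and must run between topology init and load. A minimal usage sketch (illustration only, not part of the diff; the "opencl" name and the "linux:io" phase filter are just examples):

#include <hwloc.h>

static int load_without_opencl(hwloc_topology_t *out)
{
  hwloc_topology_t topology;
  if (hwloc_topology_init(&topology) < 0)
    return -1;
  /* HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST is currently the only (and
   * mandatory) mode; a name may carry a phase filter after ':', e.g. "linux:io" */
  hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "opencl");
  if (hwloc_topology_load(topology) < 0) {
    hwloc_topology_destroy(topology);
    return -1;
  }
  *out = topology;
  return 0;
}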
@@ -232,17 +233,6 @@ hwloc_plugins_init(void) #endif /* HWLOC_HAVE_PLUGINS */ -static const char * -hwloc_disc_component_type_string(hwloc_disc_component_type_t type) -{ - switch (type) { - case HWLOC_DISC_COMPONENT_TYPE_CPU: return "cpu"; - case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: return "global"; - case HWLOC_DISC_COMPONENT_TYPE_MISC: return "misc"; - default: return "**unknown**"; - } -} - static int hwloc_disc_component_register(struct hwloc_disc_component *component, const char *filename) @@ -256,21 +246,26 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, return -1; } if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR) + || strchr(component->name, HWLOC_COMPONENT_PHASESEP_CHAR) || strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) { if (hwloc_components_verbose) fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n", component->name, HWLOC_COMPONENT_EXCLUDE_CHAR); return -1; } - /* check that the component type is valid */ - switch ((unsigned) component->type) { - case HWLOC_DISC_COMPONENT_TYPE_CPU: - case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: - case HWLOC_DISC_COMPONENT_TYPE_MISC: - break; - default: - fprintf(stderr, "Cannot register discovery component `%s' with unknown type %u\n", - component->name, (unsigned) component->type); + + /* check that the component phases are valid */ + if (!component->phases + || (component->phases != HWLOC_DISC_PHASE_GLOBAL + && component->phases & ~(HWLOC_DISC_PHASE_CPU + |HWLOC_DISC_PHASE_MEMORY + |HWLOC_DISC_PHASE_PCI + |HWLOC_DISC_PHASE_IO + |HWLOC_DISC_PHASE_MISC + |HWLOC_DISC_PHASE_ANNOTATE + |HWLOC_DISC_PHASE_TWEAK))) { + fprintf(stderr, "Cannot register discovery component `%s' with invalid phases 0x%x\n", + component->name, component->phases); return -1; } @@ -295,8 +290,8 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, prev = &((*prev)->next); } if (hwloc_components_verbose) - fprintf(stderr, "Registered %s discovery component `%s' with priority %u (%s%s)\n", - hwloc_disc_component_type_string(component->type), component->name, component->priority, + fprintf(stderr, "Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n", + component->name, component->phases, component->priority, filename ? "from plugin " : "statically build", filename ? 
filename : ""); prev = &hwloc_disc_components; @@ -310,7 +305,7 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, return 0; } -#include +#include "static-components.h" static void (**hwloc_component_finalize_cbs)(unsigned long); static unsigned hwloc_component_finalize_cb_count; @@ -415,31 +410,152 @@ hwloc_components_init(void) } void -hwloc_backends_init(struct hwloc_topology *topology) +hwloc_topology_components_init(struct hwloc_topology *topology) { + topology->nr_blacklisted_components = 0; + topology->blacklisted_components = NULL; + topology->backends = NULL; - topology->backend_excludes = 0; + topology->backend_phases = 0; + topology->backend_excluded_phases = 0; } +/* look for name among components, ignoring things after `:' */ static struct hwloc_disc_component * -hwloc_disc_component_find(int type /* hwloc_disc_component_type_t or -1 if any */, - const char *name /* name of NULL if any */) +hwloc_disc_component_find(const char *name, const char **endp) { - struct hwloc_disc_component *comp = hwloc_disc_components; + struct hwloc_disc_component *comp; + size_t length; + const char *end = strchr(name, HWLOC_COMPONENT_PHASESEP_CHAR); + if (end) { + length = end-name; + if (endp) + *endp = end+1; + } else { + length = strlen(name); + if (endp) + *endp = NULL; + } + + comp = hwloc_disc_components; while (NULL != comp) { - if ((-1 == type || type == (int) comp->type) - && (NULL == name || !strcmp(name, comp->name))) + if (!strncmp(name, comp->name, length)) return comp; comp = comp->next; } return NULL; } +static unsigned +hwloc_phases_from_string(const char *s) +{ + if (!s) + return ~0U; + if (s[0]<'0' || s[0]>'9') { + if (!strcasecmp(s, "global")) + return HWLOC_DISC_PHASE_GLOBAL; + else if (!strcasecmp(s, "cpu")) + return HWLOC_DISC_PHASE_CPU; + if (!strcasecmp(s, "memory")) + return HWLOC_DISC_PHASE_MEMORY; + if (!strcasecmp(s, "pci")) + return HWLOC_DISC_PHASE_PCI; + if (!strcasecmp(s, "io")) + return HWLOC_DISC_PHASE_IO; + if (!strcasecmp(s, "misc")) + return HWLOC_DISC_PHASE_MISC; + if (!strcasecmp(s, "annotate")) + return HWLOC_DISC_PHASE_ANNOTATE; + if (!strcasecmp(s, "tweak")) + return HWLOC_DISC_PHASE_TWEAK; + return 0; + } + return (unsigned) strtoul(s, NULL, 0); +} + +static int +hwloc_disc_component_blacklist_one(struct hwloc_topology *topology, + const char *name) +{ + struct hwloc_topology_forced_component_s *blacklisted; + struct hwloc_disc_component *comp; + unsigned phases; + unsigned i; + + if (!strcmp(name, "linuxpci") || !strcmp(name, "linuxio")) { + /* replace linuxpci and linuxio with linux (with IO phases) + * for backward compatibility with pre-v2.0 and v2.0 respectively */ + if (hwloc_components_verbose) + fprintf(stderr, "Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name); + comp = hwloc_disc_component_find("linux", NULL); + phases = HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE; + + } else { + /* normal lookup */ + const char *end; + comp = hwloc_disc_component_find(name, &end); + phases = hwloc_phases_from_string(end); + } + if (!comp) { + errno = EINVAL; + return -1; + } + + if (hwloc_components_verbose) + fprintf(stderr, "Blacklisting component `%s` phases 0x%x\n", comp->name, phases); + + for(i=0; inr_blacklisted_components; i++) { + if (topology->blacklisted_components[i].component == comp) { + topology->blacklisted_components[i].phases |= phases; + return 0; + } + } + + blacklisted = realloc(topology->blacklisted_components, 
(topology->nr_blacklisted_components+1)*sizeof(*blacklisted));
+  if (!blacklisted)
+    return -1;
+
+  blacklisted[topology->nr_blacklisted_components].component = comp;
+  blacklisted[topology->nr_blacklisted_components].phases = phases;
+  topology->blacklisted_components = blacklisted;
+  topology->nr_blacklisted_components++;
+  return 0;
+}
+
+int
+hwloc_topology_set_components(struct hwloc_topology *topology,
+                              unsigned long flags,
+                              const char *name)
+{
+  if (topology->is_loaded) {
+    errno = EBUSY;
+    return -1;
+  }
+
+  if (flags & ~HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* this flag is strictly required for now */
+  if (flags != HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!strncmp(name, "all", 3) && name[3] == HWLOC_COMPONENT_PHASESEP_CHAR) {
+    topology->backend_excluded_phases = hwloc_phases_from_string(name+4);
+    return 0;
+  }
+
+  return hwloc_disc_component_blacklist_one(topology, name);
+}
+
 /* used by set_xml(), set_synthetic(), ... environment variables, ... to force the first backend */
 int
 hwloc_disc_component_force_enable(struct hwloc_topology *topology,
                                   int envvar_forced,
-                                  int type, const char *name,
+                                  const char *name,
                                   const void *data1, const void *data2, const void *data3)
 {
   struct hwloc_disc_component *comp;
@@ -450,18 +566,28 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology,
     return -1;
   }
 
-  comp = hwloc_disc_component_find(type, name);
+  comp = hwloc_disc_component_find(name, NULL);
   if (!comp) {
     errno = ENOSYS;
     return -1;
   }
 
-  backend = comp->instantiate(comp, data1, data2, data3);
+  backend = comp->instantiate(topology, comp, 0U /* force-enabled backends don't get any phase blacklisting */,
+                              data1, data2, data3);
   if (backend) {
+    int err;
     backend->envvar_forced = envvar_forced;
     if (topology->backends)
      hwloc_backends_disable_all(topology);
-    return hwloc_backend_enable(topology, backend);
+    err = hwloc_backend_enable(backend);
+
+    if (comp->phases == HWLOC_DISC_PHASE_GLOBAL) {
+      char *env = getenv("HWLOC_ANNOTATE_GLOBAL_COMPONENTS");
+      if (env && atoi(env))
+        topology->backend_excluded_phases &= ~HWLOC_DISC_PHASE_ANNOTATE;
+    }
+
+    return err;
   } else
     return -1;
 }
@@ -469,29 +595,32 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology,
 static int
 hwloc_disc_component_try_enable(struct hwloc_topology *topology,
                                 struct hwloc_disc_component *comp,
-                                const char *comparg,
-                                int envvar_forced)
+                                int envvar_forced,
+                                unsigned blacklisted_phases)
 {
   struct hwloc_backend *backend;
 
-  if (topology->backend_excludes & comp->type) {
+  if (!(comp->phases & ~(topology->backend_excluded_phases | blacklisted_phases))) {
+    /* all of this backend's phases are already excluded, exclude the backend entirely */
     if (hwloc_components_verbose)
       /* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc.
*/ - fprintf(stderr, "Excluding %s discovery component `%s', conflicts with excludes 0x%x\n", - hwloc_disc_component_type_string(comp->type), comp->name, topology->backend_excludes); + fprintf(stderr, "Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n", + comp->name, comp->phases, topology->backend_excluded_phases); return -1; } - backend = comp->instantiate(comp, comparg, NULL, NULL); + backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases, + NULL, NULL, NULL); if (!backend) { if (hwloc_components_verbose || envvar_forced) fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name); return -1; } + backend->phases &= ~blacklisted_phases; backend->envvar_forced = envvar_forced; - return hwloc_backend_enable(topology, backend); + return hwloc_backend_enable(backend); } void @@ -502,11 +631,12 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) int tryall = 1; const char *_env; char *env; /* we'll to modify the env value, so duplicate it */ + unsigned i; _env = getenv("HWLOC_COMPONENTS"); env = _env ? strdup(_env) : NULL; - /* enable explicitly listed components */ + /* blacklist disabled components */ if (env) { char *curenv = env; size_t s; @@ -516,21 +646,41 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) if (s) { char c; - /* replace linuxpci with linuxio for backward compatibility with pre-v2.0 */ - if (!strncmp(curenv, "linuxpci", 8) && s == 8) { - curenv[5] = 'i'; - curenv[6] = 'o'; - curenv[7] = *HWLOC_COMPONENT_SEPS; - } else if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, "linuxpci", 8) && s == 9) { - curenv[6] = 'i'; - curenv[7] = 'o'; - curenv[8] = *HWLOC_COMPONENT_SEPS; - /* skip this name, it's a negated one */ + if (curenv[0] != HWLOC_COMPONENT_EXCLUDE_CHAR) goto nextname; - } - if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR) - goto nextname; + /* save the last char and replace with \0 */ + c = curenv[s]; + curenv[s] = '\0'; + + /* blacklist it, and just ignore failures to allocate */ + hwloc_disc_component_blacklist_one(topology, curenv+1); + + /* remove that blacklisted name from the string */ + for(i=0; inr_blacklisted_components; i++) + if (comp == topology->blacklisted_components[i].component) { + blacklisted_phases = topology->blacklisted_components[i].phases; + break; + } + if (comp->phases & ~blacklisted_phases) + hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases); } else { - fprintf(stderr, "Cannot find discovery component `%s'\n", curenv); + fprintf(stderr, "Cannot find discovery component `%s'\n", name); } /* restore chars (the second loop below needs env to be unmodified) */ curenv[s] = c; } -nextname: curenv += s; if (*curenv) /* Skip comma */ @@ -566,26 +729,24 @@ nextname: if (tryall) { comp = hwloc_disc_components; while (NULL != comp) { + unsigned blacklisted_phases = 0U; if (!comp->enabled_by_default) goto nextcomp; - /* check if this component was explicitly excluded in env */ - if (env) { - char *curenv = env; - while (*curenv) { - size_t s = strcspn(curenv, HWLOC_COMPONENT_SEPS); - if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, comp->name, s-1) && strlen(comp->name) == s-1) { - if (hwloc_components_verbose) - fprintf(stderr, "Excluding %s discovery component `%s' because of HWLOC_COMPONENTS environment variable\n", - hwloc_disc_component_type_string(comp->type), comp->name); - goto nextcomp; - } - curenv += s; - if (*curenv) - /* Skip comma */ 
- curenv++; + /* check if this component was blacklisted by the application */ + for(i=0; inr_blacklisted_components; i++) + if (comp == topology->blacklisted_components[i].component) { + blacklisted_phases = topology->blacklisted_components[i].phases; + break; } + + if (!(comp->phases & ~blacklisted_phases)) { + if (hwloc_components_verbose) + fprintf(stderr, "Excluding blacklisted discovery component `%s' phases 0x%x\n", + comp->name, comp->phases); + goto nextcomp; } - hwloc_disc_component_try_enable(topology, comp, NULL, 0 /* defaults, not envvar forced */); + + hwloc_disc_component_try_enable(topology, comp, 0 /* defaults, not envvar forced */, blacklisted_phases); nextcomp: comp = comp->next; } @@ -597,7 +758,7 @@ nextcomp: backend = topology->backends; fprintf(stderr, "Final list of enabled discovery components: "); while (backend != NULL) { - fprintf(stderr, "%s%s", first ? "" : ",", backend->component->name); + fprintf(stderr, "%s%s(0x%x)", first ? "" : ",", backend->component->name, backend->phases); backend = backend->next; first = 0; } @@ -638,7 +799,8 @@ hwloc_components_fini(void) } struct hwloc_backend * -hwloc_backend_alloc(struct hwloc_disc_component *component) +hwloc_backend_alloc(struct hwloc_topology *topology, + struct hwloc_disc_component *component) { struct hwloc_backend * backend = malloc(sizeof(*backend)); if (!backend) { @@ -646,6 +808,12 @@ hwloc_backend_alloc(struct hwloc_disc_component *component) return NULL; } backend->component = component; + backend->topology = topology; + /* filter-out component phases that are excluded */ + backend->phases = component->phases & ~topology->backend_excluded_phases; + if (backend->phases != component->phases && hwloc_components_verbose) + fprintf(stderr, "Trying discovery component `%s' with phases 0x%x instead of 0x%x\n", + component->name, backend->phases, component->phases); backend->flags = 0; backend->discover = NULL; backend->get_pci_busid_cpuset = NULL; @@ -665,14 +833,15 @@ hwloc_backend_disable(struct hwloc_backend *backend) } int -hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend) +hwloc_backend_enable(struct hwloc_backend *backend) { + struct hwloc_topology *topology = backend->topology; struct hwloc_backend **pprev; /* check backend flags */ if (backend->flags) { - fprintf(stderr, "Cannot enable %s discovery component `%s' with unknown flags %lx\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name, backend->flags); + fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n", + backend->component->name, backend->component->phases, backend->flags); return -1; } @@ -681,8 +850,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back while (NULL != *pprev) { if ((*pprev)->component == backend->component) { if (hwloc_components_verbose) - fprintf(stderr, "Cannot enable %s discovery component `%s' twice\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name); + fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x twice\n", + backend->component->name, backend->component->phases); hwloc_backend_disable(backend); errno = EBUSY; return -1; @@ -691,8 +860,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back } if (hwloc_components_verbose) - fprintf(stderr, "Enabling %s discovery component `%s'\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name); + fprintf(stderr, 
"Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n", + backend->component->name, backend->phases, backend->component->phases); /* enqueue at the end */ pprev = &topology->backends; @@ -701,8 +870,8 @@ hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *back backend->next = *pprev; *pprev = backend; - backend->topology = topology; - topology->backend_excludes |= backend->component->excludes; + topology->backend_phases |= backend->component->phases; + topology->backend_excluded_phases |= backend->component->excluded_phases; return 0; } @@ -712,7 +881,7 @@ hwloc_backends_is_thissystem(struct hwloc_topology *topology) struct hwloc_backend *backend; const char *local_env; - /* Apply is_thissystem topology flag before we enforce envvar backends. + /* * If the application changed the backend with set_foo(), * it may use set_flags() update the is_thissystem flag here. * If it changes the backend with environment variables below, @@ -775,11 +944,20 @@ hwloc_backends_disable_all(struct hwloc_topology *topology) while (NULL != (backend = topology->backends)) { struct hwloc_backend *next = backend->next; if (hwloc_components_verbose) - fprintf(stderr, "Disabling %s discovery component `%s'\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name); + fprintf(stderr, "Disabling discovery component `%s'\n", + backend->component->name); hwloc_backend_disable(backend); topology->backends = next; } topology->backends = NULL; - topology->backend_excludes = 0; + topology->backend_excluded_phases = 0; +} + +void +hwloc_topology_components_fini(struct hwloc_topology *topology) +{ + /* hwloc_backends_disable_all() must have been called earlier */ + assert(!topology->backends); + + free(topology->blacklisted_components); } diff --git a/src/3rdparty/hwloc/src/diff.c b/src/3rdparty/hwloc/src/diff.c index 00811a7b5..7794358bb 100644 --- a/src/3rdparty/hwloc/src/diff.c +++ b/src/3rdparty/hwloc/src/diff.c @@ -1,11 +1,11 @@ /* - * Copyright © 2013-2018 Inria. All rights reserved. + * Copyright © 2013-2019 Inria. All rights reserved. * See COPYING in top-level directory. */ -#include -#include -#include +#include "private/autogen/config.h" +#include "private/private.h" +#include "private/misc.h" int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff) { @@ -351,7 +351,8 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1, err = 1; break; } - if (dist1->type != dist2->type + if (dist1->unique_type != dist2->unique_type + || dist1->different_types || dist2->different_types /* too lazy to support this case */ || dist1->nbobjs != dist2->nbobjs || dist1->kind != dist2->kind || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) { @@ -463,6 +464,10 @@ int hwloc_topology_diff_apply(hwloc_topology_t topology, errno = EINVAL; return -1; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return -1; + } if (flags & ~HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE) { errno = EINVAL; diff --git a/src/3rdparty/hwloc/src/distances.c b/src/3rdparty/hwloc/src/distances.c index f0b91f019..9e56a9696 100644 --- a/src/3rdparty/hwloc/src/distances.c +++ b/src/3rdparty/hwloc/src/distances.c @@ -1,19 +1,22 @@ /* - * Copyright © 2010-2018 Inria. All rights reserved. + * Copyright © 2010-2019 Inria. All rights reserved. * Copyright © 2011-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
*/
-#include <private/autogen/config.h>
-#include <hwloc.h>
-#include <private/private.h>
-#include <private/debug.h>
-#include <private/misc.h>
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/debug.h"
+#include "private/misc.h"
#include <float.h>
#include <math.h>
+static struct hwloc_internal_distances_s *
+hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances);
+
/******************************************************
* Global init, prepare, destroy, dup
*/
@@ -70,6 +73,8 @@ void hwloc_internal_distances_prepare(struct hwloc_topology *topology)
static void hwloc_internal_distances_free(struct hwloc_internal_distances_s *dist)
{
+ free(dist->name);
+ free(dist->different_types);
free(dist->indexes);
free(dist->objs);
free(dist->values);
@@ -96,15 +101,35 @@ static int hwloc_internal_distances_dup_one(struct hwloc_topology *new, struct h
newdist = hwloc_tma_malloc(tma, sizeof(*newdist));
if (!newdist)
return -1;
+ if (olddist->name) {
+ newdist->name = hwloc_tma_strdup(tma, olddist->name);
+ if (!newdist->name) {
+ assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
+ hwloc_internal_distances_free(newdist);
+ return -1;
+ }
+ } else {
+ newdist->name = NULL;
+ }
- newdist->type = olddist->type;
+ if (olddist->different_types) {
+ newdist->different_types = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->different_types));
+ if (!newdist->different_types) {
+ assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
+ hwloc_internal_distances_free(newdist);
+ return -1;
+ }
+ memcpy(newdist->different_types, olddist->different_types, nbobjs * sizeof(*newdist->different_types));
+ } else
+ newdist->different_types = NULL;
+ newdist->unique_type = olddist->unique_type;
newdist->nbobjs = nbobjs;
newdist->kind = olddist->kind;
newdist->id = olddist->id;
newdist->indexes = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->indexes));
newdist->objs = hwloc_tma_calloc(tma, nbobjs * sizeof(*newdist->objs));
- newdist->objs_are_valid = 0;
+ newdist->iflags = olddist->iflags & ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; /* must be revalidated after dup() */
newdist->values = hwloc_tma_malloc(tma, nbobjs*nbobjs * sizeof(*newdist->values));
if (!newdist->indexes || !newdist->objs || !newdist->values) {
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
@@ -150,6 +175,10 @@ int hwloc_distances_remove(hwloc_topology_t topology)
errno = EINVAL;
return -1;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return -1;
+ }
hwloc_internal_distances_destroy(topology);
return 0;
}
@@ -163,6 +192,10 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
errno = EINVAL;
return -1;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return -1;
+ }
/* switch back to types since we don't support groups for now */
type = hwloc_get_depth_type(topology, depth);
@@ -174,7 +207,7 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
next = topology->first_dist;
while ((dist = next) != NULL) {
next = dist->next;
- if (dist->type == type) {
+ if (dist->unique_type == type) {
if (next)
next->prev = dist->prev;
else
@@ -190,6 +223,27 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
return 0;
}
+int hwloc_distances_release_remove(hwloc_topology_t topology,
+ struct hwloc_distances_s *distances)
+{
+ struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
+ if (!dist) {
+ errno = EINVAL;
+ return -1;
+ }
+ if (dist->prev)
+ dist->prev->next = dist->next;
+ else
+ topology->first_dist = dist->next;
+ if (dist->next)
+ dist->next->prev = dist->prev;
+ else
+ topology->last_dist = dist->prev;
+ hwloc_internal_distances_free(dist);
+ hwloc_distances_release(topology, distances);
+ return 0;
+}
+
/******************************************************
* Add distances to the topology
*/
@@ -201,17 +255,34 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, str
* the caller gives us the distances and objs pointers, we'll free them later.
*/
static int
-hwloc_internal_distances__add(hwloc_topology_t topology,
- hwloc_obj_type_t type, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values,
- unsigned long kind)
+hwloc_internal_distances__add(hwloc_topology_t topology, const char *name,
+ hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types,
+ unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values,
+ unsigned long kind, unsigned iflags)
{
- struct hwloc_internal_distances_s *dist = calloc(1, sizeof(*dist));
+ struct hwloc_internal_distances_s *dist;
+
+ if (different_types) {
+ kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; /* the user isn't forced to give it */
+ } else if (kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) {
+ errno = EINVAL;
+ goto err;
+ }
+
+ dist = calloc(1, sizeof(*dist));
if (!dist)
goto err;
- dist->type = type;
+ if (name)
+ dist->name = strdup(name); /* ignore failure */
+
+ dist->unique_type = unique_type;
+ dist->different_types = different_types;
dist->nbobjs = nbobjs;
dist->kind = kind;
+ dist->iflags = iflags;
+
+ assert(!!(iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) == !!objs);
if (!objs) {
assert(indexes);
@@ -220,18 +291,16 @@ hwloc_internal_distances__add(hwloc_topology_t topology,
dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t));
if (!dist->objs)
goto err_with_dist;
- dist->objs_are_valid = 0;
} else {
unsigned i;
assert(!indexes);
/* we only have objs, generate the indexes arrays so that we can refresh objs later */
dist->objs = objs;
- dist->objs_are_valid = 1;
dist->indexes = malloc(nbobjs * sizeof(*dist->indexes));
if (!dist->indexes)
goto err_with_dist;
- if (dist->type == HWLOC_OBJ_PU || dist->type == HWLOC_OBJ_NUMANODE) {
+ if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) {
for(i=0; i<nbobjs; i++)
dist->indexes[i] = objs[i]->os_index;
} else {
@@ -256,16 +325,19 @@ hwloc_internal_distances__add(hwloc_topology_t topology,
err_with_dist:
free(dist);
err:
+ free(different_types);
free(objs);
free(indexes);
free(values);
return -1;
}
-int hwloc_internal_distances_add_by_index(hwloc_topology_t topology,
- hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
+int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name,
+ hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
unsigned long kind, unsigned long flags)
{
+ unsigned iflags = 0; /* objs not valid */
+
if (nbobjs < 2) {
errno = EINVAL;
goto err;
@@ -279,24 +351,71 @@ int hwloc_internal_distances_add_by_index(hwloc_topology_t topology,
goto err;
}
- return hwloc_internal_distances__add(topology, type, nbobjs, NULL, indexes, values, kind);
+ return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, NULL, indexes, values, kind, iflags);
err:
free(indexes);
free(values);
+ free(different_types);
return -1;
}
+static void
+hwloc_internal_distances_restrict(hwloc_obj_t *objs,
+ uint64_t *indexes,
+ uint64_t *values,
+ unsigned nbobjs, unsigned disappeared);
+
+int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
unsigned long kind, unsigned long flags)
{
+ hwloc_obj_type_t unique_type, *different_types;
+ unsigned i, disappeared = 0;
+ unsigned iflags = HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
+
if (nbobjs < 2) {
errno = EINVAL;
goto err;
}
+ /* is there any NULL object? (useful in case of problem during insert in backends) */
+ for(i=0; i<nbobjs; i++)
+ if (!objs[i])
+ disappeared++;
+ if (disappeared) {
+ /* some objects are NULL, remove them from the matrix */
+ if (nbobjs-disappeared < 2)
+ goto err;
+ hwloc_internal_distances_restrict(objs, NULL, values, nbobjs, disappeared);
+ nbobjs -= disappeared;
+ }
+
+ unique_type = objs[0]->type;
+ for(i=1; i<nbobjs; i++)
+ if (objs[i]->type != unique_type) {
+ unique_type = HWLOC_OBJ_TYPE_NONE;
+ break;
+ }
+ if (unique_type == HWLOC_OBJ_TYPE_NONE) {
+ /* heterogeneous types */
+ different_types = malloc(nbobjs * sizeof(*different_types));
+ if (!different_types)
+ goto err;
+ for(i=0; i<nbobjs; i++)
+ different_types[i] = objs[i]->type;
+
+ } else {
+ /* homogeneous types */
+ different_types = NULL;
+ }
+
if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !different_types) {
float full_accuracy = 0.f;
float *accuracies;
unsigned nbaccuracies;
@@ -310,8 +429,8 @@ int hwloc_internal_distances_add(hwloc_topology_t topology,
}
if (topology->grouping_verbose) {
- unsigned i, j;
- int gp = (objs[0]->type != HWLOC_OBJ_NUMANODE && objs[0]->type != HWLOC_OBJ_PU);
+ unsigned j;
+ int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(unique_type);
fprintf(stderr, "Trying to group objects using distance matrix:\n");
fprintf(stderr, "%s", gp ? "gp_index" : "os_index");
for(j=0; j<nbobjs; j++)
fprintf(stderr, " % 5d", (int)(gp ? objs[j]->gp_index : objs[j]->os_index));
fprintf(stderr, "\n");
for(i=0; i<nbobjs; i++) {
fprintf(stderr, " % 5d", (int)(gp ? objs[i]->gp_index : objs[i]->os_index));
for(j=0; j<nbobjs; j++)
fprintf(stderr, " % 5lld", (long long) values[i*nbobjs+j]);
fprintf(stderr, "\n");
}
}
- return hwloc_internal_distances__add(topology, objs[0]->type, nbobjs, objs, NULL, values, kind);
+ return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, objs, NULL, values, kind, iflags);
err:
free(objs);
@@ -348,7 +467,6 @@ int hwloc_distances_add(hwloc_topology_t topology,
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
unsigned long kind, unsigned long flags)
{
- hwloc_obj_type_t type;
unsigned i;
uint64_t *_values;
hwloc_obj_t *_objs;
@@ -358,6 +476,10 @@ int hwloc_distances_add(hwloc_topology_t topology,
errno = EINVAL;
return -1;
}
+ if (topology->adopted_shmem_addr) {
+ errno = EPERM;
+ return -1;
+ }
if ((kind & ~HWLOC_DISTANCES_KIND_ALL)
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1
@@ -368,15 +490,8 @@ int hwloc_distances_add(hwloc_topology_t topology,
/* no strict need to check for duplicates, things shouldn't break */
- type = objs[0]->type;
- if (type == HWLOC_OBJ_GROUP) {
- /* not supported yet, would require we save the subkind together with the type.
*/ - errno = EINVAL; - return -1; - } - for(i=1; itype != type) { + if (!objs[i]) { errno = EINVAL; return -1; } @@ -389,7 +504,7 @@ int hwloc_distances_add(hwloc_topology_t topology, memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t)); memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values)); - err = hwloc_internal_distances_add(topology, nbobjs, _objs, _values, kind, flags); + err = hwloc_internal_distances_add(topology, NULL, nbobjs, _objs, _values, kind, flags); if (err < 0) goto out; /* _objs and _values freed in hwloc_internal_distances_add() */ @@ -409,9 +524,9 @@ int hwloc_distances_add(hwloc_topology_t topology, * Refresh objects in distances */ -static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) +static hwloc_obj_t hwloc_find_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index) { - hwloc_obj_t obj = hwloc_get_obj_by_type(topology, type, 0); + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0); while (obj) { if (obj->gp_index == gp_index) return obj; @@ -420,12 +535,31 @@ static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology return NULL; } -static void -hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist, - hwloc_obj_t *objs, - unsigned disappeared) +static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return NULL; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) { + int topodepth = hwloc_topology_get_depth(topology); + for(depth=0; depthnbobjs; unsigned i, newi; unsigned j, newj; @@ -433,7 +567,7 @@ hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist, if (objs[i]) { for(j=0, newj=0; jvalues[newi*(nbobjs-disappeared)+newj] = dist->values[i*nbobjs+j]; + values[newi*(nbobjs-disappeared)+newj] = values[i*nbobjs+j]; newj++; } newi++; @@ -442,25 +576,25 @@ hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist, for(i=0, newi=0; iindexes[newi] = dist->indexes[i]; + if (indexes) + indexes[newi] = indexes[i]; newi++; } - - dist->nbobjs -= disappeared; } static int hwloc_internal_distances_refresh_one(hwloc_topology_t topology, struct hwloc_internal_distances_s *dist) { - hwloc_obj_type_t type = dist->type; + hwloc_obj_type_t unique_type = dist->unique_type; + hwloc_obj_type_t *different_types = dist->different_types; unsigned nbobjs = dist->nbobjs; hwloc_obj_t *objs = dist->objs; uint64_t *indexes = dist->indexes; unsigned disappeared = 0; unsigned i; - if (dist->objs_are_valid) + if (dist->iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) return 0; for(i=0; iindexes, dist->values, nbobjs, disappeared); + dist->nbobjs -= disappeared; + } - dist->objs_are_valid = 1; + dist->iflags |= HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; return 0; } @@ -520,32 +660,64 @@ hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology) { struct hwloc_internal_distances_s *dist; for(dist = topology->first_dist; dist; dist = dist->next) - dist->objs_are_valid = 0; + dist->iflags &= ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; } /****************************************************** * User API for getting distances */ +/* what we actually allocate for user queries, even if we only + * return the distances part of it. 
+ */ +struct hwloc_distances_container_s { + unsigned id; + struct hwloc_distances_s distances; +}; + +#define HWLOC_DISTANCES_CONTAINER_OFFSET ((char*)&((struct hwloc_distances_container_s*)NULL)->distances - (char*)NULL) +#define HWLOC_DISTANCES_CONTAINER(_d) (struct hwloc_distances_container_s *) ( ((char*)_d) - HWLOC_DISTANCES_CONTAINER_OFFSET ) + +static struct hwloc_internal_distances_s * +hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances) +{ + struct hwloc_distances_container_s *cont = HWLOC_DISTANCES_CONTAINER(distances); + struct hwloc_internal_distances_s *dist; + for(dist = topology->first_dist; dist; dist = dist->next) + if (dist->id == cont->id) + return dist; + return NULL; +} + void hwloc_distances_release(hwloc_topology_t topology __hwloc_attribute_unused, struct hwloc_distances_s *distances) { + struct hwloc_distances_container_s *cont = HWLOC_DISTANCES_CONTAINER(distances); free(distances->values); free(distances->objs); - free(distances); + free(cont); +} + +const char * +hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances) +{ + struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances); + return dist ? dist->name : NULL; } static struct hwloc_distances_s * hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused, struct hwloc_internal_distances_s *dist) { + struct hwloc_distances_container_s *cont; struct hwloc_distances_s *distances; unsigned nbobjs; - distances = malloc(sizeof(*distances)); - if (!distances) + cont = malloc(sizeof(*cont)); + if (!cont) return NULL; + distances = &cont->distances; nbobjs = distances->nbobjs = dist->nbobjs; @@ -560,18 +732,20 @@ hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused, memcpy(distances->values, dist->values, nbobjs*nbobjs*sizeof(*distances->values)); distances->kind = dist->kind; + + cont->id = dist->id; return distances; out_with_objs: free(distances->objs); out: - free(distances); + free(cont); return NULL; } static int hwloc__distances_get(hwloc_topology_t topology, - hwloc_obj_type_t type, + const char *name, hwloc_obj_type_t type, unsigned *nrp, struct hwloc_distances_s **distancesp, unsigned long kind, unsigned long flags __hwloc_attribute_unused) { @@ -602,7 +776,10 @@ hwloc__distances_get(hwloc_topology_t topology, unsigned long kind_from = kind & HWLOC_DISTANCES_KIND_FROM_ALL; unsigned long kind_means = kind & HWLOC_DISTANCES_KIND_MEANS_ALL; - if (type != HWLOC_OBJ_TYPE_NONE && type != dist->type) + if (name && (!dist->name || strcmp(name, dist->name))) + continue; + + if (type != HWLOC_OBJ_TYPE_NONE && type != dist->unique_type) continue; if (kind_from && !(kind_from & dist->kind)) @@ -640,7 +817,7 @@ hwloc_distances_get(hwloc_topology_t topology, return -1; } - return hwloc__distances_get(topology, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags); + return hwloc__distances_get(topology, NULL, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags); } int @@ -655,14 +832,40 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, return -1; } - /* switch back to types since we don't support groups for now */ + /* FIXME: passing the depth of a group level may return group distances at a different depth */ type = hwloc_get_depth_type(topology, depth); if (type == (hwloc_obj_type_t)-1) { errno = EINVAL; return -1; } - return hwloc__distances_get(topology, type, nrp, distancesp, kind, flags); + return hwloc__distances_get(topology, NULL, 
type, nrp, distancesp, kind, flags); +} + +int +hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, + unsigned *nrp, struct hwloc_distances_s **distancesp, + unsigned long flags) +{ + if (flags || !topology->is_loaded) { + errno = EINVAL; + return -1; + } + + return hwloc__distances_get(topology, name, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, HWLOC_DISTANCES_KIND_ALL, flags); +} + +int +hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, + unsigned *nrp, struct hwloc_distances_s **distancesp, + unsigned long kind, unsigned long flags) +{ + if (flags || !topology->is_loaded) { + errno = EINVAL; + return -1; + } + + return hwloc__distances_get(topology, NULL, type, nrp, distancesp, kind, flags); } /****************************************************** @@ -823,10 +1026,14 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, float *accuracies, int needcheck) { - HWLOC_VLA(unsigned, groupids, nbobjs); + unsigned *groupids; unsigned nbgroups = 0; unsigned i,j; int verbose = topology->grouping_verbose; + hwloc_obj_t *groupobjs; + unsigned * groupsizes; + uint64_t *groupvalues; + unsigned failed = 0; if (nbobjs <= 2) return; @@ -836,6 +1043,10 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, /* TODO hwloc__find_groups_by_max_distance() for bandwidth */ return; + groupids = malloc(nbobjs * sizeof(*groupids)); + if (!groupids) + return; + for(i=0; i -#include -#include +#include "private/autogen/config.h" +#include "private/private.h" +#include "private/misc.h" #include #ifdef HAVE_SYS_UTSNAME_H @@ -28,6 +28,7 @@ extern char *program_invocation_name; extern char *__progname; #endif +#ifndef HWLOC_HAVE_CORRECT_SNPRINTF int hwloc_snprintf(char *str, size_t size, const char *format, ...) { int ret; @@ -77,21 +78,7 @@ int hwloc_snprintf(char *str, size_t size, const char *format, ...) return ret; } - -int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n) -{ - size_t i = 0; - while (*haystack && *haystack != ':') { - int ha = *haystack++; - int low_h = tolower(ha); - int ne = *needle++; - int low_n = tolower(ne); - if (low_h != low_n) - return 1; - i++; - } - return i < n; -} +#endif void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unused, void *cached_uname __hwloc_attribute_unused) diff --git a/src/3rdparty/hwloc/src/pci-common.c b/src/3rdparty/hwloc/src/pci-common.c index 00f08a9e7..deca5cce5 100644 --- a/src/3rdparty/hwloc/src/pci-common.c +++ b/src/3rdparty/hwloc/src/pci-common.c @@ -1,14 +1,14 @@ /* - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * See COPYING in top-level directory. 
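
The HWLOC_DISTANCES_CONTAINER macro in the hunk above is the classic container_of idiom: the public struct hwloc_distances_s is embedded in a private container that carries the matrix id, and the container is recovered by subtracting the member offset. The same idiom written with the standard offsetof macro, as a generic sketch (names are illustrative, not hwloc's):

    #include <stddef.h>

    struct container {
        unsigned id;                 /* private part */
        struct payload {             /* public part handed to users */
            int value;
        } payload;
    };

    #define CONTAINER_OF(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    /* given the public pointer, recover the private container */
    static struct container *from_payload(struct payload *p)
    {
        return CONTAINER_OF(p, struct container, payload);
    }
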
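The reworked hwloc_distances_add() earlier in this file's diff keeps its hwloc 2.0 signature while gaining heterogeneous-type support internally. A hedged usage sketch (error handling elided, topology assumed loaded, values chosen arbitrarily):

    /* declare a user-provided latency matrix between the first two PUs */
    hwloc_obj_t objs[2];
    hwloc_uint64_t values[4] = { 10, 20,
                                 20, 10 };
    objs[0] = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
    objs[1] = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1);
    hwloc_distances_add(topology, 2, objs, values,
                        HWLOC_DISTANCES_KIND_FROM_USER | HWLOC_DISTANCES_KIND_MEANS_LATENCY,
                        0 /* no grouping flags */);
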
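hwloc_distances_get_by_name() and hwloc_distances_get_by_type(), defined just above, are new in 2.1. A short consumer sketch for the latter (assumes <stdio.h> and a loaded topology):

    unsigned nr = 1;
    struct hwloc_distances_s *matrix;
    if (!hwloc_distances_get_by_type(topology, HWLOC_OBJ_NUMANODE, &nr, &matrix,
                                     HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0)
        && nr == 1) {
        printf("%u objects, first value %llu\n",
               matrix->nbobjs, (unsigned long long) matrix->values[0]);
        hwloc_distances_release(topology, matrix);
    }
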
*/ -#include -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "hwloc/plugins.h" +#include "private/private.h" +#include "private/debug.h" +#include "private/misc.h" #include #ifdef HAVE_UNISTD_H @@ -23,6 +23,11 @@ #define close _close #endif + +/************************************** + * Init/Exit and Forced PCI localities + */ + static void hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology, const char *string /* must contain a ' ' */, @@ -109,11 +114,11 @@ hwloc_pci_forced_locality_parse(struct hwloc_topology *topology, const char *_en void hwloc_pci_discovery_init(struct hwloc_topology *topology) { - topology->need_pci_belowroot_apply_locality = 0; - topology->pci_has_forced_locality = 0; topology->pci_forced_locality_nr = 0; topology->pci_forced_locality = NULL; + + topology->first_pci_locality = topology->last_pci_locality = NULL; } void @@ -135,7 +140,7 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology) if (!err) { if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */ buffer = malloc(st.st_size+1); - if (read(fd, buffer, st.st_size) == st.st_size) { + if (buffer && read(fd, buffer, st.st_size) == st.st_size) { buffer[st.st_size] = '\0'; hwloc_pci_forced_locality_parse(topology, buffer); } @@ -152,16 +157,31 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology) } void -hwloc_pci_discovery_exit(struct hwloc_topology *topology __hwloc_attribute_unused) +hwloc_pci_discovery_exit(struct hwloc_topology *topology) { + struct hwloc_pci_locality_s *cur; unsigned i; + for(i=0; ipci_forced_locality_nr; i++) hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset); free(topology->pci_forced_locality); + cur = topology->first_pci_locality; + while (cur) { + struct hwloc_pci_locality_s *next = cur->next; + hwloc_bitmap_free(cur->cpuset); + free(cur); + cur = next; + } + hwloc_pci_discovery_init(topology); } + +/****************************** + * Inserting in Tree by Bus ID + */ + #ifdef HWLOC_DEBUG static void hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused, @@ -324,32 +344,16 @@ hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep, hwloc_pci_add_object(NULL /* no parent on top of tree */, treep, obj); } -int -hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old_tree) + +/********************** + * Attaching PCI Trees + */ + +static struct hwloc_obj * +hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology, + struct hwloc_obj *old_tree) { - struct hwloc_obj **next_hb_p; - enum hwloc_type_filter_e bfilter; - - if (!old_tree) - /* found nothing, exit */ - return 0; - -#ifdef HWLOC_DEBUG - hwloc_debug("%s", "\nPCI hierarchy:\n"); - hwloc_pci_traverse(NULL, old_tree, hwloc_pci_traverse_print_cb); - hwloc_debug("%s", "\n"); -#endif - - next_hb_p = &hwloc_get_root_obj(topology)->io_first_child; - while (*next_hb_p) - next_hb_p = &((*next_hb_p)->next_sibling); - - bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE]; - if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE) { - *next_hb_p = old_tree; - topology->modified = 1; - goto done; - } + struct hwloc_obj * new = NULL, **newp = &new; /* * tree points to all objects connected to any upstream bus in the machine. 
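
One quiet fix in the hunk above is the added `buffer &&` test: the old code passed an unchecked malloc() result straight to read(). The corrected pattern in isolation, as a sketch:

    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/stat.h>

    /* read a small file into a NUL-terminated buffer, NULL on any failure */
    static char *read_small_file(int fd, const struct stat *st)
    {
        char *buffer = malloc(st->st_size + 1);
        if (buffer && read(fd, buffer, st->st_size) == st->st_size) {
            buffer[st->st_size] = '\0';
            return buffer;
        }
        free(buffer); /* free(NULL) is a no-op */
        return NULL;
    }
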
@@ -358,15 +362,29 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old */ while (old_tree) { /* start a new host bridge */ - struct hwloc_obj *hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX); - struct hwloc_obj **dstnextp = &hostbridge->io_first_child; - struct hwloc_obj **srcnextp = &old_tree; - struct hwloc_obj *child = *srcnextp; - unsigned short current_domain = child->attr->pcidev.domain; - unsigned char current_bus = child->attr->pcidev.bus; - unsigned char current_subordinate = current_bus; + struct hwloc_obj *hostbridge; + struct hwloc_obj **dstnextp; + struct hwloc_obj **srcnextp; + struct hwloc_obj *child; + unsigned short current_domain; + unsigned char current_bus; + unsigned char current_subordinate; - hwloc_debug("Starting new PCI hostbridge %04x:%02x\n", current_domain, current_bus); + hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX); + if (!hostbridge) { + /* just queue remaining things without hostbridges and return */ + *newp = old_tree; + return new; + } + dstnextp = &hostbridge->io_first_child; + + srcnextp = &old_tree; + child = *srcnextp; + current_domain = child->attr->pcidev.domain; + current_bus = child->attr->pcidev.bus; + current_subordinate = current_bus; + + hwloc_debug("Adding new PCI hostbridge %04x:%02x\n", current_domain, current_bus); next_child: /* remove next child from tree */ @@ -395,19 +413,14 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old hostbridge->attr->bridge.downstream.pci.domain = current_domain; hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus; hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate; - hwloc_debug("New PCI hostbridge %04x:[%02x-%02x]\n", + hwloc_debug(" new PCI hostbridge covers %04x:[%02x-%02x]\n", current_domain, current_bus, current_subordinate); - *next_hb_p = hostbridge; - next_hb_p = &hostbridge->next_sibling; - topology->modified = 1; /* needed in case somebody reconnects levels before the core calls hwloc_pci_belowroot_apply_locality() - * or if hwloc_pci_belowroot_apply_locality() keeps hostbridges below root. - */ + *newp = hostbridge; + newp = &hostbridge->next_sibling; } - done: - topology->need_pci_belowroot_apply_locality = 1; - return 0; + return new; } static struct hwloc_obj * @@ -458,6 +471,9 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide unsigned i; int err; + hwloc_debug("Looking for parent of PCI busid %04x:%02x:%02x.%01x\n", + busid->domain, busid->bus, busid->dev, busid->func); + /* try to match a forced locality */ if (topology->pci_has_forced_locality) { for(i=0; ipci_forced_locality_nr; i++) { @@ -489,7 +505,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide } if (*env) { /* force the cpuset */ - hwloc_debug("Overriding localcpus using %s in the environment\n", envname); + hwloc_debug("Overriding PCI locality using %s in the environment\n", envname); hwloc_bitmap_sscanf(cpuset, env); forced = 1; } @@ -499,7 +515,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide } if (!forced) { - /* get the cpuset by asking the OS backend. */ + /* get the cpuset by asking the backend that provides the relevant hook, if any. 
*/ struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend; if (backend) err = backend->get_pci_busid_cpuset(backend, busid, cpuset); @@ -510,7 +526,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology)); } - hwloc_debug_bitmap("Attaching PCI tree to cpuset %s\n", cpuset); + hwloc_debug_bitmap(" will attach PCI bus to cpuset %s\n", cpuset); parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset); if (parent) { @@ -526,11 +542,129 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide return parent; } +int +hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree) +{ + enum hwloc_type_filter_e bfilter; + + if (!tree) + /* found nothing, exit */ + return 0; + +#ifdef HWLOC_DEBUG + hwloc_debug("%s", "\nPCI hierarchy:\n"); + hwloc_pci_traverse(NULL, tree, hwloc_pci_traverse_print_cb); + hwloc_debug("%s", "\n"); +#endif + + bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE]; + if (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE) { + tree = hwloc_pcidisc_add_hostbridges(topology, tree); + } + + while (tree) { + struct hwloc_obj *obj, *pciobj; + struct hwloc_obj *parent; + struct hwloc_pci_locality_s *loc; + unsigned domain, bus_min, bus_max; + + obj = tree; + + /* hostbridges don't have a PCI busid for looking up locality, use their first child */ + if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) + pciobj = obj->io_first_child; + else + pciobj = obj; + /* now we have a pci device or a pci bridge */ + assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE + || (pciobj->type == HWLOC_OBJ_BRIDGE && pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)); + + if (obj->type == HWLOC_OBJ_BRIDGE) { + domain = obj->attr->bridge.downstream.pci.domain; + bus_min = obj->attr->bridge.downstream.pci.secondary_bus; + bus_max = obj->attr->bridge.downstream.pci.subordinate_bus; + } else { + domain = pciobj->attr->pcidev.domain; + bus_min = pciobj->attr->pcidev.bus; + bus_max = pciobj->attr->pcidev.bus; + } + + /* find where to attach that PCI bus */ + parent = hwloc__pci_find_busid_parent(topology, &pciobj->attr->pcidev); + + /* reuse the previous locality if possible */ + if (topology->last_pci_locality + && parent == topology->last_pci_locality->parent + && domain == topology->last_pci_locality->domain + && (bus_min == topology->last_pci_locality->bus_max + || bus_min == topology->last_pci_locality->bus_max+1)) { + hwloc_debug(" Reusing PCI locality up to bus %04x:%02x\n", + domain, bus_max); + topology->last_pci_locality->bus_max = bus_max; + goto done; + } + + loc = malloc(sizeof(*loc)); + if (!loc) { + /* fallback to attaching to root */ + parent = hwloc_get_root_obj(topology); + goto done; + } + + loc->domain = domain; + loc->bus_min = bus_min; + loc->bus_max = bus_max; + loc->parent = parent; + loc->cpuset = hwloc_bitmap_dup(parent->cpuset); + if (!loc->cpuset) { + /* fallback to attaching to root */ + free(loc); + parent = hwloc_get_root_obj(topology); + goto done; + } + + hwloc_debug("Adding PCI locality %s P#%u for bus %04x:[%02x:%02x]\n", + hwloc_obj_type_string(parent->type), parent->os_index, loc->domain, loc->bus_min, loc->bus_max); + if (topology->last_pci_locality) { + loc->prev = topology->last_pci_locality; + loc->next = NULL; + topology->last_pci_locality->next = loc; + topology->last_pci_locality = loc; + } else { + loc->prev = NULL; + loc->next = NULL; + 
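
The locality-reuse test above is plain interval merging: a new PCI bus range extends the previous locality when it has the same parent and domain and starts no later than one past the previous bus_max. Reduced to its core, under those assumptions:

    struct range { unsigned min, max; };

    /* returns 1 and widens prev if next is adjacent or overlapping */
    static int try_merge(struct range *prev, unsigned next_min, unsigned next_max)
    {
        if (next_min == prev->max || next_min == prev->max + 1) {
            if (next_max > prev->max)
                prev->max = next_max;
            return 1;
        }
        return 0; /* caller starts a new range */
    }
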
topology->first_pci_locality = loc; + topology->last_pci_locality = loc; + } + + done: + /* dequeue this object */ + tree = obj->next_sibling; + obj->next_sibling = NULL; + hwloc_insert_object_by_parent(topology, parent, obj); + } + + return 0; +} + + +/********************************* + * Finding PCI objects or parents + */ + struct hwloc_obj * -hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, - unsigned domain, unsigned bus, unsigned dev, unsigned func) +hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, + unsigned domain, unsigned bus, unsigned dev, unsigned func) { struct hwloc_pcidev_attr_s busid; + hwloc_obj_t parent; + + /* try to find that exact busid */ + parent = hwloc_pci_find_by_busid(topology, domain, bus, dev, func); + if (parent) + return parent; + + /* try to find the locality of that bus instead */ busid.domain = domain; busid.bus = bus; busid.dev = dev; @@ -538,66 +672,10 @@ hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, return hwloc__pci_find_busid_parent(topology, &busid); } -int -hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology) -{ - struct hwloc_obj *root = hwloc_get_root_obj(topology); - struct hwloc_obj **listp, *obj; - - if (!topology->need_pci_belowroot_apply_locality) - return 0; - topology->need_pci_belowroot_apply_locality = 0; - - /* root->io_first_child contains some PCI hierarchies, any maybe some non-PCI things. - * insert the PCI trees according to their PCI-locality. - */ - listp = &root->io_first_child; - while ((obj = *listp) != NULL) { - struct hwloc_pcidev_attr_s *busid; - struct hwloc_obj *parent; - - /* skip non-PCI objects */ - if (obj->type != HWLOC_OBJ_PCI_DEVICE - && !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) - && !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) { - listp = &obj->next_sibling; - continue; - } - - if (obj->type == HWLOC_OBJ_PCI_DEVICE - || (obj->type == HWLOC_OBJ_BRIDGE - && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) - busid = &obj->attr->pcidev; - else { - /* hostbridges don't have a PCI busid for looking up locality, use their first child if PCI */ - hwloc_obj_t child = obj->io_first_child; - if (child && (child->type == HWLOC_OBJ_PCI_DEVICE - || (child->type == HWLOC_OBJ_BRIDGE - && child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))) - busid = &obj->io_first_child->attr->pcidev; - else - continue; - } - - /* attach the object (and children) where it belongs */ - parent = hwloc__pci_find_busid_parent(topology, busid); - if (parent == root) { - /* keep this object here */ - listp = &obj->next_sibling; - } else { - /* dequeue this object */ - *listp = obj->next_sibling; - obj->next_sibling = NULL; - hwloc_insert_object_by_parent(topology, parent, obj); - } - } - - return 0; -} - +/* return the smallest object that contains the desired busid */ static struct hwloc_obj * -hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent, - unsigned domain, unsigned bus, unsigned dev, unsigned func) +hwloc__pci_find_by_busid(hwloc_obj_t parent, + unsigned domain, unsigned bus, unsigned dev, unsigned func) { hwloc_obj_t child; @@ -622,7 +700,7 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent, && child->attr->bridge.downstream.pci.secondary_bus <= bus && child->attr->bridge.downstream.pci.subordinate_bus >= bus) /* not the right bus id, but it's included in the bus below that bridge */ - return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, 
func); + return hwloc__pci_find_by_busid(child, domain, bus, dev, func); } else if (child->type == HWLOC_OBJ_BRIDGE && child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI @@ -632,7 +710,7 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent, && child->attr->bridge.downstream.pci.secondary_bus <= bus && child->attr->bridge.downstream.pci.subordinate_bus >= bus) { /* contains our bus, recurse */ - return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func); + return hwloc__pci_find_by_busid(child, domain, bus, dev, func); } } /* didn't find anything, return parent */ @@ -640,17 +718,54 @@ hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent, } struct hwloc_obj * -hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology, - unsigned domain, unsigned bus, unsigned dev, unsigned func) +hwloc_pci_find_by_busid(struct hwloc_topology *topology, + unsigned domain, unsigned bus, unsigned dev, unsigned func) { + struct hwloc_pci_locality_s *loc; hwloc_obj_t root = hwloc_get_root_obj(topology); - hwloc_obj_t parent = hwloc__pci_belowroot_find_by_busid(root, domain, bus, dev, func); - if (parent == root) + hwloc_obj_t parent = NULL; + + hwloc_debug("pcidisc looking for bus id %04x:%02x:%02x.%01x\n", domain, bus, dev, func); + loc = topology->first_pci_locality; + while (loc) { + if (loc->domain == domain && loc->bus_min <= bus && loc->bus_max >= bus) { + parent = loc->parent; + assert(parent); + hwloc_debug(" found pci locality for %04x:[%02x:%02x]\n", + loc->domain, loc->bus_min, loc->bus_max); + break; + } + loc = loc->next; + } + /* if we failed to insert localities, look at root too */ + if (!parent) + parent = root; + + hwloc_debug(" looking for bus %04x:%02x:%02x.%01x below %s P#%u\n", + domain, bus, dev, func, + hwloc_obj_type_string(parent->type), parent->os_index); + parent = hwloc__pci_find_by_busid(parent, domain, bus, dev, func); + if (parent == root) { + hwloc_debug(" found nothing better than root object, ignoring\n"); return NULL; - else + } else { + if (parent->type == HWLOC_OBJ_PCI_DEVICE + || (parent->type == HWLOC_OBJ_BRIDGE && parent->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) + hwloc_debug(" found busid %04x:%02x:%02x.%01x\n", + parent->attr->pcidev.domain, parent->attr->pcidev.bus, + parent->attr->pcidev.dev, parent->attr->pcidev.func); + else + hwloc_debug(" found parent %s P#%u\n", + hwloc_obj_type_string(parent->type), parent->os_index); return parent; + } } + +/******************************* + * Parsing the PCI Config Space + */ + #define HWLOC_PCI_STATUS 0x06 #define HWLOC_PCI_STATUS_CAP_LIST 0x10 #define HWLOC_PCI_CAPABILITY_LIST 0x34 @@ -703,13 +818,14 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config, * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane + * PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane */ /* lanespeed in Gbit/s */ if (speed <= 2) lanespeed = 2.5f * speed * 0.8f; else - lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen5 will be 32 GT/s and so on */ + lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */ /* linkspeed in GB/s */ *linkspeed = lanespeed * width / 8; @@ -738,30 +854,27 @@ hwloc_pcidisc_check_bridge_type(unsigned device_class, const unsigned char *conf #define HWLOC_PCI_SUBORDINATE_BUS 0x1a int 
-hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, +hwloc_pcidisc_find_bridge_buses(unsigned domain, unsigned bus, unsigned dev, unsigned func, + unsigned *secondary_busp, unsigned *subordinate_busp, const unsigned char *config) { - struct hwloc_bridge_attr_s *battr = &obj->attr->bridge; - struct hwloc_pcidev_attr_s *pattr = &battr->upstream.pci; + unsigned secondary_bus, subordinate_bus; - if (config[HWLOC_PCI_PRIMARY_BUS] != pattr->bus) { + if (config[HWLOC_PCI_PRIMARY_BUS] != bus) { /* Sometimes the config space contains 00 instead of the actual primary bus number. * Always trust the bus ID because it was built by the system which has more information * to workaround such problems (e.g. ACPI information about PCI parent/children). */ hwloc_debug(" %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n", - pattr->domain, pattr->bus, pattr->dev, pattr->func, config[HWLOC_PCI_PRIMARY_BUS]); + domain, bus, dev, func, config[HWLOC_PCI_PRIMARY_BUS]); } - battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI; - battr->downstream_type = HWLOC_OBJ_BRIDGE_PCI; - battr->downstream.pci.domain = pattr->domain; - battr->downstream.pci.secondary_bus = config[HWLOC_PCI_SECONDARY_BUS]; - battr->downstream.pci.subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS]; + secondary_bus = config[HWLOC_PCI_SECONDARY_BUS]; + subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS]; - if (battr->downstream.pci.secondary_bus <= pattr->bus - || battr->downstream.pci.subordinate_bus <= pattr->bus - || battr->downstream.pci.secondary_bus > battr->downstream.pci.subordinate_bus) { + if (secondary_bus <= bus + || subordinate_bus <= bus + || secondary_bus > subordinate_bus) { /* This should catch most cases of invalid bridge information * (e.g. 00 for secondary and subordinate). * Ideally we would also check that [secondary-subordinate] is included @@ -769,15 +882,21 @@ hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, * because objects may be discovered out of order (especially in the fsroot case). */ hwloc_debug(" %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n", - pattr->domain, pattr->bus, pattr->dev, pattr->func, - battr->downstream.pci.secondary_bus, battr->downstream.pci.subordinate_bus); - hwloc_free_unlinked_object(obj); + domain, bus, dev, func, + secondary_bus, subordinate_bus); return -1; } + *secondary_busp = secondary_bus; + *subordinate_busp = subordinate_bus; return 0; } + +/**************** + * Class Strings + */ + const char * hwloc_pci_class_string(unsigned short class_id) { diff --git a/src/3rdparty/hwloc/src/shmem.c b/src/3rdparty/hwloc/src/shmem.c index 6c507f522..94d55eef7 100644 --- a/src/3rdparty/hwloc/src/shmem.c +++ b/src/3rdparty/hwloc/src/shmem.c @@ -1,12 +1,12 @@ /* - * Copyright © 2017-2018 Inria. All rights reserved. + * Copyright © 2017-2019 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "hwloc/shmem.h" +#include "private/private.h" #ifndef HWLOC_WIN_SYS @@ -214,6 +214,8 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, new->support.discovery = malloc(sizeof(*new->support.discovery)); new->support.cpubind = malloc(sizeof(*new->support.cpubind)); new->support.membind = malloc(sizeof(*new->support.membind)); + if (!new->support.discovery || !new->support.cpubind || !new->support.membind) + goto out_with_support; memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery)); memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind)); memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind)); @@ -230,6 +232,11 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, *topologyp = new; return 0; + out_with_support: + free(new->support.discovery); + free(new->support.cpubind); + free(new->support.membind); + free(new); out_with_components: hwloc_components_fini(); out_with_mmap: diff --git a/src/3rdparty/hwloc/src/topology-noos.c b/src/3rdparty/hwloc/src/topology-noos.c index 77871eb17..174b6fd8c 100644 --- a/src/3rdparty/hwloc/src/topology-noos.c +++ b/src/3rdparty/hwloc/src/topology-noos.c @@ -1,26 +1,34 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" static int -hwloc_look_noos(struct hwloc_backend *backend) +hwloc_look_noos(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { + /* + * This backend uses the underlying OS. + * However we don't enforce topology->is_thissystem so that + * we may still force use this backend when debugging with !thissystem. 
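
The added out_with_support path above is the usual C goto-cleanup idiom: because free(NULL) is legal, a single rollback label covers all three partially-completed allocations. In isolation (struct and sizes are illustrative):

    #include <stdlib.h>

    struct support { void *discovery, *cpubind, *membind; };

    static int alloc_support(struct support *s)
    {
        s->discovery = malloc(16);
        s->cpubind   = malloc(16);
        s->membind   = malloc(16);
        if (!s->discovery || !s->cpubind || !s->membind)
            goto out_with_support;
        return 0;

     out_with_support:
        free(s->discovery);
        free(s->cpubind);
        free(s->membind);
        return -1;
    }
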
+ */ + struct hwloc_topology *topology = backend->topology; int nbprocs; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; - nbprocs = hwloc_fallback_nbprocessors(topology); + nbprocs = hwloc_fallback_nbprocessors(0); if (nbprocs >= 1) topology->support.discovery->pu = 1; else @@ -33,13 +41,15 @@ hwloc_look_noos(struct hwloc_backend *backend) } static struct hwloc_backend * -hwloc_noos_component_instantiate(struct hwloc_disc_component *component, +hwloc_noos_component_instantiate(struct hwloc_topology *topology, + struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) { struct hwloc_backend *backend; - backend = hwloc_backend_alloc(component); + backend = hwloc_backend_alloc(topology, component); if (!backend) return NULL; backend->discover = hwloc_look_noos; @@ -47,9 +57,9 @@ hwloc_noos_component_instantiate(struct hwloc_disc_component *component, } static struct hwloc_disc_component hwloc_noos_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "no_os", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_noos_component_instantiate, 40, /* lower than native OS component, higher than globals */ 1, diff --git a/src/3rdparty/hwloc/src/topology-synthetic.c b/src/3rdparty/hwloc/src/topology-synthetic.c index 1fe334d1c..686efce1f 100644 --- a/src/3rdparty/hwloc/src/topology-synthetic.c +++ b/src/3rdparty/hwloc/src/topology-synthetic.c @@ -6,11 +6,11 @@ * See COPYING in top-level directory. */ -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/misc.h" +#include "private/debug.h" #include #include @@ -122,6 +122,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, unsigned long nbs = 1; unsigned j, mul; const char *tmp; + struct hwloc_synthetic_intlv_loop_s *loops; tmp = attr; while (tmp) { @@ -132,9 +133,10 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, tmp++; } - { /* nr_loops colon-separated fields, but we may need one more at the end */ - HWLOC_VLA(struct hwloc_synthetic_intlv_loop_s, loops, nr_loops+1); + loops = malloc((nr_loops+1) * sizeof(*loops)); + if (!loops) + goto out_with_array; if (*attr >= '0' && *attr <= '9') { /* interleaving as x*y:z*t:... 
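
The instantiate hook seen here gained the topology and excluded_phases parameters in hwloc 2.1. For a third-party component, the minimal shape now looks like this sketch (my_discover is a placeholder for a discover(backend, dstatus) callback, not a real hwloc symbol):

    static struct hwloc_backend *
    my_component_instantiate(struct hwloc_topology *topology,
                             struct hwloc_disc_component *component,
                             unsigned excluded_phases __hwloc_attribute_unused,
                             const void *d1 __hwloc_attribute_unused,
                             const void *d2 __hwloc_attribute_unused,
                             const void *d3 __hwloc_attribute_unused)
    {
        /* backend->phases is component->phases minus the phases the
         * topology already excluded, as set up in hwloc_backend_alloc() */
        struct hwloc_backend *backend = hwloc_backend_alloc(topology, component);
        if (!backend)
            return NULL;
        backend->discover = my_discover;
        return backend;
    }
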
*/ @@ -148,11 +150,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (tmp2 == tmp || *tmp2 != '*') { if (verbose) fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number before '*'\n", tmp); + free(loops); goto out_with_array; } if (!step) { if (verbose) fprintf(stderr, "Invalid interleaving loop with step 0 at '%s'\n", tmp); + free(loops); goto out_with_array; } tmp2++; @@ -160,11 +164,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (tmp3 == tmp2 || (*tmp3 && *tmp3 != ':' && *tmp3 != ')' && *tmp3 != ' ')) { if (verbose) fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number between '*' and ':'\n", tmp); + free(loops); goto out_with_array; } if (!nb) { if (verbose) fprintf(stderr, "Invalid interleaving loop with number 0 at '%s'\n", tmp2); + free(loops); goto out_with_array; } loops[cur_loop].step = step; @@ -192,11 +198,13 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (err < 0) { if (verbose) fprintf(stderr, "Failed to read synthetic index interleaving loop type '%s'\n", tmp); + free(loops); goto out_with_array; } if (type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) { if (verbose) fprintf(stderr, "Misc object type disallowed in synthetic index interleaving loop type '%s'\n", tmp); + free(loops); goto out_with_array; } for(i=0; ; i++) { @@ -217,6 +225,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (verbose) fprintf(stderr, "Failed to find level for synthetic index interleaving loop type '%s'\n", tmp); + free(loops); goto out_with_array; } tmp = strchr(tmp, ':'); @@ -235,6 +244,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, if (loops[i].level_depth == mydepth && i != cur_loop) { if (verbose) fprintf(stderr, "Invalid duplicate interleaving loop type in synthetic index '%s'\n", attr); + free(loops); goto out_with_array; } if (loops[i].level_depth < mydepth @@ -264,6 +274,7 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, } else { if (verbose) fprintf(stderr, "Invalid index interleaving total width %lu instead of %lu\n", nbs, total); + free(loops); goto out_with_array; } } @@ -278,6 +289,8 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, mul *= nb; } + free(loops); + /* check that we have the right values (cannot pass total, cannot give duplicate 0) */ for(j=0; j= total) { @@ -293,7 +306,6 @@ hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, } indexes->array = array; - } } return; @@ -527,7 +539,8 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, if (*pos < '0' || *pos > '9') { if (hwloc_type_sscanf(pos, &type, &attrs, sizeof(attrs)) < 0) { - if (!strncmp(pos, "Die", 3) || !strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) { + if (!strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) { + /* possible future types */ type = HWLOC_OBJ_GROUP; } else { /* FIXME: allow generic "Cache" string? 
would require to deal with possibly duplicate cache levels */ @@ -645,6 +658,12 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, errno = EINVAL; return -1; } + if (type_count[HWLOC_OBJ_DIE] > 1) { + if (verbose) + fprintf(stderr, "Synthetic string cannot have several die levels\n"); + errno = EINVAL; + return -1; + } if (type_count[HWLOC_OBJ_NUMANODE] > 1) { if (verbose) fprintf(stderr, "Synthetic string cannot have several NUMA node levels\n"); @@ -829,6 +848,7 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr, obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096; break; case HWLOC_OBJ_PACKAGE: + case HWLOC_OBJ_DIE: break; case HWLOC_OBJ_L1CACHE: case HWLOC_OBJ_L2CACHE: @@ -953,13 +973,19 @@ hwloc__look_synthetic(struct hwloc_topology *topology, } static int -hwloc_look_synthetic(struct hwloc_backend *backend) +hwloc_look_synthetic(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { + /* + * This backend enforces !topology->is_thissystem by default. + */ + struct hwloc_topology *topology = backend->topology; struct hwloc_synthetic_backend_data_s *data = backend->private_data; hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); unsigned i; + assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL); + assert(!topology->levels[0][0]->cpuset); hwloc_alloc_root_sets(topology->levels[0][0]); @@ -1001,7 +1027,9 @@ hwloc_synthetic_backend_disable(struct hwloc_backend *backend) } static struct hwloc_backend * -hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component, +hwloc_synthetic_component_instantiate(struct hwloc_topology *topology, + struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -1021,7 +1049,7 @@ hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component, } } - backend = hwloc_backend_alloc(component); + backend = hwloc_backend_alloc(topology, component); if (!backend) goto out; @@ -1051,8 +1079,8 @@ hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component, } static struct hwloc_disc_component hwloc_synthetic_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, "synthetic", + HWLOC_DISC_PHASE_GLOBAL, ~0, hwloc_synthetic_component_instantiate, 30, @@ -1267,6 +1295,12 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag /* if exporting to v1 or without extended-types, use all-v1-compatible Socket name */ res = hwloc_snprintf(tmp, tmplen, "Socket%s", aritys); + } else if (obj->type == HWLOC_OBJ_DIE + && (flags & (HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES + |HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1))) { + /* if exporting to v1 or without extended-types, use all-v1-compatible Group name */ + res = hwloc_snprintf(tmp, tmplen, "Group%s", aritys); + } else if (obj->type == HWLOC_OBJ_GROUP /* don't export group depth */ || flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) { res = hwloc_snprintf(tmp, tmplen, "%s%s", hwloc_obj_type_string(obj->type), aritys); @@ -1323,16 +1357,26 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign } while (mchild) { - /* v2: export all NUMA children */ - - assert(mchild->type == HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */ + /* FIXME: really recurse to export memcaches and numanode, + * but it requires clever parsing of [ memcache [numa] [numa] ] during import, + * better attaching of 
things to describe the hierarchy. + */ + hwloc_obj_t numanode = mchild; + /* only export the first NUMA node leaf of each memory child + * FIXME: This assumes mscache aren't shared between nodes, that's true in current platforms + */ + while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) { + assert(numanode->arity == 1); + numanode = numanode->memory_first_child; + } + assert(numanode); /* there's always a numanode at the bottom of the memory tree */ if (needprefix) hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' '); hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, '['); - res = hwloc__export_synthetic_obj(topology, flags, mchild, (unsigned)-1, tmp, tmplen); + res = hwloc__export_synthetic_obj(topology, flags, numanode, (unsigned)-1, tmp, tmplen); if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) return -1; @@ -1366,9 +1410,8 @@ hwloc_check_memory_symmetric(struct hwloc_topology * topology) assert(node); first_parent = node->parent; - assert(hwloc__obj_type_is_normal(first_parent->type)); /* only depth-1 memory children for now */ - /* check whether all object on parent's level have same number of NUMA children */ + /* check whether all object on parent's level have same number of NUMA bits */ for(i=0; idepth); i++) { hwloc_obj_t parent, mchild; @@ -1379,10 +1422,9 @@ hwloc_check_memory_symmetric(struct hwloc_topology * topology) if (parent->memory_arity != first_parent->memory_arity) goto out_with_bitmap; - /* clear these NUMA children from remaining_nodes */ + /* clear children NUMA bits from remaining_nodes */ mchild = parent->memory_first_child; while (mchild) { - assert(mchild->type == HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */ hwloc_bitmap_clr(remaining_nodes, mchild->os_index); /* cannot use parent->nodeset, some normal children may have other NUMA nodes */ mchild = mchild->next_sibling; } diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c index d03645c0f..22521aa31 100644 --- a/src/3rdparty/hwloc/src/topology-windows.c +++ b/src/3rdparty/hwloc/src/topology-windows.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -9,10 +9,10 @@ /* To try to get all declarations duplicated below. */ #define _WIN32_WINNT 0x0601 -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/debug.h" #include @@ -731,8 +731,14 @@ hwloc_win_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unuse */ static int -hwloc_look_windows(struct hwloc_backend *backend) +hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { + /* + * This backend uses the underlying OS. + * However we don't enforce topology->is_thissystem so that + * we may still force use this backend when debugging with !thissystem. 
+ */ + struct hwloc_topology *topology = backend->topology; hwloc_bitmap_t groups_pu_set = NULL; SYSTEM_INFO SystemInfo; @@ -740,6 +746,8 @@ hwloc_look_windows(struct hwloc_backend *backend) int gotnuma = 0; int gotnumamemory = 0; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; @@ -1136,13 +1144,15 @@ static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribu } static struct hwloc_backend * -hwloc_windows_component_instantiate(struct hwloc_disc_component *component, +hwloc_windows_component_instantiate(struct hwloc_topology *topology, + struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) { struct hwloc_backend *backend; - backend = hwloc_backend_alloc(component); + backend = hwloc_backend_alloc(topology, component); if (!backend) return NULL; backend->discover = hwloc_look_windows; @@ -1150,9 +1160,9 @@ hwloc_windows_component_instantiate(struct hwloc_disc_component *component, } static struct hwloc_disc_component hwloc_windows_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "windows", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_windows_component_instantiate, 50, 1, @@ -1168,10 +1178,12 @@ const struct hwloc_component hwloc_windows_component = { }; int -hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) { +hwloc_fallback_nbprocessors(unsigned flags __hwloc_attribute_unused) { int n; SYSTEM_INFO sysinfo; + /* TODO handle flags & HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE */ + /* by default, ignore groups (return only the number in the current group) */ GetSystemInfo(&sysinfo); n = sysinfo.dwNumberOfProcessors; /* FIXME could be non-contigous, rather return a mask from dwActiveProcessorMask? */ diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index 4aefdcf1f..1060157de 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -14,13 +14,12 @@ * on various architectures, without having to use this x86-specific code. 
*/ -#include -#include -#include -#include -#include - -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/debug.h" +#include "private/misc.h" +#include "private/cpuid-x86.h" #include #ifdef HAVE_DIRENT_H @@ -70,6 +69,8 @@ cpuiddump_read(const char *dirpath, unsigned idx) { struct cpuiddump *cpuiddump; struct cpuiddump_entry *cur; + size_t filenamelen; + char *filename; FILE *file; char line[128]; unsigned nr; @@ -80,16 +81,16 @@ cpuiddump_read(const char *dirpath, unsigned idx) goto out; } - { - size_t filenamelen = strlen(dirpath) + 15; - HWLOC_VLA(char, filename, filenamelen); + filenamelen = strlen(dirpath) + 15; + filename = malloc(filenamelen); + if (!filename) + goto out_with_dump; snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx); file = fopen(filename, "r"); if (!file) { fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename); - goto out_with_dump; + goto out_with_filename; } - } nr = 0; while (fgets(line, sizeof(line), file)) @@ -117,10 +118,13 @@ cpuiddump_read(const char *dirpath, unsigned idx) cpuiddump->nr = nr; fclose(file); + free(filename); return cpuiddump; out_with_file: fclose(file); + out_with_filename: + free(filename); out_with_dump: free(cpuiddump); out: @@ -170,6 +174,11 @@ static void cpuid_or_from_dump(unsigned *eax, unsigned *ebx, unsigned *ecx, unsi * Core detection routines and structures */ +enum hwloc_x86_disc_flags { + HWLOC_X86_DISC_FLAG_FULL = (1<<0), /* discover everything instead of only annotating */ + HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES = (1<<1) /* use AMD topoext numanode information */ +}; + #define has_topoext(features) ((features)[6] & (1 << 22)) #define has_x2apic(features) ((features)[4] & (1 << 21)) @@ -190,12 +199,15 @@ struct cacheinfo { struct procinfo { unsigned present; unsigned apicid; - unsigned packageid; - unsigned dieid; - unsigned nodeid; - unsigned unitid; - unsigned threadid; - unsigned coreid; +#define PKG 0 +#define CORE 1 +#define NODE 2 +#define UNIT 3 +#define TILE 4 +#define MODULE 5 +#define DIE 6 +#define HWLOC_X86_PROCINFO_ID_NR 7 + unsigned ids[HWLOC_X86_PROCINFO_ID_NR]; unsigned *otherids; unsigned levels; unsigned numcaches; @@ -215,7 +227,8 @@ enum cpuid_type { unknown }; -static void fill_amd_cache(struct procinfo *infos, unsigned level, hwloc_obj_cache_type_t type, unsigned nbthreads_sharing, unsigned cpuid) +/* AMD legacy cache information from specific CPUID 0x80000005-6 leaves */ +static void setup__amd_cache_legacy(struct procinfo *infos, unsigned level, hwloc_obj_cache_type_t type, unsigned nbthreads_sharing, unsigned cpuid) { struct cacheinfo *cache, *tmpcaches; unsigned cachenum; @@ -262,7 +275,249 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, hwloc_obj_cac hwloc_debug("cache L%u t%u linesize %u ways %d size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10); } -static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump) +/* AMD legacy cache information from CPUID 0x80000005-6 leaves */ +static void read_amd_caches_legacy(struct procinfo *infos, struct cpuiddump *src_cpuiddump, unsigned legacy_max_log_proc) +{ + unsigned eax, ebx, ecx, edx; + + eax = 0x80000005; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + setup__amd_cache_legacy(infos, 1, HWLOC_OBJ_CACHE_DATA, 1, ecx); /* private L1d */ + setup__amd_cache_legacy(infos, 1, HWLOC_OBJ_CACHE_INSTRUCTION, 1, edx); /* private L1i */ 
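
For reference, the ECX/EDX words handed to setup__amd_cache_legacy() above pack the L1 geometry into byte-wide fields; the layout below is an assumption taken from the AMD CPUID specification (Fn8000_0005 L1 descriptors), not something this patch defines:

    /* hypothetical decode of a CPUID 0x80000005 L1 descriptor word */
    static void decode_amd_l1_descriptor(unsigned reg, unsigned *size_kb,
                                         unsigned *assoc, unsigned *linesize)
    {
        *size_kb  = (reg >> 24) & 0xff; /* cache size in KB */
        *assoc    = (reg >> 16) & 0xff; /* 0xff means fully associative */
        *linesize =  reg        & 0xff; /* line size in bytes */
        /* e.g. reg == 0x20080140 -> 32 KB, 8-way, 64-byte lines */
    }

The topoext and leaf-0x04 paths later in this file compute the size explicitly instead, as size = linesize * linepart * ways * sets; for example 64 B lines x 1 partition x 8 ways x 64 sets = 32768 bytes = 32 KB.
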
+ + eax = 0x80000006; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if (ecx & 0xf000) + /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11. + * Could be useful if some Intels (at least before Core micro-architecture) + * support this leaf without leaf 0x4. + */ + setup__amd_cache_legacy(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx); /* private L2u */ + if (edx & 0xf000) + setup__amd_cache_legacy(infos, 3, HWLOC_OBJ_CACHE_UNIFIED, legacy_max_log_proc, edx); /* package-wide L3u */ +} + +/* AMD caches from CPUID 0x8000001d leaf (topoext) */ +static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *src_cpuiddump) +{ + unsigned eax, ebx, ecx, edx; + unsigned cachenum; + struct cacheinfo *cache; + + /* the code below doesn't want any other cache yet */ + assert(!infos->numcaches); + + for (cachenum = 0; ; cachenum++) { + eax = 0x8000001d; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if ((eax & 0x1f) == 0) + break; + infos->numcaches++; + } + + cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); + if (cache) { + for (cachenum = 0; ; cachenum++) { + unsigned long linesize, linepart, ways, sets; + eax = 0x8000001d; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + if ((eax & 0x1f) == 0) + break; + switch (eax & 0x1f) { + case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; + case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; + default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; + } + + cache->level = (eax >> 5) & 0x7; + /* Note: actually number of cores */ + cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; + + cache->linesize = linesize = (ebx & 0xfff) + 1; + cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; + ways = ((ebx >> 22) & 0x3ff) + 1; + + if (eax & (1 << 9)) + /* Fully associative */ + cache->ways = -1; + else + cache->ways = ways; + cache->sets = sets = ecx + 1; + cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; + + hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", + cachenum, cache->level, + cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 
'i' : 'u', + cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); + + cache++; + } + } else { + infos->numcaches = 0; + } +} + +/* Intel cache info from CPUID 0x04 leaf */ +static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, struct cpuiddump *src_cpuiddump) +{ + unsigned level; + struct cacheinfo *tmpcaches; + unsigned eax, ebx, ecx, edx; + unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */ + unsigned cachenum; + struct cacheinfo *cache; + + for (cachenum = 0; ; cachenum++) { + eax = 0x04; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + hwloc_debug("cache %u type %u\n", cachenum, eax & 0x1f); + if ((eax & 0x1f) == 0) + break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; + infos->numcaches++; + } + + tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache)); + if (!tmpcaches) { + infos->numcaches = oldnumcaches; + } else { + infos->cache = tmpcaches; + cache = &infos->cache[oldnumcaches]; + + for (cachenum = 0; ; cachenum++) { + unsigned long linesize, linepart, ways, sets; + eax = 0x04; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + if ((eax & 0x1f) == 0) + break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; + switch (eax & 0x1f) { + case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; + case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; + default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; + } + + cache->level = level; + cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; + + cache->linesize = linesize = (ebx & 0xfff) + 1; + cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; + ways = ((ebx >> 22) & 0x3ff) + 1; + if (eax & (1 << 9)) + /* Fully associative */ + cache->ways = -1; + else + cache->ways = ways; + cache->sets = sets = ecx + 1; + cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; + + hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", + cachenum, cache->level, + cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u', + cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); + cache++; + } + } +} + +/* AMD core/thread info from CPUID 0x80000008 leaf */ +static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_cpuiddump) +{ + unsigned eax, ebx, ecx, edx; + unsigned max_nbcores; + unsigned max_nbthreads; + unsigned coreidsize; + unsigned logprocid; + unsigned threadid __hwloc_attribute_unused; + + eax = 0x80000008; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + coreidsize = (ecx >> 12) & 0xf; + hwloc_debug("core ID size: %u\n", coreidsize); + if (!coreidsize) { + max_nbcores = (ecx & 0xff) + 1; + } else + max_nbcores = 1 << coreidsize; + hwloc_debug("Thus max # of cores: %u\n", max_nbcores); + + /* No multithreaded AMD for this old CPUID leaf */ + max_nbthreads = 1 ; + hwloc_debug("and max # of threads: %u\n", max_nbthreads); + + /* legacy_max_log_proc is deprecated, it can be smaller than max_nbcores, + * which is the maximum number of cores that the processor could theoretically support + * (see "Multiple Core Calculation" in the AMD CPUID specification). 
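
As a hypothetical worked case of the recomputation described here: max_nbcores = 8 and apicid = 26 give ids[PKG] = 26 / 8 = 3 and logprocid = 26 % 8 = 2; since max_nbthreads is 1 on this legacy leaf, that is thread 0 of core 2 in package 3.
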
+ * Recompute packageid/coreid accordingly. + */ + infos->ids[PKG] = infos->apicid / max_nbcores; + logprocid = infos->apicid % max_nbcores; + infos->ids[CORE] = logprocid / max_nbthreads; + threadid = logprocid % max_nbthreads; + hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]); +} + +/* AMD unit/node from CPUID 0x8000001e leaf (topoext) */ +static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump) +{ + unsigned apic_id, nodes_per_proc = 0; + unsigned eax, ebx, ecx, edx; + + eax = 0x8000001e; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + infos->apicid = apic_id = eax; + + if (flags & HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES) { + if (infos->cpufamilynumber == 0x16) { + /* ecx is reserved */ + infos->ids[NODE] = 0; + nodes_per_proc = 1; + } else { + /* AMD other families or Hygon family 18h */ + infos->ids[NODE] = ecx & 0xff; + nodes_per_proc = ((ecx >> 8) & 7) + 1; + } + if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2) + || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) { + hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc); + } + } + + if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */ + unsigned cores_per_unit; + /* coreid was obtained from read_amd_cores_legacy() earlier */ + infos->ids[UNIT] = ebx & 0xff; + cores_per_unit = ((ebx >> 8) & 0xff) + 1; + hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]); + /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor). + * The Linux kernel reduces theses to NUMA-node-wide (by applying %core_per_node and %unit_per node respectively). + * It's not clear if we should do this as well. 
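
As a hypothetical decode for this leaf: on family 0x17, EBX = 0x0103 yields ids[CORE] = 3 (EBX & 0xff) and threads_per_core = ((EBX >> 8) & 0xff) + 1 = 2, i.e. SMT-2, while ECX = 0 yields ids[NODE] = 0 with nodes_per_proc = ((ECX >> 8) & 7) + 1 = 1.
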
+ */ + } else { + unsigned threads_per_core; + infos->ids[CORE] = ebx & 0xff; + threads_per_core = ((ebx >> 8) & 0xff) + 1; + hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, infos->ids[NODE], threads_per_core, infos->ids[CORE]); + } +} + +/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */ +static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump) { unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id; unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */ @@ -302,11 +557,19 @@ static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuid /* apic_number is the actual number of threads per core */ break; case 2: - infos->coreid = id; - /* apic_number is the actual number of threads per module */ + infos->ids[CORE] = id; + /* apic_number is the actual number of threads per die */ + break; + case 3: + infos->ids[MODULE] = id; + /* apic_number is the actual number of threads per tile */ + break; + case 4: + infos->ids[TILE] = id; + /* apic_number is the actual number of threads per die */ break; case 5: - infos->dieid = id; + infos->ids[DIE] = id; /* apic_number is the actual number of threads per package */ break; default: @@ -317,16 +580,16 @@ static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuid apic_shift = apic_nextshift; } infos->apicid = apic_id; - infos->packageid = apic_id >> apic_shift; - hwloc_debug("x2APIC remainder: %u\n", infos->packageid); - hwloc_debug("this is thread %u of core %u\n", threadid, infos->coreid); + infos->ids[PKG] = apic_id >> apic_shift; + hwloc_debug("x2APIC remainder: %u\n", infos->ids[PKG]); + hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]); } } } /* Fetch information from the processor itself thanks to cpuid and store it in * infos for summarize to analyze them globally */ -static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump) +static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump) { struct hwloc_x86_backend_data_s *data = backend->private_data; unsigned eax, ebx, ecx = 0, edx; @@ -348,9 +611,9 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns else legacy_max_log_proc = 1; hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc); - infos->packageid = infos->apicid / legacy_max_log_proc; + infos->ids[PKG] = infos->apicid / legacy_max_log_proc; legacy_log_proc_id = infos->apicid % legacy_max_log_proc; - hwloc_debug("phys %u legacy thread %u\n", infos->packageid, legacy_log_proc_id); + hwloc_debug("phys %u legacy thread %u\n", infos->ids[PKG], legacy_log_proc_id); /* Get cpu model/family/stepping numbers from same cpuid */ _model = (eax>>4) & 0xf; @@ -397,258 +660,88 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns /* infos was calloc'ed, already ends with \0 */ } - /* Get core/thread information from cpuid 0x80000008 - * (not supported on Intel) - */ - if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008) { - 
unsigned max_nbcores; - unsigned max_nbthreads; - unsigned coreidsize; - unsigned logprocid; - eax = 0x80000008; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - coreidsize = (ecx >> 12) & 0xf; - hwloc_debug("core ID size: %u\n", coreidsize); - if (!coreidsize) { - max_nbcores = (ecx & 0xff) + 1; - } else - max_nbcores = 1 << coreidsize; - hwloc_debug("Thus max # of cores: %u\n", max_nbcores); - /* Still no multithreaded AMD */ - max_nbthreads = 1 ; - hwloc_debug("and max # of threads: %u\n", max_nbthreads); - /* legacy_max_log_proc is deprecated, it can be smaller than max_nbcores, - * which is the maximum number of cores that the processor could theoretically support - * (see "Multiple Core Calculation" in the AMD CPUID specification). - * Recompute packageid/threadid/coreid accordingly. - */ - infos->packageid = infos->apicid / max_nbcores; - logprocid = infos->apicid % max_nbcores; - infos->threadid = logprocid % max_nbthreads; - infos->coreid = logprocid / max_nbthreads; - hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid); - } - - infos->numcaches = 0; - infos->cache = NULL; - - /* Get apicid, nodeid, unitid from cpuid 0x8000001e - * and cache information from cpuid 0x8000001d - * (AMD topology extension) - */ - if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) { - unsigned apic_id, node_id, nodes_per_proc; - - /* the code below doesn't want any other cache yet */ - assert(!infos->numcaches); - - eax = 0x8000001e; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - infos->apicid = apic_id = eax; - - if (infos->cpufamilynumber == 0x16) { - /* ecx is reserved */ - node_id = 0; - nodes_per_proc = 1; - } else { - /* AMD other families or Hygon family 18h */ - node_id = ecx & 0xff; - nodes_per_proc = ((ecx >> 8) & 7) + 1; - } - infos->nodeid = node_id; - if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2) - || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) { - hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc); - } - - if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */ - unsigned unit_id, cores_per_unit; - infos->unitid = unit_id = ebx & 0xff; - cores_per_unit = ((ebx >> 8) & 0xff) + 1; - hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id); - /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor). - * The Linux kernel reduces theses to NUMA-node-wide (by applying %core_per_node and %unit_per node respectively). - * It's not clear if we should do this as well. 
- */ - } else { - unsigned core_id, threads_per_core; - infos->coreid = core_id = ebx & 0xff; - threads_per_core = ((ebx >> 8) & 0xff) + 1; - hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id); - } - - for (cachenum = 0; ; cachenum++) { - eax = 0x8000001d; - ecx = cachenum; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - if ((eax & 0x1f) == 0) - break; - infos->numcaches++; - } - - cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); - if (cache) { - for (cachenum = 0; ; cachenum++) { - unsigned long linesize, linepart, ways, sets; - eax = 0x8000001d; - ecx = cachenum; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - - if ((eax & 0x1f) == 0) - break; - switch (eax & 0x1f) { - case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; - case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; - default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; - } - - cache->level = (eax >> 5) & 0x7; - /* Note: actually number of cores */ - cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; - - cache->linesize = linesize = (ebx & 0xfff) + 1; - cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; - ways = ((ebx >> 22) & 0x3ff) + 1; - - if (eax & (1 << 9)) - /* Fully associative */ - cache->ways = -1; - else - cache->ways = ways; - cache->sets = sets = ecx + 1; - cache->size = linesize * linepart * ways * sets; - cache->inclusive = edx & 0x2; - - hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", - cachenum, cache->level, - cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u', - cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); - - cache++; - } - } else { - infos->numcaches = 0; - } - } else { - /* If there's no topoext, - * get cache information from cpuid 0x80000005 and 0x80000006 - * (not supported on Intel) - */ - if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000005) { - eax = 0x80000005; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_DATA, 1, ecx); /* private L1d */ - fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_INSTRUCTION, 1, edx); /* private L1i */ - } - if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) { - eax = 0x80000006; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - if (ecx & 0xf000) - /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11. - * Could be useful if some Intels (at least before Core micro-architecture) - * support this leaf without leaf 0x4. 
- */ - fill_amd_cache(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx); /* private L2u */ - if (edx & 0xf000) - fill_amd_cache(infos, 3, HWLOC_OBJ_CACHE_UNIFIED, legacy_max_log_proc, edx); /* package-wide L3u */ - } - } - - /* Get thread/core + cache information from cpuid 0x04 - * (not supported on AMD) - */ if ((cpuid_type != amd && cpuid_type != hygon) && highest_cpuid >= 0x04) { - unsigned max_nbcores; - unsigned max_nbthreads; - unsigned level; - struct cacheinfo *tmpcaches; - unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */ - - for (cachenum = 0; ; cachenum++) { - eax = 0x04; - ecx = cachenum; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - - hwloc_debug("cache %u type %u\n", cachenum, eax & 0x1f); - if ((eax & 0x1f) == 0) - break; - level = (eax >> 5) & 0x7; - if (data->is_knl && level == 3) - /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ - break; - infos->numcaches++; - - if (!cachenum) { - /* by the way, get thread/core information from the first cache */ - max_nbcores = ((eax >> 26) & 0x3f) + 1; - max_nbthreads = legacy_max_log_proc / max_nbcores; - hwloc_debug("thus %u threads\n", max_nbthreads); - infos->threadid = legacy_log_proc_id % max_nbthreads; - infos->coreid = legacy_log_proc_id / max_nbthreads; - hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid); - } + /* Get core/thread information from first cache reported by cpuid 0x04 + * (not supported on AMD) + */ + eax = 0x04; + ecx = 0; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if ((eax & 0x1f) != 0) { + /* cache looks valid */ + unsigned max_nbcores; + unsigned max_nbthreads; + unsigned threadid __hwloc_attribute_unused; + max_nbcores = ((eax >> 26) & 0x3f) + 1; + max_nbthreads = legacy_max_log_proc / max_nbcores; + hwloc_debug("thus %u threads\n", max_nbthreads); + threadid = legacy_log_proc_id % max_nbthreads; + infos->ids[CORE] = legacy_log_proc_id / max_nbthreads; + hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]); } + } - tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache)); - if (!tmpcaches) { - infos->numcaches = oldnumcaches; - } else { - infos->cache = tmpcaches; - cache = &infos->cache[oldnumcaches]; + /********************************************************************************* + * Get the hierarchy of thread, core, die, package, etc. from CPU-specific leaves + */ - for (cachenum = 0; ; cachenum++) { - unsigned long linesize, linepart, ways, sets; - eax = 0x04; - ecx = cachenum; - cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008 && !has_x2apic(features)) { + /* Get core/thread information from cpuid 0x80000008 + * (not supported on Intel) + * We could ignore this codepath when x2apic is supported, but we may need + * nodeids if HWLOC_X86_TOPOEXT_NUMANODES is set. 
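
Taken together, the reordered hierarchy-discovery ladder assembled by these hunks reads, applied in this order (later leaves refine or overwrite earlier IDs):

    1. non-Intel/Zhaoxin, no x2APIC    -> CPUID 0x80000008 (AMD legacy package/core)
    2. non-Intel/Zhaoxin with TOPOEXT  -> CPUID 0x8000001e (AMD node/unit/core)
    3. Intel with leaf 0x1f            -> v2 extended topology enumeration
    4. else, x2APIC with leaf 0x0b     -> v1 extended topology enumeration
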
+ */ + read_amd_cores_legacy(infos, src_cpuiddump); + } - if ((eax & 0x1f) == 0) - break; - level = (eax >> 5) & 0x7; - if (data->is_knl && level == 3) - /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ - break; - switch (eax & 0x1f) { - case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; - case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; - default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; - } - - cache->level = level; - cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; - - cache->linesize = linesize = (ebx & 0xfff) + 1; - cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; - ways = ((ebx >> 22) & 0x3ff) + 1; - if (eax & (1 << 9)) - /* Fully associative */ - cache->ways = -1; - else - cache->ways = ways; - cache->sets = sets = ecx + 1; - cache->size = linesize * linepart * ways * sets; - cache->inclusive = edx & 0x2; - - hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", - cachenum, cache->level, - cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u', - cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); - cache++; - } - } + if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) { + /* Get apicid, nodeid, unitid/coreid from cpuid 0x8000001e (AMD topology extension). + * Requires read_amd_cores_legacy() for coreid on family 0x15-16. + * + * Only needed when x2apic supported if NUMA nodes are needed. + */ + read_amd_cores_topoext(infos, flags, src_cpuiddump); } if ((cpuid_type == intel) && highest_cpuid >= 0x1f) { /* Get package/die/module/tile/core/thread information from cpuid 0x1f * (Intel v2 Extended Topology Enumeration) */ - look_exttopoenum(infos, 0x1f, src_cpuiddump); + read_intel_cores_exttopoenum(infos, 0x1f, src_cpuiddump); - } else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x0b && has_x2apic(features)) { + } else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin) + && highest_cpuid >= 0x0b && has_x2apic(features)) { /* Get package/core/thread information from cpuid 0x0b * (Intel v1 Extended Topology Enumeration) */ - look_exttopoenum(infos, 0x0b, src_cpuiddump); + read_intel_cores_exttopoenum(infos, 0x0b, src_cpuiddump); + } + + /************************************** + * Get caches from CPU-specific leaves + */ + + infos->numcaches = 0; + infos->cache = NULL; + + if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) { + /* Get cache information from cpuid 0x8000001d (AMD topology extension) */ + read_amd_caches_topoext(infos, src_cpuiddump); + + } else if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) { + /* If there's no topoext, + * get cache information from cpuid 0x80000005 and 0x80000006. + * (not supported on Intel) + * It looks like we cannot have 0x80000005 without 0x80000006. 
+ */ + read_amd_caches_legacy(infos, src_cpuiddump, legacy_max_log_proc); + } + + if ((cpuid_type != amd && cpuid_type != hygon) && highest_cpuid >= 0x04) { + /* Get cache information from cpuid 0x04 + * (not supported on AMD) + */ + read_intel_caches(data, infos, src_cpuiddump); } /* Now that we have all info, compute cacheids and apply quirks */ @@ -736,8 +829,55 @@ hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int replace) hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUStepping", number, replace); } +static void +hwloc_x86_add_groups(hwloc_topology_t topology, + struct procinfo *infos, + unsigned nbprocs, + hwloc_bitmap_t remaining_cpuset, + unsigned type, + const char *subtype, + unsigned kind, + int dont_merge) +{ + hwloc_bitmap_t obj_cpuset; + hwloc_obj_t obj; + unsigned i, j; + + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned packageid = infos[i].ids[PKG]; + unsigned id = infos[i].ids[type]; + + if (id == (unsigned)-1) { + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + obj_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].ids[type] == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + + if (infos[j].ids[PKG] == packageid && infos[j].ids[type] == id) { + hwloc_bitmap_set(obj_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + + obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id); + obj->cpuset = obj_cpuset; + obj->subtype = strdup(subtype); + obj->attr->group.kind = kind; + obj->attr->group.dont_merge = dont_merge; + hwloc_debug_2args_bitmap("os %s %u has cpuset %s\n", + subtype, id, obj_cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + } +} + /* Analyse information stored in infos, and build/annotate topology levels accordingly */ -static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery) +static void summarize(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags) { struct hwloc_topology *topology = backend->topology; struct hwloc_x86_backend_data_s *data = backend->private_data; @@ -747,6 +887,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int int one = -1; hwloc_bitmap_t remaining_cpuset; int gotnuma = 0; + int fulldiscovery = (flags & HWLOC_X86_DISC_FLAG_FULL); for (i = 0; i < nbprocs; i++) if (infos[i].present) { @@ -773,11 +914,11 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { if (fulldiscovery) { - unsigned packageid = infos[i].packageid; + unsigned packageid = infos[i].ids[PKG]; hwloc_bitmap_t package_cpuset = hwloc_bitmap_alloc(); for (j = i; j < nbprocs; j++) { - if (infos[j].packageid == packageid) { + if (infos[j].ids[PKG] == packageid) { hwloc_bitmap_set(package_cpuset, j); hwloc_bitmap_clr(remaining_cpuset, j); } @@ -811,7 +952,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } /* Look for Numa nodes inside packages (cannot be filtered-out) */ - if (fulldiscovery && getenv("HWLOC_X86_TOPOEXT_NUMANODES")) { + if (fulldiscovery && (flags & HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES)) { hwloc_bitmap_t node_cpuset; hwloc_obj_t node; @@ -819,8 +960,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); while ((i = hwloc_bitmap_first(remaining_cpuset)) 
!= (unsigned) -1) { - unsigned packageid = infos[i].packageid; - unsigned nodeid = infos[i].nodeid; + unsigned packageid = infos[i].ids[PKG]; + unsigned nodeid = infos[i].ids[NODE]; if (nodeid == (unsigned)-1) { hwloc_bitmap_clr(remaining_cpuset, i); @@ -829,12 +970,12 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int node_cpuset = hwloc_bitmap_alloc(); for (j = i; j < nbprocs; j++) { - if (infos[j].nodeid == (unsigned) -1) { + if (infos[j].ids[NODE] == (unsigned) -1) { hwloc_bitmap_clr(remaining_cpuset, j); continue; } - if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) { + if (infos[j].ids[PKG] == packageid && infos[j].ids[NODE] == nodeid) { hwloc_bitmap_set(node_cpuset, j); hwloc_bitmap_clr(remaining_cpuset, j); } @@ -852,77 +993,21 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { if (fulldiscovery) { - char *env; - int dont_merge; - hwloc_bitmap_t unit_cpuset, die_cpuset; - hwloc_obj_t unit, die; - - /* Look for Compute units inside packages */ + /* Look for AMD Compute units inside packages */ hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); - while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { - unsigned packageid = infos[i].packageid; - unsigned unitid = infos[i].unitid; - - if (unitid == (unsigned)-1) { - hwloc_bitmap_clr(remaining_cpuset, i); - continue; - } - - unit_cpuset = hwloc_bitmap_alloc(); - for (j = i; j < nbprocs; j++) { - if (infos[j].unitid == (unsigned) -1) { - hwloc_bitmap_clr(remaining_cpuset, j); - continue; - } - - if (infos[j].packageid == packageid && infos[j].unitid == unitid) { - hwloc_bitmap_set(unit_cpuset, j); - hwloc_bitmap_clr(remaining_cpuset, j); - } - } - unit = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, unitid); - unit->cpuset = unit_cpuset; - unit->subtype = strdup("ComputeUnit"); - unit->attr->group.kind = HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT; - hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n", - unitid, unit_cpuset); - hwloc_insert_object_by_cpuset(topology, unit); - } - - /* Look for Dies inside packages */ - env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS"); - dont_merge = env && atoi(env); + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + UNIT, "Compute Unit", + HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0); + /* Look for Intel Modules inside packages */ hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); - while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { - unsigned packageid = infos[i].packageid; - unsigned dieid = infos[i].dieid; - - if (dieid == (unsigned)-1) { - hwloc_bitmap_clr(remaining_cpuset, i); - continue; - } - - die_cpuset = hwloc_bitmap_alloc(); - for (j = i; j < nbprocs; j++) { - if (infos[j].dieid == (unsigned) -1) { - hwloc_bitmap_clr(remaining_cpuset, j); - continue; - } - - if (infos[j].packageid == packageid && infos[j].dieid == dieid) { - hwloc_bitmap_set(die_cpuset, j); - hwloc_bitmap_clr(remaining_cpuset, j); - } - } - die = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, dieid); - die->cpuset = die_cpuset; - die->subtype = strdup("Die"); - die->attr->group.kind = HWLOC_GROUP_KIND_INTEL_DIE; - die->attr->group.dont_merge = dont_merge; - hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n", - dieid, die_cpuset); - hwloc_insert_object_by_cpuset(topology, die); - } + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + MODULE, "Module", + HWLOC_GROUP_KIND_INTEL_MODULE, 0); + /* Look for Intel Tiles 
inside packages */ + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + TILE, "Tile", + HWLOC_GROUP_KIND_INTEL_TILE, 0); /* Look for unknown objects */ if (infos[one].otherids) { @@ -956,6 +1041,43 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } } + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) { + /* Look for Intel Dies inside packages */ + if (fulldiscovery) { + hwloc_bitmap_t die_cpuset; + hwloc_obj_t die; + + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned packageid = infos[i].ids[PKG]; + unsigned dieid = infos[i].ids[DIE]; + + if (dieid == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + die_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].ids[DIE] == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + + if (infos[j].ids[PKG] == packageid && infos[j].ids[DIE] == dieid) { + hwloc_bitmap_set(die_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + die = hwloc_alloc_setup_object(topology, HWLOC_OBJ_DIE, dieid); + die->cpuset = die_cpuset; + hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n", + dieid, die_cpuset); + hwloc_insert_object_by_cpuset(topology, die); + } + } + } + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { /* Look for cores */ if (fulldiscovery) { @@ -964,9 +1086,9 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { - unsigned packageid = infos[i].packageid; - unsigned nodeid = infos[i].nodeid; - unsigned coreid = infos[i].coreid; + unsigned packageid = infos[i].ids[PKG]; + unsigned nodeid = infos[i].ids[NODE]; + unsigned coreid = infos[i].ids[CORE]; if (coreid == (unsigned) -1) { hwloc_bitmap_clr(remaining_cpuset, i); @@ -975,12 +1097,12 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int core_cpuset = hwloc_bitmap_alloc(); for (j = i; j < nbprocs; j++) { - if (infos[j].coreid == (unsigned) -1) { + if (infos[j].ids[CORE] == (unsigned) -1) { hwloc_bitmap_clr(remaining_cpuset, j); continue; } - if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) { + if (infos[j].ids[PKG] == packageid && infos[j].ids[NODE] == nodeid && infos[j].ids[CORE] == coreid) { hwloc_bitmap_set(core_cpuset, j); hwloc_bitmap_clr(remaining_cpuset, j); } @@ -1056,7 +1178,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } else { /* Add the missing cache */ hwloc_bitmap_t cache_cpuset; - unsigned packageid = infos[i].packageid; + unsigned packageid = infos[i].ids[PKG]; unsigned cacheid = infos[i].cache[l].cacheid; /* Now look for others sharing it */ cache_cpuset = hwloc_bitmap_alloc(); @@ -1071,7 +1193,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_clr(remaining_cpuset, j); continue; } - if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) { + if (infos[j].ids[PKG] == packageid && infos[j].cache[l2].cacheid == cacheid) { hwloc_bitmap_set(cache_cpuset, j); hwloc_bitmap_clr(remaining_cpuset, j); } @@ -1103,7 +1225,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int } static int -look_procs(struct 
hwloc_backend *backend, struct procinfo *infos, int fulldiscovery, +look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags), int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags)) @@ -1139,7 +1261,7 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscov } } - look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); + look_proc(backend, &infos[i], flags, highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); if (data->src_cpuiddump_path) { cpuiddump_free(src_cpuiddump); @@ -1152,10 +1274,10 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscov hwloc_bitmap_free(orig_cpuset); } - if (!data->apicid_unique) - fulldiscovery = 0; - else - summarize(backend, infos, fulldiscovery); + if (data->apicid_unique) + summarize(backend, infos, flags); + /* if !data->apicid_unique, do nothing and return success, so that the caller does nothing either */ + return 0; } @@ -1223,7 +1345,7 @@ static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, } static -int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) +int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) { struct hwloc_x86_backend_data_s *data = backend->private_data; unsigned nbprocs = data->nbprocs; @@ -1245,13 +1367,18 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) int ret = -1; if (data->src_cpuiddump_path) { - /* just read cpuid from the dump */ + /* Just read cpuid from the dump (implies !topology->is_thissystem by default) */ src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0); if (!src_cpuiddump) goto out; } else { - /* otherwise check if binding works */ + /* Using real hardware. + * However we don't enforce topology->is_thissystem so that + * we may still force use this backend when debugging with !thissystem. 
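
Once the die level is populated by summarize() above, it is an ordinary first-class object type on the consumer side. A minimal sketch against the public hwloc 2.1 API (not part of this patch):

    #include <stdio.h>
    #include <hwloc.h>

    int main(void)
    {
        hwloc_topology_t topo;
        hwloc_topology_init(&topo);
        hwloc_topology_load(topo);
        /* dies are counted and walked like packages or cores */
        printf("%d die(s)\n", hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_DIE));
        hwloc_topology_destroy(topo);
        return 0;
    }
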
+ */ + + /* check if binding works */ memset(&hooks, 0, sizeof(hooks)); support.membind = &memsupport; hwloc_set_native_binding_hooks(&hooks, &support); @@ -1281,12 +1408,13 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) if (NULL == infos) goto out; for (i = 0; i < nbprocs; i++) { - infos[i].nodeid = (unsigned) -1; - infos[i].packageid = (unsigned) -1; - infos[i].dieid = (unsigned) -1; - infos[i].unitid = (unsigned) -1; - infos[i].coreid = (unsigned) -1; - infos[i].threadid = (unsigned) -1; + infos[i].ids[PKG] = (unsigned) -1; + infos[i].ids[CORE] = (unsigned) -1; + infos[i].ids[NODE] = (unsigned) -1; + infos[i].ids[UNIT] = (unsigned) -1; + infos[i].ids[TILE] = (unsigned) -1; + infos[i].ids[MODULE] = (unsigned) -1; + infos[i].ids[DIE] = (unsigned) -1; } eax = 0x00; @@ -1334,7 +1462,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) hwloc_x86_os_state_save(&os_state, src_cpuiddump); - ret = look_procs(backend, infos, fulldiscovery, + ret = look_procs(backend, infos, flags, highest_cpuid, highest_ext_cpuid, features, cpuid_type, get_cpubind, set_cpubind); if (!ret) @@ -1343,8 +1471,8 @@ int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) if (nbprocs == 1) { /* only one processor, no need to bind */ - look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); - summarize(backend, infos, fulldiscovery); + look_proc(backend, &infos[0], flags, highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); + summarize(backend, infos, flags); ret = 0; } @@ -1367,13 +1495,20 @@ out: } static int -hwloc_x86_discover(struct hwloc_backend *backend) +hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { struct hwloc_x86_backend_data_s *data = backend->private_data; struct hwloc_topology *topology = backend->topology; + unsigned long flags = 0; int alreadypus = 0; int ret; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + + if (getenv("HWLOC_X86_TOPOEXT_NUMANODES")) { + flags |= HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES; + } + #if HAVE_DECL_RUNNING_ON_VALGRIND if (RUNNING_ON_VALGRIND && !data->src_cpuiddump_path) { fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n" @@ -1387,7 +1522,7 @@ hwloc_x86_discover(struct hwloc_backend *backend) assert(data->nbprocs > 0); /* enforced by hwloc_x86_component_instantiate() */ topology->support.discovery->pu = 1; } else { - int nbprocs = hwloc_fallback_nbprocessors(topology); + int nbprocs = hwloc_fallback_nbprocessors(HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE); if (nbprocs >= 1) topology->support.discovery->pu = 1; else @@ -1405,7 +1540,7 @@ hwloc_x86_discover(struct hwloc_backend *backend) /* several object types were added, we can't easily complete, just do partial discovery */ hwloc_topology_reconnect(topology, 0); - ret = hwloc_look_x86(backend, 0); + ret = hwloc_look_x86(backend, flags); if (ret) hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86"); return 0; @@ -1415,7 +1550,7 @@ hwloc_x86_discover(struct hwloc_backend *backend) } fulldiscovery: - if (hwloc_look_x86(backend, 1) < 0) { + if (hwloc_look_x86(backend, flags | HWLOC_X86_DISC_FLAG_FULL) < 0) { /* if failed, create PUs */ if (!alreadypus) hwloc_setup_pu_level(topology, data->nbprocs); @@ -1446,6 +1581,7 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s #if !(defined HWLOC_WIN_SYS && !defined __MINGW32__ && !defined __CYGWIN__) /* needs a lot of work */ struct dirent *dirent; DIR *dir; + 
char *path; FILE *file; char line [32]; @@ -1453,23 +1589,26 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s if (!dir) return -1; - char path[strlen(src_cpuiddump_path) + strlen("/hwloc-cpuid-info") + 1]; + path = malloc(strlen(src_cpuiddump_path) + strlen("/hwloc-cpuid-info") + 1); + if (!path) + goto out_with_dir; sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path); file = fopen(path, "r"); if (!file) { fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path); - goto out_with_dir; + goto out_with_path; } if (!fgets(line, sizeof(line), file)) { fprintf(stderr, "Found read dumped cpuid summary in %s\n", path); fclose(file); - goto out_with_dir; + goto out_with_path; } fclose(file); if (strcmp(line, "Architecture: x86\n")) { fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line); - goto out_with_dir; + goto out_with_path; } + free(path); while ((dirent = readdir(dir)) != NULL) { if (!strncmp(dirent->d_name, "pu", 2)) { @@ -1497,7 +1636,9 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s return 0; -out_with_dir: + out_with_path: + free(path); + out_with_dir: closedir(dir); #endif /* HWLOC_WIN_SYS & !__MINGW32__ needs a lot of work */ return -1; @@ -1513,7 +1654,9 @@ hwloc_x86_backend_disable(struct hwloc_backend *backend) } static struct hwloc_backend * -hwloc_x86_component_instantiate(struct hwloc_disc_component *component, +hwloc_x86_component_instantiate(struct hwloc_topology *topology, + struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -1522,7 +1665,7 @@ hwloc_x86_component_instantiate(struct hwloc_disc_component *component, struct hwloc_x86_backend_data_s *data; const char *src_cpuiddump_path; - backend = hwloc_backend_alloc(component); + backend = hwloc_backend_alloc(topology, component); if (!backend) goto out; @@ -1565,9 +1708,9 @@ hwloc_x86_component_instantiate(struct hwloc_disc_component *component, } static struct hwloc_disc_component hwloc_x86_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "x86", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_x86_component_instantiate, 45, /* between native and no_os */ 1, diff --git a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c index 5a0d02da4..d0e9ec164 100644 --- a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c +++ b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c @@ -1,18 +1,18 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
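
Both cpuid-dump helpers above trade C99 variable-length arrays for malloc() plus explicit cleanup labels, presumably so the file also builds with compilers that lack VLA support (e.g. MSVC). The shape in isolation, with hypothetical names and assuming the usual C headers:

    /* sketch of the malloc-based replacement for the old on-stack VLA */
    static char *build_summary_path(const char *dir)
    {
        char *path = malloc(strlen(dir) + strlen("/hwloc-cpuid-info") + 1);
        if (!path)
            return NULL;                 /* callers unwind via cleanup labels */
        sprintf(path, "%s/hwloc-cpuid-info", dir);
        return path;                     /* caller frees */
    }
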
*/ -#include -#include -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "hwloc/plugins.h" +#include "private/private.h" +#include "private/misc.h" +#include "private/xml.h" +#include "private/debug.h" #include #include @@ -27,9 +27,8 @@ *******************/ struct hwloc__nolibxml_backend_data_s { - size_t buflen; /* size of both buffer and copy buffers, set during backend_init() */ + size_t buflen; /* size of both buffer, set during backend_init() */ char *buffer; /* allocated and filled during backend_init() */ - char *copy; /* allocated during backend_init(), used later during actual parsing */ }; typedef struct hwloc__nolibxml_import_state_data_s { @@ -260,14 +259,11 @@ hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata, struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data; unsigned major, minor; char *end; - char *buffer; + char *buffer = nbdata->buffer; + char *tagname; HWLOC_BUILD_ASSERT(sizeof(*nstate) <= sizeof(state->data)); - /* use a copy in the temporary buffer, we may modify during parsing */ - buffer = nbdata->copy; - memcpy(buffer, nbdata->buffer, nbdata->buflen); - /* skip headers */ while (!strncmp(buffer, "version_major = major; bdata->version_minor = minor; end = strchr(buffer, '>') + 1; + tagname = "topology"; } else if (!strncmp(buffer, "", 10)) { bdata->version_major = 1; bdata->version_minor = 0; end = buffer + 10; + tagname = "topology"; } else if (!strncmp(buffer, "", 6)) { bdata->version_major = 0; bdata->version_minor = 9; end = buffer + 6; + tagname = "root"; } else goto failed; @@ -301,7 +300,7 @@ hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata, state->parent = NULL; nstate->closed = 0; nstate->tagbuffer = end; - nstate->tagname = (char *) "topology"; + nstate->tagname = tagname; nstate->attrbuffer = NULL; return 0; /* success */ @@ -320,10 +319,6 @@ hwloc_nolibxml_free_buffers(struct hwloc_xml_backend_data_s *bdata) free(nbdata->buffer); nbdata->buffer = NULL; } - if (nbdata->copy) { - free(nbdata->copy); - nbdata->copy = NULL; - } } static void @@ -429,19 +424,11 @@ hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata, goto out_with_nbdata; } - /* allocate a temporary copy buffer that we may modify during parsing */ - nbdata->copy = malloc(nbdata->buflen+1); - if (!nbdata->copy) - goto out_with_buffer; - nbdata->copy[nbdata->buflen] = '\0'; - bdata->look_init = hwloc_nolibxml_look_init; bdata->look_done = hwloc_nolibxml_look_done; bdata->backend_exit = hwloc_nolibxml_backend_exit; return 0; -out_with_buffer: - free(nbdata->buffer); out_with_nbdata: free(nbdata); out: @@ -666,7 +653,7 @@ hwloc__nolibxml_export_end_object(hwloc__xml_export_state_t state, const char *n } static void -hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length) +hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length __hwloc_attribute_unused) { hwloc__nolibxml_export_state_data_t ndata = (void *) state->data; int res; @@ -678,7 +665,7 @@ hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char * } ndata->has_content = 1; - res = hwloc_snprintf(ndata->buffer, ndata->remaining, buffer, length); + res = hwloc_snprintf(ndata->buffer, ndata->remaining, "%s", buffer); hwloc__nolibxml_export_update_buffer(ndata, res); } @@ -799,6 +786,7 @@ hwloc___nolibxml_prepare_export_diff(hwloc_topology_diff_t diff, const char *ref state.new_prop = 
hwloc__nolibxml_export_new_prop; state.add_content = hwloc__nolibxml_export_add_content; state.end_object = hwloc__nolibxml_export_end_object; + state.global = NULL; ndata->indent = 0; ndata->written = 0; diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c index e7c5ef621..f6bb210c9 100644 --- a/src/3rdparty/hwloc/src/topology-xml.c +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -6,12 +6,12 @@ * See COPYING in top-level directory. */ -#include -#include -#include -#include -#include -#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/xml.h" +#include "private/private.h" +#include "private/misc.h" +#include "private/debug.h" #include @@ -158,7 +158,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "cache_size")) { unsigned long long lvalue = strtoull(value, NULL, 10); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) obj->attr->cache.size = lvalue; else if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring cache_size attribute for non-cache object type\n", @@ -167,7 +167,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "cache_linesize")) { unsigned long lvalue = strtoul(value, NULL, 10); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) obj->attr->cache.linesize = lvalue; else if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring cache_linesize attribute for non-cache object type\n", @@ -176,7 +176,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "cache_associativity")) { int lvalue = atoi(value); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) obj->attr->cache.associativity = lvalue; else if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring cache_associativity attribute for non-cache object type\n", @@ -185,7 +185,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "cache_type")) { unsigned long lvalue = strtoul(value, NULL, 10); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) { + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) { if (lvalue == HWLOC_OBJ_CACHE_UNIFIED || lvalue == HWLOC_OBJ_CACHE_DATA || lvalue == HWLOC_OBJ_CACHE_INSTRUCTION) @@ -211,7 +211,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, else if (!strcmp(name, "depth")) { unsigned long lvalue = strtoul(value, NULL, 10); - if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) { + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD || obj->type == HWLOC_OBJ_MEMCACHE) { obj->attr->cache.depth = lvalue; } else if (obj->type == HWLOC_OBJ_GROUP || obj->type == HWLOC_OBJ_BRIDGE) { /* will be overwritten by the core */ @@ -805,21 +805,13 @@ hwloc__xml_import_object(hwloc_topology_t topology, state->global->msgprefix); goto error_with_object; } - } else if (!strcasecmp(attrvalue, "Die")) { - /* deal with possible future type */ - obj->type = HWLOC_OBJ_GROUP; - obj->subtype = strdup("Die"); - 
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_DIE; - obj->attr->group.dont_merge = data->dont_merge_die_groups; } else if (!strcasecmp(attrvalue, "Tile")) { /* deal with possible future type */ obj->type = HWLOC_OBJ_GROUP; - obj->subtype = strdup("Tile"); obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_TILE; } else if (!strcasecmp(attrvalue, "Module")) { /* deal with possible future type */ obj->type = HWLOC_OBJ_GROUP; - obj->subtype = strdup("Module"); obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE; } else if (!strcasecmp(attrvalue, "MemCache")) { /* ignore possible future type */ @@ -1053,6 +1045,13 @@ hwloc__xml_import_object(hwloc_topology_t topology, /* end of 1.x specific checks */ } + /* 2.0 backward compatibility */ + if (obj->type == HWLOC_OBJ_GROUP) { + if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE + || (obj->subtype && !strcmp(obj->subtype, "Die"))) + obj->type = HWLOC_OBJ_DIE; + } + /* check that cache attributes are coherent with the actual type */ if (hwloc__obj_type_is_cache(obj->type) && obj->type != hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type)) { @@ -1212,19 +1211,24 @@ hwloc__xml_import_object(hwloc_topology_t topology, static int hwloc__xml_v2import_distances(hwloc_topology_t topology, - hwloc__xml_import_state_t state) + hwloc__xml_import_state_t state, + int heterotypes) { - hwloc_obj_type_t type = HWLOC_OBJ_TYPE_NONE; + hwloc_obj_type_t unique_type = HWLOC_OBJ_TYPE_NONE; + hwloc_obj_type_t *different_types = NULL; unsigned nbobjs = 0; - int indexing = 0; + int indexing = heterotypes; int os_indexing = 0; - int gp_indexing = 0; + int gp_indexing = heterotypes; + char *name = NULL; unsigned long kind = 0; unsigned nr_indexes, nr_u64values; uint64_t *indexes; uint64_t *u64values; int ret; +#define _TAG_NAME (heterotypes ? 
"distances2hetero" : "distances2") + /* process attributes */ while (1) { char *attrname, *attrvalue; @@ -1233,8 +1237,12 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, if (!strcmp(attrname, "nbobjs")) nbobjs = strtoul(attrvalue, NULL, 10); else if (!strcmp(attrname, "type")) { - if (hwloc_type_sscanf(attrvalue, &type, NULL, 0) < 0) + if (hwloc_type_sscanf(attrvalue, &unique_type, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: unrecognized %s type %s\n", + state->global->msgprefix, _TAG_NAME, attrvalue); goto out; + } } else if (!strcmp(attrname, "indexing")) { indexing = 1; @@ -1246,27 +1254,32 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, else if (!strcmp(attrname, "kind")) { kind = strtoul(attrvalue, NULL, 10); } + else if (!strcmp(attrname, "name")) { + name = attrvalue; + } else { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring unknown distance attribute %s\n", - state->global->msgprefix, attrname); + fprintf(stderr, "%s: ignoring unknown %s attribute %s\n", + state->global->msgprefix, _TAG_NAME, attrname); } } /* abort if missing attribute */ - if (!nbobjs || type == HWLOC_OBJ_TYPE_NONE || !indexing || !kind) { + if (!nbobjs || (!heterotypes && unique_type == HWLOC_OBJ_TYPE_NONE) || !indexing || !kind) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 missing some attributes\n", - state->global->msgprefix); + fprintf(stderr, "%s: %s missing some attributes\n", + state->global->msgprefix, _TAG_NAME); goto out; } indexes = malloc(nbobjs*sizeof(*indexes)); u64values = malloc(nbobjs*nbobjs*sizeof(*u64values)); - if (!indexes || !u64values) { + if (heterotypes) + different_types = malloc(nbobjs*sizeof(*different_types)); + if (!indexes || !u64values || (heterotypes && !different_types)) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: failed to allocate distances arrays for %u objects\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: failed to allocate %s arrays for %u objects\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } @@ -1290,16 +1303,16 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, is_u64values = 1; if (!is_index && !is_u64values) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with unrecognized child %s\n", - state->global->msgprefix, tag); + fprintf(stderr, "%s: %s with unrecognized child %s\n", + state->global->msgprefix, _TAG_NAME, tag); goto out_with_arrays; } if (state->global->next_attr(&childstate, &attrname, &attrvalue) < 0 || strcmp(attrname, "length")) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 child must have length attribute\n", - state->global->msgprefix); + fprintf(stderr, "%s: %s child must have length attribute\n", + state->global->msgprefix, _TAG_NAME); goto out_with_arrays; } length = atoi(attrvalue); @@ -1307,24 +1320,43 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, ret = state->global->get_content(&childstate, &buffer, length); if (ret < 0) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 child needs content of length %d\n", - state->global->msgprefix, length); + fprintf(stderr, "%s: %s child needs content of length %d\n", + state->global->msgprefix, _TAG_NAME, length); goto out_with_arrays; } if (is_index) { /* get indexes */ - char *tmp; + char *tmp, *tmp2; if (nr_indexes >= nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with more than %u indexes\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: %s with more than %u indexes\n", + 
state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } tmp = buffer; while (1) { char *next; - unsigned long long u = strtoull(tmp, &next, 0); + unsigned long long u; + if (heterotypes) { + hwloc_obj_type_t t = HWLOC_OBJ_TYPE_NONE; + if (hwloc_type_sscanf(tmp, &t, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: %s with unrecognized heterogeneous type %s\n", + state->global->msgprefix, _TAG_NAME, tmp); + goto out_with_arrays; + } + tmp2 = strchr(tmp, ':'); + if (!tmp2) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: %s with missing colon after heterogeneous type %s\n", + state->global->msgprefix, _TAG_NAME, tmp); + goto out_with_arrays; + } + tmp = tmp2+1; + different_types[nr_indexes] = t; + } + u = strtoull(tmp, &next, 0); if (next == tmp) break; indexes[nr_indexes++] = u; @@ -1340,8 +1372,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, char *tmp; if (nr_u64values >= nbobjs*nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with more than %u u64values\n", - state->global->msgprefix, nbobjs*nbobjs); + fprintf(stderr, "%s: %s with more than %u u64values\n", + state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs); goto out_with_arrays; } tmp = buffer; @@ -1364,8 +1396,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, ret = state->global->close_tag(&childstate); if (ret < 0) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with more than %u indexes\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: %s with more than %u indexes\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } @@ -1374,56 +1406,60 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, if (nr_indexes != nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with less than %u indexes\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: %s with less than %u indexes\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } if (nr_u64values != nbobjs*nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with less than %u u64values\n", - state->global->msgprefix, nbobjs*nbobjs); + fprintf(stderr, "%s: %s with less than %u u64values\n", + state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs); goto out_with_arrays; } if (nbobjs < 2) { /* distances with a single object are useless, even if the XML isn't invalid */ if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring distances2 with only %u objects\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: ignoring %s with only %u objects\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_ignore; } - if (type == HWLOC_OBJ_PU || type == HWLOC_OBJ_NUMANODE) { + if (unique_type == HWLOC_OBJ_PU || unique_type == HWLOC_OBJ_NUMANODE) { if (!os_indexing) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring PU or NUMA distances2 without os_indexing\n", - state->global->msgprefix); + fprintf(stderr, "%s: ignoring PU or NUMA %s without os_indexing\n", + state->global->msgprefix, _TAG_NAME); goto out_ignore; } } else { if (!gp_indexing) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring !PU or !NUMA distances2 without gp_indexing\n", - state->global->msgprefix); + fprintf(stderr, "%s: ignoring !PU or !NUMA %s without gp_indexing\n", + state->global->msgprefix, _TAG_NAME); goto out_ignore; } } - hwloc_internal_distances_add_by_index(topology, type, nbobjs, indexes, u64values, kind, 0); + hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, 
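+ /* different_types stays NULL for homogeneous "distances2" imports; on
+  * success the distances core takes ownership of the arrays, hence the
+  * NULL assignments just below that prevent freeing them here */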
nbobjs, indexes, u64values, kind, 0); /* prevent freeing below */ indexes = NULL; u64values = NULL; + different_types = NULL; out_ignore: + free(different_types); free(indexes); free(u64values); return state->global->close_tag(state); out_with_arrays: + free(different_types); free(indexes); free(u64values); out: return -1; +#undef _TAG_NAME } static int @@ -1625,8 +1661,12 @@ hwloc_convert_from_v1dist_floats(hwloc_topology_t topology, unsigned nbobjs, flo /* this canNOT be the first XML call */ static int -hwloc_look_xml(struct hwloc_backend *backend) +hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { + /* + * This backend enforces !topology->is_thissystem by default. + */ + struct hwloc_topology *topology = backend->topology; struct hwloc_xml_backend_data_s *data = backend->private_data; struct hwloc__xml_import_state_s state, childstate; @@ -1634,9 +1674,10 @@ hwloc_look_xml(struct hwloc_backend *backend) char *tag; int gotignored = 0; hwloc_localeswitch_declare; - char *env; int ret; + assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL); + state.global = data; assert(!root->cpuset); @@ -1647,9 +1688,6 @@ hwloc_look_xml(struct hwloc_backend *backend) data->first_numanode = data->last_numanode = NULL; data->first_v1dist = data->last_v1dist = NULL; - env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS"); - data->dont_merge_die_groups = env && atoi(env); - ret = data->look_init(data, &state); if (ret < 0) goto failed; @@ -1684,15 +1722,20 @@ hwloc_look_xml(struct hwloc_backend *backend) goto failed; if (!ret) break; - if (strcmp(tag, "distances2")) { + if (!strcmp(tag, "distances2")) { + ret = hwloc__xml_v2import_distances(topology, &childstate, 0); + if (ret < 0) + goto failed; + } else if (!strcmp(tag, "distances2hetero")) { + ret = hwloc__xml_v2import_distances(topology, &childstate, 1); + if (ret < 0) + goto failed; + } else { if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring unknown tag `%s' after root object, expected `distances2'\n", data->msgprefix, tag); goto done; } - ret = hwloc__xml_v2import_distances(topology, &childstate); - if (ret < 0) - goto failed; state.global->close_child(&childstate); } } @@ -1742,8 +1785,8 @@ done: inext_cousin) objs[i] = node; -hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); - hwloc_internal_distances_add(topology, nbobjs, objs, values, v1dist->kind, 0); + hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); + hwloc_internal_distances_add(topology, NULL, nbobjs, objs, values, v1dist->kind, 0); } else { free(objs); free(values); @@ -1791,9 +1834,11 @@ hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); /* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */ topology->support.discovery->pu = 1; + topology->support.discovery->disallowed_pu = 1; if (data->nbnumanodes) { topology->support.discovery->numa = 1; topology->support.discovery->numa_memory = 1; // FIXME + topology->support.discovery->disallowed_numa = 1; } if (data->look_done) @@ -1936,6 +1981,9 @@ hwloc__xml_export_safestrdup(const char *old) char *new = malloc(strlen(old)+1); char *dst = new; const char *src = old; + if (!new) + return NULL; + while (*src) { if (HWLOC_XML_CHAR_VALID(*src)) *(dst++) = *src; @@ -1955,6 +2003,8 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo if (v1export && obj->type == HWLOC_OBJ_PACKAGE) state->new_prop(state, "type", "Socket"); + else if (v1export && obj->type == HWLOC_OBJ_DIE) + 
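+ /* v1 XML predates the Die type: export Dies as plain Groups here; an
+  * info pair (Type=Die) is emitted further below so newer importers can
+  * convert these Groups back into Die objects */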
state->new_prop(state, "type", "Group"); else if (v1export && hwloc__obj_type_is_cache(obj->type)) state->new_prop(state, "type", "Cache"); else @@ -1966,8 +2016,23 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo } if (obj->cpuset) { - if (v1export && obj->type == HWLOC_OBJ_NUMANODE && obj->sibling_rank > 0) { - /* v1 non-first NUMA nodes have empty cpusets */ + int empty_cpusets = 0; + + if (v1export && obj->type == HWLOC_OBJ_NUMANODE) { + /* walk up this memory hierarchy to find-out if we are the first numa node. + * v1 non-first NUMA nodes have empty cpusets. + */ + hwloc_obj_t parent = obj; + while (!hwloc_obj_type_is_normal(parent->type)) { + if (parent->sibling_rank > 0) { + empty_cpusets = 1; + break; + } + parent = parent->parent; + } + } + + if (empty_cpusets) { state->new_prop(state, "cpuset", "0x0"); state->new_prop(state, "online_cpuset", "0x0"); state->new_prop(state, "complete_cpuset", "0x0"); @@ -2024,13 +2089,17 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo if (obj->name) { char *name = hwloc__xml_export_safestrdup(obj->name); - state->new_prop(state, "name", name); - free(name); + if (name) { + state->new_prop(state, "name", name); + free(name); + } } if (!v1export && obj->subtype) { char *subtype = hwloc__xml_export_safestrdup(obj->subtype); - state->new_prop(state, "subtype", subtype); - free(subtype); + if (subtype) { + state->new_prop(state, "subtype", subtype); + free(subtype); + } } switch (obj->type) { @@ -2057,6 +2126,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo case HWLOC_OBJ_L1ICACHE: case HWLOC_OBJ_L2ICACHE: case HWLOC_OBJ_L3ICACHE: + case HWLOC_OBJ_MEMCACHE: sprintf(tmp, "%llu", (unsigned long long) obj->attr->cache.size); state->new_prop(state, "cache_size", tmp); sprintf(tmp, "%u", obj->attr->cache.depth); @@ -2125,23 +2195,34 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo for(i=0; iinfos_count; i++) { char *name = hwloc__xml_export_safestrdup(obj->infos[i].name); char *value = hwloc__xml_export_safestrdup(obj->infos[i].value); - struct hwloc__xml_export_state_s childstate; - state->new_child(state, &childstate, "info"); - childstate.new_prop(&childstate, "name", name); - childstate.new_prop(&childstate, "value", value); - childstate.end_object(&childstate, "info"); + if (name && value) { + struct hwloc__xml_export_state_s childstate; + state->new_child(state, &childstate, "info"); + childstate.new_prop(&childstate, "name", name); + childstate.new_prop(&childstate, "value", value); + childstate.end_object(&childstate, "info"); + } free(name); free(value); } if (v1export && obj->subtype) { char *subtype = hwloc__xml_export_safestrdup(obj->subtype); + if (subtype) { + struct hwloc__xml_export_state_s childstate; + int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); + state->new_child(state, &childstate, "info"); + childstate.new_prop(&childstate, "name", is_coproctype ? "CoProcType" : "Type"); + childstate.new_prop(&childstate, "value", subtype); + childstate.end_object(&childstate, "info"); + free(subtype); + } + } + if (v1export && obj->type == HWLOC_OBJ_DIE) { struct hwloc__xml_export_state_s childstate; - int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); state->new_child(state, &childstate, "info"); - childstate.new_prop(&childstate, "name", is_coproctype ? 
"CoProcType" : "Type"); - childstate.new_prop(&childstate, "value", subtype); + childstate.new_prop(&childstate, "name", "Type"); + childstate.new_prop(&childstate, "value", "Die"); childstate.end_object(&childstate, "info"); - free(subtype); } if (v1export && !obj->parent) { @@ -2152,19 +2233,27 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo for(dist = topology->first_dist; dist; dist = dist->next) { struct hwloc__xml_export_state_s childstate; unsigned nbobjs = dist->nbobjs; + unsigned *logical_to_v2array; int depth; - if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->type)) + if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->unique_type)) continue; if (!(dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY)) continue; - { - HWLOC_VLA(unsigned, logical_to_v2array, nbobjs); + if (dist->kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) + continue; + + logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array)); + if (!logical_to_v2array) { + fprintf(stderr, "xml/export/v1: failed to allocated logical_to_v2array\n"); + continue; + } + for(i=0; iobjs[i]->logical_index] = i; /* compute the relative depth */ - if (dist->type == HWLOC_OBJ_NUMANODE) { + if (dist->unique_type == HWLOC_OBJ_NUMANODE) { /* for NUMA nodes, use the highest normal-parent depth + 1 */ depth = -1; for(i=0; itype) + parent_with_memory; + depth = hwloc_get_type_depth(topology, dist->unique_type) + parent_with_memory; } state->new_child(state, &childstate, "distances"); @@ -2210,7 +2299,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo } } childstate.end_object(&childstate, "distances"); - } + free(logical_to_v2array); } } @@ -2243,13 +2332,90 @@ hwloc__xml_v2export_object (hwloc__xml_export_state_t parentstate, hwloc_topolog static void hwloc__xml_v1export_object (hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags); +static hwloc_obj_t +hwloc__xml_v1export_object_next_numanode(hwloc_obj_t obj, hwloc_obj_t cur) +{ + hwloc_obj_t parent; + + if (!cur) { + /* first numa node is on the very bottom left */ + cur = obj->memory_first_child; + goto find_first; + } + + /* walk-up until there's a next sibling */ + parent = cur; + while (1) { + if (parent->next_sibling) { + /* found a next sibling, we'll walk down-left from there */ + cur = parent->next_sibling; + break; + } + parent = parent->parent; + if (parent == obj) + return NULL; + } + + find_first: + while (cur->type != HWLOC_OBJ_NUMANODE) + cur = cur->memory_first_child; + assert(cur); + return cur; +} + +static unsigned +hwloc__xml_v1export_object_list_numanodes(hwloc_obj_t obj, hwloc_obj_t *first_p, hwloc_obj_t **nodes_p) +{ + hwloc_obj_t *nodes, cur; + int nr; + + if (!obj->memory_first_child) { + *first_p = NULL; + *nodes_p = NULL; + return 0; + } + /* we're sure there's at least one numa node */ + + nr = hwloc_bitmap_weight(obj->nodeset); + assert(nr > 0); + /* these are local nodes, but some of them may be attached above instead of here */ + + nodes = calloc(nr, sizeof(*nodes)); + if (!nodes) { + /* only return the first node */ + cur = hwloc__xml_v1export_object_next_numanode(obj, NULL); + assert(cur); + *first_p = cur; + *nodes_p = NULL; + return 1; + } + + nr = 0; + cur = NULL; + while (1) { + cur = hwloc__xml_v1export_object_next_numanode(obj, cur); + if (!cur) + break; + nodes[nr++] = cur; + } + + *first_p = nodes[0]; + *nodes_p = nodes; + return nr; +} + static void 
hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags) { struct hwloc__xml_export_state_s gstate, mstate, ostate, *state = parentstate; hwloc_obj_t child; + unsigned nr_numanodes; + hwloc_obj_t *numanodes, first_numanode; + unsigned i; - if (obj->parent->arity > 1 && obj->memory_arity > 1 && parentstate->global->v1_memory_group) { + nr_numanodes = hwloc__xml_v1export_object_list_numanodes(obj, &first_numanode, &numanodes); + + if (obj->parent->arity > 1 && nr_numanodes > 1 && parentstate->global->v1_memory_group) { /* child has sibling, we must add a Group around those memory children */ hwloc_obj_t group = parentstate->global->v1_memory_group; parentstate->new_child(parentstate, &gstate, "object"); @@ -2266,10 +2432,8 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw } /* export first memory child */ - child = obj->memory_first_child; - assert(child->type == HWLOC_OBJ_NUMANODE); state->new_child(state, &mstate, "object"); - hwloc__xml_export_object_contents (&mstate, topology, child, flags); + hwloc__xml_export_object_contents (&mstate, topology, first_numanode, flags); /* then the actual object */ mstate.new_child(&mstate, &ostate, "object"); @@ -2288,9 +2452,10 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw mstate.end_object(&mstate, "object"); /* now other memory children */ - for_each_memory_child(child, obj) - if (child->sibling_rank > 0) - hwloc__xml_v1export_object (state, topology, child, flags); + for(i=1; inew_child(state, &_childstate, tagname); \ + for(_j=0; \ + _i+_j<(nr) && _jtype), (unsigned long long) (objs)[_i+_j]->gp_index); \ + _i += _j; \ + sprintf(_tmp2, "%lu", (unsigned long) _len); \ + _childstate.new_prop(&_childstate, "length", _tmp2); \ + _childstate.add_content(&_childstate, _tmp, _len); \ + _childstate.end_object(&_childstate, tagname); \ + } \ +} while (0) + +static void +hwloc___xml_v2export_distances(hwloc__xml_export_state_t parentstate, struct hwloc_internal_distances_s *dist) +{ + char tmp[255]; + unsigned nbobjs = dist->nbobjs; + struct hwloc__xml_export_state_s state; + + if (dist->different_types) { + parentstate->new_child(parentstate, &state, "distances2hetero"); + } else { + parentstate->new_child(parentstate, &state, "distances2"); + state.new_prop(&state, "type", hwloc_obj_type_string(dist->unique_type)); + } + + sprintf(tmp, "%u", nbobjs); + state.new_prop(&state, "nbobjs", tmp); + sprintf(tmp, "%lu", dist->kind); + state.new_prop(&state, "kind", tmp); + if (dist->name) + state.new_prop(&state, "name", dist->name); + + if (!dist->different_types) { + state.new_prop(&state, "indexing", + HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type) ? "os" : "gp"); + } + + /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */ + if (dist->different_types) { + EXPORT_TYPE_GPINDEX_ARRAY(&state, nbobjs, dist->objs, "indexes", 10); + } else { + EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10); + } + EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10); + state.end_object(&state, dist->different_types ? 
"distances2hetero" : "distances2"); +} + static void hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology) { struct hwloc_internal_distances_s *dist; - for(dist = topology->first_dist; dist; dist = dist->next) { - char tmp[255]; - unsigned nbobjs = dist->nbobjs; - struct hwloc__xml_export_state_s state; - - parentstate->new_child(parentstate, &state, "distances2"); - - state.new_prop(&state, "type", hwloc_obj_type_string(dist->type)); - sprintf(tmp, "%u", nbobjs); - state.new_prop(&state, "nbobjs", tmp); - sprintf(tmp, "%lu", dist->kind); - state.new_prop(&state, "kind", tmp); - - state.new_prop(&state, "indexing", - (dist->type == HWLOC_OBJ_NUMANODE || dist->type == HWLOC_OBJ_PU) ? "os" : "gp"); - /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */ - EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10); - EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10); - state.end_object(&state, "distances2"); - } + for(dist = topology->first_dist; dist; dist = dist->next) + if (!dist->different_types) + hwloc___xml_v2export_distances(parentstate, dist); + /* export homogeneous distances first in case the importer doesn't support heterogeneous and stops there */ + for(dist = topology->first_dist; dist; dist = dist->next) + if (dist->different_types) + hwloc___xml_v2export_distances(parentstate, dist); } void @@ -2378,18 +2587,22 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top hwloc_obj_t root = hwloc_get_root_obj(topology); if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) { - if (root->memory_first_child) { + hwloc_obj_t *numanodes, first_numanode; + unsigned nr_numanodes; + + nr_numanodes = hwloc__xml_v1export_object_list_numanodes(root, &first_numanode, &numanodes); + + if (nr_numanodes) { /* we don't use hwloc__xml_v1export_object_with_memory() because we want/can keep root above the numa node */ struct hwloc__xml_export_state_s rstate, mstate; hwloc_obj_t child; + unsigned i; /* export the root */ state->new_child(state, &rstate, "object"); hwloc__xml_export_object_contents (&rstate, topology, root, flags); /* export first memory child */ - child = root->memory_first_child; - assert(child->type == HWLOC_OBJ_NUMANODE); rstate.new_child(&rstate, &mstate, "object"); - hwloc__xml_export_object_contents (&mstate, topology, child, flags); + hwloc__xml_export_object_contents (&mstate, topology, first_numanode, flags); /* then its normal/io/misc children */ for_each_child(child, root) hwloc__xml_v1export_object (&mstate, topology, child, flags); @@ -2400,15 +2613,16 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top /* close first memory child */ mstate.end_object(&mstate, "object"); /* now other memory children */ - for_each_memory_child(child, root) - if (child->sibling_rank > 0) - hwloc__xml_v1export_object (&rstate, topology, child, flags); + for(i=1; i +#include "private/autogen/config.h" #define _ATFILE_SOURCE #include @@ -25,10 +25,10 @@ #include #include -#include -#include -#include -#include +#include "hwloc.h" +#include "private/private.h" +#include "private/debug.h" +#include "private/misc.h" #ifdef HAVE_MACH_MACH_INIT_H #include @@ -136,14 +136,28 @@ int hwloc_get_sysctl(int name[], unsigned namelen, int *ret) } #endif -/* Return the OS-provided number of processors. 
Unlike other methods such as - reading sysfs on Linux, this method is not virtualizable; thus it's only - used as a fall-back method, allowing virtual backends (FSROOT, etc) to - have the desired effect. */ +/* Return the OS-provided number of processors. + * Assumes topology->is_thissystem is true. + */ #ifndef HWLOC_WIN_SYS /* The windows implementation is in topology-windows.c */ int -hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) { +hwloc_fallback_nbprocessors(unsigned flags) { int n; + + if (flags & HWLOC_FALLBACK_NBPROCESSORS_INCLUDE_OFFLINE) { + /* try to get all CPUs for Linux and Solaris that can handle offline CPUs */ +#if HAVE_DECL__SC_NPROCESSORS_CONF + n = sysconf(_SC_NPROCESSORS_CONF); +#elif HAVE_DECL__SC_NPROC_CONF + n = sysconf(_SC_NPROC_CONF); +#else + n = -1; +#endif + if (n != -1) + return n; + } + + /* try getting only online CPUs, or whatever we can get */ #if HAVE_DECL__SC_NPROCESSORS_ONLN n = sysconf(_SC_NPROCESSORS_ONLN); #elif HAVE_DECL__SC_NPROC_ONLN @@ -762,9 +776,7 @@ hwloc__duplicate_object(struct hwloc_topology *newtopology, /* place us for real */ assert(newobj->logical_index < level_width); level[newobj->logical_index] = newobj; - /* link to already-inserted cousins - * (hwloc_pci_belowroot_apply_locality() can cause out-of-order logical indexes) - */ + /* link to already-inserted cousins */ if (newobj->logical_index > 0 && level[newobj->logical_index-1]) { newobj->prev_cousin = level[newobj->logical_index-1]; level[newobj->logical_index-1]->next_cousin = newobj; @@ -991,31 +1003,35 @@ hwloc_topology_dup(hwloc_topology_t *newp, /***** Make sure you update obj_type_priority[] below as well. *****/ static const unsigned obj_type_order[] = { /* first entry is HWLOC_OBJ_MACHINE */ 0, - /* next entry is HWLOC_OBJ_PACKAGE */ 3, - /* next entry is HWLOC_OBJ_CORE */ 12, - /* next entry is HWLOC_OBJ_PU */ 16, - /* next entry is HWLOC_OBJ_L1CACHE */ 10, - /* next entry is HWLOC_OBJ_L2CACHE */ 8, - /* next entry is HWLOC_OBJ_L3CACHE */ 6, - /* next entry is HWLOC_OBJ_L4CACHE */ 5, - /* next entry is HWLOC_OBJ_L5CACHE */ 4, - /* next entry is HWLOC_OBJ_L1ICACHE */ 11, - /* next entry is HWLOC_OBJ_L2ICACHE */ 9, - /* next entry is HWLOC_OBJ_L3ICACHE */ 7, + /* next entry is HWLOC_OBJ_PACKAGE */ 4, + /* next entry is HWLOC_OBJ_CORE */ 14, + /* next entry is HWLOC_OBJ_PU */ 18, + /* next entry is HWLOC_OBJ_L1CACHE */ 12, + /* next entry is HWLOC_OBJ_L2CACHE */ 10, + /* next entry is HWLOC_OBJ_L3CACHE */ 8, + /* next entry is HWLOC_OBJ_L4CACHE */ 7, + /* next entry is HWLOC_OBJ_L5CACHE */ 6, + /* next entry is HWLOC_OBJ_L1ICACHE */ 13, + /* next entry is HWLOC_OBJ_L2ICACHE */ 11, + /* next entry is HWLOC_OBJ_L3ICACHE */ 9, /* next entry is HWLOC_OBJ_GROUP */ 1, - /* next entry is HWLOC_OBJ_NUMANODE */ 2, - /* next entry is HWLOC_OBJ_BRIDGE */ 13, - /* next entry is HWLOC_OBJ_PCI_DEVICE */ 14, - /* next entry is HWLOC_OBJ_OS_DEVICE */ 15, - /* next entry is HWLOC_OBJ_MISC */ 17 + /* next entry is HWLOC_OBJ_NUMANODE */ 3, + /* next entry is HWLOC_OBJ_BRIDGE */ 15, + /* next entry is HWLOC_OBJ_PCI_DEVICE */ 16, + /* next entry is HWLOC_OBJ_OS_DEVICE */ 17, + /* next entry is HWLOC_OBJ_MISC */ 19, + /* next entry is HWLOC_OBJ_MEMCACHE */ 2, + /* next entry is HWLOC_OBJ_DIE */ 5 }; #ifndef NDEBUG /* only used in debug check assert if !NDEBUG */ static const hwloc_obj_type_t obj_order_type[] = { HWLOC_OBJ_MACHINE, HWLOC_OBJ_GROUP, + HWLOC_OBJ_MEMCACHE, HWLOC_OBJ_NUMANODE, HWLOC_OBJ_PACKAGE, + HWLOC_OBJ_DIE, HWLOC_OBJ_L5CACHE, 
HWLOC_OBJ_L4CACHE, HWLOC_OBJ_L3CACHE, @@ -1040,6 +1056,7 @@ static const hwloc_obj_type_t obj_order_type[] = { * Always keep Machine/NUMANode/PU/PCIDev/OSDev * then Core * then Package + * then Die * then Cache, * then Instruction Caches * then always drop Group/Misc/Bridge. @@ -1065,7 +1082,9 @@ static const int obj_type_priority[] = { /* next entry is HWLOC_OBJ_BRIDGE */ 0, /* next entry is HWLOC_OBJ_PCI_DEVICE */ 100, /* next entry is HWLOC_OBJ_OS_DEVICE */ 100, - /* next entry is HWLOC_OBJ_MISC */ 0 + /* next entry is HWLOC_OBJ_MISC */ 0, + /* next entry is HWLOC_OBJ_MEMCACHE */ 19, + /* next entry is HWLOC_OBJ_DIE */ 30 }; int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) @@ -1118,12 +1137,10 @@ hwloc_type_cmp(hwloc_obj_t obj1, hwloc_obj_t obj2) /* * How to compare objects based on cpusets. */ - static int hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2) { hwloc_bitmap_t set1, set2; - int res = HWLOC_OBJ_DIFFERENT; assert(!hwloc__obj_type_is_special(obj1->type)); assert(!hwloc__obj_type_is_special(obj2->type)); @@ -1136,45 +1153,10 @@ hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2) set1 = obj1->cpuset; set2 = obj2->cpuset; } - if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { - res = hwloc_bitmap_compare_inclusion(set1, set2); - if (res == HWLOC_OBJ_INTERSECTS) - return HWLOC_OBJ_INTERSECTS; - } + if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) + return hwloc_bitmap_compare_inclusion(set1, set2); - /* then compare nodesets, and combine the results */ - if (obj1->complete_nodeset && obj2->complete_nodeset) { - set1 = obj1->complete_nodeset; - set2 = obj2->complete_nodeset; - } else { - set1 = obj1->nodeset; - set2 = obj2->nodeset; - } - if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { - int noderes = hwloc_bitmap_compare_inclusion(set1, set2); - /* deal with conflicting cpusets/nodesets inclusions */ - if (noderes == HWLOC_OBJ_INCLUDED) { - if (res == HWLOC_OBJ_CONTAINS) - /* contradicting order for cpusets and nodesets */ - return HWLOC_OBJ_INTERSECTS; - res = HWLOC_OBJ_INCLUDED; - - } else if (noderes == HWLOC_OBJ_CONTAINS) { - if (res == HWLOC_OBJ_INCLUDED) - /* contradicting order for cpusets and nodesets */ - return HWLOC_OBJ_INTERSECTS; - res = HWLOC_OBJ_CONTAINS; - - } else if (noderes == HWLOC_OBJ_INTERSECTS) { - return HWLOC_OBJ_INTERSECTS; - - } else { - /* nodesets are different, keep the cpuset order */ - - } - } - - return res; + return HWLOC_OBJ_DIFFERENT; } /* Compare object cpusets based on complete_cpuset if defined (always correctly ordered), @@ -1189,10 +1171,6 @@ hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2) return hwloc_bitmap_compare_first(obj1->complete_cpuset, obj2->complete_cpuset); else if (obj1->cpuset && obj2->cpuset) return hwloc_bitmap_compare_first(obj1->cpuset, obj2->cpuset); - else if (obj1->complete_nodeset && obj2->complete_nodeset) - return hwloc_bitmap_compare_first(obj1->complete_nodeset, obj2->complete_nodeset); - else if (obj1->nodeset && obj2->nodeset) - return hwloc_bitmap_compare_first(obj1->nodeset, obj2->nodeset); return 0; } @@ -1346,7 +1324,11 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) } } -/* Try to insert OBJ in CUR, recurse if needed. +/* + * The main insertion routine, only used for CPU-side object (normal types) + * uisng cpuset only (or complete_cpuset). + * + * Try to insert OBJ in CUR, recurse if needed. 
* Returns the object if it was inserted, * the remaining object it was merged, * NULL if failed to insert. @@ -1546,17 +1528,116 @@ hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t ob return group; } -/*attach the given memory object below the given normal parent. */ +/* only works for MEMCACHE and NUMAnode with a single bit in nodeset */ +static hwloc_obj_t +hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_obj_t parent, + hwloc_obj_t obj, + hwloc_report_error_t report_error) +{ + hwloc_obj_t *curp = &parent->memory_first_child; + unsigned first = hwloc_bitmap_first(obj->nodeset); + + while (*curp) { + hwloc_obj_t cur = *curp; + unsigned curfirst = hwloc_bitmap_first(cur->nodeset); + + if (first < curfirst) { + /* insert before cur */ + obj->next_sibling = cur; + *curp = obj; + obj->memory_first_child = NULL; + obj->parent = parent; + topology->modified = 1; + return obj; + } + + if (first == curfirst) { + /* identical nodeset */ + if (obj->type == HWLOC_OBJ_NUMANODE) { + if (cur->type == HWLOC_OBJ_NUMANODE) { + /* identical NUMA nodes? ignore the new one */ + if (report_error) { + char curstr[512]; + char objstr[512]; + char msg[1100]; + hwloc__report_error_format_obj(curstr, sizeof(curstr), cur); + hwloc__report_error_format_obj(objstr, sizeof(objstr), obj); + snprintf(msg, sizeof(msg), "%s and %s have identical nodesets!", objstr, curstr); + report_error(msg, __LINE__); + } + return NULL; + } + assert(cur->type == HWLOC_OBJ_MEMCACHE); + /* insert the new NUMA node below that existing memcache */ + return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error); + + } else { + assert(obj->type == HWLOC_OBJ_MEMCACHE); + if (cur->type == HWLOC_OBJ_MEMCACHE) { + if (cur->attr->cache.depth == obj->attr->cache.depth) + /* memcache with same nodeset and depth, ignore the new one */ + return NULL; + if (cur->attr->cache.depth > obj->attr->cache.depth) + /* memcache with higher cache depth is actually *higher* in the hierarchy + * (depth starts from the NUMA node). + * insert the new memcache below the existing one + */ + return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error); + } + /* insert the memcache above the existing memcache or numa node */ + obj->next_sibling = cur->next_sibling; + cur->next_sibling = NULL; + obj->memory_first_child = cur; + cur->parent = obj; + *curp = obj; + obj->parent = parent; + topology->modified = 1; + return obj; + } + } + + curp = &cur->next_sibling; + } + + /* append to the end of the list */ + obj->next_sibling = NULL; + *curp = obj; + obj->memory_first_child = NULL; + obj->parent = parent; + topology->modified = 1; + return obj; +} + +/* Attach the given memory object below the given normal parent. + * + * Only the nodeset is used to find the location inside memory children below parent. + * + * Nodeset inclusion inside the given memory hierarchy is guaranteed by this function, + * but nodesets are not propagated to CPU-side parent yet. It will be done by + * propagate_nodeset() later. 
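+ *
+ * A minimal caller-side sketch (hypothetical, not part of this patch;
+ * assumes parent is a normal object, report_error is a caller-provided
+ * callback, and the new NUMA node covers exactly one node bit, as
+ * asserted below):
+ *
+ *   hwloc_obj_t numa = hwloc_alloc_setup_object(topology,
+ *                                               HWLOC_OBJ_NUMANODE, 0);
+ *   numa->nodeset = hwloc_bitmap_alloc();
+ *   hwloc_bitmap_set(numa->nodeset, 0);
+ *   hwloc__attach_memory_object(topology, parent, numa, report_error);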
+ */ struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj, - hwloc_report_error_t report_error __hwloc_attribute_unused) + hwloc_report_error_t report_error) { - hwloc_obj_t *cur_children; + hwloc_obj_t result; assert(parent); assert(hwloc__obj_type_is_normal(parent->type)); + /* Check the nodeset */ + if (!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset)) + return NULL; + /* Initialize or check the complete nodeset */ + if (!obj->complete_nodeset) { + obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset); + } else if (!hwloc_bitmap_isincluded(obj->nodeset, obj->complete_nodeset)) { + return NULL; + } + /* Neither ACPI nor Linux support multinode mscache */ + assert(hwloc_bitmap_weight(obj->nodeset) == 1); + #if 0 /* TODO: enable this instead of hack in fixup_sets once NUMA nodes are inserted late */ /* copy the parent cpuset in case it's larger than expected. @@ -1565,35 +1646,22 @@ hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, * However, the user decided the ignore Groups, so hierarchy/locality loss is expected. */ hwloc_bitmap_copy(obj->cpuset, parent->cpuset); + hwloc_bitmap_copy(obj->complete_cpuset, parent->complete_cpuset); #endif - /* only NUMA nodes are memory for now, just append to the end of the list */ - assert(obj->type == HWLOC_OBJ_NUMANODE); - assert(obj->nodeset); - cur_children = &parent->memory_first_child; - while (*cur_children) { - /* TODO check that things are inserted in order. - * it's OK for KNL, the only user so far - */ - cur_children = &(*cur_children)->next_sibling; - } - *cur_children = obj; - obj->next_sibling = NULL; - - /* Initialize the complete nodeset if needed */ - if (!obj->complete_nodeset) { - obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset); - } - - /* Add the bit to the top sets, and to the parent CPU-side object */ - if (obj->type == HWLOC_OBJ_NUMANODE) { - if (hwloc_bitmap_isset(obj->nodeset, obj->os_index)) + result = hwloc___attach_memory_object_by_nodeset(topology, parent, obj, report_error); + if (result == obj) { + /* Add the bit to the top sets, and to the parent CPU-side object */ + if (obj->type == HWLOC_OBJ_NUMANODE) { hwloc_bitmap_set(topology->levels[0][0]->nodeset, obj->os_index); - hwloc_bitmap_set(topology->levels[0][0]->complete_nodeset, obj->os_index); + hwloc_bitmap_set(topology->levels[0][0]->complete_nodeset, obj->os_index); + } } - - topology->modified = 1; - return obj; + if (result != obj) { + /* either failed to insert, or got merged, free the original object */ + hwloc_free_unlinked_object(obj); + } + return result; } /* insertion routine that lets you change the error reporting callback */ @@ -1699,11 +1767,18 @@ hwloc_alloc_setup_object(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned os_index) { struct hwloc_obj *obj = hwloc_tma_malloc(topology->tma, sizeof(*obj)); + if (!obj) + return NULL; memset(obj, 0, sizeof(*obj)); obj->type = type; obj->os_index = os_index; obj->gp_index = topology->next_gp_index++; obj->attr = hwloc_tma_malloc(topology->tma, sizeof(*obj->attr)); + if (!obj->attr) { + assert(!topology->tma || !topology->tma->dontfree); /* this tma cannot fail to allocate */ + free(obj); + return NULL; + } memset(obj->attr, 0, sizeof(*obj->attr)); /* do not allocate the cpuset here, let the caller do it */ return obj; @@ -1717,6 +1792,10 @@ hwloc_topology_alloc_group_object(struct hwloc_topology *topology) errno = EINVAL; return NULL; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return 
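+ /* topologies adopted through hwloc_shmem_topology_adopt() are
+  * read-only, hence EPERM */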
NULL; + } return hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX); } @@ -1736,6 +1815,10 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t errno = EINVAL; return NULL; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return NULL; + } if (topology->type_filter[HWLOC_OBJ_GROUP] == HWLOC_TYPE_FILTER_KEEP_NONE) { hwloc_free_unlinked_object(obj); @@ -1754,12 +1837,30 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t hwloc_bitmap_and(obj->complete_nodeset, obj->complete_nodeset, root->complete_nodeset); if ((!obj->cpuset || hwloc_bitmap_iszero(obj->cpuset)) - && (!obj->complete_cpuset || hwloc_bitmap_iszero(obj->complete_cpuset)) - && (!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset)) - && (!obj->complete_nodeset || hwloc_bitmap_iszero(obj->complete_nodeset))) { - hwloc_free_unlinked_object(obj); - errno = EINVAL; - return NULL; + && (!obj->complete_cpuset || hwloc_bitmap_iszero(obj->complete_cpuset))) { + /* we'll insert by cpuset, so build cpuset from the nodeset */ + hwloc_const_bitmap_t nodeset = obj->nodeset ? obj->nodeset : obj->complete_nodeset; + hwloc_obj_t numa; + + if ((!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset)) + && (!obj->complete_nodeset || hwloc_bitmap_iszero(obj->complete_nodeset))) { + hwloc_free_unlinked_object(obj); + errno = EINVAL; + return NULL; + } + + if (!obj->cpuset) { + obj->cpuset = hwloc_bitmap_alloc(); + if (!obj->cpuset) { + hwloc_free_unlinked_object(obj); + return NULL; + } + } + + numa = NULL; + while ((numa = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa)) != NULL) + if (hwloc_bitmap_isset(nodeset, numa->os_index)) + hwloc_bitmap_or(obj->cpuset, obj->cpuset, numa->cpuset); } cmp = hwloc_obj_cmp_sets(obj, root); @@ -1806,6 +1907,10 @@ hwloc_topology_insert_misc_object(struct hwloc_topology *topology, hwloc_obj_t p errno = EINVAL; return NULL; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return NULL; + } obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MISC, HWLOC_UNKNOWN_INDEX); if (name) @@ -1963,6 +2068,7 @@ fixup_sets(hwloc_obj_t obj) in_memory_list = 0; /* iterate over normal children first, we'll come back for memory children later */ + /* FIXME: if memory objects are inserted late, we should update their cpuset and complete_cpuset at insertion instead of here */ iterate: while (child) { /* our cpuset must be included in our parent's one */ @@ -1980,6 +2086,12 @@ fixup_sets(hwloc_obj_t obj) child->complete_nodeset = hwloc_bitmap_dup(child->nodeset); } + if (hwloc_obj_type_is_memory(child->type)) { + /* update memory children cpusets in case some CPU-side parent was removed */ + hwloc_bitmap_copy(child->cpuset, obj->cpuset); + hwloc_bitmap_copy(child->complete_cpuset, obj->complete_cpuset); + } + fixup_sets(child); child = child->next_sibling; } @@ -2025,9 +2137,8 @@ hwloc_obj_add_children_sets(hwloc_obj_t obj) /* CPU objects are inserted by cpusets, we know their cpusets are properly included. * We just need fixup_sets() to make sure they aren't too wide. * - * Memory objects are inserted by cpusets to find their CPU parent, - * but nodesets are only used inside the memory hierarchy below that parent. - * Thus we need to propagate nodesets to CPU-side parents and children. + * Within each memory hierarchy, nodeset are consistent as well. + * However they must be propagated to their CPU-side parents. * * A memory object nodeset consists of NUMA nodes below it. 
* A normal object nodeset consists in NUMA nodes attached to any @@ -2060,27 +2171,12 @@ propagate_nodeset(hwloc_obj_t obj) /* now add our local nodeset */ for_each_memory_child(child, obj) { - /* FIXME rather recurse in the memory hierarchy */ - - /* first, update children complete_nodeset if needed */ - if (!child->complete_nodeset) - child->complete_nodeset = hwloc_bitmap_dup(child->nodeset); - else - hwloc_bitmap_or(child->complete_nodeset, child->complete_nodeset, child->nodeset); - /* add memory children nodesets to ours */ hwloc_bitmap_or(obj->nodeset, obj->nodeset, child->nodeset); hwloc_bitmap_or(obj->complete_nodeset, obj->complete_nodeset, child->complete_nodeset); - - /* by the way, copy our cpusets to memory children */ - if (child->cpuset) - hwloc_bitmap_copy(child->cpuset, obj->cpuset); - else - child->cpuset = hwloc_bitmap_dup(obj->cpuset); - if (child->complete_cpuset) - hwloc_bitmap_copy(child->complete_cpuset, obj->complete_cpuset); - else - child->complete_cpuset = hwloc_bitmap_dup(obj->complete_cpuset); + /* no need to recurse because hwloc__attach_memory_object() + * makes sure nodesets are consistent within each memory hierarchy. + */ } /* Propagate our nodeset to CPU children. */ @@ -2219,6 +2315,7 @@ hwloc_reset_normal_type_depths(hwloc_topology_t topology) for (i=HWLOC_OBJ_TYPE_MIN; i<=HWLOC_OBJ_GROUP; i++) topology->type_depth[i] = HWLOC_TYPE_DEPTH_UNKNOWN; /* type contiguity is asserted in topology_check() */ + topology->type_depth[HWLOC_OBJ_DIE] = HWLOC_TYPE_DEPTH_UNKNOWN; } static int @@ -2245,6 +2342,8 @@ hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i) return -1; for(j=0; jlevel_nbobjects[i]; j++) { + if (topology->levels[i-1][j] != topology->levels[i][j]->parent) + return -1; if (topology->levels[i-1][j]->arity != 1) return -1; if (checkmemory && topology->levels[i-1][j]->memory_arity) @@ -2434,6 +2533,7 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root) { hwloc_obj_t child; unsigned arity = root->arity; + hwloc_obj_t *array; int ok; /* assume we're not symmetric by default */ @@ -2465,8 +2565,9 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root) /* now check that children subtrees are identical. 
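 * (for instance, two packages that each contain 2 cores of 2 PUs pass
 * this check, while one package with 2 cores next to one with 3 cores
 * fails it and the root is left marked as asymmetric)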
* just walk down the first child in each tree and compare their depth and arities */ -{ - HWLOC_VLA(hwloc_obj_t, array, arity); + array = malloc(arity * sizeof(*array)); + if (!array) + return; memcpy(array, root->children, arity * sizeof(*array)); while (1) { unsigned i; @@ -2474,8 +2575,9 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root) for(i=1; idepth != array[0]->depth || array[i]->arity != array[0]->arity) { - return; - } + free(array); + return; + } if (!array[0]->arity) /* no more children level, we're ok */ break; @@ -2483,7 +2585,7 @@ hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root) for(i=0; ifirst_child; } -} + free(array); /* everything went fine, we're symmetric */ good: @@ -2601,57 +2703,23 @@ hwloc_connect_children(hwloc_obj_t parent) } /* - * Check whether there is an object below ROOT that has the same type as OBJ + * Check whether there is an object strictly below ROOT that has the same type as OBJ */ static int find_same_type(hwloc_obj_t root, hwloc_obj_t obj) { hwloc_obj_t child; - if (hwloc_type_cmp(root, obj) == HWLOC_OBJ_EQUAL) - return 1; - - for_each_child (child, root) + for_each_child (child, root) { + if (hwloc_type_cmp(child, obj) == HWLOC_OBJ_EQUAL) + return 1; if (find_same_type(child, obj)) return 1; + } return 0; } -/* traverse the array of current object and compare them with top_obj. - * if equal, take the object and put its children into the remaining objs. - * if not equal, put the object into the remaining objs. - */ -static unsigned -hwloc_level_take_objects(hwloc_obj_t top_obj, - hwloc_obj_t *current_objs, unsigned n_current_objs, - hwloc_obj_t *taken_objs, unsigned n_taken_objs __hwloc_attribute_unused, - hwloc_obj_t *remaining_objs, unsigned n_remaining_objs __hwloc_attribute_unused) -{ - unsigned taken_i = 0; - unsigned new_i = 0; - unsigned i, j; - - for (i = 0; i < n_current_objs; i++) - if (hwloc_type_cmp(top_obj, current_objs[i]) == HWLOC_OBJ_EQUAL) { - /* Take it, add main children. */ - taken_objs[taken_i++] = current_objs[i]; - for (j = 0; j < current_objs[i]->arity; j++) - remaining_objs[new_i++] = current_objs[i]->children[j]; - } else { - /* Leave it. */ - remaining_objs[new_i++] = current_objs[i]; - } - -#ifdef HWLOC_DEBUG - /* Make sure we didn't mess up. 
*/ - assert(taken_i == n_taken_objs); - assert(new_i == n_current_objs - n_taken_objs + n_remaining_objs); -#endif - - return new_i; -} - static int hwloc_build_level_from_list(struct hwloc_special_level_s *slevel) { @@ -2670,6 +2738,9 @@ hwloc_build_level_from_list(struct hwloc_special_level_s *slevel) if (nb) { /* allocate and fill level */ slevel->objs = malloc(nb * sizeof(struct hwloc_obj *)); + if (!slevel->objs) + return -1; + obj = slevel->first; i = 0; while (obj) { @@ -2709,7 +2780,17 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj) /* Insert the main NUMA node list */ hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_NUMANODE], obj); - /* Recurse */ + /* Recurse, NUMA nodes only have Misc children */ + for_each_misc_child(child, obj) + hwloc_list_special_objects(topology, child); + + } else if (obj->type == HWLOC_OBJ_MEMCACHE) { + obj->next_cousin = NULL; + obj->depth = HWLOC_TYPE_DEPTH_MEMCACHE; + /* Insert the main MemCache list */ + hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_MEMCACHE], obj); + + /* Recurse, MemCaches have NUMA nodes or Misc children */ for_each_memory_child(child, obj) hwloc_list_special_objects(topology, child); for_each_misc_child(child, obj) @@ -2742,6 +2823,7 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj) /* Insert in the main osdev list */ hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_OSDEV], obj); } + /* Recurse, I/O only have I/O and Misc children */ for_each_io_child(child, obj) hwloc_list_special_objects(topology, child); @@ -2762,7 +2844,7 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj) } /* Build I/O levels */ -static void +static int hwloc_connect_io_misc_levels(hwloc_topology_t topology) { unsigned i; @@ -2773,8 +2855,12 @@ hwloc_connect_io_misc_levels(hwloc_topology_t topology) hwloc_list_special_objects(topology, topology->levels[0][0]); - for(i=0; islevels[i]); + for(i=0; islevels[i]) < 0) + return -1; + } + + return 0; } /* @@ -2849,32 +2935,48 @@ hwloc_connect_levels(hwloc_topology_t topology) /* Now peek all objects of the same type, build a level with that and * replace them with their children. */ - /* First count them. */ - n_taken_objs = 0; - n_new_objs = 0; - for (i = 0; i < n_objs; i++) - if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_OBJ_EQUAL) { - n_taken_objs++; - n_new_objs += objs[i]->arity; - } - - /* New level. */ - taken_objs = malloc((n_taken_objs + 1) * sizeof(taken_objs[0])); - /* New list of pending objects. 
*/ - if (n_objs - n_taken_objs + n_new_objs) { - new_objs = malloc((n_objs - n_taken_objs + n_new_objs) * sizeof(new_objs[0])); - } else { -#ifdef HWLOC_DEBUG - assert(!n_new_objs); - assert(n_objs == n_taken_objs); -#endif - new_objs = NULL; + /* allocate enough to take all current objects and an ending NULL */ + taken_objs = malloc((n_objs+1) * sizeof(taken_objs[0])); + if (!taken_objs) { + free(objs); + errno = ENOMEM; + return -1; } - n_new_objs = hwloc_level_take_objects(top_obj, - objs, n_objs, - taken_objs, n_taken_objs, - new_objs, n_new_objs); + /* allocate enough to keep all current objects or their children */ + n_new_objs = 0; + for (i = 0; i < n_objs; i++) { + if (objs[i]->arity) + n_new_objs += objs[i]->arity; + else + n_new_objs++; + } + new_objs = malloc(n_new_objs * sizeof(new_objs[0])); + if (!new_objs) { + free(objs); + free(taken_objs); + errno = ENOMEM; + return -1; + } + + /* now actually take these objects */ + n_new_objs = 0; + n_taken_objs = 0; + for (i = 0; i < n_objs; i++) + if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_OBJ_EQUAL) { + /* Take it, add main children. */ + taken_objs[n_taken_objs++] = objs[i]; + memcpy(&new_objs[n_new_objs], objs[i]->children, objs[i]->arity * sizeof(new_objs[0])); + n_new_objs += objs[i]->arity; + } else { + /* Leave it. */ + new_objs[n_new_objs++] = objs[i]; + } + + if (!n_new_objs) { + free(new_objs); + new_objs = NULL; + } /* Ok, put numbers in the level and link cousins. */ for (i = 0; i < n_taken_objs; i++) { @@ -2964,13 +3066,69 @@ hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags) if (hwloc_connect_levels(topology) < 0) return -1; - hwloc_connect_io_misc_levels(topology); + if (hwloc_connect_io_misc_levels(topology) < 0) + return -1; topology->modified = 0; return 0; } +/* for regression testing, make sure the order of io devices + * doesn't change with the dentry order in the filesystem + * + * Only needed for OSDev for now. 
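+ *
+ * (only active when the HWLOC_DEBUG_SORT_CHILDREN environment variable is
+ * set at the end of discovery; OS devices below each parent are then
+ * re-linked in strcmp() order of their names)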
+ */ +static hwloc_obj_t +hwloc_debug_insert_osdev_sorted(hwloc_obj_t queue, hwloc_obj_t obj) +{ + hwloc_obj_t *pcur = &queue; + while (*pcur && strcmp((*pcur)->name, obj->name) < 0) + pcur = &((*pcur)->next_sibling); + obj->next_sibling = *pcur; + *pcur = obj; + return queue; +} + +static void +hwloc_debug_sort_children(hwloc_obj_t root) +{ + hwloc_obj_t child; + + if (root->io_first_child) { + hwloc_obj_t osdevqueue, *pchild; + + pchild = &root->io_first_child; + osdevqueue = NULL; + while ((child = *pchild) != NULL) { + if (child->type != HWLOC_OBJ_OS_DEVICE) { + /* keep non-osdev untouched */ + pchild = &child->next_sibling; + continue; + } + + /* dequeue this child */ + *pchild = child->next_sibling; + child->next_sibling = NULL; + + /* insert in osdev queue in order */ + osdevqueue = hwloc_debug_insert_osdev_sorted(osdevqueue, child); + } + + /* requeue the now-sorted osdev queue */ + *pchild = osdevqueue; + } + + /* Recurse */ + for_each_child(child, root) + hwloc_debug_sort_children(child); + for_each_memory_child(child, root) + hwloc_debug_sort_children(child); + for_each_io_child(child, root) + hwloc_debug_sort_children(child); + /* no I/O under Misc */ +} + void hwloc_alloc_root_sets(hwloc_obj_t root) { /* @@ -2992,11 +3150,32 @@ void hwloc_alloc_root_sets(hwloc_obj_t root) root->complete_nodeset = hwloc_bitmap_alloc(); } -/* Main discovery loop */ -static int -hwloc_discover(struct hwloc_topology *topology) +static void +hwloc_discover_by_phase(struct hwloc_topology *topology, + struct hwloc_disc_status *dstatus, + const char *phasename __hwloc_attribute_unused) { struct hwloc_backend *backend; + hwloc_debug("%s phase discovery...\n", phasename); + for(backend = topology->backends; backend; backend = backend->next) { + if (dstatus->phase & dstatus->excluded_phases) + break; + if (!(backend->phases & dstatus->phase)) + continue; + if (!backend->discover) + continue; + hwloc_debug("%s phase discovery in component %s...\n", phasename, backend->component->name); + backend->discover(backend, dstatus); + hwloc_debug_print_objects(0, topology->levels[0][0]); + } +} + +/* Main discovery loop */ +static int +hwloc_discover(struct hwloc_topology *topology, + struct hwloc_disc_status *dstatus) +{ + const char *env; topology->modified = 0; /* no need to reconnect yet */ @@ -3038,38 +3217,70 @@ hwloc_discover(struct hwloc_topology *topology) * automatically propagated to the whole tree after detection. */ - /* - * Discover CPUs first - */ - backend = topology->backends; - while (NULL != backend) { - if (backend->component->type != HWLOC_DISC_COMPONENT_TYPE_CPU - && backend->component->type != HWLOC_DISC_COMPONENT_TYPE_GLOBAL) - /* not yet */ - goto next_cpubackend; - if (!backend->discover) - goto next_cpubackend; - backend->discover(backend); - hwloc_debug_print_objects(0, topology->levels[0][0]); + if (topology->backend_phases & HWLOC_DISC_PHASE_GLOBAL) { + /* usually, GLOBAL is alone. + * but HWLOC_ANNOTATE_GLOBAL_COMPONENTS=1 allows optional ANNOTATE steps. 
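+ *
+ * (each discovery phase is a bit in backend->phases;
+ * hwloc_discover_by_phase() only invokes a backend when that bit is set
+ * and the phase has not been excluded via dstatus->excluded_phases)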
+ */ + struct hwloc_backend *global_backend = topology->backends; + assert(global_backend); + assert(global_backend->phases == HWLOC_DISC_PHASE_GLOBAL); -next_cpubackend: - backend = backend->next; + /* + * Perform the single-component-based GLOBAL discovery + */ + hwloc_debug("GLOBAL phase discovery...\n"); + hwloc_debug("GLOBAL phase discovery with component %s...\n", global_backend->component->name); + dstatus->phase = HWLOC_DISC_PHASE_GLOBAL; + global_backend->discover(global_backend, dstatus); + hwloc_debug_print_objects(0, topology->levels[0][0]); + } + /* Don't explicitly ignore other phases, in case there's ever + * a need to bring them back. + * The component with usually exclude them by default anyway. + * Except if annotating global components is explicitly requested. + */ + + if (topology->backend_phases & HWLOC_DISC_PHASE_CPU) { + /* + * Discover CPUs first + */ + dstatus->phase = HWLOC_DISC_PHASE_CPU; + hwloc_discover_by_phase(topology, dstatus, "CPU"); + } + + if (!(topology->backend_phases & (HWLOC_DISC_PHASE_GLOBAL|HWLOC_DISC_PHASE_CPU))) { + hwloc_debug("No GLOBAL or CPU component phase found\n"); + /* we'll fail below */ } /* One backend should have called hwloc_alloc_root_sets() * and set bits during PU and NUMA insert. */ if (!topology->levels[0][0]->cpuset || hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) { - hwloc_debug("%s", "No PU added by any CPU and global backend\n"); + hwloc_debug("%s", "No PU added by any CPU or GLOBAL component phase\n"); errno = EINVAL; return -1; } - if (topology->binding_hooks.get_allowed_resources && topology->is_thissystem) { - const char *env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES"); - if ((env && atoi(env)) - || (topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) - topology->binding_hooks.get_allowed_resources(topology); + /* + * Memory-specific discovery + */ + if (topology->backend_phases & HWLOC_DISC_PHASE_MEMORY) { + dstatus->phase = HWLOC_DISC_PHASE_MEMORY; + hwloc_discover_by_phase(topology, dstatus, "MEMORY"); + } + + if (/* check if getting the sets of locally allowed resources is possible */ + topology->binding_hooks.get_allowed_resources + && topology->is_thissystem + /* check whether it has been done already */ + && !(dstatus->flags & HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES) + /* check whether it was explicitly requested */ + && ((topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) != 0 + || ((env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES")) != NULL && atoi(env)))) { + /* OK, get the sets of locally allowed resources */ + topology->binding_hooks.get_allowed_resources(topology); + dstatus->flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES; } /* If there's no NUMA node, add one with all the memory. 
@@ -3113,7 +3324,7 @@ next_cpubackend: hwloc_debug_print_objects(0, topology->levels[0][0]); - if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) { + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) { hwloc_debug("%s", "\nRemoving unauthorized sets from all sets\n"); remove_unused_sets(topology, topology->levels[0][0]); hwloc_debug_print_objects(0, topology->levels[0][0]); @@ -3149,28 +3360,27 @@ next_cpubackend: hwloc_debug_print_objects(0, topology->levels[0][0]); /* - * Additional discovery with other backends + * Additional discovery */ - - backend = topology->backends; - while (NULL != backend) { - if (backend->component->type == HWLOC_DISC_COMPONENT_TYPE_CPU - || backend->component->type == HWLOC_DISC_COMPONENT_TYPE_GLOBAL) - /* already done above */ - goto next_noncpubackend; - if (!backend->discover) - goto next_noncpubackend; - backend->discover(backend); - hwloc_debug_print_objects(0, topology->levels[0][0]); - -next_noncpubackend: - backend = backend->next; + if (topology->backend_phases & HWLOC_DISC_PHASE_PCI) { + dstatus->phase = HWLOC_DISC_PHASE_PCI; + hwloc_discover_by_phase(topology, dstatus, "PCI"); + } + if (topology->backend_phases & HWLOC_DISC_PHASE_IO) { + dstatus->phase = HWLOC_DISC_PHASE_IO; + hwloc_discover_by_phase(topology, dstatus, "IO"); + } + if (topology->backend_phases & HWLOC_DISC_PHASE_MISC) { + dstatus->phase = HWLOC_DISC_PHASE_MISC; + hwloc_discover_by_phase(topology, dstatus, "MISC"); + } + if (topology->backend_phases & HWLOC_DISC_PHASE_ANNOTATE) { + dstatus->phase = HWLOC_DISC_PHASE_ANNOTATE; + hwloc_discover_by_phase(topology, dstatus, "ANNOTATE"); } - hwloc_pci_belowroot_apply_locality(topology); - - hwloc_debug("%s", "\nNow reconnecting\n"); - hwloc_debug_print_objects(0, topology->levels[0][0]); + if (getenv("HWLOC_DEBUG_SORT_CHILDREN")) + hwloc_debug_sort_children(topology->levels[0][0]); /* Remove some stuff */ @@ -3217,7 +3427,8 @@ next_noncpubackend: /* add some identification attributes if not loading from XML */ if (topology->backends - && strcmp(topology->backends->component->name, "xml")) { + && strcmp(topology->backends->component->name, "xml") + && !getenv("HWLOC_DONT_ADD_VERSION_INFO")) { char *value; /* add a hwlocVersion */ hwloc_obj_add_info(topology->levels[0][0], "hwlocVersion", HWLOC_VERSION); @@ -3269,6 +3480,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology) HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_BRIDGE == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_BRIDGE)); HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_PCIDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_PCI_DEVICE)); HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_OSDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_OS_DEVICE)); + HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_MEMCACHE == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_MEMCACHE)); /* sane values to type_depth */ hwloc_reset_normal_type_depths(topology); @@ -3277,6 +3489,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology) topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE; topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE; topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE; + topology->type_depth[HWLOC_OBJ_MEMCACHE] = HWLOC_TYPE_DEPTH_MEMCACHE; /* Create the actual machine object, but don't touch its attributes yet * since the OS backend may still change the object into something else @@ -3303,7 +3516,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp, topology->tma = tma; hwloc_components_init(); /* uses malloc without tma, but won't need it since dup() 
caller already took a reference */ - hwloc_backends_init(topology); + hwloc_topology_components_init(topology); hwloc_pci_discovery_init(topology); /* make sure both dup() and load() get sane variables */ /* Setup topology context */ @@ -3320,7 +3533,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp, topology->support.cpubind = hwloc_tma_malloc(tma, sizeof(*topology->support.cpubind)); topology->support.membind = hwloc_tma_malloc(tma, sizeof(*topology->support.membind)); - topology->nb_levels_allocated = nblevels; /* enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */ + topology->nb_levels_allocated = nblevels; /* enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */ topology->levels = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->levels)); topology->level_nbobjects = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->level_nbobjects)); @@ -3343,7 +3556,7 @@ int hwloc_topology_init (struct hwloc_topology **topologyp) { return hwloc__topology_init(topologyp, - 16, /* 16 is enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */ + 16, /* 16 is enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */ NULL); /* no TMA for normal topologies, too many allocations to fix */ } @@ -3376,7 +3589,7 @@ hwloc_topology_set_synthetic(struct hwloc_topology *topology, const char *descri return hwloc_disc_component_force_enable(topology, 0 /* api */, - -1, "synthetic", + "synthetic", description, NULL, NULL); } @@ -3391,7 +3604,7 @@ hwloc_topology_set_xml(struct hwloc_topology *topology, return hwloc_disc_component_force_enable(topology, 0 /* api */, - -1, "xml", + "xml", xmlpath, NULL, NULL); } @@ -3407,7 +3620,7 @@ hwloc_topology_set_xmlbuffer(struct hwloc_topology *topology, return hwloc_disc_component_force_enable(topology, 0 /* api */, - -1, "xml", NULL, + "xml", NULL, xmlbuffer, (void*) (uintptr_t) size); } @@ -3420,7 +3633,7 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags) return -1; } - if (flags & ~(HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) { + if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) { errno = EINVAL; return -1; } @@ -3445,6 +3658,7 @@ hwloc__topology_filter_init(struct hwloc_topology *topology) topology->type_filter[HWLOC_OBJ_L1ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; topology->type_filter[HWLOC_OBJ_L2ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; topology->type_filter[HWLOC_OBJ_L3ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; + topology->type_filter[HWLOC_OBJ_MEMCACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; topology->type_filter[HWLOC_OBJ_GROUP] = HWLOC_TYPE_FILTER_KEEP_STRUCTURE; topology->type_filter[HWLOC_OBJ_MISC] = HWLOC_TYPE_FILTER_KEEP_NONE; topology->type_filter[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_FILTER_KEEP_NONE; @@ -3575,6 +3789,7 @@ hwloc_topology_destroy (struct hwloc_topology *topology) } hwloc_backends_disable_all(topology); + hwloc_topology_components_fini(topology); hwloc_components_fini(); hwloc_topology_clear(topology); @@ -3591,6 +3806,8 @@ hwloc_topology_destroy (struct hwloc_topology *topology) int hwloc_topology_load (struct hwloc_topology *topology) { + struct hwloc_disc_status dstatus; + const char *env; int err; if (topology->is_loaded) { @@ -3617,7 +3834,7 @@ hwloc_topology_load (struct hwloc_topology *topology) if (fsroot_path_env) hwloc_disc_component_force_enable(topology, 1 
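/* note: the component-type argument is gone from this call; components
 * are now selected by name alone and advertise their discovery phases
 * through backend->phases */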
/* env force */, - HWLOC_DISC_COMPONENT_TYPE_CPU, "linux", + "linux", NULL /* backend will getenv again */, NULL, NULL); } if (!topology->backends) { @@ -3625,7 +3842,7 @@ hwloc_topology_load (struct hwloc_topology *topology) if (cpuid_path_env) hwloc_disc_component_force_enable(topology, 1 /* env force */, - HWLOC_DISC_COMPONENT_TYPE_CPU, "x86", + "x86", NULL /* backend will getenv again */, NULL, NULL); } if (!topology->backends) { @@ -3633,7 +3850,7 @@ hwloc_topology_load (struct hwloc_topology *topology) if (synthetic_env) hwloc_disc_component_force_enable(topology, 1 /* env force */, - -1, "synthetic", + "synthetic", synthetic_env, NULL, NULL); } if (!topology->backends) { @@ -3641,11 +3858,19 @@ hwloc_topology_load (struct hwloc_topology *topology) if (xmlpath_env) hwloc_disc_component_force_enable(topology, 1 /* env force */, - -1, "xml", + "xml", xmlpath_env, NULL, NULL); } } + dstatus.excluded_phases = 0; + dstatus.flags = 0; /* did nothing yet */ + + env = getenv("HWLOC_ALLOW"); + if (env && !strcmp(env, "all")) + /* don't retrieve the sets of allowed resources */ + dstatus.flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES; + /* instantiate all possible other backends now */ hwloc_disc_components_enable_others(topology); /* now that backends are enabled, update the thissystem flag and some callbacks */ @@ -3660,7 +3885,7 @@ hwloc_topology_load (struct hwloc_topology *topology) hwloc_pci_discovery_prepare(topology); /* actual topology discovery */ - err = hwloc_discover(topology); + err = hwloc_discover(topology, &dstatus); if (err < 0) goto out; @@ -3682,6 +3907,12 @@ hwloc_topology_load (struct hwloc_topology *topology) hwloc_internal_distances_refresh(topology); topology->is_loaded = 1; + + if (topology->backend_phases & HWLOC_DISC_PHASE_TWEAK) { + dstatus.phase = HWLOC_DISC_PHASE_TWEAK; + hwloc_discover_by_phase(topology, &dstatus, "TWEAK"); + } + return 0; out: @@ -3740,7 +3971,75 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_ && hwloc_bitmap_iszero(obj->cpuset) && (obj->type != HWLOC_OBJ_NUMANODE || (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS))) { /* remove object */ - hwloc_debug("%s", "\nRemoving object during restrict"); + hwloc_debug("%s", "\nRemoving object during restrict by cpuset"); + hwloc_debug_print_object(0, obj); + + if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_IO)) { + hwloc_free_object_siblings_and_children(obj->io_first_child); + obj->io_first_child = NULL; + } + if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_MISC)) { + hwloc_free_object_siblings_and_children(obj->misc_first_child); + obj->misc_first_child = NULL; + } + assert(!obj->first_child); + assert(!obj->memory_first_child); + unlink_and_free_single_object(pobj); + topology->modified = 1; + } +} + +/* adjust object nodesets according the given droppednodeset, + * drop object whose nodeset becomes empty and that have no children, + * and propagate PU removal as cpuset changes in parents. 
+ */ +static void +restrict_object_by_nodeset(hwloc_topology_t topology, unsigned long flags, hwloc_obj_t *pobj, + hwloc_bitmap_t droppedcpuset, hwloc_bitmap_t droppednodeset) +{ + hwloc_obj_t obj = *pobj, child, *pchild; + int modified = 0; + + if (hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)) { + hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset); + hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset); + modified = 1; + } else { + if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) + && hwloc_bitmap_iszero(obj->complete_nodeset)) { + /* we're empty, there's a PU below us, it'll be removed this time */ + modified = 1; + } + /* cpuset cannot intersect unless nodeset intersects or is empty */ + if (droppedcpuset) + assert(!hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset) + || hwloc_bitmap_iszero(obj->complete_nodeset)); + } + if (droppedcpuset) { + hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset); + hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset); + } + + if (modified) { + for_each_child_safe(child, obj, pchild) + restrict_object_by_nodeset(topology, flags, pchild, droppedcpuset, droppednodeset); + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) + /* cpuset may have changed above where some NUMA nodes were removed. + * if some hwloc_bitmap_first(child->complete_cpuset) changed, children might need to be reordered */ + hwloc__reorder_children(obj); + + for_each_memory_child_safe(child, obj, pchild) + restrict_object_by_nodeset(topology, flags, pchild, droppedcpuset, droppednodeset); + /* FIXME: we may have to reorder CPU-less groups of NUMA nodes if some of their nodes were removed */ + + /* Nothing to restrict under I/O or Misc */ + } + + if (!obj->first_child && !obj->memory_first_child /* arity not updated before connect_children() */ + && hwloc_bitmap_iszero(obj->nodeset) + && (obj->type != HWLOC_OBJ_PU || (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS))) { + /* remove object */ + hwloc_debug("%s", "\nRemoving object during restrict by nodeset"); hwloc_debug_print_object(0, obj); if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_IO)) { @@ -3759,7 +4058,7 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_ } int -hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cpuset, unsigned long flags) +hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t set, unsigned long flags) { hwloc_bitmap_t droppedcpuset, droppednodeset; @@ -3767,15 +4066,35 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp errno = EINVAL; return -1; } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return -1; + } if (flags & ~(HWLOC_RESTRICT_FLAG_REMOVE_CPULESS - |HWLOC_RESTRICT_FLAG_ADAPT_MISC|HWLOC_RESTRICT_FLAG_ADAPT_IO)) { + |HWLOC_RESTRICT_FLAG_ADAPT_MISC|HWLOC_RESTRICT_FLAG_ADAPT_IO + |HWLOC_RESTRICT_FLAG_BYNODESET|HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS)) { errno = EINVAL; return -1; } + if (flags & HWLOC_RESTRICT_FLAG_BYNODESET) { + /* cannot use CPULESS with BYNODESET */ + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) { + errno = EINVAL; + return -1; + } + } else { + /* cannot use MEMLESS without BYNODESET */ + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) { + errno = EINVAL; + return -1; + } + } + /* make sure we'll keep something in the topology */ - if (!hwloc_bitmap_intersects(cpuset, topology->allowed_cpuset)) { + if (((flags & HWLOC_RESTRICT_FLAG_BYNODESET) && !hwloc_bitmap_intersects(set, 
topology->allowed_nodeset)) + || (!(flags & HWLOC_RESTRICT_FLAG_BYNODESET) && !hwloc_bitmap_intersects(set, topology->allowed_cpuset))) { errno = EINVAL; /* easy failure, just don't touch the topology */ return -1; } @@ -3788,39 +4107,76 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp return -1; } - /* cpuset to clear */ - hwloc_bitmap_not(droppedcpuset, cpuset); - /* nodeset to clear */ - if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) { - hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); - do { - /* node will be removed if nodeset gets or was empty */ - if (hwloc_bitmap_iszero(node->cpuset) - || hwloc_bitmap_isincluded(node->cpuset, droppedcpuset)) - hwloc_bitmap_set(droppednodeset, node->os_index); - node = node->next_cousin; - } while (node); + if (flags & HWLOC_RESTRICT_FLAG_BYNODESET) { + /* nodeset to clear */ + hwloc_bitmap_not(droppednodeset, set); + /* cpuset to clear */ + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) { + hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0); + do { + /* PU will be removed if cpuset gets or was empty */ + if (hwloc_bitmap_iszero(pu->cpuset) + || hwloc_bitmap_isincluded(pu->nodeset, droppednodeset)) + hwloc_bitmap_set(droppedcpuset, pu->os_index); + pu = pu->next_cousin; + } while (pu); - /* check we're not removing all NUMA nodes */ - if (hwloc_bitmap_isincluded(topology->allowed_nodeset, droppednodeset)) { - errno = EINVAL; /* easy failure, just don't touch the topology */ - hwloc_bitmap_free(droppedcpuset); - hwloc_bitmap_free(droppednodeset); - return -1; + /* check we're not removing all PUs */ + if (hwloc_bitmap_isincluded(topology->allowed_cpuset, droppedcpuset)) { + errno = EINVAL; /* easy failure, just don't touch the topology */ + hwloc_bitmap_free(droppedcpuset); + hwloc_bitmap_free(droppednodeset); + return -1; + } + } + /* remove cpuset if empty */ + if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) + || hwloc_bitmap_iszero(droppedcpuset)) { + hwloc_bitmap_free(droppedcpuset); + droppedcpuset = NULL; } - } - /* remove nodeset if empty */ - if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) - || hwloc_bitmap_iszero(droppednodeset)) { - hwloc_bitmap_free(droppednodeset); - droppednodeset = NULL; - } - /* now recurse to filter sets and drop things */ - restrict_object_by_cpuset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset); - hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset); - if (droppednodeset) + /* now recurse to filter sets and drop things */ + restrict_object_by_nodeset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset); hwloc_bitmap_andnot(topology->allowed_nodeset, topology->allowed_nodeset, droppednodeset); + if (droppedcpuset) + hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset); + + } else { + /* cpuset to clear */ + hwloc_bitmap_not(droppedcpuset, set); + /* nodeset to clear */ + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) { + hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); + do { + /* node will be removed if nodeset gets or was empty */ + if (hwloc_bitmap_iszero(node->cpuset) + || hwloc_bitmap_isincluded(node->cpuset, droppedcpuset)) + hwloc_bitmap_set(droppednodeset, node->os_index); + node = node->next_cousin; + } while (node); + + /* check we're not removing all NUMA nodes */ + if (hwloc_bitmap_isincluded(topology->allowed_nodeset, droppednodeset)) { + errno = EINVAL; /* easy failure, just don't 
touch the topology */ + hwloc_bitmap_free(droppedcpuset); + hwloc_bitmap_free(droppednodeset); + return -1; + } + } + /* remove nodeset if empty */ + if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) + || hwloc_bitmap_iszero(droppednodeset)) { + hwloc_bitmap_free(droppednodeset); + droppednodeset = NULL; + } + + /* now recurse to filter sets and drop things */ + restrict_object_by_cpuset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset); + hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset); + if (droppednodeset) + hwloc_bitmap_andnot(topology->allowed_nodeset, topology->allowed_nodeset, droppednodeset); + } hwloc_bitmap_free(droppedcpuset); hwloc_bitmap_free(droppednodeset); @@ -3849,6 +4205,72 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp return -1; } +int +hwloc_topology_allow(struct hwloc_topology *topology, + hwloc_const_cpuset_t cpuset, hwloc_const_nodeset_t nodeset, + unsigned long flags) +{ + if (!topology->is_loaded) + goto einval; + + if (topology->adopted_shmem_addr) { + errno = EPERM; + goto error; + } + + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) + goto einval; + + if (flags & ~(HWLOC_ALLOW_FLAG_ALL|HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS|HWLOC_ALLOW_FLAG_CUSTOM)) + goto einval; + + switch (flags) { + case HWLOC_ALLOW_FLAG_ALL: { + if (cpuset || nodeset) + goto einval; + hwloc_bitmap_copy(topology->allowed_cpuset, hwloc_get_root_obj(topology)->complete_cpuset); + hwloc_bitmap_copy(topology->allowed_nodeset, hwloc_get_root_obj(topology)->complete_nodeset); + break; + } + case HWLOC_ALLOW_FLAG_LOCAL_RESTRICTIONS: { + if (cpuset || nodeset) + goto einval; + if (!topology->is_thissystem) + goto einval; + if (!topology->binding_hooks.get_allowed_resources) { + errno = ENOSYS; + goto error; + } + topology->binding_hooks.get_allowed_resources(topology); + break; + } + case HWLOC_ALLOW_FLAG_CUSTOM: { + if (cpuset) { + /* keep the intersection with the full topology cpuset, if not empty */ + if (!hwloc_bitmap_intersects(hwloc_get_root_obj(topology)->cpuset, cpuset)) + goto einval; + hwloc_bitmap_and(topology->allowed_cpuset, hwloc_get_root_obj(topology)->cpuset, cpuset); + } + if (nodeset) { + /* keep the intersection with the full topology nodeset, if not empty */ + if (!hwloc_bitmap_intersects(hwloc_get_root_obj(topology)->nodeset, nodeset)) + goto einval; + hwloc_bitmap_and(topology->allowed_nodeset, hwloc_get_root_obj(topology)->nodeset, nodeset); + } + break; + } + default: + goto einval; + } + + return 0; + + einval: + errno = EINVAL; + error: + return -1; +} + int hwloc_topology_is_thissystem(struct hwloc_topology *topology) { @@ -4005,7 +4427,7 @@ hwloc__check_children_cpusets(hwloc_topology_t topology __hwloc_attribute_unused assert(hwloc_bitmap_first(obj->cpuset) == (int) obj->os_index); assert(hwloc_bitmap_weight(obj->complete_cpuset) == 1); assert(hwloc_bitmap_first(obj->complete_cpuset) == (int) obj->os_index); - if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) { + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) { assert(hwloc_bitmap_isset(topology->allowed_cpuset, (int) obj->os_index)); } assert(!obj->arity); @@ -4166,6 +4588,8 @@ hwloc__check_object(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_ assert(obj->cpuset); if (obj->type == HWLOC_OBJ_NUMANODE) assert(obj->depth == HWLOC_TYPE_DEPTH_NUMANODE); + else if (obj->type == HWLOC_OBJ_MEMCACHE) + assert(obj->depth == HWLOC_TYPE_DEPTH_MEMCACHE); else assert(obj->depth 
>= 0); } @@ -4219,7 +4643,7 @@ hwloc__check_nodesets(hwloc_topology_t topology, hwloc_obj_t obj, hwloc_bitmap_t assert(hwloc_bitmap_first(obj->nodeset) == (int) obj->os_index); assert(hwloc_bitmap_weight(obj->complete_nodeset) == 1); assert(hwloc_bitmap_first(obj->complete_nodeset) == (int) obj->os_index); - if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) { + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED)) { assert(hwloc_bitmap_isset(topology->allowed_nodeset, (int) obj->os_index)); } assert(!obj->arity); @@ -4362,7 +4786,9 @@ hwloc_topology_check(struct hwloc_topology *topology) HWLOC_BUILD_ASSERT(HWLOC_OBJ_BRIDGE + 1 == HWLOC_OBJ_PCI_DEVICE); HWLOC_BUILD_ASSERT(HWLOC_OBJ_PCI_DEVICE + 1 == HWLOC_OBJ_OS_DEVICE); HWLOC_BUILD_ASSERT(HWLOC_OBJ_OS_DEVICE + 1 == HWLOC_OBJ_MISC); - HWLOC_BUILD_ASSERT(HWLOC_OBJ_MISC + 1 == HWLOC_OBJ_TYPE_MAX); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_MISC + 1 == HWLOC_OBJ_MEMCACHE); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_MEMCACHE + 1 == HWLOC_OBJ_DIE); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_DIE + 1 == HWLOC_OBJ_TYPE_MAX); /* make sure order and priority arrays have the right size */ HWLOC_BUILD_ASSERT(sizeof(obj_type_order)/sizeof(*obj_type_order) == HWLOC_OBJ_TYPE_MAX); @@ -4408,6 +4834,7 @@ hwloc_topology_check(struct hwloc_topology *topology) int d; type = hwloc_get_depth_type(topology, j); assert(type != HWLOC_OBJ_NUMANODE); + assert(type != HWLOC_OBJ_MEMCACHE); assert(type != HWLOC_OBJ_PCI_DEVICE); assert(type != HWLOC_OBJ_BRIDGE); assert(type != HWLOC_OBJ_OS_DEVICE); @@ -4423,6 +4850,9 @@ hwloc_topology_check(struct hwloc_topology *topology) if (type == HWLOC_OBJ_NUMANODE) { assert(d == HWLOC_TYPE_DEPTH_NUMANODE); assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_NUMANODE); + } else if (type == HWLOC_OBJ_MEMCACHE) { + assert(d == HWLOC_TYPE_DEPTH_MEMCACHE); + assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_MEMCACHE); } else if (type == HWLOC_OBJ_BRIDGE) { assert(d == HWLOC_TYPE_DEPTH_BRIDGE); assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_BRIDGE); @@ -4449,7 +4879,7 @@ hwloc_topology_check(struct hwloc_topology *topology) assert(!obj->depth); /* check that allowed sets are larger than the main sets */ - if (topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) { + if (topology->flags & HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED) { assert(hwloc_bitmap_isincluded(topology->allowed_cpuset, obj->cpuset)); assert(hwloc_bitmap_isincluded(topology->allowed_nodeset, obj->nodeset)); } else { diff --git a/src/3rdparty/hwloc/src/traversal.c b/src/3rdparty/hwloc/src/traversal.c index 9c5e6268c..0b744d787 100644 --- a/src/3rdparty/hwloc/src/traversal.c +++ b/src/3rdparty/hwloc/src/traversal.c @@ -1,16 +1,17 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
*/ -#include <private/autogen/config.h> -#include <hwloc.h> -#include <private/private.h> -#include <private/misc.h> -#include <private/debug.h> +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" +#include "private/misc.h" +#include "private/debug.h" + #ifdef HAVE_STRINGS_H #include <strings.h> #endif /* HAVE_STRINGS_H */ @@ -40,6 +41,8 @@ hwloc_get_depth_type (hwloc_topology_t topology, int depth) return HWLOC_OBJ_OS_DEVICE; case HWLOC_TYPE_DEPTH_MISC: return HWLOC_OBJ_MISC; + case HWLOC_TYPE_DEPTH_MEMCACHE: + return HWLOC_OBJ_MEMCACHE; default: return HWLOC_OBJ_TYPE_NONE; } @@ -237,8 +240,10 @@ hwloc_obj_type_string (hwloc_obj_type_t obj) case HWLOC_OBJ_MACHINE: return "Machine"; case HWLOC_OBJ_MISC: return "Misc"; case HWLOC_OBJ_GROUP: return "Group"; + case HWLOC_OBJ_MEMCACHE: return "MemCache"; case HWLOC_OBJ_NUMANODE: return "NUMANode"; case HWLOC_OBJ_PACKAGE: return "Package"; + case HWLOC_OBJ_DIE: return "Die"; case HWLOC_OBJ_L1CACHE: return "L1Cache"; case HWLOC_OBJ_L2CACHE: return "L2Cache"; case HWLOC_OBJ_L3CACHE: return "L3Cache"; @@ -256,6 +261,41 @@ hwloc_obj_type_string (hwloc_obj_type_t obj) } } +/* Check if string matches the given type at least on minmatch chars. + * On success, return the address where matching stopped, either pointing to \0 or to a suffix (digits, colon, etc.) + * On error, return NULL. + */ +static __hwloc_inline const char * +hwloc__type_match(const char *string, + const char *type, /* type must be lowercase */ + size_t minmatch) +{ + const char *s, *t; + unsigned i; + for(i=0, s=string, t=type; ; i++, s++, t++) { + if (!*s) { + /* string ends before type */ + if (i<minmatch) + return NULL; + else + return s; + } + if (*s != *t && *s != *t + 'A' - 'a') { + /* string is different */ + if ((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || *s == '-') + /* valid character that doesn't match */ + return NULL; + /* invalid character, we reached the end of the type name in string, stop matching here */ + if (i<minmatch) + return NULL; + else + return s; + } + } +} + } else if ((*string == 'l' || *string == 'L') && string[1] >= '0' && string[1] <= '9') { + char *suffix; depthattr = strtol(string+1, &end, 10); - if (*end == 'i') { + if (*end == 'i' || *end == 'I') { if (depthattr >= 1 && depthattr <= 3) { type = HWLOC_OBJ_L1ICACHE + depthattr-1; cachetypeattr = HWLOC_OBJ_CACHE_INSTRUCTION; + suffix = end+1; } else return -1; } else { if (depthattr >= 1 && depthattr <= 5) { type = HWLOC_OBJ_L1CACHE + depthattr-1; - cachetypeattr = *end == 'd' ?
HWLOC_OBJ_CACHE_DATA : HWLOC_OBJ_CACHE_UNIFIED; + if (*end == 'd' || *end == 'D') { + cachetypeattr = HWLOC_OBJ_CACHE_DATA; + suffix = end+1; + } else if (*end == 'u' || *end == 'U') { + cachetypeattr = HWLOC_OBJ_CACHE_UNIFIED; + suffix = end+1; + } else { + cachetypeattr = HWLOC_OBJ_CACHE_UNIFIED; + suffix = end; + } } else return -1; } + /* check whether the optional suffix matches "cache" */ + if (!hwloc__type_match(suffix, "cache", 0)) + return -1; - } else if (!hwloc_strncasecmp(string, "group", 2)) { - size_t length; + } else if ((end = (char *) hwloc__type_match(string, "group", 2)) != NULL) { type = HWLOC_OBJ_GROUP; - length = strcspn(string, "0123456789"); - if (length <= 5 && !hwloc_strncasecmp(string, "group", length) - && string[length] >= '0' && string[length] <= '9') { - depthattr = strtol(string+length, &end, 10); + if (*end >= '0' && *end <= '9') { + depthattr = strtol(end, &end, 10); } } else @@ -421,7 +477,9 @@ hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t case HWLOC_OBJ_MISC: case HWLOC_OBJ_MACHINE: case HWLOC_OBJ_NUMANODE: + case HWLOC_OBJ_MEMCACHE: case HWLOC_OBJ_PACKAGE: + case HWLOC_OBJ_DIE: case HWLOC_OBJ_CORE: case HWLOC_OBJ_PU: return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type)); @@ -523,6 +581,7 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t case HWLOC_OBJ_L1ICACHE: case HWLOC_OBJ_L2ICACHE: case HWLOC_OBJ_L3ICACHE: + case HWLOC_OBJ_MEMCACHE: if (verbose) { char assoc[32]; if (obj->attr->cache.associativity == -1) From 1fbbae1e4a186cbb82cdeaff6088bb03f450e8b8 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Thu, 5 Dec 2019 19:39:47 +0100 Subject: [PATCH 04/31] Added 1GB hugepages support for Linux --- src/Summary.cpp | 9 +++-- src/backend/common/interfaces/IRxStorage.h | 6 ++-- src/backend/cpu/CpuConfig.cpp | 11 +++--- src/backend/cpu/CpuConfig.h | 2 ++ src/backend/cpu/interfaces/ICpuInfo.h | 1 + src/backend/cpu/platform/BasicCpuInfo.cpp | 14 +++++++- src/backend/cpu/platform/BasicCpuInfo.h | 2 ++ src/config.json | 1 + src/core/Miner.cpp | 2 +- src/crypto/common/VirtualMemory.h | 2 ++ src/crypto/common/VirtualMemory_unix.cpp | 42 ++++++++++++++++++---- src/crypto/common/VirtualMemory_win.cpp | 12 +++++++ src/crypto/randomx/allocator.cpp | 8 +++++ src/crypto/randomx/allocator.hpp | 5 +++ src/crypto/randomx/randomx.cpp | 6 +++- src/crypto/randomx/randomx.h | 1 + src/crypto/randomx/virtual_memory.cpp | 10 ++++++ src/crypto/randomx/virtual_memory.hpp | 1 + src/crypto/rx/Rx.cpp | 4 +-- src/crypto/rx/Rx.h | 2 +- src/crypto/rx/RxBasicStorage.cpp | 8 ++--- src/crypto/rx/RxBasicStorage.h | 2 +- src/crypto/rx/RxDataset.cpp | 21 +++++++---- src/crypto/rx/RxDataset.h | 8 +++-- src/crypto/rx/RxNUMAStorage.cpp | 12 +++---- src/crypto/rx/RxNUMAStorage.h | 2 +- src/crypto/rx/RxQueue.cpp | 6 ++-- src/crypto/rx/RxQueue.h | 6 ++-- 28 files changed, 156 insertions(+), 50 deletions(-) diff --git a/src/Summary.cpp b/src/Summary.cpp index 757297c52..db8ce9fa3 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -63,6 +63,8 @@ static void print_memory(Config *config) { # ifdef _WIN32 Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? GREEN_BOLD("permission granted") : RED_BOLD("unavailable")) : RED_BOLD("disabled")); + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", + "1GB PAGES", Cpu::info()->hasOneGbPages() ? 
GREEN_BOLD("available on Linux") : RED_BOLD("unavailable")); # endif } @@ -71,12 +73,13 @@ static void print_cpu(Config *) { const ICpuInfo *info = Cpu::info(); - Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %sx64 %sAES", + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %sx64 %sAES %sPDPE1GB", "CPU", info->brand(), info->packages(), - info->isX64() ? GREEN_BOLD_S : RED_BOLD_S "-", - info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-" + info->isX64() ? GREEN_BOLD_S : RED_BOLD_S "-", + info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-", + info->hasOneGbPages() ? GREEN_BOLD_S : RED_BOLD_S "-" ); # if defined(XMRIG_FEATURE_LIBCPUID) || defined (XMRIG_FEATURE_HWLOC) Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("L2:") WHITE_BOLD("%.1f MB") BLACK_BOLD(" L3:") WHITE_BOLD("%.1f MB") diff --git a/src/backend/common/interfaces/IRxStorage.h b/src/backend/common/interfaces/IRxStorage.h index 9e0407b04..f64850f77 100644 --- a/src/backend/common/interfaces/IRxStorage.h +++ b/src/backend/common/interfaces/IRxStorage.h @@ -44,9 +44,9 @@ class IRxStorage public: virtual ~IRxStorage() = default; - virtual RxDataset *dataset(const Job &job, uint32_t nodeId) const = 0; - virtual std::pair hugePages() const = 0; - virtual void init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode mode) = 0; + virtual RxDataset *dataset(const Job &job, uint32_t nodeId) const = 0; + virtual std::pair hugePages() const = 0; + virtual void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) = 0; }; diff --git a/src/backend/cpu/CpuConfig.cpp b/src/backend/cpu/CpuConfig.cpp index 7ebe904bd..fb95f942e 100644 --- a/src/backend/cpu/CpuConfig.cpp +++ b/src/backend/cpu/CpuConfig.cpp @@ -34,6 +34,7 @@ namespace xmrig { static const char *kEnabled = "enabled"; static const char *kHugePages = "huge-pages"; +static const char *kOneGbPages = "1gb-pages"; static const char *kHwAes = "hw-aes"; static const char *kMaxThreadsHint = "max-threads-hint"; static const char *kMemoryPool = "memory-pool"; @@ -68,6 +69,7 @@ rapidjson::Value xmrig::CpuConfig::toJSON(rapidjson::Document &doc) const obj.AddMember(StringRef(kEnabled), m_enabled, allocator); obj.AddMember(StringRef(kHugePages), m_hugePages, allocator); + obj.AddMember(StringRef(kOneGbPages), m_oneGbPages, allocator); obj.AddMember(StringRef(kHwAes), m_aes == AES_AUTO ? Value(kNullType) : Value(m_aes == AES_HW), allocator); obj.AddMember(StringRef(kPriority), priority() != -1 ? Value(priority()) : Value(kNullType), allocator); obj.AddMember(StringRef(kMemoryPool), m_memoryPool < 1 ? 
Value(m_memoryPool < 0) : Value(m_memoryPool), allocator); @@ -119,10 +121,11 @@ std::vector<CpuLaunchData> xmrig::CpuConfig::get(const Miner *miner, cons void xmrig::CpuConfig::read(const rapidjson::Value &value) { if (value.IsObject()) { - m_enabled = Json::getBool(value, kEnabled, m_enabled); - m_hugePages = Json::getBool(value, kHugePages, m_hugePages); - m_limit = Json::getUint(value, kMaxThreadsHint, m_limit); - m_yield = Json::getBool(value, kYield, m_yield); + m_enabled = Json::getBool(value, kEnabled, m_enabled); + m_hugePages = Json::getBool(value, kHugePages, m_hugePages); + m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages); + m_limit = Json::getUint(value, kMaxThreadsHint, m_limit); + m_yield = Json::getBool(value, kYield, m_yield); setAesMode(Json::getValue(value, kHwAes)); setPriority(Json::getInt(value, kPriority, -1)); diff --git a/src/backend/cpu/CpuConfig.h b/src/backend/cpu/CpuConfig.h index 2306057fc..74894dcd3 100644 --- a/src/backend/cpu/CpuConfig.h +++ b/src/backend/cpu/CpuConfig.h @@ -54,6 +54,7 @@ public: inline bool isEnabled() const { return m_enabled; } inline bool isHugePages() const { return m_hugePages; } + inline bool isOneGbPages() const { return m_oneGbPages; } inline bool isShouldSave() const { return m_shouldSave; } inline bool isYield() const { return m_yield; } inline const Assembly &assembly() const { return m_assembly; } @@ -72,6 +73,7 @@ private: Assembly m_assembly; bool m_enabled = true; bool m_hugePages = true; + bool m_oneGbPages = false; bool m_shouldSave = false; bool m_yield = true; int m_memoryPool = 0; diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h index 20e72391b..2ffd00f2c 100644 --- a/src/backend/cpu/interfaces/ICpuInfo.h +++ b/src/backend/cpu/interfaces/ICpuInfo.h @@ -48,6 +48,7 @@ public: virtual Assembly::Id assembly() const = 0; virtual bool hasAES() const = 0; virtual bool hasAVX2() const = 0; + virtual bool hasOneGbPages() const = 0; virtual const char *backend() const = 0; virtual const char *brand() const = 0; virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0; diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index db3741ee1..cdc810c30 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -45,6 +45,10 @@ # define bit_AVX2 (1 << 5) #endif +#ifndef bit_PDPE1GB +# define bit_PDPE1GB (1 << 26) +#endif + #include "backend/cpu/platform/BasicCpuInfo.h" #include "crypto/common/Assembly.h" @@ -53,6 +57,7 @@ #define VENDOR_ID (0) #define PROCESSOR_INFO (1) #define EXTENDED_FEATURES (7) +#define PROCESSOR_EXT_INFO (0x80000001) #define PROCESSOR_BRAND_STRING_1 (0x80000002) #define PROCESSOR_BRAND_STRING_2 (0x80000003) #define PROCESSOR_BRAND_STRING_3 (0x80000004) @@ -136,6 +141,12 @@ static inline bool has_avx2() } +static inline bool has_pdpe1gb() +{ + return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, bit_PDPE1GB); +} + + } // namespace xmrig @@ -144,7 +155,8 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : m_threads(std::thread::hardware_concurrency()), m_assembly(Assembly::NONE), m_aes(has_aes_ni()), - m_avx2(has_avx2()) + m_avx2(has_avx2()), + m_pdpe1gb(has_pdpe1gb()) { cpu_brand_string(m_brand); diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h index 4c68c5f81..ecbd3e237 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.h +++ b/src/backend/cpu/platform/BasicCpuInfo.h @@ -44,6 +44,7 @@ protected: inline Assembly::Id assembly()
const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } + inline bool hasOneGbPages() const override { return m_pdpe1gb; } inline const char *brand() const override { return m_brand; } inline size_t cores() const override { return 0; } inline size_t L2() const override { return 0; } @@ -60,6 +61,7 @@ private: Assembly m_assembly; bool m_aes; const bool m_avx2; + const bool m_pdpe1gb; }; diff --git a/src/config.json b/src/config.json index df366c0ff..dd64f407d 100644 --- a/src/config.json +++ b/src/config.json @@ -22,6 +22,7 @@ "cpu": { "enabled": true, "huge-pages": true, + "1gb-pages": false, "hw-aes": null, "priority": null, "memory-pool": false, diff --git a/src/core/Miner.cpp b/src/core/Miner.cpp index 5551268a6..4e2f24bac 100644 --- a/src/core/Miner.cpp +++ b/src/core/Miner.cpp @@ -236,7 +236,7 @@ public: # ifdef XMRIG_ALGO_RANDOMX inline bool initRX() { - return Rx::init(job, controller->config()->rx(), controller->config()->cpu().isHugePages()); + return Rx::init(job, controller->config()->rx(), controller->config()->cpu().isHugePages(), controller->config()->cpu().isOneGbPages()); } # endif diff --git a/src/crypto/common/VirtualMemory.h b/src/crypto/common/VirtualMemory.h index 1c2e37d20..90ecdd691 100644 --- a/src/crypto/common/VirtualMemory.h +++ b/src/crypto/common/VirtualMemory.h @@ -61,6 +61,7 @@ public: static uint32_t bindToNUMANode(int64_t affinity); static void *allocateExecutableMemory(size_t size); static void *allocateLargePagesMemory(size_t size); + static void *allocateOneGbPagesMemory(size_t size); static void destroy(); static void flushInstructionCache(void *p, size_t size); static void freeLargePagesMemory(void *p, size_t size); @@ -81,6 +82,7 @@ private: static void osInit(bool hugePages); bool allocateLargePagesMemory(); + bool allocateOneGbPagesMemory(); void freeLargePagesMemory(); const size_t m_size; diff --git a/src/crypto/common/VirtualMemory_unix.cpp b/src/crypto/common/VirtualMemory_unix.cpp index ffa4b137d..2e6eed304 100644 --- a/src/crypto/common/VirtualMemory_unix.cpp +++ b/src/crypto/common/VirtualMemory_unix.cpp @@ -58,24 +58,33 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size) void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size) { - int flag_1gb = 0; - # if defined(__APPLE__) void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0); # elif defined(__FreeBSD__) void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0); # else + void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0); +# endif + + return mem == MAP_FAILED ? nullptr : mem; +} + + +void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t size) +{ +# if defined(__APPLE__) + void *mem = MAP_FAILED; +# elif defined(__FreeBSD__) + void *mem = MAP_FAILED; +# else # if defined(MAP_HUGE_1GB) - flag_1gb = (size > (1UL << 30)) ? MAP_HUGE_1GB : 0; + constexpr int flag_1gb = MAP_HUGE_1GB; # elif defined(MAP_HUGE_SHIFT) - flag_1gb = (size > (1UL << 30)) ? 
(30 << MAP_HUGE_SHIFT); # endif void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_1gb, 0, 0); - if (mem == MAP_FAILED) { - mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0); - } # endif return mem == MAP_FAILED ? nullptr : mem; @@ -132,6 +141,25 @@ bool xmrig::VirtualMemory::allocateLargePagesMemory() } +bool xmrig::VirtualMemory::allocateOneGbPagesMemory() +{ + m_scratchpad = static_cast<uint8_t*>(allocateOneGbPagesMemory(m_size)); + if (m_scratchpad) { + m_flags.set(FLAG_HUGEPAGES, true); + + madvise(m_scratchpad, m_size, MADV_RANDOM | MADV_WILLNEED); + + if (mlock(m_scratchpad, m_size) == 0) { + m_flags.set(FLAG_LOCK, true); + } + + return true; + } + + return false; +} + + void xmrig::VirtualMemory::freeLargePagesMemory() { if (m_flags.test(FLAG_LOCK)) { diff --git a/src/crypto/common/VirtualMemory_win.cpp b/src/crypto/common/VirtualMemory_win.cpp index bfd8de1d6..70e48d3a0 100644 --- a/src/crypto/common/VirtualMemory_win.cpp +++ b/src/crypto/common/VirtualMemory_win.cpp @@ -175,6 +175,12 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size) } +void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t size) +{ + return nullptr; +} + + void xmrig::VirtualMemory::flushInstructionCache(void *p, size_t size) { ::FlushInstructionCache(GetCurrentProcess(), p, size); @@ -221,6 +227,12 @@ bool xmrig::VirtualMemory::allocateLargePagesMemory() return false; } +bool xmrig::VirtualMemory::allocateOneGbPagesMemory() +{ + m_scratchpad = nullptr; + return false; +} + void xmrig::VirtualMemory::freeLargePagesMemory() { diff --git a/src/crypto/randomx/allocator.cpp b/src/crypto/randomx/allocator.cpp index ff708a62c..d46393f1f 100644 --- a/src/crypto/randomx/allocator.cpp +++ b/src/crypto/randomx/allocator.cpp @@ -57,4 +57,12 @@ namespace randomx { freePagedMemory(ptr, count); }; + void* OneGbPageAllocator::allocMemory(size_t count) { + return allocOneGbPagesMemory(count); + } + + void OneGbPageAllocator::freeMemory(void* ptr, size_t count) { + freePagedMemory(ptr, count); + }; + } diff --git a/src/crypto/randomx/allocator.hpp b/src/crypto/randomx/allocator.hpp index d7aa3f95d..74d8b0688 100644 --- a/src/crypto/randomx/allocator.hpp +++ b/src/crypto/randomx/allocator.hpp @@ -43,4 +43,9 @@ namespace randomx { static void freeMemory(void*, size_t); }; + struct OneGbPageAllocator { + static void* allocMemory(size_t); + static void freeMemory(void*, size_t); + }; + } \ No newline at end of file diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index f9ce93f8c..44d881ebf 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -333,7 +333,11 @@ extern "C" { try { dataset = new randomx_dataset(); - if (flags & RANDOMX_FLAG_LARGE_PAGES) { + if (flags & RANDOMX_FLAG_1GB_PAGES) { + dataset->dealloc = &randomx::deallocDataset; + dataset->memory = (uint8_t*)randomx::OneGbPageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE); + } + else if (flags & RANDOMX_FLAG_LARGE_PAGES) { dataset->dealloc = &randomx::deallocDataset; dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE); } diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index ea3bb0990..95bfdbf47 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -48,6 +48,7 @@ enum randomx_flags { RANDOMX_FLAG_HARD_AES = 2, RANDOMX_FLAG_FULL_MEM = 4,
RANDOMX_FLAG_JIT = 8, + RANDOMX_FLAG_1GB_PAGES = 16, }; diff --git a/src/crypto/randomx/virtual_memory.cpp b/src/crypto/randomx/virtual_memory.cpp index 06165ffb6..48a8a8d25 100644 --- a/src/crypto/randomx/virtual_memory.cpp +++ b/src/crypto/randomx/virtual_memory.cpp @@ -53,6 +53,16 @@ void* allocLargePagesMemory(std::size_t bytes) { } +void* allocOneGbPagesMemory(std::size_t bytes) { + void* mem = xmrig::VirtualMemory::allocateOneGbPagesMemory(bytes); + if (mem == nullptr) { + throw std::runtime_error("Failed to allocate 1GB pages memory"); + } + + return mem; +} + + void freePagedMemory(void* ptr, std::size_t bytes) { xmrig::VirtualMemory::freeLargePagesMemory(ptr, bytes); } diff --git a/src/crypto/randomx/virtual_memory.hpp b/src/crypto/randomx/virtual_memory.hpp index d3b31db12..8c5b49005 100644 --- a/src/crypto/randomx/virtual_memory.hpp +++ b/src/crypto/randomx/virtual_memory.hpp @@ -32,4 +32,5 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. void* allocExecutableMemory(std::size_t); void* allocLargePagesMemory(std::size_t); +void* allocOneGbPagesMemory(std::size_t); void freePagedMemory(void*, std::size_t); diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index b08f844e3..a5767352a 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -60,7 +60,7 @@ const char *xmrig::rx_tag() } -bool xmrig::Rx::init(const Job &job, const RxConfig &config, bool hugePages) +bool xmrig::Rx::init(const Job &job, const RxConfig &config, bool hugePages, bool oneGbPages) { if (job.algorithm().family() != Algorithm::RANDOM_X) { return true; @@ -70,7 +70,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, bool hugePages) return true; } - d_ptr->queue.enqueue(job, config.nodeset(), config.threads(), hugePages, config.mode()); + d_ptr->queue.enqueue(job, config.nodeset(), config.threads(), hugePages, oneGbPages, config.mode()); return false; } diff --git a/src/crypto/rx/Rx.h b/src/crypto/rx/Rx.h index 4a81f5d5b..74a7ef595 100644 --- a/src/crypto/rx/Rx.h +++ b/src/crypto/rx/Rx.h @@ -46,7 +46,7 @@ class Rx { public: - static bool init(const Job &job, const RxConfig &config, bool hugePages); + static bool init(const Job &job, const RxConfig &config, bool hugePages, bool oneGbPages); static bool isReady(const Job &job); static RxDataset *dataset(const Job &job, uint32_t nodeId); static std::pair<uint32_t, uint32_t> hugePages(); diff --git a/src/crypto/rx/RxBasicStorage.cpp b/src/crypto/rx/RxBasicStorage.cpp index bc1ceb2d3..67ede47f9 100644 --- a/src/crypto/rx/RxBasicStorage.cpp +++ b/src/crypto/rx/RxBasicStorage.cpp @@ -69,11 +69,11 @@ public: } - inline void createDataset(bool hugePages, RxConfig::Mode mode) + inline void createDataset(bool hugePages, bool oneGbPages, RxConfig::Mode mode) { const uint64_t ts = Chrono::steadyMSecs(); - m_dataset = new RxDataset(hugePages, true, mode); + m_dataset = new RxDataset(hugePages, oneGbPages, true, mode); printAllocStatus(ts); } @@ -157,12 +157,12 @@ std::pair<uint32_t, uint32_t> xmrig::RxBasicStorage::hugePages() const } -void xmrig::RxBasicStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode mode) +void xmrig::RxBasicStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) { d_ptr->setSeed(seed); if (!d_ptr->dataset()) { - d_ptr->createDataset(hugePages, mode); + d_ptr->createDataset(hugePages, oneGbPages, mode); } d_ptr->initDataset(threads); } diff --git a/src/crypto/rx/RxBasicStorage.h b/src/crypto/rx/RxBasicStorage.h index bd6575d22..edabff658 100644 ---
a/src/crypto/rx/RxBasicStorage.h +++ b/src/crypto/rx/RxBasicStorage.h @@ -50,7 +50,7 @@ public: protected: RxDataset *dataset(const Job &job, uint32_t nodeId) const override; std::pair<uint32_t, uint32_t> hugePages() const override; - void init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode mode) override; + void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) override; private: RxBasicStoragePrivate *d_ptr; diff --git a/src/crypto/rx/RxDataset.cpp b/src/crypto/rx/RxDataset.cpp index ce41a58ea..2b387e3c6 100644 --- a/src/crypto/rx/RxDataset.cpp +++ b/src/crypto/rx/RxDataset.cpp @@ -29,7 +29,6 @@ #include "backend/common/Tags.h" #include "base/io/log/Log.h" #include "crypto/common/VirtualMemory.h" -#include "crypto/randomx/randomx.h" #include "crypto/rx/RxAlgo.h" #include "crypto/rx/RxCache.h" @@ -41,10 +40,10 @@ static_assert(RANDOMX_FLAG_LARGE_PAGES == 1, "RANDOMX_FLAG_LARGE_PAGES flag mismatch"); -xmrig::RxDataset::RxDataset(bool hugePages, bool cache, RxConfig::Mode mode) : +xmrig::RxDataset::RxDataset(bool hugePages, bool oneGbPages, bool cache, RxConfig::Mode mode) : m_mode(mode) { - allocate(hugePages); + allocate(hugePages, oneGbPages); if (cache) { m_cache = new RxCache(hugePages); @@ -123,11 +122,13 @@ size_t xmrig::RxDataset::size(bool cache) const std::pair<uint32_t, uint32_t> xmrig::RxDataset::hugePages(bool cache) const { constexpr size_t twoMiB = 2U * 1024U * 1024U; + constexpr size_t oneGiB = 1024U * 1024U * 1024U; constexpr size_t cacheSize = VirtualMemory::align(RxCache::maxSize(), twoMiB) / twoMiB; - size_t total = VirtualMemory::align(maxSize(), twoMiB) / twoMiB; + size_t datasetPageSize = isOneGbPages() ? oneGiB : twoMiB; + size_t total = VirtualMemory::align(maxSize(), datasetPageSize) / datasetPageSize; uint32_t count = 0; - if (isHugePages()) { + if (isHugePages() || isOneGbPages()) { count += total; } @@ -159,7 +160,7 @@ void xmrig::RxDataset::setRaw(const void *raw) } -void xmrig::RxDataset::allocate(bool hugePages) +void xmrig::RxDataset::allocate(bool hugePages, bool oneGbPages) { if (m_mode == RxConfig::LightMode) { LOG_ERR(CLEAR "%s" RED_BOLD_S "fast RandomX mode disabled by config", rx_tag()); @@ -174,8 +175,14 @@ } if (hugePages) { - m_flags = RANDOMX_FLAG_LARGE_PAGES; + m_flags = oneGbPages ?
RANDOMX_FLAG_1GB_PAGES : RANDOMX_FLAG_LARGE_PAGES; m_dataset = randomx_alloc_dataset(static_cast<randomx_flags>(m_flags)); + + if (oneGbPages && !m_dataset) { + LOG_ERR(CLEAR "%s" RED_BOLD_S "Failed to allocate RandomX dataset using 1GB pages", rx_tag()); + m_flags = RANDOMX_FLAG_LARGE_PAGES; + m_dataset = randomx_alloc_dataset(static_cast<randomx_flags>(m_flags)); + } } if (!m_dataset) { diff --git a/src/crypto/rx/RxDataset.h b/src/crypto/rx/RxDataset.h index 304559561..0dba79bb8 100644 --- a/src/crypto/rx/RxDataset.h +++ b/src/crypto/rx/RxDataset.h @@ -31,6 +31,7 @@ #include "base/tools/Object.h" #include "crypto/common/Algorithm.h" #include "crypto/randomx/configuration.h" +#include "crypto/randomx/randomx.h" #include "crypto/rx/RxConfig.h" @@ -50,11 +51,12 @@ class RxDataset public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxDataset) - RxDataset(bool hugePages, bool cache, RxConfig::Mode mode); + RxDataset(bool hugePages, bool oneGbPages, bool cache, RxConfig::Mode mode); RxDataset(RxCache *cache); ~RxDataset(); - inline bool isHugePages() const { return m_flags & 1; } + inline bool isHugePages() const { return m_flags & RANDOMX_FLAG_LARGE_PAGES; } + inline bool isOneGbPages() const { return m_flags & RANDOMX_FLAG_1GB_PAGES; } inline randomx_dataset *get() const { return m_dataset; } inline RxCache *cache() const { return m_cache; } inline void setCache(RxCache *cache) { m_cache = cache; } @@ -68,7 +70,7 @@ public: bool init(const Buffer &seed, uint32_t numThreads); size_t size(bool cache = true) const; std::pair<uint32_t, uint32_t> hugePages(bool cache = true) const; void *raw() const; static inline constexpr size_t maxSize() { return RANDOMX_DATASET_MAX_SIZE; } private: - void allocate(bool hugePages); + void allocate(bool hugePages, bool oneGbPages); const RxConfig::Mode m_mode = RxConfig::FastMode; int m_flags = 0; diff --git a/src/crypto/rx/RxNUMAStorage.cpp b/src/crypto/rx/RxNUMAStorage.cpp index 646d3d954..c99750681 100644 --- a/src/crypto/rx/RxNUMAStorage.cpp +++ b/src/crypto/rx/RxNUMAStorage.cpp @@ -120,12 +120,12 @@ public: } - inline void createDatasets(bool hugePages) + inline void createDatasets(bool hugePages, bool oneGbPages) { const uint64_t ts = Chrono::steadyMSecs(); for (uint32_t node : m_nodeset) { - m_threads.emplace_back(allocate, this, node, hugePages); + m_threads.emplace_back(allocate, this, node, hugePages, oneGbPages); } join(); @@ -188,7 +188,7 @@ private: - static void allocate(RxNUMAStoragePrivate *d_ptr, uint32_t nodeId, bool hugePages) + static void allocate(RxNUMAStoragePrivate *d_ptr, uint32_t nodeId, bool hugePages, bool oneGbPages) { const uint64_t ts = Chrono::steadyMSecs(); @@ -198,7 +198,7 @@ return; } - auto dataset = new RxDataset(hugePages, false, RxConfig::FastMode); + auto dataset = new RxDataset(hugePages, oneGbPages, false, RxConfig::FastMode); if (!dataset->get()) { printSkipped(nodeId, "failed to allocate dataset"); @@ -346,7 +346,7 @@ std::pair<uint32_t, uint32_t> xmrig::RxNUMAStorage::hugePages() const } -void xmrig::RxNUMAStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode) +void xmrig::RxNUMAStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode) { d_ptr->setSeed(seed); if (!d_ptr->isAllocated()) { - d_ptr->createDatasets(hugePages); + d_ptr->createDatasets(hugePages, oneGbPages); } d_ptr->initDatasets(threads); } diff --git a/src/crypto/rx/RxNUMAStorage.h b/src/crypto/rx/RxNUMAStorage.h index bed0bc75e..c3d77000a 100644 --- a/src/crypto/rx/RxNUMAStorage.h +++ b/src/crypto/rx/RxNUMAStorage.h @@ -53,7 +53,7 @@ public: protected: RxDataset *dataset(const Job &job, uint32_t nodeId) const override; std::pair<uint32_t, uint32_t> hugePages() const override; - void init(const RxSeed &seed, uint32_t
threads, bool hugePages, RxConfig::Mode mode) override; + void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) override; private: RxNUMAStoragePrivate *d_ptr; diff --git a/src/crypto/rx/RxQueue.cpp b/src/crypto/rx/RxQueue.cpp index b420d0c2d..286538f15 100644 --- a/src/crypto/rx/RxQueue.cpp +++ b/src/crypto/rx/RxQueue.cpp @@ -94,7 +94,7 @@ std::pair<uint32_t, uint32_t> xmrig::RxQueue::hugePages() } -void xmrig::RxQueue::enqueue(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages, RxConfig::Mode mode) +void xmrig::RxQueue::enqueue(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) { std::unique_lock<std::mutex> lock(m_mutex); @@ -114,7 +114,7 @@ return; } - m_queue.emplace_back(seed, nodeset, threads, hugePages, mode); + m_queue.emplace_back(seed, nodeset, threads, hugePages, oneGbPages, mode); m_seed = seed; m_state = STATE_PENDING; @@ -156,7 +156,7 @@ void xmrig::RxQueue::backgroundInit() Buffer::toHex(item.seed.data().data(), 8).data() ); - m_storage->init(item.seed, item.threads, item.hugePages, item.mode); + m_storage->init(item.seed, item.threads, item.hugePages, item.oneGbPages, item.mode); lock = std::unique_lock<std::mutex>(m_mutex); diff --git a/src/crypto/rx/RxQueue.h b/src/crypto/rx/RxQueue.h index 2d77e2e3c..6d8fa3281 100644 --- a/src/crypto/rx/RxQueue.h +++ b/src/crypto/rx/RxQueue.h @@ -53,8 +53,9 @@ class RxDataset; class RxQueueItem { public: - RxQueueItem(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages, RxConfig::Mode mode) : + RxQueueItem(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) : hugePages(hugePages), + oneGbPages(oneGbPages), mode(mode), seed(seed), nodeset(nodeset), @@ -62,6 +63,7 @@ public: {} const bool hugePages; + const bool oneGbPages; const RxConfig::Mode mode; const RxSeed seed; const std::vector<uint32_t> nodeset; @@ -80,7 +82,7 @@ public: bool isReady(const Job &job); RxDataset *dataset(const Job &job, uint32_t nodeId); std::pair<uint32_t, uint32_t> hugePages(); - void enqueue(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages, RxConfig::Mode mode); + void enqueue(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode); private: enum State { From b8762ed428cfac4903f880b260000b274d2c1bd4 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 6 Dec 2019 10:56:43 +0700 Subject: [PATCH 05/31] #1306 Added some network workarounds.
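A note on the 1GB-pages patch (PATCH 04) above: on Linux it tries a 1 GB hugetlb mapping first and falls back to the default 2 MB huge pages only if that fails. The following standalone, Linux-only sketch recaps that strategy; the helper name alloc_dataset_pages is hypothetical, and fd is passed as -1 per POSIX, whereas the patch passes 0 (which Linux also accepts for anonymous mappings).

#include <sys/mman.h>
#include <cstddef>

// Sketch only: try 1 GB huge pages, then fall back to the default huge
// page size (2 MB on x86-64). Returns nullptr if no huge pages are available.
static void *alloc_dataset_pages(std::size_t size)
{
    const int base = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE;

#ifdef MAP_HUGE_1GB
    // Needs 1 GB pages reserved, e.g. hugepagesz=1G hugepages=N on the kernel command line.
    void *mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, base | MAP_HUGE_1GB, -1, 0);
    if (mem != MAP_FAILED) {
        return mem;
    }
#endif

    void *mem2 = mmap(nullptr, size, PROT_READ | PROT_WRITE, base, -1, 0);
    return mem2 == MAP_FAILED ? nullptr : mem2;
}
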
--- src/base/net/stratum/Client.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/base/net/stratum/Client.cpp b/src/base/net/stratum/Client.cpp index 9729f3fbc..f9c56e05a 100644 --- a/src/base/net/stratum/Client.cpp +++ b/src/base/net/stratum/Client.cpp @@ -338,6 +338,10 @@ bool xmrig::Client::isCriticalError(const char *message) return true; } + if (strncasecmp(message, "Invalid job id", 14) == 0) { + return true; + } + return false; } @@ -558,7 +562,7 @@ void xmrig::Client::connect(sockaddr *addr) { setState(ConnectingState); - uv_connect_t *req = new uv_connect_t; + auto req = new uv_connect_t; req->data = m_storage.ptr(m_key); m_socket = new uv_tcp_t; @@ -799,7 +803,7 @@ void xmrig::Client::ping() void xmrig::Client::read(ssize_t nread) { - const size_t size = static_cast<size_t>(nread); + const auto size = static_cast<size_t>(nread); if (nread > 0 && size > m_recvBuf.available()) { nread = UV_ENOBUFS; @@ -859,7 +863,7 @@ void xmrig::Client::reconnect() void xmrig::Client::setState(SocketState state) { - LOG_DEBUG("[%s] state: \"%s\"", url(), states[state]); + LOG_DEBUG("[%s] state: \"%s\" -> \"%s\"", url(), states[m_state], states[state]); if (m_state == state) { return; @@ -956,6 +960,12 @@ void xmrig::Client::onConnect(uv_connect_t *req, int status) return; } + if (client->state() == ConnectedState) { + LOG_ERR("[%s] already connected", client->url()); + + return; + } + client->m_stream = static_cast<uv_stream_t*>(req->handle); client->m_stream->data = req->data; client->setState(ConnectedState); From f1827e925ec46db5e6f04e563659b8c6d4042b69 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 6 Dec 2019 11:56:13 +0700 Subject: [PATCH 06/31] Removed strdup from FileLog. --- src/base/io/log/backends/FileLog.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/base/io/log/backends/FileLog.cpp b/src/base/io/log/backends/FileLog.cpp index 1ff016371..c581b880d 100644 --- a/src/base/io/log/backends/FileLog.cpp +++ b/src/base/io/log/backends/FileLog.cpp @@ -24,13 +24,14 @@ */ -#include <string.h> -#include <uv.h> - - #include "base/io/log/backends/FileLog.h" +#include <cassert> +#include <cstring> +#include <uv.h> + + xmrig::FileLog::FileLog(const char *fileName) { uv_fs_t req; @@ -45,13 +46,12 @@ void xmrig::FileLog::print(int, const char *line, size_t, size_t size, bool colo return; } -# ifdef _WIN32 - uv_buf_t buf = uv_buf_init(strdup(line), static_cast<unsigned int>(size)); -# else - uv_buf_t buf = uv_buf_init(strdup(line), size); -# endif + assert(strlen(line) == size); - uv_fs_t *req = new uv_fs_t; + uv_buf_t buf = uv_buf_init(new char[size], size); + memcpy(buf.base, line, size); + + auto req = new uv_fs_t; req->data = buf.base; uv_fs_write(uv_default_loop(), req, m_file, &buf, 1, -1, FileLog::onWrite); From 118b2e4a68070109e1c84e944753a5a4a81f1afc Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 6 Dec 2019 18:39:14 +0700 Subject: [PATCH 07/31] Updated libuv version in build_deps.sh.
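In the FileLog change above (PATCH 06), uv_fs_write() is asynchronous, so the line is copied into a heap buffer (new char[size]) that must outlive the call; req->data carries the pointer so the completion callback can release it. The callback body is outside the hunk; presumably it looks like this sketch (on_write_done is a hypothetical stand-in for FileLog::onWrite):

#include <uv.h>

// Sketch, an assumption rather than part of the diff: release the write
// buffer and request once libuv has finished the asynchronous write.
static void on_write_done(uv_fs_t *req)
{
    delete [] static_cast<char *>(req->data); // pairs with new char[size]

    uv_fs_req_cleanup(req);
    delete req;                               // pairs with new uv_fs_t
}
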
--- scripts/build_deps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build_deps.sh b/scripts/build_deps.sh index d89483232..7c4fc3dad 100755 --- a/scripts/build_deps.sh +++ b/scripts/build_deps.sh @@ -1,6 +1,6 @@ #!/bin/bash -e -UV_VERSION="1.33.1" +UV_VERSION="1.34.0" OPENSSL_VERSION="1.1.1d" HWLOC_VERSION="2.1.0" From aa3dc7543430890de4d12c574a52f7b4737a0dd9 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 6 Dec 2019 13:43:59 +0100 Subject: [PATCH 08/31] Fix ARM compilation --- src/backend/cpu/platform/BasicCpuInfo_arm.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp index e52bdf942..5dd54fe92 100644 --- a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp @@ -39,7 +39,8 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : m_brand(), m_threads(std::thread::hardware_concurrency()), m_aes(false), - m_avx2(false) + m_avx2(false), + m_pdpe1gb(false) { # ifdef XMRIG_ARMv8 memcpy(m_brand, "ARMv8", 5); From e3422979d1c56a93b8d8b44df77db6ca1d7adbe6 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 6 Dec 2019 13:55:33 +0100 Subject: [PATCH 09/31] Fixed compilation on systems without 1GB pages support --- src/crypto/common/VirtualMemory_unix.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/crypto/common/VirtualMemory_unix.cpp b/src/crypto/common/VirtualMemory_unix.cpp index 2e6eed304..a7790aaf6 100644 --- a/src/crypto/common/VirtualMemory_unix.cpp +++ b/src/crypto/common/VirtualMemory_unix.cpp @@ -82,6 +82,8 @@ void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t size) constexpr int flag_1gb = MAP_HUGE_1GB; # elif defined(MAP_HUGE_SHIFT) constexpr int flag_1gb = (30 << MAP_HUGE_SHIFT); +# else + constexpr int flag_1gb = 0; # endif void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_1gb, 0, 0); From 3a75f39935815a5cefc1c376fb706cf039acc963 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 6 Dec 2019 22:17:04 +0700 Subject: [PATCH 10/31] #1386 Added priority for RandomX dataset initialization threads. 
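On the MAP_HUGE_SHIFT fallback in PATCH 09 above: Linux encodes the requested huge page size as log2(page size) in the high bits of the mmap() flags, so (30 << MAP_HUGE_SHIFT) is exactly what newer kernel headers define as MAP_HUGE_1GB (2^30 bytes = 1 GiB; 2 MB pages would be 21 << MAP_HUGE_SHIFT). A compile-time check of that equivalence, for illustration:

#include <sys/mman.h>

// log2(2 MiB) = 21, log2(1 GiB) = 30; the kernel reads these bits of the
// mmap flags to choose the hugetlb pool.
static_assert((1UL << 30) == 1024UL * 1024UL * 1024UL, "2^30 bytes is 1 GiB");
#if defined(MAP_HUGE_SHIFT) && defined(MAP_HUGE_1GB)
static_assert(MAP_HUGE_1GB == (30 << MAP_HUGE_SHIFT), "kernel flag encoding");
#endif
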
--- src/backend/common/interfaces/IRxStorage.h | 6 +++--- src/core/Miner.cpp | 4 +++- src/crypto/rx/Rx.cpp | 4 ++-- src/crypto/rx/Rx.h | 2 +- src/crypto/rx/RxBasicStorage.cpp | 8 ++++---- src/crypto/rx/RxBasicStorage.h | 2 +- src/crypto/rx/RxDataset.cpp | 21 ++++++++++++++++++--- src/crypto/rx/RxDataset.h | 2 +- src/crypto/rx/RxNUMAStorage.cpp | 8 ++++---- src/crypto/rx/RxNUMAStorage.h | 2 +- src/crypto/rx/RxQueue.cpp | 6 +++--- src/crypto/rx/RxQueue.h | 6 ++++-- 12 files changed, 45 insertions(+), 26 deletions(-) diff --git a/src/backend/common/interfaces/IRxStorage.h b/src/backend/common/interfaces/IRxStorage.h index f64850f77..372eee6f9 100644 --- a/src/backend/common/interfaces/IRxStorage.h +++ b/src/backend/common/interfaces/IRxStorage.h @@ -44,9 +44,9 @@ class IRxStorage public: virtual ~IRxStorage() = default; - virtual RxDataset *dataset(const Job &job, uint32_t nodeId) const = 0; - virtual std::pair<uint32_t, uint32_t> hugePages() const = 0; - virtual void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) = 0; + virtual RxDataset *dataset(const Job &job, uint32_t nodeId) const = 0; + virtual std::pair<uint32_t, uint32_t> hugePages() const = 0; + virtual void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) = 0; }; diff --git a/src/core/Miner.cpp b/src/core/Miner.cpp index 4e2f24bac..d9b734e5d 100644 --- a/src/core/Miner.cpp +++ b/src/core/Miner.cpp @@ -236,7 +236,9 @@ public: # ifdef XMRIG_ALGO_RANDOMX inline bool initRX() { - return Rx::init(job, controller->config()->rx(), controller->config()->cpu().isHugePages(), controller->config()->cpu().isOneGbPages()); + const auto &cpu = controller->config()->cpu(); + + return Rx::init(job, controller->config()->rx(), cpu.isHugePages(), cpu.isOneGbPages(), cpu.priority()); } # endif diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index a5767352a..7e99efa90 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -60,7 +60,7 @@ const char *xmrig::rx_tag() } -bool xmrig::Rx::init(const Job &job, const RxConfig &config, bool hugePages, bool oneGbPages) +bool xmrig::Rx::init(const Job &job, const RxConfig &config, bool hugePages, bool oneGbPages, int priority) { if (job.algorithm().family() != Algorithm::RANDOM_X) { return true; @@ -70,7 +70,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, bool hugePages, boo return true; } - d_ptr->queue.enqueue(job, config.nodeset(), config.threads(), hugePages, oneGbPages, config.mode()); + d_ptr->queue.enqueue(job, config.nodeset(), config.threads(), hugePages, oneGbPages, config.mode(), priority); return false; } diff --git a/src/crypto/rx/Rx.h b/src/crypto/rx/Rx.h index 74a7ef595..6b1db8139 100644 --- a/src/crypto/rx/Rx.h +++ b/src/crypto/rx/Rx.h @@ -46,7 +46,7 @@ class Rx { public: - static bool init(const Job &job, const RxConfig &config, bool hugePages, bool oneGbPages); + static bool init(const Job &job, const RxConfig &config, bool hugePages, bool oneGbPages, int priority); static bool isReady(const Job &job); static RxDataset *dataset(const Job &job, uint32_t nodeId); static std::pair<uint32_t, uint32_t> hugePages(); diff --git a/src/crypto/rx/RxBasicStorage.cpp b/src/crypto/rx/RxBasicStorage.cpp index 67ede47f9..a0656e46b 100644 --- a/src/crypto/rx/RxBasicStorage.cpp +++ b/src/crypto/rx/RxBasicStorage.cpp @@ -78,11 +78,11 @@ public: } - inline void initDataset(uint32_t threads) + inline void initDataset(uint32_t threads, int priority) { const uint64_t ts = Chrono::steadyMSecs(); -
m_dataset->init(m_seed.data(), threads); + m_dataset->init(m_seed.data(), threads, priority); LOG_INFO("%s" GREEN_BOLD("dataset ready") BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), Chrono::steadyMSecs() - ts); @@ -157,7 +157,7 @@ std::pair<uint32_t, uint32_t> xmrig::RxBasicStorage::hugePages() const } -void xmrig::RxBasicStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) +void xmrig::RxBasicStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) { d_ptr->setSeed(seed); @@ -165,5 +165,5 @@ d_ptr->createDataset(hugePages, oneGbPages, mode); } - d_ptr->initDataset(threads); + d_ptr->initDataset(threads, priority); } diff --git a/src/crypto/rx/RxBasicStorage.h b/src/crypto/rx/RxBasicStorage.h index edabff658..bd8a5e00d 100644 --- a/src/crypto/rx/RxBasicStorage.h +++ b/src/crypto/rx/RxBasicStorage.h @@ -50,7 +50,7 @@ public: protected: RxDataset *dataset(const Job &job, uint32_t nodeId) const override; std::pair<uint32_t, uint32_t> hugePages() const override; - void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) override; + void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) override; private: RxBasicStoragePrivate *d_ptr; diff --git a/src/crypto/rx/RxDataset.cpp b/src/crypto/rx/RxDataset.cpp index 2b387e3c6..5786926a3 100644 --- a/src/crypto/rx/RxDataset.cpp +++ b/src/crypto/rx/RxDataset.cpp @@ -28,6 +28,7 @@ #include "crypto/rx/RxDataset.h" #include "backend/common/Tags.h" #include "base/io/log/Log.h" +#include "base/kernel/Platform.h" #include "crypto/common/VirtualMemory.h" #include "crypto/rx/RxAlgo.h" #include "crypto/rx/RxCache.h" @@ -40,6 +41,20 @@ static_assert(RANDOMX_FLAG_LARGE_PAGES == 1, "RANDOMX_FLAG_LARGE_PAGES flag mismatch"); +namespace xmrig { + + +static void init_dataset_wrapper(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount, int priority) +{ + Platform::setThreadPriority(priority); + + randomx_init_dataset(dataset, cache, startItem, itemCount); +} + + +} // namespace xmrig + + xmrig::RxDataset::RxDataset(bool hugePages, bool oneGbPages, bool cache, RxConfig::Mode mode) : m_mode(mode) { @@ -67,7 +82,7 @@ xmrig::RxDataset::~RxDataset() } -bool xmrig::RxDataset::init(const Buffer &seed, uint32_t numThreads) +bool xmrig::RxDataset::init(const Buffer &seed, uint32_t numThreads, int priority) { if (!m_cache) { return false; @@ -88,7 +103,7 @@ bool xmrig::RxDataset::init(const Buffer &seed, uint32_t numThreads) for (uint64_t i = 0; i < numThreads; ++i) { const uint32_t a = (datasetItemCount * i) / numThreads; const uint32_t b = (datasetItemCount * (i + 1)) / numThreads; - threads.emplace_back(randomx_init_dataset, m_dataset, m_cache->get(), a, b - a); + threads.emplace_back(init_dataset_wrapper, m_dataset, m_cache->get(), a, b - a, priority); } for (uint32_t i = 0; i < numThreads; ++i) { threads[i].join(); } } else { - randomx_init_dataset(m_dataset, m_cache->get(), 0, datasetItemCount); + init_dataset_wrapper(m_dataset, m_cache->get(), 0, datasetItemCount, priority); } return true; diff --git a/src/crypto/rx/RxDataset.h b/src/crypto/rx/RxDataset.h index 0dba79bb8..ec9c7c8a6 100644 --- a/src/crypto/rx/RxDataset.h +++ b/src/crypto/rx/RxDataset.h @@ -61,7 +61,7 @@ public: inline RxCache *cache()
const { return m_cache; } inline void setCache(RxCache *cache) { m_cache = cache; } - bool init(const Buffer &seed, uint32_t numThreads); + bool init(const Buffer &seed, uint32_t numThreads, int priority); size_t size(bool cache = true) const; std::pair hugePages(bool cache = true) const; void *raw() const; diff --git a/src/crypto/rx/RxNUMAStorage.cpp b/src/crypto/rx/RxNUMAStorage.cpp index c99750681..a6d191481 100644 --- a/src/crypto/rx/RxNUMAStorage.cpp +++ b/src/crypto/rx/RxNUMAStorage.cpp @@ -148,13 +148,13 @@ public: } - inline void initDatasets(uint32_t threads) + inline void initDatasets(uint32_t threads, int priority) { uint64_t ts = Chrono::steadyMSecs(); auto id = m_nodeset.front(); auto primary = dataset(id); - primary->init(m_seed.data(), threads); + primary->init(m_seed.data(), threads, priority); printDatasetReady(id, ts); @@ -346,7 +346,7 @@ std::pair xmrig::RxNUMAStorage::hugePages() const } -void xmrig::RxNUMAStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode) +void xmrig::RxNUMAStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode, int priority) { d_ptr->setSeed(seed); @@ -354,5 +354,5 @@ void xmrig::RxNUMAStorage::init(const RxSeed &seed, uint32_t threads, bool hugeP d_ptr->createDatasets(hugePages, oneGbPages); } - d_ptr->initDatasets(threads); + d_ptr->initDatasets(threads, priority); } diff --git a/src/crypto/rx/RxNUMAStorage.h b/src/crypto/rx/RxNUMAStorage.h index c3d77000a..e7ea842fb 100644 --- a/src/crypto/rx/RxNUMAStorage.h +++ b/src/crypto/rx/RxNUMAStorage.h @@ -53,7 +53,7 @@ public: protected: RxDataset *dataset(const Job &job, uint32_t nodeId) const override; std::pair hugePages() const override; - void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) override; + void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) override; private: RxNUMAStoragePrivate *d_ptr; diff --git a/src/crypto/rx/RxQueue.cpp b/src/crypto/rx/RxQueue.cpp index 286538f15..8ca4a3791 100644 --- a/src/crypto/rx/RxQueue.cpp +++ b/src/crypto/rx/RxQueue.cpp @@ -94,7 +94,7 @@ std::pair xmrig::RxQueue::hugePages() } -void xmrig::RxQueue::enqueue(const RxSeed &seed, const std::vector &nodeset, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode) +void xmrig::RxQueue::enqueue(const RxSeed &seed, const std::vector &nodeset, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) { std::unique_lock lock(m_mutex); @@ -114,7 +114,7 @@ void xmrig::RxQueue::enqueue(const RxSeed &seed, const std::vector &no return; } - m_queue.emplace_back(seed, nodeset, threads, hugePages, oneGbPages, mode); + m_queue.emplace_back(seed, nodeset, threads, hugePages, oneGbPages, mode, priority); m_seed = seed; m_state = STATE_PENDING; @@ -156,7 +156,7 @@ void xmrig::RxQueue::backgroundInit() Buffer::toHex(item.seed.data().data(), 8).data() ); - m_storage->init(item.seed, item.threads, item.hugePages, item.oneGbPages, item.mode); + m_storage->init(item.seed, item.threads, item.hugePages, item.oneGbPages, item.mode, item.priority); lock = std::unique_lock(m_mutex); diff --git a/src/crypto/rx/RxQueue.h b/src/crypto/rx/RxQueue.h index 6d8fa3281..8a362b8ed 100644 --- a/src/crypto/rx/RxQueue.h +++ b/src/crypto/rx/RxQueue.h @@ -53,9 +53,10 @@ class RxDataset; class RxQueueItem { public: - RxQueueItem(const RxSeed &seed, const std::vector &nodeset, uint32_t threads, bool 
hugePages, bool oneGbPages, RxConfig::Mode mode) : + RxQueueItem(const RxSeed &seed, const std::vector &nodeset, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) : hugePages(hugePages), oneGbPages(oneGbPages), + priority(priority), mode(mode), seed(seed), nodeset(nodeset), @@ -64,6 +65,7 @@ public: const bool hugePages; const bool oneGbPages; + const int priority; const RxConfig::Mode mode; const RxSeed seed; const std::vector nodeset; @@ -82,7 +84,7 @@ public: bool isReady(const Job &job); RxDataset *dataset(const Job &job, uint32_t nodeId); std::pair hugePages(); - void enqueue(const RxSeed &seed, const std::vector &nodeset, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode); + void enqueue(const RxSeed &seed, const std::vector &nodeset, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority); private: enum State { From e9e747f0d19a39e6637393ce82aca83fa175ee4d Mon Sep 17 00:00:00 2001 From: XMRig Date: Sat, 7 Dec 2019 22:18:06 +0700 Subject: [PATCH 11/31] #1385 "max-threads-hint" option now also limits RandomX dataset initialization threads. --- src/backend/cpu/CpuConfig.h | 1 + src/backend/cpu/platform/BasicCpuInfo.cpp | 2 +- src/backend/cpu/platform/HwlocCpuInfo.cpp | 2 +- src/core/Miner.cpp | 7 +------ src/crypto/rx/Rx.cpp | 5 +++-- src/crypto/rx/Rx.h | 3 ++- src/crypto/rx/RxConfig.cpp | 13 +++++++++++-- src/crypto/rx/RxConfig.h | 2 +- 8 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/backend/cpu/CpuConfig.h b/src/backend/cpu/CpuConfig.h index 74894dcd3..035a1673d 100644 --- a/src/backend/cpu/CpuConfig.h +++ b/src/backend/cpu/CpuConfig.h @@ -61,6 +61,7 @@ public: inline const String &argon2Impl() const { return m_argon2Impl; } inline const Threads &threads() const { return m_threads; } inline int priority() const { return m_priority; } + inline uint32_t limit() const { return m_limit; } private: void generate(); diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index cdc810c30..e6145278c 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -23,7 +23,7 @@ */ #include -#include +#include #include diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp index 3983e8b02..a66bf9fdd 100644 --- a/src/backend/cpu/platform/HwlocCpuInfo.cpp +++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp @@ -262,7 +262,7 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint3 void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const { - constexpr size_t oneMiB = 1024u * 1024u; + constexpr size_t oneMiB = 1024U * 1024U; size_t PUs = countByType(cache, HWLOC_OBJ_PU); if (PUs == 0) { diff --git a/src/core/Miner.cpp b/src/core/Miner.cpp index d9b734e5d..ed988f267 100644 --- a/src/core/Miner.cpp +++ b/src/core/Miner.cpp @@ -234,12 +234,7 @@ public: # ifdef XMRIG_ALGO_RANDOMX - inline bool initRX() - { - const auto &cpu = controller->config()->cpu(); - - return Rx::init(job, controller->config()->rx(), cpu.isHugePages(), cpu.isOneGbPages(), cpu.priority()); - } + inline bool initRX() { return Rx::init(job, controller->config()->rx(), controller->config()->cpu()); } # endif diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index 7e99efa90..84c9b5fc9 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -27,6 +27,7 @@ #include "crypto/rx/Rx.h" #include
"backend/common/Tags.h" +#include "backend/cpu/CpuConfig.h" #include "base/io/log/Log.h" #include "crypto/rx/RxConfig.h" #include "crypto/rx/RxQueue.h" @@ -60,7 +61,7 @@ const char *xmrig::rx_tag() } -bool xmrig::Rx::init(const Job &job, const RxConfig &config, bool hugePages, bool oneGbPages, int priority) +bool xmrig::Rx::init(const Job &job, const RxConfig &config, const CpuConfig &cpu) { if (job.algorithm().family() != Algorithm::RANDOM_X) { return true; @@ -70,7 +71,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, bool hugePages, boo return true; } - d_ptr->queue.enqueue(job, config.nodeset(), config.threads(), hugePages, oneGbPages, config.mode(), priority); + d_ptr->queue.enqueue(job, config.nodeset(), config.threads(cpu.limit()), cpu.isHugePages(), cpu.isOneGbPages(), config.mode(), cpu.priority()); return false; } diff --git a/src/crypto/rx/Rx.h b/src/crypto/rx/Rx.h index 6b1db8139..abaca9200 100644 --- a/src/crypto/rx/Rx.h +++ b/src/crypto/rx/Rx.h @@ -37,6 +37,7 @@ namespace xmrig class Algorithm; +class CpuConfig; class IRxListener; class Job; class RxConfig; @@ -46,7 +47,7 @@ class RxDataset; class Rx { public: - static bool init(const Job &job, const RxConfig &config, bool hugePages, bool oneGbPages, int priority); + static bool init(const Job &job, const RxConfig &config, const CpuConfig &cpu); static bool isReady(const Job &job); static RxDataset *dataset(const Job &job, uint32_t nodeId); static std::pair hugePages(); diff --git a/src/crypto/rx/RxConfig.cpp b/src/crypto/rx/RxConfig.cpp index ef823e60e..7ae7d35d8 100644 --- a/src/crypto/rx/RxConfig.cpp +++ b/src/crypto/rx/RxConfig.cpp @@ -30,6 +30,7 @@ #include #include +#include #ifdef _MSC_VER @@ -52,9 +53,17 @@ const char *xmrig::RxConfig::modeName() const } -uint32_t xmrig::RxConfig::threads() const +uint32_t xmrig::RxConfig::threads(uint32_t limit) const { - return m_threads < 1 ? static_cast(Cpu::info()->threads()) : static_cast(m_threads); + if (m_threads > 0) { + return m_threads; + } + + if (limit < 100) { + return std::max(static_cast(round(Cpu::info()->threads() * (limit / 100.0))), 1U); + } + + return Cpu::info()->threads(); } diff --git a/src/crypto/rx/RxConfig.h b/src/crypto/rx/RxConfig.h index 13ebb9d15..e3381fec6 100644 --- a/src/crypto/rx/RxConfig.h +++ b/src/crypto/rx/RxConfig.h @@ -55,7 +55,7 @@ public: # endif const char *modeName() const; - uint32_t threads() const; + uint32_t threads(uint32_t limit = 100) const; inline Mode mode() const { return m_mode; } From 8ef3e2ec14a464c0905fa54b5b83f956da11e448 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 8 Dec 2019 10:20:23 +0700 Subject: [PATCH 12/31] Fixed build without hwloc. --- src/backend/cpu/platform/AdvancedCpuInfo.cpp | 46 +++++++++++++++++--- src/backend/cpu/platform/AdvancedCpuInfo.h | 6 ++- src/backend/cpu/platform/BasicCpuInfo.cpp | 4 +- 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.cpp b/src/backend/cpu/platform/AdvancedCpuInfo.cpp index 5cae55e28..37a958dbc 100644 --- a/src/backend/cpu/platform/AdvancedCpuInfo.cpp +++ b/src/backend/cpu/platform/AdvancedCpuInfo.cpp @@ -22,6 +22,17 @@ * along with this program. If not, see . 
*/ +#include "backend/cpu/platform/AdvancedCpuInfo.h" +#include "3rdparty/libcpuid/libcpuid.h" + + +#ifdef _MSC_VER +# include +#else +# include +#endif + + #include #include #include @@ -29,10 +40,6 @@ #include -#include "3rdparty/libcpuid/libcpuid.h" -#include "backend/cpu/platform/AdvancedCpuInfo.h" - - namespace xmrig { @@ -54,11 +61,38 @@ static inline void cpu_brand_string(char out[64], const char *in) { } +static inline void cpuid(uint32_t level, int32_t output[4]) +{ + memset(output, 0, sizeof(int32_t) * 4); + +# ifdef _MSC_VER + __cpuid(output, static_cast(level)); +# else + __cpuid_count(level, 0, output[0], output[1], output[2], output[3]); +# endif +} + + +static inline bool has_feature(uint32_t level, uint32_t reg, int32_t bit) +{ + int32_t cpu_info[4] = { 0 }; + cpuid(level, cpu_info); + + return (cpu_info[reg] & bit) != 0; +} + + +static inline bool has_pdpe1gb() +{ + return has_feature(0x80000001, 3, 1 << 26); +} + + } // namespace xmrig xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : - m_brand() + m_pdpe1gb(has_pdpe1gb()) { struct cpu_raw_data_t raw = {}; struct cpu_id_t data = {}; @@ -74,7 +108,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : m_cores = static_cast(data.num_cores) * m_packages; m_L3 = data.l3_cache > 0 ? static_cast(data.l3_cache) * m_packages : 0; - const size_t l2 = static_cast(data.l2_cache); + const auto l2 = static_cast(data.l2_cache); // Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97 if (data.vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) { diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.h b/src/backend/cpu/platform/AdvancedCpuInfo.h index e2909a91d..f590ccfb3 100644 --- a/src/backend/cpu/platform/AdvancedCpuInfo.h +++ b/src/backend/cpu/platform/AdvancedCpuInfo.h @@ -43,6 +43,7 @@ protected: inline Assembly::Id assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } + inline bool hasOneGbPages() const override { return m_pdpe1gb; } inline const char *backend() const override { return m_backend; } inline const char *brand() const override { return m_brand; } inline size_t cores() const override { return m_cores; } @@ -57,8 +58,9 @@ private: bool m_aes = false; bool m_avx2 = false; bool m_L2_exclusive = false; - char m_backend[32]; - char m_brand[64 + 5]; + char m_backend[32]{}; + char m_brand[64 + 5]{}; + const bool m_pdpe1gb = false; size_t m_cores = 0; size_t m_L2 = 0; size_t m_L3 = 0; diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index e6145278c..e2ad19eac 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -113,7 +113,7 @@ static void cpu_brand_string(char out[64 + 6]) { } -static bool has_feature(uint32_t level, uint32_t reg, int32_t bit) +static inline bool has_feature(uint32_t level, uint32_t reg, int32_t bit) { int32_t cpu_info[4] = { 0 }; cpuid(level, cpu_info); @@ -191,7 +191,7 @@ const char *xmrig::BasicCpuInfo::backend() const } -xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const +xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const { const size_t count = std::thread::hardware_concurrency(); From 91b50f1ac894bb66564e48cb28334588c616db51 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 8 Dec 2019 13:30:26 +0700 Subject: [PATCH 13/31] Added os.cmake --- CMakeLists.txt | 30 
+++++++++++------------------- cmake/os.cmake | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 19 deletions(-) create mode 100644 cmake/os.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 849c1257e..e507a03c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") include (CheckIncludeFile) include (cmake/cpu.cmake) +include (cmake/os.cmake) include (src/base/base.cmake) include (src/backend/backend.cmake) @@ -131,40 +132,31 @@ if (WITH_HWLOC) ) endif() -if (WIN32) - set(SOURCES_OS - "${SOURCES_OS}" +if (XMRIG_OS_WIN) + list(APPEND SOURCES_OS res/app.rc src/App_win.cpp src/crypto/common/VirtualMemory_win.cpp ) - add_definitions(/DWIN32) set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv) -elseif (APPLE) - set(SOURCES_OS - "${SOURCES_OS}" +elseif (XMRIG_OS_APPLE) + list(APPEND SOURCES_OS src/App_unix.cpp src/crypto/common/VirtualMemory_unix.cpp ) else() - set(SOURCES_OS - "${SOURCES_OS}" + list(APPEND SOURCES_OS src/App_unix.cpp src/crypto/common/VirtualMemory_unix.cpp ) - if (CMAKE_SYSTEM_NAME STREQUAL FreeBSD) - set(EXTRA_LIBS kvm pthread) - else() + if (XMRIG_OS_ANDROID) + set(EXTRA_LIBS pthread rt dl log) + elseif (XMRIG_OS_LINUX) set(EXTRA_LIBS pthread rt dl) - endif() -endif() - -if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android") - EXECUTE_PROCESS(COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM) - if (OPERATING_SYSTEM MATCHES "Android") - set(EXTRA_LIBS ${EXTRA_LIBS} log) + elseif (XMRIG_OS_FREEBSD) + set(EXTRA_LIBS kvm pthread) endif() endif() diff --git a/cmake/os.cmake b/cmake/os.cmake new file mode 100644 index 000000000..0270cc930 --- /dev/null +++ b/cmake/os.cmake @@ -0,0 +1,45 @@ +if (WIN32) + set(XMRIG_OS_WIN ON) +elseif (APPLE) + set(XMRIG_OS_APPLE ON) + + if (IOS OR CMAKE_SYSTEM_NAME STREQUAL iOS) + set(XMRIG_OS_IOS ON) + else() + set(XMRIG_OS_MACOS ON) + endif() +else() + set(XMRIG_OS_UNIX ON) + + if (ANDROID OR CMAKE_SYSTEM_NAME MATCHES "Android") + set(XMRIG_OS_ANDROID ON) + elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") + set(XMRIG_OS_LINUX ON) + elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD) + set(XMRIG_OS_FREEBSD ON) + endif() +endif() + + +if (XMRIG_OS_WIN) + add_definitions(/DWIN32) + add_definitions(/DXMRIG_OS_WIN) +elseif(XMRIG_OS_APPLE) + add_definitions(/DXMRIG_OS_APPLE) + + if (XMRIG_OS_IOS) + add_definitions(/DXMRIG_OS_IOS) + else() + add_definitions(/DXMRIG_OS_MACOS) + endif() +elseif(XMRIG_OS_UNIX) + add_definitions(/DXMRIG_OS_UNIX) + + if (XMRIG_OS_ANDROID) + add_definitions(/DXMRIG_OS_ANDROID) + elseif (XMRIG_OS_LINUX) + add_definitions(/DXMRIG_OS_LINUX) + elseif (XMRIG_OS_FREEBSD) + add_definitions(/DXMRIG_OS_FREEBSD) + endif() +endif() From 86e25a13e3671a801ff6e5c9d8804d561d98be9e Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 8 Dec 2019 14:21:28 +0700 Subject: [PATCH 14/31] New summary information about 1GB pages. 
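The availability reported here is driven by the PDPE1GB CPUID bit that PATCH 12 started probing (extended leaf 0x80000001, EDX bit 26). As a self-contained illustration only, not code from this patch, and assuming GCC or Clang on x86-64, the probe can be reproduced like this:

    #include <cpuid.h>    // GCC/Clang CPUID helpers
    #include <cstdio>

    // Sketch: does the CPU support 1GB pages (PDPE1GB)?
    static bool cpu_has_pdpe1gb()
    {
        unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;

        // __get_cpuid returns 0 if the requested leaf is unsupported.
        if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx)) {
            return false;
        }

        return (edx & (1U << 26)) != 0;   // EDX bit 26 = PDPE1GB
    }

    int main()
    {
        std::printf("1GB pages: %s\n", cpu_has_pdpe1gb() ? "supported" : "unavailable");
        return 0;
    }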
--- src/Summary.cpp | 14 ++++---- src/crypto/common/VirtualMemory.h | 1 + src/crypto/common/VirtualMemory_unix.cpp | 45 ++++++++++++++++-------- src/crypto/common/VirtualMemory_win.cpp | 8 ++++- 4 files changed, 46 insertions(+), 22 deletions(-) diff --git a/src/Summary.cpp b/src/Summary.cpp index db8ce9fa3..b925de305 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -60,26 +60,28 @@ inline static const char *asmName(Assembly::Id assembly) static void print_memory(Config *config) { -# ifdef _WIN32 +# ifdef XMRIG_OS_WIN Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? GREEN_BOLD("permission granted") : RED_BOLD("unavailable")) : RED_BOLD("disabled")); + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "1GB PAGES", YELLOW_BOLD("unavailable")); +# else + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? GREEN_BOLD("supported") : RED_BOLD("disabled")); Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", - "1GB PAGES", Cpu::info()->hasOneGbPages() ? GREEN_BOLD("available on Linux") : RED_BOLD("unavailable")); + "1GB PAGES", config->cpu().isOneGbPages() ? (VirtualMemory::isOneGbPagesAvailable() ? GREEN_BOLD("supported") : YELLOW_BOLD("unavailable")) : YELLOW_BOLD("disabled")); # endif } static void print_cpu(Config *) { - const ICpuInfo *info = Cpu::info(); + const auto info = Cpu::info(); - Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %sx64 %sAES %sPDPE1GB", + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %sx64 %sAES", "CPU", info->brand(), info->packages(), info->isX64() ? GREEN_BOLD_S : RED_BOLD_S "-", - info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-", - info->hasOneGbPages() ? GREEN_BOLD_S : RED_BOLD_S "-" + info->hasAES() ? 
GREEN_BOLD_S : RED_BOLD_S "-" ); # if defined(XMRIG_FEATURE_LIBCPUID) || defined (XMRIG_FEATURE_HWLOC) Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("L2:") WHITE_BOLD("%.1f MB") BLACK_BOLD(" L3:") WHITE_BOLD("%.1f MB") diff --git a/src/crypto/common/VirtualMemory.h b/src/crypto/common/VirtualMemory.h index 90ecdd691..e5101f525 100644 --- a/src/crypto/common/VirtualMemory.h +++ b/src/crypto/common/VirtualMemory.h @@ -58,6 +58,7 @@ public: } static bool isHugepagesAvailable(); + static bool isOneGbPagesAvailable(); static uint32_t bindToNUMANode(int64_t affinity); static void *allocateExecutableMemory(size_t size); static void *allocateLargePagesMemory(size_t size); diff --git a/src/crypto/common/VirtualMemory_unix.cpp b/src/crypto/common/VirtualMemory_unix.cpp index a7790aaf6..391351714 100644 --- a/src/crypto/common/VirtualMemory_unix.cpp +++ b/src/crypto/common/VirtualMemory_unix.cpp @@ -29,6 +29,7 @@ #include +#include "backend/cpu/Cpu.h" #include "crypto/common/portable/mm_malloc.h" #include "crypto/common/VirtualMemory.h" @@ -38,12 +39,27 @@ #endif +#if defined (XMRIG_OS_LINUX) && (defined(MAP_HUGE_1GB) || defined(MAP_HUGE_SHIFT)) +# define XMRIG_HAS_1GB_PAGES +#endif + + bool xmrig::VirtualMemory::isHugepagesAvailable() { return true; } +bool xmrig::VirtualMemory::isOneGbPagesAvailable() +{ +# ifdef XMRIG_HAS_1GB_PAGES + return Cpu::info()->hasOneGbPages(); +# else + return false; +# endif +} + + void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size) { # if defined(__APPLE__) @@ -72,24 +88,23 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size) void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t size) { -# if defined(__APPLE__) - void *mem = MAP_FAILED; -# elif defined(__FreeBSD__) - void *mem = MAP_FAILED; -# else +# ifdef XMRIG_HAS_1GB_PAGES + if (isOneGbPagesAvailable()) { +# if defined(MAP_HUGE_1GB) + constexpr int flag_1gb = MAP_HUGE_1GB; +# elif defined(MAP_HUGE_SHIFT) + constexpr int flag_1gb = (30 << MAP_HUGE_SHIFT); +# else + constexpr int flag_1gb = 0; +# endif -# if defined(MAP_HUGE_1GB) - constexpr int flag_1gb = MAP_HUGE_1GB; -# elif defined(MAP_HUGE_SHIFT) - constexpr int flag_1gb = (30 << MAP_HUGE_SHIFT); -# else - constexpr int flag_1gb = 0; + void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_1gb, 0, 0); + + return mem == MAP_FAILED ? nullptr : mem; + } # endif - void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_1gb, 0, 0); -# endif - - return mem == MAP_FAILED ? nullptr : mem; + return nullptr; } diff --git a/src/crypto/common/VirtualMemory_win.cpp b/src/crypto/common/VirtualMemory_win.cpp index 70e48d3a0..370f7c07d 100644 --- a/src/crypto/common/VirtualMemory_win.cpp +++ b/src/crypto/common/VirtualMemory_win.cpp @@ -156,6 +156,12 @@ bool xmrig::VirtualMemory::isHugepagesAvailable() } +bool xmrig::VirtualMemory::isOneGbPagesAvailable() +{ + return false; +} + + void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size) { return VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); @@ -175,7 +181,7 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size) } -void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t size) +void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t) { return nullptr; } From 4dec0634723a3f3aacbeed8e670d59f4886521b5 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 8 Dec 2019 14:30:44 +0700 Subject: [PATCH 15/31] Fix summary. 
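For context: the allocation path this summary reports on, added in the previous patch, selects the 1GB mmap flag at compile time. A minimal sketch of that selection, assuming a Linux kernel with 1GB hugetlb pages reserved; error handling is omitted and this is illustrative, not the patch code itself:

    #include <sys/mman.h>
    #include <cstddef>

    // Sketch: request "size" bytes backed by 1GB hugetlb pages.
    void *alloc_one_gb_pages(size_t size)
    {
    #if defined(MAP_HUGE_1GB)
        constexpr int flag_1gb = MAP_HUGE_1GB;          // newer kernel headers
    #elif defined(MAP_HUGE_SHIFT)
        constexpr int flag_1gb = 30 << MAP_HUGE_SHIFT;  // log2(1 GiB) = 30
    #else
        constexpr int flag_1gb = 0;                     // headers predate huge-page size selection
    #endif

        void *mem = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_1gb,
                         -1, 0);

        return mem == MAP_FAILED ? nullptr : mem;
    }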
--- src/Summary.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Summary.cpp b/src/Summary.cpp index b925de305..020eca61f 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -59,7 +59,8 @@ inline static const char *asmName(Assembly::Id assembly) #endif -static void print_memory(Config *config) { +static void print_memory(Config *config) +{ # ifdef XMRIG_OS_WIN Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? GREEN_BOLD("permission granted") : RED_BOLD("unavailable")) : RED_BOLD("disabled")); @@ -67,7 +68,7 @@ static void print_memory(Config *config) { # else Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? GREEN_BOLD("supported") : RED_BOLD("disabled")); Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", - "1GB PAGES", config->cpu().isOneGbPages() ? (VirtualMemory::isOneGbPagesAvailable() ? GREEN_BOLD("supported") : YELLOW_BOLD("unavailable")) : YELLOW_BOLD("disabled")); + "1GB PAGES", (VirtualMemory::isOneGbPagesAvailable() ? (config->cpu().isOneGbPages() ? GREEN_BOLD("supported") : YELLOW_BOLD("disabled")) : YELLOW_BOLD("unavailable"))); # endif } From d0df8245990ea688b83942906a3e57051ef88758 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Sun, 8 Dec 2019 16:14:02 +0100 Subject: [PATCH 16/31] Optimized dataset read for Ryzen CPUs Removed register dependency in dataset read, +0.8% speedup on average. --- src/backend/cpu/CpuWorker.cpp | 2 +- .../asm/program_read_dataset_ryzen.inc | 19 ++++++++++++++++++ src/crypto/randomx/jit_compiler_a64.cpp | 2 +- src/crypto/randomx/jit_compiler_a64.hpp | 2 +- src/crypto/randomx/jit_compiler_fallback.hpp | 2 +- src/crypto/randomx/jit_compiler_x86.cpp | 20 ++++++++++++++----- src/crypto/randomx/jit_compiler_x86.hpp | 2 +- src/crypto/randomx/jit_compiler_x86_static.S | 4 ++++ .../randomx/jit_compiler_x86_static.asm | 5 +++++ .../randomx/jit_compiler_x86_static.hpp | 1 + src/crypto/randomx/randomx.cpp | 19 +++++++++++++----- src/crypto/randomx/randomx.h | 6 +++++- src/crypto/randomx/virtual_machine.hpp | 4 ++++ src/crypto/randomx/vm_compiled.cpp | 2 +- src/crypto/rx/RxVm.cpp | 6 +++++- src/crypto/rx/RxVm.h | 3 ++- src/net/JobResults.cpp | 2 +- 17 files changed, 81 insertions(+), 20 deletions(-) create mode 100644 src/crypto/randomx/asm/program_read_dataset_ryzen.inc diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 2e223a31e..5d58106d8 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -96,7 +96,7 @@ void xmrig::CpuWorker::allocateRandomX_VM() } if (!m_vm) { - m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES); + m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly); } } #endif diff --git a/src/crypto/randomx/asm/program_read_dataset_ryzen.inc b/src/crypto/randomx/asm/program_read_dataset_ryzen.inc new file mode 100644 index 000000000..37e2104bf --- /dev/null +++ b/src/crypto/randomx/asm/program_read_dataset_ryzen.inc @@ -0,0 +1,19 @@ + mov rcx, rbp ;# ecx = ma + shr rcx, 32 + and ecx, RANDOMX_DATASET_BASE_MASK + xor rbp, rax ;# modify "mx" + mov rax, qword ptr [rdi+rcx] + mov edx, ebp ;# edx = mx + and edx, RANDOMX_DATASET_BASE_MASK + prefetchnta byte ptr [rdi+rdx] + ror rbp, 32 ;# swap "ma" and "mx" + add rcx, rdi ;# dataset cache line + xor r8, rax + xor r9, qword ptr [rcx+8] + xor r10, qword ptr [rcx+16] + xor r11, qword ptr [rcx+24] + xor r12, qword ptr [rcx+32] + xor r13, qword ptr [rcx+40] + 
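;# ---------------------------------------------------------------------
;# Annotation (not part of the original .inc): the first word of the
;# current dataset line was loaded into rax up front, and the next line
;# was already requested with prefetchnta, so this xor chain reads its
;# first operand from a register instead of stalling on [rcx]. That is
;# the removed register dependency the commit message credits with a
;# ~0.8% average speedup on Ryzen.
;# ---------------------------------------------------------------------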
xor r14, qword ptr [rcx+48] + xor r15, qword ptr [rcx+56] + \ No newline at end of file diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index bf790c2b4..d291de4d3 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -118,7 +118,7 @@ static void clear_code_cache(char* p1, char* p2) # endif } -void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config) +void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config, uint32_t) { uint32_t codePos = MainLoopBegin + 4; diff --git a/src/crypto/randomx/jit_compiler_a64.hpp b/src/crypto/randomx/jit_compiler_a64.hpp index e524feb87..05afdc70d 100644 --- a/src/crypto/randomx/jit_compiler_a64.hpp +++ b/src/crypto/randomx/jit_compiler_a64.hpp @@ -49,7 +49,7 @@ namespace randomx { JitCompilerA64(); ~JitCompilerA64(); - void generateProgram(Program&, ProgramConfiguration&); + void generateProgram(Program&, ProgramConfiguration&, uint32_t); void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); template diff --git a/src/crypto/randomx/jit_compiler_fallback.hpp b/src/crypto/randomx/jit_compiler_fallback.hpp index bc3638589..063ae5219 100644 --- a/src/crypto/randomx/jit_compiler_fallback.hpp +++ b/src/crypto/randomx/jit_compiler_fallback.hpp @@ -44,7 +44,7 @@ namespace randomx { JitCompilerFallback() { throw std::runtime_error("JIT compilation is not supported on this platform"); } - void generateProgram(Program&, ProgramConfiguration&) { + void generateProgram(Program&, ProgramConfiguration&, uint32_t) { } void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) { diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index bfde7d002..082b9eb3e 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -89,7 +89,6 @@ namespace randomx { const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin; const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load; const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start; - const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset; const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init; const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin; const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init; @@ -105,7 +104,6 @@ namespace randomx { const int32_t prefetchScratchpadSize = codePrefetchScratchpadEnd - codePrefetchScratchpad; const int32_t prologueSize = codeLoopBegin - codePrologue; const int32_t loopLoadSize = codeProgamStart - codeLoopLoad; - const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset; const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; @@ -301,10 +299,22 @@ namespace randomx { freePagedMemory(allocatedCode, CodeSize); } - void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) { + void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) { generateProgramPrologue(prog, pcfg); - memcpy(code + codePos, RandomX_CurrentConfig.codeReadDatasetTweaked, readDatasetSize); - codePos += readDatasetSize; + + uint8_t* p; + uint32_t n; + if (flags & 
RANDOMX_FLAG_RYZEN) { + p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked; + n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize; + } + else { + p = RandomX_CurrentConfig.codeReadDatasetTweaked; + n = RandomX_CurrentConfig.codeReadDatasetTweakedSize; + } + memcpy(code + codePos, p, n); + codePos += n; + generateProgramEpilogue(prog, pcfg); } diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index f1864018a..0d515b0eb 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -49,7 +49,7 @@ namespace randomx { public: JitCompilerX86(); ~JitCompilerX86(); - void generateProgram(Program&, ProgramConfiguration&); + void generateProgram(Program&, ProgramConfiguration&, uint32_t); void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); template void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector &); diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index c20cd7433..8e1f9ef63 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -45,6 +45,7 @@ .global DECL(randomx_program_loop_load) .global DECL(randomx_program_start) .global DECL(randomx_program_read_dataset) +.global DECL(randomx_program_read_dataset_ryzen) .global DECL(randomx_program_read_dataset_sshash_init) .global DECL(randomx_program_read_dataset_sshash_fin) .global DECL(randomx_program_loop_store) @@ -110,6 +111,9 @@ DECL(randomx_program_start): DECL(randomx_program_read_dataset): #include "asm/program_read_dataset.inc" +DECL(randomx_program_read_dataset_ryzen): + #include "asm/program_read_dataset_ryzen.inc" + DECL(randomx_program_read_dataset_sshash_init): #include "asm/program_read_dataset_sshash_init.inc" diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index 73fa503ad..7dd1232d6 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -36,6 +36,7 @@ PUBLIC randomx_program_loop_begin PUBLIC randomx_program_loop_load PUBLIC randomx_program_start PUBLIC randomx_program_read_dataset +PUBLIC randomx_program_read_dataset_ryzen PUBLIC randomx_program_read_dataset_sshash_init PUBLIC randomx_program_read_dataset_sshash_fin PUBLIC randomx_dataset_init @@ -103,6 +104,10 @@ randomx_program_read_dataset PROC include asm/program_read_dataset.inc randomx_program_read_dataset ENDP +randomx_program_read_dataset_ryzen PROC + include asm/program_read_dataset_ryzen.inc +randomx_program_read_dataset_ryzen ENDP + randomx_program_read_dataset_sshash_init PROC include asm/program_read_dataset_sshash_init.inc randomx_program_read_dataset_sshash_init ENDP diff --git a/src/crypto/randomx/jit_compiler_x86_static.hpp b/src/crypto/randomx/jit_compiler_x86_static.hpp index 0a62c986e..151c1c589 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.hpp +++ b/src/crypto/randomx/jit_compiler_x86_static.hpp @@ -37,6 +37,7 @@ extern "C" { void randomx_program_loop_load(); void randomx_program_start(); void randomx_program_read_dataset(); + void randomx_program_read_dataset_ryzen(); void randomx_program_read_dataset_sshash_init(); void randomx_program_read_dataset_sshash_fin(); void randomx_program_loop_store(); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 44d881ebf..08f4f241b 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -157,8 
+157,15 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() } { const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset; - const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init; + const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_ryzen; memcpy(codeReadDatasetTweaked, a, b - a); + codeReadDatasetTweakedSize = b - a; + } + { + const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_ryzen; + const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init; + memcpy(codeReadDatasetRyzenTweaked, a, b - a); + codeReadDatasetRyzenTweakedSize = b - a; } { const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init; @@ -191,10 +198,11 @@ void RandomX_ConfigurationBase::Apply() #if defined(_M_X64) || defined(__x86_64__) *(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1; - const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE; - *(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask; - *(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask; - *(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask; + // Not needed right now because all variants use default dataset base size + //const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE; + //*(uint32_t*)(codeReadDatasetTweaked + 9) = DatasetBaseMask; + //*(uint32_t*)(codeReadDatasetTweaked + 24) = DatasetBaseMask; + //*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask; *(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated; *(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated; @@ -435,6 +443,7 @@ extern "C" { } vm->setScratchpad(scratchpad); + vm->setFlags(flags); } catch (std::exception &ex) { delete vm; diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 95bfdbf47..84ae7dfc8 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -49,6 +49,7 @@ enum randomx_flags { RANDOMX_FLAG_FULL_MEM = 4, RANDOMX_FLAG_JIT = 8, RANDOMX_FLAG_1GB_PAGES = 16, + RANDOMX_FLAG_RYZEN = 64, }; @@ -118,7 +119,10 @@ struct RandomX_ConfigurationBase rx_vec_i128 fillAes4Rx4_Key[8]; uint8_t codeShhPrefetchTweaked[20]; - uint8_t codeReadDatasetTweaked[64]; + uint8_t codeReadDatasetTweaked[72]; + uint32_t codeReadDatasetTweakedSize; + uint8_t codeReadDatasetRyzenTweaked[72]; + uint32_t codeReadDatasetRyzenTweakedSize; uint8_t codeReadDatasetLightSshInitTweaked[68]; uint8_t codePrefetchScratchpadTweaked[32]; diff --git a/src/crypto/randomx/virtual_machine.hpp b/src/crypto/randomx/virtual_machine.hpp index d3718d04d..3fdd86df4 100644 --- a/src/crypto/randomx/virtual_machine.hpp +++ b/src/crypto/randomx/virtual_machine.hpp @@ -46,6 +46,9 @@ public: virtual void run(void* seed) = 0; void resetRoundingMode(); + void setFlags(uint32_t flags) { vm_flags = flags; } + uint32_t getFlags() const { return vm_flags; } + randomx::RegisterFile *getRegisterFile() { return ® } @@ -71,6 +74,7 @@ protected: randomx_dataset* datasetPtr; }; uint64_t datasetOffset; + uint32_t vm_flags; }; namespace randomx { diff --git a/src/crypto/randomx/vm_compiled.cpp b/src/crypto/randomx/vm_compiled.cpp index d2ee59e8b..1ab76cd7a 100644 --- a/src/crypto/randomx/vm_compiled.cpp +++ b/src/crypto/randomx/vm_compiled.cpp @@ -43,7 +43,7 @@ namespace randomx { void CompiledVm::run(void* seed) { VmBase::generateProgram(seed); randomx_vm::initialize(); - compiler.generateProgram(program, config); + 
compiler.generateProgram(program, config, getFlags()); mem.memory = datasetPtr->memory + datasetOffset; execute(); } diff --git a/src/crypto/rx/RxVm.cpp b/src/crypto/rx/RxVm.cpp index e8d615e86..8cdad3710 100644 --- a/src/crypto/rx/RxVm.cpp +++ b/src/crypto/rx/RxVm.cpp @@ -31,7 +31,7 @@ #include "crypto/rx/RxVm.h" -xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes) +xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly) { if (!softAes) { m_flags |= RANDOMX_FLAG_HARD_AES; @@ -45,6 +45,10 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes) m_flags |= RANDOMX_FLAG_JIT; } + if (assembly == Assembly::RYZEN) { + m_flags |= RANDOMX_FLAG_RYZEN; + } + m_vm = randomx_create_vm(static_cast(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad); } diff --git a/src/crypto/rx/RxVm.h b/src/crypto/rx/RxVm.h index 30a31c2e4..7cddf93bd 100644 --- a/src/crypto/rx/RxVm.h +++ b/src/crypto/rx/RxVm.h @@ -29,6 +29,7 @@ #include "base/tools/Object.h" +#include "backend/cpu/Cpu.h" #include @@ -49,7 +50,7 @@ class RxVm public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm); - RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes); + RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly); ~RxVm(); inline randomx_vm *get() const { return m_vm; } diff --git a/src/net/JobResults.cpp b/src/net/JobResults.cpp index e8b4adcee..26f169521 100644 --- a/src/net/JobResults.cpp +++ b/src/net/JobResults.cpp @@ -117,7 +117,7 @@ static void getResults(JobBundle &bundle, std::vector &results, uint3 return; } - auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES); + auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE); for (uint32_t nonce : bundle.nonces) { *bundle.job.nonce() = nonce; From ffec42140860714a19a2fcb0623a067c453eacc3 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Sun, 8 Dec 2019 16:20:46 +0100 Subject: [PATCH 17/31] Fixed indentation --- src/crypto/randomx/jit_compiler_x86.cpp | 8 ++++---- src/crypto/randomx/jit_compiler_x86_static.hpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 082b9eb3e..2528c2cf3 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -305,12 +305,12 @@ namespace randomx { uint8_t* p; uint32_t n; if (flags & RANDOMX_FLAG_RYZEN) { - p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked; - n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize; + p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked; + n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize; } else { - p = RandomX_CurrentConfig.codeReadDatasetTweaked; - n = RandomX_CurrentConfig.codeReadDatasetTweakedSize; + p = RandomX_CurrentConfig.codeReadDatasetTweaked; + n = RandomX_CurrentConfig.codeReadDatasetTweakedSize; } memcpy(code + codePos, p, n); codePos += n; diff --git a/src/crypto/randomx/jit_compiler_x86_static.hpp b/src/crypto/randomx/jit_compiler_x86_static.hpp index 151c1c589..b0a7c5acb 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.hpp +++ b/src/crypto/randomx/jit_compiler_x86_static.hpp @@ -37,7 +37,7 @@ extern "C" { void randomx_program_loop_load(); void randomx_program_start(); void randomx_program_read_dataset(); - void randomx_program_read_dataset_ryzen(); + void randomx_program_read_dataset_ryzen(); void randomx_program_read_dataset_sshash_init(); void 
randomx_program_read_dataset_sshash_fin(); void randomx_program_loop_store(); From 028b335bacb6299b09d774b7e835e3deba3c1ea9 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Sun, 8 Dec 2019 16:51:37 +0100 Subject: [PATCH 18/31] Fix GCC compilation --- src/crypto/randomx/vm_compiled.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crypto/randomx/vm_compiled.cpp b/src/crypto/randomx/vm_compiled.cpp index 1ab76cd7a..2dc0c0250 100644 --- a/src/crypto/randomx/vm_compiled.cpp +++ b/src/crypto/randomx/vm_compiled.cpp @@ -43,7 +43,7 @@ namespace randomx { void CompiledVm::run(void* seed) { VmBase::generateProgram(seed); randomx_vm::initialize(); - compiler.generateProgram(program, config, getFlags()); + compiler.generateProgram(program, config, randomx_vm::getFlags()); mem.memory = datasetPtr->memory + datasetOffset; execute(); } From d32df84ca5189a98050a6a190f092e33600acbe6 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 8 Dec 2019 23:17:39 +0700 Subject: [PATCH 19/31] Memory allocation refactoring. --- CMakeLists.txt | 7 ++ src/backend/common/interfaces/IRxStorage.h | 3 +- src/backend/cpu/CpuBackend.cpp | 40 ++++---- src/backend/cpu/CpuWorker.cpp | 2 +- src/crypto/common/HugePagesInfo.cpp | 50 ++++++++++ src/crypto/common/HugePagesInfo.h | 67 ++++++++++++++ src/crypto/common/LinuxMemory.cpp | 103 +++++++++++++++++++++ src/crypto/common/LinuxMemory.h | 49 ++++++++++ src/crypto/common/MemoryPool.cpp | 2 +- src/crypto/common/VirtualMemory.cpp | 17 +++- src/crypto/common/VirtualMemory.h | 11 ++- src/crypto/common/VirtualMemory_unix.cpp | 17 +++- src/crypto/randomx/allocator.cpp | 10 +- src/crypto/randomx/common.hpp | 2 - src/crypto/randomx/dataset.hpp | 2 - src/crypto/randomx/randomx.cpp | 69 +++----------- src/crypto/randomx/randomx.h | 4 +- src/crypto/randomx/virtual_memory.cpp | 10 -- src/crypto/randomx/virtual_memory.hpp | 1 - src/crypto/rx/Rx.cpp | 8 +- src/crypto/rx/Rx.h | 5 +- src/crypto/rx/RxBasicStorage.cpp | 35 ++++--- src/crypto/rx/RxBasicStorage.h | 2 +- src/crypto/rx/RxCache.cpp | 50 +++++----- src/crypto/rx/RxCache.h | 16 ++-- src/crypto/rx/RxDataset.cpp | 90 +++++++++--------- src/crypto/rx/RxDataset.h | 13 ++- src/crypto/rx/RxNUMAStorage.cpp | 92 ++++++++++-------- src/crypto/rx/RxNUMAStorage.h | 2 +- src/crypto/rx/RxQueue.cpp | 4 +- src/crypto/rx/RxQueue.h | 3 +- src/net/JobResults.cpp | 2 +- 32 files changed, 516 insertions(+), 272 deletions(-) create mode 100644 src/crypto/common/HugePagesInfo.cpp create mode 100644 src/crypto/common/HugePagesInfo.h create mode 100644 src/crypto/common/LinuxMemory.cpp create mode 100644 src/crypto/common/LinuxMemory.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e507a03c6..b65d53379 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,6 +76,7 @@ set(HEADERS_CRYPTO src/crypto/cn/soft_aes.h src/crypto/common/Algorithm.h src/crypto/common/Coin.h + src/crypto/common/HugePagesInfo.h src/crypto/common/keccak.h src/crypto/common/MemoryPool.h src/crypto/common/Nonce.h @@ -115,6 +116,7 @@ set(SOURCES_CRYPTO src/crypto/cn/CnHash.cpp src/crypto/common/Algorithm.cpp src/crypto/common/Coin.cpp + src/crypto/common/HugePagesInfo.cpp src/crypto/common/keccak.cpp src/crypto/common/MemoryPool.cpp src/crypto/common/Nonce.cpp @@ -154,6 +156,11 @@ else() if (XMRIG_OS_ANDROID) set(EXTRA_LIBS pthread rt dl log) elseif (XMRIG_OS_LINUX) + list(APPEND SOURCES_OS + src/crypto/common/LinuxMemory.h + src/crypto/common/LinuxMemory.cpp + ) + set(EXTRA_LIBS pthread rt dl) elseif (XMRIG_OS_FREEBSD) set(EXTRA_LIBS kvm pthread) diff --git 
a/src/backend/common/interfaces/IRxStorage.h b/src/backend/common/interfaces/IRxStorage.h index 372eee6f9..567885185 100644 --- a/src/backend/common/interfaces/IRxStorage.h +++ b/src/backend/common/interfaces/IRxStorage.h @@ -25,6 +25,7 @@ #include "crypto/rx/RxConfig.h" +#include "crypto/common/HugePagesInfo.h" #include @@ -44,8 +45,8 @@ class IRxStorage public: virtual ~IRxStorage() = default; + virtual HugePagesInfo hugePages() const = 0; virtual RxDataset *dataset(const Job &job, uint32_t nodeId) const = 0; - virtual std::pair hugePages() const = 0; virtual void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) = 0; }; diff --git a/src/backend/cpu/CpuBackend.cpp b/src/backend/cpu/CpuBackend.cpp index ddee67683..ac4594975 100644 --- a/src/backend/cpu/CpuBackend.cpp +++ b/src/backend/cpu/CpuBackend.cpp @@ -68,17 +68,15 @@ static std::mutex mutex; struct CpuLaunchStatus { public: - inline size_t hugePages() const { return m_hugePages; } - inline size_t memory() const { return m_ways * m_memory; } - inline size_t pages() const { return m_pages; } - inline size_t threads() const { return m_threads; } - inline size_t ways() const { return m_ways; } + inline const HugePagesInfo &hugePages() const { return m_hugePages; } + inline size_t memory() const { return m_ways * m_memory; } + inline size_t threads() const { return m_threads; } + inline size_t ways() const { return m_ways; } inline void start(const std::vector &threads, size_t memory) { - m_hugePages = 0; + m_hugePages.reset(); m_memory = memory; - m_pages = 0; m_started = 0; m_errors = 0; m_threads = threads.size(); @@ -89,11 +87,9 @@ public: inline bool started(IWorker *worker, bool ready) { if (ready) { - auto hugePages = worker->memory()->hugePages(); - m_started++; - m_hugePages += hugePages.first; - m_pages += hugePages.second; + + m_hugePages += worker->memory()->hugePages(); m_ways += worker->intensity(); } else { @@ -115,19 +111,18 @@ public: tag, m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S, m_started, m_threads, m_ways, - (m_hugePages == m_pages ? GREEN_BOLD_S : (m_hugePages == 0 ? RED_BOLD_S : YELLOW_BOLD_S)), - m_hugePages == 0 ? 0.0 : static_cast(m_hugePages) / m_pages * 100.0, - m_hugePages, m_pages, + (m_hugePages.isFullyAllocated() ? GREEN_BOLD_S : (m_hugePages.allocated == 0 ? 
RED_BOLD_S : YELLOW_BOLD_S)), + m_hugePages.percent(), + m_hugePages.allocated, m_hugePages.total, memory() / 1024, Chrono::steadyMSecs() - m_ts ); } private: + HugePagesInfo m_hugePages; size_t m_errors = 0; - size_t m_hugePages = 0; size_t m_memory = 0; - size_t m_pages = 0; size_t m_started = 0; size_t m_threads = 0; size_t m_ways = 0; @@ -169,18 +164,17 @@ public: rapidjson::Value hugePages(int version, rapidjson::Document &doc) { - std::pair pages(0, 0); + HugePagesInfo pages; # ifdef XMRIG_ALGO_RANDOMX if (algo.family() == Algorithm::RANDOM_X) { - pages = Rx::hugePages(); + pages += Rx::hugePages(); } # endif mutex.lock(); - pages.first += status.hugePages(); - pages.second += status.pages(); + pages += status.hugePages(); mutex.unlock(); @@ -188,11 +182,11 @@ public: if (version > 1) { hugepages.SetArray(); - hugepages.PushBack(pages.first, doc.GetAllocator()); - hugepages.PushBack(pages.second, doc.GetAllocator()); + hugepages.PushBack(static_cast(pages.allocated), doc.GetAllocator()); + hugepages.PushBack(static_cast(pages.total), doc.GetAllocator()); } else { - hugepages = pages.first == pages.second; + hugepages = pages.isFullyAllocated(); } return hugepages; diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 5d58106d8..f93f7dacf 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -63,7 +63,7 @@ xmrig::CpuWorker::CpuWorker(size_t id, const CpuLaunchData &data) : m_miner(data.miner), m_ctx() { - m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, true, m_node); + m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, m_node); } diff --git a/src/crypto/common/HugePagesInfo.cpp b/src/crypto/common/HugePagesInfo.cpp new file mode 100644 index 000000000..3108c7de6 --- /dev/null +++ b/src/crypto/common/HugePagesInfo.cpp @@ -0,0 +1,50 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#include "crypto/common/HugePagesInfo.h" +#include "crypto/common/VirtualMemory.h" + + +namespace xmrig { + +constexpr size_t twoMiB = 2U * 1024U * 1024U; +constexpr size_t oneGiB = 1024U * 1024U * 1024U; + +} // namespace xmrig + + +xmrig::HugePagesInfo::HugePagesInfo(const VirtualMemory *memory) +{ + if (memory->isOneGbPages()) { + size = VirtualMemory::align(memory->size(), oneGiB); + total = size / oneGiB; + allocated = size / oneGiB; + } + else { + size = memory->size(); + total = size / twoMiB; + allocated = memory->isHugePages() ? 
total : 0; + } +} diff --git a/src/crypto/common/HugePagesInfo.h b/src/crypto/common/HugePagesInfo.h new file mode 100644 index 000000000..1dc93bb4f --- /dev/null +++ b/src/crypto/common/HugePagesInfo.h @@ -0,0 +1,67 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef XMRIG_HUGEPAGESINFO_H +#define XMRIG_HUGEPAGESINFO_H + + +#include +#include + + +namespace xmrig { + + +class VirtualMemory; + + +class HugePagesInfo +{ +public: + HugePagesInfo() = default; + HugePagesInfo(const VirtualMemory *memory); + + size_t allocated = 0; + size_t total = 0; + size_t size = 0; + + inline bool isFullyAllocated() const { return allocated == total; } + inline double percent() const { return allocated == 0 ? 0.0 : static_cast(allocated) / total * 100.0; } + inline void reset() { allocated = 0; total = 0; size = 0; } + + inline HugePagesInfo &operator+=(const HugePagesInfo &other) + { + allocated += other.allocated; + total += other.total; + size += other.size; + + return *this; + } +}; + + +} /* namespace xmrig */ + + +#endif /* XMRIG_HUGEPAGESINFO_H */ diff --git a/src/crypto/common/LinuxMemory.cpp b/src/crypto/common/LinuxMemory.cpp new file mode 100644 index 000000000..b7c2000f9 --- /dev/null +++ b/src/crypto/common/LinuxMemory.cpp @@ -0,0 +1,103 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
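A quick note on HugePagesInfo above before the Linux-specific code: the reason it replaces the old pair of counters is that per-worker page statistics can now be summed and reported through one type. A usage sketch with invented numbers:

    // Sketch: aggregating per-worker page stats the way CpuBackend now does.
    xmrig::HugePagesInfo total;

    xmrig::HugePagesInfo worker1;               // 4 MiB scratchpad on 2 MiB pages, fully backed
    worker1.size      = 4U * 1024U * 1024U;
    worker1.total     = 2;
    worker1.allocated = 2;

    xmrig::HugePagesInfo worker2;               // same size, but fell back to normal pages
    worker2.size      = 4U * 1024U * 1024U;
    worker2.total     = 2;
    worker2.allocated = 0;

    total += worker1;
    total += worker2;

    // total.percent() == 50.0 and total.isFullyAllocated() == false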
+ */ + + +//#include + +#include "crypto/common/LinuxMemory.h" +#include "base/io/log/Log.h" +#include "crypto/common/VirtualMemory.h" +#include "backend/cpu/Cpu.h" + + +#include +#include +#include +#include + + +namespace xmrig { + + +static std::mutex mutex; +constexpr size_t twoMiB = 2U * 1024U * 1024U; +constexpr size_t oneGiB = 1024U * 1024U * 1024U; + + +static inline std::string sysfs_path(uint32_t node, bool oneGbPages, bool nr) +{ + return "/sys/devices/system/node/node" + std::to_string(node) + "/hugepages/hugepages-" + (oneGbPages ? "1048576" : "2048") + "kB/" + (nr ? "nr" : "free") + "_hugepages"; +} + + +static inline bool write_nr_hugepages(uint32_t node, bool oneGbPages, uint64_t count) { return LinuxMemory::write(sysfs_path(node, oneGbPages, true).c_str(), count); } +static inline int64_t free_hugepages(uint32_t node, bool oneGbPages) { return LinuxMemory::read(sysfs_path(node, oneGbPages, false).c_str()); } +static inline int64_t nr_hugepages(uint32_t node, bool oneGbPages) { return LinuxMemory::read(sysfs_path(node, oneGbPages, true).c_str()); } + + +} // namespace xmrig + + +bool xmrig::LinuxMemory::reserve(size_t size, uint32_t node, bool oneGbPages) +{ + std::lock_guard lock(mutex); + + const size_t pageSize = oneGbPages ? oneGiB : twoMiB; + const size_t required = VirtualMemory::align(size, pageSize) / pageSize; + + const auto available = free_hugepages(node, oneGbPages); + if (available < 0 || static_cast(available) >= required) { + return false; + } + + return write_nr_hugepages(node, oneGbPages, std::max(nr_hugepages(node, oneGbPages), 0) + (required - available)); +} + + +bool xmrig::LinuxMemory::write(const char *path, uint64_t value) +{ + std::ofstream file(path, std::ios::out | std::ios::binary | std::ios::trunc); + if (!file.is_open()) { + return false; + } + + file << value; + file.flush(); + + return true; +} + + +int64_t xmrig::LinuxMemory::read(const char *path) +{ + std::ifstream file(path); + if (!file.is_open()) { + return -1; + } + + uint64_t value = 0; + file >> value; + + return value; +} diff --git a/src/crypto/common/LinuxMemory.h b/src/crypto/common/LinuxMemory.h new file mode 100644 index 000000000..aa46a6fcd --- /dev/null +++ b/src/crypto/common/LinuxMemory.h @@ -0,0 +1,49 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
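The reserve() arithmetic above deserves spelling out, since it mixes signed sysfs reads with unsigned page counts. A sketch of just the math, with illustrative numbers (the real function writes the result back to nr_hugepages):

    #include <algorithm>
    #include <cstdint>

    // Sketch: what should nr_hugepages be raised to before allocating?
    // Returns 0 when no sysfs write is needed (or sysfs was unreadable).
    uint64_t new_nr_hugepages(uint64_t size, uint64_t page_size,
                              int64_t free_pages, int64_t nr_pages)
    {
        const uint64_t required = (size + page_size - 1) / page_size;   // align up

        if (free_pages < 0 || static_cast<uint64_t>(free_pages) >= required) {
            return 0;                       // enough pages already free
        }

        return std::max<int64_t>(nr_pages, 0) + (required - free_pages);
    }

    // Example: a ~2080 MiB RandomX dataset on 2 MiB pages needs 1040 pages;
    // with free_pages = 1000 and nr_pages = 1024, write back 1024 + 40 = 1064.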
+ */ + +#ifndef XMRIG_LINUXMEMORY_H +#define XMRIG_LINUXMEMORY_H + + +#include +#include + + +namespace xmrig { + + +class LinuxMemory +{ +public: + static bool reserve(size_t size, uint32_t node, bool oneGbPages = false); + + static bool write(const char *path, uint64_t value); + static int64_t read(const char *path); +}; + + +} /* namespace xmrig */ + + +#endif /* XMRIG_LINUXMEMORY_H */ diff --git a/src/crypto/common/MemoryPool.cpp b/src/crypto/common/MemoryPool.cpp index 2b2da6591..6e17d752b 100644 --- a/src/crypto/common/MemoryPool.cpp +++ b/src/crypto/common/MemoryPool.cpp @@ -47,7 +47,7 @@ xmrig::MemoryPool::MemoryPool(size_t size, bool hugePages, uint32_t node) return; } - m_memory = new VirtualMemory(size * pageSize, hugePages, false, node); + m_memory = new VirtualMemory(size * pageSize, hugePages, false, false, node); } diff --git a/src/crypto/common/VirtualMemory.cpp b/src/crypto/common/VirtualMemory.cpp index 0eaef3c5e..14909f1b7 100644 --- a/src/crypto/common/VirtualMemory.cpp +++ b/src/crypto/common/VirtualMemory.cpp @@ -46,10 +46,13 @@ namespace xmrig { static IMemoryPool *pool = nullptr; static std::mutex mutex; +constexpr size_t twoMiB = 2U * 1024U * 1024U; +constexpr size_t oneGiB = 1024U * 1024U * 1024U; + } // namespace xmrig -xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool usePool, uint32_t node, size_t alignSize) : +xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages, bool usePool, uint32_t node, size_t alignSize) : m_size(align(size)), m_node(node) { @@ -68,6 +71,10 @@ xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool usePool, u } } + if (oneGbPages && allocateOneGbPagesMemory()) { + return; + } + if (hugePages && allocateLargePagesMemory()) { return; } @@ -86,7 +93,7 @@ xmrig::VirtualMemory::~VirtualMemory() std::lock_guard lock(mutex); pool->release(m_node); } - else if (isHugePages()) { + else if (isHugePages() || isOneGbPages()) { freeLargePagesMemory(); } else { @@ -95,6 +102,12 @@ xmrig::VirtualMemory::~VirtualMemory() } +xmrig::HugePagesInfo xmrig::VirtualMemory::hugePages() const +{ + return { this }; +} + + #ifndef XMRIG_FEATURE_HWLOC uint32_t xmrig::VirtualMemory::bindToNUMANode(int64_t) { diff --git a/src/crypto/common/VirtualMemory.h b/src/crypto/common/VirtualMemory.h index e5101f525..f27e5908f 100644 --- a/src/crypto/common/VirtualMemory.h +++ b/src/crypto/common/VirtualMemory.h @@ -29,6 +29,7 @@ #include "base/tools/Object.h" +#include "crypto/common/HugePagesInfo.h" #include @@ -45,17 +46,16 @@ class VirtualMemory public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(VirtualMemory) - VirtualMemory(size_t size, bool hugePages, bool usePool, uint32_t node = 0, size_t alignSize = 64); + VirtualMemory(size_t size, bool hugePages, bool oneGbPages, bool usePool, uint32_t node = 0, size_t alignSize = 64); ~VirtualMemory(); inline bool isHugePages() const { return m_flags.test(FLAG_HUGEPAGES); } + inline bool isOneGbPages() const { return m_flags.test(FLAG_1GB_PAGES); } inline size_t size() const { return m_size; } + inline uint8_t *raw() const { return m_scratchpad; } inline uint8_t *scratchpad() const { return m_scratchpad; } - inline std::pair hugePages() const - { - return { isHugePages() ? 
(align(size()) / 2097152) : 0, align(size()) / 2097152 }; - } + HugePagesInfo hugePages() const; static bool isHugepagesAvailable(); static bool isOneGbPagesAvailable(); @@ -75,6 +75,7 @@ public: private: enum Flags { FLAG_HUGEPAGES, + FLAG_1GB_PAGES, FLAG_LOCK, FLAG_EXTERNAL, FLAG_MAX diff --git a/src/crypto/common/VirtualMemory_unix.cpp b/src/crypto/common/VirtualMemory_unix.cpp index 391351714..0c77ba091 100644 --- a/src/crypto/common/VirtualMemory_unix.cpp +++ b/src/crypto/common/VirtualMemory_unix.cpp @@ -39,8 +39,11 @@ #endif -#if defined (XMRIG_OS_LINUX) && (defined(MAP_HUGE_1GB) || defined(MAP_HUGE_SHIFT)) -# define XMRIG_HAS_1GB_PAGES +#if defined(XMRIG_OS_LINUX) +# if (defined(MAP_HUGE_1GB) || defined(MAP_HUGE_SHIFT)) +# define XMRIG_HAS_1GB_PAGES +# endif +# include "crypto/common/LinuxMemory.h" #endif @@ -141,6 +144,10 @@ void xmrig::VirtualMemory::osInit(bool) bool xmrig::VirtualMemory::allocateLargePagesMemory() { +# if defined(XMRIG_OS_LINUX) + LinuxMemory::reserve(m_size, m_node); +# endif + m_scratchpad = static_cast(allocateLargePagesMemory(m_size)); if (m_scratchpad) { m_flags.set(FLAG_HUGEPAGES, true); @@ -160,9 +167,13 @@ bool xmrig::VirtualMemory::allocateLargePagesMemory() bool xmrig::VirtualMemory::allocateOneGbPagesMemory() { +# if defined(XMRIG_HAS_1GB_PAGES) + LinuxMemory::reserve(m_size, m_node, true); +# endif + m_scratchpad = static_cast(allocateOneGbPagesMemory(m_size)); if (m_scratchpad) { - m_flags.set(FLAG_HUGEPAGES, true); + m_flags.set(FLAG_1GB_PAGES, true); madvise(m_scratchpad, m_size, MADV_RANDOM | MADV_WILLNEED); diff --git a/src/crypto/randomx/allocator.cpp b/src/crypto/randomx/allocator.cpp index d46393f1f..770e60f9f 100644 --- a/src/crypto/randomx/allocator.cpp +++ b/src/crypto/randomx/allocator.cpp @@ -43,7 +43,7 @@ namespace randomx { } template - void AlignedAllocator::freeMemory(void* ptr, size_t count) { + void AlignedAllocator::freeMemory(void* ptr, size_t) { rx_aligned_free(ptr); } @@ -57,12 +57,4 @@ namespace randomx { freePagedMemory(ptr, count); }; - void* OneGbPageAllocator::allocMemory(size_t count) { - return allocOneGbPagesMemory(count); - } - - void OneGbPageAllocator::freeMemory(void* ptr, size_t count) { - freePagedMemory(ptr, count); - }; - } diff --git a/src/crypto/randomx/common.hpp b/src/crypto/randomx/common.hpp index 48f31bac2..cea5f5be6 100644 --- a/src/crypto/randomx/common.hpp +++ b/src/crypto/randomx/common.hpp @@ -167,7 +167,5 @@ namespace randomx { typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); - typedef void(DatasetDeallocFunc)(randomx_dataset*); - typedef void(CacheDeallocFunc)(randomx_cache*); typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t); } diff --git a/src/crypto/randomx/dataset.hpp b/src/crypto/randomx/dataset.hpp index 6c179b5d1..a40cf1d76 100644 --- a/src/crypto/randomx/dataset.hpp +++ b/src/crypto/randomx/dataset.hpp @@ -38,13 +38,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
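A worked example ties the VirtualMemory_unix.cpp changes above together. Reserving room for a full RandomX dataset (2080 MiB with the stock configuration; the figures are illustrative) on NUMA node 0 with 2 MiB pages reduces to:

    required  = align(2080 MiB, 2 MiB) / 2 MiB = 1040 pages
    available = read /sys/devices/system/node/node0/hugepages/hugepages-2048kB/free_hugepages
    if (available < required)
        write max(nr_hugepages, 0) + (required - available)
        into /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages

The sysfs write only succeeds as root, which is why both call sites above ignore the return value of LinuxMemory::reserve(): allocation proceeds regardless, and the VirtualMemory constructor falls back from 1GB pages to 2 MiB pages to ordinary pages if the kernel cannot satisfy the request.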
/* Global scope for C binding */ struct randomx_dataset { uint8_t* memory = nullptr; - randomx::DatasetDeallocFunc* dealloc; }; /* Global scope for C binding */ struct randomx_cache { uint8_t* memory = nullptr; - randomx::CacheDeallocFunc* dealloc; randomx::JitCompiler* jit; randomx::CacheInitializeFunc* initialize; randomx::DatasetInitFunc* datasetInit; diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 08f4f241b..8b1cfe2ce 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -272,42 +272,24 @@ RandomX_ConfigurationBase RandomX_CurrentConfig; extern "C" { - randomx_cache *randomx_alloc_cache(randomx_flags flags) { + randomx_cache *randomx_create_cache(randomx_flags flags, uint8_t *memory) { randomx_cache *cache = nullptr; try { cache = new randomx_cache(); - switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) { + switch (flags & RANDOMX_FLAG_JIT) { case RANDOMX_FLAG_DEFAULT: - cache->dealloc = &randomx::deallocCache; - cache->jit = nullptr; - cache->initialize = &randomx::initCache; - cache->datasetInit = &randomx::initDataset; - cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE); + cache->jit = nullptr; + cache->initialize = &randomx::initCache; + cache->datasetInit = &randomx::initDataset; + cache->memory = memory; break; case RANDOMX_FLAG_JIT: - cache->dealloc = &randomx::deallocCache; - cache->jit = new randomx::JitCompiler(); - cache->initialize = &randomx::initCacheCompile; - cache->datasetInit = cache->jit->getDatasetInitFunc(); - cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE); - break; - - case RANDOMX_FLAG_LARGE_PAGES: - cache->dealloc = &randomx::deallocCache; - cache->jit = nullptr; - cache->initialize = &randomx::initCache; - cache->datasetInit = &randomx::initDataset; - cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE); - break; - - case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: - cache->dealloc = &randomx::deallocCache; - cache->jit = new randomx::JitCompiler(); - cache->initialize = &randomx::initCacheCompile; - cache->datasetInit = cache->jit->getDatasetInitFunc(); - cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE); + cache->jit = new randomx::JitCompiler(); + cache->initialize = &randomx::initCacheCompile; + cache->datasetInit = cache->jit->getDatasetInitFunc(); + cache->memory = memory; break; default: @@ -331,35 +313,12 @@ extern "C" { } void randomx_release_cache(randomx_cache* cache) { - assert(cache != nullptr); - cache->dealloc(cache); delete cache; } - randomx_dataset *randomx_alloc_dataset(randomx_flags flags) { - randomx_dataset *dataset = nullptr; - - try { - dataset = new randomx_dataset(); - if (flags & RANDOMX_FLAG_1GB_PAGES) { - dataset->dealloc = &randomx::deallocDataset; - dataset->memory = (uint8_t*)randomx::OneGbPageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE); - } - else if (flags & RANDOMX_FLAG_LARGE_PAGES) { - dataset->dealloc = &randomx::deallocDataset; - dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE); - } - else { - dataset->dealloc = &randomx::deallocDataset; - dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE); - } - } - catch (std::exception &ex) { - if (dataset != nullptr) { - randomx_release_dataset(dataset); - dataset = nullptr; - } - } + randomx_dataset *randomx_create_dataset(uint8_t *memory) { + auto dataset = 
new randomx_dataset(); + dataset->memory = memory; return dataset; } @@ -384,8 +343,6 @@ extern "C" { } void randomx_release_dataset(randomx_dataset *dataset) { - assert(dataset != nullptr); - dataset->dealloc(dataset); delete dataset; } diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 84ae7dfc8..923142299 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -215,7 +215,7 @@ extern "C" { * NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT * is set and JIT compilation is not supported on the current platform. */ -RANDOMX_EXPORT randomx_cache *randomx_alloc_cache(randomx_flags flags); +RANDOMX_EXPORT randomx_cache *randomx_create_cache(randomx_flags flags, uint8_t *memory); /** * Initializes the cache memory and SuperscalarHash using the provided key value. @@ -242,7 +242,7 @@ RANDOMX_EXPORT void randomx_release_cache(randomx_cache* cache); * @return Pointer to an allocated randomx_dataset structure. * NULL is returned if memory allocation fails. */ -RANDOMX_EXPORT randomx_dataset *randomx_alloc_dataset(randomx_flags flags); +RANDOMX_EXPORT randomx_dataset *randomx_create_dataset(uint8_t *memory); /** * Gets the number of items contained in the dataset. diff --git a/src/crypto/randomx/virtual_memory.cpp b/src/crypto/randomx/virtual_memory.cpp index 48a8a8d25..06165ffb6 100644 --- a/src/crypto/randomx/virtual_memory.cpp +++ b/src/crypto/randomx/virtual_memory.cpp @@ -53,16 +53,6 @@ void* allocLargePagesMemory(std::size_t bytes) { } -void* allocOneGbPagesMemory(std::size_t bytes) { - void* mem = xmrig::VirtualMemory::allocateOneGbPagesMemory(bytes); - if (mem == nullptr) { - throw std::runtime_error("Failed to allocate 1GB pages memory"); - } - - return mem; -} - - void freePagedMemory(void* ptr, std::size_t bytes) { xmrig::VirtualMemory::freeLargePagesMemory(ptr, bytes); } diff --git a/src/crypto/randomx/virtual_memory.hpp b/src/crypto/randomx/virtual_memory.hpp index 8c5b49005..d3b31db12 100644 --- a/src/crypto/randomx/virtual_memory.hpp +++ b/src/crypto/randomx/virtual_memory.hpp @@ -32,5 +32,4 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
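The ownership model resulting from this API change is worth spelling out. The sketch below is illustrative (the flags and node id are made up; only the function signatures come from the patch):

    auto memory  = new xmrig::VirtualMemory(RANDOMX_DATASET_MAX_SIZE,
                                            true,   // hugePages
                                            true,   // oneGbPages
                                            false,  // usePool
                                            0);     // NUMA node
    auto dataset = randomx_create_dataset(memory->raw()); // wraps, never allocates
    // ... randomx_init_dataset(), hashing ...
    randomx_release_dataset(dataset); // frees only the struct
    delete memory;                    // the owner releases the buffer

Because page-size policy now lives entirely in VirtualMemory, the dealloc function pointers, the RANDOMX_FLAG_LARGE_PAGES/RANDOMX_FLAG_1GB_PAGES allocation paths and OneGbPageAllocator all become dead code, which is what the deletions above remove; randomx_release_cache() and randomx_release_dataset() shrink to a plain delete.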
void* allocExecutableMemory(std::size_t); void* allocLargePagesMemory(std::size_t); -void* allocOneGbPagesMemory(std::size_t); void freePagedMemory(void*, std::size_t); diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index 84c9b5fc9..369b0aacf 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -83,15 +83,15 @@ bool xmrig::Rx::isReady(const Job &job) } -xmrig::RxDataset *xmrig::Rx::dataset(const Job &job, uint32_t nodeId) +xmrig::HugePagesInfo xmrig::Rx::hugePages() { - return d_ptr->queue.dataset(job, nodeId); + return d_ptr->queue.hugePages(); } -std::pair xmrig::Rx::hugePages() +xmrig::RxDataset *xmrig::Rx::dataset(const Job &job, uint32_t nodeId) { - return d_ptr->queue.hugePages(); + return d_ptr->queue.dataset(job, nodeId); } diff --git a/src/crypto/rx/Rx.h b/src/crypto/rx/Rx.h index abaca9200..d26304451 100644 --- a/src/crypto/rx/Rx.h +++ b/src/crypto/rx/Rx.h @@ -32,6 +32,9 @@ #include +#include "crypto/common/HugePagesInfo.h" + + namespace xmrig { @@ -49,8 +52,8 @@ class Rx public: static bool init(const Job &job, const RxConfig &config, const CpuConfig &cpu); static bool isReady(const Job &job); + static HugePagesInfo hugePages(); static RxDataset *dataset(const Job &job, uint32_t nodeId); - static std::pair hugePages(); static void destroy(); static void init(IRxListener *listener); }; diff --git a/src/crypto/rx/RxBasicStorage.cpp b/src/crypto/rx/RxBasicStorage.cpp index a0656e46b..079bf95c9 100644 --- a/src/crypto/rx/RxBasicStorage.cpp +++ b/src/crypto/rx/RxBasicStorage.cpp @@ -73,7 +73,7 @@ public: { const uint64_t ts = Chrono::steadyMSecs(); - m_dataset = new RxDataset(hugePages, oneGbPages, true, mode); + m_dataset = new RxDataset(hugePages, oneGbPages, true, mode, 0); printAllocStatus(ts); } @@ -94,18 +94,17 @@ private: void printAllocStatus(uint64_t ts) { if (m_dataset->get() != nullptr) { - const auto pages = m_dataset->hugePages(); - const double percent = pages.first == 0 ? 0.0 : static_cast(pages.first) / pages.second * 100.0; + const auto pages = m_dataset->hugePages(); LOG_INFO("%s" GREEN_BOLD("allocated") CYAN_BOLD(" %zu MB") BLACK_BOLD(" (%zu+%zu)") " huge pages %s%1.0f%% %u/%u" CLEAR " %sJIT" BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), - m_dataset->size() / oneMiB, + pages.size / oneMiB, RxDataset::maxSize() / oneMiB, RxCache::maxSize() / oneMiB, - (pages.first == pages.second ? GREEN_BOLD_S : (pages.first == 0 ? RED_BOLD_S : YELLOW_BOLD_S)), - percent, - pages.first, - pages.second, + (pages.isFullyAllocated() ? GREEN_BOLD_S : (pages.allocated == 0 ? RED_BOLD_S : YELLOW_BOLD_S)), + pages.percent(), + pages.allocated, + pages.total, m_dataset->cache()->isJIT() ? 
GREEN_BOLD_S "+" : RED_BOLD_S "-", Chrono::steadyMSecs() - ts ); @@ -137,6 +136,16 @@ xmrig::RxBasicStorage::~RxBasicStorage() } +xmrig::HugePagesInfo xmrig::RxBasicStorage::hugePages() const +{ + if (!d_ptr->dataset()) { + return {}; + } + + return d_ptr->dataset()->hugePages(); +} + + xmrig::RxDataset *xmrig::RxBasicStorage::dataset(const Job &job, uint32_t) const { if (!d_ptr->isReady(job)) { @@ -147,16 +156,6 @@ xmrig::RxDataset *xmrig::RxBasicStorage::dataset(const Job &job, uint32_t) const } -std::pair xmrig::RxBasicStorage::hugePages() const -{ - if (!d_ptr->dataset()) { - return { 0U, 0U }; - } - - return d_ptr->dataset()->hugePages(); -} - - void xmrig::RxBasicStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) { d_ptr->setSeed(seed); diff --git a/src/crypto/rx/RxBasicStorage.h b/src/crypto/rx/RxBasicStorage.h index bd8a5e00d..1f50af576 100644 --- a/src/crypto/rx/RxBasicStorage.h +++ b/src/crypto/rx/RxBasicStorage.h @@ -48,8 +48,8 @@ public: ~RxBasicStorage() override; protected: + HugePagesInfo hugePages() const override; RxDataset *dataset(const Job &job, uint32_t nodeId) const override; - std::pair hugePages() const override; void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) override; private: diff --git a/src/crypto/rx/RxCache.cpp b/src/crypto/rx/RxCache.cpp index a248ea5cc..f58f30247 100644 --- a/src/crypto/rx/RxCache.cpp +++ b/src/crypto/rx/RxCache.cpp @@ -35,30 +35,25 @@ static_assert(RANDOMX_FLAG_LARGE_PAGES == 1, "RANDOMX_FLAG_LARGE_PAGES flag mism -xmrig::RxCache::RxCache(bool hugePages) +xmrig::RxCache::RxCache(bool hugePages, uint32_t nodeId) { - if (hugePages) { - m_flags = RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES; - m_cache = randomx_alloc_cache(static_cast(m_flags)); - } + m_memory = new VirtualMemory(maxSize(), hugePages, false, false, nodeId); - if (!m_cache) { - m_flags = RANDOMX_FLAG_JIT; - m_cache = randomx_alloc_cache(static_cast(m_flags)); - } + create(m_memory->raw()); +} - if (!m_cache) { - m_flags = RANDOMX_FLAG_DEFAULT; - m_cache = randomx_alloc_cache(static_cast(m_flags)); - } + +xmrig::RxCache::RxCache(uint8_t *memory) +{ + create(memory); } xmrig::RxCache::~RxCache() { - if (m_cache) { - randomx_release_cache(m_cache); - } + randomx_release_cache(m_cache); + + delete m_memory; } @@ -75,15 +70,18 @@ bool xmrig::RxCache::init(const Buffer &seed) } -std::pair xmrig::RxCache::hugePages() const +xmrig::HugePagesInfo xmrig::RxCache::hugePages() const { - constexpr size_t twoMiB = 2u * 1024u * 1024u; - constexpr size_t total = VirtualMemory::align(maxSize(), twoMiB) / twoMiB; - - uint32_t count = 0; - if (isHugePages()) { - count += total; - } - - return { count, total }; + return m_memory ? 
m_memory->hugePages() : HugePagesInfo(); +} + + +void xmrig::RxCache::create(uint8_t *memory) +{ + m_cache = randomx_create_cache(RANDOMX_FLAG_JIT, memory); + + if (!m_cache) { + m_jit = false; + m_cache = randomx_create_cache(RANDOMX_FLAG_DEFAULT, memory); + } } diff --git a/src/crypto/rx/RxCache.h b/src/crypto/rx/RxCache.h index 84635292b..d1822b30a 100644 --- a/src/crypto/rx/RxCache.h +++ b/src/crypto/rx/RxCache.h @@ -33,6 +33,7 @@ #include "base/tools/Buffer.h" #include "base/tools/Object.h" +#include "crypto/common/HugePagesInfo.h" #include "crypto/randomx/configuration.h" @@ -48,24 +49,27 @@ class RxCache public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxCache) - RxCache(bool hugePages = true); + RxCache(bool hugePages, uint32_t nodeId); + RxCache(uint8_t *memory); ~RxCache(); - inline bool isHugePages() const { return m_flags & 1; } - inline bool isJIT() const { return m_flags & 8; } + inline bool isJIT() const { return m_jit; } inline const Buffer &seed() const { return m_seed; } inline randomx_cache *get() const { return m_cache; } inline size_t size() const { return maxSize(); } bool init(const Buffer &seed); - std::pair hugePages() const; + HugePagesInfo hugePages() const; static inline constexpr size_t maxSize() { return RANDOMX_CACHE_MAX_SIZE; } private: + void create(uint8_t *memory); + + bool m_jit = true; Buffer m_seed; - int m_flags = 0; - randomx_cache *m_cache = nullptr; + randomx_cache *m_cache = nullptr; + VirtualMemory *m_memory = nullptr; }; diff --git a/src/crypto/rx/RxDataset.cpp b/src/crypto/rx/RxDataset.cpp index 5786926a3..b2f934eb0 100644 --- a/src/crypto/rx/RxDataset.cpp +++ b/src/crypto/rx/RxDataset.cpp @@ -38,9 +38,6 @@ #include -static_assert(RANDOMX_FLAG_LARGE_PAGES == 1, "RANDOMX_FLAG_LARGE_PAGES flag mismatch"); - - namespace xmrig { @@ -55,18 +52,26 @@ static void init_dataset_wrapper(randomx_dataset *dataset, randomx_cache *cache, } // namespace xmrig -xmrig::RxDataset::RxDataset(bool hugePages, bool oneGbPages, bool cache, RxConfig::Mode mode) : - m_mode(mode) +xmrig::RxDataset::RxDataset(bool hugePages, bool oneGbPages, bool cache, RxConfig::Mode mode, uint32_t node) : + m_mode(mode), + m_node(node) { allocate(hugePages, oneGbPages); + if (isOneGbPages()) { + m_cache = new RxCache(m_memory->raw() + VirtualMemory::align(maxSize())); + + return; + } + if (cache) { - m_cache = new RxCache(hugePages); + m_cache = new RxCache(hugePages, node); } } xmrig::RxDataset::RxDataset(RxCache *cache) : + m_node(0), m_cache(cache) { } @@ -74,11 +79,10 @@ xmrig::RxDataset::RxDataset(RxCache *cache) : xmrig::RxDataset::~RxDataset() { - if (m_dataset) { - randomx_release_dataset(m_dataset); - } + randomx_release_dataset(m_dataset); delete m_cache; + delete m_memory; } @@ -118,6 +122,30 @@ bool xmrig::RxDataset::init(const Buffer &seed, uint32_t numThreads, int priorit } +bool xmrig::RxDataset::isHugePages() const +{ + return m_memory && m_memory->isHugePages(); +} + + +bool xmrig::RxDataset::isOneGbPages() const +{ + return m_memory && m_memory->isOneGbPages(); +} + + +xmrig::HugePagesInfo xmrig::RxDataset::hugePages(bool cache) const +{ + auto pages = m_memory ? 
m_memory->hugePages() : HugePagesInfo(); + + if (cache && m_cache) { + pages += m_cache->hugePages(); + } + + return pages; +} + + size_t xmrig::RxDataset::size(bool cache) const { size_t size = 0; @@ -134,31 +162,6 @@ size_t xmrig::RxDataset::size(bool cache) const } -std::pair xmrig::RxDataset::hugePages(bool cache) const -{ - constexpr size_t twoMiB = 2U * 1024U * 1024U; - constexpr size_t oneGiB = 1024U * 1024U * 1024U; - constexpr size_t cacheSize = VirtualMemory::align(RxCache::maxSize(), twoMiB) / twoMiB; - size_t datasetPageSize = isOneGbPages() ? oneGiB : twoMiB; - size_t total = VirtualMemory::align(maxSize(), datasetPageSize) / datasetPageSize; - - uint32_t count = 0; - if (isHugePages() || isOneGbPages()) { - count += total; - } - - if (cache && m_cache) { - total += cacheSize; - - if (m_cache->isHugePages()) { - count += cacheSize; - } - } - - return { count, total }; -} - - void *xmrig::RxDataset::raw() const { return m_dataset ? randomx_get_dataset_memory(m_dataset) : nullptr; @@ -189,19 +192,12 @@ void xmrig::RxDataset::allocate(bool hugePages, bool oneGbPages) return; } - if (hugePages) { - m_flags = oneGbPages ? RANDOMX_FLAG_1GB_PAGES : RANDOMX_FLAG_LARGE_PAGES; - m_dataset = randomx_alloc_dataset(static_cast(m_flags)); + m_memory = new VirtualMemory(maxSize(), hugePages, oneGbPages, false, m_node); + m_dataset = randomx_create_dataset(m_memory->raw()); - if (oneGbPages && !m_dataset) { - LOG_ERR(CLEAR "%s" RED_BOLD_S "Failed to allocate RandomX dataset using 1GB pages", rx_tag()); - m_flags = RANDOMX_FLAG_LARGE_PAGES; - m_dataset = randomx_alloc_dataset(static_cast(m_flags)); - } - } - - if (!m_dataset) { - m_flags = RANDOMX_FLAG_DEFAULT; - m_dataset = randomx_alloc_dataset(static_cast(m_flags)); +# ifdef XMRIG_OS_LINUX + if (oneGbPages && !isOneGbPages()) { + LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to allocate RandomX dataset using 1GB pages", rx_tag()); } +# endif } diff --git a/src/crypto/rx/RxDataset.h b/src/crypto/rx/RxDataset.h index ec9c7c8a6..e46a93c6f 100644 --- a/src/crypto/rx/RxDataset.h +++ b/src/crypto/rx/RxDataset.h @@ -30,6 +30,7 @@ #include "base/tools/Object.h" #include "crypto/common/Algorithm.h" +#include "crypto/common/HugePagesInfo.h" #include "crypto/randomx/configuration.h" #include "crypto/randomx/randomx.h" #include "crypto/rx/RxConfig.h" @@ -44,6 +45,7 @@ namespace xmrig class Buffer; class RxCache; +class VirtualMemory; class RxDataset @@ -51,19 +53,19 @@ class RxDataset public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxDataset) - RxDataset(bool hugePages, bool oneGbPages, bool cache, RxConfig::Mode mode); + RxDataset(bool hugePages, bool oneGbPages, bool cache, RxConfig::Mode mode, uint32_t node); RxDataset(RxCache *cache); ~RxDataset(); - inline bool isHugePages() const { return m_flags & RANDOMX_FLAG_LARGE_PAGES; } - inline bool isOneGbPages() const { return m_flags & RANDOMX_FLAG_1GB_PAGES; } inline randomx_dataset *get() const { return m_dataset; } inline RxCache *cache() const { return m_cache; } inline void setCache(RxCache *cache) { m_cache = cache; } bool init(const Buffer &seed, uint32_t numThreads, int priority); + bool isHugePages() const; + bool isOneGbPages() const; + HugePagesInfo hugePages(bool cache = true) const; size_t size(bool cache = true) const; - std::pair hugePages(bool cache = true) const; void *raw() const; void setRaw(const void *raw); @@ -73,9 +75,10 @@ private: void allocate(bool hugePages, bool oneGbPages); const RxConfig::Mode m_mode = RxConfig::FastMode; - int m_flags = 0; + const uint32_t m_node; randomx_dataset 
*m_dataset = nullptr; RxCache *m_cache = nullptr; + VirtualMemory *m_memory = nullptr; }; diff --git a/src/crypto/rx/RxNUMAStorage.cpp b/src/crypto/rx/RxNUMAStorage.cpp index a6d191481..b139a14d6 100644 --- a/src/crypto/rx/RxNUMAStorage.cpp +++ b/src/crypto/rx/RxNUMAStorage.cpp @@ -130,8 +130,10 @@ public: join(); - std::thread thread(allocateCache, this, m_nodeset.front(), hugePages); - thread.join(); + if (isCacheRequired()) { + std::thread thread(allocateCache, this, m_nodeset.front(), hugePages); + thread.join(); + } if (m_datasets.empty()) { m_datasets.insert({ m_nodeset.front(), new RxDataset(m_cache) }); @@ -139,7 +141,9 @@ public: LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "failed to allocate RandomX datasets, switching to slow mode" BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), Chrono::steadyMSecs() - ts); } else { - dataset(m_nodeset.front())->setCache(m_cache); + if (m_cache) { + dataset(m_nodeset.front())->setCache(m_cache); + } printAllocStatus(ts); } @@ -148,6 +152,22 @@ public: } + inline bool isCacheRequired() const + { + if (m_datasets.empty()) { + return true; + } + + for (const auto kv : m_datasets) { + if (kv.second->isOneGbPages()) { + return false; + } + } + + return true; + } + + inline void initDatasets(uint32_t threads, int priority) { uint64_t ts = Chrono::steadyMSecs(); @@ -174,13 +194,11 @@ public: } - inline std::pair hugePages() const + inline HugePagesInfo hugePages() const { - auto pages = m_cache->hugePages(); + HugePagesInfo pages; for (auto const &item : m_datasets) { - const auto p = item.second->hugePages(false); - pages.first += p.first; - pages.second += p.second; + pages += item.second->hugePages(); } return pages; @@ -198,7 +216,7 @@ private: return; } - auto dataset = new RxDataset(hugePages, oneGbPages, false, RxConfig::FastMode); + auto dataset = new RxDataset(hugePages, oneGbPages, false, RxConfig::FastMode, nodeId); if (!dataset->get()) { printSkipped(nodeId, "failed to allocate dataset"); @@ -218,7 +236,7 @@ private: bindToNUMANode(nodeId); - auto cache = new RxCache(hugePages); + auto cache = new RxCache(hugePages, nodeId); std::lock_guard lock(mutex); d_ptr->m_cache = cache; @@ -238,15 +256,14 @@ private: void printAllocStatus(RxDataset *dataset, uint32_t nodeId, uint64_t ts) { - const auto pages = dataset->hugePages(); - const double percent = pages.first == 0 ? 0.0 : static_cast(pages.first) / pages.second * 100.0; + const auto pages = dataset->hugePages(); LOG_INFO("%s" CYAN_BOLD("#%u ") GREEN_BOLD("allocated") CYAN_BOLD(" %zu MB") " huge pages %s%3.0f%%" CLEAR BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), nodeId, - dataset->size() / oneMiB, - (pages.first == pages.second ? GREEN_BOLD_S : RED_BOLD_S), - percent, + pages.size / oneMiB, + (pages.isFullyAllocated() ? GREEN_BOLD_S : RED_BOLD_S), + pages.percent(), Chrono::steadyMSecs() - ts ); } @@ -254,15 +271,14 @@ private: void printAllocStatus(RxCache *cache, uint32_t nodeId, uint64_t ts) { - const auto pages = cache->hugePages(); - const double percent = pages.first == 0 ? 0.0 : static_cast(pages.first) / pages.second * 100.0; + const auto pages = cache->hugePages(); LOG_INFO("%s" CYAN_BOLD("#%u ") GREEN_BOLD("allocated") CYAN_BOLD(" %4zu MB") " huge pages %s%3.0f%%" CLEAR " %sJIT" BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), nodeId, cache->size() / oneMiB, - (pages.first == pages.second ? GREEN_BOLD_S : RED_BOLD_S), - percent, + (pages.isFullyAllocated() ? GREEN_BOLD_S : RED_BOLD_S), + pages.percent(), cache->isJIT() ? 
GREEN_BOLD_S "+" : RED_BOLD_S "-", Chrono::steadyMSecs() - ts ); @@ -271,21 +287,15 @@ private: void printAllocStatus(uint64_t ts) { - size_t memory = m_cache->size(); - auto pages = hugePages(); - const double percent = pages.first == 0 ? 0.0 : static_cast(pages.first) / pages.second * 100.0; - - for (auto const &item : m_datasets) { - memory += item.second->size(false); - } + auto pages = hugePages(); LOG_INFO("%s" CYAN_BOLD("-- ") GREEN_BOLD("allocated") CYAN_BOLD(" %4zu MB") " huge pages %s%3.0f%% %u/%u" CLEAR BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), - memory / oneMiB, - (pages.first == pages.second ? GREEN_BOLD_S : (pages.first == 0 ? RED_BOLD_S : YELLOW_BOLD_S)), - percent, - pages.first, - pages.second, + pages.size / oneMiB, + (pages.isFullyAllocated() ? GREEN_BOLD_S : (pages.allocated == 0 ? RED_BOLD_S : YELLOW_BOLD_S)), + pages.percent(), + pages.allocated, + pages.total, Chrono::steadyMSecs() - ts ); } @@ -326,6 +336,16 @@ xmrig::RxNUMAStorage::~RxNUMAStorage() } +xmrig::HugePagesInfo xmrig::RxNUMAStorage::hugePages() const +{ + if (!d_ptr->isAllocated()) { + return {}; + } + + return d_ptr->hugePages(); +} + + xmrig::RxDataset *xmrig::RxNUMAStorage::dataset(const Job &job, uint32_t nodeId) const { if (!d_ptr->isReady(job)) { @@ -336,16 +356,6 @@ xmrig::RxDataset *xmrig::RxNUMAStorage::dataset(const Job &job, uint32_t nodeId) } -std::pair xmrig::RxNUMAStorage::hugePages() const -{ - if (!d_ptr->isAllocated()) { - return { 0U, 0U }; - } - - return d_ptr->hugePages(); -} - - void xmrig::RxNUMAStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode, int priority) { d_ptr->setSeed(seed); diff --git a/src/crypto/rx/RxNUMAStorage.h b/src/crypto/rx/RxNUMAStorage.h index e7ea842fb..80626c9a6 100644 --- a/src/crypto/rx/RxNUMAStorage.h +++ b/src/crypto/rx/RxNUMAStorage.h @@ -51,8 +51,8 @@ public: ~RxNUMAStorage() override; protected: + HugePagesInfo hugePages() const override; RxDataset *dataset(const Job &job, uint32_t nodeId) const override; - std::pair hugePages() const override; void init(const RxSeed &seed, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority) override; private: diff --git a/src/crypto/rx/RxQueue.cpp b/src/crypto/rx/RxQueue.cpp index 8ca4a3791..b724e6af2 100644 --- a/src/crypto/rx/RxQueue.cpp +++ b/src/crypto/rx/RxQueue.cpp @@ -86,11 +86,11 @@ xmrig::RxDataset *xmrig::RxQueue::dataset(const Job &job, uint32_t nodeId) } -std::pair xmrig::RxQueue::hugePages() +xmrig::HugePagesInfo xmrig::RxQueue::hugePages() { std::lock_guard lock(m_mutex); - return m_storage && m_state == STATE_IDLE ? m_storage->hugePages() : std::pair(0U, 0U); + return m_storage && m_state == STATE_IDLE ? 
m_storage->hugePages() : HugePagesInfo(); } diff --git a/src/crypto/rx/RxQueue.h b/src/crypto/rx/RxQueue.h index 8a362b8ed..c83ae6d9a 100644 --- a/src/crypto/rx/RxQueue.h +++ b/src/crypto/rx/RxQueue.h @@ -29,6 +29,7 @@ #include "base/tools/Object.h" +#include "crypto/common/HugePagesInfo.h" #include "crypto/rx/RxConfig.h" #include "crypto/rx/RxSeed.h" @@ -83,7 +84,7 @@ public: bool isReady(const Job &job); RxDataset *dataset(const Job &job, uint32_t nodeId); - std::pair hugePages(); + HugePagesInfo hugePages(); void enqueue(const RxSeed &seed, const std::vector &nodeset, uint32_t threads, bool hugePages, bool oneGbPages, RxConfig::Mode mode, int priority); private: diff --git a/src/net/JobResults.cpp b/src/net/JobResults.cpp index 26f169521..c9c699f91 100644 --- a/src/net/JobResults.cpp +++ b/src/net/JobResults.cpp @@ -105,7 +105,7 @@ static inline void checkHash(const JobBundle &bundle, std::vector &re static void getResults(JobBundle &bundle, std::vector &results, uint32_t &errors, bool hwAES) { const auto &algorithm = bundle.job.algorithm(); - auto memory = new VirtualMemory(algorithm.l3(), false, false); + auto memory = new VirtualMemory(algorithm.l3(), false, false, false); uint8_t hash[32]{ 0 }; if (algorithm.family() == Algorithm::RANDOM_X) { From d6582de09bec4f7f9add54526cacb0515a3b83df Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 8 Dec 2019 23:23:03 +0700 Subject: [PATCH 20/31] v5.2.0-dev --- src/version.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/version.h b/src/version.h index 2cb9fef43..8b761e10c 100644 --- a/src/version.h +++ b/src/version.h @@ -28,15 +28,15 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "5.1.2-dev" +#define APP_VERSION "5.2.0-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" #define APP_KIND "miner" #define APP_VER_MAJOR 5 -#define APP_VER_MINOR 1 -#define APP_VER_PATCH 2 +#define APP_VER_MINOR 2 +#define APP_VER_PATCH 0 #ifdef _MSC_VER # if (_MSC_VER >= 1920) From 558c524e2a65590659efd7ef7dedf1726fd393d7 Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 9 Dec 2019 01:07:42 +0700 Subject: [PATCH 21/31] Added missing Cpu::release call. --- src/App.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/App.cpp b/src/App.cpp index 3acccc5df..17060f967 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -50,6 +50,8 @@ xmrig::App::App(Process *process) xmrig::App::~App() { + Cpu::release(); + delete m_signals; delete m_console; delete m_controller; From 3edaebb4cf1593b16562c630a76372db1bc8c2bd Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 9 Dec 2019 21:42:40 +0700 Subject: [PATCH 22/31] Move "1gb-pages" option to "randomx" object. 
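In config terms this patch only moves one key between objects. After it, an illustrative fragment (matching the src/config.json hunk below) reads:

    "randomx": {
        "init": -1,
        "mode": "auto",
        "1gb-pages": false,
        "numa": true
    },
    "cpu": {
        "enabled": true,
        "huge-pages": true
    }

Note that CpuConfig::read() below stops parsing "1gb-pages" entirely, so a config that still carries the key under "cpu" keeps working but silently loses the option until it is moved under "randomx".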
--- src/Summary.cpp | 15 +++++++++++++-- src/backend/cpu/CpuConfig.cpp | 3 --- src/backend/cpu/CpuConfig.h | 2 -- src/config.json | 2 +- src/crypto/rx/Rx.cpp | 2 +- src/crypto/rx/RxConfig.h | 10 ++++++---- src/crypto/rx/RxConfig_basic.cpp | 18 ++++++++++++------ src/crypto/rx/RxConfig_hwloc.cpp | 20 +++++++++++++------- 8 files changed, 46 insertions(+), 26 deletions(-) diff --git a/src/Summary.cpp b/src/Summary.cpp index 020eca61f..695138d12 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -39,6 +39,11 @@ #include "version.h" +#ifdef XMRIG_ALGO_RANDOMX +# include "crypto/rx/RxConfig.h" +#endif + + namespace xmrig { @@ -64,11 +69,17 @@ static void print_memory(Config *config) # ifdef XMRIG_OS_WIN Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? GREEN_BOLD("permission granted") : RED_BOLD("unavailable")) : RED_BOLD("disabled")); - Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "1GB PAGES", YELLOW_BOLD("unavailable")); # else Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? GREEN_BOLD("supported") : RED_BOLD("disabled")); +# endif + +# ifdef XMRIG_ALGO_RANDOMX +# ifdef XMRIG_OS_LINUX Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", - "1GB PAGES", (VirtualMemory::isOneGbPagesAvailable() ? (config->cpu().isOneGbPages() ? GREEN_BOLD("supported") : YELLOW_BOLD("disabled")) : YELLOW_BOLD("unavailable"))); + "1GB PAGES", (VirtualMemory::isOneGbPagesAvailable() ? (config->rx().isOneGbPages() ? GREEN_BOLD("supported") : YELLOW_BOLD("disabled")) : YELLOW_BOLD("unavailable"))); +# else + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "1GB PAGES", YELLOW_BOLD("unavailable")); +# endif # endif } diff --git a/src/backend/cpu/CpuConfig.cpp b/src/backend/cpu/CpuConfig.cpp index fb95f942e..a9e10338c 100644 --- a/src/backend/cpu/CpuConfig.cpp +++ b/src/backend/cpu/CpuConfig.cpp @@ -34,7 +34,6 @@ namespace xmrig { static const char *kEnabled = "enabled"; static const char *kHugePages = "huge-pages"; -static const char *kOneGbPages = "1gb-pages"; static const char *kHwAes = "hw-aes"; static const char *kMaxThreadsHint = "max-threads-hint"; static const char *kMemoryPool = "memory-pool"; @@ -69,7 +68,6 @@ rapidjson::Value xmrig::CpuConfig::toJSON(rapidjson::Document &doc) const obj.AddMember(StringRef(kEnabled), m_enabled, allocator); obj.AddMember(StringRef(kHugePages), m_hugePages, allocator); - obj.AddMember(StringRef(kOneGbPages), m_oneGbPages, allocator); obj.AddMember(StringRef(kHwAes), m_aes == AES_AUTO ? Value(kNullType) : Value(m_aes == AES_HW), allocator); obj.AddMember(StringRef(kPriority), priority() != -1 ? Value(priority()) : Value(kNullType), allocator); obj.AddMember(StringRef(kMemoryPool), m_memoryPool < 1 ? 
Value(m_memoryPool < 0) : Value(m_memoryPool), allocator); @@ -123,7 +121,6 @@ void xmrig::CpuConfig::read(const rapidjson::Value &value) if (value.IsObject()) { m_enabled = Json::getBool(value, kEnabled, m_enabled); m_hugePages = Json::getBool(value, kHugePages, m_hugePages); - m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages); m_limit = Json::getUint(value, kMaxThreadsHint, m_limit); m_yield = Json::getBool(value, kYield, m_yield); diff --git a/src/backend/cpu/CpuConfig.h b/src/backend/cpu/CpuConfig.h index 035a1673d..a9c408069 100644 --- a/src/backend/cpu/CpuConfig.h +++ b/src/backend/cpu/CpuConfig.h @@ -54,7 +54,6 @@ public: inline bool isEnabled() const { return m_enabled; } inline bool isHugePages() const { return m_hugePages; } - inline bool isOneGbPages() const { return m_oneGbPages; } inline bool isShouldSave() const { return m_shouldSave; } inline bool isYield() const { return m_yield; } inline const Assembly &assembly() const { return m_assembly; } @@ -74,7 +73,6 @@ private: Assembly m_assembly; bool m_enabled = true; bool m_hugePages = true; - bool m_oneGbPages = false; bool m_shouldSave = false; bool m_yield = true; int m_memoryPool = 0; diff --git a/src/config.json b/src/config.json index dd64f407d..ea2e4e15d 100644 --- a/src/config.json +++ b/src/config.json @@ -17,12 +17,12 @@ "randomx": { "init": -1, "mode": "auto", + "1gb-pages": false, "numa": true }, "cpu": { "enabled": true, "huge-pages": true, - "1gb-pages": false, "hw-aes": null, "priority": null, "memory-pool": false, diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index 369b0aacf..c311e5331 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -71,7 +71,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, const CpuConfig &cp return true; } - d_ptr->queue.enqueue(job, config.nodeset(), config.threads(cpu.limit()), cpu.isHugePages(), cpu.isOneGbPages(), config.mode(), cpu.priority()); + d_ptr->queue.enqueue(job, config.nodeset(), config.threads(cpu.limit()), cpu.isHugePages(), config.isOneGbPages(), config.mode(), cpu.priority()); return false; } diff --git a/src/crypto/rx/RxConfig.h b/src/crypto/rx/RxConfig.h index e3381fec6..90beb4e82 100644 --- a/src/crypto/rx/RxConfig.h +++ b/src/crypto/rx/RxConfig.h @@ -57,14 +57,16 @@ public: const char *modeName() const; uint32_t threads(uint32_t limit = 100) const; - inline Mode mode() const { return m_mode; } + inline bool isOneGbPages() const { return m_oneGbPages; } + inline Mode mode() const { return m_mode; } private: Mode readMode(const rapidjson::Value &value) const; - bool m_numa = true; - int m_threads = -1; - Mode m_mode = AutoMode; + bool m_numa = true; + bool m_oneGbPages = false; + int m_threads = -1; + Mode m_mode = AutoMode; # ifdef XMRIG_FEATURE_HWLOC std::vector m_nodeset; diff --git a/src/crypto/rx/RxConfig_basic.cpp b/src/crypto/rx/RxConfig_basic.cpp index 1311b2206..bf2a2c8f3 100644 --- a/src/crypto/rx/RxConfig_basic.cpp +++ b/src/crypto/rx/RxConfig_basic.cpp @@ -30,8 +30,9 @@ namespace xmrig { -static const char *kInit = "init"; -static const char *kMode = "mode"; +static const char *kInit = "init"; +static const char *kMode = "mode"; +static const char *kOneGbPages = "1gb-pages"; } @@ -42,8 +43,9 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const auto &allocator = doc.GetAllocator(); Value obj(kObjectType); - obj.AddMember(StringRef(kInit), m_threads, allocator); - obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator); + obj.AddMember(StringRef(kInit), m_threads, 
allocator); + obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator); + obj.AddMember(StringRef(kOneGbPages), m_oneGbPages, allocator); return obj; } @@ -52,8 +54,12 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const bool xmrig::RxConfig::read(const rapidjson::Value &value) { if (value.IsObject()) { - m_threads = Json::getInt(value, kInit, m_threads); - m_mode = readMode(Json::getValue(value, kMode)); + m_threads = Json::getInt(value, kInit, m_threads); + m_mode = readMode(Json::getValue(value, kMode)); + +# ifdef XMRIG_OS_LINUX + m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages); +# endif return true; } diff --git a/src/crypto/rx/RxConfig_hwloc.cpp b/src/crypto/rx/RxConfig_hwloc.cpp index 4fc57fb60..91104ef4e 100644 --- a/src/crypto/rx/RxConfig_hwloc.cpp +++ b/src/crypto/rx/RxConfig_hwloc.cpp @@ -32,9 +32,10 @@ namespace xmrig { -static const char *kInit = "init"; -static const char *kMode = "mode"; -static const char *kNUMA = "numa"; +static const char *kInit = "init"; +static const char *kMode = "mode"; +static const char *kNUMA = "numa"; +static const char *kOneGbPages = "1gb-pages"; } @@ -46,8 +47,9 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const Value obj(kObjectType); - obj.AddMember(StringRef(kInit), m_threads, allocator); - obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator); + obj.AddMember(StringRef(kInit), m_threads, allocator); + obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator); + obj.AddMember(StringRef(kOneGbPages), m_oneGbPages, allocator); if (!m_nodeset.empty()) { Value numa(kArrayType); @@ -69,8 +71,12 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const bool xmrig::RxConfig::read(const rapidjson::Value &value) { if (value.IsObject()) { - m_threads = Json::getInt(value, kInit, m_threads); - m_mode = readMode(Json::getValue(value, kMode)); + m_threads = Json::getInt(value, kInit, m_threads); + m_mode = readMode(Json::getValue(value, kMode)); + +# ifdef XMRIG_OS_LINUX + m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages); +# endif if (m_mode == LightMode) { m_numa = false; From 9bc13813ba2fb12436413fefa1fc1ba36c78cc19 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Mon, 9 Dec 2019 18:59:49 +0100 Subject: [PATCH 23/31] Fixed assembly selection for RandomX when it's on Auto --- src/crypto/rx/RxVm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crypto/rx/RxVm.cpp b/src/crypto/rx/RxVm.cpp index 8cdad3710..486d83c2e 100644 --- a/src/crypto/rx/RxVm.cpp +++ b/src/crypto/rx/RxVm.cpp @@ -45,7 +45,7 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig:: m_flags |= RANDOMX_FLAG_JIT; } - if (assembly == Assembly::RYZEN) { + if ((assembly == Assembly::RYZEN) || ((assembly == Assembly::AUTO) && (Cpu::info()->assembly() == Assembly::RYZEN))) { m_flags |= RANDOMX_FLAG_RYZEN; } From 763691fa4b7777a1798c9954fe0e3268f78e7d2d Mon Sep 17 00:00:00 2001 From: SChernykh Date: Mon, 9 Dec 2019 20:29:05 +0100 Subject: [PATCH 24/31] More optimizations for Ryzen --- .../randomx/asm/program_read_dataset_ryzen.inc | 15 +++++++-------- src/crypto/randomx/jit_compiler_x86.cpp | 10 +++++++++- src/crypto/randomx/jit_compiler_x86.hpp | 1 + src/crypto/randomx/jit_compiler_x86_static.asm | 1 + src/crypto/randomx/randomx.h | 4 ++-- 5 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/crypto/randomx/asm/program_read_dataset_ryzen.inc b/src/crypto/randomx/asm/program_read_dataset_ryzen.inc index 
37e2104bf..6bb87c8f9 100644 --- a/src/crypto/randomx/asm/program_read_dataset_ryzen.inc +++ b/src/crypto/randomx/asm/program_read_dataset_ryzen.inc @@ -7,13 +7,12 @@ and edx, RANDOMX_DATASET_BASE_MASK prefetchnta byte ptr [rdi+rdx] ror rbp, 32 ;# swap "ma" and "mx" - add rcx, rdi ;# dataset cache line xor r8, rax - xor r9, qword ptr [rcx+8] - xor r10, qword ptr [rcx+16] - xor r11, qword ptr [rcx+24] - xor r12, qword ptr [rcx+32] - xor r13, qword ptr [rcx+40] - xor r14, qword ptr [rcx+48] - xor r15, qword ptr [rcx+56] + xor r9, qword ptr [rdi+rcx+8] + xor r10, qword ptr [rdi+rcx+16] + xor r11, qword ptr [rdi+rcx+24] + xor r12, qword ptr [rdi+rcx+32] + xor r13, qword ptr [rdi+rcx+40] + xor r14, qword ptr [rdi+rcx+48] + xor r15, qword ptr [rdi+rcx+56] \ No newline at end of file diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 2528c2cf3..84cfe39c7 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -169,6 +169,7 @@ namespace randomx { static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e }; static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 }; static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC }; + static const uint8_t AND_OR_MOV_LDMXCSR_RYZEN[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x3B, 0x44, 0x24, 0xFC, 0x74, 0x09, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC }; static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 }; static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 }; static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 }; @@ -300,6 +301,8 @@ namespace randomx { } void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) { + vm_flags = flags; + generateProgramPrologue(prog, pcfg); uint8_t* p; @@ -1010,7 +1013,12 @@ namespace randomx { emit(ROL_RAX, p, pos); emitByte(rotate, p, pos); } - emit(AND_OR_MOV_LDMXCSR, p, pos); + if (vm_flags & RANDOMX_FLAG_RYZEN) { + emit(AND_OR_MOV_LDMXCSR_RYZEN, p, pos); + } + else { + emit(AND_OR_MOV_LDMXCSR, p, pos); + } codePos = pos; } diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 0d515b0eb..9354e5dbc 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -70,6 +70,7 @@ namespace randomx { uint8_t* allocatedCode; uint8_t* code; int32_t codePos; + uint32_t vm_flags; static bool BranchesWithin32B; diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index 7dd1232d6..90395c522 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -81,6 +81,7 @@ randomx_program_prologue_first_load PROC and eax, RANDOMX_SCRATCHPAD_MASK ror rdx, 32 and edx, RANDOMX_SCRATCHPAD_MASK + stmxcsr dword ptr [rsp-20] jmp randomx_program_loop_begin randomx_program_prologue_first_load ENDP diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 923142299..1ed5aa536 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -119,9 +119,9 @@ struct RandomX_ConfigurationBase rx_vec_i128 fillAes4Rx4_Key[8]; uint8_t codeShhPrefetchTweaked[20]; - uint8_t codeReadDatasetTweaked[72]; + uint8_t codeReadDatasetTweaked[256]; uint32_t codeReadDatasetTweakedSize; - uint8_t codeReadDatasetRyzenTweaked[72]; + uint8_t codeReadDatasetRyzenTweaked[256]; uint32_t 
codeReadDatasetRyzenTweakedSize; uint8_t codeReadDatasetLightSshInitTweaked[68]; uint8_t codePrefetchScratchpadTweaked[32]; From ef522f6404051e7f27456b21c49da835594a26d7 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Mon, 9 Dec 2019 20:30:37 +0100 Subject: [PATCH 25/31] Update jit_compiler_x86_static.S --- src/crypto/randomx/jit_compiler_x86_static.S | 1 + 1 file changed, 1 insertion(+) diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index 8e1f9ef63..50019b7e5 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -93,6 +93,7 @@ DECL(randomx_program_prologue_first_load): and eax, RANDOMX_SCRATCHPAD_MASK ror rdx, 32 and edx, RANDOMX_SCRATCHPAD_MASK + stmxcsr dword ptr [rsp-20] jmp DECL(randomx_program_loop_begin) .balign 64 From f3f75fb788d53854a094005738cecfbca45a9b89 Mon Sep 17 00:00:00 2001 From: xmrig Date: Tue, 10 Dec 2019 11:19:11 +0700 Subject: [PATCH 26/31] Update CHANGELOG.md --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f43e11974..e913a24ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +# v5.2.0 +- **[#1388](https://github.com/xmrig/xmrig/pull/1388) Added 1GB huge pages support for Linux.** + - Added new option `1gb-pages` in `randomx` object. + - Added automatic huge pages configuration on Linux if use the miner with root privileges. + - Added new [hugepages documentation](https://xmrig.com/docs/miner/hugepages) article. +- [#1396](https://github.com/xmrig/xmrig/pull/1396) [#1401](https://github.com/xmrig/xmrig/pull/1401) New performance optimizations for Ryzen CPUs. +- [#1385](https://github.com/xmrig/xmrig/issues/1385) Added `max-threads-hint` option support for RandomX dataset initialization threads. +- [#1386](https://github.com/xmrig/xmrig/issues/1386) Added `priority` option support for RandomX dataset initialization threads. +- For official builds all dependencies (libuv, hwloc, openssl) updated to recent versions. +- Windows `msvc` builds now use Visual Studio 2019 instead of 2017. + # v5.1.1 - [#1365](https://github.com/xmrig/xmrig/issues/1365) Fixed various system response/stability issues. - Added new CPU option `yield` and command line equivalent `--cpu-no-yield`. 
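A note on the Ryzen MXCSR change in the two patches above, since the JIT emits it as a raw byte string: AND_OR_MOV_LDMXCSR_RYZEN hand-decodes to the following (worth verifying with a disassembler):

    and     eax, 0x6000              ; 25 00 60 00 00  keep the rounding-control bits
    or      eax, 0x9FC0              ; 0D C0 9F 00 00  exception masks + FTZ/DAZ
    cmp     eax, dword ptr [rsp-4]   ; 3B 44 24 FC     compare with the cached MXCSR
    je      short skip               ; 74 09           value unchanged: skip the reload
    mov     dword ptr [rsp-4], eax   ; 89 44 24 FC
    ldmxcsr dword ptr [rsp-4]        ; 0F AE 54 24 FC
skip:

ldmxcsr is comparatively expensive, so skipping it whenever the program's rounding mode did not actually change is the whole optimization; the stmxcsr added to randomx_program_prologue_first_load seeds the cached stack slot so the very first comparison is already meaningful. The codeReadDatasetTweaked buffers grow from 72 to 256 bytes to leave headroom for the longer sequences.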
From 6163d27f145817c7215361125a088a943cb1b646 Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 10 Dec 2019 11:56:31 +0700 Subject: [PATCH 27/31] Added command line option --randomx-1gb-pages --- README.md | 1 + src/base/kernel/interfaces/IConfig.h | 1 + src/core/config/ConfigTransform.cpp | 3 +++ src/core/config/Config_platform.h | 2 ++ src/core/config/usage.h | 1 + 5 files changed, 8 insertions(+) diff --git a/README.md b/README.md index 7563ff2d9..ce2232610 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ CPU backend: --randomx-init=N threads count to initialize RandomX dataset --randomx-no-numa disable NUMA support for RandomX --randomx-mode=MODE RandomX mode: auto, fast, light + --randomx-1gb-pages use 1GB hugepages for dataset (Linux only) API: --api-worker-id=ID custom worker-id for API diff --git a/src/base/kernel/interfaces/IConfig.h b/src/base/kernel/interfaces/IConfig.h index dbbf82cd9..94b5f90a3 100644 --- a/src/base/kernel/interfaces/IConfig.h +++ b/src/base/kernel/interfaces/IConfig.h @@ -90,6 +90,7 @@ public: RandomXInitKey = 1022, RandomXNumaKey = 1023, RandomXModeKey = 1029, + RandomX1GbPagesKey = 1031, CPUMaxThreadsKey = 1026, MemoryPoolKey = 1027, YieldKey = 1030, diff --git a/src/core/config/ConfigTransform.cpp b/src/core/config/ConfigTransform.cpp index b9f666789..20a885bac 100644 --- a/src/core/config/ConfigTransform.cpp +++ b/src/core/config/ConfigTransform.cpp @@ -165,6 +165,9 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const case IConfig::RandomXModeKey: /* --randomx-mode */ return set(doc, kRandomX, "mode", arg); + + case IConfig::RandomX1GbPagesKey: /* --randomx-1gb-pages */ + return set(doc, kRandomX, "1gb-pages", true); # endif # ifdef XMRIG_FEATURE_OPENCL diff --git a/src/core/config/Config_platform.h b/src/core/config/Config_platform.h index a866c128c..2a606501c 100644 --- a/src/core/config/Config_platform.h +++ b/src/core/config/Config_platform.h @@ -98,6 +98,8 @@ static const option options[] = { { "randomx-init", 1, nullptr, IConfig::RandomXInitKey }, { "randomx-no-numa", 0, nullptr, IConfig::RandomXNumaKey }, { "randomx-mode", 1, nullptr, IConfig::RandomXModeKey }, + { "randomx-1gb-pages", 0, nullptr, IConfig::RandomX1GbPagesKey }, + { "1gb-pages", 0, nullptr, IConfig::RandomX1GbPagesKey }, # endif # ifdef XMRIG_FEATURE_OPENCL { "opencl", 0, nullptr, IConfig::OclKey }, diff --git a/src/core/config/usage.h b/src/core/config/usage.h index 38a487010..035261485 100644 --- a/src/core/config/usage.h +++ b/src/core/config/usage.h @@ -88,6 +88,7 @@ static inline const std::string &usage() u += " --randomx-init=N threads count to initialize RandomX dataset\n"; u += " --randomx-no-numa disable NUMA support for RandomX\n"; u += " --randomx-mode=MODE RandomX mode: auto, fast, light\n"; + u += " --randomx-1gb-pages use 1GB hugepages for dataset (Linux only)\n"; # endif # ifdef XMRIG_FEATURE_HTTP From 3b4b230cabc0c7df79be0fe9fb9ada3cf645f196 Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 10 Dec 2019 12:49:42 +0700 Subject: [PATCH 28/31] Added CPU vendor enum. 
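For context on the detection code below: CPUID leaf 0 returns the 12-character vendor string split across three registers in EBX, EDX, ECX order, which is why BasicCpuInfo assembles the string from the registers in that order before comparing:

    "GenuineIntel"  ->  EBX = "Genu", EDX = "ineI", ECX = "ntel"
    "AuthenticAMD"  ->  EBX = "Auth", EDX = "enti", ECX = "cAMD"

Anything else now maps to VENDOR_UNKNOWN. Previously every non-AMD CPU fell through to the Intel assembly path; the new enum makes that distinction explicit.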
--- src/backend/cpu/interfaces/ICpuInfo.h | 7 +++++++ src/backend/cpu/platform/AdvancedCpuInfo.cpp | 15 +++++++++++---- src/backend/cpu/platform/AdvancedCpuInfo.h | 2 ++ src/backend/cpu/platform/BasicCpuInfo.cpp | 7 ++++--- src/backend/cpu/platform/BasicCpuInfo.h | 12 +++++++----- src/backend/cpu/platform/BasicCpuInfo_arm.cpp | 8 ++------ 6 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h index 2ffd00f2c..674668b59 100644 --- a/src/backend/cpu/interfaces/ICpuInfo.h +++ b/src/backend/cpu/interfaces/ICpuInfo.h @@ -37,6 +37,12 @@ namespace xmrig { class ICpuInfo { public: + enum Vendor { + VENDOR_UNKNOWN, + VENDOR_INTEL, + VENDOR_AMD + }; + virtual ~ICpuInfo() = default; # if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) @@ -58,6 +64,7 @@ public: virtual size_t nodes() const = 0; virtual size_t packages() const = 0; virtual size_t threads() const = 0; + virtual Vendor vendor() const = 0; }; diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.cpp b/src/backend/cpu/platform/AdvancedCpuInfo.cpp index 37a958dbc..4a3c6f62e 100644 --- a/src/backend/cpu/platform/AdvancedCpuInfo.cpp +++ b/src/backend/cpu/platform/AdvancedCpuInfo.cpp @@ -103,6 +103,13 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : cpu_brand_string(m_brand, data.brand_str); snprintf(m_backend, sizeof m_backend, "libcpuid/%s", cpuid_lib_version()); + if (data.vendor == ::VENDOR_INTEL) { + m_vendor = VENDOR_INTEL; + } + else if (data.vendor == ::VENDOR_AMD) { + m_vendor = VENDOR_AMD; + } + m_threads = static_cast(data.total_logical_cpus); m_packages = std::max(threads() / static_cast(data.num_logical_cpus), 1); m_cores = static_cast(data.num_cores) * m_packages; @@ -111,13 +118,13 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : const auto l2 = static_cast(data.l2_cache); // Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97 - if (data.vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) { + if (m_vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) { m_L2 = l2 * (cores() / 2) * m_packages; m_L2_exclusive = true; } // Workaround for Intel Pentium Dual-Core, Core Duo, Core 2 Duo, Core 2 Quad and their Xeon homologue // These processors have L2 cache shared by 2 cores. - else if (data.vendor == VENDOR_INTEL && data.ext_family == 0x06 && (data.ext_model == 0x0E || data.ext_model == 0x0F || data.ext_model == 0x17)) { + else if (m_vendor == VENDOR_INTEL && data.ext_family == 0x06 && (data.ext_model == 0x0E || data.ext_model == 0x0F || data.ext_model == 0x17)) { size_t l2_count_per_socket = cores() > 1 ? cores() / 2 : 1; m_L2 = data.l2_cache > 0 ? l2 * l2_count_per_socket * m_packages : 0; } @@ -131,10 +138,10 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : if (data.flags[CPU_FEATURE_AES]) { m_aes = true; - if (data.vendor == VENDOR_AMD) { + if (m_vendor == VENDOR_AMD) { m_assembly = (data.ext_family >= 23) ? 
Assembly::RYZEN : Assembly::BULLDOZER; } - else if (data.vendor == VENDOR_INTEL) { + else if (m_vendor == VENDOR_INTEL) { m_assembly = Assembly::INTEL; } } diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.h b/src/backend/cpu/platform/AdvancedCpuInfo.h index f590ccfb3..f6691b8f7 100644 --- a/src/backend/cpu/platform/AdvancedCpuInfo.h +++ b/src/backend/cpu/platform/AdvancedCpuInfo.h @@ -52,6 +52,7 @@ protected: inline size_t nodes() const override { return 0; } inline size_t packages() const override { return m_packages; } inline size_t threads() const override { return m_threads; } + inline Vendor vendor() const override { return m_vendor; } private: Assembly m_assembly; @@ -66,6 +67,7 @@ private: size_t m_L3 = 0; size_t m_packages = 1; size_t m_threads = 0; + Vendor m_vendor = VENDOR_UNKNOWN; }; diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index e2ad19eac..b586fad03 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -151,9 +151,7 @@ static inline bool has_pdpe1gb() xmrig::BasicCpuInfo::BasicCpuInfo() : - m_brand(), m_threads(std::thread::hardware_concurrency()), - m_assembly(Assembly::NONE), m_aes(has_aes_ni()), m_avx2(has_avx2()), m_pdpe1gb(has_pdpe1gb()) @@ -172,12 +170,15 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : memcpy(vendor + 8, &data[2], 4); if (memcmp(vendor, "AuthenticAMD", 12) == 0) { + m_vendor = VENDOR_AMD; + cpuid(PROCESSOR_INFO, data); const int32_t family = get_masked(data[EAX_Reg], 12, 8) + get_masked(data[EAX_Reg], 28, 20); m_assembly = family >= 23 ? Assembly::RYZEN : Assembly::BULLDOZER; } - else { + else if (memcmp(vendor, "GenuineIntel", 12) == 0) { + m_vendor = VENDOR_INTEL; m_assembly = Assembly::INTEL; } } diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h index ecbd3e237..019c1dc04 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.h +++ b/src/backend/cpu/platform/BasicCpuInfo.h @@ -52,16 +52,18 @@ protected: inline size_t nodes() const override { return 0; } inline size_t packages() const override { return 1; } inline size_t threads() const override { return m_threads; } + inline Vendor vendor() const override { return m_vendor; } protected: - char m_brand[64 + 6]; + char m_brand[64 + 6]{}; size_t m_threads; private: - Assembly m_assembly; - bool m_aes; - const bool m_avx2; - const bool m_pdpe1gb; + Assembly m_assembly = Assembly::NONE; + bool m_aes = false; + const bool m_avx2 = false; + const bool m_pdpe1gb = false; + Vendor m_vendor = VENDOR_UNKNOWN; }; diff --git a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp index 5dd54fe92..00f5f01f3 100644 --- a/src/backend/cpu/platform/BasicCpuInfo_arm.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo_arm.cpp @@ -22,7 +22,7 @@ * along with this program. If not, see . */ -#include +#include #include @@ -36,11 +36,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : - m_brand(), - m_threads(std::thread::hardware_concurrency()), - m_aes(false), - m_avx2(false), - m_pdpe1gb(false) + m_threads(std::thread::hardware_concurrency()) { # ifdef XMRIG_ARMv8 memcpy(m_brand, "ARMv8", 5); From 96cfdda9a1cd355aa8db19350ae05a6ab8302f7b Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 10 Dec 2019 23:57:29 +0700 Subject: [PATCH 29/31] Added RandomX option "wrmsr" with command line equivalent --randomx-wrmsr=N. 
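Some background for this patch, since the diff itself only wires up the option: MSR 0x1A4 (MISC_FEATURE_CONTROL) gates the hardware prefetchers on many Intel CPUs. Per Intel's documentation of this register, bit 0 disables the L2 hardware prefetcher, bit 1 the L2 adjacent-cache-line prefetcher, bit 2 the DCU streaming prefetcher and bit 3 the DCU IP prefetcher. The default value 6 (binary 0110) therefore disables the adjacent-line and DCU streaming prefetchers, which can hurt RandomX's random-access workload, while leaving the other two enabled. The new src/crypto/rx/Rx_linux.cpp (its contents are not included in this excerpt) performs the write; a minimal sketch of the standard Linux mechanism it would use, assuming the msr kernel module is loaded and root privileges, looks like:

    #include <cstdint>
    #include <cstdio>
    #include <fcntl.h>
    #include <unistd.h>

    // Hypothetical helper: write one MSR on one CPU via the msr(4) device.
    static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value)
    {
        char path[64];
        snprintf(path, sizeof(path), "/dev/cpu/%u/msr", cpu);

        const int fd = open(path, O_WRONLY);
        if (fd < 0) {
            return false; // module not loaded, or no permission
        }

        // For msr device files, the file offset selects the register index.
        const bool ok = pwrite(fd, &value, sizeof(value), reg) == sizeof(value);
        close(fd);

        return ok;
    }

    // wrmsr_on_cpu(0x1a4, 0, 6) sets bits 1 and 2 on CPU 0; the miner would
    // repeat this for every online CPU.

On the command line the new RandomX knobs from this and the previous patches combine as, for example, xmrig --randomx-1gb-pages --randomx-wrmsr=6 (pool and wallet arguments omitted).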
--- README.md | 1 + cmake/randomx.cmake | 9 +-- src/base/kernel/interfaces/IConfig.h | 1 + src/config.json | 2 +- src/core/config/ConfigTransform.cpp | 3 + src/core/config/Config_platform.h | 1 + src/core/config/usage.h | 1 + src/crypto/rx/Rx.cpp | 17 +++- src/crypto/rx/Rx.h | 3 + src/crypto/rx/RxConfig.cpp | 111 +++++++++++++++++++++++++- src/crypto/rx/RxConfig.h | 3 + src/crypto/rx/RxConfig_basic.cpp | 68 ---------------- src/crypto/rx/RxConfig_hwloc.cpp | 115 --------------------------- src/crypto/rx/Rx_linux.cpp | 114 ++++++++++++++++++++++++++ 14 files changed, 257 insertions(+), 192 deletions(-) delete mode 100644 src/crypto/rx/RxConfig_basic.cpp delete mode 100644 src/crypto/rx/RxConfig_hwloc.cpp create mode 100644 src/crypto/rx/Rx_linux.cpp diff --git a/README.md b/README.md index ce2232610..fdbf0f59a 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ CPU backend: --randomx-no-numa disable NUMA support for RandomX --randomx-mode=MODE RandomX mode: auto, fast, light --randomx-1gb-pages use 1GB hugepages for dataset (Linux only) + --randomx-wrmsr=N write value (0-15) to Intel MSR register 0x1a4 or do nothing (-1) (Linux only) API: --api-worker-id=ID custom worker-id for API diff --git a/cmake/randomx.cmake b/cmake/randomx.cmake index 290b8391f..5a225c00e 100644 --- a/cmake/randomx.cmake +++ b/cmake/randomx.cmake @@ -75,13 +75,12 @@ if (WITH_RANDOMX) ) list(APPEND SOURCES_CRYPTO - src/crypto/rx/RxConfig_hwloc.cpp src/crypto/rx/RxNUMAStorage.cpp ) - else() - list(APPEND SOURCES_CRYPTO - src/crypto/rx/RxConfig_basic.cpp - ) + endif() + + if (XMRIG_OS_LINUX) + list(APPEND SOURCES_CRYPTO src/crypto/rx/Rx_linux.cpp) endif() else() remove_definitions(/DXMRIG_ALGO_RANDOMX) diff --git a/src/base/kernel/interfaces/IConfig.h b/src/base/kernel/interfaces/IConfig.h index 94b5f90a3..fe1a24b53 100644 --- a/src/base/kernel/interfaces/IConfig.h +++ b/src/base/kernel/interfaces/IConfig.h @@ -91,6 +91,7 @@ public: RandomXNumaKey = 1023, RandomXModeKey = 1029, RandomX1GbPagesKey = 1031, + RandomXWrmsrKey = 1032, CPUMaxThreadsKey = 1026, MemoryPoolKey = 1027, YieldKey = 1030, diff --git a/src/config.json b/src/config.json index ea2e4e15d..52afb4cb9 100644 --- a/src/config.json +++ b/src/config.json @@ -11,13 +11,13 @@ "restricted": true }, "autosave": true, - "version": 1, "background": false, "colors": true, "randomx": { "init": -1, "mode": "auto", "1gb-pages": false, + "wrmsr": 6, "numa": true }, "cpu": { diff --git a/src/core/config/ConfigTransform.cpp b/src/core/config/ConfigTransform.cpp index 20a885bac..bc1ce7fca 100644 --- a/src/core/config/ConfigTransform.cpp +++ b/src/core/config/ConfigTransform.cpp @@ -168,6 +168,9 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const case IConfig::RandomX1GbPagesKey: /* --randomx-1gb-pages */ return set(doc, kRandomX, "1gb-pages", true); + + case IConfig::RandomXWrmsrKey: /* --randomx-wrmsr */ + return set(doc, kRandomX, "wrmsr", static_cast(strtol(arg, nullptr, 10))); # endif # ifdef XMRIG_FEATURE_OPENCL diff --git a/src/core/config/Config_platform.h b/src/core/config/Config_platform.h index 2a606501c..9b360b23a 100644 --- a/src/core/config/Config_platform.h +++ b/src/core/config/Config_platform.h @@ -100,6 +100,7 @@ static const option options[] = { { "randomx-mode", 1, nullptr, IConfig::RandomXModeKey }, { "randomx-1gb-pages", 0, nullptr, IConfig::RandomX1GbPagesKey }, { "1gb-pages", 0, nullptr, IConfig::RandomX1GbPagesKey }, + { "randomx-wrmsr", 1, nullptr, IConfig::RandomXWrmsrKey }, # endif # ifdef XMRIG_FEATURE_OPENCL 
     { "opencl",            0, nullptr, IConfig::OclKey              },
diff --git a/src/core/config/usage.h b/src/core/config/usage.h
index 035261485..27edee3d2 100644
--- a/src/core/config/usage.h
+++ b/src/core/config/usage.h
@@ -89,6 +89,7 @@ static inline const std::string &usage()
     u += "      --randomx-no-numa         disable NUMA support for RandomX\n";
     u += "      --randomx-mode=MODE       RandomX mode: auto, fast, light\n";
     u += "      --randomx-1gb-pages       use 1GB hugepages for dataset (Linux only)\n";
+    u += "      --randomx-wrmsr=N         write value (0-15) to Intel MSR register 0x1a4 or do nothing (-1) (Linux only)\n";
 # endif
 
 # ifdef XMRIG_FEATURE_HTTP
diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp
index c311e5331..1de648c20 100644
--- a/src/crypto/rx/Rx.cpp
+++ b/src/crypto/rx/Rx.cpp
@@ -39,8 +39,9 @@ namespace xmrig {
 
 class RxPrivate;
 
-static const char *tag    = BLUE_BG(WHITE_BOLD_S " rx ") " ";
-static RxPrivate *d_ptr   = nullptr;
+static bool osInitialized = false;
+static const char *tag    = BLUE_BG(WHITE_BOLD_S " rx ") " ";
+static RxPrivate *d_ptr   = nullptr;
 
 
 class RxPrivate
@@ -71,6 +72,11 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, const CpuConfig &cp
         return true;
     }
 
+    if (!osInitialized) {
+        osInit(config);
+        osInitialized = true;
+    }
+
     d_ptr->queue.enqueue(job, config.nodeset(), config.threads(cpu.limit()), cpu.isHugePages(), config.isOneGbPages(), config.mode(), cpu.priority());
 
     return false;
@@ -107,3 +113,10 @@ void xmrig::Rx::init(IRxListener *listener)
 {
     d_ptr = new RxPrivate(listener);
 }
+
+
+#ifndef XMRIG_OS_LINUX
+void xmrig::Rx::osInit(const RxConfig &)
+{
+}
+#endif
diff --git a/src/crypto/rx/Rx.h b/src/crypto/rx/Rx.h
index d26304451..1a289b052 100644
--- a/src/crypto/rx/Rx.h
+++ b/src/crypto/rx/Rx.h
@@ -56,6 +56,9 @@ public:
     static RxDataset *dataset(const Job &job, uint32_t nodeId);
     static void destroy();
     static void init(IRxListener *listener);
+
+private:
+    static void osInit(const RxConfig &config);
 };
 
 
diff --git a/src/crypto/rx/RxConfig.cpp b/src/crypto/rx/RxConfig.cpp
index 7ae7d35d8..cd8601a7d 100644
--- a/src/crypto/rx/RxConfig.cpp
+++ b/src/crypto/rx/RxConfig.cpp
@@ -25,9 +25,15 @@
 
 #include "crypto/rx/RxConfig.h"
 #include "backend/cpu/Cpu.h"
+#include "base/io/json/Json.h"
 #include "rapidjson/document.h"
 
+#ifdef XMRIG_FEATURE_HWLOC
+#   include "backend/cpu/platform/HwlocCpuInfo.h"
+#endif
+
+
 #include <algorithm>
 #include <array>
 #include <cmath>
@@ -40,11 +46,100 @@ namespace xmrig {
 
+static const char *kInit       = "init";
+static const char *kMode       = "mode";
+static const char *kOneGbPages = "1gb-pages";
+static const char *kWrmsr      = "wrmsr";
+
+#ifdef XMRIG_FEATURE_HWLOC
+static const char *kNUMA       = "numa";
+#endif
 
 static const std::array<const char *, 3> modeNames = { "auto", "fast", "light" };
+
+}
 
-} // namespace xmrig
+
+bool xmrig::RxConfig::read(const rapidjson::Value &value)
+{
+    if (value.IsObject()) {
+        m_threads = Json::getInt(value, kInit, m_threads);
+        m_mode    = readMode(Json::getValue(value, kMode));
+        m_wrmsr   = readMSR(Json::getValue(value, kWrmsr));
+
+#   ifdef XMRIG_OS_LINUX
+        m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages);
+#   endif
+
+#   ifdef XMRIG_FEATURE_HWLOC
+        if (m_mode == LightMode) {
+            m_numa = false;
+
+            return true;
+        }
+
+        const auto &numa = Json::getValue(value, kNUMA);
+        if (numa.IsArray()) {
+            m_nodeset.reserve(numa.Size());
+
+            for (const auto &node : numa.GetArray()) {
+                if (node.IsUint()) {
+                    m_nodeset.emplace_back(node.GetUint());
+                }
+            }
+        }
+        else if (numa.IsBool()) {
+            m_numa = numa.GetBool();
+        }
+#   endif
+
+        return true;
+    }
+
+    return false;
+}
+
+
+rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
+{
+    using namespace rapidjson;
+    auto &allocator = doc.GetAllocator();
+
+    Value obj(kObjectType);
+    obj.AddMember(StringRef(kInit), m_threads, allocator);
+    obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator);
+    obj.AddMember(StringRef(kOneGbPages), m_oneGbPages, allocator);
+    obj.AddMember(StringRef(kWrmsr), m_wrmsr < 0 ? Value(kFalseType) : Value(m_wrmsr), allocator);
+
+#   ifdef XMRIG_FEATURE_HWLOC
+    if (!m_nodeset.empty()) {
+        Value numa(kArrayType);
+
+        for (uint32_t i : m_nodeset) {
+            numa.PushBack(i, allocator);
+        }
+
+        obj.AddMember(StringRef(kNUMA), numa, allocator);
+    }
+    else {
+        obj.AddMember(StringRef(kNUMA), m_numa, allocator);
+    }
+#   endif
+
+    return obj;
+}
+
+
+#ifdef XMRIG_FEATURE_HWLOC
+std::vector<uint32_t> xmrig::RxConfig::nodeset() const
+{
+    if (!m_nodeset.empty()) {
+        return m_nodeset;
+    }
+
+    return (m_numa && Cpu::info()->nodes() > 1) ? static_cast<HwlocCpuInfo *>(Cpu::info())->nodeset() : std::vector<uint32_t>();
+}
+#endif
 
 
 const char *xmrig::RxConfig::modeName() const
@@ -67,6 +162,20 @@ uint32_t xmrig::RxConfig::threads(uint32_t limit) const
 }
 
 
+int xmrig::RxConfig::readMSR(const rapidjson::Value &value) const
+{
+    if (value.IsInt()) {
+        return std::min(value.GetInt(), 15);
+    }
+
+    if (value.IsBool() && !value.GetBool()) {
+        return -1;
+    }
+
+    return m_wrmsr;
+}
+
+
 xmrig::RxConfig::Mode xmrig::RxConfig::readMode(const rapidjson::Value &value) const
 {
     if (value.IsUint()) {
diff --git a/src/crypto/rx/RxConfig.h b/src/crypto/rx/RxConfig.h
index 90beb4e82..bf5992a32 100644
--- a/src/crypto/rx/RxConfig.h
+++ b/src/crypto/rx/RxConfig.h
@@ -58,14 +58,17 @@ public:
     uint32_t threads(uint32_t limit = 100) const;
 
     inline bool isOneGbPages() const { return m_oneGbPages; }
+    inline int wrmsr() const         { return m_wrmsr; }
     inline Mode mode() const         { return m_mode; }
 
 private:
+    int readMSR(const rapidjson::Value &value) const;
     Mode readMode(const rapidjson::Value &value) const;
 
     bool m_numa       = true;
     bool m_oneGbPages = false;
     int m_threads     = -1;
+    int m_wrmsr       = 6;
     Mode m_mode       = AutoMode;
 
 # ifdef XMRIG_FEATURE_HWLOC
diff --git a/src/crypto/rx/RxConfig_basic.cpp b/src/crypto/rx/RxConfig_basic.cpp
deleted file mode 100644
index bf2a2c8f3..000000000
--- a/src/crypto/rx/RxConfig_basic.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include "crypto/rx/RxConfig.h"
-#include "base/io/json/Json.h"
-#include "rapidjson/document.h"
-
-
-namespace xmrig {
-
-static const char *kInit       = "init";
-static const char *kMode       = "mode";
-static const char *kOneGbPages = "1gb-pages";
-
-}
-
-
-rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
-{
-    using namespace rapidjson;
-    auto &allocator = doc.GetAllocator();
-
-    Value obj(kObjectType);
-    obj.AddMember(StringRef(kInit), m_threads, allocator);
-    obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator);
-    obj.AddMember(StringRef(kOneGbPages), m_oneGbPages, allocator);
-
-    return obj;
-}
-
-
-bool xmrig::RxConfig::read(const rapidjson::Value &value)
-{
-    if (value.IsObject()) {
-        m_threads = Json::getInt(value, kInit, m_threads);
-        m_mode    = readMode(Json::getValue(value, kMode));
-
-#   ifdef XMRIG_OS_LINUX
-        m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages);
-#   endif
-
-        return true;
-    }
-
-    return false;
-}
diff --git a/src/crypto/rx/RxConfig_hwloc.cpp b/src/crypto/rx/RxConfig_hwloc.cpp
deleted file mode 100644
index 91104ef4e..000000000
--- a/src/crypto/rx/RxConfig_hwloc.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include "backend/cpu/Cpu.h"
-#include "backend/cpu/platform/HwlocCpuInfo.h"
-#include "base/io/json/Json.h"
-#include "crypto/rx/RxConfig.h"
-#include "rapidjson/document.h"
-
-
-namespace xmrig {
-
-static const char *kInit       = "init";
-static const char *kMode       = "mode";
-static const char *kNUMA       = "numa";
-static const char *kOneGbPages = "1gb-pages";
-
-}
-
-
-rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
-{
-    using namespace rapidjson;
-    auto &allocator = doc.GetAllocator();
-
-    Value obj(kObjectType);
-
-    obj.AddMember(StringRef(kInit), m_threads, allocator);
-    obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator);
-    obj.AddMember(StringRef(kOneGbPages), m_oneGbPages, allocator);
-
-    if (!m_nodeset.empty()) {
-        Value numa(kArrayType);
-
-        for (uint32_t i : m_nodeset) {
-            numa.PushBack(i, allocator);
-        }
-
-        obj.AddMember(StringRef(kNUMA), numa, allocator);
-    }
-    else {
-        obj.AddMember(StringRef(kNUMA), m_numa, allocator);
-    }
-
-    return obj;
-}
-
-
-bool xmrig::RxConfig::read(const rapidjson::Value &value)
-{
-    if (value.IsObject()) {
-        m_threads = Json::getInt(value, kInit, m_threads);
-        m_mode    = readMode(Json::getValue(value, kMode));
-
-#   ifdef XMRIG_OS_LINUX
-        m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages);
-#   endif
-
-        if (m_mode == LightMode) {
-            m_numa = false;
-
-            return true;
-        }
-
-        const auto &numa = Json::getValue(value, kNUMA);
-        if (numa.IsArray()) {
-            m_nodeset.reserve(numa.Size());
-
-            for (const auto &node : numa.GetArray()) {
-                if (node.IsUint()) {
-                    m_nodeset.emplace_back(node.GetUint());
-                }
-            }
-        }
-        else if (numa.IsBool()) {
-            m_numa = numa.GetBool();
-        }
-
-        return true;
-    }
-
-    return false;
-}
-
-
-std::vector<uint32_t> xmrig::RxConfig::nodeset() const
-{
-    if (!m_nodeset.empty()) {
-        return m_nodeset;
-    }
-
-    return (m_numa && Cpu::info()->nodes() > 1) ? static_cast<HwlocCpuInfo *>(Cpu::info())->nodeset() : std::vector<uint32_t>();
-}
diff --git a/src/crypto/rx/Rx_linux.cpp b/src/crypto/rx/Rx_linux.cpp
new file mode 100644
index 000000000..a513d38aa
--- /dev/null
+++ b/src/crypto/rx/Rx_linux.cpp
@@ -0,0 +1,114 @@
+/* XMRig
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018-2019 tevador     <tevador@gmail.com>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2000      Transmeta Corporation <https://github.com/intel/msr-tools>
+ * Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "crypto/rx/Rx.h"
+#include "backend/common/Tags.h"
+#include "backend/cpu/Cpu.h"
+#include "base/io/log/Log.h"
+#include "crypto/rx/RxConfig.h"
+
+
+#include <cctype>
+#include <cinttypes>
+#include <cstdio>
+#include <cstdlib>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+
+namespace xmrig {
+
+
+static inline int dir_filter(const struct dirent *dirp)
+{
+    return isdigit(dirp->d_name[0]) ? 1 : 0;
+}
+
+
+static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value)
+{
+    char msr_file_name[64]{};
+
+    sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+    int fd = open(msr_file_name, O_WRONLY);
+    if (fd < 0) {
+        return false;
+    }
+
+    const bool success = pwrite(fd, &value, sizeof value, reg) == sizeof value;
+
+    close(fd);
+
+    return success;
+}
+
+
+static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value)
+{
+    struct dirent **namelist;
+    int dir_entries = scandir("/dev/cpu", &namelist, dir_filter, 0);
+    int errors      = 0;
+
+    while (dir_entries--) {
+        if (!wrmsr_on_cpu(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value)) {
+            ++errors;
+        }
+
+        free(namelist[dir_entries]);
+    }
+
+    free(namelist);
+
+    if (errors) {
+        LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "cannot set MSR 0x%04" PRIx32 " to 0x%04" PRIx64, rx_tag(), reg, value);
+    }
+
+    return errors == 0;
+}
+
+
+} // namespace xmrig
+
+
+void xmrig::Rx::osInit(const RxConfig &config)
+{
+    if (config.wrmsr() < 0 || Cpu::info()->vendor() != ICpuInfo::VENDOR_INTEL) {
+        return;
+    }
+
+    if (system("/sbin/modprobe msr > /dev/null 2>&1") != 0) {
+        LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "msr kernel module is not available", rx_tag());
+
+        return;
+    }
+
+    wrmsr_on_all_cpus(0x1a4, config.wrmsr());
+}
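For reference, the new option slots into the existing `randomx` section of config.json. A minimal sketch using the defaults from the config.json diff above (these are illustrative defaults, not a tuning recommendation):

    "randomx": {
        "init": -1,
        "mode": "auto",
        "1gb-pages": false,
        "wrmsr": 6,
        "numa": true
    }

The command-line equivalent is `--randomx-wrmsr=6`. Per `readMSR()` above, an integer value is clamped to at most 15, `"wrmsr": false` maps to -1, and any negative value makes `osInit()` skip the MSR write; the write is also skipped on non-Intel CPUs.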

From fc5b339f04fcc3f52f6ee420d636aa3b8ae96c88 Mon Sep 17 00:00:00 2001
From: XMRig <support@xmrig.com>
Date: Wed, 11 Dec 2019 13:15:31 +0700
Subject: [PATCH 30/31] Added new screenshot.

---
 README.md                 |   2 +-
 doc/screenshot_v5_2_0.png | Bin 0 -> 48600 bytes
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 doc/screenshot_v5_2_0.png

diff --git a/README.md b/README.md
index fdbf0f59a..7ed04212f 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ XMRig High performance, open source, cross platform RandomX, CryptoNight and Arg
 - **OpenCL** for AMD GPUs.
 - **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
 
-<img src="doc/screenshot_v5_1_0.png"/>
+<img src="doc/screenshot_v5_2_0.png"/>
 
 ## Download
 * Binary releases: https://github.com/xmrig/xmrig/releases
diff --git a/doc/screenshot_v5_2_0.png b/doc/screenshot_v5_2_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..630dbf971d92389dccea58b5ecaf76fa0d5bf686
GIT binary patch
literal 48600
[binary PNG data omitted]
z99MJzH2Eg4^ue&WLelD)rfyfF)qt%7M6LDYZ#p8F8w@$LkH*~@gx=6(VwnKSrn80H zG!~UUhg+VQ%oHJp7 z`;DNz$L>i!03u(+aN%$OtKgQ##+a>ni(22Np_lA zoS;hSx%g!tupN^s#_N@KX6FF;!LBfiQH3Hc3uv-e?}fkM%**usAG93H`EK7F1uAG4 zqdAIyPP=;sg9|h$^%G~Nd4;AD7r5O#Oq>!TtwECoX0C7hbB@>}!3l=rV9-C$T790A zaBV*9ujvc$467=)+RYhGQmZjCKd$K!ht;#b&<7rG%|l!Zo@uEM9HWYHp_w^gX4c1< ze{&|}{eSOF_WDoNz81%Hpl;oEQpGQU4wUHZFCC~#0D3PI0Oi+!gF9O=)H`~0_*{A02d&m?3It=msJUQuku9JQ<;iQl^$XuHBHz#c z8Q(Y(b_#j+ zTFuNubg#v1TUmrrWG98<%HTI-D#{xCr8I$0+b<$Q$>woEGJ{Pt|-^oKYBda;FH z+EfcMiQiaPjsV@6o~RsDlgyzekk(`Z)uWDB{S1U$Q8(f>pF}`7)SMG2$c$MXhuo!X zY4g^7Qb7{Wcqu^)O#$vCUr<9CgI?#H5BFe&<45JgW!_^FRIua>cNo$ z$w_7Qfw!%N+Y2hPA=?&CM?gI55uYzOReZ@Z9Tpx?FWQ|a!O)3xgE`;Lwgv_t&4aD1 zW~r5~g|6p)$XNcN$yW-&-z8ewYEl#4%->`*fJKx@yy?y-dnD}}y$X%@6NvZnhhCZ} z$d3HL1<;hD`IA&F;#x%g*nkwsgG=6%|z%m4t6 zP-P;_8B1)h6wG$g&oYXsYZjVAt`Mtv19a@}#nCBQr816`x!G@^U4}8EoK&BawGQ&s?5DSTW5izCpRxo=+`M)-D zM|_1nI?j&t6rr!v?QwVW9fTJ&KIMMBpZNwMIL$RgiE@cz=Yu_TcT7xAXva?N#@V)s z&E_KfYbt`sk$fSWJ*ZFzi)~42ElT=URyuFG!C}H%$AG=#H6&K5XJy&%xoPuJw0ZhW zjFiS=v>U2P!EUtV z%#1kcR+?iN@@0T&mqAyTk*$+7r|N(nuYpI%wi^bRwDHV_?veT&O4-E3pFzwt&P!dO zn`D(pvzDwsBXUEMGka#8cp20n^w^{cXd)QSE369-OUtr!1`ZE7MUOz6eT}yU>xkPM zHiH5v>#svP^HN9e&n#p(2z*o~G@qUni7p@6;ohRND=d`DvHV0kvWD+ygL!y*dCKHF zAMpEp_9ppVT(A1)wF{8@Qgc>(RgbcGfPX0Wv%fch>7Jeq_v*V!{B>?wYTGyrK#wW_ zdhEeNdoY;uWHq5MgXki6;Z|>-Al(>nZ=8B2Voz5)pG2>c<6S?N9U<1bcstf6zLh9a-nfjE8Ux;V zsR)Sh)tjF$cBKTlT%!MeBKZ5eJqu<$`S{;PUAh}!_+k70r8YPA8)?@`vG{4uU~Y+Q zxlE^+hfXJgMJLbj`Wn5!t3?#E)CQ-6E`6Da+=^o!oZ{CyKWEviA{;)TK0IDjN2Pdv znhN^ri#<;k*s`3rTH1+eO+{7UbSh2iJUg!xSQs1%AH)os55=Kw``;dXuTZ0VHJ37Y zr|T+7#irffiD^wefCJZL!QTz)h$wD9Q z%D*(0pzhKEd!w)l7WXTc$lP{gW;@FU0X3B5eBS_G{(%i(_Snx>_ND+6+flaF$gQ>I z^V}A%?cioDH1_3Eh@_{JNzd5D?(r{9K{y%;hLWF Date: Wed, 11 Dec 2019 13:41:48 +0700 Subject: [PATCH 31/31] Update CHANGELOG.md --- CHANGELOG.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e913a24ee..4b92cb292 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,9 @@ # v5.2.0 -- **[#1388](https://github.com/xmrig/xmrig/pull/1388) Added 1GB huge pages support for Linux.** - - Added new option `1gb-pages` in `randomx` object. +- **[#1388](https://github.com/xmrig/xmrig/pull/1388) Added [1GB huge pages support](https://xmrig.com/docs/miner/hugepages#onegb-huge-pages) for Linux.** + - Added new option `1gb-pages` in `randomx` object with command line equivalent `--randomx-1gb-pages`. - Added automatic huge pages configuration on Linux if use the miner with root privileges. - - Added new [hugepages documentation](https://xmrig.com/docs/miner/hugepages) article. +- **Added [automatic Intel prefetchers configuration](https://xmrig.com/docs/miner/randomx-optimization-guide#intel-specific-optimizations) on Linux.** + - Added new option `wrmsr` in `randomx` object with command line equivalent `--randomx-wrmsr`. - [#1396](https://github.com/xmrig/xmrig/pull/1396) [#1401](https://github.com/xmrig/xmrig/pull/1401) New performance optimizations for Ryzen CPUs. - [#1385](https://github.com/xmrig/xmrig/issues/1385) Added `max-threads-hint` option support for RandomX dataset initialization threads. - [#1386](https://github.com/xmrig/xmrig/issues/1386) Added `priority` option support for RandomX dataset initialization threads.