diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS index 75557e5f..4ddcbf44 100644 --- a/src/3rdparty/hwloc/NEWS +++ b/src/3rdparty/hwloc/NEWS @@ -17,6 +17,50 @@ bug fixes (and other actions) for each version of hwloc since version 0.9. +Version 2.9.0 +------------- +* Backends + + Expose the memory size of CXL memory devices (Type 3) on Linux. + + The LevelZero backend now reports the "XeLinkBandwidth" distance + matrix between L0 devices (and subdevices) when available. + + Add support for CUDA compute capability up to 9.0. +* Tools + + lstopo now switches to console mode when its output is redirected. + Graphical window mode may be forced back with --of window. + + hwloc-calc now accepts "numa" in -H, and I/O subtypes such as "gpu" + in -I and -N. + + +Version 2.8.0 +------------- +* API + + Add HWLOC_TOPOLOGY_FLAG_NO_DISTANCES, _NO_MEMATTRS and _NO_CPUKINDS + to reduce the overhead when unneeded. + + Add separate Read/Write Bandwidth/Latency memory attributes and + implement them on Linux. +* Backends + + NUMA nodes may now have a subtype such as DRAM, HBM, SPM, or NVM + on heterogeneous memory platforms on Linux. + - Add DAXType and DAXParent attributes on Linux to tell where a + DAX device or its corresponding NUMA node comes from (SPM for + Specific-Purpose or NVM for Non-Volatile Memory). + + Detect heterogeneous caches in hybrid CPUs on MacOS X, + thanks to Paul Bone for the help. + + Max frequencies are not ignored in Linux cpukinds anymore (they were + ignored in hwloc 2.7.0), but they may be slightly adjusted to avoid + reporting hybrid CPUs because of Intel Turbo Boost Max 3.0. + - See the documentation of environment variable HWLOC_CPUKINDS_MAXFREQ. + + Hardwire the PCI locality of HPE Cray EX235a nodes. +* Tools + + lstopo and other tools may now load Linux and x86 cpuid topology files + from a tarball. + + lstopo may now replace the P# and L# index prefixes with custom strings + thanks to --os-index-prefix and --logical-index-prefix options.
+* Misc + + Add --disable-readme to avoid regenerating the top-level hwloc README + file from the documentation. + + Version 2.7.1 ------------- * Workaround crashes when virtual machines report incoherent x86 CPUID diff --git a/src/3rdparty/hwloc/README b/src/3rdparty/hwloc/README index 932d6d09..43210e63 100644 --- a/src/3rdparty/hwloc/README +++ b/src/3rdparty/hwloc/README @@ -78,7 +78,7 @@ debug and report issues. Questions may be sent to the users or developers mailing lists (https:// www.open-mpi.org/community/lists/hwloc.php). -There is also a #hwloc IRC channel on Freenode (irc.freenode.net). +There is also a #hwloc IRC channel on Libera Chat (irc.libera.chat). diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION index d17fb44e..af3c4889 100644 --- a/src/3rdparty/hwloc/VERSION +++ b/src/3rdparty/hwloc/VERSION @@ -8,8 +8,8 @@ # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. major=2 -minor=7 -release=1 +minor=9 +release=0 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Mar 20, 2022" +date="Dec 14, 2022" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. -libhwloc_so_version=20:3:5 +libhwloc_so_version=21:1:6 libnetloc_so_version=0:0:0 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h index 35bbcc71..18ea1dfa 100644 --- a/src/3rdparty/hwloc/include/hwloc.h +++ b/src/3rdparty/hwloc/include/hwloc.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. 
All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -93,7 +93,7 @@ extern "C" { * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION * even if their HWLOC_VERSION are different. */ -#define HWLOC_API_VERSION 0x00020500 +#define HWLOC_API_VERSION 0x00020800 /** \brief Indicate at runtime which hwloc API version was used at build time. * @@ -971,7 +971,7 @@ HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwlo * * If \p size is 0, \p string may safely be \c NULL. * - * \return the number of character that were actually written if not truncating, + * \return the number of characters that were actually written if not truncating, * or that would have been written (not including the ending \\0). */ HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, @@ -986,7 +986,7 @@ HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_ * * If \p size is 0, \p string may safely be \c NULL. * - * \return the number of character that were actually written if not truncating, + * \return the number of characters that were actually written if not truncating, * or that would have been written (not including the ending \\0). */ HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, @@ -2060,7 +2060,26 @@ enum hwloc_topology_flags_e { * not change to due thread binding changes on Windows * (see ::HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING). */ - HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING = (1UL<<6) + HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING = (1UL<<6), + + /** \brief Ignore distances. + * + * Ignore distance information from the operating systems (and from XML) + * and hence do not use distances for grouping. + */ + HWLOC_TOPOLOGY_FLAG_NO_DISTANCES = (1UL<<7), + + /** \brief Ignore memory attributes. 
+ * + * Ignore memory attributes from the operating systems (and from XML). + */ + HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS = (1UL<<8), + + /** \brief Ignore CPU Kinds. + * + * Ignore CPU kind information from the operating systems (and from XML). + */ + HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS = (1UL<<9) }; /** \brief Set OR'ed flags to non-yet-loaded topology. diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index 562a4811..fcaf70ca 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -11,10 +11,10 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.7.1" +#define HWLOC_VERSION "2.9.0" #define HWLOC_VERSION_MAJOR 2 -#define HWLOC_VERSION_MINOR 7 -#define HWLOC_VERSION_RELEASE 1 +#define HWLOC_VERSION_MINOR 9 +#define HWLOC_VERSION_RELEASE 0 #define HWLOC_VERSION_GREEK "" #define __hwloc_restrict diff --git a/src/3rdparty/hwloc/include/hwloc/bitmap.h b/src/3rdparty/hwloc/include/hwloc/bitmap.h index 8d9bb9c8..cd118b38 100644 --- a/src/3rdparty/hwloc/include/hwloc/bitmap.h +++ b/src/3rdparty/hwloc/include/hwloc/bitmap.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -112,7 +112,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t sr * * If \p buflen is 0, \p buf may safely be \c NULL.
* - * \return the number of character that were actually written if not truncating, + * \return the number of characters that were actually written if not truncating, * or that would have been written (not including the ending \\0). */ HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); @@ -137,7 +137,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwl * * If \p buflen is 0, \p buf may safely be \c NULL. * - * \return the number of character that were actually written if not truncating, + * \return the number of characters that were actually written if not truncating, * or that would have been written (not including the ending \\0). */ HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); @@ -161,7 +161,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * * * If \p buflen is 0, \p buf may safely be \c NULL. * - * \return the number of character that were actually written if not truncating, + * \return the number of characters that were actually written if not truncating, * or that would have been written (not including the ending \\0). */ HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); @@ -357,11 +357,11 @@ HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_ * The loop must start with hwloc_bitmap_foreach_begin() and end * with hwloc_bitmap_foreach_end() followed by a terminating ';'. * - * \p index is the loop variable; it should be an unsigned int. The - * first iteration will set \p index to the lowest index in the bitmap. + * \p id is the loop variable; it should be an unsigned int. The + * first iteration will set \p id to the lowest index in the bitmap. * Successive iterations will iterate through, in order, all remaining * indexes set in the bitmap. 
To be specific: each iteration will return a - * value for \p index such that hwloc_bitmap_isset(bitmap, index) is true. + * value for \p id such that hwloc_bitmap_isset(bitmap, id) is true. * * The assert prevents the loop from being infinite if the bitmap is infinitely set. * diff --git a/src/3rdparty/hwloc/include/hwloc/deprecated.h b/src/3rdparty/hwloc/include/hwloc/deprecated.h index f2419dd4..d563b437 100644 --- a/src/3rdparty/hwloc/include/hwloc/deprecated.h +++ b/src/3rdparty/hwloc/include/hwloc/deprecated.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -55,7 +55,7 @@ hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj * * If \p size is 0, \p string may safely be \c NULL. * - * \return the number of character that were actually written if not truncating, + * \return the number of characters that were actually written if not truncating, * or that would have been written (not including the ending \\0). */ static __hwloc_inline int diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h index 44cd7ea1..effa8663 100644 --- a/src/3rdparty/hwloc/include/hwloc/distances.h +++ b/src/3rdparty/hwloc/include/hwloc/distances.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2021 Inria. All rights reserved. + * Copyright © 2010-2022 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -35,8 +35,8 @@ extern "C" { * from a core in another node. * The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER. * The name of this distances structure is "NUMALatency". - * Others distance structures include and "XGMIBandwidth", "XGMIHops" - * and "NVLinkBandwidth". 
+ * Other distance structures include "XGMIBandwidth", "XGMIHops", + * "XeLinkBandwidth" and "NVLinkBandwidth". * * The matrix may also contain bandwidths between random sets of objects, * possibly provided by the user, as specified in the \p kind attribute. @@ -160,7 +160,8 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, * Usually only one distances structure may match a given name. * * The name of the most common structure is "NUMALatency". - * Others include "XGMIBandwidth", "XGMIHops" and "NVLinkBandwidth". + * Others include "XGMIBandwidth", "XGMIHops", "XeLinkBandwidth", + * and "NVLinkBandwidth". */ HWLOC_DECLSPEC int hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, diff --git a/src/3rdparty/hwloc/include/hwloc/helper.h b/src/3rdparty/hwloc/include/hwloc/helper.h index f918d816..44994211 100644 --- a/src/3rdparty/hwloc/include/hwloc/helper.h +++ b/src/3rdparty/hwloc/include/hwloc/helper.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -886,9 +886,6 @@ enum hwloc_distrib_flags_e { * \p flags should be 0 or a OR'ed set of ::hwloc_distrib_flags_e. * * \note This function requires the \p roots objects to have a CPU set. - * - * \note This function replaces the now deprecated hwloc_distribute() - * and hwloc_distributev() functions. */ static __hwloc_inline int hwloc_distrib(hwloc_topology_t topology, diff --git a/src/3rdparty/hwloc/include/hwloc/intel-mic.h b/src/3rdparty/hwloc/include/hwloc/intel-mic.h deleted file mode 100644 index c504cd7e..00000000 --- a/src/3rdparty/hwloc/include/hwloc/intel-mic.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright © 2013-2016 Inria. All rights reserved. - * See COPYING in top-level directory.
- */ - -/** \file - * \brief Macros to help interaction between hwloc and Intel Xeon Phi (MIC). - * - * Applications that use both hwloc and Intel Xeon Phi (MIC) may want to - * include this file so as to get topology information for MIC devices. - */ - -#ifndef HWLOC_INTEL_MIC_H -#define HWLOC_INTEL_MIC_H - -#include "hwloc.h" -#include "hwloc/autogen/config.h" -#include "hwloc/helper.h" - -#ifdef HWLOC_LINUX_SYS -#include "hwloc/linux.h" - -#include -#include -#endif - -#include -#include - - -#ifdef __cplusplus -extern "C" { -#endif - - -/** \defgroup hwlocality_intel_mic Interoperability with Intel Xeon Phi (MIC) - * - * This interface offers ways to retrieve topology information about - * Intel Xeon Phi (MIC) devices. - * - * @{ - */ - -/** \brief Get the CPU set of logical processors that are physically - * close to MIC device whose index is \p idx. - * - * Return the CPU set describing the locality of the MIC device whose index is \p idx. - * - * Topology \p topology and device index \p idx must match the local machine. - * I/O devices detection is not needed in the topology. - * - * The function only returns the locality of the device. - * If more information about the device is needed, OS objects should - * be used instead, see hwloc_intel_mic_get_device_osdev_by_index(). - * - * This function is currently only implemented in a meaningful way for - * Linux; other systems will simply get a full cpuset. 
- */ -static __hwloc_inline int -hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, - int idx __hwloc_attribute_unused, - hwloc_cpuset_t set) -{ -#ifdef HWLOC_LINUX_SYS - /* If we're on Linux, use the sysfs mechanism to get the local cpus */ -#define HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX 128 - char path[HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX]; - DIR *sysdir = NULL; - struct dirent *dirent; - unsigned pcibus, pcidev, pcifunc; - - if (!hwloc_topology_is_thissystem(topology)) { - errno = EINVAL; - return -1; - } - - sprintf(path, "/sys/class/mic/mic%d", idx); - sysdir = opendir(path); - if (!sysdir) - return -1; - - while ((dirent = readdir(sysdir)) != NULL) { - if (sscanf(dirent->d_name, "pci_%02x:%02x.%02x", &pcibus, &pcidev, &pcifunc) == 3) { - sprintf(path, "/sys/class/mic/mic%d/pci_%02x:%02x.%02x/local_cpus", idx, pcibus, pcidev, pcifunc); - if (hwloc_linux_read_path_as_cpumask(path, set) < 0 - || hwloc_bitmap_iszero(set)) - hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); - break; - } - } - - closedir(sysdir); -#else - /* Non-Linux systems simply get a full cpuset */ - hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); -#endif - return 0; -} - -/** \brief Get the hwloc OS device object corresponding to the - * MIC device for the given index. - * - * Return the OS device object describing the MIC device whose index is \p idx. - * Return NULL if there is none. - * - * The topology \p topology does not necessarily have to match the current - * machine. For instance the topology may be an XML import of a remote host. - * I/O devices detection must be enabled in the topology. - * - * \note The corresponding PCI device object can be obtained by looking - * at the OS device parent object. 
- */ -static __hwloc_inline hwloc_obj_t -hwloc_intel_mic_get_device_osdev_by_index(hwloc_topology_t topology, - unsigned idx) -{ - hwloc_obj_t osdev = NULL; - while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { - if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type - && osdev->name - && !strncmp("mic", osdev->name, 3) - && atoi(osdev->name + 3) == (int) idx) - return osdev; - } - return NULL; -} - -/** @} */ - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* HWLOC_INTEL_MIC_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/memattrs.h b/src/3rdparty/hwloc/include/hwloc/memattrs.h index 02ffa832..acf4da53 100644 --- a/src/3rdparty/hwloc/include/hwloc/memattrs.h +++ b/src/3rdparty/hwloc/include/hwloc/memattrs.h @@ -54,6 +54,8 @@ extern "C" { * Attribute values for these nodes, if any, may then be obtained with * hwloc_memattr_get_value() and manually compared with the desired criteria. * + * \sa An example is available in doc/examples/memory-attributes.c in the source tree. + * * \note The API also supports specific objects as initiator, * but it is currently not used internally by hwloc. * Users may for instance use it to provide custom performance @@ -65,19 +67,19 @@ extern "C" { /** \brief Memory node attributes. */ enum hwloc_memattr_id_e { - /** \brief "Capacity". - * The capacity is returned in bytes - * (local_memory attribute in objects). + /** \brief + * The \"Capacity\" is returned in bytes (local_memory attribute in objects). * * Best capacity nodes are nodes with higher capacity. * * No initiator is involved when looking at this attribute. * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. + * \hideinitializer */ HWLOC_MEMATTR_ID_CAPACITY = 0, - /** \brief "Locality". - * The locality is returned as the number of PUs in that locality + /** \brief + * The \"Locality\" is returned as the number of PUs in that locality * (e.g. the weight of its cpuset). 
* * Best locality nodes are nodes with smaller locality @@ -87,26 +89,87 @@ enum hwloc_memattr_id_e { * * No initiator is involved when looking at this attribute. * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. + * \hideinitializer */ HWLOC_MEMATTR_ID_LOCALITY = 1, - /** \brief "Bandwidth". - * The bandwidth is returned in MiB/s, as seen from the given initiator location. + /** \brief + * The \"Bandwidth\" is returned in MiB/s, as seen from the given initiator location. + * * Best bandwidth nodes are nodes with higher bandwidth. + * * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + * + * This is the average bandwidth for read and write accesses. If the platform + * provides individual read and write bandwidths but no explicit average value, + * hwloc computes and returns the average. + * \hideinitializer */ HWLOC_MEMATTR_ID_BANDWIDTH = 2, - /** \brief "Latency". - * The latency is returned as nanoseconds, as seen from the given initiator location. + /** \brief + * The \"ReadBandwidth\" is returned in MiB/s, as seen from the given initiator location. + * + * Best bandwidth nodes are nodes with higher bandwidth. + * + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST + * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + * \hideinitializer + */ + HWLOC_MEMATTR_ID_READ_BANDWIDTH = 4, + + /** \brief + * The \"WriteBandwidth\" is returned in MiB/s, as seen from the given initiator location. + * + * Best bandwidth nodes are nodes with higher bandwidth. + * + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST + * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + * \hideinitializer + */ + HWLOC_MEMATTR_ID_WRITE_BANDWIDTH = 5, + + /** \brief + * The \"Latency\" is returned as nanoseconds, as seen from the given initiator location. + * * Best latency nodes are nodes with smaller latency. 
+ * * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + * + * This is the average latency for read and write accesses. If the platform + * provides individual read and write latencies but no explicit average value, + * hwloc computes and returns the average. + * \hideinitializer */ - HWLOC_MEMATTR_ID_LATENCY = 3 + HWLOC_MEMATTR_ID_LATENCY = 3, - /* TODO read vs write, persistence? */ + /** \brief + * The \"ReadLatency\" is returned as nanoseconds, as seen from the given initiator location. + * + * Best latency nodes are nodes with smaller latency. + * + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST + * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + * \hideinitializer + */ + HWLOC_MEMATTR_ID_READ_LATENCY = 6, + + /** \brief + * The \"WriteLatency\" is returned as nanoseconds, as seen from the given initiator location. + * + * Best latency nodes are nodes with smaller latency. + * + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST + * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + * \hideinitializer + */ + HWLOC_MEMATTR_ID_WRITE_LATENCY = 7, + + /* TODO persistence? */ + + HWLOC_MEMATTR_ID_MAX /**< \private Sentinel value */ }; /** \brief A memory attribute identifier. diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h index ed4b833d..d7abb02c 100644 --- a/src/3rdparty/hwloc/include/hwloc/plugins.h +++ b/src/3rdparty/hwloc/include/hwloc/plugins.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2021 Inria. All rights reserved. + * Copyright © 2013-2022 Inria. All rights reserved. * Copyright © 2016 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -338,9 +338,15 @@ struct hwloc_component { * This function return 1 by default (show critical only), * 0 in lstopo (show all), * or anything set in HWLOC_HIDE_ERRORS in the environment. 
+ * + * Use macros HWLOC_SHOW_CRITICAL_ERRORS() and HWLOC_SHOW_ALL_ERRORS() + * for clarity. */ HWLOC_DECLSPEC int hwloc_hide_errors(void); +#define HWLOC_SHOW_CRITICAL_ERRORS() (hwloc_hide_errors() < 2) +#define HWLOC_SHOW_ALL_ERRORS() (hwloc_hide_errors() == 0) + /** \brief Add an object to the topology. * * Insert new object \p obj in the topology starting under existing object \p root @@ -501,6 +507,7 @@ hwloc_filter_check_pcidev_subtype_important(unsigned classid) || baseclass == 0x0b /* PCI_BASE_CLASS_PROCESSOR */ || classid == 0x0c04 /* PCI_CLASS_SERIAL_FIBER */ || classid == 0x0c06 /* PCI_CLASS_SERIAL_INFINIBAND */ + || classid == 0x0502 /* PCI_CLASS_MEMORY_CXL */ || baseclass == 0x06 /* PCI_BASE_CLASS_BRIDGE with non-PCI downstream. the core will drop the useless ones later */ || baseclass == 0x12 /* Processing Accelerators */); } diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h index ae439b51..279ecd84 100644 --- a/src/3rdparty/hwloc/include/hwloc/rename.h +++ b/src/3rdparty/hwloc/include/hwloc/rename.h @@ -1,6 +1,6 @@ /* * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright © 2010-2021 Inria. All rights reserved. + * Copyright © 2010-2022 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -123,6 +123,9 @@ extern "C" { #define HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING) #define HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING) #define HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_DONT_CHANGE_BINDING) +#define HWLOC_TOPOLOGY_FLAG_NO_DISTANCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_NO_DISTANCES) +#define HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS HWLOC_NAME_CAPS(TOPOLOGY_FLAG_NO_MEMATTRS) +#define HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS HWLOC_NAME_CAPS(TOPOLOGY_FLAG_NO_CPUKINDS) #define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid) #define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) @@ -381,6 +384,11 @@ extern "C" { #define HWLOC_MEMATTR_ID_LOCALITY HWLOC_NAME_CAPS(MEMATTR_ID_LOCALITY) #define HWLOC_MEMATTR_ID_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_BANDWIDTH) #define HWLOC_MEMATTR_ID_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_LATENCY) +#define HWLOC_MEMATTR_ID_READ_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_READ_BANDWIDTH) +#define HWLOC_MEMATTR_ID_WRITE_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_WRITE_BANDWIDTH) +#define HWLOC_MEMATTR_ID_READ_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_READ_LATENCY) +#define HWLOC_MEMATTR_ID_WRITE_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_WRITE_LATENCY) +#define HWLOC_MEMATTR_ID_MAX HWLOC_NAME_CAPS(MEMATTR_ID_MAX) #define hwloc_memattr_id_t HWLOC_NAME(memattr_id_t) #define hwloc_memattr_get_by_name HWLOC_NAME(memattr_get_by_name) @@ -862,6 +870,7 @@ extern "C" { #define hwloc_internal_memattrs_destroy HWLOC_NAME(internal_memattrs_destroy) #define hwloc_internal_memattrs_need_refresh HWLOC_NAME(internal_memattrs_need_refresh) #define hwloc_internal_memattrs_refresh HWLOC_NAME(internal_memattrs_refresh) +#define hwloc_internal_memattrs_guess_memory_tiers HWLOC_NAME(internal_memattrs_guess_memory_tiers) #define hwloc_internal_cpukind_s HWLOC_NAME(internal_cpukind_s) #define hwloc_internal_cpukinds_init 
HWLOC_NAME(internal_cpukinds_init) diff --git a/src/3rdparty/hwloc/include/private/private.h b/src/3rdparty/hwloc/include/private/private.h index 131b0796..c61acb71 100644 --- a/src/3rdparty/hwloc/include/private/private.h +++ b/src/3rdparty/hwloc/include/private/private.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * @@ -259,6 +259,7 @@ struct hwloc_topology { unsigned bus_first, bus_last; hwloc_bitmap_t cpuset; } * pci_forced_locality; + hwloc_uint64_t pci_locality_quirks; /* component blacklisting */ unsigned nr_blacklisted_components; @@ -419,6 +420,7 @@ extern void hwloc_internal_memattrs_need_refresh(hwloc_topology_t topology); extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology); extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old); extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value); +extern int hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology); extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology); extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology); diff --git a/src/3rdparty/hwloc/include/private/windows.h b/src/3rdparty/hwloc/include/private/windows.h index 0a061b09..cb3e0d62 100644 --- a/src/3rdparty/hwloc/include/private/windows.h +++ b/src/3rdparty/hwloc/include/private/windows.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 Université Bordeaux - * Copyright © 2020 Inria. All rights reserved. + * Copyright © 2020-2022 Inria. All rights reserved. * * See COPYING in top-level directory. 
*/ @@ -8,13 +8,22 @@ #ifndef HWLOC_PRIVATE_WINDOWS_H #define HWLOC_PRIVATE_WINDOWS_H +#ifndef _ANONYMOUS_UNION #ifdef __GNUC__ #define _ANONYMOUS_UNION __extension__ -#define _ANONYMOUS_STRUCT __extension__ #else #define _ANONYMOUS_UNION +#endif /* __GNUC__ */ +#endif /* _ANONYMOUS_UNION */ + +#ifndef _ANONYMOUS_STRUCT +#ifdef __GNUC__ +#define _ANONYMOUS_STRUCT __extension__ +#else #define _ANONYMOUS_STRUCT #endif /* __GNUC__ */ +#endif /* _ANONYMOUS_STRUCT */ + #define DUMMYUNIONNAME #define DUMMYSTRUCTNAME diff --git a/src/3rdparty/hwloc/src/components.c b/src/3rdparty/hwloc/src/components.c index 81e3116b..b0381c83 100644 --- a/src/3rdparty/hwloc/src/components.c +++ b/src/3rdparty/hwloc/src/components.c @@ -1,5 +1,5 @@ /* - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * Copyright © 2012 Université Bordeaux * See COPYING in top-level directory. */ @@ -386,7 +386,7 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, |HWLOC_DISC_PHASE_MISC |HWLOC_DISC_PHASE_ANNOTATE |HWLOC_DISC_PHASE_TWEAK))) { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: Cannot register discovery component `%s' with invalid phases 0x%x\n", component->name, component->phases); return -1; @@ -476,7 +476,7 @@ hwloc_components_init(void) /* hwloc_static_components is created by configure in static-components.h */ for(i=0; NULL != hwloc_static_components[i]; i++) { if (hwloc_static_components[i]->flags) { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: Ignoring static component with invalid flags %lx\n", hwloc_static_components[i]->flags); continue; @@ -505,7 +505,7 @@ hwloc_components_init(void) #ifdef HWLOC_HAVE_PLUGINS for(desc = hwloc_plugins; NULL != desc; desc = desc->next) { if (desc->component->flags) { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: Ignoring plugin `%s' component 
with invalid flags %lx\n", desc->name, desc->component->flags); continue; @@ -738,7 +738,7 @@ hwloc_disc_component_try_enable(struct hwloc_topology *topology, backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases, NULL, NULL, NULL); if (!backend) { - if (hwloc_components_verbose || (envvar_forced && hwloc_hide_errors() < 2)) + if (hwloc_components_verbose || (envvar_forced && HWLOC_SHOW_CRITICAL_ERRORS())) fprintf(stderr, "hwloc: Failed to instantiate discovery component `%s'\n", comp->name); return -1; } @@ -835,7 +835,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) if (comp->phases & ~blacklisted_phases) hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases); } else { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: Cannot find discovery component `%s'\n", name); } @@ -967,7 +967,7 @@ hwloc_backend_enable(struct hwloc_backend *backend) /* check backend flags */ if (backend->flags) { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n", backend->component->name, backend->component->phases, backend->flags); return -1; diff --git a/src/3rdparty/hwloc/src/cpukinds.c b/src/3rdparty/hwloc/src/cpukinds.c index fc05f17e..6c7c087f 100644 --- a/src/3rdparty/hwloc/src/cpukinds.c +++ b/src/3rdparty/hwloc/src/cpukinds.c @@ -1,5 +1,5 @@ /* - * Copyright © 2020-2021 Inria. All rights reserved. + * Copyright © 2020-2022 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -504,7 +504,7 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology) heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY; else if (!strcmp(env, "no_forced_efficiency")) heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY; - else if (hwloc_hide_errors() < 2) + else if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env); } diff --git a/src/3rdparty/hwloc/src/diff.c b/src/3rdparty/hwloc/src/diff.c index 7449a858..81e12c55 100644 --- a/src/3rdparty/hwloc/src/diff.c +++ b/src/3rdparty/hwloc/src/diff.c @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2020 Inria. All rights reserved. + * Copyright © 2013-2022 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -218,7 +218,7 @@ hwloc_diff_trees(hwloc_topology_t topo1, hwloc_obj_t obj1, struct hwloc_info_s *info1 = &obj1->infos[i], *info2 = &obj2->infos[i]; if (strcmp(info1->name, info2->name)) goto out_too_complex; - if (strcmp(obj1->infos[i].value, obj2->infos[i].value)) { + if (strcmp(info1->value, info2->value)) { err = hwloc_append_diff_obj_attr_string(obj1, HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO, info1->name, diff --git a/src/3rdparty/hwloc/src/distances.c b/src/3rdparty/hwloc/src/distances.c index 252c253e..bfc7d61d 100644 --- a/src/3rdparty/hwloc/src/distances.c +++ b/src/3rdparty/hwloc/src/distances.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2021 Inria. All rights reserved. + * Copyright © 2010-2022 Inria. All rights reserved. * Copyright © 2011-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -860,7 +860,7 @@ struct hwloc_distances_container_s { struct hwloc_distances_s distances; }; -#define HWLOC_DISTANCES_CONTAINER_OFFSET ((char*)&((struct hwloc_distances_container_s*)NULL)->distances - (char*)NULL) +#define HWLOC_DISTANCES_CONTAINER_OFFSET ((uintptr_t)(&((struct hwloc_distances_container_s*)NULL)->distances) - (uintptr_t)NULL) #define HWLOC_DISTANCES_CONTAINER(_d) (struct hwloc_distances_container_s *) ( ((char*)_d) - HWLOC_DISTANCES_CONTAINER_OFFSET ) static struct hwloc_internal_distances_s * diff --git a/src/3rdparty/hwloc/src/memattrs.c b/src/3rdparty/hwloc/src/memattrs.c index 92efe575..b27ed3ec 100644 --- a/src/3rdparty/hwloc/src/memattrs.c +++ b/src/3rdparty/hwloc/src/memattrs.c @@ -1,11 +1,12 @@ /* - * Copyright © 2020-2021 Inria. All rights reserved. + * Copyright © 2020-2022 Inria. All rights reserved. * See COPYING in top-level directory. */ #include "private/autogen/config.h" #include "hwloc.h" #include "private/private.h" +#include "private/debug.h" /***************************** @@ -49,36 +50,51 @@ hwloc__setup_memattr(struct hwloc_internal_memattr_s *imattr, void hwloc_internal_memattrs_prepare(struct hwloc_topology *topology) { -#define NR_DEFAULT_MEMATTRS 4 - topology->memattrs = malloc(NR_DEFAULT_MEMATTRS * sizeof(*topology->memattrs)); + topology->memattrs = malloc(HWLOC_MEMATTR_ID_MAX * sizeof(*topology->memattrs)); if (!topology->memattrs) return; - assert(HWLOC_MEMATTR_ID_CAPACITY < NR_DEFAULT_MEMATTRS); hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_CAPACITY], (char *) "Capacity", HWLOC_MEMATTR_FLAG_HIGHER_FIRST, HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE); - assert(HWLOC_MEMATTR_ID_LOCALITY < NR_DEFAULT_MEMATTRS); hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LOCALITY], (char *) "Locality", HWLOC_MEMATTR_FLAG_LOWER_FIRST, HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE); - assert(HWLOC_MEMATTR_ID_BANDWIDTH < NR_DEFAULT_MEMATTRS); 
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH], (char *) "Bandwidth", HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, HWLOC_IMATTR_FLAG_STATIC_NAME); - assert(HWLOC_MEMATTR_ID_LATENCY < NR_DEFAULT_MEMATTRS); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_BANDWIDTH], + (char *) "ReadBandwidth", + HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, + HWLOC_IMATTR_FLAG_STATIC_NAME); + + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_BANDWIDTH], + (char *) "WriteBandwidth", + HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, + HWLOC_IMATTR_FLAG_STATIC_NAME); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LATENCY], (char *) "Latency", HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, HWLOC_IMATTR_FLAG_STATIC_NAME); - topology->nr_memattrs = NR_DEFAULT_MEMATTRS; + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_LATENCY], + (char *) "ReadLatency", + HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, + HWLOC_IMATTR_FLAG_STATIC_NAME); + + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_LATENCY], + (char *) "WriteLatency", + HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, + HWLOC_IMATTR_FLAG_STATIC_NAME); + + topology->nr_memattrs = HWLOC_MEMATTR_ID_MAX; } static void @@ -1197,3 +1213,214 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology, *nrp = i; return 0; } + + +/************************************** + * Using memattrs to identify HBM/DRAM + */ + +struct hwloc_memory_tier_s { + hwloc_obj_t node; + uint64_t local_bw; + enum hwloc_memory_tier_type_e { + /* warning the order is important for guess_memory_tiers() after qsort() */ + HWLOC_MEMORY_TIER_UNKNOWN, + HWLOC_MEMORY_TIER_DRAM, + HWLOC_MEMORY_TIER_HBM, + HWLOC_MEMORY_TIER_SPM, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm */ + HWLOC_MEMORY_TIER_NVM, + HWLOC_MEMORY_TIER_GPU, + } type; +}; + 
+static int compare_tiers(const void *_a, const void *_b) +{ + const struct hwloc_memory_tier_s *a = _a, *b = _b; + /* sort by type of tier first */ + if (a->type != b->type) + return a->type - b->type; + /* then by bandwidth */ + if (a->local_bw > b->local_bw) + return -1; + else if (a->local_bw < b->local_bw) + return 1; + return 0; +} + +int +hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology) +{ + struct hwloc_internal_memattr_s *imattr; + struct hwloc_memory_tier_s *tiers; + unsigned i, j, n; + const char *env; + int spm_is_hbm = -1; /* -1 will guess from BW, 0 no, 1 forced */ + int mark_dram = 1; + unsigned first_spm, first_nvm; + hwloc_uint64_t max_unknown_bw, min_spm_bw; + + env = getenv("HWLOC_MEMTIERS_GUESS"); + if (env) { + if (!strcmp(env, "none")) { + return 0; + } else if (!strcmp(env, "default")) { + /* nothing */ + } else if (!strcmp(env, "spm_is_hbm")) { + hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n"); + spm_is_hbm = 1; + } else if (HWLOC_SHOW_CRITICAL_ERRORS()) { + fprintf(stderr, "hwloc: Failed to recognize HWLOC_MEMTIERS_GUESS value %s\n", env); + } + } + + imattr = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH]; + + if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) + hwloc__imattr_refresh(topology, imattr); + + n = hwloc_get_nbobjs_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE); + assert(n); + + tiers = malloc(n * sizeof(*tiers)); + if (!tiers) + return -1; + + for(i=0; isubtype && !strcmp(node->subtype, "GPUMemory")) + tiers[i].type = HWLOC_MEMORY_TIER_GPU; + + if (spm_is_hbm == -1) { + for(j=0; jnr_targets; j++) + if (imattr->targets[j].obj == node) { + imtg = &imattr->targets[j]; + break; + } + if (imtg && !hwloc_bitmap_iszero(node->cpuset)) { + iloc.type = HWLOC_LOCATION_TYPE_CPUSET; + iloc.location.cpuset = node->cpuset; + imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0); + if (imi) + tiers[i].local_bw = imi->value; + } + } + } + + /* sort tiers */ + qsort(tiers, n, sizeof(*tiers), 
compare_tiers); + hwloc_debug("Sorting memory tiers...\n"); + for(i=0; ilogical_index, tiers[i].node->os_index, + tiers[i].type, (unsigned long long) tiers[i].local_bw); + + /* now we have UNKNOWN tiers (sorted by BW), then SPM tiers (sorted by BW), then NVM, then GPU */ + + /* iterate over UNKNOWN tiers, and find their BW */ + for(i=0; i HWLOC_MEMORY_TIER_UNKNOWN) + break; + } + first_spm = i; + /* get max BW from first */ + if (first_spm > 0) + max_unknown_bw = tiers[0].local_bw; + else + max_unknown_bw = 0; + + /* there are no DRAM or HBM tiers yet */ + + /* iterate over SPM tiers, and find their BW */ + for(i=first_spm; i HWLOC_MEMORY_TIER_SPM) + break; + } + first_nvm = i; + /* get min BW from last */ + if (first_nvm > first_spm) + min_spm_bw = tiers[first_nvm-1].local_bw; + else + min_spm_bw = 0; + + /* FIXME: if there's more than 10% between some sets of nodes inside a tier, split it? */ + /* FIXME: if there are cpuset-intersecting nodes in same tier, abort? */ + + if (spm_is_hbm == -1) { + /* if we have BW for all SPM and UNKNOWN + * and all SPM BW are 2x superior to all UNKNOWN BW + */ + hwloc_debug("UNKNOWN-memory-tier max bandwidth %llu\n", (unsigned long long) max_unknown_bw); + hwloc_debug("SPM-memory-tier min bandwidth %llu\n", (unsigned long long) min_spm_bw); + if (max_unknown_bw > 0 && min_spm_bw > 0 && max_unknown_bw*2 < min_spm_bw) { + hwloc_debug("assuming SPM means HBM and !SPM means DRAM since bandwidths are very different\n"); + spm_is_hbm = 1; + } else { + hwloc_debug("cannot assume SPM means HBM\n"); + spm_is_hbm = 0; + } + } + + if (spm_is_hbm) { + for(i=0; isubtype) /* don't overwrite the existing subtype */ + continue; + switch (tiers[i].type) { + case HWLOC_MEMORY_TIER_DRAM: + if (mark_dram) + type = "DRAM"; + break; + case HWLOC_MEMORY_TIER_HBM: + type = "HBM"; + break; + case HWLOC_MEMORY_TIER_SPM: + type = "SPM"; + break; + case HWLOC_MEMORY_TIER_NVM: + type = "NVM"; + break; + default: + /* GPU memory is already marked with 
subtype="GPUMemory", + * UNKNOWN doesn't deserve any subtype + */ + break; + } + if (type) { + hwloc_debug("Marking node L#%u P#%u as %s\n", tiers[i].node->logical_index, tiers[i].node->os_index, type); + tiers[i].node->subtype = strdup(type); + } + } + + free(tiers); + return 0; +} diff --git a/src/3rdparty/hwloc/src/pci-common.c b/src/3rdparty/hwloc/src/pci-common.c index 977475eb..b5a4b544 100644 --- a/src/3rdparty/hwloc/src/pci-common.c +++ b/src/3rdparty/hwloc/src/pci-common.c @@ -1,5 +1,5 @@ /* - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -119,6 +119,13 @@ hwloc_pci_discovery_init(struct hwloc_topology *topology) topology->pci_forced_locality = NULL; topology->first_pci_locality = topology->last_pci_locality = NULL; + +#define HWLOC_PCI_LOCALITY_QUIRK_CRAY_EX235A (1ULL<<0) +#define HWLOC_PCI_LOCALITY_QUIRK_FAKE (1ULL<<62) + topology->pci_locality_quirks = (uint64_t) -1; + /* -1 is unknown, 0 is disabled, >0 is bitmask of enabled quirks. + * bit 63 should remain unused so that -1 is unaccessible as a bitmask. 
+ */ } void @@ -146,7 +153,7 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology) } free(buffer); } else { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc/pci: Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n", env, (unsigned long) st.st_size); } @@ -333,7 +340,7 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs } case HWLOC_PCI_BUSID_EQUAL: { static int reported = 0; - if (!reported && hwloc_hide_errors() < 2) { + if (!reported && HWLOC_SHOW_CRITICAL_ERRORS()) { fprintf(stderr, "*********************************************************\n"); fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION); fprintf(stderr, "*\n"); @@ -442,13 +449,90 @@ hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology, return new; } -static struct hwloc_obj * -hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused, - struct hwloc_pcidev_attr_s *busid __hwloc_attribute_unused, - struct hwloc_obj *parent __hwloc_attribute_unused) +/* return 1 if a quirk was applied */ +static int +hwloc__pci_find_busid_parent_quirk(struct hwloc_topology *topology, + struct hwloc_pcidev_attr_s *busid, + hwloc_cpuset_t cpuset) { - /* no quirk for now */ - return parent; + if (topology->pci_locality_quirks == (uint64_t)-1 /* unknown */) { + const char *dmi_board_name, *env; + + /* first invocation, detect which quirks are needed */ + topology->pci_locality_quirks = 0; /* no quirk yet */ + + dmi_board_name = hwloc_obj_get_info_by_name(hwloc_get_root_obj(topology), "DMIBoardName"); + if (dmi_board_name && !strcmp(dmi_board_name, "HPE CRAY EX235A")) { + hwloc_debug("enabling for PCI locality quirk for HPE Cray EX235A\n"); + topology->pci_locality_quirks |= HWLOC_PCI_LOCALITY_QUIRK_CRAY_EX235A; + } + + env = getenv("HWLOC_PCI_LOCALITY_QUIRK_FAKE"); + if (env && atoi(env)) { + hwloc_debug("enabling for PCI locality fake quirk (attaching everything to 
last PU)\n"); + topology->pci_locality_quirks |= HWLOC_PCI_LOCALITY_QUIRK_FAKE; + } + } + + if (topology->pci_locality_quirks & HWLOC_PCI_LOCALITY_QUIRK_FAKE) { + unsigned last = hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(topology)); + hwloc_bitmap_set(cpuset, last); + return 1; + } + + if (topology->pci_locality_quirks & HWLOC_PCI_LOCALITY_QUIRK_CRAY_EX235A) { + /* AMD Trento has xGMI ports connected to individual CCDs (8 cores + L3) + * instead of NUMA nodes (pairs of CCDs within Trento) as is usual in AMD EPYC CPUs. + * This is not described by the ACPI tables, hence we need to manually hardwire + * the xGMI locality for the (currently single) server that currently uses that CPU. + * It's not clear if ACPI tables can/will ever be fixed (would require one initiator + * proximity domain per CCD), or if Linux can/will work around the issue. + */ + if (busid->domain == 0) { + if (busid->bus >= 0xd0 && busid->bus <= 0xd1) { + hwloc_bitmap_set_range(cpuset, 0, 7); + hwloc_bitmap_set_range(cpuset, 64, 71); + return 1; + } + if (busid->bus >= 0xd4 && busid->bus <= 0xd6) { + hwloc_bitmap_set_range(cpuset, 8, 15); + hwloc_bitmap_set_range(cpuset, 72, 79); + return 1; + } + if (busid->bus >= 0xc8 && busid->bus <= 0xc9) { + hwloc_bitmap_set_range(cpuset, 16, 23); + hwloc_bitmap_set_range(cpuset, 80, 87); + return 1; + } + if (busid->bus >= 0xcc && busid->bus <= 0xce) { + hwloc_bitmap_set_range(cpuset, 24, 31); + hwloc_bitmap_set_range(cpuset, 88, 95); + return 1; + } + if (busid->bus >= 0xd8 && busid->bus <= 0xd9) { + hwloc_bitmap_set_range(cpuset, 32, 39); + hwloc_bitmap_set_range(cpuset, 96, 103); + return 1; + } + if (busid->bus >= 0xdc && busid->bus <= 0xde) { + hwloc_bitmap_set_range(cpuset, 40, 47); + hwloc_bitmap_set_range(cpuset, 104, 111); + return 1; + } + if (busid->bus >= 0xc0 && busid->bus <= 0xc1) { + hwloc_bitmap_set_range(cpuset, 48, 55); + hwloc_bitmap_set_range(cpuset, 112, 119); + return 1; + } + if (busid->bus >= 0xc4 && busid->bus <= 0xc6) { + 
hwloc_bitmap_set_range(cpuset, 56, 63); + hwloc_bitmap_set_range(cpuset, 120, 127); + return 1; + } + } + } + + return 0; } static struct hwloc_obj * @@ -457,7 +541,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); hwloc_obj_t parent; int forced = 0; - int noquirks = 0; + int noquirks = 0, got_quirked = 0; unsigned i; int err; @@ -490,7 +574,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide if (env) { static int reported = 0; if (!topology->pci_has_forced_locality && !reported) { - if (!hwloc_hide_errors()) + if (HWLOC_SHOW_ALL_ERRORS()) fprintf(stderr, "hwloc/pci: Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env); reported = 1; } @@ -505,7 +589,13 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide } } - if (!forced) { + if (!forced && !noquirks && topology->pci_locality_quirks /* either quirks are unknown yet, or some are enabled */) { + err = hwloc__pci_find_busid_parent_quirk(topology, busid, cpuset); + if (err > 0) + got_quirked = 1; + } + + if (!forced && !got_quirked) { /* get the cpuset by asking the backend that provides the relevant hook, if any. */ struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend; if (backend) @@ -520,11 +610,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide hwloc_debug_bitmap(" will attach PCI bus to cpuset %s\n", cpuset); parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset); - if (parent) { - if (!noquirks) - /* We found a valid parent. 
Check that the OS didn't report invalid locality */ - parent = hwloc_pci_fixup_busid_parent(topology, busid, parent); - } else { + if (!parent) { /* Fallback to root */ parent = hwloc_get_root_obj(topology); } @@ -805,19 +891,28 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config, memcpy(&linksta, &config[offset + HWLOC_PCI_EXP_LNKSTA], 4); speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */ width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */ - /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane - * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane - * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane - * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane - * PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane - * PCIe Gen6 = 64 GT/s signal-rate per lane with 128/130 encoding = 8 GB/s data-rate per lane + /* + * These are single-direction bandwidths only. + * + * Gen1 used NRZ with 8/10 encoding. + * PCIe Gen1 = 2.5GT/s signal-rate per lane x 8/10 = 0.25GB/s data-rate per lane + * PCIe Gen2 = 5 GT/s signal-rate per lane x 8/10 = 0.5 GB/s data-rate per lane + * Gen3 switched to NRZ with 128/130 encoding. + * PCIe Gen3 = 8 GT/s signal-rate per lane x 128/130 = 1 GB/s data-rate per lane + * PCIe Gen4 = 16 GT/s signal-rate per lane x 128/130 = 2 GB/s data-rate per lane + * PCIe Gen5 = 32 GT/s signal-rate per lane x 128/130 = 4 GB/s data-rate per lane + * Gen6 switched to PAM with 242/256 FLIT (242B payload protected by 8B CRC + 6B FEC). 
+ * PCIe Gen6 = 64 GT/s signal-rate per lane x 242/256 = 8 GB/s data-rate per lane + * PCIe Gen7 = 128GT/s signal-rate per lane x 242/256 = 16 GB/s data-rate per lane */ /* lanespeed in Gbit/s */ if (speed <= 2) lanespeed = 2.5f * speed * 0.8f; + else if (speed <= 5) + lanespeed = 8.0f * (1<<(speed-3)) * 128/130; else - lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen7 will be 128 GT/s and so on */ + lanespeed = 8.0f * (1<<(speed-3)) * 242/256; /* assume Gen8 will be 256 GT/s and so on */ /* linkspeed in GB/s */ *linkspeed = lanespeed * width / 8; @@ -944,6 +1039,7 @@ hwloc_pci_class_string(unsigned short class_id) switch (class_id) { case 0x0500: return "RAM"; case 0x0501: return "Flash"; + case 0x0502: return "CXLMem"; } return "Memory"; case 0x06: diff --git a/src/3rdparty/hwloc/src/topology-synthetic.c b/src/3rdparty/hwloc/src/topology-synthetic.c index 5dd4baaa..7b3e515d 100644 --- a/src/3rdparty/hwloc/src/topology-synthetic.c +++ b/src/3rdparty/hwloc/src/topology-synthetic.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -323,17 +323,29 @@ hwloc_synthetic_parse_memory_attr(const char *attr, const char **endp) hwloc_uint64_t size; size = strtoull(attr, (char **) &endptr, 0); if (!hwloc_strncasecmp(endptr, "TB", 2)) { + size *= 1000ULL*1000ULL*1000ULL*1000ULL; + endptr += 2; + } else if (!hwloc_strncasecmp(endptr, "TiB", 3)) { size <<= 40; - endptr += 2; + endptr += 3; } else if (!hwloc_strncasecmp(endptr, "GB", 2)) { + size *= 1000ULL*1000ULL*1000ULL; + endptr += 2; + } else if (!hwloc_strncasecmp(endptr, "GiB", 3)) { size <<= 30; - endptr += 2; + endptr += 3; } else if (!hwloc_strncasecmp(endptr, "MB", 2)) { + size *= 1000ULL*1000ULL; + endptr += 2; + } else if (!hwloc_strncasecmp(endptr, "MiB", 3)) { size <<= 20; - endptr += 2; + endptr += 3; } else if (!hwloc_strncasecmp(endptr, "kB", 2)) { - size <<= 10; + size *= 1000ULL; endptr += 2; + } else if (!hwloc_strncasecmp(endptr, "kiB", 3)) { + size <<= 10; + endptr += 3; } *endp = endptr; return size; @@ -802,15 +814,15 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, } else if (hwloc__obj_type_is_cache(type)) { if (!curlevel->attr.memorysize) { if (1 == curlevel->attr.depth) - /* 32Kb in L1 */ + /* 32KiB in L1 */ curlevel->attr.memorysize = 32*1024; else - /* *4 at each level, starting from 1MB for L2, unified */ + /* *4 at each level, starting from 1MiB for L2, unified */ curlevel->attr.memorysize = 256ULL*1024 << (2*curlevel->attr.depth); } } else if (type == HWLOC_OBJ_NUMANODE && !curlevel->attr.memorysize) { - /* 1GB in memory nodes. */ + /* 1GiB in memory nodes. */ curlevel->attr.memorysize = 1024*1024*1024; } diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c index df93c5e9..20b617a9 100644 --- a/src/3rdparty/hwloc/src/topology-windows.c +++ b/src/3rdparty/hwloc/src/topology-windows.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. 
* Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -366,7 +366,7 @@ hwloc_win_get_processor_groups(void) hwloc_debug("found %lu windows processor groups\n", nr_processor_groups); if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) { - if (!hwloc_hide_errors()) + if (HWLOC_SHOW_ALL_ERRORS()) fprintf(stderr, "hwloc: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n"); } diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index b9bc7fb0..a1558f07 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2021 Inria. All rights reserved. + * Copyright © 2010-2022 Inria. All rights reserved. * Copyright © 2010-2013 Université Bordeaux * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -1349,7 +1349,7 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long if (data->apicid_unique) { summarize(backend, infos, flags); - if (has_hybrid(features)) { + if (has_hybrid(features) && !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) { /* use hybrid info for cpukinds */ hwloc_bitmap_t atomset = hwloc_bitmap_alloc(); hwloc_bitmap_t coreset = hwloc_bitmap_alloc(); diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c index 2075d6fa..b1f20dbf 100644 --- a/src/3rdparty/hwloc/src/topology-xml.c +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * Copyright © 2009-2011, 2020 Université Bordeaux * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -123,6 +123,17 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, fprintf(stderr, "%s: unexpected zero gp_index, topology may be invalid\n", state->global->msgprefix); if (obj->gp_index >= topology->next_gp_index) topology->next_gp_index = obj->gp_index + 1; + } else if (!strcmp(name, "id")) { /* forward compat */ + if (!strncmp(value, "obj", 3)) { + obj->gp_index = strtoull(value+3, NULL, 10); + if (!obj->gp_index && hwloc__xml_verbose()) + fprintf(stderr, "%s: unexpected zero id, topology may be invalid\n", state->global->msgprefix); + if (obj->gp_index >= topology->next_gp_index) + topology->next_gp_index = obj->gp_index + 1; + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: unexpected id `%s' not-starting with `obj', ignoring\n", state->global->msgprefix, value); + } } else if (!strcmp(name, "cpuset")) { if (!obj->cpuset) obj->cpuset = hwloc_bitmap_alloc(); @@ -263,7 +274,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN } else if (domain > 0xffff) { static int warned = 0; - if (!warned && hwloc_hide_errors() < 2) + if (!warned && HWLOC_SHOW_ALL_ERRORS()) fprintf(stderr, "hwloc/xml: Ignoring PCI device with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n"); warned = 1; *ignore = 1; @@ -363,7 +374,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN } else if (domain > 0xffff) { static int warned = 0; - if (!warned && hwloc_hide_errors() < 2) + if (!warned && HWLOC_SHOW_ALL_ERRORS()) fprintf(stderr, "hwloc/xml: Ignoring bridge to PCI with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n"); warned = 1; *ignore = 1; @@ -1235,7 +1246,7 @@ hwloc__xml_import_object(hwloc_topology_t topology, /* 
next should be before cur */ if (!childrengotignored) { static int reported = 0; - if (!reported && hwloc_hide_errors() < 2) { + if (!reported && HWLOC_SHOW_CRITICAL_ERRORS()) { hwloc__xml_import_report_outoforder(topology, next, cur); reported = 1; } @@ -1568,6 +1579,9 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, } } + if (topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES) + goto out_ignore; + hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0 /* assume grouping was applied when this matrix was discovered before exporting to XML */); /* prevent freeing below */ @@ -1722,7 +1736,8 @@ hwloc__xml_import_memattr(hwloc_topology_t topology, } } - if (name && flags != (unsigned long) -1) { + if (name && flags != (unsigned long) -1 + && !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) { hwloc_memattr_id_t _id; ret = hwloc_memattr_get_by_name(topology, name, &_id); @@ -1833,7 +1848,13 @@ hwloc__xml_import_cpukind(hwloc_topology_t topology, goto error; } - hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY); + if (topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS) { + hwloc__free_infos(infos, nr_infos); + hwloc_bitmap_free(cpuset); + } else { + hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY); + hwloc__free_infos(infos, nr_infos); + } return state->global->close_tag(state); @@ -2168,7 +2189,8 @@ done: * but it would require to have those objects in the original XML order (like the first_numanode cousin-list). * because the topology order can be different if some parents are ignored during load. 
*/ - if (nbobjs == data->nbnumanodes) { + if (nbobjs == data->nbnumanodes + && !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) { hwloc_obj_t *objs = malloc(nbobjs*sizeof(hwloc_obj_t)); uint64_t *values = malloc(nbobjs*nbobjs*sizeof(*values)); assert(data->nbnumanodes > 0); /* v1dist->nbobjs is >0 after import */ @@ -2650,7 +2672,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array)); if (!logical_to_v2array) { - if (!hwloc_hide_errors()) + if (HWLOC_SHOW_ALL_ERRORS()) fprintf(stderr, "hwloc/xml/export/v1: failed to allocated logical_to_v2array\n"); continue; } @@ -3124,9 +3146,11 @@ hwloc__xml_export_memattrs(hwloc__xml_export_state_t state, hwloc_topology_t top continue; imattr = &topology->memattrs[id]; - if ((id == HWLOC_MEMATTR_ID_LATENCY || id == HWLOC_MEMATTR_ID_BANDWIDTH) - && !imattr->nr_targets) - /* no need to export target-less attributes for initial attributes, no release support attributes without those definitions */ + if (id < HWLOC_MEMATTR_ID_MAX && !imattr->nr_targets) + /* no need to export standard attributes without any target, + * their definition is now standardized, + * the old hwloc importing this XML may recreate these attributes just like it would for a non-imported topology. + */ continue; state->new_child(state, &mstate, "memattr"); diff --git a/src/3rdparty/hwloc/src/topology.c b/src/3rdparty/hwloc/src/topology.c index 54b772ff..47b4658c 100644 --- a/src/3rdparty/hwloc/src/topology.c +++ b/src/3rdparty/hwloc/src/topology.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2022 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2022 IBM Corporation. All rights reserved. 
@@ -114,14 +114,25 @@ int hwloc_topology_abi_check(hwloc_topology_t topology) return topology->topology_abi != HWLOC_TOPOLOGY_ABI ? -1 : 0; } +/* callers should rather use wrappers HWLOC_SHOW_ALL_ERRORS() and HWLOC_SHOW_CRITICAL_ERRORS() for clarity */ int hwloc_hide_errors(void) { static int hide = 1; /* only show critical errors by default. lstopo will show others */ static int checked = 0; if (!checked) { const char *envvar = getenv("HWLOC_HIDE_ERRORS"); - if (envvar) + if (envvar) { hide = atoi(envvar); +#ifdef HWLOC_DEBUG + } else { + /* if debug is enabled and HWLOC_DEBUG_VERBOSE isn't forced to 0, + * show all errors just like we show all debug messages. + */ + envvar = getenv("HWLOC_DEBUG_VERBOSE"); + if (!envvar || atoi(envvar)) + hide = 0; +#endif + } checked = 1; } return hide; @@ -158,7 +169,7 @@ static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *ms { static int reported = 0; - if (reason && !reported && hwloc_hide_errors() < 2) { + if (reason && !reported && HWLOC_SHOW_CRITICAL_ERRORS()) { char newstr[512]; char oldstr[512]; report_insert_error_format_obj(newstr, sizeof(newstr), new); @@ -3178,7 +3189,7 @@ hwloc_connect_levels(hwloc_topology_t topology) tmpnbobjs = realloc(topology->level_nbobjects, 2 * topology->nb_levels_allocated * sizeof(*topology->level_nbobjects)); if (!tmplevels || !tmpnbobjs) { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: failed to realloc level arrays to %u\n", topology->nb_levels_allocated * 2); /* if one realloc succeeded, make sure the caller will free the new buffer */ @@ -3536,6 +3547,8 @@ hwloc_discover(struct hwloc_topology *topology, /* * Additional discovery */ + hwloc_pci_discovery_prepare(topology); + if (topology->backend_phases & HWLOC_DISC_PHASE_PCI) { dstatus->phase = HWLOC_DISC_PHASE_PCI; hwloc_discover_by_phase(topology, dstatus, "PCI"); @@ -3553,6 +3566,8 @@ hwloc_discover(struct hwloc_topology *topology, hwloc_discover_by_phase(topology, 
dstatus, "ANNOTATE"); } + hwloc_pci_discovery_exit(topology); /* pci needed up to annotate */ + if (getenv("HWLOC_DEBUG_SORT_CHILDREN")) hwloc_debug_sort_children(topology->levels[0][0]); @@ -3565,17 +3580,17 @@ hwloc_discover(struct hwloc_topology *topology, hwloc_debug("%s", "\nRemoving empty objects\n"); remove_empty(topology, &topology->levels[0][0]); if (!topology->levels[0][0]) { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: Topology became empty, aborting!\n"); return -1; } if (hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: Topology does not contain any PU, aborting!\n"); return -1; } if (hwloc_bitmap_iszero(topology->levels[0][0]->nodeset)) { - if (hwloc_hide_errors() < 2) + if (HWLOC_SHOW_CRITICAL_ERRORS()) fprintf(stderr, "hwloc: Topology does not contain any NUMA node, aborting!\n"); return -1; } @@ -3811,7 +3826,16 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags) return -1; } - if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES|HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT|HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING|HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING|HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING)) { + if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED + |HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM + |HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES + |HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT + |HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING + |HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING + |HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING + |HWLOC_TOPOLOGY_FLAG_NO_DISTANCES + |HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS + |HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) { errno = EINVAL; return -1; } @@ -4076,15 +4100,11 @@ hwloc_topology_load (struct hwloc_topology *topology) */ hwloc_set_binding_hooks(topology); - hwloc_pci_discovery_prepare(topology); - /* 
actual topology discovery */ err = hwloc_discover(topology, &dstatus); if (err < 0) goto out; - hwloc_pci_discovery_exit(topology); - #ifndef HWLOC_DEBUG if (getenv("HWLOC_DEBUG_CHECK")) #endif @@ -4106,6 +4126,7 @@ hwloc_topology_load (struct hwloc_topology *topology) /* Same for memattrs */ hwloc_internal_memattrs_need_refresh(topology); hwloc_internal_memattrs_refresh(topology); + hwloc_internal_memattrs_guess_memory_tiers(topology); topology->is_loaded = 1;