diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h index 9c8c86ccb..261626f42 100644 --- a/src/3rdparty/hwloc/include/hwloc.h +++ b/src/3rdparty/hwloc/include/hwloc.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -2362,22 +2362,9 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object(hwloc_topology_t to /** \brief Allocate a Group object to insert later with hwloc_topology_insert_group_object(). * * This function returns a new Group object. - * The caller should (at least) initialize its sets before inserting the object. - * See hwloc_topology_insert_group_object(). * - * The \p subtype object attribute may be set to display something else - * than "Group" as the type name for this object in lstopo. - * Custom name/value info pairs may be added with hwloc_obj_add_info() after - * insertion. - * - * The \p kind group attribute should be 0. The \p subkind group attribute may - * be set to identify multiple Groups of the same level. - * - * It is recommended not to set any other object attribute before insertion, - * since the Group may get discarded during insertion. - * - * The object will be destroyed if passed to hwloc_topology_insert_group_object() - * without any set defined. + * The caller should (at least) initialize its sets before inserting + * the object in the topology. See hwloc_topology_insert_group_object(). */ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_alloc_group_object(hwloc_topology_t topology); @@ -2388,34 +2375,44 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_alloc_group_object(hwloc_topology_t to * the final location of the Group in the topology. * Then the object can be passed to this function for actual insertion in the topology. 
* - * The group \p dont_merge attribute may be set to prevent the core from - * ever merging this object with another object hierarchically-identical. - * * Either the cpuset or nodeset field (or both, if compatible) must be set * to a non-empty bitmap. The complete_cpuset or complete_nodeset may be set * instead if inserting with respect to the complete topology * (including disallowed, offline or unknown objects). - * - * It grouping several objects, hwloc_obj_add_other_obj_sets() is an easy way + * If grouping several objects, hwloc_obj_add_other_obj_sets() is an easy way * to build the Group sets iteratively. - * * These sets cannot be larger than the current topology, or they would get * restricted silently. - * * The core will setup the other sets after actual insertion. * + * The \p subtype object attribute may be defined (to a dynamically + * allocated string) to display something else than "Group" as the + * type name for this object in lstopo. + * Custom name/value info pairs may be added with hwloc_obj_add_info() after + * insertion. + * + * The group \p dont_merge attribute may be set to \c 1 to prevent + * the hwloc core from ever merging this object with another + * hierarchically-identical object. + * This is useful when the Group itself describes an important feature + * that cannot be exposed anywhere else in the hierarchy. + * + * The group \p kind attribute may be set to a high value such + * as \c 0xffffffff to tell hwloc that this new Group should always + * be discarded in favor of any existing Group with the same locality. + * * \return The inserted object if it was properly inserted. * - * \return An existing object if the Group was discarded because the topology already - * contained an object at the same location (the Group did not add any locality information). - * Any name/info key pair set before inserting is appended to the existing object. 
+ * \return An existing object if the Group was merged or discarded + * because the topology already contained an object at the same + * location (the Group did not add any hierarchy information). * * \return \c NULL if the insertion failed because of conflicting sets in topology tree. * * \return \c NULL if Group objects are filtered-out of the topology (::HWLOC_TYPE_FILTER_KEEP_NONE). * - * \return \c NULL if the object was discarded because no set was initialized in the Group - * before insert, or all of them were empty. + * \return \c NULL if the object was discarded because no set was + * initialized in the Group before insert, or all of them were empty. */ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_group_object(hwloc_topology_t topology, hwloc_obj_t group); diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index 8b69185fa..e490466be 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -11,10 +11,10 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.4.0" +#define HWLOC_VERSION "2.4.1" #define HWLOC_VERSION_MAJOR 2 #define HWLOC_VERSION_MINOR 4 -#define HWLOC_VERSION_RELEASE 0 +#define HWLOC_VERSION_RELEASE 1 #define HWLOC_VERSION_GREEK "" #define __hwloc_restrict diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h index d498d6060..9a2fdacb4 100644 --- a/src/3rdparty/hwloc/include/hwloc/opencl.h +++ b/src/3rdparty/hwloc/include/hwloc/opencl.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2020 Inria. All rights reserved. + * Copyright © 2012-2021 Inria. All rights reserved. * Copyright © 2013, 2018 Université Bordeaux. All right reserved. * See COPYING in top-level directory. 
*/ @@ -82,9 +82,10 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device, if (CL_SUCCESS == clret && HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD == amdtopo.raw.type) { *domain = 0; /* can't do anything better */ - *bus = (unsigned) amdtopo.pcie.bus; - *dev = (unsigned) amdtopo.pcie.device; - *func = (unsigned) amdtopo.pcie.function; + /* cl_device_topology_amd stores bus ID in cl_char, don't convert those signed char directly to unsigned int */ + *bus = (unsigned) (unsigned char) amdtopo.pcie.bus; + *dev = (unsigned) (unsigned char) amdtopo.pcie.device; + *func = (unsigned) (unsigned char) amdtopo.pcie.function; return 0; } diff --git a/src/3rdparty/hwloc/include/hwloc/rsmi.h b/src/3rdparty/hwloc/include/hwloc/rsmi.h new file mode 100644 index 000000000..a6d55b3c9 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/rsmi.h @@ -0,0 +1,201 @@ +/* + * Copyright © 2012-2020 Inria. All rights reserved. + * Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Written by Advanced Micro Devices, + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and the ROCm SMI Management Library. + * + * Applications that use both hwloc and the ROCm SMI Management Library may want to + * include this file so as to get topology information for AMD GPU devices. + */ + +#ifndef HWLOC_RSMI_H +#define HWLOC_RSMI_H + +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" +#ifdef HWLOC_LINUX_SYS +#include "hwloc/linux.h" +#endif + +#include <rocm_smi/rocm_smi.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_rsmi Interoperability with the ROCm SMI Management Library + * + * This interface offers ways to retrieve topology information about + * devices managed by the ROCm SMI Management Library. + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to AMD GPU device whose index is \p dv_ind. 
+ * + * Return the CPU set describing the locality of the AMD GPU device + * whose index is \p dv_ind. + * + * Topology \p topology and device \p dv_ind must match the local machine. + * I/O devices detection and the ROCm SMI component are not needed in the + * topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_rsmi_get_device_osdev() + * and hwloc_rsmi_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_rsmi_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + uint32_t dv_ind, hwloc_cpuset_t set) +{ /* Returns 0 once set has been filled; the Linux branch returns -1 with errno set to EINVAL if the topology is not this system or the PCI id query fails. */ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_RSMI_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_RSMI_DEVICE_SYSFS_PATH_MAX]; + rsmi_status_t ret; + uint64_t bdfid = 0; + unsigned domain, device, bus; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + ret = rsmi_dev_pci_id_get(dv_ind, &bdfid); + if (RSMI_STATUS_SUCCESS != ret) { + errno = EINVAL; + return -1; + } + domain = (bdfid>>32) & 0xffffffff; /* PCI domain: bits 63..32 of the id */ + bus = ((bdfid & 0xffff)>>8) & 0xff; /* PCI bus: bits 15..8 */ + device = ((bdfid & 0xff)>>3) & 0x1f; /* PCI device: bits 7..3; the function bits 2..0 are not used here */ + + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domain, bus, device); /* the function number is hard-coded to 0 in the sysfs path */ + if (hwloc_linux_read_path_as_cpumask(path, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); /* read failed or mask is empty: fall back to the complete cpuset */ +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to the + * AMD GPU device whose index is \p dv_ind. + * + * Return the OS device object describing the AMD GPU device whose + * index is \p dv_ind. 
Returns NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the ROCm SMI component must be enabled in the + * topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object (unless PCI devices are filtered out). + */ +static __hwloc_inline hwloc_obj_t +hwloc_rsmi_get_device_osdev_by_index(hwloc_topology_t topology, uint32_t dv_ind) +{ /* Walks the OS devices and returns the first GPU one whose name is the rsmi prefix followed by dv_ind; returns NULL when none matches. */ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type + && osdev->name /* skip unnamed OS devices before touching the name */ + && !strncmp("rsmi", osdev->name, 4) + && atoi(osdev->name + 4) == (int) dv_ind) /* index is parsed from the characters after the 4-character prefix */ + return osdev; + } + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to AMD GPU device, + * whose index is \p dv_ind. + * + * Return the hwloc OS device object that describes the given + * AMD GPU, whose index is \p dv_ind. Returns NULL if there is none. + * + * Topology \p topology and device \p dv_ind must match the local machine. + * I/O devices detection and the ROCm SMI component must be enabled in the + * topology. If not, the locality of the object may still be found using + * hwloc_rsmi_get_device_cpuset(). + * + * \note The corresponding hwloc PCI device may be found by looking + * at the result parent pointer (unless PCI devices are filtered out). 
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_rsmi_get_device_osdev(hwloc_topology_t topology, uint32_t dv_ind) +{ + hwloc_obj_t osdev; + rsmi_status_t ret; + uint64_t bdfid = 0; + unsigned domain, device, bus, func; + uint64_t id; + char uuid[64]; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return NULL; + } + + ret = rsmi_dev_pci_id_get(dv_ind, &bdfid); + if (RSMI_STATUS_SUCCESS != ret) { + errno = EINVAL; + return NULL; + } + domain = (bdfid>>32) & 0xffffffff; + bus = ((bdfid & 0xffff)>>8) & 0xff; + device = ((bdfid & 0xff)>>3) & 0x1f; + func = bdfid & 0x7; + + ret = rsmi_dev_unique_id_get(dv_ind, &id); + if (RSMI_STATUS_SUCCESS != ret) + uuid[0] = '\0'; + else + sprintf(uuid, "%lx", id); + + osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + hwloc_obj_t pcidev = osdev->parent; + const char *info; + + if (strncmp(osdev->name, "rsmi", 4)) + continue; + + if (pcidev + && pcidev->type == HWLOC_OBJ_PCI_DEVICE + && pcidev->attr->pcidev.domain == domain + && pcidev->attr->pcidev.bus == bus + && pcidev->attr->pcidev.dev == device + && pcidev->attr->pcidev.func == func) + return osdev; + + info = hwloc_obj_get_info_by_name(osdev, "AMDUUID"); + if (info && !strcmp(info, uuid)) + return osdev; + } + + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_RSMI_H */ diff --git a/src/3rdparty/hwloc/src/cpukinds.c b/src/3rdparty/hwloc/src/cpukinds.c index 5f2dd1aa8..ef6297d7e 100644 --- a/src/3rdparty/hwloc/src/cpukinds.c +++ b/src/3rdparty/hwloc/src/cpukinds.c @@ -1,5 +1,5 @@ /* - * Copyright © 2020 Inria. All rights reserved. + * Copyright © 2020-2021 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -270,7 +270,7 @@ hwloc__cpukinds_check_duplicate_rankings(struct hwloc_topology *topology) unsigned i,j; for(i=0; i<topology->nr_cpukinds; i++) for(j=i+1; j<topology->nr_cpukinds; j++) - if (topology->cpukinds[i].forced_efficiency == topology->cpukinds[j].forced_efficiency) + if (topology->cpukinds[i].ranking_value == topology->cpukinds[j].ranking_value) /* if any duplicate, fail */ return -1; return 0; diff --git a/src/3rdparty/hwloc/src/static-components.h b/src/3rdparty/hwloc/src/static-components.h index f2cb254a6..dac227a60 100644 --- a/src/3rdparty/hwloc/src/static-components.h +++ b/src/3rdparty/hwloc/src/static-components.h @@ -1,4 +1,9 @@ -#include +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_noos_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_synthetic_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_nolibxml_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_windows_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component; static const struct hwloc_component * hwloc_static_components[] = { &hwloc_noos_component, &hwloc_xml_component, diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index 267384ee5..71e396e29 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2020 Inria. All rights reserved. + * Copyright © 2010-2021 Inria. All rights reserved. * Copyright © 2010-2013 Université Bordeaux * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -908,6 +908,16 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns int gotnuma = 0; int fulldiscovery = (flags & HWLOC_X86_DISC_FLAG_FULL); +#ifdef HWLOC_DEBUG + hwloc_debug("\nSummary of x86 CPUID topology:\n"); + for(i=0; ilevels[0][0]->cpuset) { - /* somebody else discovered things */ + /* somebody else discovered things, reconnect levels so that we can look at them */ + hwloc_topology_reconnect(topology, 0); if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) { /* only PUs were discovered, as much as we would, complete the topology with everything else */ alreadypus = 1; @@ -1595,7 +1606,6 @@ hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dsta } /* several object types were added, we can't easily complete, just do partial discovery */ - hwloc_topology_reconnect(topology, 0); ret = hwloc_look_x86(backend, flags); if (ret) hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86"); diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c index fe04dd943..6aacc052a 100644 --- a/src/3rdparty/hwloc/src/topology-xml.c +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -1462,6 +1462,9 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, unsigned long long u; if (heterotypes) { hwloc_obj_type_t t = HWLOC_OBJ_TYPE_NONE; + if (!*tmp) + /* reached the end of this indexes attribute */ + break; if (hwloc_type_sscanf(tmp, &t, NULL, 0) < 0) { if (hwloc__xml_verbose()) fprintf(stderr, "%s: %s with unrecognized heterogeneous type %s\n", diff --git a/src/3rdparty/hwloc/src/topology.c b/src/3rdparty/hwloc/src/topology.c index 94387ece6..3944f3c19 100644 --- a/src/3rdparty/hwloc/src/topology.c +++ b/src/3rdparty/hwloc/src/topology.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2021 Inria. All rights reserved. 
* Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -567,8 +567,9 @@ hwloc_free_unlinked_object(hwloc_obj_t obj) } /* Replace old with contents of new object, and make new freeable by the caller. - * Only updates next_sibling/first_child pointers, - * so may only be used during early discovery. + * Requires reconnect (for siblings pointers and group depth), + * fixup of sets (only the main cpuset was likely compared before merging), + * and update of total_memory and group depth. */ static void hwloc_replace_linked_object(hwloc_obj_t old, hwloc_obj_t new) @@ -1348,7 +1349,7 @@ merge_insert_equal(hwloc_obj_t new, hwloc_obj_t old) /* returns the result of merge, or NULL if not merged */ static __hwloc_inline hwloc_obj_t -hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) +hwloc__insert_try_merge_group(hwloc_topology_t topology, hwloc_obj_t old, hwloc_obj_t new) { if (new->type == HWLOC_OBJ_GROUP && old->type == HWLOC_OBJ_GROUP) { /* which group do we keep? 
*/ @@ -1359,6 +1360,7 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) /* keep the new one, it doesn't want to be merged */ hwloc_replace_linked_object(old, new); + topology->modified = 1; return new; } else { @@ -1366,9 +1368,12 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) /* keep the old one, it doesn't want to be merged */ return old; - /* compare subkinds to decice who to keep */ - if (new->attr->group.kind < old->attr->group.kind) + /* compare subkinds to decide which group to keep */ + if (new->attr->group.kind < old->attr->group.kind) { + /* keep smaller kind */ hwloc_replace_linked_object(old, new); + topology->modified = 1; + } return old; } } @@ -1394,6 +1399,7 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) * and let the caller free the new object */ hwloc_replace_linked_object(old, new); + topology->modified = 1; return old; } else { @@ -1435,7 +1441,7 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur int setres = res; if (res == HWLOC_OBJ_EQUAL) { - hwloc_obj_t merged = hwloc__insert_try_merge_group(child, obj); + hwloc_obj_t merged = hwloc__insert_try_merge_group(topology, child, obj); if (merged) return merged; /* otherwise compare actual types to decide of the inclusion */ @@ -1931,12 +1937,24 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t if (!res) return NULL; - if (res != obj) - /* merged */ + + if (res != obj && res->type != HWLOC_OBJ_GROUP) + /* merged, not into a Group, nothing to update */ return res; + /* res == obj means that the object was inserted. + * We need to reconnect levels, fill all its cpu/node sets, + * compute its total memory, group depth, etc. + * + * res != obj usually means that our new group was merged into an + * existing object, no need to recompute anything. 
+ * However, if merging with an existing group, depending on their kinds, + * the contents of obj may overwrite the contents of the old group. + * This requires reconnecting levels, filling sets, recomputing total memory, etc. + */ + /* properly inserted */ - hwloc_obj_add_children_sets(obj); + hwloc_obj_add_children_sets(res); if (hwloc_topology_reconnect(topology, 0) < 0) return NULL; @@ -1948,7 +1966,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t #endif hwloc_topology_check(topology); - return obj; + return res; } hwloc_obj_t @@ -4658,6 +4676,9 @@ hwloc__check_misc_children(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, static void hwloc__check_object(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_obj_t obj) { + hwloc_uint64_t total_memory; + hwloc_obj_t child; + assert(!hwloc_bitmap_isset(gp_indexes, obj->gp_index)); hwloc_bitmap_set(gp_indexes, obj->gp_index); @@ -4715,6 +4736,18 @@ hwloc__check_object(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_ assert(hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type) == obj->type); } + /* check total memory */ + total_memory = 0; + if (obj->type == HWLOC_OBJ_NUMANODE) + total_memory += obj->attr->numanode.local_memory; + for_each_child(child, obj) { + total_memory += child->total_memory; + } + for_each_memory_child(child, obj) { + total_memory += child->total_memory; + } + assert(total_memory == obj->total_memory); + /* check children */ hwloc__check_normal_children(topology, gp_indexes, obj); hwloc__check_memory_children(topology, gp_indexes, obj);