From 2b29b81b898e7d547f8b80d0bd38aa8632d09dfd Mon Sep 17 00:00:00 2001 From: XMRig <support@xmrig.com> Date: Sun, 28 Jul 2019 09:24:53 +0700 Subject: [PATCH] Use internal hwloc for MSVC. --- CMakeLists.txt | 2 +- src/3rdparty/hwloc/AUTHORS | 44 + src/3rdparty/hwloc/CMakeLists.txt | 38 + src/3rdparty/hwloc/COPYING | 39 + src/3rdparty/hwloc/NEWS | 1599 ++++++ src/3rdparty/hwloc/README | 85 + src/3rdparty/hwloc/VERSION | 47 + src/3rdparty/hwloc/include/hwloc.h | 2270 +++++++++ .../hwloc/include/hwloc/autogen/config.h | 59 + src/3rdparty/hwloc/include/hwloc/bitmap.h | 467 ++ src/3rdparty/hwloc/include/hwloc/cuda.h | 220 + src/3rdparty/hwloc/include/hwloc/cudart.h | 177 + src/3rdparty/hwloc/include/hwloc/deprecated.h | 206 + src/3rdparty/hwloc/include/hwloc/diff.h | 289 ++ src/3rdparty/hwloc/include/hwloc/distances.h | 271 + src/3rdparty/hwloc/include/hwloc/export.h | 278 + src/3rdparty/hwloc/include/hwloc/gl.h | 135 + .../hwloc/include/hwloc/glibc-sched.h | 125 + src/3rdparty/hwloc/include/hwloc/helper.h | 1160 +++++ src/3rdparty/hwloc/include/hwloc/inlines.h | 146 + src/3rdparty/hwloc/include/hwloc/intel-mic.h | 134 + .../hwloc/include/hwloc/linux-libnuma.h | 273 + src/3rdparty/hwloc/include/hwloc/linux.h | 79 + src/3rdparty/hwloc/include/hwloc/nvml.h | 181 + src/3rdparty/hwloc/include/hwloc/opencl.h | 206 + .../hwloc/include/hwloc/openfabrics-verbs.h | 150 + src/3rdparty/hwloc/include/hwloc/plugins.h | 542 ++ src/3rdparty/hwloc/include/hwloc/rename.h | 765 +++ src/3rdparty/hwloc/include/hwloc/shmem.h | 137 + .../hwloc/include/private/autogen/config.h | 672 +++ .../hwloc/include/private/components.h | 43 + .../hwloc/include/private/cpuid-x86.h | 86 + src/3rdparty/hwloc/include/private/debug.h | 83 + .../include/private/internal-components.h | 41 + src/3rdparty/hwloc/include/private/misc.h | 583 +++ src/3rdparty/hwloc/include/private/netloc.h | 578 +++ src/3rdparty/hwloc/include/private/private.h | 417 ++ .../hwloc/include/private/solaris-chiptype.h | 43 + src/3rdparty/hwloc/include/private/xml.h | 108 + src/3rdparty/hwloc/src/base64.c | 309 ++ src/3rdparty/hwloc/src/bind.c | 922 ++++ src/3rdparty/hwloc/src/bitmap.c | 1676 ++++++ src/3rdparty/hwloc/src/components.c | 785 +++ src/3rdparty/hwloc/src/diff.c | 492 ++ src/3rdparty/hwloc/src/distances.c | 920 ++++ src/3rdparty/hwloc/src/misc.c | 166 + src/3rdparty/hwloc/src/pci-common.c | 941 ++++ src/3rdparty/hwloc/src/shmem.c | 287 ++ src/3rdparty/hwloc/src/static-components.h | 15 + src/3rdparty/hwloc/src/topology-noos.c | 65 + src/3rdparty/hwloc/src/topology-synthetic.c | 1521 ++++++ src/3rdparty/hwloc/src/topology-windows.c | 1189 +++++ src/3rdparty/hwloc/src/topology-x86.c | 1583 ++++++ .../hwloc/src/topology-xml-nolibxml.c | 919 ++++ src/3rdparty/hwloc/src/topology-xml.c | 2886 +++++++++++ src/3rdparty/hwloc/src/topology.c | 4484 +++++++++++++++++ src/3rdparty/hwloc/src/traversal.c | 616 +++ src/backend/cpu/cpu.cmake | 13 +- 58 files changed, 32562 insertions(+), 5 deletions(-) create mode 100644 src/3rdparty/hwloc/AUTHORS create mode 100644 src/3rdparty/hwloc/CMakeLists.txt create mode 100644 src/3rdparty/hwloc/COPYING create mode 100644 src/3rdparty/hwloc/NEWS create mode 100644 src/3rdparty/hwloc/README create mode 100644 src/3rdparty/hwloc/VERSION create mode 100644 src/3rdparty/hwloc/include/hwloc.h create mode 100644 src/3rdparty/hwloc/include/hwloc/autogen/config.h create mode 100644 src/3rdparty/hwloc/include/hwloc/bitmap.h create mode 100644 src/3rdparty/hwloc/include/hwloc/cuda.h create mode 100644 
src/3rdparty/hwloc/include/hwloc/cudart.h create mode 100644 src/3rdparty/hwloc/include/hwloc/deprecated.h create mode 100644 src/3rdparty/hwloc/include/hwloc/diff.h create mode 100644 src/3rdparty/hwloc/include/hwloc/distances.h create mode 100644 src/3rdparty/hwloc/include/hwloc/export.h create mode 100644 src/3rdparty/hwloc/include/hwloc/gl.h create mode 100644 src/3rdparty/hwloc/include/hwloc/glibc-sched.h create mode 100644 src/3rdparty/hwloc/include/hwloc/helper.h create mode 100644 src/3rdparty/hwloc/include/hwloc/inlines.h create mode 100644 src/3rdparty/hwloc/include/hwloc/intel-mic.h create mode 100644 src/3rdparty/hwloc/include/hwloc/linux-libnuma.h create mode 100644 src/3rdparty/hwloc/include/hwloc/linux.h create mode 100644 src/3rdparty/hwloc/include/hwloc/nvml.h create mode 100644 src/3rdparty/hwloc/include/hwloc/opencl.h create mode 100644 src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h create mode 100644 src/3rdparty/hwloc/include/hwloc/plugins.h create mode 100644 src/3rdparty/hwloc/include/hwloc/rename.h create mode 100644 src/3rdparty/hwloc/include/hwloc/shmem.h create mode 100644 src/3rdparty/hwloc/include/private/autogen/config.h create mode 100644 src/3rdparty/hwloc/include/private/components.h create mode 100644 src/3rdparty/hwloc/include/private/cpuid-x86.h create mode 100644 src/3rdparty/hwloc/include/private/debug.h create mode 100644 src/3rdparty/hwloc/include/private/internal-components.h create mode 100644 src/3rdparty/hwloc/include/private/misc.h create mode 100644 src/3rdparty/hwloc/include/private/netloc.h create mode 100644 src/3rdparty/hwloc/include/private/private.h create mode 100644 src/3rdparty/hwloc/include/private/solaris-chiptype.h create mode 100644 src/3rdparty/hwloc/include/private/xml.h create mode 100644 src/3rdparty/hwloc/src/base64.c create mode 100644 src/3rdparty/hwloc/src/bind.c create mode 100644 src/3rdparty/hwloc/src/bitmap.c create mode 100644 src/3rdparty/hwloc/src/components.c create mode 100644 src/3rdparty/hwloc/src/diff.c create mode 100644 src/3rdparty/hwloc/src/distances.c create mode 100644 src/3rdparty/hwloc/src/misc.c create mode 100644 src/3rdparty/hwloc/src/pci-common.c create mode 100644 src/3rdparty/hwloc/src/shmem.c create mode 100644 src/3rdparty/hwloc/src/static-components.h create mode 100644 src/3rdparty/hwloc/src/topology-noos.c create mode 100644 src/3rdparty/hwloc/src/topology-synthetic.c create mode 100644 src/3rdparty/hwloc/src/topology-windows.c create mode 100644 src/3rdparty/hwloc/src/topology-x86.c create mode 100644 src/3rdparty/hwloc/src/topology-xml-nolibxml.c create mode 100644 src/3rdparty/hwloc/src/topology-xml.c create mode 100644 src/3rdparty/hwloc/src/topology.c create mode 100644 src/3rdparty/hwloc/src/traversal.c diff --git a/CMakeLists.txt b/CMakeLists.txt index f9dd6fd54..a1779f53f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -261,4 +261,4 @@ if (WITH_DEBUG_LOG) endif() add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTP_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES}) -target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${HWLOC_LIBRARY}) +target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB}) diff --git a/src/3rdparty/hwloc/AUTHORS b/src/3rdparty/hwloc/AUTHORS new file mode 100644 index 000000000..7187a723d --- /dev/null 
+++ b/src/3rdparty/hwloc/AUTHORS
@@ -0,0 +1,44 @@
+hwloc Authors
+=============
+
+The following cumulative list contains the names of most individuals
+who have committed code to the hwloc repository
+(either directly or through a third party).
+
+Name                        Affiliation(s)
+--------------------------- --------------------
+Grzegorz Andrejczuk         Intel
+Cédric Augonnet             University of Bordeaux
+Guillaume Beauchamp         Inria
+Ahmad Boissetri Binzagr     Inria
+Cyril Bordage               Inria
+Nicholas Buroker            UWL
+Christopher M. Cantalupo    Intel
+Jérôme Clet-Ortega          University of Bordeaux
+Ludovic Courtès             Inria
+Clément Foyer               Inria
+Nathalie Furmento           CNRS
+Bryon Gloden
+Brice Goglin                Inria
+Gilles Gouaillardet         RIST
+Joshua Hursey               UWL
+Alexey Kardashevskiy        IBM
+Rob Latham                  ANL
+Douglas MacFarland          UWL
+Marc Marí                   BSC
+Jonathan L Peyton           Intel
+Piotr Luc                   Intel
+Antoine Rougier             intern from University of Bordeaux
+Jeff Squyres                Cisco
+Samuel Thibault             University of Bordeaux
+Jean-Yves VET               DDN
+Benjamin Worpitz
+Jeff Zhao                   Zhaoxin
+
+Affiliation abbreviations:
+--------------------------
+ANL = Argonne National Lab
+BSC = Barcelona Supercomputing Center
+Cisco = Cisco Systems, Inc.
+CNRS = Centre national de la recherche scientifique (France)
+UWL = University of Wisconsin-La Crosse
diff --git a/src/3rdparty/hwloc/CMakeLists.txt b/src/3rdparty/hwloc/CMakeLists.txt
new file mode 100644
index 000000000..431c11eb3
--- /dev/null
+++ b/src/3rdparty/hwloc/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required (VERSION 2.8)
+project (hwloc C)
+
+include_directories(include)
+include_directories(src)
+
+add_definitions(/D_CRT_SECURE_NO_WARNINGS)
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
+
+set(HEADERS
+    include/hwloc.h
+    src/static-components.h
+    )
+
+set(SOURCES
+    src/base64.c
+    src/bind.c
+    src/bitmap.c
+    src/components.c
+    src/diff.c
+    src/distances.c
+    src/misc.c
+    src/pci-common.c
+    src/shmem.c
+    src/topology.c
+    src/topology-noos.c
+    src/topology-synthetic.c
+    src/topology-windows.c
+    src/topology-x86.c
+    src/topology-xml.c
+    src/topology-xml-nolibxml.c
+    src/traversal.c
+    )
+
+add_library(hwloc STATIC
+    ${HEADERS}
+    ${SOURCES}
+    )
diff --git a/src/3rdparty/hwloc/COPYING b/src/3rdparty/hwloc/COPYING
new file mode 100644
index 000000000..e77516e18
--- /dev/null
+++ b/src/3rdparty/hwloc/COPYING
@@ -0,0 +1,39 @@
+Copyright © 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved.
+Copyright © 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved.
+Copyright © 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved.
+Copyright © 2004-2005 The Regents of the University of California. All rights reserved.
+Copyright © 2009 CNRS
+Copyright © 2009-2016 Inria. All rights reserved.
+Copyright © 2009-2015 Université Bordeaux
+Copyright © 2009-2015 Cisco Systems, Inc. All rights reserved.
+Copyright © 2009-2012 Oracle and/or its affiliates. All rights reserved.
+Copyright © 2010 IBM
+Copyright © 2010 Jirka Hladky
+Copyright © 2012 Aleksej Saushev, The NetBSD Foundation
+Copyright © 2012 Blue Brain Project, EPFL. All rights reserved.
+Copyright © 2013-2014 University of Wisconsin-La Crosse. All rights reserved.
+Copyright © 2015 Research Organization for Information Science and Technology (RIST). All rights reserved.
+Copyright © 2015-2016 Intel, Inc. All rights reserved.
+See COPYING in top-level directory.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS new file mode 100644 index 000000000..664c8d55c --- /dev/null +++ b/src/3rdparty/hwloc/NEWS @@ -0,0 +1,1599 @@ +Copyright © 2009 CNRS +Copyright © 2009-2019 Inria. All rights reserved. +Copyright © 2009-2013 Université Bordeaux +Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +This file contains the main features as well as overviews of specific +bug fixes (and other actions) for each version of hwloc since version +0.9 (as initially released as "libtopology", then re-branded to "hwloc" +in v0.9.1). + + +Version 2.0.4 (also included in 1.11.13 when appropriate) +------------- +* Add support for Linux 5.3 new sysfs cpu topology files with Die information. +* Add support for Intel v2 Extended Topology Enumeration in the x86 backend. +* Tiles, Modules and Dies are exposed as Groups for now. + + HWLOC_DONT_MERGE_DIE_GROUPS=1 may be set in the environment to prevent + Die groups from being automatically merged with identical parent or children. +* Ignore NUMA node information from AMD topoext in the x86 backend, + unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment. +* Group objects have a new "dont_merge" attribute to prevent them from + being automatically merged with identical parent or children. + + +Version 2.0.3 (also included in 1.11.12 when appropriate) +------------- +* Fix build on Cygwin, thanks to Marco Atzeri for the patches. +* Fix a corner case of hwloc_topology_restrict() where children would + become out-of-order. +* Fix the return length of export_xmlbuffer() functions to always + include the ending \0. +* Fix lstopo --children-order argument parsing. + + +Version 2.0.2 (also included in 1.11.11 when appropriate) +------------- +* Add support for Hygon Dhyana processors in the x86 backend, + thanks to Pu Wen for the patch. +* Fix symbol renaming to also rename internal components, + thanks to Evan Ramos for the patch. +* Fix build on HP-UX, thanks to Richard Lloyd for reporting the issues. 
+* Detect PCI link speed without being root on Linux >= 4.13.
+* Add HWLOC_VERSION* macros to the public headers,
+  thanks to Gilles Gouaillardet for the suggestion.
+
+
+Version 2.0.1 (also included in 1.11.10 when relevant)
+-------------
+* Bump the library soname to 15:0:0 to avoid conflicts with hwloc 1.11.x
+  releases. The hwloc 2.0.0 soname was buggy (12:0:0); applications will
+  have to be recompiled.
+* Serialize pciaccess discovery to fix concurrent topology loads in
+  multiple threads.
+* Fix hwloc-dump-hwdata to only process SMBIOS information that corresponds
+  to the KNL and KNM configuration.
+* Add a heuristic for guessing KNL/KNM memory and cluster modes when
+  hwloc-dump-hwdata could not run as root earlier.
+* Add --no-text lstopo option to remove text from some boxes in the
+  graphical output. Mostly useful for removing Group labels.
+* Some minor fixes to memory binding.
+
+
+Version 2.0.0
+-------------
+*** The ABI of the library has changed. ***
+    For instance some hwloc_obj fields were reordered, added or removed, see below.
+  + HWLOC_API_VERSION and hwloc_get_api_version() now give 0x00020000.
+  + See "How do I handle ABI breaks and API upgrades ?" in the FAQ
+    and "Upgrading to hwloc 2.0 API" in the documentation.
+* Major API changes
+  + Memory, I/O and Misc objects are now stored in dedicated children lists,
+    not in the usual children list that is now only used for CPU-side objects.
+    - hwloc_get_next_child() may still be used to iterate over these 4 lists
+      of children at once.
+    - hwloc_obj_type_is_normal(), _memory() and _io() may be used to check
+      the kind of a given object type.
+  + Topologies always have at least one NUMA object. On non-NUMA machines,
+    a single NUMA object is added to describe the entire machine memory.
+    The NUMA level cannot be ignored anymore.
+  + The NUMA level is special since NUMA nodes are not in the main hierarchy
+    of objects anymore. Its depth is a fake negative depth that should not be
+    compared with normal levels.
+    - If all memory objects are attached to parents at the same depth,
+      it may be retrieved with hwloc_get_memory_parents_depth().
+  + The HWLOC_OBJ_CACHE type is replaced with 8 types HWLOC_OBJ_L[1-5]CACHE
+    and HWLOC_OBJ_L[1-3]ICACHE that remove the need to disambiguate levels
+    when looking for caches with _by_type() functions.
+    - New hwloc_obj_type_is_{,d,i}cache() functions may be used to check whether
+      a given type is a cache.
+  + Reworked ignoring/filtering API
+    - Replace hwloc_topology_ignore*() functions with hwloc_topology_set_type_filter()
+      and hwloc_topology_set_all_types_filter().
+      . Contrary to hwloc_topology_ignore_{type,all}_keep_structure() which
+        removed individual objects, HWLOC_TYPE_FILTER_KEEP_STRUCTURE only removes
+        entire levels (so that topologies do not become too asymmetric).
+    - Remove HWLOC_TOPOLOGY_FLAG_ICACHES in favor of hwloc_topology_set_icache_types_filter()
+      with HWLOC_TYPE_FILTER_KEEP_ALL.
+    - Remove HWLOC_TOPOLOGY_FLAG_IO_DEVICES, _IO_BRIDGES and _WHOLE_IO in favor of
+      hwloc_topology_set_io_types_filter() with HWLOC_TYPE_FILTER_KEEP_ALL or
+      HWLOC_TYPE_FILTER_KEEP_IMPORTANT.
+  + The distance API has been completely reworked. It is now described
+    in hwloc/distances.h.
+  + Return values
+    - Most functions in hwloc/bitmap.h now return an int that may be negative
+      in case of failure to realloc/extend the internal storage of a bitmap.
+    - hwloc_obj_add_info() also returns an int in case allocations fail.
+* Minor API changes
+  + Object attributes
+    - obj->memory is removed.
+      . local_memory and page_types attributes are now in obj->attr->numanode
+    - total_memory moves to obj->total_memory.
+    - Objects do not have allowed_cpuset and allowed_nodeset anymore.
+      They are only available for the entire topology using
+      hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset().
+    - Objects now have a "subtype" field that supersedes former "Type" and
+      "CoProcType" info attributes.
+  + Object and level depths are now signed ints.
+  + Object string printing and parsing
+    - hwloc_type_sscanf() deprecates the old hwloc_obj_type_sscanf().
+    - hwloc_type_sscanf_as_depth() is added to convert a type name into
+      a level depth.
+    - hwloc_obj_cpuset_snprintf() is deprecated in favor of hwloc_bitmap_snprintf().
+  + Misc objects
+    - Replace hwloc_topology_insert_misc_object_by_cpuset() with
+      hwloc_topology_insert_group_object() to precisely specify the location
+      of an additional hierarchy level in the topology.
+    - Misc objects have their own level and depth to iterate over all of them.
+    - Misc objects may now only be inserted as a leaf object with
+      hwloc_topology_insert_misc_object() which deprecates
+      hwloc_topology_insert_misc_object_by_parent().
+  + hwloc_topology_restrict() doesn't remove objects that contain memory
+    by default anymore.
+    - The list of existing restrict flags was modified.
+  + The discovery support array now contains some NUMA specific bits.
+  + XML export functions take an additional flags argument,
+    for instance for exporting XMLs that are compatible with hwloc 1.x.
+  + Functions diff_load_xml*(), diff_export_xml*() and diff_destroy() in
+    hwloc/diff.h do not need a topology as first parameter anymore.
+  + hwloc_parse_cpumap_file() is superseded by hwloc_linux_read_path_as_cpumask()
+    in hwloc/linux.h.
+  + HWLOC_MEMBIND_DEFAULT and HWLOC_MEMBIND_FIRSTTOUCH were clarified.
+* New APIs and Features
+  + Add hwloc/shmem.h for sharing topologies between processes running on
+    the same machine (for reducing the memory footprint).
+  + Add the experimental netloc subproject. It is disabled by default
+    and can be enabled with --enable-netloc.
+    It currently brings command-line tools to gather and visualize the
+    topology of InfiniBand fabrics, and an API to convert such topologies
+    into Scotch architectures for process mapping.
+    See the documentation for details.
+* Removed APIs and features
+  + Remove the online_cpuset from struct hwloc_obj. Offline PUs get unknown
+    topologies on Linux nowadays, and wrong topology on Solaris. Other OS
+    do not support them. And one cannot do much about them anyway. Just keep
+    them in complete_cpuset.
+  + Remove the now-unused "System" object type HWLOC_OBJ_SYSTEM,
+    defined to MACHINE for backward compatibility.
+  + The almost-unused "os_level" attribute has been removed from the
+    hwloc_obj structure.
+  + Remove the custom interface for assembling the topologies of different
+    nodes as well as the hwloc-assembler tools.
+  + hwloc_topology_set_fsroot() is removed; the environment variable
+    HWLOC_FSROOT may be used for the same remote testing/debugging purpose.
+  + Remove the deprecated hwloc_obj_snprintf(), hwloc_obj_type_of_string(),
+    hwloc_distribute[v]().
+  + Remove Myrinet Express interoperability (hwloc/myriexpress.h).
+  + Remove Kerrighed support from the Linux backend.
+  + Remove Tru64 (OSF/1) support.
+    - Remove HWLOC_MEMBIND_REPLICATE which wasn't available anywhere else.
+* Backend improvements
+  + Linux
+    - OS devices do not have to be attached through PCI anymore,
+      for instance enabling the discovery of NVDIMM block devices.
+    - Remove the dependency on libnuma.
+    - Add a SectorSize attribute to block OS devices.
+  + Mac OS X
+    - Fix detection of cores and hyperthreads.
+    - Add CPUVendor, Model, ... attributes.
+  + Windows
+    - Add get_area_memlocation().
+* Tools
+  + lstopo and hwloc-info have a new --filter option matching the new filtering API.
+  + lstopo can be given --children-order=plain to force a basic displaying
+    of memory and normal children together below their parent.
+  + hwloc-distances was removed and replaced with lstopo --distances.
+* Misc
+  + Exports
+    - Exporting to synthetic now ignores I/O and Misc objects.
+  + PCI discovery
+    - Separate OS device discovery from PCI discovery. Only the latter is disabled
+      with --disable-pci at configure time. Both may be disabled with --disable-io.
+    - The `linuxpci' component is now renamed into `linuxio'.
+    - The old `libpci' component name from hwloc 1.6 is not supported anymore,
+      only the `pci' name from hwloc 1.7 is now recognized.
+    - The HWLOC_PCI_<domain>_<bus>_LOCALCPUS environment variables are superseded
+      with a single HWLOC_PCI_LOCALITY where bus ranges may be specified.
+    - Do not set PCI device and bridge names automatically. Vendor and device
+      names are already in info attributes.
+  + Components and discovery
+    - Add HWLOC_SYNTHETIC environment variable to enforce a synthetic topology
+      as if hwloc_topology_set_synthetic() had been called.
+    - HWLOC_COMPONENTS doesn't support xml or synthetic component attributes
+      anymore; they should be passed in HWLOC_XMLFILE or HWLOC_SYNTHETIC instead.
+    - HWLOC_COMPONENTS takes precedence over other environment variables
+      for selecting components.
+  + hwloc now requires a C99 compliant compiler.
+
+
+Version 1.11.9
+--------------
+* Add support for Zhaoxin ZX-C and ZX-D processors in the x86 backend,
+  thanks to Jeff Zhao for the patch.
+* Fix AMD Epyc 24-core L3 cache locality in the x86 backend.
+* Don't crash in the x86 backend when the CPUID vendor string is unknown.
+* Fix the missing pu discovery support bit on some OS.
+* Fix the management of the lstopoStyle info attribute for custom colors.
+* Add verbose warnings when failing to load hwloc v2.0+ XMLs.
+
+
+Version 1.11.8
+--------------
+* Multiple Solaris improvements, thanks to Maureen Chew for the help:
+  + Detect caches on Sparc.
+  + Properly detect allowed/disallowed PUs and NUMA nodes with processor sets.
+  + Add hwloc_get_last_cpu_location() support for the current thread.
+* Add support for CUDA compute capability 7.0 and fix support for 6.[12].
+* Tools improvements
+  + Fix search for objects by physical index in command-line tools.
+  + Add missing "cpubind:get_thisthread_last_cpu_location" in the output
+    of hwloc-info --support.
+  + Add --pid and --name to specify target processes in hwloc-ps.
+  + Display thread names in lstopo and hwloc-ps on Linux.
+* Doc improvements
+  + Add a FAQ entry about building on Windows.
+  + Install missing sub-manpage for hwloc_obj_add_info() and
+    hwloc_obj_get_info_by_name().
+
+
+Version 1.11.7
+--------------
+* Fix hwloc-bind --membind for CPU-less NUMA nodes (again).
+  Thanks to Gilles Gouaillardet for reporting the issue.
+* Fix a memory leak on IBM S/390 platforms running Linux.
+* Fix a memory leak when forcing the x86 backend first on amd64/topoext
+  platforms running Linux.
+* Command-line tools now support "hbm" instead of "numanode" for filtering
+  only high-bandwidth memory nodes when selecting locations.
+  + hwloc-bind also supports --hbm and --no-hbm for filtering only or
+    no HBM nodes.
+  Thanks to Nicolas Denoyelle for the suggestion.
+* Add --children and --descendants to hwloc-info for listing object
+  children or object descendants of a specific type.
+* Add --no-index, --index, --no-attrs, --attrs to disable/enable display
+  of index numbers or attributes in the graphical lstopo output.
+* Try to gather hwloc-dump-hwdata output from all possible locations
+  in hwloc-gather-topology.
+* Updates to the documentation of locations in hwloc(7) and
+  command-line tools manpages.
+
+
+Version 1.11.6
+--------------
+* Make the Linux discovery about twice as fast, especially on the CPU side,
+  by trying to avoid sysfs file accesses as much as possible.
+* Add support for AMD Family 17h processors (Zen) SMT cores in the Linux
+  and x86 backends.
+* Add the HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES flag (and the
+  HWLOC_THISSYSTEM_ALLOWED_RESOURCES environment variable) for reading the
+  set of allowed resources from the local operating system even if the
+  topology was loaded from XML or synthetic.
+* Fix hwloc_bitmap_set/clr_range() for infinite ranges that do not
+  overlap currently defined ranges in the bitmap.
+* Don't reset the lstopo zoom scale when moving the X11 window.
+* lstopo now has --flags for manually setting topology flags.
+* hwloc_get_depth_type() returns HWLOC_TYPE_DEPTH_UNKNOWN for Misc objects.
+
+
+Version 1.11.5
+--------------
+* Add support for Knights Mill Xeon Phi, thanks to Piotr Luc for the patch.
+* Reenable distance gathering on Solaris, disabled by mistake since v1.0.
+  Thanks to TU Wien for the help.
+* Fix hwloc_get_*obj*_inside_cpuset() functions to ignore objects with
+  empty CPU sets, for instance, CPU-less NUMA nodes such as KNL MCDRAM.
+  Thanks to Nicolas Denoyelle for the report.
+* Fix XML import of multiple distance matrices.
+* Add a FAQ entry about "hwloc is only a structural model, it ignores
+  performance models, memory bandwidth, etc.?"
+
+
+Version 1.11.4
+--------------
+* Add MemoryMode and ClusterMode attributes in the Machine object on KNL.
+  Add doc/examples/get-knl-modes.c for an example of retrieving them.
+  Thanks to Grzegorz Andrejczuk.
+* Fix Linux build with -m32 with respect to libudev.
+  Thanks to Paul Hargrove for reporting the issue.
+* Fix build with Visual Studio 2015, thanks to Eloi Gaudry for reporting
+  the issue and providing the patch.
+* Don't forget to display OS device children in the graphical lstopo.
+* Fix a memory leak on Solaris, thanks to Bryon Gloden for the patch.
+* Properly handle realloc() failures, thanks to Bryon Gloden for reporting
+  the issue.
+* Fix lstopo crash in ascii/fig/windows outputs when some objects have a
+  lstopoStyle info attribute.
+
+
+Version 1.11.3
+--------------
+* Bug fixes
+  + Fix a memory leak on Linux S/390 hosts with books.
+  + Fix /proc/mounts parsing on Linux by using mntent.h.
+    Thanks to Nathan Hjelm for reporting the issue.
+  + Fix an x86 infinite loop on VMware due to the x2APIC feature being
+    advertised without actually being fully supported.
+    Thanks to Jianjun Wen for reporting the problem and testing the patch.
+  + Fix the return value of hwloc_alloc() on mmap() failure.
+    Thanks to Hugo Brunie for reporting the issue.
+  + Fix the return value of command-line tools in some error cases.
+  + Do not break individual thread bindings during x86 backend discovery in a
+    multithreaded process. Thanks to Farouk Mansouri for the report.
+  + Fix hwloc-bind --membind for CPU-less NUMA nodes.
+  + Fix some corner cases in the XML export/import of application userdata.
+* API Improvements
+  + Add HWLOC_MEMBIND_BYNODESET flag so that membind() functions accept
+    either cpusets or nodesets.
+  + Add hwloc_get_area_memlocation() to check where pages are actually
+    allocated. Only implemented on Linux for now.
+    - There's no _nodeset() variant, but the new flag HWLOC_MEMBIND_BYNODESET
+      is supported.
+  + Make hwloc_obj_type_sscanf() parse back everything that may be outputted
+    by hwloc_obj_type_snprintf().
+* Detection Improvements
+  + Allow the x86 backend to add missing cache levels, so that it completes
+    what the Solaris backend lacks.
+    Thanks to Ryan Zezeski for reporting the issue.
+  + Do not filter out FibreChannel PCI adapters by default anymore.
+    Thanks to Matt Muggeridge for the report.
+  + Add support for CUDA compute capability 6.x.
+* Tools
+  + Add --support to hwloc-info to list supported features, just like with
+    hwloc_topology_get_support().
+    - Also add --objects and --topology to explicitly switch between the
+      default modes.
+  + Add --tid to let hwloc-bind operate on individual threads on Linux.
+  + Add --nodeset to let hwloc-bind report memory binding as NUMA node sets.
+  + hwloc-annotate and lstopo don't drop application userdata from XMLs anymore.
+    - Add --cu to hwloc-annotate to drop these application userdata.
+  + Make the hwloc-dump-hwdata dump directory configurable through configure
+    options such as --runstatedir or --localstatedir.
+* Misc Improvements
+  + Add systemd service template contrib/systemd/hwloc-dump-hwdata.service
+    for launching hwloc-dump-hwdata at boot on Linux.
+    Thanks to Grzegorz Andrejczuk.
+  + Add HWLOC_PLUGINS_BLACKLIST environment variable to prevent some plugins
+    from being loaded. Thanks to Alexandre Denis for the suggestion.
+  + Small improvements for various Windows build systems,
+    thanks to Jonathan L Peyton and Marco Atzeri.
+
+
+Version 1.11.2
+--------------
+* Improve support for Intel Knights Landing Xeon Phi on Linux:
+  + Group local NUMA nodes of normal memory (DDR) and high-bandwidth memory
+    (MCDRAM) together through "Cluster" groups so that the local MCDRAM is
+    easy to find.
+    - See "How do I find the local MCDRAM NUMA node on Intel Knights
+      Landing Xeon Phi?" in the documentation.
+    - For uniformity across all KNL configurations, always have a NUMA node
+      object even if the host is UMA.
+  + Fix the detection of the memory-side cache:
+    - Add the hwloc-dump-hwdata superuser utility to dump SMBIOS information
+      into /var/run/hwloc/ as root during boot, and load this dumped
+      information from the hwloc library at runtime.
+    - See "Why do I need hwloc-dump-hwdata for caches on Intel Knights
+      Landing Xeon Phi?" in the documentation.
+    Thanks to Grzegorz Andrejczuk for the patches and for the help.
+* The x86 and linux backends may now be combined for discovering CPUs
+  through x86 CPUID and memory from the Linux kernel.
+  This is useful for working around buggy CPU information reported by Linux
+  (for instance the AMD Bulldozer/Piledriver bug below).
+  Combination is enabled by passing HWLOC_COMPONENTS=x86 in the environment.
+* Fix L3 cache sharing on AMD Opteron 63xx (Piledriver) and 62xx (Bulldozer)
+  in the x86 backend. Thanks to many users who helped.
+* Fix the overzealous L3 cache sharing fix added to the x86 backend in 1.11.1
+  for AMD Opteron 61xx (Magny-Cours) processors.
+* The x86 backend may now add the info attribute Inclusive=0 or 1 to caches
+  it discovers, or to caches discovered by other backends earlier.
+  Thanks to Guillaume Beauchamp for the patch.
+* Fix the management of alloc_membind() allocation failures on AIX, HP-UX
+  and OSF/Tru64.
+* Fix spurious failures to load with ENOMEM on AIX in case of Misc objects
+  below PUs.
+* lstopo improvements in X11 and Windows graphical mode:
+  + Add + - f 1 shortcuts to manually zoom in, zoom out, reset the scale,
+    or fit the entire window.
+  + Display all keyboard shortcuts in the console.
+* Debug messages may be disabled at runtime by passing HWLOC_DEBUG_VERBOSE=0
+  in the environment when --enable-debug was passed to configure.
+* Add a FAQ entry "What are these Group objects in my topology?".
+
+
+Version 1.11.1
+--------------
+* Detection fixes
+  + Hardwire the topology of Fujitsu K-computer, FX10, FX100 servers to
+    work around buggy Linux kernels.
+    Thanks to Takahiro Kawashima and Gilles Gouaillardet.
+  + Fix L3 cache information on AMD Opteron 61xx Magny-Cours processors
+    in the x86 backend. Thanks to Guillaume Beauchamp for the patch.
+  + Detect block devices directly attached to PCI without a controller,
+    for instance NVMe disks. Thanks to Barry M. Tannenbaum.
+  + Add the PCISlot attribute to all PCI functions instead of only the
+    first one.
+* Miscellaneous internal fixes
+  + Ignore PCI bridges that could fail assertions by reporting buggy
+    secondary-subordinate bus numbers.
+    Thanks to George Bosilca for reporting the issue.
+  + Fix an overzealous assertion when inserting an intermediate Group object
+    while Groups are totally ignored.
+  + Fix a memory leak on Linux on AMD processors with dual-core compute units.
+    Thanks to Bob Benner.
+  + Fix a memory leak on failure to load an XML diff file.
+  + Fix some segfaults when inputting an invalid synthetic description.
+  + Fix a segfault when plugins fail to find core symbols.
+    Thanks to Guy Streeter.
+* Many fixes and improvements in the Windows backend:
+  + Fix the discovery of more than 32 processors and multiple processor
+    groups. Thanks to Barry M. Tannenbaum for the help.
+  + Add thread binding set support in case of multiple processor groups.
+  + Add thread binding get support.
+  + Add get_last_cpu_location() support for the current thread.
+  + Disable the unsupported process binding in case of multiple processor
+    groups.
+  + Fix/update the Visual Studio support under contrib/windows.
+    Thanks to Eloi Gaudry for the help.
+* Tools fixes
+  + Fix a segfault when displaying logical indexes in the graphical lstopo.
+    Thanks to Guillaume Mercier for reporting the issue.
+  + Fix lstopo linking with X11 libraries, for instance on Mac OS X.
+    Thanks to Scott Atchley and Pierre Ramet for reporting the issue.
+  + hwloc-annotate, hwloc-diff and hwloc-patch do not drop unavailable
+    resources from the output anymore and those may be annotated as well.
+  + Command-line tools may now import XML from the standard input with -i -.xml
+  + Add missing documentation for the hwloc-info --no-icaches option.
+
+
+Version 1.11.0
+--------------
+* API
+  + Socket objects are renamed into Package to align with the terminology
+    used by processor vendors. The old HWLOC_OBJ_SOCKET type and "Socket"
+    name are still supported for backward compatibility.
+  + HWLOC_OBJ_NODE is replaced with HWLOC_OBJ_NUMANODE for clarification.
+    HWLOC_OBJ_NODE is still supported for backward compatibility.
+    "Node" and "NUMANode" strings are supported as in earlier releases.
+* Detection improvements
+  + Add support for Intel Knights Landing Xeon Phi.
+    Thanks to Grzegorz Andrejczuk and Lukasz Anaczkowski.
+  + Add Vendor, Model, Revision, SerialNumber, Type and LinuxDeviceID
+    info attributes to Block OS devices on Linux. Thanks to Vineet Pedaballe
+    for the help.
+    - Add --disable-libudev to avoid the dependency on the libudev library.
+  + Add "MemoryModule" Misc objects with information about DIMMs, on Linux
+    when privileged and when I/O is enabled.
+    Thanks to Vineet Pedaballe for the help.
+  + Add a PCISlot attribute to PCI devices on Linux when supported to
+    identify the physical PCI slot where the board is plugged.
+  + Add CPUStepping info attribute on x86 processors,
+    thanks to Thomas Röhl for the suggestion.
+  + Ignore the device-tree on non-Power architectures to avoid buggy
+    detection on ARM. Thanks to Orion Poplawski for reporting the issue.
+  + Work around buggy Xeon E5v3 BIOS reporting invalid PCI-NUMA affinity
+    for the PCI links on the second processor.
+  + Add support for CUDA compute capability 5.x, thanks to Benjamin Worpitz.
+  + Many fixes to the x86 backend
+    - Add L1i and fix L2/L3 type on old AMD processors without topoext support.
+    - Fix Intel CPU family and model numbers when basic family isn't 6 or 15.
+    - Fix package IDs on recent AMD processors.
+    - Fix misc issues due to incomplete APIC IDs on x2APIC processors.
+    - Avoid buggy discovery on old SGI Altix UVs with non-unique APIC IDs.
+  + Gather total machine memory on NetBSD.
+* Tools
+  + lstopo
+    - Collapse identical PCI devices unless --no-collapse is given.
+      This avoids gigantic outputs when a PCI device contains dozens of
+      identical virtual functions.
+    - The ASCII art output is now called "ascii", for instance in
+      "lstopo -.ascii".
+      The former "txt" extension is retained for backward compatibility.
+    - Automatically scale graphical box width to the inner text in Cairo,
+      ASCII and Windows outputs.
+    - Add --rect to lstopo to force rectangular layout even for NUMA nodes.
+    - Add --restrict-flags to configure the behavior of --restrict.
+    - Objects may have a "Type" info attribute to specify a better type name
+      and display it in lstopo.
+    - Really export all verbose information to the given output file.
+  + hwloc-annotate
+    - May now operate on all types of objects, including I/O.
+    - May now insert Misc objects in the topology.
+    - Do not drop instruction caches and I/O devices from the output anymore.
+  + Fix lstopo path in hwloc-gather-topology after install.
+* Misc
+  + Fix hwloc/cudart.h for machines with multiple PCI domains,
+    thanks to Imre Kerr for reporting the problem.
+  + Fix PCI Bridge-specific depth attribute.
+  + Fix hwloc_bitmap_intersect() for two infinite bitmaps.
+  + Fix some corner cases in the building of levels on large NUMA machines
+    with non-uniform NUMA groups and I/Os.
+  + Improve the performance of object insertion by cpuset for large
+    topologies.
+  + Prefix verbose XML import errors with the source name.
+  + Improve pkg-config checks and error messages.
+  + Fix excluding after a component with an argument in the HWLOC_COMPONENTS
+    environment variable.
+* Documentation
+  + Fix the recommended way in documentation and examples to allocate memory
+    on some node; it should use HWLOC_MEMBIND_BIND.
+    Thanks to Nicolas Bouzat for reporting the issue.
+  + Add a "Miscellaneous objects" section in the documentation.
+  + Add a FAQ entry "What happens to my topology if I disable symmetric
+    multithreading, hyper-threading, etc. ?" to the documentation.
+
+
+Version 1.10.1
+--------------
+* Actually remove disallowed NUMA nodes from nodesets when the whole-system
+  flag isn't enabled.
+* Fix the gathering of PCI domains. Thanks to James Custer for reporting
+  the issue and providing a patch.
+* Fix the merging of identical parent and child in presence of Misc objects.
+  Thanks to Dave Love for reporting the issue.
+* Fix some misordering of children when merging with ignore_keep_structure()
+  in partially allowed topologies.
+* Fix an overzealous assertion in the debug code when running on a single-PU
+  host with I/O. Thanks to Thomas Van Doren for reporting the issue.
+* Don't forget to set up NUMA node object nodesets in the x86 backend (for BSDs)
+  and the OSF/Tru64 backend.
+* Fix cpuid-x86 build error with gcc -O3 on x86-32. Thanks to Thomas Van Doren
+  for reporting the issue.
+* Fix support for future very large caches in the x86 backend.
+* Fix vendor/device names for SR-IOV PCI devices on Linux.
+* Fix an unlikely crash in case of buggy hierarchical distance matrix.
+* Fix PU os_index on some AIX releases. Thanks to Hendryk Bockelmann and
+  Erik Schnetter for helping with debugging.
+* Fix hwloc_bitmap_isincluded() in case of infinite sets.
+* Change hwloc-ls.desktop into a lstopo.desktop and only install it if
+  lstopo is built with Cairo/X11 support. It cannot work with a non-graphical
+  lstopo or hwloc-ls.
+* Add support for the renaming of Socket into Package in future releases.
+* Add support for the replacement of HWLOC_OBJ_NODE with HWLOC_OBJ_NUMANODE
+  in future releases.
+* Clarify the documentation of distance matrices in hwloc.h and in the manpage
+  of hwloc-distances. Thanks to Dave Love for the suggestion.
+* Improve some error messages by displaying more information about the
+  hwloc library in use.
+* Document how to deal with the ABI break when upgrading to the upcoming 2.0.
+  See "How do I handle ABI breaks and API upgrades ?" in the FAQ.
+
+
+Version 1.10.0
+--------------
+* API
+  + Add hwloc_topology_export_synthetic() to export a topology to a
+    synthetic string without using lstopo. See the Synthetic topologies
+    section in the documentation.
+  + Add hwloc_topology_set/get_userdata() to let the application save
+    a private pointer in the topology whenever it needs a way to find
+    its own object corresponding to a topology.
+  + Add hwloc_get_numanode_obj_by_os_index() and document that this function
+    as well as hwloc_get_pu_obj_by_os_index() are good at converting
+    nodesets and cpusets into objects.
+  + hwloc_distrib() does not ignore any objects anymore when there are
+    too many of them. They get merged with others instead.
+    Thanks to Tim Creech for reporting the issue.
+* Tools
+  + hwloc-bind --get <command-line> now executes the command after displaying
+    the binding instead of ignoring the command entirely.
+    Thanks to John Donners for the suggestion.
+  + Clarify that memory sizes shown in lstopo are local by default
+    unless specified (total memory added in the root object).
+* Synthetic topologies
+  + Synthetic topology descriptions may now specify attributes such as
+    memory sizes and OS indexes. See the Synthetic topologies section
+    in the documentation.
+  + lstopo now exports in this fully-detailed format by default.
+    The new option --export-synthetic-flags may be used to revert
+    back to the old format.
+* Documentation
+  + Add the doc/examples/ subdirectory with several real-life examples,
+    including the already existing hwloc-hello.c for basics.
+    Thanks to Rob Aulwes for the suggestion.
+  + Improve the documentation of CPU and memory binding in the API.
+  + Add a FAQ entry about operating system errors, especially on AMD
+    platforms with buggy cache information.
+  + Add a FAQ entry about loading many topologies in a single program.
+* Misc
+  + Work around buggy Linux kernels reporting 2 sockets instead of
+    1 socket with 2 NUMA nodes for each Xeon E5 v3 (Haswell) processor.
+  + pciutils/libpci support is now removed since libpciaccess works
+    well and there's also a Linux-specific PCI backend. For the record,
+    pciutils was GPL and therefore disabled by default since v1.6.2.
+  + Add --disable-cpuid configure flag to work around buggy processor
+    simulators reporting invalid CPUID information.
+    Thanks to Andrew Friedley for reporting the issue.
+  + Fix a racy use of libltdl when manipulating multiple topologies in
+    different threads.
+    Thanks to Andra Hugo for reporting the issue and testing patches.
+  + Fix some build failures in private/misc.h.
+    Thanks to Pavan Balaji and Ralph Castain for the reports.
+  + Fix failures to detect X11/Xutil.h on some Solaris platforms.
+    Thanks to Siegmar Gross for reporting the failure.
+  + The plugin ABI has changed; this release will not load plugins
+    built against previous hwloc releases.
+
+
+Version 1.9.1
+-------------
+* Fix a crash when the PCI locality is invalid. Attach to the root object
+  instead. Thanks to Nicolas Denoyelle for reporting the issue.
+* Fix -f in lstopo manpage. Thanks to Jirka Hladky for reporting the issue.
+* Fix hwloc_obj_type_sscanf() and others when strncasecmp() is not properly
+  available. Thanks to Nick Papior Andersen for reporting the problem.
+* Mark Linux file descriptors as close-on-exec to avoid leaks on exec.
+* Fix some minor memory leaks.
+
+
+Version 1.9.0
+-------------
+* API
+  + Add hwloc_obj_type_sscanf() to extend hwloc_obj_type_of_string() with
+    type-specific attributes such as Cache/Group depth and Cache type.
+    hwloc_obj_type_of_string() is moved to hwloc/deprecated.h.
+  + Add hwloc_linux_get_tid_last_cpu_location() for retrieving the
+    last CPU where a Linux thread given by TID ran.
+  + Add hwloc_distrib() to extend the old hwloc_distribute[v]() functions.
+    hwloc_distribute[v]() is moved to hwloc/deprecated.h.
+  + Don't mix total and local memory when displaying verbose object attributes
+    with hwloc_obj_attr_snprintf() or in lstopo.
+* Backends
+  + Add CPUVendor, CPUModelNumber and CPUFamilyNumber info attributes for
+    x86, ia64 and Xeon Phi sockets on Linux, to extend the x86-specific
+    support added in v1.8.1. Requested by Ralph Castain.
+  + Add many CPU- and Platform-related info attributes on ARM and POWER
+    platforms, in the Machine and Socket objects.
+  + Add CUDA info attributes describing the number of multiprocessors and
+    cores and the size of the global, shared and L2 cache memories in CUDA
+    OS devices.
+  + Add OpenCL info attributes describing the number of compute units and
+    the global memory size in OpenCL OS devices.
+  + The synthetic backend now accepts extended types such as L2Cache, L1i or
+    Group3. lstopo also exports synthetic strings using these extended types.
+* Tools
+  + lstopo
+    - Do not overwrite output files by default anymore.
+      Pass -f or --force to enforce it.
+    - Display OpenCL, CUDA and Xeon Phi numbers of cores and memory sizes
+      in the graphical output.
+    - Fix export to stdout when specifying a Cairo-based output type
+      with --of.
+  + hwloc-ps
+    - Add -e or --get-last-cpu-location to report where processes/threads
+      run instead of where they are bound.
+    - Report locations as likely-more-useful objects such as Cores or Sockets
+      instead of Caches when possible.
+  + hwloc-bind
+    - Fix failure on Windows when not using --pid.
+    - Add -e as a synonym for --get-last-cpu-location.
+  + hwloc-distrib
+    - Add --reverse to distribute using last objects first and singlify
+      into last bits first. Thanks to Jirka Hladky for the suggestion.
+  + hwloc-info
+    - Report unified caches when looking for data or instruction cache
+      ancestor objects.
+* Misc
+  + Add experimental Visual Studio support under contrib/windows.
+    Thanks to Eloi Gaudry for his help and for providing the first draft.
+  + Fix some overzealous assertions and warnings about the ordering of
+    objects on a level with respect to cpusets. The ordering is only
+    guaranteed for complete cpusets (based on the first bit in sets).
+  + Fix some memory leaks when importing xml diffs and when exporting a
+    "too complex" entry.
+
+
+Version 1.8.1
+-------------
+* Fix the cpuid code on 64-bit Windows so that the x86 backend gets
+  enabled as expected and can populate CPU information.
+  Thanks to Robin Scher for reporting the problem.
+* Add CPUVendor/CPUModelNumber/CPUFamilyNumber attributes when running
+  on x86 architecture. Thanks to Ralph Castain for the suggestion.
+* Work around buggy BIOS reporting duplicate NUMA nodes on Linux.
+  Thanks to Jeff Becker for reporting the problem and testing the patch.
+* Add a name to the lstopo graphical window. Thanks to Michael Prokop
+  for reporting the issue.
+
+
+Version 1.8.0
+-------------
+* New components
+  + Add the "linuxpci" component that always works on Linux even when
+    libpciaccess and libpci aren't available (and even with a modified
+    file-system root). By default the old "pci" component runs first
+    because "linuxpci" lacks device names (obj->name is always NULL).
+* API
+  + Add the topology difference API in hwloc/diff.h for manipulating
+    many similar topologies.
+  + Add hwloc_topology_dup() for duplicating an entire topology.
+  + hwloc.h and hwloc/helper.h have been reorganized to clarify the
+    documentation sections. The actual inline code has moved out of hwloc.h
+    into the new hwloc/inlines.h.
+  + Deprecated functions are now in hwloc/deprecated.h, and not in the
+    official documentation anymore.
+* Tools
+  + Add hwloc-diff and hwloc-patch tools together with the new diff API.
+  + Add hwloc-compress-dir to (de)compress an entire directory of XML files
+    using hwloc-diff and hwloc-patch.
+  + Object colors in the graphical output of lstopo may be changed by adding
+    a "lstopoStyle" info attribute. See CUSTOM COLORS in the lstopo(1) manpage
+    for details. Thanks to Jirka Hladky for discussing the idea.
+  + hwloc-gather-topology may now gather I/O-related files on Linux when
+    --io is given. Only the linuxpci component supports discovering I/O
+    objects from these extended tarballs.
+  + hwloc-annotate now supports --ri to remove/replace info attributes with
+    a given name.
+  + hwloc-info supports "root" and "all" special locations for dumping
+    information about the root object.
+  + lstopo now supports --append-legend to append custom lines of text
+    to the legend in the graphical output. Thanks to Jirka Hladky for
+    discussing the idea.
+  + hwloc-calc and friends have a more robust parsing of locations given
+    on the command-line and they report useful error messages about it.
+  + Add --whole-system to hwloc-bind, hwloc-calc, hwloc-distances and
+    hwloc-distrib, and add --restrict to hwloc-bind for uniformity among
+    tools.
+* Misc
+  + Calling hwloc_topology_load() or hwloc_topology_set_*() on an already
+    loaded topology now returns an error (deprecated since release 1.6.1).
+  + Fix the initialisation of cpusets and nodesets in Group objects added
+    when inserting PCI hostbridges.
+  + Never merge Group objects that were added explicitly by the user with
+    hwloc_custom_insert_group_object_by_parent().
+  + Add a sanity check during dynamic plugin loading to prevent some
+    crashes when hwloc is dynamically loaded by another plugin mechanism.
+  + Add --with-hwloc-plugins-path to specify the install/load directories
+    of plugins.
+  + Add the MICSerialNumber info attribute to the root object when running
+    hwloc inside a Xeon Phi to match the same attribute in the MIC OS device
+    when running in the host.
+
+
+Version 1.7.2
+-------------
+* Do not create invalid block OS devices on very old Linux kernels such
+  as RHEL4 2.6.9.
+* Fix PCI subvendor/device IDs.
+* Fix the management of Misc objects inserted by parent.
+  Thanks to Jirka Hladky for reporting the problem.
+* Add a Port<n>State info attribute to OpenFabrics OS devices.
+* Add a MICSerialNumber info attribute to Xeon PHI/MIC OS devices.
+* Improve verbose error messages when failing to load from XML.
+
+
+Version 1.7.1
+-------------
+* Fix a failed assertion in the distance grouping code when loading an XML
+  file that already contains some groups.
+  Thanks to Laercio Lima Pilla for reporting the problem.
+* Remove unexpected Group objects when loading XML topologies with I/O
+  objects and NUMA distances.
+  Thanks to Elena Elkina for reporting the problem and testing patches.
+* Fix PCI link speed discovery when using libpciaccess.
+* Fix invalid libpciaccess virtual function device/vendor IDs when using
+  SR-IOV PCI devices on Linux.
+* Fix GL component build with old NVCtrl releases.
+  Thanks to Jirka Hladky for reporting the problem.
+* Fix embedding breakage caused by libltdl.
+  Thanks to Pavan Balaji for reporting the problem.
+* Always use the system-wide libltdl instead of shipping one inside hwloc.
+* Document issues when enabling plugins while embedding hwloc in another
+  project, in the documentation section Embedding hwloc in Other Software.
+* Add a FAQ entry "How to get useful topology information on NetBSD?"
+  in the documentation.
+* Some fixes in the renaming code for embedding.
+* Miscellaneous minor build fixes.
+
+
+Version 1.7.0
+-------------
+* New operating system backends
+  + Add BlueGene/Q compute node kernel (CNK) support. See the FAQ in the
+    documentation for details. Thanks to Jeff Hammond, Christopher Samuel
+    and Erik Schnetter for their help.
+  + Add NetBSD support, thanks to Aleksej Saushev.
+* New I/O device discovery
+  + Add co-processor OS devices such as "mic0" for Intel Xeon Phi (MIC)
+    on Linux. Thanks to Jerome Vienne for helping.
+  + Add co-processor OS devices such as "cuda0" for NVIDIA CUDA-capable GPUs.
+  + Add co-processor OS devices such as "opencl0d0" for OpenCL GPU devices
+    on the AMD OpenCL implementation.
+  + Add GPU OS devices such as ":0.0" for NVIDIA X11 displays.
+  + Add GPU OS devices such as "nvml0" for NVIDIA GPUs.
+    Thanks to Marwan Abdellah and Stefan Eilemann for helping.
+  These new OS devices have some string info attributes such as CoProcType,
+  GPUModel, etc. to better identify them.
+  See the I/O Devices and Attributes documentation sections for details.
+* New components
+  + Add the "opencl", "cuda", "nvml" and "gl" components for I/O device
+    discovery.
+    - "nvml" also improves the discovery of NVIDIA GPU PCIe link speed.
+    All of these new components may be built as plugins. They may also be
+    disabled entirely by passing --disable-opencl/cuda/nvml/gl to configure.
+    See the I/O Devices, Components and Plugins, and FAQ documentation
+    sections for details.
+* API
+  + Add hwloc_topology_get_flags().
+  + Add hwloc/plugins.h for building external plugins.
+    See the Adding new discovery components and plugins section.
+* Interoperability
+  + Add hwloc/opencl.h, hwloc/nvml.h, hwloc/gl.h and hwloc/intel-mic.h
+    to retrieve the locality of OS devices that correspond to AMD OpenCL
+    GPU devices or indexes, to NVML devices or indexes, to NVIDIA X11
+    displays, or to Intel Xeon Phi (MIC) device indexes.
+  + Add new helpers in hwloc/cuda.h and hwloc/cudart.h to convert
+    between CUDA devices or indexes and hwloc OS devices.
+  + Add hwloc_ibv_get_device_osdev() and clarify the requirements
+    of the OpenFabrics Verbs helpers in hwloc/openfabrics-verbs.h.
+* Tools
+  + hwloc-info is not only a synonym of lstopo -s anymore; it also
+    dumps information about objects given on the command-line.
+* Documentation
+  + Add a section "Existing components and plugins".
+  + Add a list of common OS devices in section "Software devices".
+  + Add a new FAQ entry "Why is lstopo slow?" about lstopo slowness
+    issues because of GPUs.
+  + Clarify the documentation of inline helpers in hwloc/myriexpress.h
+    and hwloc/openfabrics-verbs.h.
+* Misc
+  + Improve cache detection on AIX.
+  + The HWLOC_COMPONENTS variable now excludes the components whose
+    names are prefixed with '-'.
+  + lstopo --ignore PU now works when displaying the topology in
+    graphical and textual mode (not when exporting to XML).
+  + Make sure I/O options always appear in lstopo usage, not only when
+    using pciutils/libpci.
+  + Remove some unneeded Linux-specific includes from some interoperability
+    headers.
+  + Fix some inconsistencies in hwloc-distrib and hwloc-assembler-remote
+    manpages. Thanks to Guy Streeter for the report.
+  + Fix a memory leak on AIX when getting memory binding.
+  + Fix many small memory leaks on Linux.
+  + The `libpci' component is now called `pci' but the old name is still
+    accepted in the HWLOC_COMPONENTS variable for backward compatibility.
+
+
+Version 1.6.2
+-------------
+* Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
+  pciutils/libpci is only used if --enable-libpci is given to configure
+  because its GPL license may taint hwloc. See the Installation section
+  in the documentation for details.
+* Fix get_cpubind on Solaris when bound to a single PU with
+  processor_bind(). Thanks to Eugene Loh for reporting the problem
+  and providing a patch.
+
+
+Version 1.6.1
+-------------
+* Fix some crashes and buggy detection in the x86 backend when Linux
+  cgroups/cpusets restrict the available CPUs.
+* Fix the pkg-config output with --libs --static.
+  Thanks to Erik Schnetter for reporting one of the problems.
+* Fix the output of hwloc-calc -H --hierarchical when using logical
+  indexes in the output.
+* Calling hwloc_topology_load() multiple times on the same topology
+  is officially deprecated. hwloc will warn in such cases.
+* Add some documentation about existing plugins/components, package
+  dependencies, and I/O device specification on the command-line.
+
+
+Version 1.6.0
+-------------
+* Major changes
+  + Reorganize the backend infrastructure to support dynamic selection
+    of components and dynamic loading of plugins. For details, see the
+    new documentation section Components and plugins.
+    - The HWLOC_COMPONENTS variable lets one replace the default discovery
+      components.
+    - Dynamic loading of plugins may be enabled with --enable-plugins
+      (except on AIX and Windows). It will build libxml2 and libpci
+      support as separate modules. This helps reduce the dependencies
+      of the core hwloc library when distributed as a binary package.
+* Backends
+  + Add CPUModel detection on Darwin and x86/FreeBSD.
+    Thanks to Robin Scher for providing ways to implement this.
+  + The x86 backend now adds CPUModel info attributes to socket objects
+    created by other backends that do not natively support this attribute.
+  + Fix detection on FreeBSD in case of cpuset restriction. Thanks to
+    Sebastian Kuzminsky for reporting the problem.
+* XML
+  + Add hwloc_topology_set_userdata_import/export_callback(),
+    hwloc_export_obj_userdata() and _userdata_base64() to let
+    applications specify how to save/restore the custom data they placed
+    in the userdata private pointer field of hwloc objects.
+* Tools
+  + Add hwloc-annotate program to add string info attributes to XML
+    topologies.
+  + Add --pid-cmd to hwloc-ps to append the output of a command to each
+    PID line. May be used for showing Open MPI process ranks, see the
+    hwloc-ps(1) manpage for details.
+  + hwloc-bind now exits with an error if binding fails; the executable
+    is not launched unless binding succeeded or --force was given.
+  + Add --quiet to hwloc-calc and hwloc-bind to hide non-fatal error
+    messages.
+  + Fix command-line pid support in Windows tools.
+  + All programs accept --verbose as a synonym for -v.
+* Misc
+  + Fix some DIR descriptor leaks on Linux.
+  + Fix I/O device lists when some were filtered out after an XML import.
+  + Fix the removal of I/O objects when importing an I/O-enabled XML topology
+    without any I/O topology flag.
+  + When merging objects with HWLOC_IGNORE_TYPE_KEEP_STRUCTURE or
+    lstopo --merge, compare object types before deciding which one of two
+    identical objects to remove (e.g. keep sockets in favor of caches).
+  + Add some GUID- and LID-related info attributes to OpenFabrics
+    OS devices.
+  + Only add CPUType socket attributes on Solaris/Sparc. Other cases
+    don't report reliable information (Solaris/x86), and a replacement
+    is available as the Architecture string info in the Machine object.
+  + Add missing Backend string info on Solaris in most cases.
+  + Document object attributes and string infos in a new Attributes
+    section in the documentation.
+  + Add a section about Synthetic topologies in the documentation.
+
+
+Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6)
+-------------
+* Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
+  pciutils/libpci is only used if --enable-libpci is given to configure
+  because its GPL license may taint hwloc. See the Installation section
+  in the documentation for details.
+* Fix get_cpubind on Solaris when bound to a single PU with
+  processor_bind(). Thanks to Eugene Loh for reporting the problem
+  and providing a patch.
+* Fix some DIR descriptor leaks on Linux.
+* Fix I/O device lists when some were filtered out after an XML import.
+* Add missing Backend string info on Solaris in most cases.
+* Fix the removal of I/O objects when importing an I/O-enabled XML topology
+  without any I/O topology flag.
+* Fix the output of hwloc-calc -H --hierarchical when using logical
+  indexes in the output.
+* Fix the pkg-config output with --libs --static.
+  Thanks to Erik Schnetter for reporting one of the problems.
+
+
+Version 1.5.1
+-------------
+* Fix block OS device detection on Linux kernel 3.3 and later.
+  Thanks to Guy Streeter for reporting the problem and testing the fix.
+* Fix the cpuid code in the x86 backend (for FreeBSD). Thanks to
+  Sebastian Kuzminsky for reporting problems and testing patches.
+* Fix 64bit detection on FreeBSD.
+* Fix some corner cases in the management of the thissystem flag with
+  respect to topology flags and environment variables.
+* Fix some corner cases in command-line parsing checks in hwloc-distrib
+  and hwloc-distances.
+* Make sure we do not miss some block OS devices on old Linux kernels
+  when a single PCI device has multiple IDE hosts/devices behind it.
+* Do not disable I/O devices or instruction caches in hwloc-assembler output.
+
+
+Version 1.5.0
+-------------
+* Backends
+  + Do not limit the number of processors to 1024 on Solaris anymore.
+  + Gather total machine memory on FreeBSD. Thanks to Cyril Roelandt.
+  + XML topology files do not depend on the locale anymore. Float numbers
+    such as NUMA distances or PCI link speeds now always use a dot as a
+    decimal separator.
+  + Add instruction cache detection on Linux, AIX, Windows and Darwin.
+  + Add get_last_cpu_location() support for the current thread on AIX.
+  + Support binding on AIX when threads or processes were bound with
+    bindprocessor(). Thanks to Hendryk Bockelmann for reporting the issue
+    and testing patches, and to Farid Parpia for explaining the binding
+    interfaces.
+  + Improve AMD topology detection in the x86 backend (for FreeBSD) using
+    the topoext feature.
+* API
+  + Increase HWLOC_API_VERSION to 0x00010500 so that API changes may be
+    detected at build-time.
+  + Add a cache type attribute describing Data, Instruction and Unified
+    caches. Caches with different types but same depth (for instance L1d
+    and L1i) are placed on different levels.
+  + Add hwloc_get_cache_type_depth() to retrieve the hwloc level depth
+    of the given cache depth and type, for instance L1i or L2.
+    It helps disambiguate the case where hwloc_get_type_depth() returns
+    HWLOC_TYPE_DEPTH_MULTIPLE.
+  + Instruction caches are ignored unless HWLOC_TOPOLOGY_FLAG_ICACHES is
+    passed to hwloc_topology_set_flags() before load.
+  + Add hwloc_ibv_get_device_osdev_by_name() OpenFabrics helper in
+    openfabrics-verbs.h to find the hwloc OS device object corresponding to
+    an OpenFabrics device.
+* Tools
+  + Add lstopo-no-graphics, a variant of lstopo built without graphical
+    support to avoid dependencies on external libraries such as Cairo and
+    X11. When supported, graphical outputs are only available in the
+    original lstopo program.
+    - Packagers splitting lstopo and lstopo-no-graphics into different
+      packages are advised to use the alternatives system so that lstopo
+      points to the best available binary.
+  + Instruction caches are enabled in lstopo by default. Use --no-icaches
+    to disable them.
+  + Add -t/--threads to show threads in hwloc-ps.
+* Removal of obsolete components
+  + Remove the old cpuset interface (hwloc/cpuset.h) which is deprecated and
+    superseded by the bitmap API (hwloc/bitmap.h) since v1.1.
+ hwloc_cpuset and nodeset types are still defined, but all hwloc_cpuset_* + compatibility wrappers are now gone. + + Remove Linux libnuma conversion helpers for the deprecated and + broken nodemask_t interface. + + Remove support for "Proc" type name, it was superseded by "PU" in v1.0. + + Remove hwloc-mask symlinks, it was replaced by hwloc-calc in v1.0. +* Misc + + Fix PCIe 3.0 link speed computation. + + Non-printable characters are dropped from strings during XML export. + + Fix importing of escaped characters with the minimalistic XML backend. + + Assert hwloc_is_thissystem() in several I/O related helpers. + + Fix some memory leaks in the x86 backend for FreeBSD. + + Minor fixes to ease native builds on Windows. + + Limit the number of retries when operating on all threads within a + process on Linux if the list of threads is heavily getting modified. + + +Version 1.4.3 +------------- +* This release is only meant to fix the pciutils license issue when upgrading + to hwloc v1.5 or later is not possible. It contains several other minor + fixes but ignores many of them that are only in v1.5 or later. +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. +* Fix PCIe 3.0 link speed computation. +* Fix importing of escaped characters with the minimalistic XML backend. +* Fix a memory leak in the x86 backend. + + +Version 1.4.2 +------------- +* Fix build on Solaris 9 and earlier when fabsf() is not a compiler + built-in. Thanks to Igor Galić for reporting the problem. +* Fix support for more than 32 processors on Windows. Thanks to Hartmut + Kaiser for reporting the problem. +* Fix process-wide binding and cpulocation routines on Linux when some + threads disappear in the meantime. Thanks to Vlad Roubtsov for reporting + the issue. +* Make installed scripts executable. Thanks to Jirka Hladky for reporting + the problem. +* Fix libtool revision management when building for Windows. This fix was + also released as hwloc v1.4.1.1 Windows builds. Thanks to Hartmut Kaiser + for reporting the problem. +* Fix the __hwloc_inline keyword in public headers when compiling with a + C++ compiler. +* Add Port info attribute to network OS devices inside OpenFabrics PCI + devices so as to identify which interface corresponds to which port. +* Document requirements for interoperability helpers: I/O devices discovery + is required for some of them; the topology must match the current host + for most of them. + + +Version 1.4.1 +------------- +* This release contains all changes from v1.3.2. +* Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue. +* Fix memory leaks in some get_membind() functions. +* Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h) + in case of out-of-order NUMA node ids. +* Fix some overzealous assertions in the distance grouping code. +* Workaround BIOS reporting empty I/O locality in CUDA and OpenFabrics + helpers on Linux. Thanks to Albert Solernou for reporting the problem. +* Install a valgrind suppressions file hwloc-valgrind.supp (see the FAQ). +* Fix memory binding documentation. Thanks to Karl Napf for reporting the + issues. + + +Version 1.4.0 (does not contain all v1.3.2 changes) +------------- +* Major features + + Add "custom" interface and "assembler" tools to build multi-node + topology. 
See the Multi-node Topologies section in the documentation + for details. +* Interface improvements + + Add symmetric_subtree object attribute to ease assumptions when consulting + regular symmetric topologies. + + Add a CPUModel and CPUType info attribute to Socket objects on Linux + and Solaris. + + Add hwloc_get_obj_index_inside_cpuset() to retrieve the "logical" index + of an object within a subtree of the topology. + + Add more NVIDIA CUDA helpers in cuda.h and cudart.h to find hwloc objects + corresponding to CUDA devices. +* Discovery improvements + + Add a group object above partial distance matrices to make sure + the matrices are available in the final topology, except when this + new object would contradict the existing hierarchy. + + Grouping by distances now also works when loading from XML. + + Fix some corner cases in object insertion, for instance when dealing + with NUMA nodes without any CPU. +* Backends + + Implement hwloc_get_area_membind() on Linux. + + Honor I/O topology flags when importing from XML. + + Further improve XML-related error checking and reporting. + + Hide synthetic topology error messages unless HWLOC_SYNTHETIC_VERBOSE=1. +* Tools + + Add synthetic exporting of symmetric topologies to lstopo. + + lstopo --horiz and --vert can now be applied to some specific object types. + + lstopo -v -p now displays distance matrices with physical indexes. + + Add hwloc-distances utility to list distances. +* Documentation + + Fix and/or document the behavior of most inline functions in hwloc/helper.h + when the topology contains some I/O or Misc objects. + + Backend documentation enhancements. +* Bug fixes + + Fix missing last bit in hwloc_linux_get_thread_cpubind(). + Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. + + Fix FreeBSD build without cpuid support. + + Fix several Windows build issues. + + Fix inline keyword definition in public headers. + + Fix dependencies in the embedded library. + + Improve visibility support detection. Thanks to Dave Love for providing + the patch. + + Remove references to internal symbols in the tools. + + +Version 1.3.3 +------------- +* This release is only meant to fix the pciutils license issue when upgrading + to hwloc v1.4 or later is not possible. It contains several other minor + fixes but ignores many of them that are only in v1.4 or later. +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. + + +Version 1.3.2 +------------- +* Fix missing last bit in hwloc_linux_get_thread_cpubind(). + Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. +* Fix build with -mcmodel=medium. Thanks to Devendar Bureddy for reporting + the issue. +* Fix build with Solaris Studio 12 compiler when XML is disabled. + Thanks to Paul H. Hargrove for reporting the problem. +* Fix installation with old GNU sed, for instance on Red Hat 8. + Thanks to Paul H. Hargrove for reporting the problem. +* Fix PCI locality when Linux cgroups restrict the available CPUs. +* Fix floating point issue when grouping by distance on mips64 architecture. + Thanks to Paul H. Hargrove for reporting the problem. +* Fix conversion from/to Linux libnuma when some NUMA nodes have no memory. +* Fix support for gccfss compilers with broken ffs() support. Thanks to + Paul H. Hargrove for reporting the problem and providing a patch. 
+* Fix FreeBSD build without cpuid support. +* Fix several Windows build issues. +* Fix inline keyword definition in public headers. +* Fix dependencies in the embedded library. +* Detect when a compiler such as xlc may not report compile errors + properly, causing some configure checks to be wrong. Thanks to + Paul H. Hargrove for reporting the problem and providing a patch. +* Improve visibility support detection. Thanks to Dave Love for providing + the patch. +* Remove references to internal symbols in the tools. +* Fix installation on systems with limited command-line size. + Thanks to Paul H. Hargrove for reporting the problem. +* Further improve XML-related error checking and reporting. + + +Version 1.3.1 +------------- +* Fix pciutils detection with pkg-config when not installed in standard + directories. +* Fix visibility options detection with the Solaris Studio compiler. + Thanks to Igor Galić and Terry Dontje for reporting the problems. +* Fix support for old Linux sched.h headers such as those found + on Red Hat 8. Thanks to Paul H. Hargrove for reporting the problems. +* Fix inline and attribute support for Solaris compilers. Thanks to + Dave Love for reporting the problems. +* Print a short summary at the end of the configure output. Thanks to + Stefan Eilemann for the suggestion. +* Add --disable-libnuma configure option to disable libnuma-based + memory binding support on Linux. Thanks to Rayson Ho for the + suggestion. +* Make hwloc's configure script properly obey $PKG_CONFIG. Thanks to + Nathan Phillip Brink for raising the issue. +* Silence some harmless pciutils warnings, thanks to Paul H. Hargrove + for reporting the problem. +* Fix the documentation with respect to hwloc_pid_t and hwloc_thread_t + being either pid_t and pthread_t on Unix, or HANDLE on Windows. + + +Version 1.3.0 +------------- +* Major features + + Add I/O devices and bridges to the topology using the pciutils + library. Only enabled after setting the relevant flag with + hwloc_topology_set_flags() before hwloc_topology_load(). See the + I/O Devices section in the documentation for details. +* Discovery improvements + + Add associativity to the cache attributes. + + Add support for s390/z11 "books" on Linux. + + Add the HWLOC_GROUPING_ACCURACY environment variable to relax + distance-based grouping constraints. See the Environment Variables + section in the documentation for details about grouping behavior + and configuration. + + Allow user-given distance matrices to remove or replace those + discovered by the OS backend. +* XML improvements + + XML is now always supported: a minimalistic custom import/export + code is used when libxml2 is not available. It is only guaranteed + to read XML files generated by hwloc. + + hwloc_topology_export_xml() and export_xmlbuffer() now return an + integer. + + Add hwloc_free_xmlbuffer() to free the buffer allocated by + hwloc_topology_export_xmlbuffer(). + + Hide XML topology error messages unless HWLOC_XML_VERBOSE=1. +* Minor API updates + + Add hwloc_obj_add_info to customize object info attributes. +* Tools + + lstopo now displays I/O devices by default. Several options are + added to configure the I/O discovery. + + hwloc-calc and hwloc-bind now accept I/O devices as input. + + Add --restrict option to hwloc-calc and hwloc-distribute. + + Add --sep option to change the output field separator in hwloc-calc. + + Add --whole-system option to hwloc-ps. + + +Version 1.2.2 +------------- +* Fix build on AIX 5.2, thanks Utpal Kumar Ray for the report. 
+* Fix XML import of very large page sizes or counts on 32bits platform, + thanks to Karsten Hopp for the RedHat ticket. +* Fix crash when administrator limitations such as Linux cgroup require + to restrict distance matrices. Thanks to Ake Sandgren for reporting the + problem. +* Fix the removal of objects such as AMD Magny-Cours dual-node sockets + in case of administrator restrictions. +* Improve error reporting and messages in case of wrong synthetic topology + description. +* Several other minor internal fixes and documentation improvements. + + +Version 1.2.1 +------------- +* Improve support of AMD Bulldozer "Compute-Unit" modules by detecting + logical processors with different core IDs on Linux. +* Fix hwloc-ps crash when listing processes from another Linux cpuset. + Thanks to Carl Smith for reporting the problem. +* Fix build on AIX and Solaris. Thanks to Carl Smith and Andreas Kupries + for reporting the problems. +* Fix cache size detection on Darwin. Thanks to Erkcan Özcan for reporting + the problem. +* Make configure fail if --enable-xml or --enable-cairo is given and + proper support cannot be found. Thanks to Andreas Kupries for reporting + the XML problem. +* Fix spurious L1 cache detection on AIX. Thanks to Hendryk Bockelmann + for reporting the problem. +* Fix hwloc_get_last_cpu_location(THREAD) on Linux. Thanks to Gabriele + Fatigati for reporting the problem. +* Fix object distance detection on Solaris. +* Add pthread_self weak symbol to ease static linking. +* Minor documentation fixes. + + +Version 1.2.0 +------------- +* Major features + + Expose latency matrices in the API as an array of distance structures + within objects. Add several helpers to find distances. + + Add hwloc_topology_set_distance_matrix() and environment variables + to provide a matrix of distances between a given set of objects. + + Add hwloc_get_last_cpu_location() and hwloc_get_proc_last_cpu_location() + to retrieve the processors where a process or thread recently ran. + - Add the corresponding --get-last-cpu-location option to hwloc-bind. + + Add hwloc_topology_restrict() to restrict an existing topology to a + given cpuset. + - Add the corresponding --restrict option to lstopo. +* Minor API updates + + Add hwloc_bitmap_list_sscanf/snprintf/asprintf to convert between bitmaps + and strings such as 4-5,7-9,12,15- + + hwloc_bitmap_set/clr_range() now support infinite ranges. + + Clarify the difference between inserting Misc objects by cpuset or by + parent. + + hwloc_insert_misc_object_by_cpuset() now returns NULL in case of error. +* Discovery improvements + + x86 backend (for freebsd): add x2APIC support + + Support standard device-tree phandle, to get better support on e.g. ARM + systems providing it. + + Detect cache size on AIX. Thanks Christopher and IBM. + + Improve grouping to support asymmetric topologies. +* Tools + + Command-line tools now support "all" and "root" special locations + consisting in the entire topology, as well as type names with depth + attributes such as L2 or Group4. + + hwloc-calc improvements: + - Add --number-of/-N option to report the number of objects of a given + type or depth. + - -I is now equivalent to --intersect for listing the indexes of + objects of a given type or depth that intersects the input. + - Add -H to report the output as a hierarchical combination of types + and depths. + + Add --thissystem to lstopo. + + Add lstopo-win, a console-less lstopo variant on Windows. +* Miscellaneous + + Remove C99 usage from code base. 
+ + Rename hwloc-gather-topology.sh into hwloc-gather-topology + + Fix AMD cache discovery on freebsd when there is no L3 cache, thanks + Andriy Gapon for the fix. + + +Version 1.1.2 +------------- +* Fix a segfault in the distance-based grouping code when some objects + are not placed in any group. Thanks to Bernd Kallies for reporting + the problem and providing a patch. +* Fix the command-line parsing of hwloc-bind --mempolicy interleave. + Thanks to Guy Streeter for reporting the problem. +* Stop truncating the output in hwloc_obj_attr_snprintf() and in the + corresponding lstopo output. Thanks to Guy Streeter for reporting the + problem. +* Fix object levels ordering in synthetic topologies. +* Fix potential incoherency between device tree and kernel information, + when SMT is disabled on Power machines. +* Fix and document the behavior of hwloc_topology_set_synthetic() in case + of invalid argument. Thanks to Guy Streeter for reporting the problem. +* Add some verbose error message reporting when it looks like the OS + gives erroneous information. +* Do not include unistd.h and stdint.h in public headers on Windows. +* Move config.h files into their own subdirectories to avoid name + conflicts when AC_CONFIG_HEADERS adds -I's for them. +* Remove the use of declaring variables inside "for" loops. +* Some other minor fixes. +* Many minor documentation fixes. + + +Version 1.1.1 +------------- +* Add hwloc_get_api_version() which returns the version of hwloc used + at runtime. Thanks to Guy Streeter for the suggestion. +* Fix the number of hugepages reported for NUMA nodes on Linux. +* Fix hwloc_bitmap_to_ulong() right after allocating the bitmap. + Thanks to Bernd Kallies for reporting the problem. +* Fix hwloc_bitmap_from_ith_ulong() to properly zero the first ulong. + Thanks to Guy Streeter for reporting the problem. +* Fix hwloc_get_membind_nodeset() on Linux. + Thanks to Bernd Kallies for reporting the problem and providing a patch. +* Fix some file descriptor leaks in the Linux discovery. +* Fix the minimum width of NUMA nodes, caches and the legend in the graphical + lstopo output. Thanks to Jirka Hladky for reporting the problem. +* Various fixes to bitmap conversion from/to taskset-strings. +* Fix and document snprintf functions behavior when the buffer size is too + small or zero. Thanks to Guy Streeter for reporting the problem. +* Fix configure to avoid spurious enabling of the cpuid backend. + Thanks to Tim Anderson for reporting the problem. +* Cleanup error management in hwloc-gather-topology.sh. + Thanks to Jirka Hladky for reporting the problem and providing a patch. +* Add a manpage and usage for hwloc-gather-topology.sh on Linux. + Thanks to Jirka Hladky for providing a patch. +* Memory binding documentation enhancements. + + +Version 1.1.0 +------------- + +* API + + Increase HWLOC_API_VERSION to 0x00010100 so that API changes may be + detected at build-time. + + Add a memory binding interface. + + The cpuset API (hwloc/cpuset.h) is now deprecated. It is replaced by + the bitmap API (hwloc/bitmap.h) which offers the same features with more + generic names since it applies to CPU sets, node sets and more. + Backward compatibility with the cpuset API and ABI is still provided but + it will be removed in a future release. + Old types (hwloc_cpuset_t, ...) are still available as a way to clarify + what kind of hwloc_bitmap_t each API function manipulates. 
+ Upgrading to the new API only requires to replace hwloc_cpuset_ function + calls with the corresponding hwloc_bitmap_ calls, with the following + renaming exceptions: + - hwloc_cpuset_cpu -> hwloc_bitmap_only + - hwloc_cpuset_all_but_cpu -> hwloc_bitmap_allbut + - hwloc_cpuset_from_string -> hwloc_bitmap_sscanf + + Add an `infos' array in each object to store couples of info names and + values. It enables generic storage of things like the old dmi board infos + that were previously stored in machine specific attributes. + + Add linesize cache attribute. +* Features + + Bitmaps (and thus CPU sets and node sets) are dynamically (re-)allocated, + the maximal number of CPUs (HWLOC_NBMAXCPUS) has been removed. + + Improve the distance-based grouping code to better support irregular + distance matrices. + + Add support for device-tree to get cache information (useful on Power + architectures). +* Helpers + + Add NVIDIA CUDA helpers in cuda.h and cudart.h to ease interoperability + with CUDA Runtime and Driver APIs. + + Add Myrinet Express helper in myriexpress.h to ease interoperability. +* Tools + + lstopo now displays physical/OS indexes by default in graphical mode + (use -l to switch back to logical indexes). The textual output still uses + logical by default (use -p to switch to physical indexes). + + lstopo prefixes logical indexes with `L#' and physical indexes with `P#'. + Physical indexes are also printed as `P#N' instead of `phys=N' within + object attributes (in parentheses). + + Add a legend at the bottom of the lstopo graphical output, use --no-legend + to remove it. + + Add hwloc-ps to list process' bindings. + + Add --membind and --mempolicy options to hwloc-bind. + + Improve tools command-line options by adding a generic --input option + (and more) which replaces the old --xml, --synthetic and --fsys-root. + + Cleanup lstopo output configuration by adding --output-format. + + Add --intersect in hwloc-calc, and replace --objects with --largest. + + Add the ability to work on standard input in hwloc-calc. + + Add --from, --to and --at in hwloc-distrib. + + Add taskset-specific functions and command-line tools options to + manipulate CPU set strings in the format of the taskset program. + + Install hwloc-gather-topology.sh on Linux. + + +Version 1.0.3 +------------- + +* Fix support for Linux cpuset when emulated by a cgroup mount point. +* Remove unneeded runtime dependency on libibverbs.so in the library and + all utils programs. +* Fix hwloc_cpuset_to_linux_libnuma_ulongs in case of non-linear OS-indexes + for NUMA nodes. +* lstopo now displays physical/OS indexes by default in graphical mode + (use -l to switch back to logical indexes). The textual output still uses + logical by default (use -p to switch to physical indexes). + + +Version 1.0.2 +------------- + +* Public headers can now be included directly from C++ programs. +* Solaris fix for non-contiguous cpu numbers. Thanks to Rolf vandeVaart for + reporting the issue. +* Darwin 10.4 fix. Thanks to Olivier Cessenat for reporting the issue. +* Revert 1.0.1 patch that ignored sockets with unknown ID values since it + only slightly helped POWER7 machines with old Linux kernels while it + prevents recent kernels from getting the complete POWER7 topology. +* Fix hwloc_get_common_ancestor_obj(). +* Remove arch-specific bits in public headers. +* Some fixes in the lstopo graphical output. +* Various man page clarifications and minor updates. + + +Version 1.0.1 +------------- + +* Various Solaris fixes. 
Thanks to Yannick Martin for reporting the issue. +* Fix "non-native" builds on x86 platforms (e.g., when building 32 + bit executables with compilers that natively build 64 bit). +* Ignore sockets with unknown ID values (which fixes issues on POWER7 + machines). Thanks to Greg Bauer for reporting the issue. +* Various man page clarifications and minor updates. +* Fixed memory leaks in hwloc_setup_group_from_min_distance_clique(). +* Fix cache type filtering on MS Windows 7. Thanks to Αλέξανδρος + Παπαδογιαννάκ for reporting the issue. +* Fixed warnings when compiling with -DNDEBUG. + + +Version 1.0.0 +------------- + +* The ABI of the library has changed. +* Backend updates + + Add FreeBSD support. + + Add x86 cpuid based backend. + + Add Linux cgroup support to the Linux cpuset code. + + Support binding of entire multithreaded process on Linux. + + Fix and enable Group support in Windows. + + Cleanup XML export/import. +* Objects + + HWLOC_OBJ_PROC is renamed into HWLOC_OBJ_PU for "Processing Unit", + its stringified type name is now "PU". + + Use new HWLOC_OBJ_GROUP objects instead of MISC when grouping + objects according to NUMA distances or arbitrary OS aggregation. + + Rework memory attributes. + + Add different cpusets in each object to specify processors that + are offline, unavailable, ... + + Cleanup the storage of object names and DMI infos. +* Features + + Add support for looking up specific PID topology information. + + Add hwloc_topology_export_xml() to export the topology in a XML file. + + Add hwloc_topology_get_support() to retrieve the supported features + for the current topology context. + + Support non-SYSTEM object as the root of the tree, use MACHINE in + most common cases. + + Add hwloc_get_*cpubind() routines to retrieve the current binding + of processes and threads. +* API + + Add HWLOC_API_VERSION to help detect the currently used API version. + + Add missing ending "e" to *compare* functions. + + Add several routines to emulate PLPA functions. + + Rename and rework the cpuset and/or/xor/not/clear operators to output + their result in a dedicated argument instead of modifying one input. + + Deprecate hwloc_obj_snprintf() in favor of hwloc_obj_type/attr_snprintf(). + + Clarify the use of parent and ancestor in the API, do not use father. + + Replace hwloc_get_system_obj() with hwloc_get_root_obj(). + + Return -1 instead of HWLOC_OBJ_TYPE_MAX in the API since the latter + isn't public. + + Relax constraints in hwloc_obj_type_of_string(). + + Improve displaying of memory sizes. + + Add 0x prefix to cpuset strings. +* Tools + + lstopo now displays logical indexes by default, use --physical to + revert back to OS/physical indexes. + + Add colors in the lstopo graphical outputs to distinguish between online, + offline, reserved, ... objects. + + Extend lstopo to show cpusets, filter objects by type, ... + + Renamed hwloc-mask into hwloc-calc which supports many new options. +* Documentation + + Add a hwloc(7) manpage containing general information. + + Add documentation about how to switch from PLPA to hwloc. + + Cleanup the distributed documentation files. +* Miscellaneous + + Many compilers warning fixes. + + Cleanup the ABI by using the visibility attribute. + + Add project embedding support. + + +Version 0.9.4 (unreleased) +-------------------------- + +* Fix reseting colors to normal in lstopo -.txt output. +* Fix Linux pthread_t binding error report. + + +Version 0.9.3 +------------- + +* Fix autogen.sh to work with Autoconf 2.63. 
+* Fix various crashes in particular conditions:
+  - XML files with root attributes
+  - offline CPUs
+  - partial sysfs support
+  - unparseable /proc/cpuinfo
+  - ignoring the NUMA level while Misc levels have been generated
+* Tweak documentation a bit.
+* Do not require the pthread library for binding the current thread on Linux.
+* Do not erroneously consider that the sched_setaffinity prototype is the old
+  version when there is actually none.
+* Fix _syscall3 compilation on archs for which we do not have the
+  sched_setaffinity system call number.
+* Fix AIX binding.
+* Fix library dependencies: now only lstopo depends on libtermcap, fix
+  binutils-gold link.
+* Have make check always build and run hwloc-hello.c
+* Do not limit size of a cpuset.
+
+
+Version 0.9.2
+-------------
+
+* Trivial documentation changes.
+
+
+Version 0.9.1
+-------------
+
+* Re-branded to "hwloc" and moved to the Open MPI project, relicensed under the
+  BSD license.
+* The prefix of all functions and tools is now hwloc, and some public
+  functions were also renamed for real.
+* Group NUMA nodes into Misc objects according to their physical distance
+  that may be reported by the OS/BIOS.
+  May be ignored by setting HWLOC_IGNORE_DISTANCES=1 in the environment.
+* Ignore offline CPUs on Solaris.
+* Improved binding support on AIX.
+* Add HP-UX support.
+* CPU sets are now allocated/freed dynamically.
+* Add command line options to tune the lstopo graphical output, add
+  semi-graphical textual output.
+* Extend topobind to support multiple cpusets or objects on the command
+  line as topomask does.
+* Add an Infiniband-specific helper hwloc/openfabrics-verbs.h to retrieve
+  the physical location of IB devices.
+
+
+Version 0.9 (libtopology)
+-------------------------
+
+* First release.
diff --git a/src/3rdparty/hwloc/README b/src/3rdparty/hwloc/README
new file mode 100644
index 000000000..5567b4d14
--- /dev/null
+++ b/src/3rdparty/hwloc/README
@@ -0,0 +1,85 @@
+Introduction
+
+The Hardware Locality (hwloc) software project aims at easing the process of
+discovering hardware resources in parallel architectures. It offers
+command-line tools and a C API for consulting these resources, their locality,
+attributes, and interconnection. hwloc primarily aims at helping
+high-performance computing (HPC) applications, but is also applicable to any
+project seeking to exploit code and/or data locality on modern computing
+platforms.
+
+hwloc is actually made of two subprojects distributed together:
+
+ * The original hwloc project for describing the internals of computing nodes.
+   It is described in detail starting at section Hardware Locality (hwloc)
+   Introduction.
+ * The network-oriented companion called netloc (Network Locality), described
+   in detail starting with section Network Locality (netloc).
+
+See also the Related pages tab above for links to other sections.
+
+Netloc may be disabled, but the original hwloc cannot. Both hwloc and netloc
+APIs are documented after these sections.
+
+Installation
+
+hwloc (http://www.open-mpi.org/projects/hwloc/) is available under the BSD
+license. It is hosted as a sub-project of the overall Open MPI project (http://
+www.open-mpi.org/). Note that hwloc does not require any functionality from
+Open MPI -- it is a wholly separate (and much smaller!) project and code base.
+It just happens to be hosted as part of the overall Open MPI project.
+
+Basic Installation
+
+Installation is the fairly common GNU-based process:
+
+shell$ ./configure --prefix=...
+shell$ make
+shell$ make install
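+
+Once installed, a tiny C program is a quick way to verify the library. The
+following is only an illustrative sketch (error checks omitted); it counts the
+processing units (PUs) that hwloc detected, and can typically be built with
+"cc test.c $(pkg-config --cflags --libs hwloc)":
+
+    #include <hwloc.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        hwloc_topology_t topology;
+        /* create a topology context and discover the current machine */
+        hwloc_topology_init(&topology);
+        hwloc_topology_load(topology);
+        /* count the processing units (PUs) that were found */
+        printf("%d PUs found\n",
+               hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU));
+        hwloc_topology_destroy(topology);
+        return 0;
+    }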
+
+hwloc- and netloc-specific configure options and requirements are documented in
+sections hwloc Installation and Netloc Installation respectively.
+
+Also note that if you install supplemental libraries in non-standard locations,
+hwloc's configure script may not be able to find them without some help. You
+may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on
+the configure command line.
+
+For example, if libpciaccess was installed into /opt/pciaccess, hwloc's
+configure script may not find it by default. Try adding PKG_CONFIG_PATH to the
+./configure command line, like this:
+
+./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ...
+
+Running the "lstopo" tool, and looking at its graphical output, is a good way
+to check whether hwloc properly detected the architecture of your node. Netloc
+command-line tools can be used to display the network topology interconnecting
+your nodes.
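+
+The same check can be done programmatically. The sketch below (again only
+illustrative, with error handling omitted) prints how many objects hwloc found
+at each level of the topology tree, a rough textual counterpart of "lstopo -s":
+
+    #include <hwloc.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        hwloc_topology_t topology;
+        int depth, d;
+
+        hwloc_topology_init(&topology);
+        hwloc_topology_load(topology);
+        /* walk the levels from the root (Machine) down to the PUs */
+        depth = hwloc_topology_get_depth(topology);
+        for (d = 0; d < depth; d++)
+            printf("depth %d: %d %s object(s)\n", d,
+                   hwloc_get_nbobjs_by_depth(topology, d),
+                   hwloc_obj_type_string(hwloc_get_depth_type(topology, d)));
+        hwloc_topology_destroy(topology);
+        return 0;
+    }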
+
+Installing from a Git clone
+
+Additionally, the code can be directly cloned from Git:
+
+shell$ git clone https://github.com/open-mpi/hwloc.git
+shell$ cd hwloc
+shell$ ./autogen.sh
+
+Note that GNU Autoconf >=2.63, Automake >=1.11 and Libtool >=2.2.6 are required
+when building from a Git clone.
+
+Nightly development snapshots are available on the web site; they can be
+configured and built without any need for Git or GNU Autotools.
+
+Questions and Bugs
+
+Bugs should be reported in the tracker (https://github.com/open-mpi/hwloc/
+issues). Opening a new issue automatically displays lots of hints about how to
+debug and report issues.
+
+Questions may be sent to the users or developers mailing lists (http://
+www.open-mpi.org/community/lists/hwloc.php).
+
+There is also a #hwloc IRC channel on Freenode (irc.freenode.net).
+
+
+
+See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation.
diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION
new file mode 100644
index 000000000..5ebc6bb47
--- /dev/null
+++ b/src/3rdparty/hwloc/VERSION
@@ -0,0 +1,47 @@
+# This is the VERSION file for hwloc, describing the precise version
+# of hwloc in this distribution. The various components of the version
+# number below are combined to form a single version number string.
+
+# major, minor, and release are generally combined in the form
+# <major>.<minor>.<release>. If release is zero, then it is omitted.
+
+# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
+
+major=2
+minor=0
+release=4
+
+# greek is used for alpha or beta release tags. If it is non-empty,
+# it will be appended to the version number. It does not have to be
+# numeric. Common examples include a1 (alpha release 1), b1 (beta
+# release 1), sc2005 (Super Computing 2005 release). The only
+# requirement is that it must be entirely printable ASCII characters
+# and have no white space.
+
+greek=
+
+# The date when this release was created
+
+date="Jun 03, 2019"
+
+# If snapshot=1, then use the value from snapshot_version as the
+# entire hwloc version (i.e., ignore major, minor, release, and
+# greek). This is only set to 1 when making snapshot tarballs.
+snapshot=0
+snapshot_version=${major}.${minor}.${release}${greek}-git
+
+# The shared library version of hwloc's public library. This version
+# is maintained in accordance with the "Library Interface Versions"
+# chapter from the GNU Libtool documentation. Notes:
+
+# 1. Since version numbers are associated with *releases*, the version
+# number maintained on the hwloc git master (and developer branches)
+# is always 0:0:0.
+
+# 2. Version numbers are described in the Libtool current:revision:age
+# format.
+
+libhwloc_so_version=15:3:0
+libnetloc_so_version=0:0:0
+
+# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h
new file mode 100644
index 000000000..ee6da6fd1
--- /dev/null
+++ b/src/3rdparty/hwloc/include/hwloc.h
@@ -0,0 +1,2270 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2019 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/*=====================================================================
+ *                 PLEASE GO READ THE DOCUMENTATION!
+ *         ------------------------------------------------
+ *               $tarball_directory/doc/doxygen-doc/
+ *                                or
+ *           http://www.open-mpi.org/projects/hwloc/doc/
+ *=====================================================================
+ *
+ * FAIR WARNING: Do NOT expect to be able to figure out all the
+ * subtleties of hwloc by simply reading function prototypes and
+ * constant descriptions here in this file.
+ *
+ * Hwloc has wonderful documentation in both PDF and HTML formats for
+ * your reading pleasure.  The formal documentation explains a LOT of
+ * hwloc-specific concepts, provides definitions, and discusses the
+ * "big picture" for many of the things that you'll find here in this
+ * header file.
+ *
+ * The PDF/HTML documentation was generated via Doxygen; much of what
+ * you'll see in there is also here in this file.  BUT THERE IS A LOT
+ * THAT IS IN THE PDF/HTML THAT IS ***NOT*** IN hwloc.h!
+ *
+ * There are entire paragraph-length descriptions, discussions, and
+ * pretty pictures to explain subtle corner cases, provide concrete
+ * examples, etc.
+ *
+ * Please, go read the documentation.  :-)
+ *
+ * Moreover, there are several examples of hwloc use under doc/examples
+ * in the source tree.
+ *
+ *=====================================================================*/
+
+/** \file
+ * \brief The hwloc API.
+ *
+ * See hwloc/bitmap.h for bitmap specific macros.
+ * See hwloc/helper.h for high-level topology traversal helpers.
+ * See hwloc/inlines.h for the actual inline code of some functions below.
+ * See hwloc/export.h for exporting topologies to XML or to synthetic descriptions.
+ * See hwloc/distances.h for querying and modifying distances between objects.
+ * See hwloc/diff.h for manipulating differences between similar topologies.
+ */
+
+#ifndef HWLOC_H
+#define HWLOC_H
+
+#include <hwloc/autogen/config.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+/*
+ * Symbol transforms
+ */
+#include <hwloc/rename.h>
+
+/*
+ * Bitmap definitions
+ */
+
+#include <hwloc/bitmap.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_api_version API version
+ * @{
+ */
+
+/** \brief Indicate at build time which hwloc API version is being used.
+ *
+ * This number is updated to (X<<16)+(Y<<8)+Z when a new release X.Y.Z
+ * actually modifies the API.
+ *
+ * Users may check for available features at build time using this number
+ * (see \ref faq_upgrade).
+ *
+ * \note This should not be confused with HWLOC_VERSION, the library version.
+ * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION + * even if their HWLOC_VERSION are different. + */ +#define HWLOC_API_VERSION 0x00020000 + +/** \brief Indicate at runtime which hwloc API version was used at build time. + * + * Should be ::HWLOC_API_VERSION if running on the same version. + */ +HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); + +/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */ +#define HWLOC_COMPONENT_ABI 5 + +/** @} */ + + + +/** \defgroup hwlocality_object_sets Object Sets (hwloc_cpuset_t and hwloc_nodeset_t) + * + * Hwloc uses bitmaps to represent two distinct kinds of object sets: + * CPU sets (::hwloc_cpuset_t) and NUMA node sets (::hwloc_nodeset_t). + * These types are both typedefs to a common back end type + * (::hwloc_bitmap_t), and therefore all the hwloc bitmap functions + * are applicable to both ::hwloc_cpuset_t and ::hwloc_nodeset_t (see + * \ref hwlocality_bitmap). + * + * The rationale for having two different types is that even though + * the actions one wants to perform on these types are the same (e.g., + * enable and disable individual items in the set/mask), they're used + * in very different contexts: one for specifying which processors to + * use and one for specifying which NUMA nodes to use. Hence, the + * name difference is really just to reflect the intent of where the + * type is used. + * + * @{ + */ + +/** \brief A CPU set is a bitmap whose bits are set according to CPU + * physical OS indexes. + * + * It may be consulted and modified with the bitmap API as any + * ::hwloc_bitmap_t (see hwloc/bitmap.h). + * + * Each bit may be converted into a PU object using + * hwloc_get_pu_obj_by_os_index(). + */ +typedef hwloc_bitmap_t hwloc_cpuset_t; +/** \brief A non-modifiable ::hwloc_cpuset_t. */ +typedef hwloc_const_bitmap_t hwloc_const_cpuset_t; + +/** \brief A node set is a bitmap whose bits are set according to NUMA + * memory node physical OS indexes. + * + * It may be consulted and modified with the bitmap API as any + * ::hwloc_bitmap_t (see hwloc/bitmap.h). + * Each bit may be converted into a NUMA node object using + * hwloc_get_numanode_obj_by_os_index(). + * + * When binding memory on a system without any NUMA node, + * the single main memory bank is considered as NUMA node #0. + * + * See also \ref hwlocality_helper_nodeset_convert. + */ +typedef hwloc_bitmap_t hwloc_nodeset_t; +/** \brief A non-modifiable ::hwloc_nodeset_t. + */ +typedef hwloc_const_bitmap_t hwloc_const_nodeset_t; + +/** @} */ + + + +/** \defgroup hwlocality_object_types Object Types + * @{ + */ + +/** \brief Type of topology object. + * + * \note Do not rely on the ordering or completeness of the values as new ones + * may be defined in the future! If you need to compare types, use + * hwloc_compare_types() instead. + */ +#define HWLOC_OBJ_TYPE_MIN HWLOC_OBJ_MACHINE /**< \private Sentinel value */ +typedef enum { + HWLOC_OBJ_MACHINE, /**< \brief Machine. + * A set of processors and memory with cache + * coherency. + * + * This type is always used for the root object of a topology, + * and never used anywhere else. + * Hence its parent is always \c NULL. + */ + + HWLOC_OBJ_PACKAGE, /**< \brief Physical package. + * The physical package that usually gets inserted + * into a socket on the motherboard. + * A processor package usually contains multiple cores. + */ + HWLOC_OBJ_CORE, /**< \brief Core. + * A computation unit (may be shared by several + * logical processors). 
+ */ + HWLOC_OBJ_PU, /**< \brief Processing Unit, or (Logical) Processor. + * An execution unit (may share a core with some + * other logical processors, e.g. in the case of + * an SMT core). + * + * This is the smallest object representing CPU resources, + * it cannot have any child except Misc objects. + * + * Objects of this kind are always reported and can + * thus be used as fallback when others are not. + */ + + HWLOC_OBJ_L1CACHE, /**< \brief Level 1 Data (or Unified) Cache. */ + HWLOC_OBJ_L2CACHE, /**< \brief Level 2 Data (or Unified) Cache. */ + HWLOC_OBJ_L3CACHE, /**< \brief Level 3 Data (or Unified) Cache. */ + HWLOC_OBJ_L4CACHE, /**< \brief Level 4 Data (or Unified) Cache. */ + HWLOC_OBJ_L5CACHE, /**< \brief Level 5 Data (or Unified) Cache. */ + + HWLOC_OBJ_L1ICACHE, /**< \brief Level 1 instruction Cache (filtered out by default). */ + HWLOC_OBJ_L2ICACHE, /**< \brief Level 2 instruction Cache (filtered out by default). */ + HWLOC_OBJ_L3ICACHE, /**< \brief Level 3 instruction Cache (filtered out by default). */ + + HWLOC_OBJ_GROUP, /**< \brief Group objects. + * Objects which do not fit in the above but are + * detected by hwloc and are useful to take into + * account for affinity. For instance, some operating systems + * expose their arbitrary processors aggregation this + * way. And hwloc may insert such objects to group + * NUMA nodes according to their distances. + * See also \ref faq_groups. + * + * These objects are removed when they do not bring + * any structure (see ::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). + */ + + HWLOC_OBJ_NUMANODE, /**< \brief NUMA node. + * An object that contains memory that is directly + * and byte-accessible to the host processors. + * It is usually close to some cores (the corresponding objects + * are descendants of the NUMA node object in the hwloc tree). + * + * There is always at least one such object in the topology + * even if the machine is not NUMA. + * + * Memory objects are not listed in the main children list, + * but rather in the dedicated Memory children list. + * + * NUMA nodes have a special depth ::HWLOC_TYPE_DEPTH_NUMANODE + * instead of a normal depth just like other objects in the + * main tree. + */ + + HWLOC_OBJ_BRIDGE, /**< \brief Bridge (filtered out by default). + * Any bridge that connects the host or an I/O bus, + * to another I/O bus. + * They are not added to the topology unless I/O discovery + * is enabled with hwloc_topology_set_flags(). + * I/O objects are not listed in the main children list, + * but rather in the dedicated io children list. + * I/O objects have NULL CPU and node sets. + */ + HWLOC_OBJ_PCI_DEVICE, /**< \brief PCI device (filtered out by default). + * They are not added to the topology unless I/O discovery + * is enabled with hwloc_topology_set_flags(). + * I/O objects are not listed in the main children list, + * but rather in the dedicated io children list. + * I/O objects have NULL CPU and node sets. + */ + HWLOC_OBJ_OS_DEVICE, /**< \brief Operating system device (filtered out by default). + * They are not added to the topology unless I/O discovery + * is enabled with hwloc_topology_set_flags(). + * I/O objects are not listed in the main children list, + * but rather in the dedicated io children list. + * I/O objects have NULL CPU and node sets. + */ + + HWLOC_OBJ_MISC, /**< \brief Miscellaneous objects (filtered out by default). + * Objects without particular meaning, that can e.g. 
be
+                          * added by the application for its own use, or by hwloc
+                          * for miscellaneous objects such as MemoryModule (DIMMs).
+                          * These objects are not listed in the main children list,
+                          * but rather in the dedicated misc children list.
+                          * Misc objects may only have Misc objects as children,
+                          * and those are in the dedicated misc children list as well.
+                          * Misc objects have NULL CPU and node sets.
+                          */
+
+  HWLOC_OBJ_TYPE_MAX    /**< \private Sentinel value */
+} hwloc_obj_type_t;
+
+/** \brief Cache type. */
+typedef enum hwloc_obj_cache_type_e {
+  HWLOC_OBJ_CACHE_UNIFIED,      /**< \brief Unified cache. */
+  HWLOC_OBJ_CACHE_DATA,         /**< \brief Data cache. */
+  HWLOC_OBJ_CACHE_INSTRUCTION   /**< \brief Instruction cache (filtered out by default). */
+} hwloc_obj_cache_type_t;
+
+/** \brief Type of one side (upstream or downstream) of an I/O bridge. */
+typedef enum hwloc_obj_bridge_type_e {
+  HWLOC_OBJ_BRIDGE_HOST,        /**< \brief Host-side of a bridge, only possible upstream. */
+  HWLOC_OBJ_BRIDGE_PCI          /**< \brief PCI-side of a bridge. */
+} hwloc_obj_bridge_type_t;
+
+/** \brief Type of an OS device. */
+typedef enum hwloc_obj_osdev_type_e {
+  HWLOC_OBJ_OSDEV_BLOCK,        /**< \brief Operating system block device.
+                                  * For instance "sda" on Linux. */
+  HWLOC_OBJ_OSDEV_GPU,          /**< \brief Operating system GPU device.
+                                  * For instance ":0.0" for a GL display,
+                                  * "card0" for a Linux DRM device. */
+  HWLOC_OBJ_OSDEV_NETWORK,      /**< \brief Operating system network device.
+                                  * For instance the "eth0" interface on Linux. */
+  HWLOC_OBJ_OSDEV_OPENFABRICS,  /**< \brief Operating system OpenFabrics device.
+                                  * For instance the "mlx4_0" InfiniBand HCA,
+                                  * or "hfi1_0" Omni-Path interface on Linux. */
+  HWLOC_OBJ_OSDEV_DMA,          /**< \brief Operating system DMA engine device.
+                                  * For instance the "dma0chan0" DMA channel on Linux. */
+  HWLOC_OBJ_OSDEV_COPROC        /**< \brief Operating system co-processor device.
+                                  * For instance "mic0" for a Xeon Phi (MIC) on Linux,
+                                  * "opencl0d0" for an OpenCL device,
+                                  * "cuda0" for a CUDA device. */
+} hwloc_obj_osdev_type_t;
+
+/** \brief Compare the depth of two object types
+ *
+ * Types shouldn't be compared as they are, since newer ones may be added in
+ * the future.  This function returns less than, equal to, or greater than zero
+ * respectively if \p type1 objects usually include \p type2 objects, are the
+ * same as \p type2 objects, or are included in \p type2 objects. If the types
+ * cannot be compared (because neither is usually contained in the other),
+ * ::HWLOC_TYPE_UNORDERED is returned.  Object types containing CPUs can always
+ * be compared (usually, a system contains machines which contain nodes which
+ * contain packages which contain caches, which contain cores, which contain
+ * processors).
+ *
+ * \note ::HWLOC_OBJ_PU will always be the deepest,
+ * while ::HWLOC_OBJ_MACHINE is always the highest.
+ *
+ * \note This does not mean that the actual topology will respect that order:
+ * e.g. as of today cores may also contain caches, and packages may also contain
+ * nodes.  This is thus just to be seen as a fallback comparison method.
+ */
+HWLOC_DECLSPEC int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) __hwloc_attribute_const;
+
+enum hwloc_compare_types_e {
+    HWLOC_TYPE_UNORDERED = INT_MAX      /**< \brief Value returned by hwloc_compare_types() when types cannot be compared.
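+ * Comparable types, by contrast, follow the containment order documented
+ * above; for instance, hwloc_compare_types(HWLOC_OBJ_MACHINE, HWLOC_OBJ_PU)
+ * is negative since a Machine always contains PUs.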
\hideinitializer */ +}; + +/** @} */ + + + +/** \defgroup hwlocality_objects Object Structure and Attributes + * @{ + */ + +union hwloc_obj_attr_u; + +/** \brief Structure of a topology object + * + * Applications must not modify any field except \p hwloc_obj.userdata. + */ +struct hwloc_obj { + /* physical information */ + hwloc_obj_type_t type; /**< \brief Type of object */ + char *subtype; /**< \brief Subtype string to better describe the type field. */ + + unsigned os_index; /**< \brief OS-provided physical index number. + * It is not guaranteed unique across the entire machine, + * except for PUs and NUMA nodes. + * Set to HWLOC_UNKNOWN_INDEX if unknown or irrelevant for this object. + */ +#define HWLOC_UNKNOWN_INDEX (unsigned)-1 + + char *name; /**< \brief Object-specific name if any. + * Mostly used for identifying OS devices and Misc objects where + * a name string is more useful than numerical indexes. + */ + + hwloc_uint64_t total_memory; /**< \brief Total memory (in bytes) in NUMA nodes below this object. */ + + union hwloc_obj_attr_u *attr; /**< \brief Object type-specific Attributes, + * may be \c NULL if no attribute value was found */ + + /* global position */ + int depth; /**< \brief Vertical index in the hierarchy. + * + * For normal objects, this is the depth of the horizontal level + * that contains this object and its cousins of the same type. + * If the topology is symmetric, this is equal to the parent depth + * plus one, and also equal to the number of parent/child links + * from the root object to here. + * + * For special objects (NUMA nodes, I/O and Misc) that are not + * in the main tree, this is a special negative value that + * corresponds to their dedicated level, + * see hwloc_get_type_depth() and ::hwloc_get_type_depth_e. + * Those special values can be passed to hwloc functions such + * hwloc_get_nbobjs_by_depth() as usual. + */ + unsigned logical_index; /**< \brief Horizontal index in the whole list of similar objects, + * hence guaranteed unique across the entire machine. + * Could be a "cousin_rank" since it's the rank within the "cousin" list below + * Note that this index may change when restricting the topology + * or when inserting a group. + */ + + /* cousins are all objects of the same type (and depth) across the entire topology */ + struct hwloc_obj *next_cousin; /**< \brief Next object of same type and depth */ + struct hwloc_obj *prev_cousin; /**< \brief Previous object of same type and depth */ + + /* children of the same parent are siblings, even if they may have different type and depth */ + struct hwloc_obj *parent; /**< \brief Parent, \c NULL if root (Machine object) */ + unsigned sibling_rank; /**< \brief Index in parent's \c children[] array. Or the index in parent's Memory, I/O or Misc children list. */ + struct hwloc_obj *next_sibling; /**< \brief Next object below the same parent (inside the same list of children). */ + struct hwloc_obj *prev_sibling; /**< \brief Previous object below the same parent (inside the same list of children). */ + /** @name List and array of normal children below this object (except Memory, I/O and Misc children). */ + /**@{*/ + unsigned arity; /**< \brief Number of normal children. + * Memory, Misc and I/O children are not listed here + * but rather in their dedicated children list. + */ + struct hwloc_obj **children; /**< \brief Normal children, \c children[0 .. 
arity -1] */ + struct hwloc_obj *first_child; /**< \brief First normal child */ + struct hwloc_obj *last_child; /**< \brief Last normal child */ + /**@}*/ + + int symmetric_subtree; /**< \brief Set if the subtree of normal objects below this object is symmetric, + * which means all normal children and their children have identical subtrees. + * + * Memory, I/O and Misc children are ignored. + * + * If set in the topology root object, lstopo may export the topology + * as a synthetic string. + */ + + /** @name List of Memory children below this object. */ + /**@{*/ + unsigned memory_arity; /**< \brief Number of Memory children. + * These children are listed in \p memory_first_child. + */ + struct hwloc_obj *memory_first_child; /**< \brief First Memory child. + * NUMA nodes are listed here (\p memory_arity and \p memory_first_child) + * instead of in the normal children list. + * See also hwloc_obj_type_is_memory(). + */ + /**@}*/ + + /** @name List of I/O children below this object. */ + /**@{*/ + unsigned io_arity; /**< \brief Number of I/O children. + * These children are listed in \p io_first_child. + */ + struct hwloc_obj *io_first_child; /**< \brief First I/O child. + * Bridges, PCI and OS devices are listed here (\p io_arity and \p io_first_child) + * instead of in the normal children list. + * See also hwloc_obj_type_is_io(). + */ + /**@}*/ + + /** @name List of Misc children below this object. */ + /**@{*/ + unsigned misc_arity; /**< \brief Number of Misc children. + * These children are listed in \p misc_first_child. + */ + struct hwloc_obj *misc_first_child; /**< \brief First Misc child. + * Misc objects are listed here (\p misc_arity and \p misc_first_child) + * instead of in the normal children list. + */ + /**@}*/ + + /* cpusets and nodesets */ + hwloc_cpuset_t cpuset; /**< \brief CPUs covered by this object + * + * This is the set of CPUs for which there are PU objects in the topology + * under this object, i.e. which are known to be physically contained in this + * object and known how (the children path between this object and the PU + * objects). + * + * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, + * some of these CPUs may not be allowed for binding, + * see hwloc_topology_get_allowed_cpuset(). + * + * \note All objects have non-NULL CPU and node sets except Misc and I/O objects. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of logical processors of this object, + * + * This may include not only the same as the cpuset field, but also some CPUs for + * which topology information is unknown or incomplete, some offlines CPUs, and + * the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag + * is not set. + * Thus no corresponding PU object may be found in the topology, because the + * precise position is undefined. It is however known that it would be somewhere + * under this object. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + + hwloc_nodeset_t nodeset; /**< \brief NUMA nodes covered by this object or containing this object + * + * This is the set of NUMA nodes for which there are NUMA node objects in the + * topology under or above this object, i.e. which are known to be physically + * contained in this object or containing it and known how (the children path + * between this object and the NUMA node objects). 
+ * + * In the end, these nodes are those that are close to the current object. + * + * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, + * some of these nodes may not be allowed for allocation, + * see hwloc_topology_get_allowed_nodeset(). + * + * If there are no NUMA nodes in the machine, all the memory is close to this + * object, so only the first bit may be set in \p nodeset. + * + * \note All objects have non-NULL CPU and node sets except Misc and I/O objects. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + hwloc_nodeset_t complete_nodeset; /**< \brief The complete NUMA node set of this object, + * + * This may include not only the same as the nodeset field, but also some NUMA + * nodes for which topology information is unknown or incomplete, some offlines + * nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM + * flag is not set. + * Thus no corresponding NUMA node object may be found in the topology, because the + * precise position is undefined. It is however known that it would be + * somewhere under this object. + * + * If there are no NUMA nodes in the machine, all the memory is close to this + * object, so only the first bit is set in \p complete_nodeset. + * + * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. + */ + + struct hwloc_info_s *infos; /**< \brief Array of stringified info type=name. */ + unsigned infos_count; /**< \brief Size of infos array. */ + + /* misc */ + void *userdata; /**< \brief Application-given private data pointer, + * initialized to \c NULL, use it as you wish. + * See hwloc_topology_set_userdata_export_callback() in hwloc/export.h + * if you wish to export this field to XML. */ + + hwloc_uint64_t gp_index; /**< \brief Global persistent index. + * Generated by hwloc, unique across the topology (contrary to os_index) + * and persistent across topology changes (contrary to logical_index). + * Mostly used internally, but could also be used by application to identify objects. + */ +}; +/** + * \brief Convenience typedef; a pointer to a struct hwloc_obj. + */ +typedef struct hwloc_obj * hwloc_obj_t; + +/** \brief Object type-specific Attributes */ +union hwloc_obj_attr_u { + /** \brief NUMA node-specific Object Attributes */ + struct hwloc_numanode_attr_s { + hwloc_uint64_t local_memory; /**< \brief Local memory (in bytes) */ + unsigned page_types_len; /**< \brief Size of array \p page_types */ + /** \brief Array of local memory page types, \c NULL if no local memory and \p page_types is 0. + * + * The array is sorted by increasing \p size fields. + * It contains \p page_types_len slots. + */ + struct hwloc_memory_page_type_s { + hwloc_uint64_t size; /**< \brief Size of pages */ + hwloc_uint64_t count; /**< \brief Number of pages of this size */ + } * page_types; + } numanode; + + /** \brief Cache-specific Object Attributes */ + struct hwloc_cache_attr_s { + hwloc_uint64_t size; /**< \brief Size of cache in bytes */ + unsigned depth; /**< \brief Depth of cache (e.g., L1, L2, ...etc.) */ + unsigned linesize; /**< \brief Cache-line size in bytes. 0 if unknown */ + int associativity; /**< \brief Ways of associativity, + * -1 if fully associative, 0 if unknown */ + hwloc_obj_cache_type_t type; /**< \brief Cache type */ + } cache; + /** \brief Group-specific Object Attributes */ + struct hwloc_group_attr_s { + unsigned depth; /**< \brief Depth of group object. + * It may change if intermediate Group objects are added. 
+ unsigned kind; /**< \brief Internally-used kind of group. */
+ unsigned subkind; /**< \brief Internally-used subkind to distinguish different levels of groups with same kind */
+ unsigned char dont_merge; /**< \brief Flag preventing groups from being automatically merged with identical parent or children. */
+ } group;
+ /** \brief PCI Device specific Object Attributes */
+ struct hwloc_pcidev_attr_s {
+ unsigned short domain;
+ unsigned char bus, dev, func;
+ unsigned short class_id;
+ unsigned short vendor_id, device_id, subvendor_id, subdevice_id;
+ unsigned char revision;
+ float linkspeed; /* in GB/s */
+ } pcidev;
+ /** \brief Bridge specific Object Attributes */
+ struct hwloc_bridge_attr_s {
+ union {
+ struct hwloc_pcidev_attr_s pci;
+ } upstream;
+ hwloc_obj_bridge_type_t upstream_type;
+ union {
+ struct {
+ unsigned short domain;
+ unsigned char secondary_bus, subordinate_bus;
+ } pci;
+ } downstream;
+ hwloc_obj_bridge_type_t downstream_type;
+ unsigned depth;
+ } bridge;
+ /** \brief OS Device specific Object Attributes */
+ struct hwloc_osdev_attr_s {
+ hwloc_obj_osdev_type_t type;
+ } osdev;
+};
+
+/** \brief Object info
+ *
+ * \sa hwlocality_info_attr
+ */
+struct hwloc_info_s {
+ char *name; /**< \brief Info name */
+ char *value; /**< \brief Info value */
+};
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_creation Topology Creation and Destruction
+ * @{
+ */
+
+struct hwloc_topology;
+/** \brief Topology context
+ *
+ * To be initialized with hwloc_topology_init() and built with hwloc_topology_load().
+ */
+typedef struct hwloc_topology * hwloc_topology_t;
+
+/** \brief Allocate a topology context.
+ *
+ * \param[out] topologyp is assigned a pointer to the newly allocated context.
+ *
+ * \return 0 on success, -1 on error.
+ */
+HWLOC_DECLSPEC int hwloc_topology_init (hwloc_topology_t *topologyp);
+
+/** \brief Build the actual topology
+ *
+ * Build the actual topology once initialized with hwloc_topology_init() and
+ * tuned with \ref hwlocality_configuration and \ref hwlocality_setsource routines.
+ * No other routine may be called earlier using this topology context.
+ *
+ * \param topology is the topology to be loaded with objects.
+ *
+ * \return 0 on success, -1 on error.
+ *
+ * \note On failure, the topology is reinitialized. It should be either
+ * destroyed with hwloc_topology_destroy() or configured and loaded again.
+ *
+ * \note This function may be called only once per topology.
+ *
+ * \note The binding of the current thread or process may temporarily change
+ * during this call but it will be restored before it returns.
+ *
+ * \sa hwlocality_configuration and hwlocality_setsource
+ */
+HWLOC_DECLSPEC int hwloc_topology_load(hwloc_topology_t topology);
+
+/** \brief Terminate and free a topology context
+ *
+ * \param topology is the topology to be freed
+ */
+HWLOC_DECLSPEC void hwloc_topology_destroy (hwloc_topology_t topology);
+
+/** \brief Duplicate a topology.
+ *
+ * The entire topology structure as well as its objects
+ * are duplicated into a new one.
+ *
+ * This is useful for keeping a backup while modifying a topology.
+ *
+ * \note Object userdata is not duplicated since hwloc does not know what it points to.
+ * The objects of both old and new topologies will point to the same userdata.
+ */
+HWLOC_DECLSPEC int hwloc_topology_dup(hwloc_topology_t *newtopology, hwloc_topology_t oldtopology);
+
+/** \brief Verify that the topology is compatible with the current hwloc library.
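+ *
+ * For example, a minimal check sketch (the reaction to an incompatibility is
+ * up to the application; aborting is just one option):
+ * \code
+ * if (hwloc_topology_abi_check(topology) < 0)
+ *   abort();
+ * \endcode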
+ * + * This is useful when using the same topology structure (in memory) + * in different libraries that may use different hwloc installations + * (for instance if one library embeds a specific version of hwloc, + * while another library uses a default system-wide hwloc installation). + * + * If all libraries/programs use the same hwloc installation, this function + * always returns success. + * + * \return \c 0 on success. + * + * \return \c -1 with \p errno set to \c EINVAL if incompatible. + * + * \note If sharing between processes with hwloc_shmem_topology_write(), + * the relevant check is already performed inside hwloc_shmem_topology_adopt(). + */ +HWLOC_DECLSPEC int hwloc_topology_abi_check(hwloc_topology_t topology); + +/** \brief Run internal checks on a topology structure + * + * The program aborts if an inconsistency is detected in the given topology. + * + * \param topology is the topology to be checked + * + * \note This routine is only useful to developers. + * + * \note The input topology should have been previously loaded with + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC void hwloc_topology_check(hwloc_topology_t topology); + +/** @} */ + + + +/** \defgroup hwlocality_levels Object levels, depths and types + * @{ + * + * Be sure to see the figure in \ref termsanddefs that shows a + * complete topology tree, including depths, child/sibling/cousin + * relationships, and an example of an asymmetric topology where one + * package has fewer caches than its peers. + */ + +/** \brief Get the depth of the hierarchical tree of objects. + * + * This is the depth of ::HWLOC_OBJ_PU objects plus one. + * + * \note NUMA nodes, I/O and Misc objects are ignored when computing + * the depth of the tree (they are placed on special levels). + */ +HWLOC_DECLSPEC int hwloc_topology_get_depth(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure; + +/** \brief Returns the depth of objects of type \p type. + * + * If no object of this type is present on the underlying architecture, or if + * the OS doesn't provide this kind of information, the function returns + * ::HWLOC_TYPE_DEPTH_UNKNOWN. + * + * If type is absent but a similar type is acceptable, see also + * hwloc_get_type_or_below_depth() and hwloc_get_type_or_above_depth(). + * + * If ::HWLOC_OBJ_GROUP is given, the function may return ::HWLOC_TYPE_DEPTH_MULTIPLE + * if multiple levels of Groups exist. + * + * If a NUMA node, I/O or Misc object type is given, the function returns a virtual + * value because these objects are stored in special levels that are not CPU-related. + * This virtual depth may be passed to other hwloc functions such as + * hwloc_get_obj_by_depth() but it should not be considered as an actual + * depth by the application. In particular, it should not be compared with + * any other object depth or with the entire topology depth. + * \sa hwloc_get_memory_parents_depth(). + * + * \sa hwloc_type_sscanf_as_depth() for returning the depth of objects + * whose type is given as a string. + */ +HWLOC_DECLSPEC int hwloc_get_type_depth (hwloc_topology_t topology, hwloc_obj_type_t type); + +enum hwloc_get_type_depth_e { + HWLOC_TYPE_DEPTH_UNKNOWN = -1, /**< \brief No object of given type exists in the topology. \hideinitializer */ + HWLOC_TYPE_DEPTH_MULTIPLE = -2, /**< \brief Objects of given type exist at different depth in the topology (only for Groups). \hideinitializer */ + HWLOC_TYPE_DEPTH_NUMANODE = -3, /**< \brief Virtual depth for NUMA nodes. 
\hideinitializer */
+ HWLOC_TYPE_DEPTH_BRIDGE = -4, /**< \brief Virtual depth for bridge object level. \hideinitializer */
+ HWLOC_TYPE_DEPTH_PCI_DEVICE = -5, /**< \brief Virtual depth for PCI device object level. \hideinitializer */
+ HWLOC_TYPE_DEPTH_OS_DEVICE = -6, /**< \brief Virtual depth for software device object level. \hideinitializer */
+ HWLOC_TYPE_DEPTH_MISC = -7 /**< \brief Virtual depth for Misc object. \hideinitializer */
+};
+
+/** \brief Return the depth of parents where memory objects are attached.
+ *
+ * Memory objects have virtual negative depths because they are not part of
+ * the main CPU-side hierarchy of objects. This depth should not be compared
+ * with other level depths.
+ *
+ * If all Memory objects are attached to Normal parents at the same depth,
+ * this parent depth may be compared to others as usual, for instance
+ * for knowing whether NUMA nodes are attached above or below Packages.
+ *
+ * \return The depth of Normal parents of all memory children
+ * if all these parents have the same depth. For instance the depth of
+ * the Package level if all NUMA nodes are attached to Package objects.
+ *
+ * \return ::HWLOC_TYPE_DEPTH_MULTIPLE if Normal parents of all
+ * memory children do not have the same depth. For instance if some
+ * NUMA nodes are attached to Packages while others are attached to
+ * Groups.
+ */
+HWLOC_DECLSPEC int hwloc_get_memory_parents_depth (hwloc_topology_t topology);
+
+/** \brief Returns the depth of objects of type \p type or below
+ *
+ * If no object of this type is present on the underlying architecture, the
+ * function returns the depth of the first "present" object typically found
+ * inside \p type.
+ *
+ * This function is only meaningful for normal object types.
+ * If a memory, I/O or Misc object type is given, the corresponding virtual
+ * depth is always returned (see hwloc_get_type_depth()).
+ *
+ * May return ::HWLOC_TYPE_DEPTH_MULTIPLE for ::HWLOC_OBJ_GROUP just like
+ * hwloc_get_type_depth().
+ */
+static __hwloc_inline int
+hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;
+
+/** \brief Returns the depth of objects of type \p type or above
+ *
+ * If no object of this type is present on the underlying architecture, the
+ * function returns the depth of the first "present" object typically
+ * containing \p type.
+ *
+ * This function is only meaningful for normal object types.
+ * If a memory, I/O or Misc object type is given, the corresponding virtual
+ * depth is always returned (see hwloc_get_type_depth()).
+ *
+ * May return ::HWLOC_TYPE_DEPTH_MULTIPLE for ::HWLOC_OBJ_GROUP just like
+ * hwloc_get_type_depth().
+ */
+static __hwloc_inline int
+hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;
+
+/** \brief Returns the type of objects at depth \p depth.
+ *
+ * \p depth should be between 0 and hwloc_topology_get_depth()-1.
+ *
+ * \return (hwloc_obj_type_t)-1 if depth \p depth does not exist.
+ */
+HWLOC_DECLSPEC hwloc_obj_type_t hwloc_get_depth_type (hwloc_topology_t topology, int depth) __hwloc_attribute_pure;
+
+/** \brief Returns the width of level at depth \p depth.
+ */
+HWLOC_DECLSPEC unsigned hwloc_get_nbobjs_by_depth (hwloc_topology_t topology, int depth) __hwloc_attribute_pure;
+
+/** \brief Returns the width of the level of type \p type
+ *
+ * If no object for that type exists, 0 is returned.
+ * If there are several levels with objects of that type, -1 is returned.
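+ *
+ * For example, a minimal end-to-end sketch counting Core objects
+ * (error checking omitted):
+ * \code
+ * hwloc_topology_t topology;
+ * hwloc_topology_init(&topology);
+ * hwloc_topology_load(topology);
+ * int ncores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
+ * hwloc_topology_destroy(topology);
+ * \endcode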
+ */
+static __hwloc_inline int
+hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;
+
+/** \brief Returns the top-object of the topology-tree.
+ *
+ * Its type is ::HWLOC_OBJ_MACHINE.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_root_obj (hwloc_topology_t topology) __hwloc_attribute_pure;
+
+/** \brief Returns the topology object at logical index \p idx from depth \p depth */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_get_obj_by_depth (hwloc_topology_t topology, int depth, unsigned idx) __hwloc_attribute_pure;
+
+/** \brief Returns the topology object at logical index \p idx with type \p type
+ *
+ * If no object for that type exists, \c NULL is returned.
+ * If there are several levels with objects of that type (::HWLOC_OBJ_GROUP),
+ * \c NULL is returned and the caller may fallback to hwloc_get_obj_by_depth().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure;
+
+/** \brief Returns the next object at depth \p depth.
+ *
+ * If \p prev is \c NULL, return the first object at depth \p depth.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_depth (hwloc_topology_t topology, int depth, hwloc_obj_t prev);
+
+/** \brief Returns the next object of type \p type.
+ *
+ * If \p prev is \c NULL, return the first object of type \p type. If
+ * there are multiple depths for the given type, or no depth at all, return \c NULL and
+ * let the caller fallback to hwloc_get_next_obj_by_depth().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type,
+ hwloc_obj_t prev);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_object_strings Converting between Object Types and Attributes, and Strings
+ * @{
+ */
+
+/** \brief Return a constant stringified object type.
+ *
+ * This function is the basic way to convert a generic type into a string.
+ * The output string may be parsed back by hwloc_type_sscanf().
+ *
+ * hwloc_obj_type_snprintf() may return a more precise output for a specific
+ * object, but it requires the caller to provide the output buffer.
+ */
+HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwloc_attribute_const;
+
+/** \brief Stringify the type of a given topology object into a human-readable form.
+ *
+ * Contrary to hwloc_obj_type_string(), this function includes object-specific
+ * attributes (such as the Group depth, the Bridge type, or OS device type)
+ * in the output, and it requires the caller to provide the output buffer.
+ *
+ * The output is guaranteed to be the same for all objects of a same topology level.
+ *
+ * If \p verbose is 1, longer type names are used, e.g. L1Cache instead of L1.
+ *
+ * The output string may be parsed back by hwloc_type_sscanf().
+ *
+ * If \p size is 0, \p string may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size,
+ hwloc_obj_t obj,
+ int verbose);
+
+/** \brief Stringify the attributes of a given topology object into a human-readable form.
+ *
+ * Attribute values are separated by \p separator.
+ *
+ * Only the major attributes are printed in non-verbose mode.
+ *
+ * If \p size is 0, \p string may safely be \c NULL.
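+ *
+ * For example, a sketch assuming some object \p obj obtained from this
+ * topology:
+ * \code
+ * char attrs[128];
+ * hwloc_obj_attr_snprintf(attrs, sizeof(attrs), obj, ", ", 0);
+ * \endcode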
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size,
+ hwloc_obj_t obj, const char * __hwloc_restrict separator,
+ int verbose);
+
+/** \brief Return an object type and attributes from a type string.
+ *
+ * Convert strings such as "Package" or "L1iCache" into the corresponding types.
+ * Matching is case-insensitive, and only the first letters are actually
+ * required to match.
+ *
+ * The matched object type is set in \p typep (which cannot be \c NULL).
+ *
+ * Type-specific attributes, for instance Cache type, Cache depth, Group depth,
+ * Bridge type or OS Device type may be returned in \p attrp.
+ * Attributes that are not specified in the string (for instance "Group"
+ * without a depth, or "L2Cache" without a cache type) are set to -1.
+ *
+ * \p attrp is only filled if not \c NULL and if its size specified in \p attrsize
+ * is large enough. It should be at least as large as union hwloc_obj_attr_u.
+ *
+ * \return 0 if a type was correctly identified, otherwise -1.
+ *
+ * \note This function is guaranteed to match any string returned by
+ * hwloc_obj_type_string() or hwloc_obj_type_snprintf().
+ *
+ * \note This is an extended version of the now deprecated hwloc_obj_type_sscanf().
+ */
+HWLOC_DECLSPEC int hwloc_type_sscanf(const char *string,
+ hwloc_obj_type_t *typep,
+ union hwloc_obj_attr_u *attrp, size_t attrsize);
+
+/** \brief Return an object type and its level depth from a type string.
+ *
+ * Convert strings such as "Package" or "L1iCache" into the corresponding types
+ * and return in \p depthp the depth of the corresponding level in the
+ * topology \p topology.
+ *
+ * If no object of this type is present on the underlying architecture,
+ * ::HWLOC_TYPE_DEPTH_UNKNOWN is returned.
+ *
+ * If multiple such levels exist (for instance if giving Group without any depth),
+ * the function may return ::HWLOC_TYPE_DEPTH_MULTIPLE instead.
+ *
+ * The matched object type is set in \p typep if \p typep is non \c NULL.
+ *
+ * \note This function is similar to hwloc_type_sscanf() followed
+ * by hwloc_get_type_depth() but it also automatically disambiguates
+ * multiple group levels etc.
+ *
+ * \note This function is guaranteed to match any string returned by
+ * hwloc_obj_type_string() or hwloc_obj_type_snprintf().
+ */
+HWLOC_DECLSPEC int hwloc_type_sscanf_as_depth(const char *string,
+ hwloc_obj_type_t *typep,
+ hwloc_topology_t topology, int *depthp);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_info_attr Consulting and Adding Key-Value Info Attributes
+ *
+ * @{
+ */
+
+/** \brief Search the given key name in object infos and return the corresponding value.
+ *
+ * If multiple keys match the given name, only the first one is returned.
+ *
+ * \return \c NULL if no such key exists.
+ */
+static __hwloc_inline const char *
+hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) __hwloc_attribute_pure;
+
+/** \brief Add the given info name and value pair to the given object.
+ *
+ * The info is appended to the existing info array even if another key
+ * with the same name already exists.
+ *
+ * The input strings are copied before being added in the object infos.
+ *
+ * \return \c 0 on success, \c -1 on error.
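+ *
+ * For example, a sketch attaching an arbitrary key-value pair to an
+ * object \p obj:
+ * \code
+ * hwloc_obj_add_info(obj, "MyKey", "MyValue");
+ * \endcode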
+ *
+ * \note This function may be used to enforce object colors in the lstopo
+ * graphical output by using "lstopoStyle" as a name and "Background=#rrggbb"
+ * as a value. See CUSTOM COLORS in the lstopo(1) manpage for details.
+ *
+ * \note If \p value contains some non-printable characters, they will
+ * be dropped when exporting to XML, see hwloc_topology_export_xml() in hwloc/export.h.
+ */
+HWLOC_DECLSPEC int hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_cpubinding CPU binding
+ *
+ * Some operating systems only support binding threads or processes to a single PU.
+ * Others allow binding to larger sets such as entire Cores or Packages or
+ * even random sets of individual PUs. In such operating systems, the scheduler
+ * is free to run the task on one of these PUs, then migrate it to another PU, etc.
+ * It is often useful to call hwloc_bitmap_singlify() on the target CPU set before
+ * passing it to the binding function to avoid these expensive migrations.
+ * See the documentation of hwloc_bitmap_singlify() for details.
+ *
+ * Some operating systems do not provide all hwloc-supported
+ * mechanisms to bind processes, threads, etc.
+ * hwloc_topology_get_support() may be used to query about the actual CPU
+ * binding support in the currently used operating system.
+ *
+ * When the requested binding operation is not available and the
+ * ::HWLOC_CPUBIND_STRICT flag was passed, the function returns -1.
+ * \p errno is set to \c ENOSYS when it is not possible to bind the requested kind of object
+ * (process/thread). \p errno is set to \c EXDEV when the requested cpuset
+ * can not be enforced (e.g. some systems only allow one CPU, and some
+ * other systems only allow one NUMA node).
+ *
+ * If ::HWLOC_CPUBIND_STRICT was not passed, the function may fail as well,
+ * or the operating system may use a slightly different operation
+ * (with side-effects, smaller binding set, etc.)
+ * when the requested operation is not exactly supported.
+ *
+ * The most portable version that should be preferred over the others,
+ * whenever possible, is the following one which just binds the current program,
+ * assuming it is single-threaded:
+ *
+ * \code
+ * hwloc_set_cpubind(topology, set, 0);
+ * \endcode
+ *
+ * If the program may be multithreaded, the following one should be preferred
+ * to only bind the current thread:
+ *
+ * \code
+ * hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD);
+ * \endcode
+ *
+ * \sa Some example codes are available under doc/examples/ in the source tree.
+ *
+ * \note To unbind, just call the binding function with either a full cpuset or
+ * a cpuset equal to the system cpuset.
+ *
+ * \note On some operating systems, CPU binding may have effects on memory binding, see
+ * ::HWLOC_CPUBIND_NOMEMBIND
+ *
+ * \note Running lstopo \--top or hwloc-ps can be a very convenient tool to check
+ * how binding actually happened.
+ * @{
+ */
+
+/** \brief Process/Thread binding flags.
+ *
+ * These bit flags can be used to refine the binding policy.
+ *
+ * The default (0) is to bind the current process, assumed to be
+ * single-threaded, in a non-strict way. This is the most portable
+ * way to bind as all operating systems usually provide it.
+ *
+ * \note Not all systems support all kinds of binding. See the
+ * "Detailed Description" section of \ref hwlocality_cpubinding for a
+ * description of errors that can occur.
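+ *
+ * For example, a sketch (assuming a loaded \p topology; error checking
+ * omitted) using ::HWLOC_CPUBIND_THREAD and following the
+ * hwloc_bitmap_singlify() advice above to bind the current thread to a
+ * single PU:
+ * \code
+ * hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
+ * hwloc_cpuset_t set = hwloc_bitmap_dup(pu->cpuset);
+ * hwloc_bitmap_singlify(set);
+ * hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD);
+ * hwloc_bitmap_free(set);
+ * \endcode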
+ */
+typedef enum {
+ /** \brief Bind all threads of the current (possibly) multithreaded process.
+ * \hideinitializer */
+ HWLOC_CPUBIND_PROCESS = (1<<0),
+
+ /** \brief Bind current thread of current process.
+ * \hideinitializer */
+ HWLOC_CPUBIND_THREAD = (1<<1),
+
+ /** \brief Request strict binding from the OS.
+ *
+ * By default, when the designated CPUs are all busy while other
+ * CPUs are idle, operating systems may execute the thread/process
+ * on those other CPUs instead of the designated CPUs, to let them
+ * progress anyway. Strict binding means that the thread/process
+ * will _never_ execute on CPUs other than the designated CPUs, even
+ * when those are busy with other tasks and other CPUs are idle.
+ *
+ * \note Depending on the operating system, strict binding may not
+ * be possible (e.g., the OS does not implement it) or not allowed
+ * (e.g., for administrative reasons), and the function will fail
+ * in that case.
+ *
+ * When retrieving the binding of a process, this flag checks
+ * whether all its threads actually have the same binding. If the
+ * flag is not given, the binding of each thread will be
+ * accumulated.
+ *
+ * \note This flag is meaningless when retrieving the binding of a
+ * thread.
+ * \hideinitializer
+ */
+ HWLOC_CPUBIND_STRICT = (1<<2),
+
+ /** \brief Avoid any effect on memory binding
+ *
+ * On some operating systems, some CPU binding functions would also
+ * bind the memory on the corresponding NUMA node. It is often not
+ * a problem for the application, but if it is, setting this flag
+ * will make hwloc avoid using OS functions that would also bind
+ * memory. This will however reduce the support of CPU bindings,
+ * i.e. potentially return -1 with errno set to ENOSYS in some
+ * cases.
+ *
+ * This flag is only meaningful when used with functions that set
+ * the CPU binding. It is ignored when used with functions that get
+ * CPU binding information.
+ * \hideinitializer
+ */
+ HWLOC_CPUBIND_NOMEMBIND = (1<<3)
+} hwloc_cpubind_flags_t;
+
+/** \brief Bind current process or thread on cpus given in physical bitmap \p set.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
+
+/** \brief Get current process or thread binding.
+ *
+ * Writes into \p set the physical cpuset which the process or thread (according to \e
+ * flags) was last bound to.
+ */
+HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+
+/** \brief Bind a process \p pid on cpus given in physical bitmap \p set.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note As a special case on Linux, if a tid (thread ID) is supplied
+ * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags,
+ * the binding is applied to that specific thread.
+ *
+ * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags);
+
+/** \brief Get the current physical binding of process \p pid.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
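+ *
+ * For example, a sketch reading the current binding of some process
+ * \p pid (error checking omitted):
+ * \code
+ * hwloc_cpuset_t set = hwloc_bitmap_alloc();
+ * hwloc_get_proc_cpubind(topology, pid, set, 0);
+ * hwloc_bitmap_free(set);
+ * \endcode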
+ * + * \note As a special case on Linux, if a tid (thread ID) is supplied + * instead of a pid (process ID) and HWLOC_CPUBIND_THREAD is passed in flags, + * the binding for that specific thread is returned. + * + * \note On non-Linux systems, HWLOC_CPUBIND_THREAD can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags); + +#ifdef hwloc_thread_t +/** \brief Bind a thread \p thread on cpus given in physical bitmap \p set. + * + * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_const_cpuset_t set, int flags); +#endif + +#ifdef hwloc_thread_t +/** \brief Get the current physical binding of thread \p tid. + * + * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_cpuset_t set, int flags); +#endif + +/** \brief Get the last physical CPU where the current process or thread ran. + * + * The operating system may move some tasks from one processor + * to another at any time according to their binding, + * so this function may return something that is already + * outdated. + * + * \p flags can include either ::HWLOC_CPUBIND_PROCESS or ::HWLOC_CPUBIND_THREAD to + * specify whether the query should be for the whole process (union of all CPUs + * on which all threads are running), or only the current thread. If the + * process is single-threaded, flags can be set to zero to let hwloc use + * whichever method is available on the underlying OS. + */ +HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); + +/** \brief Get the last physical CPU where a process ran. + * + * The operating system may move some tasks from one processor + * to another at any time according to their binding, + * so this function may return something that is already + * outdated. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + * + * \note As a special case on Linux, if a tid (thread ID) is supplied + * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags, + * the last CPU location of that specific thread is returned. + * + * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags. + */ +HWLOC_DECLSPEC int hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags); + +/** @} */ + + + +/** \defgroup hwlocality_membinding Memory binding + * + * Memory binding can be done three ways: + * + * - explicit memory allocation thanks to hwloc_alloc_membind() and friends: + * the binding will have effect on the memory allocated by these functions. + * - implicit memory binding through binding policy: hwloc_set_membind() and + * friends only define the current policy of the process, which will be + * applied to the subsequent calls to malloc() and friends. + * - migration of existing memory ranges, thanks to hwloc_set_area_membind() + * and friends, which move already-allocated data. + * + * Not all operating systems support all three ways. 
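+ *
+ * As an example of the third way, a sketch that migrates an existing
+ * buffer \p buf of \p len bytes to the NUMA nodes in a nodeset \p set
+ * (all three variables assumed to exist):
+ * \code
+ * hwloc_set_area_membind(topology, buf, len, set, HWLOC_MEMBIND_BIND,
+ *                        HWLOC_MEMBIND_MIGRATE | HWLOC_MEMBIND_BYNODESET);
+ * \endcode
+ *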
+ * hwloc_topology_get_support() may be used to query about the actual memory
+ * binding support in the currently used operating system.
+ *
+ * When the requested binding operation is not available and the
+ * ::HWLOC_MEMBIND_STRICT flag was passed, the function returns -1.
+ * \p errno will be set to \c ENOSYS when the system does not support
+ * the specified action or policy
+ * (e.g., some systems only allow binding memory on a per-thread
+ * basis, whereas other systems only allow binding memory for all
+ * threads in a process).
+ * \p errno will be set to EXDEV when the requested set can not be enforced
+ * (e.g., some systems only allow binding memory to a single NUMA node).
+ *
+ * If ::HWLOC_MEMBIND_STRICT was not passed, the function may fail as well,
+ * or the operating system may use a slightly different operation
+ * (with side-effects, smaller binding set, etc.)
+ * when the requested operation is not exactly supported.
+ *
+ * The most portable form that should be preferred over the others
+ * whenever possible is as follows.
+ * It allocates some memory hopefully bound to the specified set.
+ * To do so, hwloc will possibly have to change the current memory
+ * binding policy in order to actually get the memory bound, if the OS
+ * does not provide any other way to simply allocate bound memory
+ * without changing the policy for all allocations. That is the
+ * difference with hwloc_alloc_membind(), which will never change the
+ * current memory binding policy.
+ *
+ * \code
+ * hwloc_alloc_membind_policy(topology, size, set,
+ * HWLOC_MEMBIND_BIND, 0);
+ * \endcode
+ *
+ * Each hwloc memory binding function takes a bitmap argument that
+ * is a CPU set by default, or a NUMA memory node set if the flag
+ * ::HWLOC_MEMBIND_BYNODESET is specified.
+ * See \ref hwlocality_object_sets and \ref hwlocality_bitmap for a
+ * discussion of CPU sets and NUMA memory node sets.
+ * It is also possible to convert between CPU set and node set using
+ * hwloc_cpuset_to_nodeset() or hwloc_cpuset_from_nodeset().
+ *
+ * Memory binding by CPU set cannot work for CPU-less NUMA memory nodes.
+ * Binding by nodeset should therefore be preferred whenever possible.
+ *
+ * \sa Some example codes are available under doc/examples/ in the source tree.
+ *
+ * \note On some operating systems, memory binding affects the CPU
+ * binding; see ::HWLOC_MEMBIND_NOCPUBIND
+ * @{
+ */
+
+/** \brief Memory binding policy.
+ *
+ * These constants can be used to choose the binding policy. Only one policy can
+ * be used at a time (i.e., the values cannot be OR'ed together).
+ *
+ * Not all systems support all kinds of binding.
+ * hwloc_topology_get_support() may be used to query about the actual memory
+ * binding policy support in the currently used operating system.
+ * See the "Detailed Description" section of \ref hwlocality_membinding
+ * for a description of errors that can occur.
+ */
+typedef enum {
+ /** \brief Reset the memory allocation policy to the system default.
+ * Depending on the operating system, this may correspond to
+ * ::HWLOC_MEMBIND_FIRSTTOUCH (Linux),
+ * or ::HWLOC_MEMBIND_BIND (AIX, HP-UX, Solaris, Windows).
+ * This policy is never returned by get membind functions.
+ * The nodeset argument is ignored.
+ * \hideinitializer */
+ HWLOC_MEMBIND_DEFAULT = 0,
+
+ /** \brief Allocate each memory page individually on the local NUMA
+ * node of the thread that touches it.
+ *
+ * The given nodeset should usually be hwloc_topology_get_topology_nodeset()
+ * so that the touching thread may run and allocate on any node in the system.
+ *
+ * On AIX, if the nodeset is smaller, pages are allocated locally (if the local
+ * node is in the nodeset) or from a random non-local node (otherwise).
+ * \hideinitializer */
+ HWLOC_MEMBIND_FIRSTTOUCH = 1,
+
+ /** \brief Allocate memory on the specified nodes.
+ * \hideinitializer */
+ HWLOC_MEMBIND_BIND = 2,
+
+ /** \brief Allocate memory on the given nodes in an interleaved
+ * / round-robin manner. The precise layout of the memory across
+ * multiple NUMA nodes is OS/system specific. Interleaving can be
+ * useful when threads distributed across the specified NUMA nodes
+ * will all be accessing the whole memory range concurrently, since
+ * the interleave will then balance the memory references.
+ * \hideinitializer */
+ HWLOC_MEMBIND_INTERLEAVE = 3,
+
+ /** \brief For each page bound with this policy, the next time
+ * it is touched (and the next time only), it is moved from its current
+ * location to the local NUMA node of the thread where the memory
+ * reference occurred (if it needs to be moved at all).
+ * \hideinitializer */
+ HWLOC_MEMBIND_NEXTTOUCH = 4,
+
+ /** \brief Returned by get_membind() functions when multiple
+ * threads or parts of a memory area have differing memory binding
+ * policies.
+ * Also returned when binding is unknown because binding hooks are empty
+ * when the topology is loaded from XML without HWLOC_THISSYSTEM=1, etc.
+ * \hideinitializer */
+ HWLOC_MEMBIND_MIXED = -1
+} hwloc_membind_policy_t;
+
+/** \brief Memory binding flags.
+ *
+ * These flags can be used to refine the binding policy.
+ * All flags can be logically OR'ed together with the exception of
+ * ::HWLOC_MEMBIND_PROCESS and ::HWLOC_MEMBIND_THREAD;
+ * these two flags are mutually exclusive.
+ *
+ * Not all systems support all kinds of binding.
+ * hwloc_topology_get_support() may be used to query about the actual memory
+ * binding support in the currently used operating system.
+ * See the "Detailed Description" section of \ref hwlocality_membinding
+ * for a description of errors that can occur.
+ */
+typedef enum {
+ /** \brief Set policy for all threads of the specified (possibly
+ * multithreaded) process. This flag is mutually exclusive with
+ * ::HWLOC_MEMBIND_THREAD.
+ * \hideinitializer */
+ HWLOC_MEMBIND_PROCESS = (1<<0),
+
+ /** \brief Set policy for a specific thread of the current process.
+ * This flag is mutually exclusive with ::HWLOC_MEMBIND_PROCESS.
+ * \hideinitializer */
+ HWLOC_MEMBIND_THREAD = (1<<1),
+
+ /** \brief Request strict binding from the OS. The function will fail if
+ * the binding can not be guaranteed / completely enforced.
+ *
+ * This flag has slightly different meanings depending on which
+ * function it is used with.
+ * \hideinitializer */
+ HWLOC_MEMBIND_STRICT = (1<<2),
+
+ /** \brief Migrate existing allocated memory. If the memory cannot
+ * be migrated and the ::HWLOC_MEMBIND_STRICT flag is passed, an error
+ * will be returned.
+ * \hideinitializer */
+ HWLOC_MEMBIND_MIGRATE = (1<<3),
+
+ /** \brief Avoid any effect on CPU binding.
+ *
+ * On some operating systems, some underlying memory binding
+ * functions also bind the application to the corresponding CPU(s).
+ * Using this flag will cause hwloc to avoid using OS functions that
+ * could potentially affect CPU bindings. Note, however, that using
+ * NOCPUBIND may reduce hwloc's overall memory binding
+ * support. Specifically: some of hwloc's memory binding functions
+ * may fail with errno set to ENOSYS when used with NOCPUBIND.
+ * \hideinitializer
+ */
+ HWLOC_MEMBIND_NOCPUBIND = (1<<4),
+
+ /** \brief Consider the bitmap argument as a nodeset.
+ *
+ * The bitmap argument is considered a nodeset if this flag is given,
+ * or a cpuset otherwise by default.
+ *
+ * Memory binding by CPU set cannot work for CPU-less NUMA memory nodes.
+ * Binding by nodeset should therefore be preferred whenever possible.
+ * \hideinitializer
+ */
+ HWLOC_MEMBIND_BYNODESET = (1<<5)
+} hwloc_membind_flags_t;
+
+/** \brief Set the default memory binding policy of the current
+ * process or thread to prefer the NUMA node(s) specified by \p set
+ *
+ * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is
+ * specified, the current process is assumed to be single-threaded.
+ * This is the most portable form as it permits hwloc to use either
+ * process-based OS functions or thread-based OS functions, depending
+ * on which are available.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Query the default memory binding policy and physical locality of the
+ * current process or thread.
+ *
+ * This function has two output parameters: \p set and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the current memory binding policies and nodesets in
+ * the queried target.
+ *
+ * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query
+ * target is the current policies and nodesets for all the threads in
+ * the current process. Passing ::HWLOC_MEMBIND_THREAD specifies that
+ * the query target is the current policy and nodeset for only the
+ * thread invoking this function.
+ *
+ * If neither of these flags is passed (which is the most portable
+ * method), the process is assumed to be single threaded. This allows
+ * hwloc to use either process-based OS functions or thread-based OS
+ * functions, depending on which are available.
+ *
+ * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS
+ * is also specified. In this case, hwloc will check the default
+ * memory policies and nodesets for all threads in the process. If
+ * they are not identical, -1 is returned and errno is set to EXDEV.
+ * If they are identical, the values are returned in \p set and \p
+ * policy.
+ *
+ * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and
+ * ::HWLOC_MEMBIND_STRICT is \em not specified), the default set
+ * from each thread is logically OR'ed together.
+ * If all threads' default policies are the same, \p policy is set to
+ * that policy. If they are different, \p policy is set to
+ * ::HWLOC_MEMBIND_MIXED.
+ *
+ * In the ::HWLOC_MEMBIND_THREAD case (or when neither
+ * ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is specified), there
+ * is only one set and policy; they are returned in \p set and
+ * \p policy, respectively.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
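+ *
+ * For example, a query sketch (assuming a loaded \p topology; error
+ * checking omitted):
+ * \code
+ * hwloc_bitmap_t set = hwloc_bitmap_alloc();
+ * hwloc_membind_policy_t policy;
+ * hwloc_get_membind(topology, set, &policy, HWLOC_MEMBIND_BYNODESET);
+ * hwloc_bitmap_free(set);
+ * \endcode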
+ */ +HWLOC_DECLSPEC int hwloc_get_membind(hwloc_topology_t topology, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags); + +/** \brief Set the default memory binding policy of the specified + * process to prefer the NUMA node(s) specified by \p set + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags); + +/** \brief Query the default memory binding policy and physical locality of the + * specified process. + * + * This function has two output parameters: \p set and \p policy. + * The values returned in these parameters depend on both the \p flags + * passed in and the current memory binding policies and nodesets in + * the queried target. + * + * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query + * target is the current policies and nodesets for all the threads in + * the specified process. If ::HWLOC_MEMBIND_PROCESS is not specified + * (which is the most portable method), the process is assumed to be + * single threaded. This allows hwloc to use either process-based OS + * functions or thread-based OS functions, depending on which are + * available. + * + * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to + * this function. + * + * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default + * memory policies and nodesets for all threads in the specified + * process. If they are not identical, -1 is returned and errno is + * set to EXDEV. If they are identical, the values are returned in \p + * set and \p policy. + * + * Otherwise, \p set is set to the logical OR of all threads' + * default set. If all threads' default policies + * are the same, \p policy is set to that policy. If they are + * different, \p policy is set to ::HWLOC_MEMBIND_MIXED. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * If any other flags are specified, -1 is returned and errno is set + * to EINVAL. + * + * \note \p hwloc_pid_t is \p pid_t on Unix platforms, + * and \p HANDLE on native Windows platforms. + */ +HWLOC_DECLSPEC int hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags); + +/** \brief Bind the already-allocated memory identified by (addr, len) + * to the NUMA node(s) specified by \p set. + * + * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. + * Otherwise it's a cpuset. + * + * \return 0 if \p len is 0. + * \return -1 with errno set to ENOSYS if the action is not supported + * \return -1 with errno set to EXDEV if the binding cannot be enforced + */ +HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags); + +/** \brief Query the CPUs near the physical NUMA node(s) and binding policy of + * the memory identified by (\p addr, \p len ). + * + * This function has two output parameters: \p set and \p policy. 
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the memory binding policies and nodesets of the pages
+ * in the address range.
+ *
+ * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first
+ * checked to see if they all have the same memory binding policy and
+ * nodeset. If they do not, -1 is returned and errno is set to EXDEV.
+ * If they are identical across all pages, the set and policy are
+ * returned in \p set and \p policy, respectively.
+ *
+ * If ::HWLOC_MEMBIND_STRICT is not specified, the union of all NUMA
+ * node(s) containing pages in the address range is calculated.
+ * If all pages in the target have the same policy, it is returned in
+ * \p policy. Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ *
+ * If \p len is 0, -1 is returned and errno is set to EINVAL.
+ */
+HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Get the NUMA nodes where memory identified by (\p addr, \p len ) is physically allocated.
+ *
+ * Fills \p set according to the NUMA nodes where the memory area pages
+ * are physically allocated. If no page is actually allocated yet,
+ * \p set may be empty.
+ *
+ * If pages spread to multiple nodes, it is not specified whether they spread
+ * equitably, or whether most of them are on a single node, etc.
+ *
+ * The operating system may move memory pages from one NUMA node
+ * to another at any time according to their binding,
+ * so this function may return something that is already
+ * outdated.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified in \p flags, set is
+ * considered a nodeset. Otherwise it's a cpuset.
+ *
+ * If \p len is 0, \p set is emptied.
+ */
+HWLOC_DECLSPEC int hwloc_get_area_memlocation(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, int flags);
+
+/** \brief Allocate some memory
+ *
+ * This is equivalent to malloc(), except that it tries to allocate
+ * page-aligned memory from the OS.
+ *
+ * \note The allocated memory should be freed with hwloc_free().
+ */
+HWLOC_DECLSPEC void *hwloc_alloc(hwloc_topology_t topology, size_t len);
+
+/** \brief Allocate some memory on NUMA memory nodes specified by \p set
+ *
+ * \return NULL with errno set to ENOSYS if the action is not supported
+ * and ::HWLOC_MEMBIND_STRICT is given
+ * \return NULL with errno set to EXDEV if the binding cannot be enforced
+ * and ::HWLOC_MEMBIND_STRICT is given
+ * \return NULL with errno set to ENOMEM if the memory allocation failed
+ * even before trying to bind.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * \note The allocated memory should be freed with hwloc_free().
+ */
+HWLOC_DECLSPEC void *hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;
+
+/** \brief Allocate some memory on NUMA memory nodes specified by \p set
+ *
+ * This is similar to hwloc_alloc_membind() except that it is allowed to change
+ * the current memory binding policy, thus providing more binding support, at
+ * the expense of changing the current state.
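+ *
+ * For example, a sketch allocating 4096 bytes bound to some nodeset
+ * \p set:
+ * \code
+ * void *buf = hwloc_alloc_membind_policy(topology, 4096, set,
+ *                                        HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
+ * \endcode
+ * The buffer would later be released with hwloc_free(topology, buf, 4096).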
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ */
+static __hwloc_inline void *
+hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;
+
+/** \brief Free memory that was previously allocated by hwloc_alloc()
+ * or hwloc_alloc_membind().
+ */
+HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_setsource Changing the Source of Topology Discovery
+ *
+ * If none of the functions below is called, the default is to detect all the objects
+ * of the machine that the caller is allowed to access.
+ *
+ * This default behavior may also be modified through environment variables
+ * if the application did not modify it already.
+ * Setting HWLOC_XMLFILE in the environment enforces the discovery from an XML
+ * file as if hwloc_topology_set_xml() had been called.
+ * Setting HWLOC_SYNTHETIC enforces a synthetic topology as if
+ * hwloc_topology_set_synthetic() had been called.
+ *
+ * Finally, HWLOC_THISSYSTEM enforces the return value of
+ * hwloc_topology_is_thissystem().
+ *
+ * @{
+ */
+
+/** \brief Change which process the topology is viewed from.
+ *
+ * On some systems, processes may have different views of the machine, for
+ * instance the set of allowed CPUs. By default, hwloc exposes the view from
+ * the current process. Calling hwloc_topology_set_pid() makes it
+ * expose the topology of the machine from the point of view of another
+ * process.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note -1 is returned and errno is set to ENOSYS on platforms that do not
+ * support this feature.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_pid(hwloc_topology_t __hwloc_restrict topology, hwloc_pid_t pid);
+
+/** \brief Enable synthetic topology.
+ *
+ * Gather topology information from the given \p description,
+ * a space-separated string of <type:number> describing
+ * the object type and arity at each level.
+ * All types may be omitted (space-separated string of numbers) so that
+ * hwloc chooses all types according to usual topologies.
+ * See also the \ref synthetic.
+ *
+ * Setting the environment variable HWLOC_SYNTHETIC
+ * may also result in this behavior.
+ *
+ * If \p description was properly parsed and describes a valid topology
+ * configuration, this function returns 0.
+ * Otherwise -1 is returned and errno is set to EINVAL.
+ *
+ * Note that this function does not actually load topology
+ * information; it just tells hwloc where to load it from. You'll
+ * still need to invoke hwloc_topology_load() to actually load the
+ * topology information.
+ *
+ * \note For convenience, this backend provides empty binding hooks which just
+ * return success.
+ *
+ * \note On success, the synthetic component replaces the previously enabled
+ * component (if any), but the topology is not actually modified until
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_synthetic(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict description);
+
+/** \brief Enable XML-file based topology.
+ *
+ * Gather topology information from the XML file given at \p xmlpath.
+ * Setting the environment variable HWLOC_XMLFILE may also result in this behavior.
+ * This file may have been generated earlier with hwloc_topology_export_xml() in hwloc/export.h, + * or lstopo file.xml. + * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \return -1 with errno set to EINVAL on failure to read the XML file. + * + * \note See also hwloc_topology_set_userdata_import_callback() + * for importing application-specific object userdata. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. To have hwloc still actually call OS-specific hooks, the + * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded + * file is really the underlying system. + * + * \note On success, the XML component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict xmlpath); + +/** \brief Enable XML based topology using a memory buffer (instead of + * a file, as with hwloc_topology_set_xml()). + * + * Gather topology information from the XML memory buffer given at \p + * buffer and of length \p size. This buffer may have been filled + * earlier with hwloc_topology_export_xmlbuffer() in hwloc/export.h. + * + * Note that this function does not actually load topology + * information; it just tells hwloc where to load it from. You'll + * still need to invoke hwloc_topology_load() to actually load the + * topology information. + * + * \return -1 with errno set to EINVAL on failure to read the XML buffer. + * + * \note See also hwloc_topology_set_userdata_import_callback() + * for importing application-specific object userdata. + * + * \note For convenience, this backend provides empty binding hooks which just + * return success. To have hwloc still actually call OS-specific hooks, the + * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded + * file is really the underlying system. + * + * \note On success, the XML component replaces the previously enabled + * component (if any), but the topology is not actually modified until + * hwloc_topology_load(). + */ +HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size); + +/** @} */ + + + +/** \defgroup hwlocality_configuration Topology Detection Configuration and Query + * + * Several functions can optionally be called between hwloc_topology_init() and + * hwloc_topology_load() to configure how the detection should be performed, + * e.g. to ignore some objects types, define a synthetic topology, etc. + * + * @{ + */ + +/** \brief Flags to be set onto a topology context before load. + * + * Flags should be given to hwloc_topology_set_flags(). + * They may also be returned by hwloc_topology_get_flags(). + */ +enum hwloc_topology_flags_e { + /** \brief Detect the whole system, ignore reservations. + * + * Gather all resources, even if some were disabled by the administrator. + * For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes. + * + * When this flag is not set, PUs and NUMA nodes that are disallowed are not added to the topology. + * Parent objects (package, core, cache, etc.) are added only if some of their children are allowed. 
+ *
+ * When this flag is set, the actual sets of allowed PUs and NUMA nodes are given
+ * by hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset().
+ * They may be smaller than the root object cpuset and nodeset.
+ *
+ * When this flag is not set, all existing PUs and NUMA nodes in the topology
+ * are allowed. hwloc_topology_get_allowed_cpuset() and hwloc_topology_get_allowed_nodeset()
+ * are equal to the root object cpuset and nodeset.
+ *
+ * If the current topology is exported to XML and reimported later, this flag
+ * should be set again in the reimported topology so that disallowed resources
+ * are reimported as well.
+ * \hideinitializer
+ */
+ HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0),
+
+ /** \brief Assume that the selected backend provides the topology for the
+ * system on which we are running.
+ *
+ * This forces hwloc_topology_is_thissystem() to return 1, i.e. makes hwloc assume that
+ * the selected backend provides the topology for the system on which we are running,
+ * even if it is not the OS-specific backend but the XML backend for instance.
+ * This means making the binding functions actually call the OS-specific
+ * system calls and really do binding, while the XML backend would otherwise
+ * provide empty hooks just returning success.
+ *
+ * Setting the environment variable HWLOC_THISSYSTEM may also result in the
+ * same behavior.
+ *
+ * This can be used for efficiency reasons to first detect the topology once,
+ * save it to an XML file, and quickly reload it later through the XML
+ * backend, while still having binding functions actually do bind.
+ * \hideinitializer
+ */
+ HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM = (1UL<<1),
+
+ /** \brief Get the set of allowed resources from the local operating system even if the topology was loaded from XML or synthetic description.
+ *
+ * If the topology was loaded from XML or from a synthetic string,
+ * restrict it by applying the current process restrictions such as
+ * Linux Cgroup/Cpuset.
+ *
+ * This is useful when the topology is not loaded directly from
+ * the local machine (e.g. for performance reasons) and it comes
+ * with all resources, while the running process is restricted
+ * to only parts of the machine.
+ *
+ * This flag is ignored unless ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM is
+ * also set since the loaded topology must match the underlying machine
+ * where restrictions will be gathered from.
+ *
+ * Setting the environment variable HWLOC_THISSYSTEM_ALLOWED_RESOURCES
+ * would result in the same behavior.
+ * \hideinitializer
+ */
+ HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<2)
+};
+
+/** \brief Set OR'ed flags to a not-yet-loaded topology.
+ *
+ * Set an OR'ed set of ::hwloc_topology_flags_e onto a topology that was not yet loaded.
+ *
+ * If this function is called multiple times, the last invocation will erase
+ * and replace the set of flags that was previously set.
+ *
+ * The flags set in a topology may be retrieved with hwloc_topology_get_flags()
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned long flags);
+
+/** \brief Get OR'ed flags of a topology.
+ *
+ * Get the OR'ed set of ::hwloc_topology_flags_e of a topology.
+ *
+ * \return the flags previously set with hwloc_topology_set_flags().
+ */
+HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology);
+
+/** \brief Does the topology context come from this system?
+ *
+ * \return 1 if this topology context was built using the system
+ * running this program.
+ * \return 0 instead (for instance if using another file-system root,
+ * an XML topology file, or a synthetic topology).
+ */
+HWLOC_DECLSPEC int hwloc_topology_is_thissystem(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure;
+
+/** \brief Flags describing actual discovery support for this topology. */
+struct hwloc_topology_discovery_support {
+ /** \brief Detecting the number of PU objects is supported. */
+ unsigned char pu;
+ /** \brief Detecting the number of NUMA nodes is supported. */
+ unsigned char numa;
+ /** \brief Detecting the amount of memory in NUMA nodes is supported. */
+ unsigned char numa_memory;
+};
+
+/** \brief Flags describing actual PU binding support for this topology.
+ *
+ * A flag may be set even if the feature isn't supported in all cases
+ * (e.g. binding to random sets of non-contiguous objects).
+ */
+struct hwloc_topology_cpubind_support {
+ /** Binding the whole current process is supported. */
+ unsigned char set_thisproc_cpubind;
+ /** Getting the binding of the whole current process is supported. */
+ unsigned char get_thisproc_cpubind;
+ /** Binding a whole given process is supported. */
+ unsigned char set_proc_cpubind;
+ /** Getting the binding of a whole given process is supported. */
+ unsigned char get_proc_cpubind;
+ /** Binding the current thread only is supported. */
+ unsigned char set_thisthread_cpubind;
+ /** Getting the binding of the current thread only is supported. */
+ unsigned char get_thisthread_cpubind;
+ /** Binding a given thread only is supported. */
+ unsigned char set_thread_cpubind;
+ /** Getting the binding of a given thread only is supported. */
+ unsigned char get_thread_cpubind;
+ /** Getting the last processors where the whole current process ran is supported */
+ unsigned char get_thisproc_last_cpu_location;
+ /** Getting the last processors where a whole process ran is supported */
+ unsigned char get_proc_last_cpu_location;
+ /** Getting the last processors where the current thread ran is supported */
+ unsigned char get_thisthread_last_cpu_location;
+};
+
+/** \brief Flags describing actual memory binding support for this topology.
+ *
+ * A flag may be set even if the feature isn't supported in all cases
+ * (e.g. binding to random sets of non-contiguous objects).
+ */
+struct hwloc_topology_membind_support {
+ /** Binding the whole current process is supported. */
+ unsigned char set_thisproc_membind;
+ /** Getting the binding of the whole current process is supported. */
+ unsigned char get_thisproc_membind;
+ /** Binding a whole given process is supported. */
+ unsigned char set_proc_membind;
+ /** Getting the binding of a whole given process is supported. */
+ unsigned char get_proc_membind;
+ /** Binding the current thread only is supported. */
+ unsigned char set_thisthread_membind;
+ /** Getting the binding of the current thread only is supported. */
+ unsigned char get_thisthread_membind;
+ /** Binding a given memory area is supported. */
+ unsigned char set_area_membind;
+ /** Getting the binding of a given memory area is supported. */
+ unsigned char get_area_membind;
+ /** Allocating a bound memory area is supported. */
+ unsigned char alloc_membind;
+ /** First-touch policy is supported. */
+ unsigned char firsttouch_membind;
+ /** Bind policy is supported. */
+ unsigned char bind_membind;
+ /** Interleave policy is supported.
*/
+  unsigned char interleave_membind;
+  /** Next-touch migration policy is supported. */
+  unsigned char nexttouch_membind;
+  /** Migration flags are supported. */
+  unsigned char migrate_membind;
+  /** Getting the last NUMA nodes where a memory area was allocated is supported */
+  unsigned char get_area_memlocation;
+};
+
+/** \brief Set of flags describing actual support for this topology.
+ *
+ * This is retrieved with hwloc_topology_get_support() and will be valid until
+ * the topology object is destroyed. Note: the values are correct only after
+ * discovery.
+ */
+struct hwloc_topology_support {
+  struct hwloc_topology_discovery_support *discovery;
+  struct hwloc_topology_cpubind_support *cpubind;
+  struct hwloc_topology_membind_support *membind;
+};
+
+/** \brief Retrieve the topology support.
+ *
+ * Each flag indicates whether a feature is supported.
+ * If set to 0, the feature is not supported.
+ * If set to 1, the feature is supported, but the corresponding
+ * call may still fail in some corner cases.
+ *
+ * These features are also listed by hwloc-info \--support.
+ */
+HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology);
+
+/** \brief Type filtering flags.
+ *
+ * By default, most objects are kept (::HWLOC_TYPE_FILTER_KEEP_ALL).
+ * Instruction caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE).
+ * Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE).
+ *
+ * Note that group objects are also ignored individually (without the entire level)
+ * when they do not bring structure.
+ */
+enum hwloc_type_filter_e {
+  /** \brief Keep all objects of this type.
+   *
+   * Cannot be set for ::HWLOC_OBJ_GROUP (groups are designed only to add more structure to the topology).
+   * \hideinitializer
+   */
+  HWLOC_TYPE_FILTER_KEEP_ALL = 0,
+
+  /** \brief Ignore all objects of this type.
+   *
+   * The bottom-level type ::HWLOC_OBJ_PU, the ::HWLOC_OBJ_NUMANODE type, and
+   * the top-level type ::HWLOC_OBJ_MACHINE may not be ignored.
+   * \hideinitializer
+   */
+  HWLOC_TYPE_FILTER_KEEP_NONE = 1,
+
+  /** \brief Only ignore objects if their entire level does not bring any structure.
+   *
+   * Keep the entire level of objects if at least one of these objects adds
+   * structure to the topology. An object brings structure when it has multiple
+   * children and it is not the only child of its parent.
+   *
+   * If all objects in the level are the only child of their parent, and if none
+   * of them has multiple children, the entire level is removed.
+   *
+   * Cannot be set for I/O and Misc objects since the topology structure does not matter there.
+   * \hideinitializer
+   */
+  HWLOC_TYPE_FILTER_KEEP_STRUCTURE = 2,
+
+  /** \brief Only keep likely-important objects of the given type.
+   *
+   * It is only useful for I/O object types.
+   * For ::HWLOC_OBJ_PCI_DEVICE and ::HWLOC_OBJ_OS_DEVICE, it means that only objects
+   * of major/common kinds are kept (storage, network, OpenFabrics, Intel MICs, CUDA,
+   * OpenCL, NVML, and displays).
+   * Also, only OS devices directly attached on PCI (e.g. no USB) are reported.
+   * For ::HWLOC_OBJ_BRIDGE, it means that bridges are kept only if they have children.
+   *
+   * This flag is equivalent to ::HWLOC_TYPE_FILTER_KEEP_ALL for Normal, Memory and Misc types
+   * since they are likely important.
+   * \hideinitializer
+   */
+  HWLOC_TYPE_FILTER_KEEP_IMPORTANT = 3
+};
+
+/** \brief Set the filtering for the given object type.
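+ *
+ * Editor's note: an illustrative sketch, not upstream text. For example,
+ * instruction caches are filtered out by default and could be re-enabled
+ * on a not-yet-loaded topology with:
+ * \code
+ * hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1ICACHE,
+ *                                HWLOC_TYPE_FILTER_KEEP_ALL);
+ * \endcode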
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_type_filter(hwloc_topology_t topology, hwloc_obj_type_t type, enum hwloc_type_filter_e filter);
+
+/** \brief Get the current filtering for the given object type.
+ */
+HWLOC_DECLSPEC int hwloc_topology_get_type_filter(hwloc_topology_t topology, hwloc_obj_type_t type, enum hwloc_type_filter_e *filter);
+
+/** \brief Set the filtering for all object types.
+ *
+ * If some types do not support this filtering, they are silently ignored.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_all_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
+
+/** \brief Set the filtering for all cache object types.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
+
+/** \brief Set the filtering for all instruction cache object types.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
+
+/** \brief Set the filtering for all I/O object types.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_io_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
+
+/** \brief Set the topology-specific userdata pointer.
+ *
+ * Each topology may store one application-given private data pointer.
+ * It is initialized to \c NULL.
+ * hwloc will never modify it.
+ *
+ * Use it as you wish, after hwloc_topology_init() and until hwloc_topology_destroy().
+ *
+ * This pointer is not exported to XML.
+ */
+HWLOC_DECLSPEC void hwloc_topology_set_userdata(hwloc_topology_t topology, const void *userdata);
+
+/** \brief Retrieve the topology-specific userdata pointer.
+ *
+ * Retrieve the application-given private data pointer that was
+ * previously set with hwloc_topology_set_userdata().
+ */
+HWLOC_DECLSPEC void * hwloc_topology_get_userdata(hwloc_topology_t topology);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_tinker Modifying a loaded Topology
+ * @{
+ */
+
+/** \brief Flags to be given to hwloc_topology_restrict(). */
+enum hwloc_restrict_flags_e {
+  /** \brief Remove all objects that became CPU-less.
+   * By default, only objects that contain no PU and no memory are removed.
+   * \hideinitializer
+   */
+  HWLOC_RESTRICT_FLAG_REMOVE_CPULESS = (1UL<<0),
+
+  /** \brief Move Misc objects to ancestors if their parents are removed during restriction.
+   * If this flag is not set, Misc objects are removed when their parents are removed.
+   * \hideinitializer
+   */
+  HWLOC_RESTRICT_FLAG_ADAPT_MISC = (1UL<<1),
+
+  /** \brief Move I/O objects to ancestors if their parents are removed during restriction.
+   * If this flag is not set, I/O devices and bridges are removed when their parents are removed.
+   * \hideinitializer
+   */
+  HWLOC_RESTRICT_FLAG_ADAPT_IO = (1UL<<2)
+};
+
+/** \brief Restrict the topology to the given CPU set.
+ *
+ * Topology \p topology is modified so as to remove all objects that
+ * are not included (or partially included) in the CPU set \p cpuset.
+ * All objects' CPU and node sets are restricted accordingly.
+ *
+ * \p flags is an OR'ed set of ::hwloc_restrict_flags_e.
+ *
+ * \note This call may not be reverted by restricting back to a larger
+ * cpuset. Once dropped during restriction, objects may not be brought
+ * back, except by loading another topology with hwloc_topology_load().
+ *
+ * \return 0 on success.
+ *
+ * \return -1 with errno set to EINVAL if the input cpuset is invalid.
+ * The topology is not modified in this case.
+ *
+ * \return -1 with errno set to ENOMEM on failure to allocate internal data.
+ * The topology is reinitialized in this case. It should be either
+ * destroyed with hwloc_topology_destroy() or configured and loaded again.
+ */
+HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags);
+
+/** \brief Add a MISC object as a leaf of the topology
+ *
+ * A new MISC object will be created and inserted into the topology at the
+ * position given by parent. It is appended to the list of existing Misc children,
+ * without ever adding any intermediate hierarchy level. This is useful for
+ * annotating the topology without actually changing the hierarchy.
+ *
+ * \p name is supposed to be unique across all Misc objects in the topology.
+ * It will be duplicated to set up the new object attributes.
+ *
+ * The new leaf object will not have any \p cpuset.
+ *
+ * \return the newly-created object
+ *
+ * \return \c NULL on error.
+ *
+ * \return \c NULL if Misc objects are filtered-out of the topology (::HWLOC_TYPE_FILTER_KEEP_NONE).
+ *
+ * \note If \p name contains some non-printable characters, they will
+ * be dropped when exporting to XML, see hwloc_topology_export_xml() in hwloc/export.h.
+ */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object(hwloc_topology_t topology, hwloc_obj_t parent, const char *name);
+
+/** \brief Allocate a Group object to insert later with hwloc_topology_insert_group_object().
+ *
+ * This function returns a new Group object.
+ * The caller should (at least) initialize its sets before inserting the object.
+ * See hwloc_topology_insert_group_object().
+ *
+ * The \p subtype object attribute may be set to display something else
+ * than "Group" as the type name for this object in lstopo.
+ * Custom name/value info pairs may be added with hwloc_obj_add_info() after
+ * insertion.
+ *
+ * The \p kind group attribute should be 0. The \p subkind group attribute may
+ * be set to identify multiple Groups of the same level.
+ *
+ * It is recommended not to set any other object attribute before insertion,
+ * since the Group may get discarded during insertion.
+ *
+ * The object will be destroyed if passed to hwloc_topology_insert_group_object()
+ * without any set defined.
+ */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_alloc_group_object(hwloc_topology_t topology);
+
+/** \brief Add more structure to the topology by adding an intermediate Group
+ *
+ * The caller should first allocate a new Group object with hwloc_topology_alloc_group_object().
+ * Then it must set up at least one of its CPU or node sets to specify
+ * the final location of the Group in the topology.
+ * Then the object can be passed to this function for actual insertion in the topology.
+ *
+ * The group \p dont_merge attribute may be set to prevent the core from
+ * ever merging this object with another hierarchically-identical object.
+ *
+ * Either the cpuset or nodeset field (or both, if compatible) must be set
+ * to a non-empty bitmap. The complete_cpuset or complete_nodeset may be set
+ * instead if inserting with respect to the complete topology
+ * (including disallowed, offline or unknown objects).
+ *
+ * When grouping several objects, hwloc_obj_add_other_obj_sets() is an easy way
+ * to build the Group sets iteratively, as sketched below.
+ *
+ * These sets cannot be larger than the current topology, or they would get
+ * restricted silently.
+ *
+ * The core will set up the other sets after actual insertion.
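+ *
+ * Editor's note: a minimal sketch of the allocate/initialize/insert sequence
+ * described above (not upstream text; \c obj1 and \c obj2 are hypothetical
+ * objects already obtained from the topology):
+ * \code
+ * hwloc_obj_t group = hwloc_topology_alloc_group_object(topology);
+ * hwloc_obj_add_other_obj_sets(group, obj1);
+ * hwloc_obj_add_other_obj_sets(group, obj2);
+ * group = hwloc_topology_insert_group_object(topology, group);
+ * \endcode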
+ * + * \return The inserted object if it was properly inserted. + * + * \return An existing object if the Group was discarded because the topology already + * contained an object at the same location (the Group did not add any locality information). + * Any name/info key pair set before inserting is appended to the existing object. + * + * \return \c NULL if the insertion failed because of conflicting sets in topology tree. + * + * \return \c NULL if Group objects are filtered-out of the topology (::HWLOC_TYPE_FILTER_KEEP_NONE). + * + * \return \c NULL if the object was discarded because no set was initialized in the Group + * before insert, or all of them were empty. + */ +HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_group_object(hwloc_topology_t topology, hwloc_obj_t group); + +/** \brief Setup object cpusets/nodesets by OR'ing another object's sets. + * + * For each defined cpuset or nodeset in \p src, allocate the corresponding set + * in \p dst and add \p src to it by OR'ing sets. + * + * This function is convenient between hwloc_topology_alloc_group_object() + * and hwloc_topology_insert_group_object(). It builds the sets of the new Group + * that will be inserted as a new intermediate parent of several objects. + */ +HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src); + +/** @} */ + + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +/* high-level helpers */ +#include <hwloc/helper.h> + +/* inline code of some functions above */ +#include <hwloc/inlines.h> + +/* exporting to XML or synthetic */ +#include <hwloc/export.h> + +/* distances */ +#include <hwloc/distances.h> + +/* topology diffs */ +#include <hwloc/diff.h> + +/* deprecated headers */ +#include <hwloc/deprecated.h> + +#endif /* HWLOC_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h new file mode 100644 index 000000000..14d4481d2 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -0,0 +1,59 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/* The configuration file */ + +#ifndef HWLOC_CONFIG_H +#define HWLOC_CONFIG_H + +#define HWLOC_VERSION "2.0.4" +#define HWLOC_VERSION_MAJOR 2 +#define HWLOC_VERSION_MINOR 0 +#define HWLOC_VERSION_RELEASE 4 +#define HWLOC_VERSION_GREEK "" + +#define __hwloc_restrict +#define __hwloc_inline __inline + +#define __hwloc_attribute_unused +#define __hwloc_attribute_malloc +#define __hwloc_attribute_const +#define __hwloc_attribute_pure +#define __hwloc_attribute_deprecated +#define __hwloc_attribute_may_alias +#define __hwloc_attribute_warn_unused_result + +/* Defined to 1 if you have the `windows.h' header. 
*/ +#define HWLOC_HAVE_WINDOWS_H 1 +#define hwloc_pid_t HANDLE +#define hwloc_thread_t HANDLE + +#include <windows.h> +#include <BaseTsd.h> +typedef DWORDLONG hwloc_uint64_t; + +#if defined( _USRDLL ) /* dynamic linkage */ +#if defined( DECLSPEC_EXPORTS ) +#define HWLOC_DECLSPEC __declspec(dllexport) +#else +#define HWLOC_DECLSPEC __declspec(dllimport) +#endif +#else /* static linkage */ +#define HWLOC_DECLSPEC +#endif + +/* Whether we need to re-define all the hwloc public symbols or not */ +#define HWLOC_SYM_TRANSFORM 0 + +/* The hwloc symbol prefix */ +#define HWLOC_SYM_PREFIX hwloc_ + +/* The hwloc symbol prefix in all caps */ +#define HWLOC_SYM_PREFIX_CAPS HWLOC_ + +#endif /* HWLOC_CONFIG_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/bitmap.h b/src/3rdparty/hwloc/include/hwloc/bitmap.h new file mode 100644 index 000000000..bae623c8c --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/bitmap.h @@ -0,0 +1,467 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief The bitmap API, for use in hwloc itself. + */ + +#ifndef HWLOC_BITMAP_H +#define HWLOC_BITMAP_H + +#include <hwloc/autogen/config.h> +#include <assert.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_bitmap The bitmap API + * + * The ::hwloc_bitmap_t type represents a set of integers (positive or null). + * A bitmap may be of infinite size (all bits are set after some point). + * A bitmap may even be full if all bits are set. + * + * Bitmaps are used by hwloc for sets of OS processors + * (which may actually be hardware threads) as by ::hwloc_cpuset_t + * (a typedef for ::hwloc_bitmap_t), or sets of NUMA memory nodes + * as ::hwloc_nodeset_t (also a typedef for ::hwloc_bitmap_t). + * Those are used for cpuset and nodeset fields in the ::hwloc_obj structure, + * see \ref hwlocality_object_sets. + * + * <em>Both CPU and node sets are always indexed by OS physical number.</em> + * However users should usually not build CPU and node sets manually + * (e.g. with hwloc_bitmap_set()). + * One should rather use existing object sets and combine them with + * hwloc_bitmap_or(), etc. + * For instance, binding the current thread on a pair of cores may be performed with: + * \code + * hwloc_obj_t core1 = ... , core2 = ... ; + * hwloc_bitmap_t set = hwloc_bitmap_alloc(); + * hwloc_bitmap_or(set, core1->cpuset, core2->cpuset); + * hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD); + * hwloc_bitmap_free(set); + * \endcode + * + * \note Most functions below return an int that may be negative in case of + * error. The usual error case would be an internal failure to realloc/extend + * the storage of the bitmap (\p errno would be set to \c ENOMEM). + * + * \note Several examples of using the bitmap API are available under the + * doc/examples/ directory in the source tree. + * Regression tests such as tests/hwloc/hwloc_bitmap*.c also make intensive use + * of this API. + * @{ + */ + + +/** \brief + * Set of bits represented as an opaque pointer to an internal bitmap. + */ +typedef struct hwloc_bitmap_s * hwloc_bitmap_t; +/** \brief a non-modifiable ::hwloc_bitmap_t */ +typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t; + + +/* + * Bitmap allocation, freeing and copying. + */ + +/** \brief Allocate a new empty bitmap. + * + * \returns A valid bitmap or \c NULL. 
+ *
+ * The bitmap should be freed by a corresponding call to
+ * hwloc_bitmap_free().
+ */
+HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc;
+
+/** \brief Allocate a new full bitmap. */
+HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc;
+
+/** \brief Free bitmap \p bitmap.
+ *
+ * If \p bitmap is \c NULL, no operation is performed.
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_free(hwloc_bitmap_t bitmap);
+
+/** \brief Duplicate bitmap \p bitmap by allocating a new bitmap and copying \p bitmap contents.
+ *
+ * If \p bitmap is \c NULL, \c NULL is returned.
+ */
+HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_dup(hwloc_const_bitmap_t bitmap) __hwloc_attribute_malloc;
+
+/** \brief Copy the contents of bitmap \p src into the already allocated bitmap \p dst */
+HWLOC_DECLSPEC int hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t src);
+
+
+/*
+ * Bitmap/String Conversion
+ */
+
+/** \brief Stringify a bitmap.
+ *
+ * Up to \p buflen characters may be written in buffer \p buf.
+ *
+ * If \p buflen is 0, \p buf may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
+
+/** \brief Stringify a bitmap into a newly allocated string.
+ *
+ * \return -1 on error.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
+
+/** \brief Parse a bitmap string and store it in bitmap \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
+
+/** \brief Stringify a bitmap in the list format.
+ *
+ * Lists are comma-separated indexes or ranges.
+ * Ranges are dash-separated indexes.
+ * The last range may not have an ending index if the bitmap is infinitely set.
+ *
+ * Up to \p buflen characters may be written in buffer \p buf.
+ *
+ * If \p buflen is 0, \p buf may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
+
+/** \brief Stringify a bitmap into a newly allocated list string.
+ *
+ * \return -1 on error.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
+
+/** \brief Parse a list string and store it in bitmap \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
+
+/** \brief Stringify a bitmap in the taskset-specific format.
+ *
+ * The taskset command manipulates bitmap strings that contain a single
+ * (possibly very long) hexadecimal number starting with 0x.
+ *
+ * Up to \p buflen characters may be written in buffer \p buf.
+ *
+ * If \p buflen is 0, \p buf may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
+
+/** \brief Stringify a bitmap into a newly allocated taskset-specific string.
+ *
+ * \return -1 on error.
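+ *
+ * Editor's note: an illustrative sketch, not upstream text. A bitmap with
+ * bits 0-3 set stringifies to "0xf" in this format:
+ * \code
+ * char *s;
+ * hwloc_bitmap_t b = hwloc_bitmap_alloc();
+ * hwloc_bitmap_set_range(b, 0, 3);
+ * hwloc_bitmap_taskset_asprintf(&s, b); // s now contains "0xf"
+ * free(s);
+ * hwloc_bitmap_free(b);
+ * \endcode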
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
+
+/** \brief Parse a taskset-specific bitmap string and store it in bitmap \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
+
+
+/*
+ * Building bitmaps.
+ */
+
+/** \brief Empty the bitmap \p bitmap */
+HWLOC_DECLSPEC void hwloc_bitmap_zero(hwloc_bitmap_t bitmap);
+
+/** \brief Fill bitmap \p bitmap with all possible indexes (even if those objects don't exist or are otherwise unavailable) */
+HWLOC_DECLSPEC void hwloc_bitmap_fill(hwloc_bitmap_t bitmap);
+
+/** \brief Empty the bitmap \p bitmap and add bit \p id */
+HWLOC_DECLSPEC int hwloc_bitmap_only(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Fill the bitmap \p bitmap and clear the index \p id */
+HWLOC_DECLSPEC int hwloc_bitmap_allbut(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Set up bitmap \p bitmap from unsigned long \p mask */
+HWLOC_DECLSPEC int hwloc_bitmap_from_ulong(hwloc_bitmap_t bitmap, unsigned long mask);
+
+/** \brief Set up bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */
+HWLOC_DECLSPEC int hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);
+
+
+/*
+ * Modifying bitmaps.
+ */
+
+/** \brief Add index \p id in bitmap \p bitmap */
+HWLOC_DECLSPEC int hwloc_bitmap_set(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Add indexes from \p begin to \p end in bitmap \p bitmap.
+ *
+ * If \p end is \c -1, the range is infinite.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_set_range(hwloc_bitmap_t bitmap, unsigned begin, int end);
+
+/** \brief Replace \p i -th subset of bitmap \p bitmap with unsigned long \p mask */
+HWLOC_DECLSPEC int hwloc_bitmap_set_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);
+
+/** \brief Remove index \p id from bitmap \p bitmap */
+HWLOC_DECLSPEC int hwloc_bitmap_clr(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Remove indexes from \p begin to \p end in bitmap \p bitmap.
+ *
+ * If \p end is \c -1, the range is infinite.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_clr_range(hwloc_bitmap_t bitmap, unsigned begin, int end);
+
+/** \brief Keep a single index among those set in bitmap \p bitmap
+ *
+ * May be useful before binding so that the process does not
+ * have a chance of migrating between multiple logical CPUs
+ * in the original mask.
+ * Instead of running the task on any PU inside the given CPU set,
+ * the operating system scheduler will be forced to run it on a single
+ * one of these PUs.
+ * It avoids a migration overhead and cache-line ping-pongs between PUs.
+ *
+ * \note This function is NOT meant to distribute multiple processes
+ * within a single CPU set. It always returns the same single bit when
+ * called multiple times on the same input set. hwloc_distrib() may
+ * be used for generating CPU sets to distribute multiple tasks below
+ * a single multi-PU object.
+ *
+ * \note This function cannot be applied to an object set directly. It
+ * should be applied to a copy (which may be obtained with hwloc_bitmap_dup()).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_singlify(hwloc_bitmap_t bitmap);
+
+
+/*
+ * Consulting bitmaps.
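+ *
+ * Editor's note, a small sketch (not upstream text) of the queries below:
+ *
+ *   hwloc_bitmap_t b = hwloc_bitmap_alloc();
+ *   hwloc_bitmap_set_range(b, 0, 7);
+ *   // hwloc_bitmap_weight(b) == 8, hwloc_bitmap_first(b) == 0,
+ *   // hwloc_bitmap_last(b) == 7, hwloc_bitmap_isset(b, 3) == 1
+ *   hwloc_bitmap_free(b);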
+ */ + +/** \brief Convert the beginning part of bitmap \p bitmap into unsigned long \p mask */ +HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ulong(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */ +HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure; + +/** \brief Test whether index \p id is part of bitmap \p bitmap. + * + * \return 1 if the bit at index \p id is set in bitmap \p bitmap, 0 otherwise. + */ +HWLOC_DECLSPEC int hwloc_bitmap_isset(hwloc_const_bitmap_t bitmap, unsigned id) __hwloc_attribute_pure; + +/** \brief Test whether bitmap \p bitmap is empty + * + * \return 1 if bitmap is empty, 0 otherwise. + */ +HWLOC_DECLSPEC int hwloc_bitmap_iszero(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Test whether bitmap \p bitmap is completely full + * + * \return 1 if bitmap is full, 0 otherwise. + * + * \note A full bitmap is always infinitely set. + */ +HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Compute the first index (least significant bit) in bitmap \p bitmap + * + * \return -1 if no index is set in \p bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Compute the next index in bitmap \p bitmap which is after index \p prev + * + * If \p prev is -1, the first index is returned. + * + * \return -1 if no index with higher index is set in \p bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure; + +/** \brief Compute the last index (most significant bit) in bitmap \p bitmap + * + * \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set. + */ +HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Compute the "weight" of bitmap \p bitmap (i.e., number of + * indexes that are in the bitmap). + * + * \return the number of indexes that are in the bitmap. + * + * \return -1 if \p bitmap is infinitely set. + */ +HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Compute the first unset index (least significant bit) in bitmap \p bitmap + * + * \return -1 if no index is unset in \p bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_first_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Compute the next unset index in bitmap \p bitmap which is after index \p prev + * + * If \p prev is -1, the first unset index is returned. + * + * \return -1 if no index with higher index is unset in \p bitmap. + */ +HWLOC_DECLSPEC int hwloc_bitmap_next_unset(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure; + +/** \brief Compute the last unset index (most significant bit) in bitmap \p bitmap + * + * \return -1 if no index is unset in \p bitmap, or if \p bitmap is infinitely set. + */ +HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; + +/** \brief Loop macro iterating on bitmap \p bitmap + * + * The loop must start with hwloc_bitmap_foreach_begin() and end + * with hwloc_bitmap_foreach_end() followed by a terminating ';'. + * + * \p index is the loop variable; it should be an unsigned int. The + * first iteration will set \p index to the lowest index in the bitmap. 
+ * Successive iterations will iterate through, in order, all remaining
+ * indexes set in the bitmap. To be specific: each iteration will return a
+ * value for \p index such that hwloc_bitmap_isset(bitmap, index) is true.
+ *
+ * The assert prevents the loop from being infinite if the bitmap is infinitely set.
+ *
+ * \hideinitializer
+ */
+#define hwloc_bitmap_foreach_begin(id, bitmap) \
+do { \
+        assert(hwloc_bitmap_weight(bitmap) != -1); \
+        for (id = hwloc_bitmap_first(bitmap); \
+             (unsigned) id != (unsigned) -1; \
+             id = hwloc_bitmap_next(bitmap, id)) {
+
+/** \brief End of loop macro iterating on a bitmap.
+ *
+ * Needs a terminating ';'.
+ *
+ * \sa hwloc_bitmap_foreach_begin()
+ * \hideinitializer
+ */
+#define hwloc_bitmap_foreach_end() \
+        } \
+} while (0)
+
+
+/*
+ * Combining bitmaps.
+ */
+
+/** \brief Or bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_or (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief And bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_and (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief And bitmap \p bitmap1 and the negation of \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_andnot (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief Xor bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_xor (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief Negate bitmap \p bitmap and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap);
+
+
+/*
+ * Comparing bitmaps.
+ */
+
+/** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersect.
+ *
+ * \return 1 if bitmaps intersect, 0 otherwise.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p sub_bitmap is part of bitmap \p super_bitmap.
+ *
+ * \return 1 if \p sub_bitmap is included in \p super_bitmap, 0 otherwise.
+ *
+ * \note The empty bitmap is considered included in any other bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_isincluded (hwloc_const_bitmap_t sub_bitmap, hwloc_const_bitmap_t super_bitmap) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p bitmap1 is equal to bitmap \p bitmap2.
+ *
+ * \return 1 if bitmaps are equal, 0 otherwise.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_isequal (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 using their lowest index.
+ *
+ * A bitmap is considered smaller if its least significant bit is smaller.
+ * The empty bitmap is considered higher than anything (because its least significant bit does not exist).
+ *
+ * \return -1 if \p bitmap1 is considered smaller than \p bitmap2.
+ * \return 1 if \p bitmap1 is considered larger than \p bitmap2.
+ *
+ * For instance comparing binary bitmaps 0011 and 0110 returns -1
+ * (hence 0011 is considered smaller than 0110)
+ * because the least significant bit of 0011 (0001) is smaller than the least significant bit of 0110 (0010).
+ * Comparing 01001 and 00110 would also return -1 for the same reason.
+ *
+ * \return 0 if bitmaps are considered equal, even if they are not strictly equal.
+ * They just need to have the same least significant bit.
+ * For instance, comparing binary bitmaps 0010 and 0110 returns 0 because they have the same least significant bit.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_compare_first(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 in lexicographic order.
+ *
+ * Lexicographic comparison of bitmaps, starting from their highest indexes.
+ * The highest indexes are compared first, then the next lower ones, etc.
+ * The empty bitmap is considered lower than anything.
+ *
+ * \return -1 if \p bitmap1 is considered smaller than \p bitmap2.
+ * \return 1 if \p bitmap1 is considered larger than \p bitmap2.
+ * \return 0 if bitmaps are equal (contrary to hwloc_bitmap_compare_first()).
+ *
+ * For instance comparing binary bitmaps 0011 and 0110 returns -1
+ * (hence 0011 is considered smaller than 0110).
+ * Comparing 00101 and 01010 returns -1 too.
+ *
+ * \note This is different from the non-existing hwloc_bitmap_compare_last()
+ * which would only compare the highest index of each bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_compare(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_BITMAP_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/cuda.h b/src/3rdparty/hwloc/include/hwloc/cuda.h new file mode 100644 index 000000000..77c8473e6 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/cuda.h @@ -0,0 +1,220 @@
+/*
+ * Copyright © 2010-2017 Inria. All rights reserved.
+ * Copyright © 2010-2011 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and the CUDA Driver API.
+ *
+ * Applications that use both hwloc and the CUDA Driver API may want to
+ * include this file so as to get topology information for CUDA devices.
+ *
+ */
+
+#ifndef HWLOC_CUDA_H
+#define HWLOC_CUDA_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <cuda.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_cuda Interoperability with the CUDA Driver API
+ *
+ * This interface offers ways to retrieve topology information about
+ * CUDA devices when using the CUDA Driver API.
+ *
+ * @{
+ */
+
+/** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
+ *
+ * Device \p cudevice must match the local machine.
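+ *
+ * Editor's note, a hypothetical usage sketch (not upstream text; it assumes
+ * the CUDA driver API was initialized and \c cudevice was obtained with
+ * cuDeviceGet()):
+ * \code
+ * int domain, bus, dev;
+ * if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev) == 0)
+ *   printf("%04x:%02x:%02x\n", domain, bus, dev);
+ * \endcode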
+ */ +static __hwloc_inline int +hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused, + CUdevice cudevice, int *domain, int *bus, int *dev) +{ + CUresult cres; + +#if CUDA_VERSION >= 4000 + cres = cuDeviceGetAttribute(domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice); + if (cres != CUDA_SUCCESS) { + errno = ENOSYS; + return -1; + } +#else + *domain = 0; +#endif + cres = cuDeviceGetAttribute(bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice); + if (cres != CUDA_SUCCESS) { + errno = ENOSYS; + return -1; + } + cres = cuDeviceGetAttribute(dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice); + if (cres != CUDA_SUCCESS) { + errno = ENOSYS; + return -1; + } + + return 0; +} + +/** \brief Get the CPU set of logical processors that are physically + * close to device \p cudevice. + * + * Return the CPU set describing the locality of the CUDA device \p cudevice. + * + * Topology \p topology and device \p cudevice must match the local machine. + * I/O devices detection and the CUDA component are not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_cuda_get_device_osdev() + * and hwloc_cuda_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + CUdevice cudevice, hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX]; + int domainid, busid, deviceid; + + if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domainid, &busid, &deviceid)) + return -1; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domainid, busid, deviceid); + if (hwloc_linux_read_path_as_cpumask(path, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc PCI device object corresponding to the + * CUDA device \p cudevice. + * + * Return the PCI device object describing the CUDA device \p cudevice. + * Return NULL if there is none. + * + * Topology \p topology and device \p cudevice must match the local machine. + * I/O devices detection must be enabled in topology \p topology. + * The CUDA component is not needed in the topology. + */ +static __hwloc_inline hwloc_obj_t +hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice) +{ + int domain, bus, dev; + + if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev)) + return NULL; + + return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0); +} + +/** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice. + * + * Return the hwloc OS device object that describes the given + * CUDA device \p cudevice. Return NULL if there is none. + * + * Topology \p topology and device \p cudevice must match the local machine. + * I/O devices detection and the CUDA component must be enabled in the topology. 
+ * If not, the locality of the object may still be found using + * hwloc_cuda_get_device_cpuset(). + * + * \note This function cannot work if PCI devices are filtered out. + * + * \note The corresponding hwloc PCI device may be found by looking + * at the result parent pointer (unless PCI devices are filtered out). + */ +static __hwloc_inline hwloc_obj_t +hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice) +{ + hwloc_obj_t osdev = NULL; + int domain, bus, dev; + + if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev)) + return NULL; + + osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + hwloc_obj_t pcidev = osdev->parent; + if (strncmp(osdev->name, "cuda", 4)) + continue; + if (pcidev + && pcidev->type == HWLOC_OBJ_PCI_DEVICE + && (int) pcidev->attr->pcidev.domain == domain + && (int) pcidev->attr->pcidev.bus == bus + && (int) pcidev->attr->pcidev.dev == dev + && pcidev->attr->pcidev.func == 0) + return osdev; + /* if PCI are filtered out, we need a info attr to match on */ + } + + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to the + * CUDA device whose index is \p idx. + * + * Return the OS device object describing the CUDA device whose + * index is \p idx. Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the CUDA component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object (unless PCI devices are filtered out). + * + * \note This function is identical to hwloc_cudart_get_device_osdev_by_index(). + */ +static __hwloc_inline hwloc_obj_t +hwloc_cuda_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type + && osdev->name + && !strncmp("cuda", osdev->name, 4) + && atoi(osdev->name + 4) == (int) idx) + return osdev; + } + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_CUDA_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/cudart.h b/src/3rdparty/hwloc/include/hwloc/cudart.h new file mode 100644 index 000000000..63c7f59c6 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/cudart.h @@ -0,0 +1,177 @@ +/* + * Copyright © 2010-2017 Inria. All rights reserved. + * Copyright © 2010-2011 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and the CUDA Runtime API. + * + * Applications that use both hwloc and the CUDA Runtime API may want to + * include this file so as to get topology information for CUDA devices. + * + */ + +#ifndef HWLOC_CUDART_H +#define HWLOC_CUDART_H + +#include <hwloc.h> +#include <hwloc/autogen/config.h> +#include <hwloc/helper.h> +#ifdef HWLOC_LINUX_SYS +#include <hwloc/linux.h> +#endif + +#include <cuda.h> /* for CUDA_VERSION */ +#include <cuda_runtime_api.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_cudart Interoperability with the CUDA Runtime API + * + * This interface offers ways to retrieve topology information about + * CUDA devices when using the CUDA Runtime API. 
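+ *
+ * Editor's note, an illustrative sketch (not upstream text): bind the
+ * current thread near CUDA runtime device 0:
+ * \code
+ * hwloc_cpuset_t set = hwloc_bitmap_alloc();
+ * if (hwloc_cudart_get_device_cpuset(topology, 0, set) == 0)
+ *   hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD);
+ * hwloc_bitmap_free(set);
+ * \endcode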
+ * + * @{ + */ + +/** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx. + * + * Device index \p idx must match the local machine. + */ +static __hwloc_inline int +hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused, + int idx, int *domain, int *bus, int *dev) +{ + cudaError_t cerr; + struct cudaDeviceProp prop; + + cerr = cudaGetDeviceProperties(&prop, idx); + if (cerr) { + errno = ENOSYS; + return -1; + } + +#if CUDA_VERSION >= 4000 + *domain = prop.pciDomainID; +#else + *domain = 0; +#endif + + *bus = prop.pciBusID; + *dev = prop.pciDeviceID; + + return 0; +} + +/** \brief Get the CPU set of logical processors that are physically + * close to device \p idx. + * + * Return the CPU set describing the locality of the CUDA device + * whose index is \p idx. + * + * Topology \p topology and device \p idx must match the local machine. + * I/O devices detection and the CUDA component are not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_cudart_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + int idx, hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX]; + int domain, bus, dev; + + if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev)) + return -1; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", (unsigned) domain, (unsigned) bus, (unsigned) dev); + if (hwloc_linux_read_path_as_cpumask(path, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc PCI device object corresponding to the + * CUDA device whose index is \p idx. + * + * Return the PCI device object describing the CUDA device whose + * index is \p idx. Return NULL if there is none. + * + * Topology \p topology and device \p idx must match the local machine. + * I/O devices detection must be enabled in topology \p topology. + * The CUDA component is not needed in the topology. + */ +static __hwloc_inline hwloc_obj_t +hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx) +{ + int domain, bus, dev; + + if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev)) + return NULL; + + return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0); +} + +/** \brief Get the hwloc OS device object corresponding to the + * CUDA device whose index is \p idx. + * + * Return the OS device object describing the CUDA device whose + * index is \p idx. Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the CUDA component must be enabled in the topology. 
+ * If not, the locality of the object may still be found using + * hwloc_cudart_get_device_cpuset(). + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object (unless PCI devices are filtered out). + * + * \note This function is identical to hwloc_cuda_get_device_osdev_by_index(). + */ +static __hwloc_inline hwloc_obj_t +hwloc_cudart_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type + && osdev->name + && !strncmp("cuda", osdev->name, 4) + && atoi(osdev->name + 4) == (int) idx) + return osdev; + } + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_CUDART_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/deprecated.h b/src/3rdparty/hwloc/include/hwloc/deprecated.h new file mode 100644 index 000000000..8f3b1459a --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/deprecated.h @@ -0,0 +1,206 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** + * This file contains the inline code of functions declared in hwloc.h + */ + +#ifndef HWLOC_DEPRECATED_H +#define HWLOC_DEPRECATED_H + +#ifndef HWLOC_H +#error Please include the main hwloc.h instead +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* backward compat with v1.11 before System removal */ +#define HWLOC_OBJ_SYSTEM HWLOC_OBJ_MACHINE +/* backward compat with v1.10 before Socket->Package renaming */ +#define HWLOC_OBJ_SOCKET HWLOC_OBJ_PACKAGE +/* backward compat with v1.10 before Node->NUMANode clarification */ +#define HWLOC_OBJ_NODE HWLOC_OBJ_NUMANODE + +/** \brief Insert a misc object by parent. + * + * Identical to hwloc_topology_insert_misc_object(). + */ +static __hwloc_inline hwloc_obj_t +hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, const char *name) __hwloc_attribute_deprecated; +static __hwloc_inline hwloc_obj_t +hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, const char *name) +{ + return hwloc_topology_insert_misc_object(topology, parent, name); +} + +/** \brief Stringify the cpuset containing a set of objects. + * + * If \p size is 0, \p string may safely be \c NULL. + * + * \return the number of character that were actually written if not truncating, + * or that would have been written (not including the ending \\0). + */ +static __hwloc_inline int +hwloc_obj_cpuset_snprintf(char *str, size_t size, size_t nobj, struct hwloc_obj * const *objs) __hwloc_attribute_deprecated; +static __hwloc_inline int +hwloc_obj_cpuset_snprintf(char *str, size_t size, size_t nobj, struct hwloc_obj * const *objs) +{ + hwloc_bitmap_t set = hwloc_bitmap_alloc(); + int res; + unsigned i; + + hwloc_bitmap_zero(set); + for(i=0; i<nobj; i++) + if (objs[i]->cpuset) + hwloc_bitmap_or(set, set, objs[i]->cpuset); + + res = hwloc_bitmap_snprintf(str, size, set); + hwloc_bitmap_free(set); + return res; +} + +/** \brief Convert a type string into a type and some attributes. 
+ * + * Deprecated by hwloc_type_sscanf() + */ +static __hwloc_inline int +hwloc_obj_type_sscanf(const char *string, hwloc_obj_type_t *typep, int *depthattrp, void *typeattrp, size_t typeattrsize) __hwloc_attribute_deprecated; +static __hwloc_inline int +hwloc_obj_type_sscanf(const char *string, hwloc_obj_type_t *typep, int *depthattrp, void *typeattrp, size_t typeattrsize) +{ + union hwloc_obj_attr_u attr; + int err = hwloc_type_sscanf(string, typep, &attr, sizeof(attr)); + if (err < 0) + return err; + if (hwloc_obj_type_is_cache(*typep)) { + if (depthattrp) + *depthattrp = (int) attr.cache.depth; + if (typeattrp && typeattrsize >= sizeof(hwloc_obj_cache_type_t)) + memcpy(typeattrp, &attr.cache.type, sizeof(hwloc_obj_cache_type_t)); + } else if (*typep == HWLOC_OBJ_GROUP) { + if (depthattrp) + *depthattrp = (int) attr.group.depth; + } + return 0; +} + +/** \brief Set the default memory binding policy of the current + * process or thread to prefer the NUMA node(s) specified by physical \p nodeset + */ +static __hwloc_inline int +hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_deprecated; +static __hwloc_inline int +hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + return hwloc_set_membind(topology, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET); +} + +/** \brief Query the default memory binding policy and physical locality of the + * current process or thread. + */ +static __hwloc_inline int +hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) __hwloc_attribute_deprecated; +static __hwloc_inline int +hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + return hwloc_get_membind(topology, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET); +} + +/** \brief Set the default memory binding policy of the specified + * process to prefer the NUMA node(s) specified by physical \p nodeset + */ +static __hwloc_inline int +hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_deprecated; +static __hwloc_inline int +hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + return hwloc_set_proc_membind(topology, pid, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET); +} + +/** \brief Query the default memory binding policy and physical locality of the + * specified process. + */ +static __hwloc_inline int +hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) __hwloc_attribute_deprecated; +static __hwloc_inline int +hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + return hwloc_get_proc_membind(topology, pid, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET); +} + +/** \brief Bind the already-allocated memory identified by (addr, len) + * to the NUMA node(s) in physical \p nodeset. 
+ */ +static __hwloc_inline int +hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_deprecated; +static __hwloc_inline int +hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + return hwloc_set_area_membind(topology, addr, len, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET); +} + +/** \brief Query the physical NUMA node(s) and binding policy of the memory + * identified by (\p addr, \p len ). + */ +static __hwloc_inline int +hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) __hwloc_attribute_deprecated; +static __hwloc_inline int +hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + return hwloc_get_area_membind(topology, addr, len, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET); +} + +/** \brief Allocate some memory on the given physical nodeset \p nodeset + */ +static __hwloc_inline void * +hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc __hwloc_attribute_deprecated; +static __hwloc_inline void * +hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + return hwloc_alloc_membind(topology, len, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET); +} + +/** \brief Allocate some memory on the given nodeset \p nodeset. 
+ */ +static __hwloc_inline void * +hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc __hwloc_attribute_deprecated; +static __hwloc_inline void * +hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + return hwloc_alloc_membind_policy(topology, len, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET); +} + +/** \brief Convert a CPU set into a NUMA node set and handle non-NUMA cases + */ +static __hwloc_inline void +hwloc_cpuset_to_nodeset_strict(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset) __hwloc_attribute_deprecated; +static __hwloc_inline void +hwloc_cpuset_to_nodeset_strict(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset) +{ + hwloc_cpuset_to_nodeset(topology, _cpuset, nodeset); +} + +/** \brief Convert a NUMA node set into a CPU set and handle non-NUMA cases + */ +static __hwloc_inline void +hwloc_cpuset_from_nodeset_strict(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset) __hwloc_attribute_deprecated; +static __hwloc_inline void +hwloc_cpuset_from_nodeset_strict(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset) +{ + hwloc_cpuset_from_nodeset(topology, _cpuset, nodeset); +} + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_DEPRECATED_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/diff.h b/src/3rdparty/hwloc/include/hwloc/diff.h new file mode 100644 index 000000000..79f2df3de --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/diff.h @@ -0,0 +1,289 @@ +/* + * Copyright © 2013-2018 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Topology differences. + */ + +#ifndef HWLOC_DIFF_H +#define HWLOC_DIFF_H + +#ifndef HWLOC_H +#error Please include the main hwloc.h instead +#endif + + +#ifdef __cplusplus +extern "C" { +#elif 0 +} +#endif + + +/** \defgroup hwlocality_diff Topology differences + * + * Applications that manipulate many similar topologies, for instance + * one for each node of a homogeneous cluster, may want to compress + * topologies to reduce the memory footprint. + * + * This file offers a way to manipulate the difference between topologies + * and export/import it to/from XML. + * Compression may therefore be achieved by storing one topology + * entirely while the others are only described by their differences + * with the former. + * The actual topology can be reconstructed when actually needed by + * applying the precomputed difference to the reference topology. + * + * This interface targets very similar nodes. + * Only very simple differences between topologies are actually + * supported, for instance a change in the memory size, the name + * of the object, or some info attribute. + * More complex differences such as adding or removing objects cannot + * be represented in the difference structures and therefore return + * errors. + * Differences between object sets or topology-wide allowed sets, + * cannot be represented either. + * + * It means that there is no need to apply the difference when + * looking at the tree organization (how many levels, how many + * objects per level, what kind of objects, CPU and node sets, etc) + * and when binding to objects. 
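+ *
+ * Editor's note: a sketch of the intended build/export workflow (not
+ * upstream text; \c ref and \c node42 are hypothetical already-loaded
+ * topologies, and the file names are made up):
+ * \code
+ * hwloc_topology_diff_t diff;
+ * if (hwloc_topology_diff_build(ref, node42, 0, &diff) == 0) {
+ *   hwloc_topology_diff_export_xml(diff, "ref.xml", "node42.diff.xml");
+ *   hwloc_topology_diff_destroy(diff);
+ * }
+ * \endcode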
+ * However the difference must be applied when looking at object + * attributes such as the name, the memory size or info attributes. + * + * @{ + */ + + +/** \brief Type of one object attribute difference. + */ +typedef enum hwloc_topology_diff_obj_attr_type_e { + /** \brief The object local memory is modified. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_uint64_s + * (and the index field is ignored). + */ + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE, + + /** \brief The object name is modified. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s + * (and the name field is ignored). + */ + + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME, + /** \brief the value of an info attribute is modified. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s. + */ + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO +} hwloc_topology_diff_obj_attr_type_t; + +/** \brief One object attribute difference. + */ +union hwloc_topology_diff_obj_attr_u { + struct hwloc_topology_diff_obj_attr_generic_s { + /* each part of the union must start with these */ + hwloc_topology_diff_obj_attr_type_t type; + } generic; + + /** \brief Integer attribute modification with an optional index. */ + struct hwloc_topology_diff_obj_attr_uint64_s { + /* used for storing integer attributes */ + hwloc_topology_diff_obj_attr_type_t type; + hwloc_uint64_t index; /* not used for SIZE */ + hwloc_uint64_t oldvalue; + hwloc_uint64_t newvalue; + } uint64; + + /** \brief String attribute modification with an optional name */ + struct hwloc_topology_diff_obj_attr_string_s { + /* used for storing name and info pairs */ + hwloc_topology_diff_obj_attr_type_t type; + char *name; /* not used for NAME */ + char *oldvalue; + char *newvalue; + } string; +}; + + +/** \brief Type of one element of a difference list. + */ +typedef enum hwloc_topology_diff_type_e { + /** \brief An object attribute was changed. + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_s. + */ + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR, + + /** \brief The difference is too complex, + * it cannot be represented. The difference below + * this object has not been checked. + * hwloc_topology_diff_build() will return 1. + * + * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_too_complex_s. + */ + HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX +} hwloc_topology_diff_type_t; + +/** \brief One element of a difference list between two topologies. + */ +typedef union hwloc_topology_diff_u { + struct hwloc_topology_diff_generic_s { + /* each part of the union must start with these */ + hwloc_topology_diff_type_t type; + union hwloc_topology_diff_u * next; /* pointer to the next element of the list, or NULL */ + } generic; + + /* A difference in an object attribute. */ + struct hwloc_topology_diff_obj_attr_s { + hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_OBJ_ATTR */ + union hwloc_topology_diff_u * next; + /* List of attribute differences for a single object */ + int obj_depth; + unsigned obj_index; + union hwloc_topology_diff_obj_attr_u diff; + } obj_attr; + + /* A difference that is too complex. */ + struct hwloc_topology_diff_too_complex_s { + hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX */ + union hwloc_topology_diff_u * next; + /* Where we had to stop computing the diff in the first topology */ + int obj_depth; + unsigned obj_index; + } too_complex; +} * hwloc_topology_diff_t; + + +/** \brief Compute the difference between 2 topologies. 
+ *
+ * The difference is stored as a list of ::hwloc_topology_diff_t entries
+ * starting at \p diff.
+ * It is computed by doing a depth-first traversal of both topology trees
+ * simultaneously.
+ *
+ * If the difference between 2 objects is too complex to be represented
+ * (for instance if some objects have different types, or different numbers
+ * of children), a special diff entry of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX
+ * is queued.
+ * The computation of the diff does not continue below these objects.
+ * So each such diff entry means that the difference between two subtrees
+ * could not be computed.
+ *
+ * \return 0 if the difference can be represented properly.
+ *
+ * \return 0 with \p diff pointing to NULL if there is no difference
+ * between the topologies.
+ *
+ * \return 1 if the difference is too complex (see above). Some entries in
+ * the list will be of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX.
+ *
+ * \return -1 on any other error.
+ *
+ * \note \p flags is currently not used. It should be 0.
+ *
+ * \note The output diff has to be freed with hwloc_topology_diff_destroy().
+ *
+ * \note The output diff can only be exported to XML or passed to
+ * hwloc_topology_diff_apply() if 0 was returned, i.e. if no entry of type
+ * ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX is listed.
+ *
+ * \note The output diff may be modified by removing some entries from
+ * the list. The removed entries should be freed by passing them
+ * to hwloc_topology_diff_destroy() (possibly as another list).
+*/
+HWLOC_DECLSPEC int hwloc_topology_diff_build(hwloc_topology_t topology, hwloc_topology_t newtopology, unsigned long flags, hwloc_topology_diff_t *diff);
+
+/** \brief Flags to be given to hwloc_topology_diff_apply().
+ */
+enum hwloc_topology_diff_apply_flags_e {
+  /** \brief Apply topology diff in reverse direction.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE = (1UL<<0)
+};
+
+/** \brief Apply a topology diff to an existing topology.
+ *
+ * \p flags is an OR'ed set of ::hwloc_topology_diff_apply_flags_e.
+ *
+ * The new topology is modified in place. hwloc_topology_dup()
+ * may be used to duplicate it before patching.
+ *
+ * If the difference cannot be applied entirely, all previously applied
+ * elements are unapplied before returning.
+ *
+ * \return 0 on success.
+ *
+ * \return -N if applying the difference failed while trying
+ * to apply the N-th part of the difference. For instance -1
+ * is returned if the very first difference element could not
+ * be applied.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags);
+
+/** \brief Destroy a list of topology differences.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);
+
+/** \brief Load a list of topology differences from a XML file.
+ *
+ * If not \c NULL, \p refname will be filled with the identifier
+ * string of the reference topology for the difference file,
+ * if any was specified in the XML file.
+ * This identifier is usually the name of the other XML file
+ * that contains the reference topology.
+ *
+ * \note the pointer returned in refname should later be freed
+ * by the caller.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(const char *xmlpath, hwloc_topology_diff_t *diff, char **refname);
+
+/** \brief Export a list of topology differences to a XML file.
+ * + * If not \c NULL, \p refname defines an identifier string + * for the reference topology which was used as a base when + * computing this difference. + * This identifier is usually the name of the other XML file + * that contains the reference topology. + * This attribute is given back when reading the diff from XML. + */ +HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname, const char *xmlpath); + +/** \brief Load a list of topology differences from a XML buffer. + * + * If not \c NULL, \p refname will be filled with the identifier + * string of the reference topology for the difference file, + * if any was specified in the XML file. + * This identifier is usually the name of the other XML file + * that contains the reference topology. + * + * \note the pointer returned in refname should later be freed + * by the caller. + */ +HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int buflen, hwloc_topology_diff_t *diff, char **refname); + +/** \brief Export a list of topology differences to a XML buffer. + * + * If not \c NULL, \p refname defines an identifier string + * for the reference topology which was used as a base when + * computing this difference. + * This identifier is usually the name of the other XML file + * that contains the reference topology. + * This attribute is given back when reading the diff from XML. + * + * The returned buffer ends with a \0 that is included in the returned + * length. + * + * \note The XML buffer should later be freed with hwloc_free_xmlbuffer(). + */ +HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen); + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_DIFF_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h new file mode 100644 index 000000000..d523f29fc --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/distances.h @@ -0,0 +1,271 @@ +/* + * Copyright © 2010-2019 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Object distances. + */ + +#ifndef HWLOC_DISTANCES_H +#define HWLOC_DISTANCES_H + +#ifndef HWLOC_H +#error Please include the main hwloc.h instead +#endif + + +#ifdef __cplusplus +extern "C" { +#elif 0 +} +#endif + + +/** \defgroup hwlocality_distances_get Retrieve distances between objects + * @{ + */ + +/** \brief Matrix of distances between a set of objects. + * + * This matrix often contains latencies between NUMA nodes + * (as reported in the System Locality Distance Information Table (SLIT) + * in the ACPI specification), which may or may not be physically accurate. + * It corresponds to the latency for accessing the memory of one node + * from a core in another node. + * The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER. + * + * The matrix may also contain bandwidths between random sets of objects, + * possibly provided by the user, as specified in the \p kind attribute. + */ +struct hwloc_distances_s { + unsigned nbobjs; /**< \brief Number of objects described by the distance matrix. */ + hwloc_obj_t *objs; /**< \brief Array of objects described by the distance matrix. + * These objects are not in any particular order, + * see hwloc_distances_obj_index() and hwloc_distances_obj_pair_values() + * for easy ways to find objects in this array and their corresponding values. 
+ */ + unsigned long kind; /**< \brief OR'ed set of ::hwloc_distances_kind_e. */ + hwloc_uint64_t *values; /**< \brief Matrix of distances between objects, stored as a one-dimension array. + * + * Distance from i-th to j-th object is stored in slot i*nbobjs+j. + * The meaning of the value depends on the \p kind attribute. + */ +}; + +/** \brief Kinds of distance matrices. + * + * The \p kind attribute of struct hwloc_distances_s is a OR'ed set + * of kinds. + * + * A kind of format HWLOC_DISTANCES_KIND_FROM_* specifies where the + * distance information comes from, if known. + * + * A kind of format HWLOC_DISTANCES_KIND_MEANS_* specifies whether + * values are latencies or bandwidths, if applicable. + */ +enum hwloc_distances_kind_e { + /** \brief These distances were obtained from the operating system or hardware. + * \hideinitializer + */ + HWLOC_DISTANCES_KIND_FROM_OS = (1UL<<0), + /** \brief These distances were provided by the user. + * \hideinitializer + */ + HWLOC_DISTANCES_KIND_FROM_USER = (1UL<<1), + + /** \brief Distance values are similar to latencies between objects. + * Values are smaller for closer objects, hence minimal on the diagonal + * of the matrix (distance between an object and itself). + * It could also be the number of network hops between objects, etc. + * \hideinitializer + */ + HWLOC_DISTANCES_KIND_MEANS_LATENCY = (1UL<<2), + /** \brief Distance values are similar to bandwidths between objects. + * Values are higher for closer objects, hence maximal on the diagonal + * of the matrix (distance between an object and itself). + * Such values are currently ignored for distance-based grouping. + * \hideinitializer + */ + HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3) +}; + +/** \brief Retrieve distance matrices. + * + * Retrieve distance matrices from the topology into the \p distances array. + * + * \p flags is currently unused, should be \c 0. + * + * \p kind serves as a filter. If \c 0, all distance matrices are returned. + * If it contains some HWLOC_DISTANCES_KIND_FROM_*, only distance matrices + * whose kind matches one of these are returned. + * If it contains some HWLOC_DISTANCES_KIND_MEANS_*, only distance matrices + * whose kind matches one of these are returned. + * + * On input, \p nr points to the number of distance matrices that may be stored + * in \p distances. + * On output, \p nr points to the number of distance matrices that were actually + * found, even if some of them couldn't be stored in \p distances. + * Distance matrices that couldn't be stored are ignored, but the function still + * returns success (\c 0). The caller may find out by comparing the value pointed + * by \p nr before and after the function call. + * + * Each distance matrix returned in the \p distances array should be released + * by the caller using hwloc_distances_release(). + */ +HWLOC_DECLSPEC int +hwloc_distances_get(hwloc_topology_t topology, + unsigned *nr, struct hwloc_distances_s **distances, + unsigned long kind, unsigned long flags); + +/** \brief Retrieve distance matrices for object at a specific depth in the topology. + * + * Identical to hwloc_distances_get() with the additional \p depth filter. + */ +HWLOC_DECLSPEC int +hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, + unsigned *nr, struct hwloc_distances_s **distances, + unsigned long kind, unsigned long flags); + +/** \brief Retrieve distance matrices for object of a specific type. + * + * Identical to hwloc_distances_get() with the additional \p type filter. 
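+ *
+ * As an illustrative sketch (an editor's example, not upstream documentation),
+ * printing the NUMA latency matrix, if any, could look like:
+ * \code
+ * struct hwloc_distances_s *dist;
+ * unsigned nr = 1, i, j;
+ * if (!hwloc_distances_get_by_type(topology, HWLOC_OBJ_NUMANODE, &nr, &dist,
+ *                                  HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0) && nr) {
+ *   for(i=0; i<dist->nbobjs; i++)
+ *     for(j=0; j<dist->nbobjs; j++)
+ *       printf("%llu%c", (unsigned long long) dist->values[i*dist->nbobjs+j],
+ *              j == dist->nbobjs-1 ? '\n' : ' ');
+ *   hwloc_distances_release(topology, dist);
+ * }
+ * \endcode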
+ */
+static __hwloc_inline int
+hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
+                            unsigned *nr, struct hwloc_distances_s **distances,
+                            unsigned long kind, unsigned long flags)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) {
+    *nr = 0;
+    return 0;
+  }
+  return hwloc_distances_get_by_depth(topology, depth, nr, distances, kind, flags);
+}
+
+/** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). */
+HWLOC_DECLSPEC void
+hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_distances_consult Helpers for consulting distance matrices
+ * @{
+ */
+
+/** \brief Find the index of an object in a distances structure.
+ *
+ * \return -1 if object \p obj is not involved in structure \p distances.
+ */
+static __hwloc_inline int
+hwloc_distances_obj_index(struct hwloc_distances_s *distances, hwloc_obj_t obj)
+{
+  unsigned i;
+  for(i=0; i<distances->nbobjs; i++)
+    if (distances->objs[i] == obj)
+      return (int)i;
+  return -1;
+}
+
+/** \brief Find the values between two objects in a distances structure.
+ *
+ * The distance from \p obj1 to \p obj2 is stored in the value pointed to by
+ * \p value1to2 and reciprocally.
+ *
+ * \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances.
+ */
+static __hwloc_inline int
+hwloc_distances_obj_pair_values(struct hwloc_distances_s *distances,
+                                hwloc_obj_t obj1, hwloc_obj_t obj2,
+                                hwloc_uint64_t *value1to2, hwloc_uint64_t *value2to1)
+{
+  int i1 = hwloc_distances_obj_index(distances, obj1);
+  int i2 = hwloc_distances_obj_index(distances, obj2);
+  if (i1 < 0 || i2 < 0)
+    return -1;
+  *value1to2 = distances->values[i1 * distances->nbobjs + i2];
+  *value2to1 = distances->values[i2 * distances->nbobjs + i1];
+  return 0;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_distances_add Add or remove distances between objects
+ * @{
+ */
+
+/** \brief Flags for adding new distances to a topology. */
+enum hwloc_distances_add_flag_e {
+  /** \brief Try to group objects based on the newly provided distance information.
+   * \hideinitializer
+   */
+  HWLOC_DISTANCES_ADD_FLAG_GROUP = (1UL<<0),
+  /** \brief If grouping, consider the distance values as inaccurate and relax the
+   * comparisons during the grouping algorithms. The actual accuracy may be modified
+   * through the HWLOC_GROUPING_ACCURACY environment variable (see \ref envvar).
+   * \hideinitializer
+   */
+  HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE = (1UL<<1)
+};
+
+/** \brief Provide a new distance matrix.
+ *
+ * Provide the matrix of distances between a set of objects given by \p nbobjs
+ * and the \p objs array. \p nbobjs must be at least 2.
+ * The distances are stored as a one-dimension array in \p values.
+ * The distance from object i to object j is in slot i*nbobjs+j.
+ *
+ * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e.
+ *
+ * \p flags configures the behavior of the function using an optional OR'ed set of
+ * ::hwloc_distances_add_flag_e.
+ *
+ * Objects must be of the same type. They cannot be of type Group.
+ */
+HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
+                                       unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
+                                       unsigned long kind, unsigned long flags);
+
+/** \brief Remove all distance matrices from a topology.
+ * + * Remove all distance matrices, either provided by the user or + * gathered through the OS. + * + * If these distances were used to group objects, these additional + *Group objects are not removed from the topology. + */ +HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology); + +/** \brief Remove distance matrices for objects at a specific depth in the topology. + * + * Identical to hwloc_distances_remove() but only applies to one level of the topology. + */ +HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth); + +/** \brief Remove distance matrices for objects of a specific type in the topology. + * + * Identical to hwloc_distances_remove() but only applies to one level of the topology. + */ +static __hwloc_inline int +hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return 0; + return hwloc_distances_remove_by_depth(topology, depth); +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_DISTANCES_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/export.h b/src/3rdparty/hwloc/include/hwloc/export.h new file mode 100644 index 000000000..b178b77e5 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/export.h @@ -0,0 +1,278 @@ +/* + * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Exporting Topologies to XML or to Synthetic strings. + */ + +#ifndef HWLOC_EXPORT_H +#define HWLOC_EXPORT_H + +#ifndef HWLOC_H +#error Please include the main hwloc.h instead +#endif + + +#ifdef __cplusplus +extern "C" { +#elif 0 +} +#endif + + +/** \defgroup hwlocality_xmlexport Exporting Topologies to XML + * @{ + */ + +/** \brief Flags for exporting XML topologies. + * + * Flags to be given as a OR'ed set to hwloc_topology_export_xml(). + */ +enum hwloc_topology_export_xml_flags_e { + /** \brief Export XML that is loadable by hwloc v1.x. + * However, the export may miss some details about the topology. + * \hideinitializer + */ + HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1 = (1UL<<0) +}; + +/** \brief Export the topology into an XML file. + * + * This file may be loaded later through hwloc_topology_set_xml(). + * + * By default, the latest export format is used, which means older hwloc + * releases (e.g. v1.x) will not be able to import it. + * Exporting to v1.x specific XML format is possible using flag + * ::HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1 but it may miss some details + * about the topology. + * If there is any chance that the exported file may ever be imported + * back by a process using hwloc 1.x, one should consider detecting + * it at runtime and using the corresponding export format. + * + * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e. + * + * \return -1 if a failure occured. + * + * \note See also hwloc_topology_set_userdata_export_callback() + * for exporting application-specific object userdata. + * + * \note The topology-specific userdata pointer is ignored when exporting to XML. + * + * \note Only printable characters may be exported to XML string attributes. + * Any other character, especially any non-ASCII character, will be silently + * dropped. + * + * \note If \p name is "-", the XML output is sent to the standard output. 
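+ *
+ * A minimal call sequence (an editor's sketch, not upstream documentation,
+ * with "topo.xml" as an arbitrary output path) could be:
+ * \code
+ * hwloc_topology_t topology;
+ * hwloc_topology_init(&topology);
+ * hwloc_topology_load(topology);
+ * if (hwloc_topology_export_xml(topology, "topo.xml", 0) < 0)
+ *   fprintf(stderr, "XML export failed\n");
+ * hwloc_topology_destroy(topology);
+ * \endcode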
+ */ +HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const char *xmlpath, unsigned long flags); + +/** \brief Export the topology into a newly-allocated XML memory buffer. + * + * \p xmlbuffer is allocated by the callee and should be freed with + * hwloc_free_xmlbuffer() later in the caller. + * + * This memory buffer may be loaded later through hwloc_topology_set_xmlbuffer(). + * + * By default, the latest export format is used, which means older hwloc + * releases (e.g. v1.x) will not be able to import it. + * Exporting to v1.x specific XML format is possible using flag + * ::HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1 but it may miss some details + * about the topology. + * If there is any chance that the exported buffer may ever be imported + * back by a process using hwloc 1.x, one should consider detecting + * it at runtime and using the corresponding export format. + * + * The returned buffer ends with a \0 that is included in the returned + * length. + * + * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e. + * + * \return -1 if a failure occured. + * + * \note See also hwloc_topology_set_userdata_export_callback() + * for exporting application-specific object userdata. + * + * \note The topology-specific userdata pointer is ignored when exporting to XML. + * + * \note Only printable characters may be exported to XML string attributes. + * Any other character, especially any non-ASCII character, will be silently + * dropped. + */ +HWLOC_DECLSPEC int hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen, unsigned long flags); + +/** \brief Free a buffer allocated by hwloc_topology_export_xmlbuffer() */ +HWLOC_DECLSPEC void hwloc_free_xmlbuffer(hwloc_topology_t topology, char *xmlbuffer); + +/** \brief Set the application-specific callback for exporting object userdata + * + * The object userdata pointer is not exported to XML by default because hwloc + * does not know what it contains. + * + * This function lets applications set \p export_cb to a callback function + * that converts this opaque userdata into an exportable string. + * + * \p export_cb is invoked during XML export for each object whose + * \p userdata pointer is not \c NULL. + * The callback should use hwloc_export_obj_userdata() or + * hwloc_export_obj_userdata_base64() to actually export + * something to XML (possibly multiple times per object). + * + * \p export_cb may be set to \c NULL if userdata should not be exported to XML. + * + * \note The topology-specific userdata pointer is ignored when exporting to XML. + */ +HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t topology, + void (*export_cb)(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj)); + +/** \brief Export some object userdata to XML + * + * This function may only be called from within the export() callback passed + * to hwloc_topology_set_userdata_export_callback(). + * It may be invoked one of multiple times to export some userdata to XML. + * The \p buffer content of length \p length is stored with optional name + * \p name. + * + * When importing this XML file, the import() callback (if set) will be + * called exactly as many times as hwloc_export_obj_userdata() was called + * during export(). It will receive the corresponding \p name, \p buffer + * and \p length arguments. + * + * \p reserved, \p topology and \p obj must be the first three parameters + * that were given to the export callback. 
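+ *
+ * As an illustration (an editor's sketch assuming the application stores a
+ * NUL-terminated string in each object's \p userdata, and that <string.h>
+ * is included; "mydata" is an arbitrary name), such a callback could be:
+ * \code
+ * static void export_cb(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj)
+ * {
+ *   const char *s = (const char *) obj->userdata;
+ *   hwloc_export_obj_userdata(reserved, topology, obj, "mydata", s, strlen(s));
+ * }
+ * \endcode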
+ * + * Only printable characters may be exported to XML string attributes. + * If a non-printable character is passed in \p name or \p buffer, + * the function returns -1 with errno set to EINVAL. + * + * If exporting binary data, the application should first encode into + * printable characters only (or use hwloc_export_obj_userdata_base64()). + * It should also take care of portability issues if the export may + * be reimported on a different architecture. + */ +HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); + +/** \brief Encode and export some object userdata to XML + * + * This function is similar to hwloc_export_obj_userdata() but it encodes + * the input buffer into printable characters before exporting. + * On import, decoding is automatically performed before the data is given + * to the import() callback if any. + * + * This function may only be called from within the export() callback passed + * to hwloc_topology_set_userdata_export_callback(). + * + * The function does not take care of portability issues if the export + * may be reimported on a different architecture. + */ +HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); + +/** \brief Set the application-specific callback for importing userdata + * + * On XML import, userdata is ignored by default because hwloc does not know + * how to store it in memory. + * + * This function lets applications set \p import_cb to a callback function + * that will get the XML-stored userdata and store it in the object as expected + * by the application. + * + * \p import_cb is called during hwloc_topology_load() as many times as + * hwloc_export_obj_userdata() was called during export. The topology + * is not entirely setup yet. Object attributes are ready to consult, + * but links between objects are not. + * + * \p import_cb may be \c NULL if userdata should be ignored during import. + * + * \note \p buffer contains \p length characters followed by a null byte ('\0'). + * + * \note This function should be called before hwloc_topology_load(). + * + * \note The topology-specific userdata pointer is ignored when importing from XML. + */ +HWLOC_DECLSPEC void hwloc_topology_set_userdata_import_callback(hwloc_topology_t topology, + void (*import_cb)(hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length)); + +/** @} */ + + +/** \defgroup hwlocality_syntheticexport Exporting Topologies to Synthetic + * @{ + */ + +/** \brief Flags for exporting synthetic topologies. + * + * Flags to be given as a OR'ed set to hwloc_topology_export_synthetic(). + */ +enum hwloc_topology_export_synthetic_flags_e { + /** \brief Export extended types such as L2dcache as basic types such as Cache. + * + * This is required if loading the synthetic description with hwloc < 1.9. + * \hideinitializer + */ + HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES = (1UL<<0), + + /** \brief Do not export level attributes. + * + * Ignore level attributes such as memory/cache sizes or PU indexes. + * This is required if loading the synthetic description with hwloc < 1.10. + * \hideinitializer + */ + HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS = (1UL<<1), + + /** \brief Export the memory hierarchy as expected in hwloc 1.x. 
+ * + * Instead of attaching memory children to levels, export single NUMA node child + * as normal intermediate levels, when possible. + * This is required if loading the synthetic description with hwloc 1.x. + * However this may fail if some objects have multiple local NUMA nodes. + * \hideinitializer + */ + HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1 = (1UL<<2), + + /** \brief Do not export memory information. + * + * Only export the actual hierarchy of normal CPU-side objects and ignore + * where memory is attached. + * This is useful for when the hierarchy of CPUs is what really matters, + * but it behaves as if there was a single machine-wide NUMA node. + * \hideinitializer + */ + HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_IGNORE_MEMORY = (1UL<<3) +}; + +/** \brief Export the topology as a synthetic string. + * + * At most \p buflen characters will be written in \p buffer, + * including the terminating \0. + * + * This exported string may be given back to hwloc_topology_set_synthetic(). + * + * \p flags is a OR'ed set of ::hwloc_topology_export_synthetic_flags_e. + * + * \return The number of characters that were written, + * not including the terminating \0. + * + * \return -1 if the topology could not be exported, + * for instance if it is not symmetric. + * + * \note I/O and Misc children are ignored, the synthetic string only + * describes normal children. + * + * \note A 1024-byte buffer should be large enough for exporting + * topologies in the vast majority of cases. + */ + HWLOC_DECLSPEC int hwloc_topology_export_synthetic(hwloc_topology_t topology, char *buffer, size_t buflen, unsigned long flags); + +/** @} */ + + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_EXPORT_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/gl.h b/src/3rdparty/hwloc/include/hwloc/gl.h new file mode 100644 index 000000000..3e643fa9a --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/gl.h @@ -0,0 +1,135 @@ +/* + * Copyright © 2012 Blue Brain Project, EPFL. All rights reserved. + * Copyright © 2012-2013 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and OpenGL displays. + * + * Applications that use both hwloc and OpenGL may want to include + * this file so as to get topology information for OpenGL displays. + */ + +#ifndef HWLOC_GL_H +#define HWLOC_GL_H + +#include <hwloc.h> + +#include <stdio.h> +#include <string.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_gl Interoperability with OpenGL displays + * + * This interface offers ways to retrieve topology information about + * OpenGL displays. + * + * Only the NVIDIA display locality information is currently available, + * using the NV-CONTROL X11 extension and the NVCtrl library. + * + * @{ + */ + +/** \brief Get the hwloc OS device object corresponding to the + * OpenGL display given by port and device index. + * + * Return the OS device object describing the OpenGL display + * whose port (server) is \p port and device (screen) is \p device. + * Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the GL component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object (unless PCI devices are filtered out). 
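+ *
+ * For illustration (an editor's sketch), looking up the OS device behind
+ * display ":0.0" could be done with:
+ * \code
+ * hwloc_obj_t osdev = hwloc_gl_get_display_osdev_by_port_device(topology, 0, 0);
+ * if (osdev)
+ *   printf("display :0.0 is OS device %s\n", osdev->name);
+ * \endcode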
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_gl_get_display_osdev_by_port_device(hwloc_topology_t topology, + unsigned port, unsigned device) +{ + unsigned x = (unsigned) -1, y = (unsigned) -1; + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type + && osdev->name + && sscanf(osdev->name, ":%u.%u", &x, &y) == 2 + && port == x && device == y) + return osdev; + } + errno = EINVAL; + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to the + * OpenGL display given by name. + * + * Return the OS device object describing the OpenGL display + * whose name is \p name, built as ":port.device" such as ":0.0" . + * Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the GL component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object (unless PCI devices are filtered out). + */ +static __hwloc_inline hwloc_obj_t +hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology, + const char *name) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type + && osdev->name + && !strcmp(name, osdev->name)) + return osdev; + } + errno = EINVAL; + return NULL; +} + +/** \brief Get the OpenGL display port and device corresponding + * to the given hwloc OS object. + * + * Return the OpenGL display port (server) in \p port and device (screen) + * in \p screen that correspond to the given hwloc OS device object. + * Return \c -1 if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the GL component must be enabled in the topology. + */ +static __hwloc_inline int +hwloc_gl_get_display_by_osdev(hwloc_topology_t topology __hwloc_attribute_unused, + hwloc_obj_t osdev, + unsigned *port, unsigned *device) +{ + unsigned x = -1, y = -1; + if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type + && sscanf(osdev->name, ":%u.%u", &x, &y) == 2) { + *port = x; + *device = y; + return 0; + } + errno = EINVAL; + return -1; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_GL_H */ + diff --git a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h new file mode 100644 index 000000000..1f9ba7cdd --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h @@ -0,0 +1,125 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2013 inria. All rights reserved. + * Copyright © 2009-2011 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and glibc scheduling routines. + * + * Applications that use both hwloc and glibc scheduling routines such as + * sched_getaffinity() or pthread_attr_setaffinity_np() may want to include + * this file so as to ease conversion between their respective types. 
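+ *
+ * For instance (an editor's sketch, assuming \c set is a hwloc cpuset that
+ * was obtained on the current machine):
+ * \code
+ * cpu_set_t schedset;
+ * hwloc_cpuset_to_glibc_sched_affinity(topology, set, &schedset, sizeof(schedset));
+ * sched_setaffinity(0, sizeof(schedset), &schedset);
+ * \endcode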
+ */ + +#ifndef HWLOC_GLIBC_SCHED_H +#define HWLOC_GLIBC_SCHED_H + +#include <hwloc.h> +#include <hwloc/helper.h> +#include <assert.h> + +#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority) +#error Please make sure to include sched.h before including glibc-sched.h, and define _GNU_SOURCE before any inclusion of sched.h +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef HWLOC_HAVE_CPU_SET + + +/** \defgroup hwlocality_glibc_sched Interoperability with glibc sched affinity + * + * This interface offers ways to convert between hwloc cpusets and glibc cpusets + * such as those manipulated by sched_getaffinity() or pthread_attr_setaffinity_np(). + * + * \note Topology \p topology must match the current machine. + * + * @{ + */ + + +/** \brief Convert hwloc CPU set \p toposet into glibc sched affinity CPU set \p schedset + * + * This function may be used before calling sched_setaffinity or any other function + * that takes a cpu_set_t as input parameter. + * + * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC + */ +static __hwloc_inline int +hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset, + cpu_set_t *schedset, size_t schedsetsize) +{ +#ifdef CPU_ZERO_S + unsigned cpu; + CPU_ZERO_S(schedsetsize, schedset); + hwloc_bitmap_foreach_begin(cpu, hwlocset) + CPU_SET_S(cpu, schedsetsize, schedset); + hwloc_bitmap_foreach_end(); +#else /* !CPU_ZERO_S */ + unsigned cpu; + CPU_ZERO(schedset); + assert(schedsetsize == sizeof(cpu_set_t)); + hwloc_bitmap_foreach_begin(cpu, hwlocset) + CPU_SET(cpu, schedset); + hwloc_bitmap_foreach_end(); +#endif /* !CPU_ZERO_S */ + return 0; +} + +/** \brief Convert glibc sched affinity CPU set \p schedset into hwloc CPU set + * + * This function may be used before calling sched_setaffinity or any other function + * that takes a cpu_set_t as input parameter. + * + * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC + */ +static __hwloc_inline int +hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset, + const cpu_set_t *schedset, size_t schedsetsize) +{ + int cpu; +#ifdef CPU_ZERO_S + int count; +#endif + hwloc_bitmap_zero(hwlocset); +#ifdef CPU_ZERO_S + count = CPU_COUNT_S(schedsetsize, schedset); + cpu = 0; + while (count) { + if (CPU_ISSET_S(cpu, schedsetsize, schedset)) { + hwloc_bitmap_set(hwlocset, cpu); + count--; + } + cpu++; + } +#else /* !CPU_ZERO_S */ + /* sched.h does not support dynamic cpu_set_t (introduced in glibc 2.7), + * assume we have a very old interface without CPU_COUNT (added in 2.6) + */ + assert(schedsetsize == sizeof(cpu_set_t)); + for(cpu=0; cpu<CPU_SETSIZE; cpu++) + if (CPU_ISSET(cpu, schedset)) + hwloc_bitmap_set(hwlocset, cpu); +#endif /* !CPU_ZERO_S */ + return 0; +} + +/** @} */ + + +#endif /* CPU_SET */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_GLIBC_SCHED_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/helper.h b/src/3rdparty/hwloc/include/hwloc/helper.h new file mode 100644 index 000000000..d48df15f3 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/helper.h @@ -0,0 +1,1160 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. 
+ * See COPYING in top-level directory. + */ + +/** \file + * \brief High-level hwloc traversal helpers. + */ + +#ifndef HWLOC_HELPER_H +#define HWLOC_HELPER_H + +#ifndef HWLOC_H +#error Please include the main hwloc.h instead +#endif + +#include <stdlib.h> +#include <errno.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_helper_find_inside Finding Objects inside a CPU set + * @{ + */ + +/** \brief Get the first largest object included in the given cpuset \p set. + * + * \return the first object that is included in \p set and whose parent is not. + * + * This is convenient for iterating over all largest objects within a CPU set + * by doing a loop getting the first largest object and clearing its CPU set + * from the remaining CPU set. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_first_largest_obj_inside_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t set) +{ + hwloc_obj_t obj = hwloc_get_root_obj(topology); + if (!hwloc_bitmap_intersects(obj->cpuset, set)) + return NULL; + while (!hwloc_bitmap_isincluded(obj->cpuset, set)) { + /* while the object intersects without being included, look at its children */ + hwloc_obj_t child = obj->first_child; + while (child) { + if (hwloc_bitmap_intersects(child->cpuset, set)) + break; + child = child->next_sibling; + } + if (!child) + /* no child intersects, return their father */ + return obj; + /* found one intersecting child, look at its children */ + obj = child; + } + /* obj is included, return it */ + return obj; +} + +/** \brief Get the set of largest objects covering exactly a given cpuset \p set + * + * \return the number of objects returned in \p objs. + */ +HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_t * __hwloc_restrict objs, int max); + +/** \brief Return the next object at depth \p depth included in CPU set \p set. + * + * If \p prev is \c NULL, return the first object at depth \p depth + * included in \p set. The next invokation should pass the previous + * return value in \p prev so as to obtain the next object in \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth, hwloc_obj_t prev) +{ + hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev); + if (!next) + return NULL; + while (next && (hwloc_bitmap_iszero(next->cpuset) || !hwloc_bitmap_isincluded(next->cpuset, set))) + next = next->next_cousin; + return next; +} + +/** \brief Return the next object of type \p type included in CPU set \p set. + * + * If there are multiple or no depth for given type, return \c NULL + * and let the caller fallback to + * hwloc_get_next_obj_inside_cpuset_by_depth(). + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects of the given type do + * not have CPU sets (I/O or Misc objects). 
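+ *
+ * A typical iteration (an editor's illustrative sketch) over all cores
+ * inside a given cpuset \c set could be:
+ * \code
+ * hwloc_obj_t core = NULL;
+ * while ((core = hwloc_get_next_obj_inside_cpuset_by_type(topology, set,
+ *                                                         HWLOC_OBJ_CORE, core)) != NULL)
+ *   printf("core L#%u\n", core->logical_index);
+ * \endcode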
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, hwloc_obj_t prev) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_next_obj_inside_cpuset_by_depth(topology, set, depth, prev); +} + +/** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth, unsigned idx) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth, unsigned idx) +{ + hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0); + unsigned count = 0; + if (!obj) + return NULL; + while (obj) { + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) { + if (count == idx) + return obj; + count++; + } + obj = obj->next_cousin; + } + return NULL; +} + +/** \brief Return the \p idx -th object of type \p type included in CPU set \p set. + * + * If there are multiple or no depth for given type, return \c NULL + * and let the caller fallback to + * hwloc_get_obj_inside_cpuset_by_depth(). + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects of the given type do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, unsigned idx) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_obj_inside_cpuset_by_depth(topology, set, depth, idx); +} + +/** \brief Return the number of objects at depth \p depth included in CPU set \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline unsigned +hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth) __hwloc_attribute_pure; +static __hwloc_inline unsigned +hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth) +{ + hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0); + unsigned count = 0; + if (!obj) + return 0; + while (obj) { + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) + count++; + obj = obj->next_cousin; + } + return count; +} + +/** \brief Return the number of objects of type \p type included in CPU set \p set. + * + * If no object for that type exists inside CPU set \p set, 0 is + * returned. 
If there are several levels with objects of that type + * inside CPU set \p set, -1 is returned. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects of the given type do + * not have CPU sets (I/O objects). + */ +static __hwloc_inline int +hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type) __hwloc_attribute_pure; +static __hwloc_inline int +hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return 0; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return -1; /* FIXME: agregate nbobjs from different levels? */ + return (int) hwloc_get_nbobjs_inside_cpuset_by_depth(topology, set, depth); +} + +/** \brief Return the logical index among the objects included in CPU set \p set. + * + * Consult all objects in the same level as \p obj and inside CPU set \p set + * in the logical order, and return the index of \p obj within them. + * If \p set covers the entire topology, this is the logical index of \p obj. + * Otherwise, this is similar to a logical index within the part of the topology + * defined by CPU set \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if obj does not have CPU sets (I/O objects). + */ +static __hwloc_inline int +hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t obj) __hwloc_attribute_pure; +static __hwloc_inline int +hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t obj) +{ + int idx = 0; + if (!hwloc_bitmap_isincluded(obj->cpuset, set)) + return -1; + /* count how many objects are inside the cpuset on the way from us to the beginning of the level */ + while ((obj = obj->prev_cousin) != NULL) + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) + idx++; + return idx; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_find_covering Finding Objects covering at least CPU set + * @{ + */ + +/** \brief Get the child covering at least CPU set \p set. + * + * \return \c NULL if no child matches or if \p set is empty. + * + * \note This function cannot work if parent does not have a CPU set (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t parent) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t parent) +{ + hwloc_obj_t child; + if (hwloc_bitmap_iszero(set)) + return NULL; + child = parent->first_child; + while (child) { + if (child->cpuset && hwloc_bitmap_isincluded(set, child->cpuset)) + return child; + child = child->next_sibling; + } + return NULL; +} + +/** \brief Get the lowest object covering at least CPU set \p set + * + * \return \c NULL if no object matches or if \p set is empty. 
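+ *
+ * For instance (an editor's sketch), finding the smallest object covering
+ * the current process binding might look like:
+ * \code
+ * hwloc_cpuset_t set = hwloc_bitmap_alloc();
+ * if (set && !hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_PROCESS)) {
+ *   hwloc_obj_t obj = hwloc_get_obj_covering_cpuset(topology, set);
+ *   if (obj)
+ *     printf("bound inside a %s\n", hwloc_obj_type_string(obj->type));
+ * }
+ * hwloc_bitmap_free(set);
+ * \endcode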
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) +{ + struct hwloc_obj *current = hwloc_get_root_obj(topology); + if (hwloc_bitmap_iszero(set) || !hwloc_bitmap_isincluded(set, current->cpuset)) + return NULL; + while (1) { + hwloc_obj_t child = hwloc_get_child_covering_cpuset(topology, set, current); + if (!child) + return current; + current = child; + } +} + +/** \brief Iterate through same-depth objects covering at least CPU set \p set + * + * If object \p prev is \c NULL, return the first object at depth \p + * depth covering at least part of CPU set \p set. The next + * invokation should pass the previous return value in \p prev so as + * to obtain the next object covering at least another part of \p set. + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_covering_cpuset_by_depth(hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth, hwloc_obj_t prev) +{ + hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev); + if (!next) + return NULL; + while (next && !hwloc_bitmap_intersects(set, next->cpuset)) + next = next->next_cousin; + return next; +} + +/** \brief Iterate through same-type objects covering at least CPU set \p set + * + * If object \p prev is \c NULL, return the first object of type \p + * type covering at least part of CPU set \p set. The next invokation + * should pass the previous return value in \p prev so as to obtain + * the next object of type \p type covering at least another part of + * \p set. + * + * If there are no or multiple depths for type \p type, \c NULL is returned. + * The caller may fallback to hwloc_get_next_obj_covering_cpuset_by_depth() + * for each depth. + * + * \note This function cannot work if objects of the given type do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_covering_cpuset_by_type(hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, hwloc_obj_t prev) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, prev); +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_ancestors Looking at Ancestor and Child Objects + * @{ + * + * Be sure to see the figure in \ref termsanddefs that shows a + * complete topology tree, including depths, child/sibling/cousin + * relationships, and an example of an asymmetric topology where one + * package has fewer caches than its peers. + */ + +/** \brief Returns the ancestor object of \p obj at depth \p depth. + * + * \note \p depth should not be the depth of PU or NUMA objects + * since they are ancestors of no objects (except Misc or I/O). + * This function rather expects an intermediate level depth, + * such as the depth of Packages, Cores, or Caches. 
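+ *
+ * For example (an editor's sketch), the Package ancestor of a PU object
+ * \c pu could be fetched with:
+ * \code
+ * int pkg_depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
+ * hwloc_obj_t pkg = pkg_depth >= 0
+ *   ? hwloc_get_ancestor_obj_by_depth(topology, pkg_depth, pu) : NULL;
+ * \endcode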
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, int depth, hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, int depth, hwloc_obj_t obj)
+{
+  hwloc_obj_t ancestor = obj;
+  if (obj->depth < depth)
+    return NULL;
+  while (ancestor && ancestor->depth > depth)
+    ancestor = ancestor->parent;
+  return ancestor;
+}
+
+/** \brief Returns the ancestor object of \p obj with type \p type.
+ *
+ * \note \p type should not be ::HWLOC_OBJ_PU or ::HWLOC_OBJ_NUMANODE
+ * since these objects are ancestors of no objects (except Misc or I/O).
+ * This function rather expects an intermediate object type,
+ * such as ::HWLOC_OBJ_PACKAGE, ::HWLOC_OBJ_CORE, etc.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj)
+{
+  hwloc_obj_t ancestor = obj->parent;
+  while (ancestor && ancestor->type != type)
+    ancestor = ancestor->parent;
+  return ancestor;
+}
+
+/** \brief Returns the common ancestor object of objects \p obj1 and \p obj2 */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2)
+{
+  /* the loop isn't so easy since intermediate ancestors may have
+   * different depth, causing us to alternate between using obj1->parent
+   * and obj2->parent. Also, even if at some point we find ancestors
+   * of the same depth, their ancestors may have different depth again.
+   */
+  while (obj1 != obj2) {
+    while (obj1->depth > obj2->depth)
+      obj1 = obj1->parent;
+    while (obj2->depth > obj1->depth)
+      obj2 = obj2->parent;
+    if (obj1 != obj2 && obj1->depth == obj2->depth) {
+      obj1 = obj1->parent;
+      obj2 = obj2->parent;
+    }
+  }
+  return obj1;
+}
+
+/** \brief Returns true if \p obj is inside the subtree beginning with ancestor object \p subtree_root.
+ *
+ * \note This function cannot work if \p obj and \p subtree_root objects do
+ * not have CPU sets (I/O or Misc objects).
+ */
+static __hwloc_inline int
+hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) __hwloc_attribute_pure;
+static __hwloc_inline int
+hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root)
+{
+  return obj->cpuset && subtree_root->cpuset && hwloc_bitmap_isincluded(obj->cpuset, subtree_root->cpuset);
+}
+
+/** \brief Return the next child.
+ *
+ * Return the next child among the normal children list,
+ * then among the memory children list, then among the I/O
+ * children list, then among the Misc children list.
+ *
+ * If \p prev is \c NULL, return the first child.
+ *
+ * Return \c NULL when there is no next child.
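+ *
+ * The usual pattern (an editor's illustrative sketch) for walking all
+ * children of \c parent, whatever their kind, is:
+ * \code
+ * hwloc_obj_t child = NULL;
+ * while ((child = hwloc_get_next_child(topology, parent, child)) != NULL)
+ *   printf("child of type %s\n", hwloc_obj_type_string(child->type));
+ * \endcode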
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t parent, hwloc_obj_t prev)
+{
+  hwloc_obj_t obj;
+  int state = 0;
+  if (prev) {
+    if (prev->type == HWLOC_OBJ_MISC)
+      state = 3;
+    else if (prev->type == HWLOC_OBJ_BRIDGE || prev->type == HWLOC_OBJ_PCI_DEVICE || prev->type == HWLOC_OBJ_OS_DEVICE)
+      state = 2;
+    else if (prev->type == HWLOC_OBJ_NUMANODE)
+      state = 1;
+    obj = prev->next_sibling;
+  } else {
+    obj = parent->first_child;
+  }
+  if (!obj && state == 0) {
+    obj = parent->memory_first_child;
+    state = 1;
+  }
+  if (!obj && state == 1) {
+    obj = parent->io_first_child;
+    state = 2;
+  }
+  if (!obj && state == 2) {
+    obj = parent->misc_first_child;
+    state = 3;
+  }
+  return obj;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_types Kinds of object Type
+ * @{
+ *
+ * Each object type is
+ * either Normal (i.e. hwloc_obj_type_is_normal() returns 1),
+ * or Memory (i.e. hwloc_obj_type_is_memory() returns 1)
+ * or I/O (i.e. hwloc_obj_type_is_io() returns 1)
+ * or Misc (i.e. equal to ::HWLOC_OBJ_MISC).
+ * It cannot be of more than one of these kinds.
+ */
+
+/** \brief Check whether an object type is Normal.
+ *
+ * Normal objects are objects of the main CPU hierarchy
+ * (Machine, Package, Core, PU, CPU caches, etc.),
+ * but they are not NUMA nodes, I/O devices or Misc objects.
+ *
+ * They are attached to parent as Normal children,
+ * not as Memory, I/O or Misc children.
+ *
+ * \return 1 if an object of type \p type is a Normal object, 0 otherwise.
+ */
+HWLOC_DECLSPEC int
+hwloc_obj_type_is_normal(hwloc_obj_type_t type);
+
+/** \brief Check whether an object type is I/O.
+ *
+ * I/O objects are objects attached to their parents
+ * in the I/O children list.
+ * This currently includes Bridges, PCI and OS devices.
+ *
+ * \return 1 if an object of type \p type is an I/O object, 0 otherwise.
+ */
+HWLOC_DECLSPEC int
+hwloc_obj_type_is_io(hwloc_obj_type_t type);
+
+/** \brief Check whether an object type is Memory.
+ *
+ * Memory objects are objects attached to their parents
+ * in the Memory children list.
+ * This currently only includes NUMA nodes.
+ *
+ * \return 1 if an object of type \p type is a Memory object, 0 otherwise.
+ */
+HWLOC_DECLSPEC int
+hwloc_obj_type_is_memory(hwloc_obj_type_t type);
+
+/** \brief Check whether an object type is a Cache (Data, Unified or Instruction).
+ *
+ * \return 1 if an object of type \p type is a Cache, 0 otherwise.
+ */
+HWLOC_DECLSPEC int
+hwloc_obj_type_is_cache(hwloc_obj_type_t type);
+
+/** \brief Check whether an object type is a Data or Unified Cache.
+ *
+ * \return 1 if an object of type \p type is a Data or Unified Cache, 0 otherwise.
+ */
+HWLOC_DECLSPEC int
+hwloc_obj_type_is_dcache(hwloc_obj_type_t type);
+
+/** \brief Check whether an object type is an Instruction Cache.
+ *
+ * \return 1 if an object of type \p type is an Instruction Cache, 0 otherwise.
+ */
+HWLOC_DECLSPEC int
+hwloc_obj_type_is_icache(hwloc_obj_type_t type);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_cache Looking at Cache Objects
+ * @{
+ */
+
+/** \brief Find the depth of cache objects matching cache level and type.
+ *
+ * Return the depth of the topology level that contains cache objects
+ * whose attributes match \p cachelevel and \p cachetype.
+ *
+ * This function is identical to calling hwloc_get_type_depth() with the
+ * corresponding type such as ::HWLOC_OBJ_L1ICACHE, except that it may
+ * also return a Unified cache when looking for an instruction cache.
+ * + * If no cache level matches, ::HWLOC_TYPE_DEPTH_UNKNOWN is returned. + * + * If \p cachetype is ::HWLOC_OBJ_CACHE_UNIFIED, the depth of the + * unique matching unified cache level is returned. + * + * If \p cachetype is ::HWLOC_OBJ_CACHE_DATA or ::HWLOC_OBJ_CACHE_INSTRUCTION, + * either a matching cache, or a unified cache is returned. + * + * If \p cachetype is \c -1, it is ignored and multiple levels may + * match. The function returns either the depth of a uniquely matching + * level or ::HWLOC_TYPE_DEPTH_MULTIPLE. + */ +static __hwloc_inline int +hwloc_get_cache_type_depth (hwloc_topology_t topology, + unsigned cachelevel, hwloc_obj_cache_type_t cachetype) +{ + int depth; + int found = HWLOC_TYPE_DEPTH_UNKNOWN; + for (depth=0; ; depth++) { + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0); + if (!obj) + break; + if (!hwloc_obj_type_is_dcache(obj->type) || obj->attr->cache.depth != cachelevel) + /* doesn't match, try next depth */ + continue; + if (cachetype == (hwloc_obj_cache_type_t) -1) { + if (found != HWLOC_TYPE_DEPTH_UNKNOWN) { + /* second match, return MULTIPLE */ + return HWLOC_TYPE_DEPTH_MULTIPLE; + } + /* first match, mark it as found */ + found = depth; + continue; + } + if (obj->attr->cache.type == cachetype || obj->attr->cache.type == HWLOC_OBJ_CACHE_UNIFIED) + /* exact match (either unified is alone, or we match instruction or data), return immediately */ + return depth; + } + /* went to the bottom, return what we found */ + return found; +} + +/** \brief Get the first data (or unified) cache covering a cpuset \p set + * + * \return \c NULL if no cache matches. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) +{ + hwloc_obj_t current = hwloc_get_obj_covering_cpuset(topology, set); + while (current) { + if (hwloc_obj_type_is_dcache(current->type)) + return current; + current = current->parent; + } + return NULL; +} + +/** \brief Get the first data (or unified) cache shared between an object and somebody else. + * + * \return \c NULL if no cache matches or if an invalid object is given. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) +{ + hwloc_obj_t current = obj->parent; + if (!obj->cpuset) + return NULL; + while (current) { + if (!hwloc_bitmap_isequal(current->cpuset, obj->cpuset) + && hwloc_obj_type_is_dcache(current->type)) + return current; + current = current->parent; + } + return NULL; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_find_misc Finding objects, miscellaneous helpers + * @{ + * + * Be sure to see the figure in \ref termsanddefs that shows a + * complete topology tree, including depths, child/sibling/cousin + * relationships, and an example of an asymmetric topology where one + * package has fewer caches than its peers. + */ + +/** \brief Returns the object of type ::HWLOC_OBJ_PU with \p os_index. + * + * This function is useful for converting a CPU set into the PU + * objects it contains. + * When retrieving the current binding (e.g. 
with hwloc_get_cpubind()), + * one may iterate over the bits of the resulting CPU set with + * hwloc_bitmap_foreach_begin(), and find the corresponding PUs + * with this function. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) +{ + hwloc_obj_t obj = NULL; + while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PU, obj)) != NULL) + if (obj->os_index == os_index) + return obj; + return NULL; +} + +/** \brief Returns the object of type ::HWLOC_OBJ_NUMANODE with \p os_index. + * + * This function is useful for converting a nodeset into the NUMA node + * objects it contains. + * When retrieving the current binding (e.g. with hwloc_get_membind() with ::HWLOC_MEMBIND_BYNODESET), + * one may iterate over the bits of the resulting nodeset with + * hwloc_bitmap_foreach_begin(), and find the corresponding NUMA nodes + * with this function. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) +{ + hwloc_obj_t obj = NULL; + while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, obj)) != NULL) + if (obj->os_index == os_index) + return obj; + return NULL; +} + +/** \brief Do a depth-first traversal of the topology to find and sort + * all objects that are at the same depth as \p src. + * Report in \p objs up to \p max physically closest ones to \p src. + * + * \return the number of objects returned in \p objs. + * + * \return 0 if \p src is an I/O object. + * + * \note This function requires the \p src object to have a CPU set. + */ +/* TODO: rather provide an iterator? Provide a way to know how much should be allocated? By returning the total number of objects instead? */ +HWLOC_DECLSPEC unsigned hwloc_get_closest_objs (hwloc_topology_t topology, hwloc_obj_t src, hwloc_obj_t * __hwloc_restrict objs, unsigned max); + +/** \brief Find an object below another object, both specified by types and indexes. + * + * Start from the top system object and find object of type \p type1 + * and logical index \p idx1. Then look below this object and find another + * object of type \p type2 and logical index \p idx2. Indexes are specified + * within the parent, not within the entire system. + * + * For instance, if type1 is PACKAGE, idx1 is 2, type2 is CORE and idx2 + * is 3, return the fourth core object below the third package. + * + * \note This function requires these objects to have a CPU set. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_below_by_type (hwloc_topology_t topology, + hwloc_obj_type_t type1, unsigned idx1, + hwloc_obj_type_t type2, unsigned idx2) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_below_by_type (hwloc_topology_t topology, + hwloc_obj_type_t type1, unsigned idx1, + hwloc_obj_type_t type2, unsigned idx2) +{ + hwloc_obj_t obj; + obj = hwloc_get_obj_by_type (topology, type1, idx1); + if (!obj) + return NULL; + return hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, type2, idx2); +} + +/** \brief Find an object below a chain of objects specified by types and indexes. + * + * This is a generalized version of hwloc_get_obj_below_by_type(). + * + * Arrays \p typev and \p idxv must contain \p nr types and indexes.
+ * + * Start from the top system object and walk the arrays \p typev and \p idxv. + * For each type and logical index couple in the arrays, look under the previously found + * object to find the index-th object of the given type. + * Indexes are specified within the parent, not within the entire system. + * + * For instance, if nr is 3, typev contains NODE, PACKAGE and CORE, + * and idxv contains 0, 1 and 2, return the third core object below + * the second package below the first NUMA node. + * + * \note This function requires all these objects and the root object + * to have a CPU set. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv) +{ + hwloc_obj_t obj = hwloc_get_root_obj(topology); + int i; + for(i=0; i<nr; i++) { + if (!obj) + return NULL; + obj = hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, typev[i], idxv[i]); + } + return obj; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_distribute Distributing items over a topology + * @{ + */ + +/** \brief Flags to be given to hwloc_distrib(). + */ +enum hwloc_distrib_flags_e { + /** \brief Distribute in reverse order, starting from the last objects. + * \hideinitializer + */ + HWLOC_DISTRIB_FLAG_REVERSE = (1UL<<0) +}; + +/** \brief Distribute \p n items over the topology under \p roots + * + * Array \p set will be filled with \p n cpusets recursively distributed + * linearly over the topology under objects \p roots, down to depth \p until + * (which can be INT_MAX to distribute down to the finest level). + * + * \p n_roots is usually 1 and \p roots only contains the topology root object + * so as to distribute over the entire topology. + * + * This is typically useful when an application wants to distribute \p n + * threads over a machine, giving each of them as much private cache as + * possible and keeping them locally in number order. + * + * The caller may typically want to also call hwloc_bitmap_singlify() + * before binding a thread so that it does not move at all. + * + * \p flags should be 0 or an OR'ed set of ::hwloc_distrib_flags_e. + * + * \note This function requires the \p roots objects to have a CPU set. + * + * \note This function replaces the now deprecated hwloc_distribute() + * and hwloc_distributev() functions. + */ +static __hwloc_inline int +hwloc_distrib(hwloc_topology_t topology, + hwloc_obj_t *roots, unsigned n_roots, + hwloc_cpuset_t *set, + unsigned n, + int until, unsigned long flags) +{ + unsigned i; + unsigned tot_weight; + unsigned given, givenweight; + hwloc_cpuset_t *cpusetp = set; + + if (flags & ~HWLOC_DISTRIB_FLAG_REVERSE) { + errno = EINVAL; + return -1; + } + + tot_weight = 0; + for (i = 0; i < n_roots; i++) + tot_weight += (unsigned) hwloc_bitmap_weight(roots[i]->cpuset); + + for (i = 0, given = 0, givenweight = 0; i < n_roots; i++) { + unsigned chunk, weight; + hwloc_obj_t root = roots[flags & HWLOC_DISTRIB_FLAG_REVERSE ? n_roots-1-i : i]; + hwloc_cpuset_t cpuset = root->cpuset; + if (root->type == HWLOC_OBJ_NUMANODE) + /* NUMANodes have same cpuset as their parent, but we need normal objects below */ + root = root->parent; + weight = (unsigned) hwloc_bitmap_weight(cpuset); + if (!weight) + continue; + /* Give to root a chunk proportional to its weight. + * If previous chunks got rounded-up, we may get a bit less.
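+ * As an illustrative worked example: with n=7 and two roots of
+ * weight 4 each (tot_weight=8), the first root gets
+ * ceil(28/8)-ceil(0/8) = 4-0 = 4 chunks and the second gets
+ * ceil(56/8)-ceil(28/8) = 7-4 = 3, summing to n.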
*/ + chunk = (( (givenweight+weight) * n + tot_weight-1) / tot_weight) + - (( givenweight * n + tot_weight-1) / tot_weight); + if (!root->arity || chunk <= 1 || root->depth >= until) { + /* We can't split any more, put everything there. */ + if (chunk) { + /* Fill cpusets with ours */ + unsigned j; + for (j=0; j < chunk; j++) + cpusetp[j] = hwloc_bitmap_dup(cpuset); + } else { + /* We got no chunk, just merge our cpuset to a previous one + * (the first chunk cannot be empty) + * so that this root doesn't get ignored. + */ + assert(given); + hwloc_bitmap_or(cpusetp[-1], cpusetp[-1], cpuset); + } + } else { + /* Still more to distribute, recurse into children */ + hwloc_distrib(topology, root->children, root->arity, cpusetp, chunk, until, flags); + } + cpusetp += chunk; + given += chunk; + givenweight += weight; + } + + return 0; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_topology_sets CPU and node sets of entire topologies + * @{ + */ + +/** \brief Get complete CPU set + * + * \return the complete CPU set of logical processors of the system. + * + * \note The returned cpuset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + * + * \note This is equivalent to retrieving the root object complete CPU-set. + */ +HWLOC_DECLSPEC hwloc_const_cpuset_t +hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; + +/** \brief Get topology CPU set + * + * \return the CPU set of logical processors of the system for which hwloc + * provides topology information. This is equivalent to the cpuset of the + * system object. + * + * \note The returned cpuset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + * + * \note This is equivalent to retrieving the root object CPU-set. + */ +HWLOC_DECLSPEC hwloc_const_cpuset_t +hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; + +/** \brief Get allowed CPU set + * + * \return the CPU set of allowed logical processors of the system. + * + * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set, + * this is identical to hwloc_topology_get_topology_cpuset(), which means + * all PUs are allowed. + * + * \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying + * hwloc_bitmap_intersects() on the result of this function and on an object + * cpuset checks whether there are allowed PUs inside that object. + * Applying hwloc_bitmap_and() returns the list of these allowed PUs. + * + * \note The returned cpuset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + */ +HWLOC_DECLSPEC hwloc_const_cpuset_t +hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; + +/** \brief Get complete node set + * + * \return the complete node set of memory of the system. + * + * \note The returned nodeset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + * + * \note This is equivalent to retrieving the root object complete node set. + */ +HWLOC_DECLSPEC hwloc_const_nodeset_t +hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; + +/** \brief Get topology node set + * + * \return the node set of memory of the system for which hwloc + * provides topology information. This is equivalent to the nodeset of the + * system object.
+ * + * \note The returned nodeset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + * + * \note This is equivalent to retrieving the root object node set. + */ +HWLOC_DECLSPEC hwloc_const_nodeset_t +hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; + +/** \brief Get allowed node set + * + * \return the node set of allowed memory of the system. + * + * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was not set, + * this is identical to hwloc_topology_get_topology_nodeset(), which means + * all NUMA nodes are allowed. + * + * \note If ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM was set, applying + * hwloc_bitmap_intersects() on the result of this function and on an object + * nodeset checks whether there are allowed NUMA nodes inside that object. + * Applying hwloc_bitmap_and() returns the list of these allowed NUMA nodes. + * + * \note The returned nodeset is not newly allocated and should thus not be + * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. + */ +HWLOC_DECLSPEC hwloc_const_nodeset_t +hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; + +/** @} */ + + + +/** \defgroup hwlocality_helper_nodeset_convert Converting between CPU sets and node sets + * + * @{ + */ + +/** \brief Convert a CPU set into a NUMA node set and handle non-NUMA cases + * + * If some NUMA nodes have no CPUs at all, this function never sets their + * indexes in the output node set, even if a full CPU set is given in input. + * + * If the topology contains no NUMA nodes, the machine is considered + * as a single memory node, and the following behavior is used: + * If \p cpuset is empty, \p nodeset will be emptied as well. + * Otherwise \p nodeset will be entirely filled. + */ +static __hwloc_inline int +hwloc_cpuset_to_nodeset(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t obj = NULL; + assert(depth != HWLOC_TYPE_DEPTH_UNKNOWN); + hwloc_bitmap_zero(nodeset); + while ((obj = hwloc_get_next_obj_covering_cpuset_by_depth(topology, _cpuset, depth, obj)) != NULL) + if (hwloc_bitmap_set(nodeset, obj->os_index) < 0) + return -1; + return 0; +} + +/** \brief Convert a NUMA node set into a CPU set and handle non-NUMA cases + * + * If the topology contains no NUMA nodes, the machine is considered + * as a single memory node, and the following behavior is used: + * If \p nodeset is empty, \p cpuset will be emptied as well. + * Otherwise \p cpuset will be entirely filled. + * This is useful for manipulating memory binding sets. + */ +static __hwloc_inline int +hwloc_cpuset_from_nodeset(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t obj = NULL; + assert(depth != HWLOC_TYPE_DEPTH_UNKNOWN); + hwloc_bitmap_zero(_cpuset); + while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL) { + if (hwloc_bitmap_isset(nodeset, obj->os_index)) + /* no need to check obj->cpuset because objects in levels always have a cpuset */ + if (hwloc_bitmap_or(_cpuset, _cpuset, obj->cpuset) < 0) + return -1; + } + return 0; +} + +/** @} */ + + + +/** \defgroup hwlocality_advanced_io Finding I/O objects + * @{ + */ + +/** \brief Get the first non-I/O ancestor object.
+ * + * Given the I/O object \p ioobj, find the smallest non-I/O ancestor + * object. This object (normal or memory) may then be used for binding + * because it has non-NULL CPU and node sets + * and because its locality is the same as \p ioobj. + * + * \note The resulting object is usually a normal object but it could also + * be a memory object (e.g. NUMA node) in future platforms if I/O objects + * ever get attached to memory instead of CPUs. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_non_io_ancestor_obj(hwloc_topology_t topology __hwloc_attribute_unused, + hwloc_obj_t ioobj) +{ + hwloc_obj_t obj = ioobj; + while (obj && !obj->cpuset) { + obj = obj->parent; + } + return obj; +} + +/** \brief Get the next PCI device in the system. + * + * \return the first PCI device if \p prev is \c NULL. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_pcidev(hwloc_topology_t topology, hwloc_obj_t prev) +{ + return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PCI_DEVICE, prev); +} + +/** \brief Find the PCI device object matching the PCI bus id + * given by \p domain, \p bus, \p dev and \p func. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_pcidev_by_busid(hwloc_topology_t topology, + unsigned domain, unsigned bus, unsigned dev, unsigned func) +{ + hwloc_obj_t obj = NULL; + while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { + if (obj->attr->pcidev.domain == domain + && obj->attr->pcidev.bus == bus + && obj->attr->pcidev.dev == dev + && obj->attr->pcidev.func == func) + return obj; + } + return NULL; +} + +/** \brief Find the PCI device object matching the PCI bus id + * given as a string xxxx:yy:zz.t or yy:zz.t. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_pcidev_by_busidstring(hwloc_topology_t topology, const char *busid) +{ + unsigned domain = 0; /* default */ + unsigned bus, dev, func; + + if (sscanf(busid, "%x:%x.%x", &bus, &dev, &func) != 3 + && sscanf(busid, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) { + errno = EINVAL; + return NULL; + } + + return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, func); +} + +/** \brief Get the next OS device in the system. + * + * \return the first OS device if \p prev is \c NULL. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_osdev(hwloc_topology_t topology, hwloc_obj_t prev) +{ + return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_OS_DEVICE, prev); +} + +/** \brief Get the next bridge in the system. + * + * \return the first bridge if \p prev is \c NULL. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_bridge(hwloc_topology_t topology, hwloc_obj_t prev) +{ + return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_BRIDGE, prev); +} + +/** \brief Check whether a given bridge covers a given PCI bus. + */ +static __hwloc_inline int +hwloc_bridge_covers_pcibus(hwloc_obj_t bridge, + unsigned domain, unsigned bus) +{ + return bridge->type == HWLOC_OBJ_BRIDGE + && bridge->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI + && bridge->attr->bridge.downstream.pci.domain == domain + && bridge->attr->bridge.downstream.pci.secondary_bus <= bus + && bridge->attr->bridge.downstream.pci.subordinate_bus >= bus; +} + +/** @} */ + + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_HELPER_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/inlines.h b/src/3rdparty/hwloc/include/hwloc/inlines.h new file mode 100644 index 000000000..494209ea6 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/inlines.h @@ -0,0 +1,146 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2018 Inria. All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** + * This file contains the inline code of functions declared in hwloc.h + */ + +#ifndef HWLOC_INLINES_H +#define HWLOC_INLINES_H + +#ifndef HWLOC_H +#error Please include the main hwloc.h instead +#endif + +#include <stdlib.h> +#include <errno.h> + + +#ifdef __cplusplus +extern "C" { +#endif + +static __hwloc_inline int +hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) + return depth; + + /* find the highest existing level with type order >= */ + for(depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); ; depth--) + if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) < 0) + return depth+1; + + /* Shouldn't ever happen, as there is always a Machine level with lower order and known depth. */ + /* abort(); */ +} + +static __hwloc_inline int +hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + + if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) + return depth; + + /* find the lowest existing level with type order <= */ + for(depth = 0; ; depth++) + if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) > 0) + return depth-1; + + /* Shouldn't ever happen, as there is always a PU level with higher order and known depth. */ + /* abort(); */ +} + +static __hwloc_inline int +hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return 0; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return -1; /* FIXME: aggregate nbobjs from different levels?
*/ + return (int) hwloc_get_nbobjs_by_depth(topology, depth); +} + +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return NULL; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_obj_by_depth(topology, depth, idx); +} + +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_by_depth (hwloc_topology_t topology, int depth, hwloc_obj_t prev) +{ + if (!prev) + return hwloc_get_obj_by_depth (topology, depth, 0); + if (prev->depth != depth) + return NULL; + return prev->next_cousin; +} + +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, + hwloc_obj_t prev) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_next_obj_by_depth (topology, depth, prev); +} + +static __hwloc_inline hwloc_obj_t +hwloc_get_root_obj (hwloc_topology_t topology) +{ + return hwloc_get_obj_by_depth (topology, 0, 0); +} + +static __hwloc_inline const char * +hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) +{ + unsigned i; + for(i=0; i<obj->infos_count; i++) { + struct hwloc_info_s *info = &obj->infos[i]; + if (!strcmp(info->name, name)) + return info->value; + } + return NULL; +} + +static __hwloc_inline void * +hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags) +{ + void *p = hwloc_alloc_membind(topology, len, set, policy, flags); + if (p) + return p; + + if (hwloc_set_membind(topology, set, policy, flags) < 0) + /* hwloc_set_membind() takes care of ignoring errors if non-STRICT */ + return NULL; + + p = hwloc_alloc(topology, len); + if (p && policy != HWLOC_MEMBIND_FIRSTTOUCH) + /* Enforce the binding by touching the data */ + memset(p, 0, len); + return p; +} + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_INLINES_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/intel-mic.h b/src/3rdparty/hwloc/include/hwloc/intel-mic.h new file mode 100644 index 000000000..6f6f9d1b3 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/intel-mic.h @@ -0,0 +1,134 @@ +/* + * Copyright © 2013-2016 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and Intel Xeon Phi (MIC). + * + * Applications that use both hwloc and Intel Xeon Phi (MIC) may want to + * include this file so as to get topology information for MIC devices. + */ + +#ifndef HWLOC_INTEL_MIC_H +#define HWLOC_INTEL_MIC_H + +#include <hwloc.h> +#include <hwloc/autogen/config.h> +#include <hwloc/helper.h> +#ifdef HWLOC_LINUX_SYS +#include <hwloc/linux.h> +#include <dirent.h> +#include <string.h> +#endif + +#include <stdio.h> +#include <stdlib.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_intel_mic Interoperability with Intel Xeon Phi (MIC) + * + * This interface offers ways to retrieve topology information about + * Intel Xeon Phi (MIC) devices. + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to MIC device whose index is \p idx. + * + * Return the CPU set describing the locality of the MIC device whose index is \p idx. + * + * Topology \p topology and device index \p idx must match the local machine. 
+ * I/O devices detection is not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_intel_mic_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + int idx __hwloc_attribute_unused, + hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX]; + DIR *sysdir = NULL; + struct dirent *dirent; + unsigned pcibus, pcidev, pcifunc; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/class/mic/mic%d", idx); + sysdir = opendir(path); + if (!sysdir) + return -1; + + while ((dirent = readdir(sysdir)) != NULL) { + if (sscanf(dirent->d_name, "pci_%02x:%02x.%02x", &pcibus, &pcidev, &pcifunc) == 3) { + sprintf(path, "/sys/class/mic/mic%d/pci_%02x:%02x.%02x/local_cpus", idx, pcibus, pcidev, pcifunc); + if (hwloc_linux_read_path_as_cpumask(path, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + break; + } + } + + closedir(sysdir); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to the + * MIC device for the given index. + * + * Return the OS device object describing the MIC device whose index is \p idx. + * Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + */ +static __hwloc_inline hwloc_obj_t +hwloc_intel_mic_get_device_osdev_by_index(hwloc_topology_t topology, + unsigned idx) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type + && osdev->name + && !strncmp("mic", osdev->name, 3) + && atoi(osdev->name + 3) == (int) idx) + return osdev; + } + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_INTEL_MIC_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h new file mode 100644 index 000000000..7cea4166b --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h @@ -0,0 +1,273 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2010, 2012 Université Bordeaux + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and Linux libnuma. + * + * Applications that use both Linux libnuma and hwloc may want to + * include this file so as to ease conversion between their respective types. 
+*/ + +#ifndef HWLOC_LINUX_LIBNUMA_H +#define HWLOC_LINUX_LIBNUMA_H + +#include <hwloc.h> +#include <numa.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_linux_libnuma_ulongs Interoperability with Linux libnuma unsigned long masks + * + * This interface helps converting between Linux libnuma unsigned long masks + * and hwloc cpusets and nodesets. + * + * \note Topology \p topology must match the current machine. + * + * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware + * (i.e. when CONFIG_NUMA is not set in the kernel configuration). + * This helper and libnuma may thus not be strictly compatible in this case, + * which may be detected by checking whether numa_available() returns -1. + * + * @{ + */ + + +/** \brief Convert hwloc CPU set \p cpuset into the array of unsigned long \p mask + * + * \p mask is the array of unsigned long that will be filled. + * \p maxnode contains the maximal node number that may be stored in \p mask. + * \p maxnode will be set to the maximal node number that was found, plus one. + * + * This function may be used before calling set_mempolicy, mbind, migrate_pages + * or any other function that takes an array of unsigned long and a maximal + * node number as input parameter. + */ +static __hwloc_inline int +hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, + unsigned long *mask, unsigned long *maxnode) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + unsigned long outmaxnode = -1; + hwloc_obj_t node = NULL; + + /* round-up to the next ulong and clear all bytes */ + *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1); + memset(mask, 0, *maxnode/8); + + while ((node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, node)) != NULL) { + if (node->os_index >= *maxnode) + continue; + mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8)); + if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index) + outmaxnode = node->os_index; + } + + *maxnode = outmaxnode+1; + return 0; +} + +/** \brief Convert hwloc NUMA node set \p nodeset into the array of unsigned long \p mask + * + * \p mask is the array of unsigned long that will be filled. + * \p maxnode contains the maximal node number that may be stored in \p mask. + * \p maxnode will be set to the maximal node number that was found, plus one. + * + * This function may be used before calling set_mempolicy, mbind, migrate_pages + * or any other function that takes an array of unsigned long and a maximal + * node number as input parameter.
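+ *
+ * A minimal sketch of intended use (the 512-bit mask size, \c addr, \c len
+ * and the mbind() call from <numaif.h> are illustrative assumptions):
+ * \code
+ * unsigned long mask[512 / (8 * sizeof(unsigned long))];
+ * unsigned long maxnode = 512;
+ * hwloc_nodeset_to_linux_libnuma_ulongs(topology, nodeset, mask, &maxnode);
+ * mbind(addr, len, MPOL_BIND, mask, maxnode, 0); /* bind to the converted nodes */
+ * \endcode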
+ */ +static __hwloc_inline int +hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, + unsigned long *mask, unsigned long *maxnode) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + unsigned long outmaxnode = -1; + hwloc_obj_t node = NULL; + + /* round-up to the next ulong and clear all bytes */ + *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1); + memset(mask, 0, *maxnode/8); + + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) { + if (node->os_index >= *maxnode) + continue; + if (!hwloc_bitmap_isset(nodeset, node->os_index)) + continue; + mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8)); + if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index) + outmaxnode = node->os_index; + } + + *maxnode = outmaxnode+1; + return 0; +} + +/** \brief Convert the array of unsigned long \p mask into hwloc CPU set + * + * \p mask is an array of unsigned long that will be read. + * \p maxnode contains the maximal node number that may be read in \p mask. + * + * This function may be used after calling get_mempolicy or any other function + * that takes an array of unsigned long as output parameter (and possibly + * a maximal node number as input parameter). + */ +static __hwloc_inline int +hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset, + const unsigned long *mask, unsigned long maxnode) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t node = NULL; + hwloc_bitmap_zero(cpuset); + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (node->os_index < maxnode + && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) + hwloc_bitmap_or(cpuset, cpuset, node->cpuset); + return 0; +} + +/** \brief Convert the array of unsigned long \p mask into hwloc NUMA node set + * + * \p mask is an array of unsigned long that will be read. + * \p maxnode contains the maximal node number that may be read in \p mask. + * + * This function may be used after calling get_mempolicy or any other function + * that takes an array of unsigned long as output parameter (and possibly + * a maximal node number as input parameter). + */ +static __hwloc_inline int +hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset, + const unsigned long *mask, unsigned long maxnode) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t node = NULL; + hwloc_bitmap_zero(nodeset); + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (node->os_index < maxnode + && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) + hwloc_bitmap_set(nodeset, node->os_index); + return 0; +} + +/** @} */ + + + +/** \defgroup hwlocality_linux_libnuma_bitmask Interoperability with Linux libnuma bitmask + * + * This interface helps converting between Linux libnuma bitmasks + * and hwloc cpusets and nodesets. + * + * \note Topology \p topology must match the current machine. + * + * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware + * (i.e. when CONFIG_NUMA is not set in the kernel configuration). + * This helper and libnuma may thus not be strictly compatible in this case, + * which may be detected by checking whether numa_available() returns -1.
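+ *
+ * For example (sketch), a cautious caller may bail out early:
+ * \code
+ * if (numa_available() == -1) { /* fall back to hwloc-only memory binding */ }
+ * \endcode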
+ * + * @{ + */ + + +/** \brief Convert hwloc CPU set \p cpuset into the returned libnuma bitmask + * + * The returned bitmask should later be freed with numa_bitmask_free. + * + * This function may be used before calling many numa_ functions + * that use a struct bitmask as an input parameter. + * + * \return newly allocated struct bitmask. + */ +static __hwloc_inline struct bitmask * +hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc; +static __hwloc_inline struct bitmask * +hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t node = NULL; + struct bitmask *bitmask = numa_allocate_cpumask(); + if (!bitmask) + return NULL; + while ((node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, node)) != NULL) + if (node->attr->numanode.local_memory) + numa_bitmask_setbit(bitmask, node->os_index); + return bitmask; +} + +/** \brief Convert hwloc NUMA node set \p nodeset into the returned libnuma bitmask + * + * The returned bitmask should later be freed with numa_bitmask_free. + * + * This function may be used before calling many numa_ functions + * that use a struct bitmask as an input parameter. + * + * \return newly allocated struct bitmask. + */ +static __hwloc_inline struct bitmask * +hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc; +static __hwloc_inline struct bitmask * +hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t node = NULL; + struct bitmask *bitmask = numa_allocate_cpumask(); + if (!bitmask) + return NULL; + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (hwloc_bitmap_isset(nodeset, node->os_index) && node->attr->numanode.local_memory) + numa_bitmask_setbit(bitmask, node->os_index); + return bitmask; +} + +/** \brief Convert libnuma bitmask \p bitmask into hwloc CPU set \p cpuset + * + * This function may be used after calling many numa_ functions + * that use a struct bitmask as an output parameter. + */ +static __hwloc_inline int +hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset, + const struct bitmask *bitmask) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t node = NULL; + hwloc_bitmap_zero(cpuset); + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (numa_bitmask_isbitset(bitmask, node->os_index)) + hwloc_bitmap_or(cpuset, cpuset, node->cpuset); + return 0; +} + +/** \brief Convert libnuma bitmask \p bitmask into hwloc NUMA node set \p nodeset + * + * This function may be used after calling many numa_ functions + * that use a struct bitmask as an output parameter. 
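+ *
+ * For instance (illustrative sketch; \c topology and \c nodeset are assumed
+ * to exist), to convert the current libnuma membind policy into a hwloc
+ * nodeset:
+ * \code
+ * struct bitmask *bm = numa_get_membind();
+ * hwloc_nodeset_from_linux_libnuma_bitmask(topology, nodeset, bm);
+ * numa_bitmask_free(bm);
+ * \endcode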
+ */ +static __hwloc_inline int +hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset, + const struct bitmask *bitmask) +{ + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t node = NULL; + hwloc_bitmap_zero(nodeset); + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (numa_bitmask_isbitset(bitmask, node->os_index)) + hwloc_bitmap_set(nodeset, node->os_index); + return 0; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_LINUX_LIBNUMA_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/linux.h b/src/3rdparty/hwloc/include/hwloc/linux.h new file mode 100644 index 000000000..c409e1c2a --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/linux.h @@ -0,0 +1,79 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2011 Université Bordeaux + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and Linux. + * + * Applications that use hwloc on Linux may want to include this file + * if using some low-level Linux features. + */ + +#ifndef HWLOC_LINUX_H +#define HWLOC_LINUX_H + +#include <hwloc.h> +#include <stdio.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_linux Linux-specific helpers + * + * This includes helpers for manipulating Linux kernel cpumap files, and hwloc + * equivalents of the Linux sched_setaffinity and sched_getaffinity system calls. + * + * @{ + */ + +/** \brief Bind a thread \p tid on cpus given in cpuset \p set + * + * The behavior is exactly the same as the Linux sched_setaffinity system call, + * but uses a hwloc cpuset. + * + * \note This is equivalent to calling hwloc_set_proc_cpubind() with + * ::HWLOC_CPUBIND_THREAD as flags. + */ +HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set); + +/** \brief Get the current binding of thread \p tid + * + * The behavior is exactly the same as the Linux sched_getaffinity system call, + * but uses a hwloc cpuset. + * + * \note This is equivalent to calling hwloc_get_proc_cpubind() with + * ::HWLOC_CPUBIND_THREAD as flags. + */ +HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set); + +/** \brief Get the last physical CPU where thread \p tid ran. + * + * \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with + * ::HWLOC_CPUBIND_THREAD as flags. + */ +HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology, pid_t tid, hwloc_bitmap_t set); + +/** \brief Convert a Linux kernel cpumask file \p path into a hwloc bitmap \p set. + * + * Might be used when reading CPU set from sysfs attributes such as topology + * and caches for processors, or local_cpus for devices. + * + * \note This function ignores the HWLOC_FSROOT environment variable. + */ +HWLOC_DECLSPEC int hwloc_linux_read_path_as_cpumask(const char *path, hwloc_bitmap_t set); + +/** @} */ + + +#ifdef __cplusplus
} /* extern "C" */ +#endif + + +#endif /* HWLOC_LINUX_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/nvml.h b/src/3rdparty/hwloc/include/hwloc/nvml.h new file mode 100644 index 000000000..197108660 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/nvml.h @@ -0,0 +1,181 @@ +/* + * Copyright © 2012-2016 Inria. All rights reserved. + * See COPYING in top-level directory.
+ */ + +/** \file + * \brief Macros to help interaction between hwloc and the NVIDIA Management Library. + * + * Applications that use both hwloc and the NVIDIA Management Library may want to + * include this file so as to get topology information for NVML devices. + */ + +#ifndef HWLOC_NVML_H +#define HWLOC_NVML_H + +#include <hwloc.h> +#include <hwloc/autogen/config.h> +#include <hwloc/helper.h> +#ifdef HWLOC_LINUX_SYS +#include <hwloc/linux.h> +#endif + +#include <nvml.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_nvml Interoperability with the NVIDIA Management Library + * + * This interface offers ways to retrieve topology information about + * devices managed by the NVIDIA Management Library (NVML). + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to NVML device \p device. + * + * Return the CPU set describing the locality of the NVML device \p device. + * + * Topology \p topology and device \p device must match the local machine. + * I/O devices detection and the NVML component are not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_nvml_get_device_osdev() + * and hwloc_nvml_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + nvmlDevice_t device, hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_NVML_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_NVML_DEVICE_SYSFS_PATH_MAX]; + nvmlReturn_t nvres; + nvmlPciInfo_t pci; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + nvres = nvmlDeviceGetPciInfo(device, &pci); + if (NVML_SUCCESS != nvres) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", pci.domain, pci.bus, pci.device); + if (hwloc_linux_read_path_as_cpumask(path, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to the + * NVML device whose index is \p idx. + * + * Return the OS device object describing the NVML device whose + * index is \p idx. Returns NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the NVML component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object (unless PCI devices are filtered out). 
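+ *
+ * A usage sketch (assuming \c topology was loaded with I/O discovery and
+ * the NVML component enabled; index 0 is illustrative):
+ * \code
+ * hwloc_obj_t osdev = hwloc_nvml_get_device_osdev_by_index(topology, 0);
+ * hwloc_obj_t where = osdev ? hwloc_get_non_io_ancestor_obj(topology, osdev) : NULL;
+ * \endcode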
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type + && osdev->name + && !strncmp("nvml", osdev->name, 4) + && atoi(osdev->name + 4) == (int) idx) + return osdev; + } + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to NVML device \p device. + * + * Return the hwloc OS device object that describes the given + * NVML device \p device. Return NULL if there is none. + * + * Topology \p topology and device \p device must match the local machine. + * I/O devices detection and the NVML component must be enabled in the topology. + * If not, the locality of the object may still be found using + * hwloc_nvml_get_device_cpuset(). + * + * \note The corresponding hwloc PCI device may be found by looking + * at the result parent pointer (unless PCI devices are filtered out). + */ +static __hwloc_inline hwloc_obj_t +hwloc_nvml_get_device_osdev(hwloc_topology_t topology, nvmlDevice_t device) +{ + hwloc_obj_t osdev; + nvmlReturn_t nvres; + nvmlPciInfo_t pci; + char uuid[64]; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return NULL; + } + + nvres = nvmlDeviceGetPciInfo(device, &pci); + if (NVML_SUCCESS != nvres) + return NULL; + + nvres = nvmlDeviceGetUUID(device, uuid, sizeof(uuid)); + if (NVML_SUCCESS != nvres) + uuid[0] = '\0'; + + osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + hwloc_obj_t pcidev = osdev->parent; + const char *info; + + if (strncmp(osdev->name, "nvml", 4)) + continue; + + if (pcidev + && pcidev->type == HWLOC_OBJ_PCI_DEVICE + && pcidev->attr->pcidev.domain == pci.domain + && pcidev->attr->pcidev.bus == pci.bus + && pcidev->attr->pcidev.dev == pci.device + && pcidev->attr->pcidev.func == 0) + return osdev; + + info = hwloc_obj_get_info_by_name(osdev, "NVIDIAUUID"); + if (info && !strcmp(info, uuid)) + return osdev; + } + + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_NVML_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h new file mode 100644 index 000000000..058968d74 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/opencl.h @@ -0,0 +1,206 @@ +/* + * Copyright © 2012-2018 Inria. All rights reserved. + * Copyright © 2013, 2018 Université Bordeaux. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and the OpenCL interface. + * + * Applications that use both hwloc and OpenCL may want to + * include this file so as to get topology information for OpenCL devices. + */ + +#ifndef HWLOC_OPENCL_H +#define HWLOC_OPENCL_H + +#include <hwloc.h> +#include <hwloc/autogen/config.h> +#include <hwloc/helper.h> +#ifdef HWLOC_LINUX_SYS +#include <hwloc/linux.h> +#endif + +#ifdef __APPLE__ +#include <OpenCL/cl.h> +#include <OpenCL/cl_ext.h> +#else +#include <CL/cl.h> +#include <CL/cl_ext.h> +#endif + +#include <stdio.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_opencl Interoperability with OpenCL + * + * This interface offers ways to retrieve topology information about + * OpenCL devices. + * + * Only the AMD OpenCL interface currently offers useful locality information + * about its devices.
+ * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to OpenCL device \p device. + * + * Return the CPU set describing the locality of the OpenCL device \p device. + * + * Topology \p topology and device \p device must match the local machine. + * I/O devices detection and the OpenCL component are not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_opencl_get_device_osdev() + * and hwloc_opencl_get_device_osdev_by_index(). + * + * This function is currently only implemented in a meaningful way for + * Linux with the AMD OpenCL implementation; other systems will simply + * get a full cpuset. + */ +static __hwloc_inline int +hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + cl_device_id device __hwloc_attribute_unused, + hwloc_cpuset_t set) +{ +#if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD) + /* If we're on Linux + AMD OpenCL, use the AMD extension + the sysfs mechanism to get the local cpus */ +#define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX]; + cl_device_topology_amd amdtopo; + cl_int clret; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); + if (CL_SUCCESS != clret) { + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + return 0; + } + if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + return 0; + } + + sprintf(path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus", + (unsigned) amdtopo.pcie.bus, (unsigned) amdtopo.pcie.device, (unsigned) amdtopo.pcie.function); + if (hwloc_linux_read_path_as_cpumask(path, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#else + /* Non-Linux + AMD OpenCL systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to the + * OpenCL device for the given indexes. + * + * Return the OS device object describing the OpenCL device + * whose platform index is \p platform_index, + * and whose device index within this platform is \p device_index. + * Return NULL if there is none. + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection and the OpenCL component must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object (unless PCI devices are filtered out). + */ +static __hwloc_inline hwloc_obj_t +hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology, + unsigned platform_index, unsigned device_index) +{ + unsigned x = (unsigned) -1, y = (unsigned) -1; + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type + && osdev->name + && sscanf(osdev->name, "opencl%ud%u", &x, &y) == 2 + && platform_index == x && device_index == y) + return osdev; + } + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to OpenCL device \p device.
+ * + * Use OpenCL device attributes to find the corresponding hwloc OS device object. + * Return NULL if there is none or if useful attributes are not available. + * + * This function currently only works on AMD OpenCL devices that support + * the CL_DEVICE_TOPOLOGY_AMD extension. hwloc_opencl_get_device_osdev_by_index() + * should be preferred whenever possible, i.e. when platform and device index + * are known. + * + * Topology \p topology and device \p device must match the local machine. + * I/O devices detection and the OpenCL component must be enabled in the topology. + * If not, the locality of the object may still be found using + * hwloc_opencl_get_device_cpuset(). + * + * \note This function cannot work if PCI devices are filtered out. + * + * \note The corresponding hwloc PCI device may be found by looking + * at the result parent pointer (unless PCI devices are filtered out). + */ +static __hwloc_inline hwloc_obj_t +hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused, + cl_device_id device __hwloc_attribute_unused) +{ +#ifdef CL_DEVICE_TOPOLOGY_AMD + hwloc_obj_t osdev; + cl_device_topology_amd amdtopo; + cl_int clret; + + clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); + if (CL_SUCCESS != clret) { + errno = EINVAL; + return NULL; + } + if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { + errno = EINVAL; + return NULL; + } + + osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + hwloc_obj_t pcidev = osdev->parent; + if (strncmp(osdev->name, "opencl", 6)) + continue; + if (pcidev + && pcidev->type == HWLOC_OBJ_PCI_DEVICE + && pcidev->attr->pcidev.domain == 0 + && pcidev->attr->pcidev.bus == amdtopo.pcie.bus + && pcidev->attr->pcidev.dev == amdtopo.pcie.device + && pcidev->attr->pcidev.func == amdtopo.pcie.function) + return osdev; + /* if PCI devices are filtered out, we need an info attr to match on */ + } + + return NULL; +#else + return NULL; +#endif +} + +/** @} */ + + +#ifdef __cplusplus
} /* extern "C" */ +#endif + + +#endif /* HWLOC_OPENCL_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h new file mode 100644 index 000000000..174ab4a57 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h @@ -0,0 +1,150 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2010 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and OpenFabrics + * verbs. + * + * Applications that use both hwloc and OpenFabrics verbs may want to + * include this file so as to get topology information for OpenFabrics + * hardware (InfiniBand, etc). + * + */ + +#ifndef HWLOC_OPENFABRICS_VERBS_H +#define HWLOC_OPENFABRICS_VERBS_H + +#include <hwloc.h> +#include <hwloc/autogen/config.h> +#ifdef HWLOC_LINUX_SYS +#include <hwloc/linux.h> +#endif + +#include <infiniband/verbs.h> + + +#ifdef __cplusplus
extern "C" { +#endif + + +/** \defgroup hwlocality_openfabrics Interoperability with OpenFabrics + * + * This interface offers ways to retrieve topology information about + * OpenFabrics devices (InfiniBand, Omni-Path, usNIC, etc). + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to device \p ibdev.
+ * + * Return the CPU set describing the locality of the OpenFabrics + * device \p ibdev (InfiniBand, etc). + * + * Topology \p topology and device \p ibdev must match the local machine. + * I/O devices detection is not needed in the topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_ibv_get_device_osdev() + * and hwloc_ibv_get_device_osdev_by_name(). + * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + struct ibv_device *ibdev, hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the verbs-provided sysfs mechanism to + get the local cpus */ +#define HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX 128 + char path[HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX]; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/class/infiniband/%s/device/local_cpus", + ibv_get_device_name(ibdev)); + if (hwloc_linux_read_path_as_cpumask(path, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to the OpenFabrics + * device named \p ibname. + * + * Return the OS device object describing the OpenFabrics device + * (InfiniBand, Omni-Path, usNIC, etc) whose name is \p ibname + * (mlx5_0, hfi1_0, usnic_0, qib0, etc). + * Returns NULL if there is none. + * The name \p ibname is usually obtained from ibv_get_device_name(). + * + * The topology \p topology does not necessarily have to match the current + * machine. For instance the topology may be an XML import of a remote host. + * I/O devices detection must be enabled in the topology. + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. + */ +static __hwloc_inline hwloc_obj_t +hwloc_ibv_get_device_osdev_by_name(hwloc_topology_t topology, + const char *ibname) +{ + hwloc_obj_t osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + if (HWLOC_OBJ_OSDEV_OPENFABRICS == osdev->attr->osdev.type + && osdev->name && !strcmp(ibname, osdev->name)) + return osdev; + } + return NULL; +} + +/** \brief Get the hwloc OS device object corresponding to the OpenFabrics + * device \p ibdev. + * + * Return the OS device object describing the OpenFabrics device \p ibdev + * (InfiniBand, etc). Returns NULL if there is none. + * + * Topology \p topology and device \p ibdev must match the local machine. + * I/O devices detection must be enabled in the topology. + * If not, the locality of the object may still be found using + * hwloc_ibv_get_device_cpuset(). + * + * \note The corresponding PCI device object can be obtained by looking + * at the OS device parent object. 
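+ *
+ * A minimal lookup sketch (assuming \c ibdev was obtained from
+ * ibv_get_device_list()):
+ * \code
+ * hwloc_obj_t osdev = hwloc_ibv_get_device_osdev(topology, ibdev);
+ * hwloc_obj_t pcidev = osdev ? osdev->parent : NULL;
+ * \endcode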
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_ibv_get_device_osdev(hwloc_topology_t topology, + struct ibv_device *ibdev) +{ + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return NULL; + } + return hwloc_ibv_get_device_osdev_by_name(topology, ibv_get_device_name(ibdev)); +} + +/** @} */ + + +#ifdef __cplusplus
} /* extern "C" */ +#endif + + +#endif /* HWLOC_OPENFABRICS_VERBS_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h new file mode 100644 index 000000000..cb22000d4 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/plugins.h @@ -0,0 +1,542 @@ +/* + * Copyright © 2013-2017 Inria. All rights reserved. + * Copyright © 2016 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +#ifndef HWLOC_PLUGINS_H +#define HWLOC_PLUGINS_H + +/** \file + * \brief Public interface for building hwloc plugins. + */ + +struct hwloc_backend; + +#include <hwloc.h> +#ifdef HWLOC_INSIDE_PLUGIN +/* needed for hwloc_plugin_check_namespace() */ +#include <ltdl.h> +#endif + + + +/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components + * @{ + */ + +/** \brief Discovery component type */ +typedef enum hwloc_disc_component_type_e { + /** \brief CPU-only discovery through the OS, or generic no-OS support. + * \hideinitializer */ + HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0), + + /** \brief xml or synthetic, + * platform-specific components such as bgq. + * Anything that discovers CPUs and everything else. + * No misc backend is expected to complement a global component. + * \hideinitializer */ + HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1), + + /** \brief OpenCL, Cuda, etc. + * \hideinitializer */ + HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2) +} hwloc_disc_component_type_t; + +/** \brief Discovery component structure + * + * This is the main kind of component, taking care of the discovery. + * They are registered by generic components, either statically-built or as plugins. + */ +struct hwloc_disc_component { + /** \brief Discovery component type */ + hwloc_disc_component_type_t type; + + /** \brief Name. + * If this component is built as a plugin, this name does not have to match the plugin filename. + */ + const char *name; + + /** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e. + * + * For a GLOBAL component, this usually includes all other types (~0). + * + * Other components only exclude types that may bring conflicting + * topology information. MISC components should likely not be excluded + * since they usually bring non-primary additional information. + */ + unsigned excludes; + + /** \brief Instantiate callback to create a backend from the component. + * Parameters data1, data2, data3 are NULL except for components + * that have special enabling routines such as hwloc_topology_set_xml(). */ + struct hwloc_backend * (*instantiate)(struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3); + + /** \brief Component priority. + * Used to sort topology->components, higher priority first. + * Also used to decide between two components with the same name. + * + * Usual values are + * 50 for native OS (or platform) components, + * 45 for x86, + * 40 for no-OS fallback, + * 30 for global components (xml, synthetic), + * 20 for pci, + * 10 for other misc components (opencl etc.). + */ + unsigned priority; + + /** \brief Enabled by default. + * If unset, it will be disabled unless explicitly requested.
+   */
+  unsigned enabled_by_default;
+
+  /** \private Used internally to list components by priority on topology->components
+   * (the component structure is usually read-only,
+   * the core copies it before using this field for queueing)
+   */
+  struct hwloc_disc_component * next;
+};
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends
+ * @{
+ */
+
+/** \brief Discovery backend structure
+ *
+ * A backend is the instantiation of a discovery component.
+ * When a component gets enabled for a topology,
+ * its instantiate() callback creates a backend.
+ *
+ * hwloc_backend_alloc() initializes all fields to default values
+ * that the component may change (except "component" and "next")
+ * before enabling the backend with hwloc_backend_enable().
+ */
+struct hwloc_backend {
+  /** \private Reserved for the core, set by hwloc_backend_alloc() */
+  struct hwloc_disc_component * component;
+  /** \private Reserved for the core, set by hwloc_backend_enable() */
+  struct hwloc_topology * topology;
+  /** \private Reserved for the core. Set to 1 if forced through envvar, 0 otherwise. */
+  int envvar_forced;
+  /** \private Reserved for the core. Used internally to list backends in topology->backends. */
+  struct hwloc_backend * next;
+
+  /** \brief Backend flags, currently always 0. */
+  unsigned long flags;
+
+  /** \brief Backend-specific 'is_thissystem' property.
+   * Set to 0 or 1 if the backend should enforce the thissystem flag when it gets enabled.
+   * Set to -1 if the backend doesn't care (default). */
+  int is_thissystem;
+
+  /** \brief Backend private data, or NULL if none. */
+  void * private_data;
+  /** \brief Callback for freeing the private_data.
+   * May be NULL.
+   */
+  void (*disable)(struct hwloc_backend *backend);
+
+  /** \brief Main discovery callback.
+   * Returns -1 on error, either because it couldn't add its objects to the existing topology,
+   * or because of an actual discovery/gathering failure.
+   * May be NULL.
+   */
+  int (*discover)(struct hwloc_backend *backend);
+
+  /** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend.
+   * May be NULL. */
+  int (*get_pci_busid_cpuset)(struct hwloc_backend *backend, struct hwloc_pcidev_attr_s *busid, hwloc_bitmap_t cpuset);
+};
+
+/** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc.
+ * The caller will then modify whatever is needed, and call hwloc_backend_enable().
+ */
+HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_disc_component *component);
+
+/** \brief Enable a previously allocated and setup backend. */
+HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend);
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_generic_components Components and Plugins: Generic components
+ * @{
+ */
+
+/** \brief Generic component type */
+typedef enum hwloc_component_type_e {
+  /** \brief The data field must point to a struct hwloc_disc_component. */
+  HWLOC_COMPONENT_TYPE_DISC,
+
+  /** \brief The data field must point to a struct hwloc_xml_component. */
+  HWLOC_COMPONENT_TYPE_XML
+} hwloc_component_type_t;
+
+/** \brief Generic component structure
+ *
+ * Generic component structure, either statically listed by configure in static-components.h
+ * or dynamically loaded as a plugin.
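+ *
+ * A static declaration sketch (illustrative only; my_component and
+ * my_disc_component are placeholder names, and how the core finds this
+ * symbol — static list or plugin naming convention — is out of scope here):
+ * \code
+ * const struct hwloc_component my_component = {
+ *   HWLOC_COMPONENT_ABI,
+ *   NULL, NULL, // no init()/finalize() callbacks
+ *   HWLOC_COMPONENT_TYPE_DISC,
+ *   0, // flags, unused for now
+ *   &my_disc_component
+ * };
+ * \endcode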
+ */
+struct hwloc_component {
+  /** \brief Component ABI version, set to ::HWLOC_COMPONENT_ABI */
+  unsigned abi;
+
+  /** \brief Process-wide component initialization callback.
+   *
+   * This optional callback is called when the component is registered
+   * to the hwloc core (after loading the plugin).
+   *
+   * When the component is built as a plugin, this callback
+   * should call hwloc_plugin_check_namespace()
+   * and return a negative error code on error.
+   *
+   * \p flags is always 0 for now.
+   *
+   * \return 0 on success, or a negative code on error.
+   *
+   * \note If the component uses ltdl for loading its own plugins,
+   * it should load/unload them only in init() and finalize(),
+   * to avoid race conditions with hwloc's use of ltdl.
+   */
+  int (*init)(unsigned long flags);
+
+  /** \brief Process-wide component termination callback.
+   *
+   * This optional callback is called after unregistering the component
+   * from the hwloc core (before unloading the plugin).
+   *
+   * \p flags is always 0 for now.
+   *
+   * \note If the component uses ltdl for loading its own plugins,
+   * it should load/unload them only in init() and finalize(),
+   * to avoid race conditions with hwloc's use of ltdl.
+   */
+  void (*finalize)(unsigned long flags);
+
+  /** \brief Component type */
+  hwloc_component_type_t type;
+
+  /** \brief Component flags, unused for now */
+  unsigned long flags;
+
+  /** \brief Component data, pointing to a struct hwloc_disc_component or struct hwloc_xml_component. */
+  void * data;
+};
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_components_core_funcs Components and Plugins: Core functions to be used by components
+ * @{
+ */
+
+/** \brief Add an object to the topology.
+ *
+ * It is sorted along the tree of other objects according to the inclusion of
+ * cpusets, to eventually be added as a child of the smallest object including
+ * this object.
+ *
+ * If the cpuset is empty, the type of the object (and maybe some attributes)
+ * must be enough to find where to insert the object. This is especially true
+ * for NUMA nodes with memory and no CPUs.
+ *
+ * The given object should not have children.
+ *
+ * This shall only be called before levels are built.
+ *
+ * In case of error, hwloc_report_os_error() is called.
+ *
+ * The caller should check whether the object type is filtered-out before calling this function.
+ *
+ * The topology cpuset/nodesets will be enlarged to include the object sets.
+ *
+ * Returns the object on success.
+ * Returns NULL and frees obj on error.
+ * Returns another object and frees obj if it was merged with an identical pre-existing object.
+ */
+HWLOC_DECLSPEC struct hwloc_obj *hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj);
+
+/** \brief Type of error callbacks during object insertion */
+typedef void (*hwloc_report_error_t)(const char * msg, int line);
+/** \brief Report an insertion error from a backend */
+HWLOC_DECLSPEC void hwloc_report_os_error(const char * msg, int line);
+/** \brief Check whether insertion errors are hidden */
+HWLOC_DECLSPEC int hwloc_hide_errors(void);
+
+/** \brief Add an object to the topology and specify which error callback to use.
+ *
+ * This function is similar to hwloc_insert_object_by_cpuset() but it allows specifying
+ * where to start insertion from (if \p root is NULL, the topology root object is used),
+ * and specifying the error callback.
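+ *
+ * A usage sketch from a backend's discover() callback (illustrative only;
+ * the Group type and the cpuset range 0-7 are arbitrary examples):
+ * \code
+ * hwloc_obj_t obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP,
+ *                                            HWLOC_UNKNOWN_INDEX);
+ * obj->cpuset = hwloc_bitmap_alloc();
+ * hwloc_bitmap_set_range(obj->cpuset, 0, 7);
+ * hwloc__insert_object_by_cpuset(topology, NULL, obj, hwloc_report_os_error);
+ * \endcode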
+ */
+HWLOC_DECLSPEC struct hwloc_obj *hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root, hwloc_obj_t obj, hwloc_report_error_t report_error);
+
+/** \brief Insert an object somewhere in the topology.
+ *
+ * It is added as the last child of the given parent.
+ * The cpuset is completely ignored, so strange objects such as I/O devices should
+ * preferably be inserted with this.
+ *
+ * When used for "normal" children with cpusets (when importing from XML
+ * or when duplicating a topology), the caller should make sure that:
+ * - children are inserted in order,
+ * - children cpusets do not intersect.
+ *
+ * The given object may have normal, I/O or Misc children, as long as they are in order as well.
+ * These children must have valid parent and next_sibling pointers.
+ *
+ * The caller should check whether the object type is filtered-out before calling this function.
+ */
+HWLOC_DECLSPEC void hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj);
+
+/** \brief Allocate and initialize an object of the given type and physical index.
+ *
+ * If \p os_index is unknown or irrelevant, use \c HWLOC_UNKNOWN_INDEX.
+ */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_alloc_setup_object(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned os_index);
+
+/** \brief Setup object cpusets/nodesets by OR'ing its children.
+ *
+ * Used when adding an object late in the topology.
+ * Will update the new object by OR'ing all its new children sets.
+ *
+ * Used when the PCI backend adds a hostbridge parent, when distances
+ * add a new Group, etc.
+ */
+HWLOC_DECLSPEC int hwloc_obj_add_children_sets(hwloc_obj_t obj);
+
+/** \brief Request a reconnection of children and levels in the topology.
+ *
+ * May be used by backends during discovery if they need arrays or lists
+ * of objects within levels or children to be fully connected.
+ *
+ * \p flags is currently unused, must be 0.
+ */
+HWLOC_DECLSPEC int hwloc_topology_reconnect(hwloc_topology_t topology, unsigned long flags __hwloc_attribute_unused);
+
+/** \brief Make sure that plugins can look up core symbols.
+ *
+ * This is a sanity check to avoid lazy-lookup failures when libhwloc
+ * is loaded within a plugin, and later tries to load its own plugins.
+ * This may fail (and abort the program) if libhwloc symbols are in a
+ * private namespace.
+ *
+ * \return 0 on success.
+ * \return -1 if the plugin cannot be successfully loaded. The caller
+ * plugin init() callback should return a negative error code as well.
+ *
+ * Plugins should call this function in their init() callback to avoid
+ * later crashes if lazy symbol resolution is used by the upper layer that
+ * loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
+ *
+ * \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
+ * building the caller as a plugin.
+ *
+ * \note This function should remain inline so plugins can call it even
+ * when they cannot find libhwloc symbols.
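+ *
+ * A sketch of a plugin's init() callback (my_init is an illustrative name;
+ * any exported core symbol may be probed, hwloc_backend_alloc is just one):
+ * \code
+ * static int my_init(unsigned long flags) {
+ *   if (hwloc_plugin_check_namespace("mydisc", "hwloc_backend_alloc") < 0)
+ *     return -1; // core symbols not visible, disable this plugin
+ *   return 0;
+ * }
+ * \endcode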
+ */
+static __hwloc_inline int
+hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
+{
+#ifdef HWLOC_INSIDE_PLUGIN
+  lt_dlhandle handle;
+  void *sym;
+  handle = lt_dlopen(NULL);
+  if (!handle)
+    /* cannot check, assume things will work */
+    return 0;
+  sym = lt_dlsym(handle, symbol);
+  lt_dlclose(handle);
+  if (!sym) {
+    static int verboseenv_checked = 0;
+    static int verboseenv_value = 0;
+    if (!verboseenv_checked) {
+      const char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE");
+      verboseenv_value = verboseenv ? atoi(verboseenv) : 0;
+      verboseenv_checked = 1;
+    }
+    if (verboseenv_value)
+      fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
+              pluginname, symbol);
+    return -1;
+  }
+#endif /* HWLOC_INSIDE_PLUGIN */
+  return 0;
+}
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_components_filtering Components and Plugins: Filtering objects
+ * @{
+ */
+
+/** \brief Check whether the given PCI device classid is important.
+ *
+ * \return 1 if important, 0 otherwise.
+ */
+static __hwloc_inline int
+hwloc_filter_check_pcidev_subtype_important(unsigned classid)
+{
+  unsigned baseclass = classid >> 8;
+  return (baseclass == 0x03 /* PCI_BASE_CLASS_DISPLAY */
+          || baseclass == 0x02 /* PCI_BASE_CLASS_NETWORK */
+          || baseclass == 0x01 /* PCI_BASE_CLASS_STORAGE */
+          || baseclass == 0x0b /* PCI_BASE_CLASS_PROCESSOR */
+          || classid == 0x0c04 /* PCI_CLASS_SERIAL_FIBER */
+          || classid == 0x0c06 /* PCI_CLASS_SERIAL_INFINIBAND */
+          || baseclass == 0x12 /* Processing Accelerators */);
+}
+
+/** \brief Check whether the given OS device subtype is important.
+ *
+ * \return 1 if important, 0 otherwise.
+ */
+static __hwloc_inline int
+hwloc_filter_check_osdev_subtype_important(hwloc_obj_osdev_type_t subtype)
+{
+  return (subtype != HWLOC_OBJ_OSDEV_DMA);
+}
+
+/** \brief Check whether a non-I/O object type should be filtered-out.
+ *
+ * Cannot be used for I/O objects.
+ *
+ * \return 1 if the object type should be kept, 0 otherwise.
+ */
+static __hwloc_inline int
+hwloc_filter_check_keep_object_type(hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+  enum hwloc_type_filter_e filter = HWLOC_TYPE_FILTER_KEEP_NONE;
+  hwloc_topology_get_type_filter(topology, type, &filter);
+  assert(filter != HWLOC_TYPE_FILTER_KEEP_IMPORTANT); /* IMPORTANT only used for I/O */
+  return filter == HWLOC_TYPE_FILTER_KEEP_NONE ? 0 : 1;
+}
+
+/** \brief Check whether the given object should be filtered-out.
+ *
+ * \return 1 if the object should be kept, 0 otherwise.
+ */
+static __hwloc_inline int
+hwloc_filter_check_keep_object(hwloc_topology_t topology, hwloc_obj_t obj)
+{
+  hwloc_obj_type_t type = obj->type;
+  enum hwloc_type_filter_e filter = HWLOC_TYPE_FILTER_KEEP_NONE;
+  hwloc_topology_get_type_filter(topology, type, &filter);
+  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
+    return 0;
+  if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT) {
+    if (type == HWLOC_OBJ_PCI_DEVICE)
+      return hwloc_filter_check_pcidev_subtype_important(obj->attr->pcidev.class_id);
+    if (type == HWLOC_OBJ_OS_DEVICE)
+      return hwloc_filter_check_osdev_subtype_important(obj->attr->osdev.type);
+  }
+  return 1;
+}
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_components_pcidisc Components and Plugins: helpers for PCI discovery
+ * @{
+ */
+
+/** \brief Return the offset of the given capability in the PCI config space buffer
+ *
+ * This function requires a 256-byte config space. Unknown/unavailable bytes should be set to 0xff.
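+ *
+ * A sketch of the typical call sequence (illustrative only; how the
+ * 256-byte buffer gets filled is backend-specific, and 0x10 is the
+ * standard PCI Express capability ID, PCI_CAP_ID_EXP):
+ * \code
+ * unsigned char config[256]; // unread bytes pre-filled with 0xff
+ * float linkspeed = 0.f;
+ * unsigned offset = hwloc_pcidisc_find_cap(config, 0x10);
+ * if (offset)
+ *   hwloc_pcidisc_find_linkspeed(config, offset, &linkspeed);
+ * \endcode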
+ */
+HWLOC_DECLSPEC unsigned hwloc_pcidisc_find_cap(const unsigned char *config, unsigned cap);
+
+/** \brief Fill linkspeed by reading the PCI config space where PCI_CAP_ID_EXP is at position offset.
+ *
+ * Needs 20 bytes of EXP capability block starting at offset in the config space
+ * for registers up to link status.
+ */
+HWLOC_DECLSPEC int hwloc_pcidisc_find_linkspeed(const unsigned char *config, unsigned offset, float *linkspeed);
+
+/** \brief Return the hwloc object type (PCI device or Bridge) for the given class and configuration space.
+ *
+ * This function requires 16 bytes of common configuration header at the beginning of config.
+ */
+HWLOC_DECLSPEC hwloc_obj_type_t hwloc_pcidisc_check_bridge_type(unsigned device_class, const unsigned char *config);
+
+/** \brief Fill the attributes of the given PCI bridge using the given PCI config space.
+ *
+ * This function requires 32 bytes of common configuration header at the beginning of config.
+ *
+ * Returns -1 and destroys \p obj if bridge fields are invalid.
+ */
+HWLOC_DECLSPEC int hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, const unsigned char *config);
+
+/** \brief Insert a PCI object in the given PCI tree by looking at PCI bus IDs.
+ *
+ * If \p treep points to \c NULL, the new object is inserted there.
+ */
+HWLOC_DECLSPEC void hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep, struct hwloc_obj *obj);
+
+/** \brief Add some hostbridges on top of the given tree of PCI objects and attach them to the topology.
+ *
+ * For now, they will be attached to the root object. The core will move them to their actual PCI
+ * locality using hwloc_pci_belowroot_apply_locality() at the end of the discovery.
+ *
+ * In the meantime, other backends lookup PCI objects or localities (for instance to attach OS devices)
+ * by using hwloc_pcidisc_find_by_busid() or hwloc_pcidisc_find_busid_parent().
+ */
+HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tree);
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_components_pcifind Components and Plugins: finding PCI objects during other discoveries
+ * @{
+ */
+
+/** \brief Find the PCI object that matches the bus ID.
+ *
+ * To be used after a PCI backend added PCI devices with hwloc_pcidisc_tree_attach()
+ * and before the core moves them to their actual location with hwloc_pci_belowroot_apply_locality().
+ *
+ * If no exactly matching object is found, return the containing bridge if any, or NULL.
+ *
+ * On failure, it may be possible to find the PCI locality (instead of the PCI device)
+ * by calling hwloc_pcidisc_find_busid_parent().
+ *
+ * \note This is semantically identical to hwloc_get_pcidev_by_busid() which only works
+ * after the topology is fully loaded.
+ */
+HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
+
+/** \brief Find the normal parent of a PCI bus ID.
+ *
+ * Look at PCI affinity to find out where the given PCI bus ID should be attached.
+ *
+ * This function should be used to attach an I/O device directly under a normal
+ * (non-I/O) object, instead of below a PCI object.
+ * It is usually used by backends when hwloc_pcidisc_find_by_busid() failed
+ * to find the hwloc object corresponding to this bus ID, for instance because
+ * PCI discovery is not supported on this platform.
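+ *
+ * A minimal sketch (illustrative only; the bus ID values are arbitrary and
+ * my_osdev stands for an OS device object built by the calling backend):
+ * \code
+ * hwloc_obj_t parent =
+ *   hwloc_pcidisc_find_busid_parent(topology, 0, 0x03, 0x00, 0);
+ * if (parent)
+ *   hwloc_insert_object_by_parent(topology, parent, my_osdev);
+ * \endcode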
+ */ +HWLOC_DECLSPEC struct hwloc_obj * hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); + +/** @} */ + + + + +#endif /* HWLOC_PLUGINS_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h new file mode 100644 index 000000000..7cef1b2e8 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/rename.h @@ -0,0 +1,765 @@ +/* + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * Copyright © 2010-2018 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +#ifndef HWLOC_RENAME_H +#define HWLOC_RENAME_H + +#include <hwloc/autogen/config.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Only enact these defines if we're actually renaming the symbols + (i.e., avoid trying to have no-op defines if we're *not* + renaming). */ + +#if HWLOC_SYM_TRANSFORM + +/* Use a preprocessor two-step in order to get the prefixing right. + Make 2 macros: HWLOC_NAME and HWLOC_NAME_CAPS for renaming + things. */ + +#define HWLOC_MUNGE_NAME(a, b) HWLOC_MUNGE_NAME2(a, b) +#define HWLOC_MUNGE_NAME2(a, b) a ## b +#define HWLOC_NAME(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX, hwloc_ ## name) +#define HWLOC_NAME_CAPS(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX_CAPS, hwloc_ ## name) + +/* Now define all the "real" names to be the prefixed names. This + allows us to use the real names throughout the code base (i.e., + "hwloc_<foo>"); the preprocessor will adjust to have the prefixed + name under the covers. */ + +/* Names from hwloc.h */ + +#define hwloc_get_api_version HWLOC_NAME(get_api_version) + +#define hwloc_topology HWLOC_NAME(topology) +#define hwloc_topology_t HWLOC_NAME(topology_t) + +#define hwloc_cpuset_t HWLOC_NAME(cpuset_t) +#define hwloc_const_cpuset_t HWLOC_NAME(const_cpuset_t) +#define hwloc_nodeset_t HWLOC_NAME(nodeset_t) +#define hwloc_const_nodeset_t HWLOC_NAME(const_nodeset_t) + +#define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE) +#define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE) +#define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE) +#define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE) +#define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU) +#define HWLOC_OBJ_L1CACHE HWLOC_NAME_CAPS(OBJ_L1CACHE) +#define HWLOC_OBJ_L2CACHE HWLOC_NAME_CAPS(OBJ_L2CACHE) +#define HWLOC_OBJ_L3CACHE HWLOC_NAME_CAPS(OBJ_L3CACHE) +#define HWLOC_OBJ_L4CACHE HWLOC_NAME_CAPS(OBJ_L4CACHE) +#define HWLOC_OBJ_L5CACHE HWLOC_NAME_CAPS(OBJ_L5CACHE) +#define HWLOC_OBJ_L1ICACHE HWLOC_NAME_CAPS(OBJ_L1ICACHE) +#define HWLOC_OBJ_L2ICACHE HWLOC_NAME_CAPS(OBJ_L2ICACHE) +#define HWLOC_OBJ_L3ICACHE HWLOC_NAME_CAPS(OBJ_L3ICACHE) +#define HWLOC_OBJ_MISC HWLOC_NAME_CAPS(OBJ_MISC) +#define HWLOC_OBJ_GROUP HWLOC_NAME_CAPS(OBJ_GROUP) +#define HWLOC_OBJ_BRIDGE HWLOC_NAME_CAPS(OBJ_BRIDGE) +#define HWLOC_OBJ_PCI_DEVICE HWLOC_NAME_CAPS(OBJ_PCI_DEVICE) +#define HWLOC_OBJ_OS_DEVICE HWLOC_NAME_CAPS(OBJ_OS_DEVICE) +#define HWLOC_OBJ_TYPE_MAX HWLOC_NAME_CAPS(OBJ_TYPE_MAX) +#define hwloc_obj_type_t HWLOC_NAME(obj_type_t) + +#define hwloc_obj_cache_type_e HWLOC_NAME(obj_cache_type_e) +#define hwloc_obj_cache_type_t HWLOC_NAME(obj_cache_type_t) +#define HWLOC_OBJ_CACHE_UNIFIED HWLOC_NAME_CAPS(OBJ_CACHE_UNIFIED) +#define HWLOC_OBJ_CACHE_DATA HWLOC_NAME_CAPS(OBJ_CACHE_DATA) +#define HWLOC_OBJ_CACHE_INSTRUCTION HWLOC_NAME_CAPS(OBJ_CACHE_INSTRUCTION) + +#define hwloc_obj_bridge_type_e HWLOC_NAME(obj_bridge_type_e) +#define hwloc_obj_bridge_type_t HWLOC_NAME(obj_bridge_type_t) +#define HWLOC_OBJ_BRIDGE_HOST 
HWLOC_NAME_CAPS(OBJ_BRIDGE_HOST) +#define HWLOC_OBJ_BRIDGE_PCI HWLOC_NAME_CAPS(OBJ_BRIDGE_PCI) + +#define hwloc_obj_osdev_type_e HWLOC_NAME(obj_osdev_type_e) +#define hwloc_obj_osdev_type_t HWLOC_NAME(obj_osdev_type_t) +#define HWLOC_OBJ_OSDEV_BLOCK HWLOC_NAME_CAPS(OBJ_OSDEV_BLOCK) +#define HWLOC_OBJ_OSDEV_GPU HWLOC_NAME_CAPS(OBJ_OSDEV_GPU) +#define HWLOC_OBJ_OSDEV_NETWORK HWLOC_NAME_CAPS(OBJ_OSDEV_NETWORK) +#define HWLOC_OBJ_OSDEV_OPENFABRICS HWLOC_NAME_CAPS(OBJ_OSDEV_OPENFABRICS) +#define HWLOC_OBJ_OSDEV_DMA HWLOC_NAME_CAPS(OBJ_OSDEV_DMA) +#define HWLOC_OBJ_OSDEV_COPROC HWLOC_NAME_CAPS(OBJ_OSDEV_COPROC) + +#define hwloc_compare_types HWLOC_NAME(compare_types) + +#define hwloc_compare_types_e HWLOC_NAME(compare_types_e) +#define HWLOC_TYPE_UNORDERED HWLOC_NAME_CAPS(TYPE_UNORDERED) + +#define hwloc_obj HWLOC_NAME(obj) +#define hwloc_obj_t HWLOC_NAME(obj_t) + +#define hwloc_info_s HWLOC_NAME(info_s) + +#define hwloc_obj_attr_u HWLOC_NAME(obj_attr_u) +#define hwloc_numanode_attr_s HWLOC_NAME(numanode_attr_s) +#define hwloc_memory_page_type_s HWLOC_NAME(memory_page_type_s) +#define hwloc_cache_attr_s HWLOC_NAME(cache_attr_s) +#define hwloc_group_attr_s HWLOC_NAME(group_attr_s) +#define hwloc_pcidev_attr_s HWLOC_NAME(pcidev_attr_s) +#define hwloc_bridge_attr_s HWLOC_NAME(bridge_attr_s) +#define hwloc_osdev_attr_s HWLOC_NAME(osdev_attr_s) + +#define hwloc_topology_init HWLOC_NAME(topology_init) +#define hwloc_topology_load HWLOC_NAME(topology_load) +#define hwloc_topology_destroy HWLOC_NAME(topology_destroy) +#define hwloc_topology_dup HWLOC_NAME(topology_dup) +#define hwloc_topology_abi_check HWLOC_NAME(topology_abi_check) +#define hwloc_topology_check HWLOC_NAME(topology_check) + +#define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e) + +#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM) +#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) +#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) + +#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid) +#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) +#define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml) +#define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer) + +#define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags) +#define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem) +#define hwloc_topology_get_flags HWLOC_NAME(topology_get_flags) +#define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support) +#define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support) +#define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support) +#define hwloc_topology_support HWLOC_NAME(topology_support) +#define hwloc_topology_get_support HWLOC_NAME(topology_get_support) + +#define hwloc_type_filter_e HWLOC_NAME(type_filter_e) +#define HWLOC_TYPE_FILTER_KEEP_ALL HWLOC_NAME_CAPS(TYPE_FILTER_KEEP_ALL) +#define HWLOC_TYPE_FILTER_KEEP_NONE HWLOC_NAME_CAPS(TYPE_FILTER_KEEP_NONE) +#define HWLOC_TYPE_FILTER_KEEP_STRUCTURE HWLOC_NAME_CAPS(TYPE_FILTER_KEEP_STRUCTURE) +#define HWLOC_TYPE_FILTER_KEEP_IMPORTANT HWLOC_NAME_CAPS(TYPE_FILTER_KEEP_IMPORTANT) +#define hwloc_topology_set_type_filter HWLOC_NAME(topology_set_type_filter) +#define hwloc_topology_get_type_filter HWLOC_NAME(topology_get_type_filter) +#define hwloc_topology_set_all_types_filter HWLOC_NAME(topology_set_all_types_filter) +#define 
hwloc_topology_set_cache_types_filter HWLOC_NAME(topology_set_cache_types_filter) +#define hwloc_topology_set_icache_types_filter HWLOC_NAME(topology_set_icache_types_filter) +#define hwloc_topology_set_io_types_filter HWLOC_NAME(topology_set_io_types_filter) + +#define hwloc_topology_set_userdata HWLOC_NAME(topology_set_userdata) +#define hwloc_topology_get_userdata HWLOC_NAME(topology_get_userdata) + +#define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e) +#define HWLOC_RESTRICT_FLAG_REMOVE_CPULESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_CPULESS) +#define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC) +#define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO) +#define hwloc_topology_restrict HWLOC_NAME(topology_restrict) + +#define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object) +#define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object) +#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object) +#define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets) + +#define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth) +#define hwloc_get_type_depth HWLOC_NAME(get_type_depth) +#define hwloc_get_memory_parents_depth HWLOC_NAME(get_memory_parents_depth) + +#define hwloc_get_type_depth_e HWLOC_NAME(get_type_depth_e) +#define HWLOC_TYPE_DEPTH_UNKNOWN HWLOC_NAME_CAPS(TYPE_DEPTH_UNKNOWN) +#define HWLOC_TYPE_DEPTH_MULTIPLE HWLOC_NAME_CAPS(TYPE_DEPTH_MULTIPLE) +#define HWLOC_TYPE_DEPTH_BRIDGE HWLOC_NAME_CAPS(TYPE_DEPTH_BRIDGE) +#define HWLOC_TYPE_DEPTH_PCI_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_PCI_DEVICE) +#define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE) +#define HWLOC_TYPE_DEPTH_MISC HWLOC_NAME_CAPS(TYPE_DEPTH_MISC) +#define HWLOC_TYPE_DEPTH_NUMANODE HWLOC_NAME_CAPS(TYPE_DEPTH_NUMANODE) + +#define hwloc_get_depth_type HWLOC_NAME(get_depth_type) +#define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth) +#define hwloc_get_nbobjs_by_type HWLOC_NAME(get_nbobjs_by_type) + +#define hwloc_get_obj_by_depth HWLOC_NAME(get_obj_by_depth ) +#define hwloc_get_obj_by_type HWLOC_NAME(get_obj_by_type ) + +#define hwloc_obj_type_string HWLOC_NAME(obj_type_string ) +#define hwloc_obj_type_snprintf HWLOC_NAME(obj_type_snprintf ) +#define hwloc_obj_attr_snprintf HWLOC_NAME(obj_attr_snprintf ) +#define hwloc_type_sscanf HWLOC_NAME(type_sscanf) +#define hwloc_type_sscanf_as_depth HWLOC_NAME(type_sscanf_as_depth) + +#define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name) +#define hwloc_obj_add_info HWLOC_NAME(obj_add_info) + +#define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS) +#define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD) +#define HWLOC_CPUBIND_STRICT HWLOC_NAME_CAPS(CPUBIND_STRICT) +#define HWLOC_CPUBIND_NOMEMBIND HWLOC_NAME_CAPS(CPUBIND_NOMEMBIND) + +#define hwloc_cpubind_flags_t HWLOC_NAME(cpubind_flags_t) + +#define hwloc_set_cpubind HWLOC_NAME(set_cpubind) +#define hwloc_get_cpubind HWLOC_NAME(get_cpubind) +#define hwloc_set_proc_cpubind HWLOC_NAME(set_proc_cpubind) +#define hwloc_get_proc_cpubind HWLOC_NAME(get_proc_cpubind) +#define hwloc_set_thread_cpubind HWLOC_NAME(set_thread_cpubind) +#define hwloc_get_thread_cpubind HWLOC_NAME(get_thread_cpubind) + +#define hwloc_get_last_cpu_location HWLOC_NAME(get_last_cpu_location) +#define hwloc_get_proc_last_cpu_location HWLOC_NAME(get_proc_last_cpu_location) + +#define HWLOC_MEMBIND_DEFAULT HWLOC_NAME_CAPS(MEMBIND_DEFAULT) +#define HWLOC_MEMBIND_FIRSTTOUCH 
HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH) +#define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND) +#define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE) +#define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH) +#define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED) + +#define hwloc_membind_policy_t HWLOC_NAME(membind_policy_t) + +#define HWLOC_MEMBIND_PROCESS HWLOC_NAME_CAPS(MEMBIND_PROCESS) +#define HWLOC_MEMBIND_THREAD HWLOC_NAME_CAPS(MEMBIND_THREAD) +#define HWLOC_MEMBIND_STRICT HWLOC_NAME_CAPS(MEMBIND_STRICT) +#define HWLOC_MEMBIND_MIGRATE HWLOC_NAME_CAPS(MEMBIND_MIGRATE) +#define HWLOC_MEMBIND_NOCPUBIND HWLOC_NAME_CAPS(MEMBIND_NOCPUBIND) +#define HWLOC_MEMBIND_BYNODESET HWLOC_NAME_CAPS(MEMBIND_BYNODESET) + +#define hwloc_membind_flags_t HWLOC_NAME(membind_flags_t) + +#define hwloc_set_membind HWLOC_NAME(set_membind) +#define hwloc_get_membind HWLOC_NAME(get_membind) +#define hwloc_set_proc_membind HWLOC_NAME(set_proc_membind) +#define hwloc_get_proc_membind HWLOC_NAME(get_proc_membind) +#define hwloc_set_area_membind HWLOC_NAME(set_area_membind) +#define hwloc_get_area_membind HWLOC_NAME(get_area_membind) +#define hwloc_get_area_memlocation HWLOC_NAME(get_area_memlocation) +#define hwloc_alloc_membind HWLOC_NAME(alloc_membind) +#define hwloc_alloc HWLOC_NAME(alloc) +#define hwloc_free HWLOC_NAME(free) + +#define hwloc_get_non_io_ancestor_obj HWLOC_NAME(get_non_io_ancestor_obj) +#define hwloc_get_next_pcidev HWLOC_NAME(get_next_pcidev) +#define hwloc_get_pcidev_by_busid HWLOC_NAME(get_pcidev_by_busid) +#define hwloc_get_pcidev_by_busidstring HWLOC_NAME(get_pcidev_by_busidstring) +#define hwloc_get_next_osdev HWLOC_NAME(get_next_osdev) +#define hwloc_get_next_bridge HWLOC_NAME(get_next_bridge) +#define hwloc_bridge_covers_pcibus HWLOC_NAME(bridge_covers_pcibus) + +/* hwloc/bitmap.h */ + +#define hwloc_bitmap_s HWLOC_NAME(bitmap_s) +#define hwloc_bitmap_t HWLOC_NAME(bitmap_t) +#define hwloc_const_bitmap_t HWLOC_NAME(const_bitmap_t) + +#define hwloc_bitmap_alloc HWLOC_NAME(bitmap_alloc) +#define hwloc_bitmap_alloc_full HWLOC_NAME(bitmap_alloc_full) +#define hwloc_bitmap_free HWLOC_NAME(bitmap_free) +#define hwloc_bitmap_dup HWLOC_NAME(bitmap_dup) +#define hwloc_bitmap_copy HWLOC_NAME(bitmap_copy) +#define hwloc_bitmap_snprintf HWLOC_NAME(bitmap_snprintf) +#define hwloc_bitmap_asprintf HWLOC_NAME(bitmap_asprintf) +#define hwloc_bitmap_sscanf HWLOC_NAME(bitmap_sscanf) +#define hwloc_bitmap_list_snprintf HWLOC_NAME(bitmap_list_snprintf) +#define hwloc_bitmap_list_asprintf HWLOC_NAME(bitmap_list_asprintf) +#define hwloc_bitmap_list_sscanf HWLOC_NAME(bitmap_list_sscanf) +#define hwloc_bitmap_taskset_snprintf HWLOC_NAME(bitmap_taskset_snprintf) +#define hwloc_bitmap_taskset_asprintf HWLOC_NAME(bitmap_taskset_asprintf) +#define hwloc_bitmap_taskset_sscanf HWLOC_NAME(bitmap_taskset_sscanf) +#define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero) +#define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill) +#define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong) + +#define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong) +#define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong) +#define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong) +#define hwloc_bitmap_only HWLOC_NAME(bitmap_only) +#define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut) +#define hwloc_bitmap_set HWLOC_NAME(bitmap_set) +#define hwloc_bitmap_set_range HWLOC_NAME(bitmap_set_range) +#define hwloc_bitmap_set_ith_ulong HWLOC_NAME(bitmap_set_ith_ulong) +#define hwloc_bitmap_clr 
HWLOC_NAME(bitmap_clr) +#define hwloc_bitmap_clr_range HWLOC_NAME(bitmap_clr_range) +#define hwloc_bitmap_isset HWLOC_NAME(bitmap_isset) +#define hwloc_bitmap_iszero HWLOC_NAME(bitmap_iszero) +#define hwloc_bitmap_isfull HWLOC_NAME(bitmap_isfull) +#define hwloc_bitmap_isequal HWLOC_NAME(bitmap_isequal) +#define hwloc_bitmap_intersects HWLOC_NAME(bitmap_intersects) +#define hwloc_bitmap_isincluded HWLOC_NAME(bitmap_isincluded) +#define hwloc_bitmap_or HWLOC_NAME(bitmap_or) +#define hwloc_bitmap_and HWLOC_NAME(bitmap_and) +#define hwloc_bitmap_andnot HWLOC_NAME(bitmap_andnot) +#define hwloc_bitmap_xor HWLOC_NAME(bitmap_xor) +#define hwloc_bitmap_not HWLOC_NAME(bitmap_not) +#define hwloc_bitmap_first HWLOC_NAME(bitmap_first) +#define hwloc_bitmap_last HWLOC_NAME(bitmap_last) +#define hwloc_bitmap_next HWLOC_NAME(bitmap_next) +#define hwloc_bitmap_first_unset HWLOC_NAME(bitmap_first_unset) +#define hwloc_bitmap_last_unset HWLOC_NAME(bitmap_last_unset) +#define hwloc_bitmap_next_unset HWLOC_NAME(bitmap_next_unset) +#define hwloc_bitmap_singlify HWLOC_NAME(bitmap_singlify) +#define hwloc_bitmap_compare_first HWLOC_NAME(bitmap_compare_first) +#define hwloc_bitmap_compare HWLOC_NAME(bitmap_compare) +#define hwloc_bitmap_weight HWLOC_NAME(bitmap_weight) + +/* hwloc/helper.h */ + +#define hwloc_get_type_or_below_depth HWLOC_NAME(get_type_or_below_depth) +#define hwloc_get_type_or_above_depth HWLOC_NAME(get_type_or_above_depth) +#define hwloc_get_root_obj HWLOC_NAME(get_root_obj) +#define hwloc_get_ancestor_obj_by_depth HWLOC_NAME(get_ancestor_obj_by_depth) +#define hwloc_get_ancestor_obj_by_type HWLOC_NAME(get_ancestor_obj_by_type) +#define hwloc_get_next_obj_by_depth HWLOC_NAME(get_next_obj_by_depth) +#define hwloc_get_next_obj_by_type HWLOC_NAME(get_next_obj_by_type) +#define hwloc_get_pu_obj_by_os_index HWLOC_NAME(get_pu_obj_by_os_index) +#define hwloc_get_numanode_obj_by_os_index HWLOC_NAME(get_numanode_obj_by_os_index) +#define hwloc_get_next_child HWLOC_NAME(get_next_child) +#define hwloc_get_common_ancestor_obj HWLOC_NAME(get_common_ancestor_obj) +#define hwloc_obj_is_in_subtree HWLOC_NAME(obj_is_in_subtree) +#define hwloc_get_first_largest_obj_inside_cpuset HWLOC_NAME(get_first_largest_obj_inside_cpuset) +#define hwloc_get_largest_objs_inside_cpuset HWLOC_NAME(get_largest_objs_inside_cpuset) +#define hwloc_get_next_obj_inside_cpuset_by_depth HWLOC_NAME(get_next_obj_inside_cpuset_by_depth) +#define hwloc_get_next_obj_inside_cpuset_by_type HWLOC_NAME(get_next_obj_inside_cpuset_by_type) +#define hwloc_get_obj_inside_cpuset_by_depth HWLOC_NAME(get_obj_inside_cpuset_by_depth) +#define hwloc_get_obj_inside_cpuset_by_type HWLOC_NAME(get_obj_inside_cpuset_by_type) +#define hwloc_get_nbobjs_inside_cpuset_by_depth HWLOC_NAME(get_nbobjs_inside_cpuset_by_depth) +#define hwloc_get_nbobjs_inside_cpuset_by_type HWLOC_NAME(get_nbobjs_inside_cpuset_by_type) +#define hwloc_get_obj_index_inside_cpuset HWLOC_NAME(get_obj_index_inside_cpuset) +#define hwloc_get_child_covering_cpuset HWLOC_NAME(get_child_covering_cpuset) +#define hwloc_get_obj_covering_cpuset HWLOC_NAME(get_obj_covering_cpuset) +#define hwloc_get_next_obj_covering_cpuset_by_depth HWLOC_NAME(get_next_obj_covering_cpuset_by_depth) +#define hwloc_get_next_obj_covering_cpuset_by_type HWLOC_NAME(get_next_obj_covering_cpuset_by_type) +#define hwloc_obj_type_is_normal HWLOC_NAME(obj_type_is_normal) +#define hwloc_obj_type_is_memory HWLOC_NAME(obj_type_is_memory) +#define hwloc_obj_type_is_io HWLOC_NAME(obj_type_is_io) +#define hwloc_obj_type_is_cache 
HWLOC_NAME(obj_type_is_cache) +#define hwloc_obj_type_is_dcache HWLOC_NAME(obj_type_is_dcache) +#define hwloc_obj_type_is_icache HWLOC_NAME(obj_type_is_icache) +#define hwloc_get_cache_type_depth HWLOC_NAME(get_cache_type_depth) +#define hwloc_get_cache_covering_cpuset HWLOC_NAME(get_cache_covering_cpuset) +#define hwloc_get_shared_cache_covering_obj HWLOC_NAME(get_shared_cache_covering_obj) +#define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs) +#define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type) +#define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type) +#define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e) +#define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE) +#define hwloc_distrib HWLOC_NAME(distrib) +#define hwloc_alloc_membind_policy HWLOC_NAME(alloc_membind_policy) +#define hwloc_alloc_membind_policy_nodeset HWLOC_NAME(alloc_membind_policy_nodeset) +#define hwloc_topology_get_complete_cpuset HWLOC_NAME(topology_get_complete_cpuset) +#define hwloc_topology_get_topology_cpuset HWLOC_NAME(topology_get_topology_cpuset) +#define hwloc_topology_get_allowed_cpuset HWLOC_NAME(topology_get_allowed_cpuset) +#define hwloc_topology_get_complete_nodeset HWLOC_NAME(topology_get_complete_nodeset) +#define hwloc_topology_get_topology_nodeset HWLOC_NAME(topology_get_topology_nodeset) +#define hwloc_topology_get_allowed_nodeset HWLOC_NAME(topology_get_allowed_nodeset) +#define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset) +#define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset) + +/* export.h */ + +#define hwloc_topology_export_xml_flags_e HWLOC_NAME(topology_export_xml_flags_e) +#define HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1 HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_XML_FLAG_V1) +#define hwloc_topology_export_xml HWLOC_NAME(topology_export_xml) +#define hwloc_topology_export_xmlbuffer HWLOC_NAME(topology_export_xmlbuffer) +#define hwloc_free_xmlbuffer HWLOC_NAME(free_xmlbuffer) +#define hwloc_topology_set_userdata_export_callback HWLOC_NAME(topology_set_userdata_export_callback) +#define hwloc_export_obj_userdata HWLOC_NAME(export_obj_userdata) +#define hwloc_export_obj_userdata_base64 HWLOC_NAME(export_obj_userdata_base64) +#define hwloc_topology_set_userdata_import_callback HWLOC_NAME(topology_set_userdata_import_callback) + +#define hwloc_topology_export_synthetic_flags_e HWLOC_NAME(topology_export_synthetic_flags_e) +#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) +#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS) +#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1 HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) +#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_IGNORE_MEMORY HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_IGNORE_MEMORY) +#define hwloc_topology_export_synthetic HWLOC_NAME(topology_export_synthetic) + +/* distances.h */ + +#define hwloc_distances_s HWLOC_NAME(distances_s) + +#define hwloc_distances_kind_e HWLOC_NAME(distances_kind_e) +#define HWLOC_DISTANCES_KIND_FROM_OS HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_OS) +#define HWLOC_DISTANCES_KIND_FROM_USER HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_USER) +#define HWLOC_DISTANCES_KIND_MEANS_LATENCY HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_LATENCY) +#define HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_BANDWIDTH) + +#define hwloc_distances_get HWLOC_NAME(distances_get) +#define hwloc_distances_get_by_depth 
HWLOC_NAME(distances_get_by_depth) +#define hwloc_distances_get_by_type HWLOC_NAME(distances_get_by_type) +#define hwloc_distances_release HWLOC_NAME(distances_release) +#define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index) +#define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values) + +#define hwloc_distances_add_flag_e HWLOC_NAME(distances_add_flag_e) +#define HWLOC_DISTANCES_ADD_FLAG_GROUP HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP) +#define HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP_INACCURATE) + +#define hwloc_distances_add HWLOC_NAME(distances_add) +#define hwloc_distances_remove HWLOC_NAME(distances_remove) +#define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth) +#define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type) + +/* diff.h */ + +#define hwloc_topology_diff_obj_attr_type_e HWLOC_NAME(topology_diff_obj_attr_type_e) +#define hwloc_topology_diff_obj_attr_type_t HWLOC_NAME(topology_diff_obj_attr_type_t) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_SIZE) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_NAME) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_INFO) +#define hwloc_topology_diff_obj_attr_u HWLOC_NAME(topology_diff_obj_attr_u) +#define hwloc_topology_diff_obj_attr_generic_s HWLOC_NAME(topology_diff_obj_attr_generic_s) +#define hwloc_topology_diff_obj_attr_uint64_s HWLOC_NAME(topology_diff_obj_attr_uint64_s) +#define hwloc_topology_diff_obj_attr_string_s HWLOC_NAME(topology_diff_obj_attr_string_s) +#define hwloc_topology_diff_type_e HWLOC_NAME(topology_diff_type_e) +#define hwloc_topology_diff_type_t HWLOC_NAME(topology_diff_type_t) +#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR) +#define HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX HWLOC_NAME_CAPS(TOPOLOGY_DIFF_TOO_COMPLEX) +#define hwloc_topology_diff_u HWLOC_NAME(topology_diff_u) +#define hwloc_topology_diff_t HWLOC_NAME(topology_diff_t) +#define hwloc_topology_diff_generic_s HWLOC_NAME(topology_diff_generic_s) +#define hwloc_topology_diff_obj_attr_s HWLOC_NAME(topology_diff_obj_attr_s) +#define hwloc_topology_diff_too_complex_s HWLOC_NAME(topology_diff_too_complex_s) +#define hwloc_topology_diff_build HWLOC_NAME(topology_diff_build) +#define hwloc_topology_diff_apply_flags_e HWLOC_NAME(topology_diff_apply_flags_e) +#define HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_APPLY_REVERSE) +#define hwloc_topology_diff_apply HWLOC_NAME(topology_diff_apply) +#define hwloc_topology_diff_destroy HWLOC_NAME(topology_diff_destroy) +#define hwloc_topology_diff_load_xml HWLOC_NAME(topology_diff_load_xml) +#define hwloc_topology_diff_export_xml HWLOC_NAME(topology_diff_export_xml) +#define hwloc_topology_diff_load_xmlbuffer HWLOC_NAME(topology_diff_load_xmlbuffer) +#define hwloc_topology_diff_export_xmlbuffer HWLOC_NAME(topology_diff_export_xmlbuffer) + +/* shmem.h */ + +#define hwloc_shmem_topology_get_length HWLOC_NAME(shmem_topology_get_length) +#define hwloc_shmem_topology_write HWLOC_NAME(shmem_topology_write) +#define hwloc_shmem_topology_adopt HWLOC_NAME(shmem_topology_adopt) + +/* glibc-sched.h */ + +#define hwloc_cpuset_to_glibc_sched_affinity HWLOC_NAME(cpuset_to_glibc_sched_affinity) +#define hwloc_cpuset_from_glibc_sched_affinity HWLOC_NAME(cpuset_from_glibc_sched_affinity) + +/* linux-libnuma.h */ + +#define hwloc_cpuset_to_linux_libnuma_ulongs 
HWLOC_NAME(cpuset_to_linux_libnuma_ulongs) +#define hwloc_nodeset_to_linux_libnuma_ulongs HWLOC_NAME(nodeset_to_linux_libnuma_ulongs) +#define hwloc_cpuset_from_linux_libnuma_ulongs HWLOC_NAME(cpuset_from_linux_libnuma_ulongs) +#define hwloc_nodeset_from_linux_libnuma_ulongs HWLOC_NAME(nodeset_from_linux_libnuma_ulongs) +#define hwloc_cpuset_to_linux_libnuma_bitmask HWLOC_NAME(cpuset_to_linux_libnuma_bitmask) +#define hwloc_nodeset_to_linux_libnuma_bitmask HWLOC_NAME(nodeset_to_linux_libnuma_bitmask) +#define hwloc_cpuset_from_linux_libnuma_bitmask HWLOC_NAME(cpuset_from_linux_libnuma_bitmask) +#define hwloc_nodeset_from_linux_libnuma_bitmask HWLOC_NAME(nodeset_from_linux_libnuma_bitmask) + +/* linux.h */ + +#define hwloc_linux_set_tid_cpubind HWLOC_NAME(linux_set_tid_cpubind) +#define hwloc_linux_get_tid_cpubind HWLOC_NAME(linux_get_tid_cpubind) +#define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location) +#define hwloc_linux_read_path_as_cpumask HWLOC_NAME(linux_read_file_cpumask) + +/* openfabrics-verbs.h */ + +#define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset) +#define hwloc_ibv_get_device_osdev HWLOC_NAME(ibv_get_device_osdev) +#define hwloc_ibv_get_device_osdev_by_name HWLOC_NAME(ibv_get_device_osdev_by_name) + +/* intel-mic.h */ + +#define hwloc_intel_mic_get_device_cpuset HWLOC_NAME(intel_mic_get_device_cpuset) +#define hwloc_intel_mic_get_device_osdev_by_index HWLOC_NAME(intel_mic_get_device_osdev_by_index) + +/* opencl.h */ + +#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset) +#define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev) +#define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index) + +/* cuda.h */ + +#define hwloc_cuda_get_device_pci_ids HWLOC_NAME(cuda_get_device_pci_ids) +#define hwloc_cuda_get_device_cpuset HWLOC_NAME(cuda_get_device_cpuset) +#define hwloc_cuda_get_device_pcidev HWLOC_NAME(cuda_get_device_pcidev) +#define hwloc_cuda_get_device_osdev HWLOC_NAME(cuda_get_device_osdev) +#define hwloc_cuda_get_device_osdev_by_index HWLOC_NAME(cuda_get_device_osdev_by_index) + +/* cudart.h */ + +#define hwloc_cudart_get_device_pci_ids HWLOC_NAME(cudart_get_device_pci_ids) +#define hwloc_cudart_get_device_cpuset HWLOC_NAME(cudart_get_device_cpuset) +#define hwloc_cudart_get_device_pcidev HWLOC_NAME(cudart_get_device_pcidev) +#define hwloc_cudart_get_device_osdev_by_index HWLOC_NAME(cudart_get_device_osdev_by_index) + +/* nvml.h */ + +#define hwloc_nvml_get_device_cpuset HWLOC_NAME(nvml_get_device_cpuset) +#define hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev) +#define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index) + +/* gl.h */ + +#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device) +#define hwloc_gl_get_display_osdev_by_name HWLOC_NAME(gl_get_display_osdev_by_name) +#define hwloc_gl_get_display_by_osdev HWLOC_NAME(gl_get_display_by_osdev) + +/* hwloc/plugins.h */ + +#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e) +#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU) +#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL) +#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC) +#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t) +#define hwloc_disc_component HWLOC_NAME(disc_component) + +#define hwloc_backend HWLOC_NAME(backend) + +#define 
hwloc_backend_alloc HWLOC_NAME(backend_alloc) +#define hwloc_backend_enable HWLOC_NAME(backend_enable) + +#define hwloc_component_type_e HWLOC_NAME(component_type_e) +#define HWLOC_COMPONENT_TYPE_DISC HWLOC_NAME_CAPS(COMPONENT_TYPE_DISC) +#define HWLOC_COMPONENT_TYPE_XML HWLOC_NAME_CAPS(COMPONENT_TYPE_XML) +#define hwloc_component_type_t HWLOC_NAME(component_type_t) +#define hwloc_component HWLOC_NAME(component) + +#define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace) + +#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset) +#define hwloc_report_error_t HWLOC_NAME(report_error_t) +#define hwloc_report_os_error HWLOC_NAME(report_os_error) +#define hwloc_hide_errors HWLOC_NAME(hide_errors) +#define hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset) +#define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent) +#define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object) +#define hwloc_obj_add_children_sets HWLOC_NAME(add_children_sets) +#define hwloc_topology_reconnect HWLOC_NAME(topology_reconnect) + +#define hwloc_filter_check_pcidev_subtype_important HWLOC_NAME(filter_check_pcidev_subtype_important) +#define hwloc_filter_check_osdev_subtype_important HWLOC_NAME(filter_check_osdev_subtype_important) +#define hwloc_filter_check_keep_object_type HWLOC_NAME(filter_check_keep_object_type) +#define hwloc_filter_check_keep_object HWLOC_NAME(filter_check_keep_object) + +#define hwloc_pcidisc_find_cap HWLOC_NAME(pcidisc_find_cap) +#define hwloc_pcidisc_find_linkspeed HWLOC_NAME(pcidisc_find_linkspeed) +#define hwloc_pcidisc_check_bridge_type HWLOC_NAME(pcidisc_check_bridge_type) +#define hwloc_pcidisc_setup_bridge_attr HWLOC_NAME(pcidisc_setup_bridge_attr) +#define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid) +#define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach) + +#define hwloc_pcidisc_find_by_busid HWLOC_NAME(pcidisc_find_by_busid) +#define hwloc_pcidisc_find_busid_parent HWLOC_NAME(pcidisc_find_busid_parent) + +/* hwloc/deprecated.h */ + +#define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent) +#define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf) +#define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf) + +#define hwloc_set_membind_nodeset HWLOC_NAME(set_membind_nodeset) +#define hwloc_get_membind_nodeset HWLOC_NAME(get_membind_nodeset) +#define hwloc_set_proc_membind_nodeset HWLOC_NAME(set_proc_membind_nodeset) +#define hwloc_get_proc_membind_nodeset HWLOC_NAME(get_proc_membind_nodeset) +#define hwloc_set_area_membind_nodeset HWLOC_NAME(set_area_membind_nodeset) +#define hwloc_get_area_membind_nodeset HWLOC_NAME(get_area_membind_nodeset) +#define hwloc_alloc_membind_nodeset HWLOC_NAME(alloc_membind_nodeset) + +#define hwloc_cpuset_to_nodeset_strict HWLOC_NAME(cpuset_to_nodeset_strict) +#define hwloc_cpuset_from_nodeset_strict HWLOC_NAME(cpuset_from_nodeset_strict) + +/* private/debug.h */ + +#define hwloc_debug_enabled HWLOC_NAME(debug_enabled) +#define hwloc_debug HWLOC_NAME(debug) + +/* private/misc.h */ + +#define hwloc_snprintf HWLOC_NAME(snprintf) +#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp) +#define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual) +#define hwloc_ffs32 HWLOC_NAME(ffs32) +#define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32) +#define hwloc_flsl_manual HWLOC_NAME(flsl_manual) +#define hwloc_fls32 HWLOC_NAME(fls32) +#define hwloc_flsl_from_fls32 HWLOC_NAME(flsl_from_fls32) +#define 
hwloc_weight_long HWLOC_NAME(weight_long) +#define hwloc_strncasecmp HWLOC_NAME(strncasecmp) + +#define hwloc_bitmap_compare_inclusion HWLOC_NAME(bitmap_compare_inclusion) + +#define hwloc_pci_class_string HWLOC_NAME(pci_class_string) +#define hwloc_linux_pci_link_speed_from_string HWLOC_NAME(linux_pci_link_speed_from_string) + +#define hwloc_cache_type_by_depth_type HWLOC_NAME(cache_type_by_depth_type) +#define hwloc__obj_type_is_normal HWLOC_NAME(_obj_type_is_normal) +#define hwloc__obj_type_is_memory HWLOC_NAME(_obj_type_is_memory) +#define hwloc__obj_type_is_io HWLOC_NAME(_obj_type_is_io) +#define hwloc__obj_type_is_special HWLOC_NAME(_obj_type_is_special) + +#define hwloc__obj_type_is_cache HWLOC_NAME(_obj_type_is_cache) +#define hwloc__obj_type_is_dcache HWLOC_NAME(_obj_type_is_dcache) +#define hwloc__obj_type_is_icache HWLOC_NAME(_obj_type_is_icache) + +/* private/cpuid-x86.h */ + +#define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid) +#define hwloc_x86_cpuid HWLOC_NAME(x86_cpuid) + +/* private/xml.h */ + +#define hwloc__xml_verbose HWLOC_NAME(_xml_verbose) + +#define hwloc__xml_import_state_s HWLOC_NAME(_xml_import_state_s) +#define hwloc__xml_import_state_t HWLOC_NAME(_xml_import_state_t) +#define hwloc__xml_import_diff HWLOC_NAME(_xml_import_diff) +#define hwloc_xml_backend_data_s HWLOC_NAME(xml_backend_data_s) +#define hwloc__xml_export_state_s HWLOC_NAME(_xml_export_state_s) +#define hwloc__xml_export_state_t HWLOC_NAME(_xml_export_state_t) +#define hwloc__xml_export_data_s HWLOC_NAME(_xml_export_data_s) +#define hwloc__xml_export_topology HWLOC_NAME(_xml_export_topology) +#define hwloc__xml_export_diff HWLOC_NAME(_xml_export_diff) + +#define hwloc_xml_callbacks HWLOC_NAME(xml_callbacks) +#define hwloc_xml_component HWLOC_NAME(xml_component) +#define hwloc_xml_callbacks_register HWLOC_NAME(xml_callbacks_register) +#define hwloc_xml_callbacks_reset HWLOC_NAME(xml_callbacks_reset) + +#define hwloc__xml_imported_v1distances_s HWLOC_NAME(_xml_imported_v1distances_s) + +/* private/components.h */ + +#define hwloc_disc_component_force_enable HWLOC_NAME(disc_component_force_enable) +#define hwloc_disc_components_enable_others HWLOC_NAME(disc_components_instantiate_others) + +#define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem) +#define hwloc_backends_find_callbacks HWLOC_NAME(backends_find_callbacks) + +#define hwloc_backends_init HWLOC_NAME(backends_init) +#define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all) + +#define hwloc_components_init HWLOC_NAME(components_init) +#define hwloc_components_fini HWLOC_NAME(components_fini) + +/* private/internal-private.h */ + +#define hwloc_xml_component HWLOC_NAME(xml_component) +#define hwloc_synthetic_component HWLOC_NAME(synthetic_component) + +#define hwloc_aix_component HWLOC_NAME(aix_component) +#define hwloc_bgq_component HWLOC_NAME(bgq_component) +#define hwloc_darwin_component HWLOC_NAME(darwin_component) +#define hwloc_freebsd_component HWLOC_NAME(freebsd_component) +#define hwloc_hpux_component HWLOC_NAME(hpux_component) +#define hwloc_linux_component HWLOC_NAME(linux_component) +#define hwloc_netbsd_component HWLOC_NAME(netbsd_component) +#define hwloc_noos_component HWLOC_NAME(noos_component) +#define hwloc_solaris_component HWLOC_NAME(solaris_component) +#define hwloc_windows_component HWLOC_NAME(windows_component) +#define hwloc_x86_component HWLOC_NAME(x86_component) + +#define hwloc_cuda_component HWLOC_NAME(cuda_component) +#define hwloc_gl_component HWLOC_NAME(gl_component) +#define 
hwloc_linuxio_component HWLOC_NAME(linuxio_component) +#define hwloc_nvml_component HWLOC_NAME(nvml_component) +#define hwloc_opencl_component HWLOC_NAME(opencl_component) +#define hwloc_pci_component HWLOC_NAME(pci_component) + +#define hwloc_xml_libxml_component HWLOC_NAME(xml_libxml_component) +#define hwloc_xml_nolibxml_component HWLOC_NAME(xml_nolibxml_component) + +/* private/private.h */ + +#define hwloc_special_level_s HWLOC_NAME(special_level_s) + +#define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s) + +#define hwloc_alloc_root_sets HWLOC_NAME(alloc_root_sets) +#define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level) +#define hwloc_get_sysctlbyname HWLOC_NAME(get_sysctlbyname) +#define hwloc_get_sysctl HWLOC_NAME(get_sysctl) +#define hwloc_fallback_nbprocessors HWLOC_NAME(fallback_nbprocessors) + +#define hwloc__object_cpusets_compare_first HWLOC_NAME(_object_cpusets_compare_first) +#define hwloc__reorder_children HWLOC_NAME(_reorder_children) + +#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults) +#define hwloc_topology_clear HWLOC_NAME(topology_clear) + +#define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object) + +#define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init) +#define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare) +#define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit) +#define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset) +#define hwloc_pci_belowroot_apply_locality HWLOC_NAME(pci_belowroot_apply_locality) + +#define hwloc__add_info HWLOC_NAME(_add_info) +#define hwloc__add_info_nodup HWLOC_NAME(_add_info_nodup) +#define hwloc__move_infos HWLOC_NAME(_move_infos) +#define hwloc__free_infos HWLOC_NAME(_free_infos) + +#define hwloc_binding_hooks HWLOC_NAME(binding_hooks) +#define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks) +#define hwloc_set_binding_hooks HWLOC_NAME(set_binding_hooks) + +#define hwloc_set_linuxfs_hooks HWLOC_NAME(set_linuxfs_hooks) +#define hwloc_set_bgq_hooks HWLOC_NAME(set_bgq_hooks) +#define hwloc_set_solaris_hooks HWLOC_NAME(set_solaris_hooks) +#define hwloc_set_aix_hooks HWLOC_NAME(set_aix_hooks) +#define hwloc_set_windows_hooks HWLOC_NAME(set_windows_hooks) +#define hwloc_set_darwin_hooks HWLOC_NAME(set_darwin_hooks) +#define hwloc_set_freebsd_hooks HWLOC_NAME(set_freebsd_hooks) +#define hwloc_set_netbsd_hooks HWLOC_NAME(set_netbsd_hooks) +#define hwloc_set_hpux_hooks HWLOC_NAME(set_hpux_hooks) + +#define hwloc_look_hardwired_fujitsu_k HWLOC_NAME(look_hardwired_fujitsu_k) +#define hwloc_look_hardwired_fujitsu_fx10 HWLOC_NAME(look_hardwired_fujitsu_fx10) +#define hwloc_look_hardwired_fujitsu_fx100 HWLOC_NAME(look_hardwired_fujitsu_fx100) + +#define hwloc_add_uname_info HWLOC_NAME(add_uname_info) +#define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object) +#define hwloc_free_object_and_children HWLOC_NAME(free_object_and_children) +#define hwloc_free_object_siblings_and_children HWLOC_NAME(free_object_siblings_and_children) + +#define hwloc_alloc_heap HWLOC_NAME(alloc_heap) +#define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap) +#define hwloc_free_heap HWLOC_NAME(free_heap) +#define hwloc_free_mmap HWLOC_NAME(free_mmap) +#define hwloc_alloc_or_fail HWLOC_NAME(alloc_or_fail) + +#define hwloc_internal_distances_s HWLOC_NAME(internal_distances_s) +#define hwloc_internal_distances_init HWLOC_NAME(internal_distances_init) +#define hwloc_internal_distances_prepare 
HWLOC_NAME(internal_distances_prepare)
+#define hwloc_internal_distances_dup HWLOC_NAME(internal_distances_dup)
+#define hwloc_internal_distances_refresh HWLOC_NAME(internal_distances_refresh)
+#define hwloc_internal_distances_destroy HWLOC_NAME(internal_distances_destroy)
+
+#define hwloc_internal_distances_add HWLOC_NAME(internal_distances_add)
+#define hwloc_internal_distances_add_by_index HWLOC_NAME(internal_distances_add_by_index)
+#define hwloc_internal_distances_invalidate_cached_objs HWLOC_NAME(hwloc_internal_distances_invalidate_cached_objs)
+
+#define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64)
+#define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64)
+
+#define hwloc_progname HWLOC_NAME(progname)
+
+#define hwloc__topology_disadopt HWLOC_NAME(_topology_disadopt)
+#define hwloc__topology_dup HWLOC_NAME(_topology_dup)
+
+#define hwloc_tma HWLOC_NAME(tma)
+#define hwloc_tma_malloc HWLOC_NAME(tma_malloc)
+#define hwloc_tma_calloc HWLOC_NAME(tma_calloc)
+#define hwloc_tma_strdup HWLOC_NAME(tma_strdup)
+#define hwloc_bitmap_tma_dup HWLOC_NAME(bitmap_tma_dup)
+
+/* private/solaris-chiptype.h */
+
+#define hwloc_solaris_chip_info_s HWLOC_NAME(solaris_chip_info_s)
+#define hwloc_solaris_get_chip_info HWLOC_NAME(solaris_get_chip_info)
+
+#endif /* HWLOC_SYM_TRANSFORM */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_RENAME_H */
diff --git a/src/3rdparty/hwloc/include/hwloc/shmem.h b/src/3rdparty/hwloc/include/hwloc/shmem.h
new file mode 100644
index 000000000..222494630
--- /dev/null
+++ b/src/3rdparty/hwloc/include/hwloc/shmem.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright © 2013-2018 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Sharing topologies between processes
+ */
+
+#ifndef HWLOC_SHMEM_H
+#define HWLOC_SHMEM_H
+
+#include <hwloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#elif 0
+}
+#endif
+
+
+/** \defgroup hwlocality_shmem Sharing topologies between processes
+ *
+ * These functions are used to share a topology between processes by
+ * duplicating it into a file-backed shared-memory buffer.
+ *
+ * The master process must first get the required shared-memory size
+ * for storing this topology with hwloc_shmem_topology_get_length().
+ *
+ * Then it must find a virtual memory area of that size that is available
+ * in all processes (identical virtual addresses in all processes).
+ * On Linux, this can be done by comparing holes found in /proc/\<pid\>/maps
+ * for each process.
+ *
+ * Once found, it must open a destination file for storing the buffer,
+ * and pass it to hwloc_shmem_topology_write() together with the
+ * virtual memory address and length obtained above.
+ *
+ * Other processes may then adopt this shared topology by opening the
+ * same file and passing it to hwloc_shmem_topology_adopt() with the
+ * exact same virtual memory address and length.
+ *
+ * @{
+ */
+
+/** \brief Get the required shared memory length for storing a topology.
+ *
+ * This length (in bytes) must be used in hwloc_shmem_topology_write()
+ * and hwloc_shmem_topology_adopt() later.
+ *
+ * \note Flags \p flags are currently unused, must be 0.
+ */
+HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology,
+                                                   size_t *lengthp,
+                                                   unsigned long flags);
+
+/** \brief Duplicate a topology to a shared memory file.
+ *
+ * Temporarily map a file in virtual memory and duplicate the
+ * topology \p topology by allocating duplicates in there.
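+ *
+ * A master-side sketch of the protocol described above (illustrative only;
+ * error handling is omitted and addr stands for a mapping address that was
+ * verified to be available in all participating processes):
+ * \code
+ * size_t length;
+ * hwloc_shmem_topology_get_length(topology, &length, 0);
+ * int fd = open("/tmp/hwloc-topo", O_CREAT | O_RDWR, 0600);
+ * hwloc_shmem_topology_write(topology, fd, 0, addr, length, 0);
+ * // other processes then call hwloc_shmem_topology_adopt() with the
+ * // same fd/fileoffset/addr/length
+ * \endcode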
+ * + * The segment of the file pointed by descriptor \p fd, + * starting at offset \p fileoffset, and of length \p length (in bytes), + * will be temporarily mapped at virtual address \p mmap_address + * during the duplication. + * + * The mapping length \p length must have been previously obtained with + * hwloc_shmem_topology_get_length() + * and the topology must not have been modified in the meantime. + * + * \note Flags \p flags are currently unused, must be 0. + * + * \note The object userdata pointer is duplicated but the pointed buffer + * is not. However the caller may also allocate it manually in shared memory + * to share it as well. + * + * \return -1 with errno set to EBUSY if the virtual memory mapping defined + * by \p mmap_address and \p length isn't available in the process. + * \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address + * or \p length aren't page-aligned. + */ +HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology, + int fd, hwloc_uint64_t fileoffset, + void *mmap_address, size_t length, + unsigned long flags); + +/** \brief Adopt a shared memory topology stored in a file. + * + * Map a file in virtual memory and adopt the topology that was previously + * stored there with hwloc_shmem_topology_write(). + * + * The returned adopted topology in \p topologyp can be used just like any + * topology. And it must be destroyed with hwloc_topology_destroy() as usual. + * + * However the topology is read-only. + * For instance, it cannot be modified with hwloc_topology_restrict() + * and object userdata pointers cannot be changed. + * + * The segment of the file pointed by descriptor \p fd, + * starting at offset \p fileoffset, and of length \p length (in bytes), + * will be mapped at virtual address \p mmap_address. + * + * The file pointed by descriptor \p fd, the offset \p fileoffset, + * the requested mapping virtual address \p mmap_address and the length \p length + * must be identical to what was given to hwloc_shmem_topology_write() earlier. + * + * \note Flags \p flags are currently unused, must be 0. + * + * \note The object userdata pointer should not be used unless the process + * that created the shared topology also placed userdata-pointed buffers + * in shared memory. + * + * \note This function takes care of calling hwloc_topology_abi_check(). + * + * \return -1 with errno set to EBUSY if the virtual memory mapping defined + * by \p mmap_address and \p length isn't available in the process. + * + * \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address + * or \p length aren't page-aligned, or do not match what was given to + * hwloc_shmem_topology_write() earlier. + * + * \return -1 with errno set to EINVAL if the layout of the topology structure + * is different between the writer process and the adopter process. + */ +HWLOC_DECLSPEC int hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, + int fd, hwloc_uint64_t fileoffset, + void *mmap_address, size_t length, + unsigned long flags); +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_SHMEM_H */ diff --git a/src/3rdparty/hwloc/include/private/autogen/config.h b/src/3rdparty/hwloc/include/private/autogen/config.h new file mode 100644 index 000000000..a97bdfea2 --- /dev/null +++ b/src/3rdparty/hwloc/include/private/autogen/config.h @@ -0,0 +1,672 @@ +/* + * Copyright © 2009, 2011, 2012 CNRS. All rights reserved. + * Copyright © 2009-2018 Inria. All rights reserved. 
+ * Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved. + * Copyright © 2009 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef HWLOC_CONFIGURE_H +#define HWLOC_CONFIGURE_H + +#define DECLSPEC_EXPORTS + +#define HWLOC_HAVE_MSVC_CPUIDEX 1 + +/* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */ +#define HAVE_CACHE_DESCRIPTOR 0 + +/* Define to 1 if the system has the type `CACHE_RELATIONSHIP'. */ +#define HAVE_CACHE_RELATIONSHIP 0 + +/* Define to 1 if you have the `clz' function. */ +/* #undef HAVE_CLZ */ + +/* Define to 1 if you have the `clzl' function. */ +/* #undef HAVE_CLZL */ + +/* Define to 1 if you have the <CL/cl_ext.h> header file. */ +/* #undef HAVE_CL_CL_EXT_H */ + +/* Define to 1 if you have the `cpuset_setaffinity' function. */ +/* #undef HAVE_CPUSET_SETAFFINITY */ + +/* Define to 1 if you have the `cpuset_setid' function. */ +/* #undef HAVE_CPUSET_SETID */ + +/* Define to 1 if we have -lcuda */ +/* #undef HAVE_CUDA */ + +/* Define to 1 if you have the <cuda.h> header file. */ +/* #undef HAVE_CUDA_H */ + +/* Define to 1 if you have the <cuda_runtime_api.h> header file. */ +/* #undef HAVE_CUDA_RUNTIME_API_H */ + +/* Define to 1 if you have the declaration of `CL_DEVICE_TOPOLOGY_AMD', and to + 0 if you don't. */ +/* #undef HAVE_DECL_CL_DEVICE_TOPOLOGY_AMD */ + +/* Define to 1 if you have the declaration of `CTL_HW', and to 0 if you don't. + */ +/* #undef HAVE_DECL_CTL_HW */ + +/* Define to 1 if you have the declaration of `fabsf', and to 0 if you don't. + */ +#define HAVE_DECL_FABSF 1 + +/* Define to 1 if you have the declaration of `modff', and to 0 if you don't. + */ +#define HAVE_DECL_MODFF 1 + +/* Define to 1 if you have the declaration of `HW_NCPU', and to 0 if you + don't. */ +/* #undef HAVE_DECL_HW_NCPU */ + +/* Define to 1 if you have the declaration of + `nvmlDeviceGetMaxPcieLinkGeneration', and to 0 if you don't. */ +/* #undef HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION */ + +/* Define to 1 if you have the declaration of `pthread_getaffinity_np', and to + 0 if you don't. */ +#define HAVE_DECL_PTHREAD_GETAFFINITY_NP 0 + +/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to + 0 if you don't. */ +#define HAVE_DECL_PTHREAD_SETAFFINITY_NP 0 + +/* Define to 1 if you have the declaration of `strtoull', and to 0 if you + don't. */ +#define HAVE_DECL_STRTOULL 0 + +/* Define to 1 if you have the declaration of `strcasecmp', and to 0 if you + don't. */ +/* #undef HWLOC_HAVE_DECL_STRCASECMP */ + +/* Define to 1 if you have the declaration of `snprintf', and to 0 if you + don't. */ +#define HAVE_DECL_SNPRINTF 0 + +/* Define to 1 if you have the declaration of `_strdup', and to 0 if you + don't. */ +#define HAVE_DECL__STRDUP 1 + +/* Define to 1 if you have the declaration of `_putenv', and to 0 if you + don't. */ +#define HAVE_DECL__PUTENV 1 + +/* Define to 1 if you have the declaration of `_SC_LARGE_PAGESIZE', and to 0 + if you don't. */ +#define HAVE_DECL__SC_LARGE_PAGESIZE 0 + +/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_CONF', and to 0 + if you don't. */ +#define HAVE_DECL__SC_NPROCESSORS_CONF 0 + +/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_ONLN', and to 0 + if you don't. */ +#define HAVE_DECL__SC_NPROCESSORS_ONLN 0 + +/* Define to 1 if you have the declaration of `_SC_NPROC_CONF', and to 0 if + you don't. 
*/ +#define HAVE_DECL__SC_NPROC_CONF 0 + +/* Define to 1 if you have the declaration of `_SC_NPROC_ONLN', and to 0 if + you don't. */ +#define HAVE_DECL__SC_NPROC_ONLN 0 + +/* Define to 1 if you have the declaration of `_SC_PAGESIZE', and to 0 if you + don't. */ +#define HAVE_DECL__SC_PAGESIZE 0 + +/* Define to 1 if you have the declaration of `_SC_PAGE_SIZE', and to 0 if you + don't. */ +#define HAVE_DECL__SC_PAGE_SIZE 0 + +/* Define to 1 if you have the <dirent.h> header file. */ +/* #define HAVE_DIRENT_H 1 */ +#undef HAVE_DIRENT_H + +/* Define to 1 if you have the <dlfcn.h> header file. */ +/* #undef HAVE_DLFCN_H */ + +/* Define to 1 if you have the `ffs' function. */ +/* #undef HAVE_FFS */ + +/* Define to 1 if you have the `ffsl' function. */ +/* #undef HAVE_FFSL */ + +/* Define to 1 if you have the `fls' function. */ +/* #undef HAVE_FLS */ + +/* Define to 1 if you have the `flsl' function. */ +/* #undef HAVE_FLSL */ + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 + +/* Define to 1 if the system has the type `GROUP_AFFINITY'. */ +#define HAVE_GROUP_AFFINITY 1 + +/* Define to 1 if the system has the type `GROUP_RELATIONSHIP'. */ +#define HAVE_GROUP_RELATIONSHIP 1 + +/* Define to 1 if you have the `host_info' function. */ +/* #undef HAVE_HOST_INFO */ + +/* Define to 1 if you have the <infiniband/verbs.h> header file. */ +/* #undef HAVE_INFINIBAND_VERBS_H */ + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if the system has the type `KAFFINITY'. */ +#define HAVE_KAFFINITY 1 + +/* Define to 1 if you have the <kstat.h> header file. */ +/* #undef HAVE_KSTAT_H */ + +/* Define to 1 if you have the <langinfo.h> header file. */ +/* #undef HAVE_LANGINFO_H */ + +/* Define to 1 if we have -lgdi32 */ +#define HAVE_LIBGDI32 1 + +/* Define to 1 if we have -libverbs */ +/* #undef HAVE_LIBIBVERBS */ + +/* Define to 1 if we have -lkstat */ +/* #undef HAVE_LIBKSTAT */ + +/* Define to 1 if we have -llgrp */ +/* #undef HAVE_LIBLGRP */ + +/* Define to 1 if you have the <locale.h> header file. */ +#define HAVE_LOCALE_H 1 + +/* Define to 1 if the system has the type `LOGICAL_PROCESSOR_RELATIONSHIP'. */ +#define HAVE_LOGICAL_PROCESSOR_RELATIONSHIP 1 + +/* Define to 1 if you have the <mach/mach_host.h> header file. */ +/* #undef HAVE_MACH_MACH_HOST_H */ + +/* Define to 1 if you have the <mach/mach_init.h> header file. */ +/* #undef HAVE_MACH_MACH_INIT_H */ + +/* Define to 1 if you have the <malloc.h> header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the `memalign' function. */ +/* #undef HAVE_MEMALIGN */ + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `nl_langinfo' function. */ +/* #undef HAVE_NL_LANGINFO */ + +/* Define to 1 if you have the <numaif.h> header file. */ +/* #undef HAVE_NUMAIF_H */ + +/* Define to 1 if the system has the type `NUMA_NODE_RELATIONSHIP'. */ +#define HAVE_NUMA_NODE_RELATIONSHIP 1 + +/* Define to 1 if you have the <NVCtrl/NVCtrl.h> header file. */ +/* #undef HAVE_NVCTRL_NVCTRL_H */ + +/* Define to 1 if you have the <nvml.h> header file. */ +/* #undef HAVE_NVML_H */ + +/* Define to 1 if you have the `openat' function. */ +/* #undef HAVE_OPENAT */ + +/* Define to 1 if you have the <picl.h> header file. */ +/* #undef HAVE_PICL_H */ + +/* Define to 1 if you have the `posix_memalign' function. */ +/* #undef HAVE_POSIX_MEMALIGN */ + +/* Define to 1 if the system has the type `PROCESSOR_CACHE_TYPE'. 
*/ +#define HAVE_PROCESSOR_CACHE_TYPE 1 + +/* Define to 1 if the system has the type `PROCESSOR_GROUP_INFO'. */ +#define HAVE_PROCESSOR_GROUP_INFO 1 + +/* Define to 1 if the system has the type `PROCESSOR_RELATIONSHIP'. */ +#define HAVE_PROCESSOR_RELATIONSHIP 1 + +/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_BLOCK'. */ +/* #undef HAVE_PSAPI_WORKING_SET_EX_BLOCK */ + +/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_INFORMATION'. + */ +/* #undef HAVE_PSAPI_WORKING_SET_EX_INFORMATION */ + +/* Define to 1 if the system has the type `PROCESSOR_NUMBER'. */ +#define HAVE_PROCESSOR_NUMBER 1 + +/* Define to 1 if you have the <pthread_np.h> header file. */ +/* #undef HAVE_PTHREAD_NP_H */ + +/* Define to 1 if the system has the type `pthread_t'. */ +/* #undef HAVE_PTHREAD_T */ +#undef HAVE_PTHREAD_T + +/* Define to 1 if you have the `putwc' function. */ +#define HAVE_PUTWC 1 + +/* Define to 1 if the system has the type `RelationProcessorPackage'. */ +/* #undef HAVE_RELATIONPROCESSORPACKAGE */ + +/* Define to 1 if you have the `setlocale' function. */ +#define HAVE_SETLOCALE 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strftime' function. */ +#define HAVE_STRFTIME 1 + +/* Define to 1 if you have the <strings.h> header file. */ +/* #define HAVE_STRINGS_H 1*/ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strncasecmp' function. */ +#define HAVE_STRNCASECMP 1 + +/* Define to '1' if sysctl is present and usable */ +/* #undef HAVE_SYSCTL */ + +/* Define to '1' if sysctlbyname is present and usable */ +/* #undef HAVE_SYSCTLBYNAME */ + +/* Define to 1 if the system has the type + `SYSTEM_LOGICAL_PROCESSOR_INFORMATION'. */ +#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION 1 + +/* Define to 1 if the system has the type + `SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */ +#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX 1 + +/* Define to 1 if you have the <sys/cpuset.h> header file. */ +/* #undef HAVE_SYS_CPUSET_H */ + +/* Define to 1 if you have the <sys/lgrp_user.h> header file. */ +/* #undef HAVE_SYS_LGRP_USER_H */ + +/* Define to 1 if you have the <sys/mman.h> header file. */ +/* #undef HAVE_SYS_MMAN_H */ + +/* Define to 1 if you have the <sys/param.h> header file. */ +/* #define HAVE_SYS_PARAM_H 1 */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/sysctl.h> header file. */ +/* #undef HAVE_SYS_SYSCTL_H */ + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <sys/utsname.h> header file. */ +/* #undef HAVE_SYS_UTSNAME_H */ + +/* Define to 1 if you have the `uname' function. */ +/* #undef HAVE_UNAME */ + +/* Define to 1 if you have the <unistd.h> header file. */ +/* #define HAVE_UNISTD_H 1 */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the `uselocale' function. */ +/* #undef HAVE_USELOCALE */ + +/* Define to 1 if the system has the type `wchar_t'. */ +#define HAVE_WCHAR_T 1 + +/* Define to 1 if you have the <X11/keysym.h> header file. */ +/* #undef HAVE_X11_KEYSYM_H */ + +/* Define to 1 if you have the <X11/Xlib.h> header file. */ +/* #undef HAVE_X11_XLIB_H */ + +/* Define to 1 if you have the <X11/Xutil.h> header file. 
*/ +/* #undef HAVE_X11_XUTIL_H */ + +/* Define to 1 if you have the <xlocale.h> header file. */ +/* #undef HAVE_XLOCALE_H */ + +/* Define to 1 on AIX */ +/* #undef HWLOC_AIX_SYS */ + +/* Define to 1 on BlueGene/Q */ +/* #undef HWLOC_BGQ_SYS */ + +/* Whether C compiler supports symbol visibility or not */ +#define HWLOC_C_HAVE_VISIBILITY 0 + +/* Define to 1 on Darwin */ +/* #undef HWLOC_DARWIN_SYS */ + +/* Whether we are in debugging mode or not */ +/* #undef HWLOC_DEBUG */ + +/* Define to 1 on *FREEBSD */ +/* #undef HWLOC_FREEBSD_SYS */ + +/* Whether your compiler has __attribute__ or not */ +/* #define HWLOC_HAVE_ATTRIBUTE 1 */ +#undef HWLOC_HAVE_ATTRIBUTE + +/* Whether your compiler has __attribute__ aligned or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_ALIGNED 1 */ + +/* Whether your compiler has __attribute__ always_inline or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_ALWAYS_INLINE 1 */ + +/* Whether your compiler has __attribute__ cold or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_COLD 1 */ + +/* Whether your compiler has __attribute__ const or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_CONST 1 */ + +/* Whether your compiler has __attribute__ deprecated or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_DEPRECATED 1 */ + +/* Whether your compiler has __attribute__ format or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_FORMAT 1 */ + +/* Whether your compiler has __attribute__ hot or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_HOT 1 */ + +/* Whether your compiler has __attribute__ malloc or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_MALLOC 1 */ + +/* Whether your compiler has __attribute__ may_alias or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS 1 */ + +/* Whether your compiler has __attribute__ nonnull or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_NONNULL 1 */ + +/* Whether your compiler has __attribute__ noreturn or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_NORETURN 1 */ + +/* Whether your compiler has __attribute__ no_instrument_function or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION 1 */ + +/* Whether your compiler has __attribute__ packed or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_PACKED 1 */ + +/* Whether your compiler has __attribute__ pure or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_PURE 1 */ + +/* Whether your compiler has __attribute__ sentinel or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_SENTINEL 1 */ + +/* Whether your compiler has __attribute__ unused or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_UNUSED 1 */ + +/* Whether your compiler has __attribute__ warn unused result or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT 1 */ + +/* Whether your compiler has __attribute__ weak alias or not */ +/* #define HWLOC_HAVE_ATTRIBUTE_WEAK_ALIAS 1 */ + +/* Define to 1 if your `ffs' function is known to be broken. */ +/* #undef HWLOC_HAVE_BROKEN_FFS */ + +/* Define to 1 if you have the `cairo' library. */ +/* #undef HWLOC_HAVE_CAIRO */ + +/* Define to 1 if you have the `clz' function. */ +/* #undef HWLOC_HAVE_CLZ */ + +/* Define to 1 if you have the `clzl' function. */ +/* #undef HWLOC_HAVE_CLZL */ + +/* Define to 1 if you have cpuid */ +/* #undef HWLOC_HAVE_CPUID */ + +/* Define to 1 if the CPU_SET macro works */ +/* #undef HWLOC_HAVE_CPU_SET */ + +/* Define to 1 if the CPU_SET_S macro works */ +/* #undef HWLOC_HAVE_CPU_SET_S */ + +/* Define to 1 if you have the `cudart' SDK. 
*/ +/* #undef HWLOC_HAVE_CUDART */ + +/* Define to 1 if function `clz' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_CLZ */ + +/* Define to 1 if function `clzl' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_CLZL */ + +/* Define to 1 if function `ffs' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_FFS */ + +/* Define to 1 if function `ffsl' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_FFSL */ + +/* Define to 1 if function `fls' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_FLS */ + +/* Define to 1 if function `flsl' is declared by system headers */ +/* #undef HWLOC_HAVE_DECL_FLSL */ + +/* Define to 1 if you have the `ffs' function. */ +/* #undef HWLOC_HAVE_FFS */ + +/* Define to 1 if you have the `ffsl' function. */ +/* #undef HWLOC_HAVE_FFSL */ + +/* Define to 1 if you have the `fls' function. */ +/* #undef HWLOC_HAVE_FLS */ + +/* Define to 1 if you have the `flsl' function. */ +/* #undef HWLOC_HAVE_FLSL */ + +/* Define to 1 if you have the GL module components. */ +/* #undef HWLOC_HAVE_GL */ + +/* Define to 1 if you have a library providing the termcap interface */ +/* #undef HWLOC_HAVE_LIBTERMCAP */ + +/* Define to 1 if you have the `libxml2' library. */ +/* #undef HWLOC_HAVE_LIBXML2 */ + +/* Define to 1 if building the Linux PCI component */ +/* #undef HWLOC_HAVE_LINUXPCI */ + +/* Define to 1 if you have the `NVML' library. */ +/* #undef HWLOC_HAVE_NVML */ + +/* Define to 1 if glibc provides the old prototype (without length) of + sched_setaffinity() */ +/* #undef HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ + +/* Define to 1 if you have the `OpenCL' library. */ +/* #undef HWLOC_HAVE_OPENCL */ + +/* Define to 1 if the hwloc library should support dynamically-loaded plugins + */ +/* #undef HWLOC_HAVE_PLUGINS */ + +/* `Define to 1 if you have pthread_getthrds_np' */ +/* #undef HWLOC_HAVE_PTHREAD_GETTHRDS_NP */ + +/* Define to 1 if pthread mutexes are available */ +/* #undef HWLOC_HAVE_PTHREAD_MUTEX */ + +/* Define to 1 if glibc provides a prototype of sched_setaffinity() */ +#define HWLOC_HAVE_SCHED_SETAFFINITY 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HWLOC_HAVE_STDINT_H 1 + +/* Define to 1 if you have the `windows.h' header. */ +#define HWLOC_HAVE_WINDOWS_H 1 + +/* Define to 1 if X11 headers including Xutil.h and keysym.h are available. 
*/ +/* #undef HWLOC_HAVE_X11_KEYSYM */ + +/* Define to 1 if function `syscall' is available */ +/* #undef HWLOC_HAVE_SYSCALL */ + +/* Define to 1 on HP-UX */ +/* #undef HWLOC_HPUX_SYS */ + +/* Define to 1 on Linux */ +/* #undef HWLOC_LINUX_SYS */ + +/* Define to 1 on *NETBSD */ +/* #undef HWLOC_NETBSD_SYS */ + +/* The size of `unsigned int', as computed by sizeof */ +#define HWLOC_SIZEOF_UNSIGNED_INT 4 + +/* The size of `unsigned long', as computed by sizeof */ +#define HWLOC_SIZEOF_UNSIGNED_LONG 4 + +/* Define to 1 on Solaris */ +/* #undef HWLOC_SOLARIS_SYS */ + +/* The hwloc symbol prefix */ +#define HWLOC_SYM_PREFIX hwloc_ + +/* The hwloc symbol prefix in all caps */ +#define HWLOC_SYM_PREFIX_CAPS HWLOC_ + +/* Whether we need to re-define all the hwloc public symbols or not */ +#define HWLOC_SYM_TRANSFORM 0 + +/* Define to 1 on unsupported systems */ +/* #undef HWLOC_UNSUPPORTED_SYS */ + +/* Define to 1 if ncurses works, preferred over curses */ +/* #undef HWLOC_USE_NCURSES */ + +/* Define to 1 on WINDOWS */ +#define HWLOC_WIN_SYS 1 + +/* Define to 1 on x86_32 */ +/* #undef HWLOC_X86_32_ARCH */ + +/* Define to 1 on x86_64 */ +#define HWLOC_X86_64_ARCH 1 + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "hwloc" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "http://www.open-mpi.org/projects/hwloc/" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "hwloc" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "hwloc" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "hwloc" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION HWLOC_VERSION + +/* The size of `unsigned int', as computed by sizeof. */ +#define SIZEOF_UNSIGNED_INT 4 + +/* The size of `unsigned long', as computed by sizeof. */ +#define SIZEOF_UNSIGNED_LONG 4 + +/* The size of `void *', as computed by sizeof. */ +#define SIZEOF_VOID_P 8 + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Enable extensions on HP-UX. */ +#ifndef _HPUX_SOURCE +# define _HPUX_SOURCE 1 +#endif + + +/* Enable extensions on AIX 3, Interix. */ +/* +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +*/ + +/* Enable GNU extensions on systems that have them. */ +/* +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +*/ +/* Enable threading extensions on Solaris. */ +/* +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +*/ +/* Enable extensions on HP NonStop. */ +/* +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +*/ +/* Enable general extensions on Solaris. */ +/* +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif +*/ + + +/* Version number of package */ +#define VERSION HWLOC_VERSION + +/* Define to 1 if the X Window System is missing or not being used. */ +#define X_DISPLAY_MISSING 1 + +/* Define to 1 if on MINIX. */ +/* #undef _MINIX */ + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define to 1 if you need to in order for `stat' and other things to work. 
*/ +/* #undef _POSIX_SOURCE */ + +/* Define this to the process ID type */ +#define hwloc_pid_t HANDLE + +/* Define this to either strncasecmp or strncmp */ +#define hwloc_strncasecmp strncasecmp + +/* Define this to the thread ID type */ +#define hwloc_thread_t HANDLE + + +#endif /* HWLOC_CONFIGURE_H */ diff --git a/src/3rdparty/hwloc/include/private/components.h b/src/3rdparty/hwloc/include/private/components.h new file mode 100644 index 000000000..8525bbe46 --- /dev/null +++ b/src/3rdparty/hwloc/include/private/components.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2012-2015 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + + +#ifdef HWLOC_INSIDE_PLUGIN +/* + * these declarations are internal only, they are not available to plugins + * (many functions below are internal static symbols). + */ +#error This file should not be used in plugins +#endif + + +#ifndef PRIVATE_COMPONENTS_H +#define PRIVATE_COMPONENTS_H 1 + +#include <hwloc/plugins.h> + +struct hwloc_topology; + +extern int hwloc_disc_component_force_enable(struct hwloc_topology *topology, + int envvar_forced, /* 1 if forced through envvar, 0 if forced through API */ + int type, const char *name, + const void *data1, const void *data2, const void *data3); +extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology); + +/* Compute the topology is_thissystem flag and find some callbacks based on enabled backends */ +extern void hwloc_backends_is_thissystem(struct hwloc_topology *topology); +extern void hwloc_backends_find_callbacks(struct hwloc_topology *topology); + +/* Initialize the list of backends used by a topology */ +extern void hwloc_backends_init(struct hwloc_topology *topology); +/* Disable and destroy all backends used by a topology */ +extern void hwloc_backends_disable_all(struct hwloc_topology *topology); + +/* Used by the core to setup/destroy the list of components */ +extern void hwloc_components_init(void); /* increases components refcount, should be called exactly once per topology (during init) */ +extern void hwloc_components_fini(void); /* decreases components refcount, should be called exactly once per topology (during destroy) */ + +#endif /* PRIVATE_COMPONENTS_H */ + diff --git a/src/3rdparty/hwloc/include/private/cpuid-x86.h b/src/3rdparty/hwloc/include/private/cpuid-x86.h new file mode 100644 index 000000000..2758afe04 --- /dev/null +++ b/src/3rdparty/hwloc/include/private/cpuid-x86.h @@ -0,0 +1,86 @@ +/* + * Copyright © 2010-2012, 2014 Université Bordeaux + * Copyright © 2010 Cisco Systems, Inc. All rights reserved. + * Copyright © 2014 Inria. All rights reserved. + * + * See COPYING in top-level directory. + */ + +/* Internals for x86's cpuid. */ + +#ifndef HWLOC_PRIVATE_CPUID_X86_H +#define HWLOC_PRIVATE_CPUID_X86_H + +#if (defined HWLOC_X86_32_ARCH) && (!defined HWLOC_HAVE_MSVC_CPUIDEX) +static __hwloc_inline int hwloc_have_x86_cpuid(void) +{ + int ret; + unsigned tmp, tmp2; + __asm__( + "mov $0,%0\n\t" /* Not supported a priori */ + + "pushfl \n\t" /* Save flags */ + + "pushfl \n\t" \ + "pop %1 \n\t" /* Get flags */ \ + +#define TRY_TOGGLE \ + "xor $0x00200000,%1\n\t" /* Try to toggle ID */ \ + "mov %1,%2\n\t" /* Save expected value */ \ + "push %1 \n\t" \ + "popfl \n\t" /* Try to toggle */ \ + "pushfl \n\t" \ + "pop %1 \n\t" \ + "cmp %1,%2\n\t" /* Compare with expected value */ \ + "jnz 0f\n\t" /* Unexpected, failure */ \ + + TRY_TOGGLE /* Try to set/clear */ + TRY_TOGGLE /* Try to clear/set */ + + "mov $1,%0\n\t" /* Passed the test! 
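+ (toggling the ID flag, bit 21 of EFLAGS, is the canonical test for CPUID support on 32-bit x86)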
*/ + + "0: \n\t" + "popfl \n\t" /* Restore flags */ + + : "=r" (ret), "=&r" (tmp), "=&r" (tmp2)); + return ret; +} +#endif /* !defined HWLOC_X86_32_ARCH && !defined HWLOC_HAVE_MSVC_CPUIDEX*/ +#if (defined HWLOC_X86_64_ARCH) || (defined HWLOC_HAVE_MSVC_CPUIDEX) +static __hwloc_inline int hwloc_have_x86_cpuid(void) { return 1; } +#endif /* HWLOC_X86_64_ARCH */ + +static __hwloc_inline void hwloc_x86_cpuid(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) +{ +#ifdef HWLOC_HAVE_MSVC_CPUIDEX + int regs[4]; + __cpuidex(regs, *eax, *ecx); + *eax = regs[0]; + *ebx = regs[1]; + *ecx = regs[2]; + *edx = regs[3]; +#else /* HWLOC_HAVE_MSVC_CPUIDEX */ + /* Note: gcc might want to use bx or the stack for %1 addressing, so we can't + * use them :/ */ +#ifdef HWLOC_X86_64_ARCH + hwloc_uint64_t sav_rbx; + __asm__( + "mov %%rbx,%2\n\t" + "cpuid\n\t" + "xchg %2,%%rbx\n\t" + "movl %k2,%1\n\t" + : "+a" (*eax), "=m" (*ebx), "=&r"(sav_rbx), + "+c" (*ecx), "=&d" (*edx)); +#elif defined(HWLOC_X86_32_ARCH) + __asm__( + "mov %%ebx,%1\n\t" + "cpuid\n\t" + "xchg %%ebx,%1\n\t" + : "+a" (*eax), "=&SD" (*ebx), "+c" (*ecx), "=&d" (*edx)); +#else +#error unknown architecture +#endif +#endif /* HWLOC_HAVE_MSVC_CPUIDEX */ +} + +#endif /* HWLOC_PRIVATE_X86_CPUID_H */ diff --git a/src/3rdparty/hwloc/include/private/debug.h b/src/3rdparty/hwloc/include/private/debug.h new file mode 100644 index 000000000..74b697db4 --- /dev/null +++ b/src/3rdparty/hwloc/include/private/debug.h @@ -0,0 +1,83 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009, 2011 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/* The configuration file */ + +#ifndef HWLOC_DEBUG_H +#define HWLOC_DEBUG_H + +#include <private/autogen/config.h> +#include <private/misc.h> + +#ifdef HWLOC_DEBUG +#include <stdarg.h> +#include <stdio.h> +#endif + +/* Compile-time assertion */ +#define HWLOC_BUILD_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)])) + +#ifdef HWLOC_DEBUG +static __hwloc_inline int hwloc_debug_enabled(void) +{ + static int checked = 0; + static int enabled = 1; + if (!checked) { + const char *env = getenv("HWLOC_DEBUG_VERBOSE"); + if (env) + enabled = atoi(env); + if (enabled) + fprintf(stderr, "hwloc verbose debug enabled, may be disabled with HWLOC_DEBUG_VERBOSE=0 in the environment.\n"); + checked = 1; + } + return enabled; +} +#endif + +static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...) __hwloc_attribute_format(printf, 1, 2); +static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...) 
+{ +#ifdef HWLOC_DEBUG + if (hwloc_debug_enabled()) { + va_list ap; + va_start(ap, s); + vfprintf(stderr, s, ap); + va_end(ap); + } +#endif +} + +#ifdef HWLOC_DEBUG +#define hwloc_debug_bitmap(fmt, bitmap) do { \ +if (hwloc_debug_enabled()) { \ + char *s; \ + hwloc_bitmap_asprintf(&s, bitmap); \ + fprintf(stderr, fmt, s); \ + free(s); \ +} } while (0) +#define hwloc_debug_1arg_bitmap(fmt, arg1, bitmap) do { \ +if (hwloc_debug_enabled()) { \ + char *s; \ + hwloc_bitmap_asprintf(&s, bitmap); \ + fprintf(stderr, fmt, arg1, s); \ + free(s); \ +} } while (0) +#define hwloc_debug_2args_bitmap(fmt, arg1, arg2, bitmap) do { \ +if (hwloc_debug_enabled()) { \ + char *s; \ + hwloc_bitmap_asprintf(&s, bitmap); \ + fprintf(stderr, fmt, arg1, arg2, s); \ + free(s); \ +} } while (0) +#else +#define hwloc_debug_bitmap(s, bitmap) do { } while(0) +#define hwloc_debug_1arg_bitmap(s, arg1, bitmap) do { } while(0) +#define hwloc_debug_2args_bitmap(s, arg1, arg2, bitmap) do { } while(0) +#endif + +#endif /* HWLOC_DEBUG_H */ diff --git a/src/3rdparty/hwloc/include/private/internal-components.h b/src/3rdparty/hwloc/include/private/internal-components.h new file mode 100644 index 000000000..b138a0eb9 --- /dev/null +++ b/src/3rdparty/hwloc/include/private/internal-components.h @@ -0,0 +1,41 @@ +/* + * Copyright © 2018 Inria. All rights reserved. + * + * See COPYING in top-level directory. + */ + +/* List of components defined inside hwloc */ + +#ifndef PRIVATE_INTERNAL_COMPONENTS_H +#define PRIVATE_INTERNAL_COMPONENTS_H + +/* global discovery */ +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_synthetic_component; + +/* CPU discovery */ +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_aix_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_bgq_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_darwin_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_freebsd_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_hpux_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_linux_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_netbsd_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_noos_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_solaris_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_windows_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component; + +/* I/O discovery */ +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_linuxio_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component; + +/* XML backend */ +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_nolibxml_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_libxml_component; + +#endif /* PRIVATE_INTERNAL_COMPONENTS_H */ diff --git a/src/3rdparty/hwloc/include/private/misc.h b/src/3rdparty/hwloc/include/private/misc.h new file mode 100644 index 000000000..66608bc79 --- /dev/null +++ b/src/3rdparty/hwloc/include/private/misc.h @@ -0,0 +1,583 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2018 Inria. All rights reserved. 
+ * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/* Misc macros and inlines. */ + +#ifndef HWLOC_PRIVATE_MISC_H +#define HWLOC_PRIVATE_MISC_H + +#include <hwloc/autogen/config.h> +#include <private/autogen/config.h> +#include <hwloc.h> + +#ifdef HWLOC_HAVE_DECL_STRNCASECMP +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif +#else +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif +#endif + +#define HWLOC_BITS_PER_LONG (HWLOC_SIZEOF_UNSIGNED_LONG * 8) +#define HWLOC_BITS_PER_INT (HWLOC_SIZEOF_UNSIGNED_INT * 8) + +#if (HWLOC_BITS_PER_LONG != 32) && (HWLOC_BITS_PER_LONG != 64) +#error "unknown size for unsigned long." +#endif + +#if (HWLOC_BITS_PER_INT != 16) && (HWLOC_BITS_PER_INT != 32) && (HWLOC_BITS_PER_INT != 64) +#error "unknown size for unsigned int." +#endif + +/* internal-use-only value for when we don't know the type or don't have any value */ +#define HWLOC_OBJ_TYPE_NONE ((hwloc_obj_type_t) -1) + +/** + * ffsl helpers. + */ + +#if defined(HWLOC_HAVE_BROKEN_FFS) + +/* System has a broken ffs(). + * We must check the before __GNUC__ or HWLOC_HAVE_FFSL + */ +# define HWLOC_NO_FFS + +#elif defined(__GNUC__) + +# if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) + /* Starting from 3.4, gcc has a long variant. */ +# define hwloc_ffsl(x) __builtin_ffsl(x) +# else +# define hwloc_ffs(x) __builtin_ffs(x) +# define HWLOC_NEED_FFSL +# endif + +#elif defined(HWLOC_HAVE_FFSL) + +# ifndef HWLOC_HAVE_DECL_FFSL +extern int ffsl(long) __hwloc_attribute_const; +# endif + +# define hwloc_ffsl(x) ffsl(x) + +#elif defined(HWLOC_HAVE_FFS) + +# ifndef HWLOC_HAVE_DECL_FFS +extern int ffs(int) __hwloc_attribute_const; +# endif + +# define hwloc_ffs(x) ffs(x) +# define HWLOC_NEED_FFSL + +#else /* no ffs implementation */ + +# define HWLOC_NO_FFS + +#endif + +#ifdef HWLOC_NO_FFS + +/* no ffs or it is known to be broken */ +static __hwloc_inline int +hwloc_ffsl_manual(unsigned long x) __hwloc_attribute_const; +static __hwloc_inline int +hwloc_ffsl_manual(unsigned long x) +{ + int i; + + if (!x) + return 0; + + i = 1; +#if HWLOC_BITS_PER_LONG >= 64 + if (!(x & 0xfffffffful)) { + x >>= 32; + i += 32; + } +#endif + if (!(x & 0xffffu)) { + x >>= 16; + i += 16; + } + if (!(x & 0xff)) { + x >>= 8; + i += 8; + } + if (!(x & 0xf)) { + x >>= 4; + i += 4; + } + if (!(x & 0x3)) { + x >>= 2; + i += 2; + } + if (!(x & 0x1)) { + x >>= 1; + i += 1; + } + + return i; +} +/* always define hwloc_ffsl as a macro, to avoid renaming breakage */ +#define hwloc_ffsl hwloc_ffsl_manual + +#elif defined(HWLOC_NEED_FFSL) + +/* We only have an int ffs(int) implementation, build a long one. */ + +/* First make it 32 bits if it was only 16. */ +static __hwloc_inline int +hwloc_ffs32(unsigned long x) __hwloc_attribute_const; +static __hwloc_inline int +hwloc_ffs32(unsigned long x) +{ +#if HWLOC_BITS_PER_INT == 16 + int low_ffs, hi_ffs; + + low_ffs = hwloc_ffs(x & 0xfffful); + if (low_ffs) + return low_ffs; + + hi_ffs = hwloc_ffs(x >> 16); + if (hi_ffs) + return hi_ffs + 16; + + return 0; +#else + return hwloc_ffs(x); +#endif +} + +/* Then make it 64 bit if longs are. 
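+ * Whichever path was taken above, hwloc_ffsl(0x18UL) returns 4, the 1-based
+ * index of the least significant set bit (and 0 when no bit is set).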
*/ +static __hwloc_inline int +hwloc_ffsl_from_ffs32(unsigned long x) __hwloc_attribute_const; +static __hwloc_inline int +hwloc_ffsl_from_ffs32(unsigned long x) +{ +#if HWLOC_BITS_PER_LONG == 64 + int low_ffs, hi_ffs; + + low_ffs = hwloc_ffs32(x & 0xfffffffful); + if (low_ffs) + return low_ffs; + + hi_ffs = hwloc_ffs32(x >> 32); + if (hi_ffs) + return hi_ffs + 32; + + return 0; +#else + return hwloc_ffs32(x); +#endif +} +/* always define hwloc_ffsl as a macro, to avoid renaming breakage */ +#define hwloc_ffsl hwloc_ffsl_from_ffs32 + +#endif + +/** + * flsl helpers. + */ +#ifdef __GNUC_____ + +# if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) +# define hwloc_flsl(x) ((x) ? (8*sizeof(long) - __builtin_clzl(x)) : 0) +# else +# define hwloc_fls(x) ((x) ? (8*sizeof(int) - __builtin_clz(x)) : 0) +# define HWLOC_NEED_FLSL +# endif + +#elif defined(HWLOC_HAVE_FLSL) + +# ifndef HWLOC_HAVE_DECL_FLSL +extern int flsl(long) __hwloc_attribute_const; +# endif + +# define hwloc_flsl(x) flsl(x) + +#elif defined(HWLOC_HAVE_CLZL) + +# ifndef HWLOC_HAVE_DECL_CLZL +extern int clzl(long) __hwloc_attribute_const; +# endif + +# define hwloc_flsl(x) ((x) ? (8*sizeof(long) - clzl(x)) : 0) + +#elif defined(HWLOC_HAVE_FLS) + +# ifndef HWLOC_HAVE_DECL_FLS +extern int fls(int) __hwloc_attribute_const; +# endif + +# define hwloc_fls(x) fls(x) +# define HWLOC_NEED_FLSL + +#elif defined(HWLOC_HAVE_CLZ) + +# ifndef HWLOC_HAVE_DECL_CLZ +extern int clz(int) __hwloc_attribute_const; +# endif + +# define hwloc_fls(x) ((x) ? (8*sizeof(int) - clz(x)) : 0) +# define HWLOC_NEED_FLSL + +#else /* no fls implementation */ + +static __hwloc_inline int +hwloc_flsl_manual(unsigned long x) __hwloc_attribute_const; +static __hwloc_inline int +hwloc_flsl_manual(unsigned long x) +{ + int i = 0; + + if (!x) + return 0; + + i = 1; +#if HWLOC_BITS_PER_LONG >= 64 + if ((x & 0xffffffff00000000ul)) { + x >>= 32; + i += 32; + } +#endif + if ((x & 0xffff0000u)) { + x >>= 16; + i += 16; + } + if ((x & 0xff00)) { + x >>= 8; + i += 8; + } + if ((x & 0xf0)) { + x >>= 4; + i += 4; + } + if ((x & 0xc)) { + x >>= 2; + i += 2; + } + if ((x & 0x2)) { + x >>= 1; + i += 1; + } + + return i; +} +/* always define hwloc_flsl as a macro, to avoid renaming breakage */ +#define hwloc_flsl hwloc_flsl_manual + +#endif + +#ifdef HWLOC_NEED_FLSL + +/* We only have an int fls(int) implementation, build a long one. */ + +/* First make it 32 bits if it was only 16. */ +static __hwloc_inline int +hwloc_fls32(unsigned long x) __hwloc_attribute_const; +static __hwloc_inline int +hwloc_fls32(unsigned long x) +{ +#if HWLOC_BITS_PER_INT == 16 + int low_fls, hi_fls; + + hi_fls = hwloc_fls(x >> 16); + if (hi_fls) + return hi_fls + 16; + + low_fls = hwloc_fls(x & 0xfffful); + if (low_fls) + return low_fls; + + return 0; +#else + return hwloc_fls(x); +#endif +} + +/* Then make it 64 bit if longs are. 
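+ * As with ffsl above, hwloc_flsl(0x18UL) returns 5, the 1-based index of
+ * the most significant set bit (and 0 when no bit is set).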
*/ +static __hwloc_inline int +hwloc_flsl_from_fls32(unsigned long x) __hwloc_attribute_const; +static __hwloc_inline int +hwloc_flsl_from_fls32(unsigned long x) +{ +#if HWLOC_BITS_PER_LONG == 64 + int low_fls, hi_fls; + + hi_fls = hwloc_fls32(x >> 32); + if (hi_fls) + return hi_fls + 32; + + low_fls = hwloc_fls32(x & 0xfffffffful); + if (low_fls) + return low_fls; + + return 0; +#else + return hwloc_fls32(x); +#endif +} +/* always define hwloc_flsl as a macro, to avoid renaming breakage */ +#define hwloc_flsl hwloc_flsl_from_fls32 + +#endif + +static __hwloc_inline int +hwloc_weight_long(unsigned long w) __hwloc_attribute_const; +static __hwloc_inline int +hwloc_weight_long(unsigned long w) +{ +#if HWLOC_BITS_PER_LONG == 32 +#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4) + return __builtin_popcount(w); +#else + unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); + res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); + return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); +#endif +#else /* HWLOC_BITS_PER_LONG == 32 */ +#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4) + return __builtin_popcountll(w); +#else + unsigned long res; + res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul); + res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); + res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful); + res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul); + res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul); + return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul); +#endif +#endif /* HWLOC_BITS_PER_LONG == 64 */ +} + +#if !HAVE_DECL_STRTOULL && defined(HAVE_STRTOULL) +unsigned long long int strtoull(const char *nptr, char **endptr, int base); +#endif + +static __hwloc_inline int hwloc_strncasecmp(const char *s1, const char *s2, size_t n) +{ +#ifdef HWLOC_HAVE_DECL_STRNCASECMP + return strncasecmp(s1, s2, n); +#else + while (n) { + char c1 = tolower(*s1), c2 = tolower(*s2); + if (!c1 || !c2 || c1 != c2) + return c1-c2; + n--; s1++; s2++; + } + return 0; +#endif +} + +static __hwloc_inline hwloc_obj_type_t hwloc_cache_type_by_depth_type(unsigned depth, hwloc_obj_cache_type_t type) +{ + if (type == HWLOC_OBJ_CACHE_INSTRUCTION) { + if (depth >= 1 && depth <= 3) + return HWLOC_OBJ_L1ICACHE + depth-1; + else + return HWLOC_OBJ_TYPE_NONE; + } else { + if (depth >= 1 && depth <= 5) + return HWLOC_OBJ_L1CACHE + depth-1; + else + return HWLOC_OBJ_TYPE_NONE; + } +} + +#define HWLOC_BITMAP_EQUAL 0 /* Bitmaps are equal */ +#define HWLOC_BITMAP_INCLUDED 1 /* First bitmap included in second */ +#define HWLOC_BITMAP_CONTAINS 2 /* First bitmap contains second */ +#define HWLOC_BITMAP_INTERSECTS 3 /* Bitmaps intersect without any inclusion */ +#define HWLOC_BITMAP_DIFFERENT 4 /* Bitmaps do not intersect */ + +/* Compare bitmaps \p bitmap1 and \p bitmap2 from an inclusion point of view. */ +HWLOC_DECLSPEC int hwloc_bitmap_compare_inclusion(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure; + +/* Return a stringified PCI class. 
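+ * For example, class id 0x0300 (display controller, VGA subclass) is
+ * expected to yield "VGA".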
*/ +HWLOC_DECLSPEC extern const char * hwloc_pci_class_string(unsigned short class_id); + +/* Parse a PCI link speed (GT/s) string from Linux sysfs */ +#ifdef HWLOC_LINUX_SYS +#include <stdlib.h> /* for atof() */ +static __hwloc_inline float +hwloc_linux_pci_link_speed_from_string(const char *string) +{ + /* don't parse Gen1 with atof() since it expects a localized string + * while the kernel sysfs files aren't. + */ + if (!strncmp(string, "2.5 ", 4)) + /* "2.5 GT/s" is Gen1 with 8/10 encoding */ + return 2.5 * .8; + + /* also hardwire Gen2 since it also has a specific encoding */ + if (!strncmp(string, "5 ", 2)) + /* "5 GT/s" is Gen2 with 8/10 encoding */ + return 5 * .8; + + /* handle Gen3+ in a generic way */ + return atof(string) * 128./130; /* Gen3+ encoding is 128/130 */ +} +#endif + +/* Traverse children of a parent */ +#define for_each_child(child, parent) for(child = parent->first_child; child; child = child->next_sibling) +#define for_each_memory_child(child, parent) for(child = parent->memory_first_child; child; child = child->next_sibling) +#define for_each_io_child(child, parent) for(child = parent->io_first_child; child; child = child->next_sibling) +#define for_each_misc_child(child, parent) for(child = parent->misc_first_child; child; child = child->next_sibling) + +/* Any object attached to normal children */ +static __hwloc_inline int hwloc__obj_type_is_normal (hwloc_obj_type_t type) +{ + /* type contiguity is asserted in topology_check() */ + return type <= HWLOC_OBJ_GROUP; +} + +/* Any object attached to memory children, currently only NUMA nodes */ +static __hwloc_inline int hwloc__obj_type_is_memory (hwloc_obj_type_t type) +{ + /* type contiguity is asserted in topology_check() */ + return type == HWLOC_OBJ_NUMANODE; +} + +/* I/O or Misc object, without cpusets or nodesets. */ +static __hwloc_inline int hwloc__obj_type_is_special (hwloc_obj_type_t type) +{ + /* type contiguity is asserted in topology_check() */ + return type >= HWLOC_OBJ_BRIDGE && type <= HWLOC_OBJ_MISC; +} + +/* Any object attached to io children */ +static __hwloc_inline int hwloc__obj_type_is_io (hwloc_obj_type_t type) +{ + /* type contiguity is asserted in topology_check() */ + return type >= HWLOC_OBJ_BRIDGE && type <= HWLOC_OBJ_OS_DEVICE; +} + +static __hwloc_inline int +hwloc__obj_type_is_cache(hwloc_obj_type_t type) +{ + /* type contiguity is asserted in topology_check() */ + return (type >= HWLOC_OBJ_L1CACHE && type <= HWLOC_OBJ_L3ICACHE); +} + +static __hwloc_inline int +hwloc__obj_type_is_dcache(hwloc_obj_type_t type) +{ + /* type contiguity is asserted in topology_check() */ + return (type >= HWLOC_OBJ_L1CACHE && type <= HWLOC_OBJ_L5CACHE); +} + +/** \brief Check whether an object is a Instruction Cache. 
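+ * (i.e. the type lies between HWLOC_OBJ_L1ICACHE and HWLOC_OBJ_L3ICACHE,
+ * mirroring the data-cache check above).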
*/ +static __hwloc_inline int +hwloc__obj_type_is_icache(hwloc_obj_type_t type) +{ + /* type contiguity is asserted in topology_check() */ + return (type >= HWLOC_OBJ_L1ICACHE && type <= HWLOC_OBJ_L3ICACHE); +} + +#ifdef HAVE_USELOCALE +#include "locale.h" +#ifdef HAVE_XLOCALE_H +#include "xlocale.h" +#endif +#define hwloc_localeswitch_declare locale_t __old_locale = (locale_t)0, __new_locale +#define hwloc_localeswitch_init() do { \ + __new_locale = newlocale(LC_ALL_MASK, "C", (locale_t)0); \ + if (__new_locale != (locale_t)0) \ + __old_locale = uselocale(__new_locale); \ +} while (0) +#define hwloc_localeswitch_fini() do { \ + if (__new_locale != (locale_t)0) { \ + uselocale(__old_locale); \ + freelocale(__new_locale); \ + } \ +} while(0) +#else /* HAVE_USELOCALE */ +#if __HWLOC_HAVE_ATTRIBUTE_UNUSED +#define hwloc_localeswitch_declare int __dummy_nolocale __hwloc_attribute_unused +#define hwloc_localeswitch_init() +#else +#define hwloc_localeswitch_declare int __dummy_nolocale +#define hwloc_localeswitch_init() (void)__dummy_nolocale +#endif +#define hwloc_localeswitch_fini() +#endif /* HAVE_USELOCALE */ + +#if !HAVE_DECL_FABSF +#define fabsf(f) fabs((double)(f)) +#endif + +#if !HAVE_DECL_MODFF +#define modff(x,iptr) (float)modf((double)x,(double *)iptr) +#endif + +#if HAVE_DECL__SC_PAGE_SIZE +#define hwloc_getpagesize() sysconf(_SC_PAGE_SIZE) +#elif HAVE_DECL__SC_PAGESIZE +#define hwloc_getpagesize() sysconf(_SC_PAGESIZE) +#elif defined HAVE_GETPAGESIZE +#define hwloc_getpagesize() getpagesize() +#else +#undef hwloc_getpagesize +#endif + +#if HWLOC_HAVE_ATTRIBUTE_FORMAT +# define __hwloc_attribute_format(type, str, arg) __attribute__((__format__(type, str, arg))) +#else +# define __hwloc_attribute_format(type, str, arg) +#endif + +#define hwloc_memory_size_printf_value(_size, _verbose) \ + ((_size) < (10ULL<<20) || (_verbose) ? (((_size)>>9)+1)>>1 : (_size) < (10ULL<<30) ? (((_size)>>19)+1)>>1 : (_size) < (10ULL<<40) ? (((_size)>>29)+1)>>1 : (((_size)>>39)+1)>>1) +#define hwloc_memory_size_printf_unit(_size, _verbose) \ + ((_size) < (10ULL<<20) || (_verbose) ? "KB" : (_size) < (10ULL<<30) ? "MB" : (_size) < (10ULL<<40) ? "GB" : "TB") + +#ifdef HWLOC_WIN_SYS +# ifndef HAVE_SSIZE_T +typedef SSIZE_T ssize_t; +# endif +# if !HAVE_DECL_STRTOULL && !defined(HAVE_STRTOULL) +# define strtoull _strtoui64 +# endif +# ifndef S_ISREG +# define S_ISREG(m) ((m) & S_IFREG) +# endif +# ifndef S_ISDIR +# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +# endif +# ifndef S_IRWXU +# define S_IRWXU 00700 +# endif +# ifndef HWLOC_HAVE_DECL_STRCASECMP +# define strcasecmp _stricmp +# endif +# if !HAVE_DECL_SNPRINTF +# define snprintf _snprintf +# endif +# if HAVE_DECL__STRDUP +# define strdup _strdup +# endif +# if HAVE_DECL__PUTENV +# define putenv _putenv +# endif +#endif + +#if defined HWLOC_WIN_SYS && !defined __MINGW32__ && !defined(__CYGWIN__) +/* MSVC doesn't support C99 variable-length array */ +#include <malloc.h> +#define HWLOC_VLA(_type, _name, _nb) _type *_name = (_type*) _alloca((_nb)*sizeof(_type)) +#else +#define HWLOC_VLA(_type, _name, _nb) _type _name[_nb] +#endif + +#endif /* HWLOC_PRIVATE_MISC_H */ diff --git a/src/3rdparty/hwloc/include/private/netloc.h b/src/3rdparty/hwloc/include/private/netloc.h new file mode 100644 index 000000000..c070c54cc --- /dev/null +++ b/src/3rdparty/hwloc/include/private/netloc.h @@ -0,0 +1,578 @@ +/* + * Copyright © 2014 Cisco Systems, Inc. All rights reserved. + * Copyright © 2013-2014 University of Wisconsin-La Crosse. + * All rights reserved. 
+ * Copyright © 2015-2017 Inria. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * See COPYING in top-level directory. + * + * $HEADER$ + */ + +#ifndef _NETLOC_PRIVATE_H_ +#define _NETLOC_PRIVATE_H_ + +#include <hwloc.h> +#include <netloc.h> +#include <netloc/uthash.h> +#include <netloc/utarray.h> +#include <private/autogen/config.h> + +#define NETLOCFILE_VERSION 1 + +#ifdef NETLOC_SCOTCH +#include <stdint.h> +#include <scotch.h> +#define NETLOC_int SCOTCH_Num +#else +#define NETLOC_int int +#endif + +/* + * "Import" a few things from hwloc + */ +#define __netloc_attribute_unused __hwloc_attribute_unused +#define __netloc_attribute_malloc __hwloc_attribute_malloc +#define __netloc_attribute_const __hwloc_attribute_const +#define __netloc_attribute_pure __hwloc_attribute_pure +#define __netloc_attribute_deprecated __hwloc_attribute_deprecated +#define __netloc_attribute_may_alias __hwloc_attribute_may_alias +#define NETLOC_DECLSPEC HWLOC_DECLSPEC + + +/********************************************************************** + * Types + **********************************************************************/ + +/** + * Definitions for Comparators + * \sa These are the return values from the following functions: + * netloc_network_compare, netloc_dt_edge_t_compare, netloc_dt_node_t_compare + */ +typedef enum { + NETLOC_CMP_SAME = 0, /**< Compared as the Same */ + NETLOC_CMP_SIMILAR = -1, /**< Compared as Similar, but not the Same */ + NETLOC_CMP_DIFF = -2 /**< Compared as Different */ +} netloc_compare_type_t; + +/** + * Enumerated type for the various types of supported networks + */ +typedef enum { + NETLOC_NETWORK_TYPE_ETHERNET = 1, /**< Ethernet network */ + NETLOC_NETWORK_TYPE_INFINIBAND = 2, /**< InfiniBand network */ + NETLOC_NETWORK_TYPE_INVALID = 3 /**< Invalid network */ +} netloc_network_type_t; + +/** + * Enumerated type for the various types of supported topologies + */ +typedef enum { + NETLOC_TOPOLOGY_TYPE_INVALID = -1, /**< Invalid */ + NETLOC_TOPOLOGY_TYPE_TREE = 1, /**< Tree */ +} netloc_topology_type_t; + +/** + * Enumerated type for the various types of nodes + */ +typedef enum { + NETLOC_NODE_TYPE_HOST = 0, /**< Host (a.k.a., network addressable endpoint - e.g., MAC Address) node */ + NETLOC_NODE_TYPE_SWITCH = 1, /**< Switch node */ + NETLOC_NODE_TYPE_INVALID = 2 /**< Invalid node */ +} netloc_node_type_t; + +typedef enum { + NETLOC_ARCH_TREE = 0, /* Fat tree */ +} netloc_arch_type_t; + + +/* Pre declarations to avoid inter dependency problems */ +/** \cond IGNORE */ +struct netloc_topology_t; +typedef struct netloc_topology_t netloc_topology_t; +struct netloc_node_t; +typedef struct netloc_node_t netloc_node_t; +struct netloc_edge_t; +typedef struct netloc_edge_t netloc_edge_t; +struct netloc_physical_link_t; +typedef struct netloc_physical_link_t netloc_physical_link_t; +struct netloc_path_t; +typedef struct netloc_path_t netloc_path_t; + +struct netloc_arch_tree_t; +typedef struct netloc_arch_tree_t netloc_arch_tree_t; +struct netloc_arch_node_t; +typedef struct netloc_arch_node_t netloc_arch_node_t; +struct netloc_arch_node_slot_t; +typedef struct netloc_arch_node_slot_t netloc_arch_node_slot_t; +struct netloc_arch_t; +typedef struct netloc_arch_t netloc_arch_t; +/** \endcond */ + +/** + * \struct netloc_topology_t + * \brief Netloc Topology Context + * + * An opaque data structure used to reference a network topology. 
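+ *
+ * A minimal lifecycle sketch (path is a hypothetical location of netloc
+ * data; the construct/destruct functions are declared later in this
+ * header):
+ * \code
+ * netloc_topology_t *topology = netloc_topology_construct(path);
+ * if (topology != NULL) {
+ *     ... query nodes, edges and partitions ...
+ *     netloc_topology_destruct(topology);
+ * }
+ * \endcode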
+ * + * \note Must be initialized with \ref netloc_topology_construct() + */ +struct netloc_topology_t { + /** Topology path */ + char *topopath; + /** Subnet ID */ + char *subnet_id; + + /** Node List */ + netloc_node_t *nodes; /* Hash table of nodes by physical_id */ + netloc_node_t *nodesByHostname; /* Hash table of nodes by hostname */ + + netloc_physical_link_t *physical_links; /* Hash table with physcial links */ + + /** Partition List */ + UT_array *partitions; + + /** Hwloc topology List */ + char *hwlocpath; + UT_array *topos; + hwloc_topology_t *hwloc_topos; + + /** Type of the graph */ + netloc_topology_type_t type; +}; + +/** + * \brief Netloc Node Type + * + * Represents the concept of a node (a.k.a., vertex, endpoint) within a network + * graph. This could be a server or a network switch. The \ref node_type parameter + * will distinguish the exact type of node this represents in the graph. + */ +struct netloc_node_t { + UT_hash_handle hh; /* makes this structure hashable with physical_id */ + UT_hash_handle hh2; /* makes this structure hashable with hostname */ + + /** Physical ID of the node */ + char physical_id[20]; + + /** Logical ID of the node (if any) */ + int logical_id; + + /** Type of the node */ + netloc_node_type_t type; + + /* Pointer to physical_links */ + UT_array *physical_links; + + /** Description information from discovery (if any) */ + char *description; + + /** + * Application-given private data pointer. + * Initialized to NULL, and not used by the netloc library. + */ + void * userdata; + + /** Outgoing edges from this node */ + netloc_edge_t *edges; + + UT_array *subnodes; /* the group of nodes for the virtual nodes */ + + netloc_path_t *paths; + + char *hostname; + + UT_array *partitions; /* index in the list from the topology */ + + hwloc_topology_t hwlocTopo; + int hwlocTopoIdx; +}; + +/** + * \brief Netloc Edge Type + * + * Represents the concept of a directed edge within a network graph. + * + * \note We do not point to the netloc_node_t structure directly to + * simplify the representation, and allow the information to more easily + * be entered into the data store without circular references. + * \todo JJH Is the note above still true? + */ +struct netloc_edge_t { + UT_hash_handle hh; /* makes this structure hashable */ + + netloc_node_t *dest; + + int id; + + /** Pointers to the parent node */ + netloc_node_t *node; + + /* Pointer to physical_links */ + UT_array *physical_links; + + /** total gbits of the links */ + float total_gbits; + + UT_array *partitions; /* index in the list from the topology */ + + UT_array *subnode_edges; /* for edges going to virtual nodes */ + + struct netloc_edge_t *other_way; + + /** + * Application-given private data pointer. + * Initialized to NULL, and not used by the netloc library. 
+ */ + void * userdata; +}; + + +struct netloc_physical_link_t { + UT_hash_handle hh; /* makes this structure hashable */ + + int id; // TODO long long + netloc_node_t *src; + netloc_node_t *dest; + int ports[2]; + char *width; + char *speed; + + netloc_edge_t *edge; + + int other_way_id; + struct netloc_physical_link_t *other_way; + + UT_array *partitions; /* index in the list from the topology */ + + /** gbits of the link from speed and width */ + float gbits; + + /** Description information from discovery (if any) */ + char *description; +}; + +struct netloc_path_t { + UT_hash_handle hh; /* makes this structure hashable */ + char dest_id[20]; + UT_array *links; +}; + + +/********************************************************************** + * Architecture structures + **********************************************************************/ +struct netloc_arch_tree_t { + NETLOC_int num_levels; + NETLOC_int *degrees; + NETLOC_int *cost; +}; + +struct netloc_arch_node_t { + UT_hash_handle hh; /* makes this structure hashable */ + char *name; /* Hash key */ + netloc_node_t *node; /* Corresponding node */ + int idx_in_topo; /* idx with ghost hosts to have complete topo */ + int num_slots; /* it is not the real number of slots but the maximum slot idx */ + int *slot_idx; /* corresponding idx in slot_tree */ + int *slot_os_idx; /* corresponding os index for each leaf in tree */ + netloc_arch_tree_t *slot_tree; /* Tree built from hwloc */ + int num_current_slots; /* Number of PUs */ + NETLOC_int *current_slots; /* indices in the complete tree */ + int *slot_ranks; /* corresponding MPI rank for each leaf in tree */ +}; + +struct netloc_arch_node_slot_t { + netloc_arch_node_t *node; + int slot; +}; + +struct netloc_arch_t { + netloc_topology_t *topology; + int has_slots; /* if slots are included in the architecture */ + netloc_arch_type_t type; + union { + netloc_arch_tree_t *node_tree; + netloc_arch_tree_t *global_tree; + } arch; + netloc_arch_node_t *nodes_by_name; + netloc_arch_node_slot_t *node_slot_by_idx; /* node_slot by index in complete topo */ + NETLOC_int num_current_hosts; /* if has_slots, host is a slot, else host is a node */ + NETLOC_int *current_hosts; /* indices in the complete topology */ +}; + +/********************************************************************** + * Topology Functions + **********************************************************************/ +/** + * Allocate a topology handle. + * + * User is responsible for calling \ref netloc_detach on the topology handle. + * The network parameter information is deep copied into the topology handle, so the + * user may destruct the network handle after calling this function and/or reuse + * the network handle. + * + * \returns NETLOC_SUCCESS on success + * \returns NETLOC_ERROR upon an error. + */ +netloc_topology_t *netloc_topology_construct(char *path); + +/** + * Destruct a topology handle + * + * \param topology A valid pointer to a \ref netloc_topology_t handle created + * from a prior call to \ref netloc_topology_construct. + * + * \returns NETLOC_SUCCESS on success + * \returns NETLOC_ERROR upon an error. 
+ */
+int netloc_topology_destruct(netloc_topology_t *topology);
+
+int netloc_topology_find_partition_idx(netloc_topology_t *topology, char *partition_name);
+
+int netloc_topology_read_hwloc(netloc_topology_t *topology, int num_nodes,
+        netloc_node_t **node_list);
+
+#define netloc_topology_iter_partitions(topology,partition) \
+    for ((partition) = (char **)utarray_front(topology->partitions); \
+            (partition) != NULL; \
+            (partition) = (char **)utarray_next(topology->partitions, partition))
+
+#define netloc_topology_iter_hwloctopos(topology,hwloctopo) \
+    for ((hwloctopo) = (char **)utarray_front(topology->topos); \
+            (hwloctopo) != NULL; \
+            (hwloctopo) = (char **)utarray_next(topology->topos, hwloctopo))
+
+#define netloc_topology_find_node(topology,node_id,node) \
+    HASH_FIND_STR(topology->nodes, node_id, node)
+
+#define netloc_topology_iter_nodes(topology,node,_tmp) \
+    HASH_ITER(hh, topology->nodes, node, _tmp)
+
+#define netloc_topology_num_nodes(topology) \
+    HASH_COUNT(topology->nodes)
+
+/*************************************************/
+
+
+/**
+ * Constructor for netloc_node_t
+ *
+ * User is responsible for calling the destructor on the handle.
+ *
+ * Returns
+ *   A newly allocated pointer to the node information.
+ */
+netloc_node_t *netloc_node_construct(void);
+
+/**
+ * Destructor for netloc_node_t
+ *
+ * \param node A valid node handle
+ *
+ * Returns
+ *   NETLOC_SUCCESS on success
+ *   NETLOC_ERROR on error
+ */
+int netloc_node_destruct(netloc_node_t *node);
+
+char *netloc_node_pretty_print(netloc_node_t* node);
+
+#define netloc_node_get_num_subnodes(node) \
+    utarray_len((node)->subnodes)
+
+#define netloc_node_get_subnode(node,i) \
+    (*(netloc_node_t **)utarray_eltptr((node)->subnodes, (i)))
+
+#define netloc_node_get_num_edges(node) \
+    utarray_len((node)->edges)
+
+#define netloc_node_get_edge(node,i) \
+    (*(netloc_edge_t **)utarray_eltptr((node)->edges, (i)))
+
+#define netloc_node_iter_edges(node,edge,_tmp) \
+    HASH_ITER(hh, node->edges, edge, _tmp)
+
+#define netloc_node_iter_paths(node,path,_tmp) \
+    HASH_ITER(hh, node->paths, path, _tmp)
+
+#define netloc_node_is_host(node) \
+    (node->type == NETLOC_NODE_TYPE_HOST)
+
+#define netloc_node_is_switch(node) \
+    (node->type == NETLOC_NODE_TYPE_SWITCH)
+
+int netloc_node_is_in_partition(netloc_node_t *node, int partition);
+
+/*************************************************/
+
+
+/**
+ * Constructor for netloc_edge_t
+ *
+ * User is responsible for calling the destructor on the handle.
+ *
+ * Returns
+ *   A newly allocated pointer to the edge information.
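+ *
+ * Editor's sketch (an assumption, not upstream documentation): summing the
+ * bandwidth of every edge of every node with the iteration macros defined
+ * above:
+ *
+ *   netloc_node_t *node, *node_tmp;
+ *   netloc_edge_t *edge, *edge_tmp;
+ *   float total = 0.0f;
+ *   netloc_topology_iter_nodes(topology, node, node_tmp) {
+ *       netloc_node_iter_edges(node, edge, edge_tmp) {
+ *           total += edge->total_gbits;
+ *       }
+ *   }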
+ */ +netloc_edge_t *netloc_edge_construct(void); + +/** + * Destructor for netloc_edge_t + * + * \param edge A valid edge handle + * + * Returns + * NETLOC_SUCCESS on success + * NETLOC_ERROR on error + */ +int netloc_edge_destruct(netloc_edge_t *edge); + +char * netloc_edge_pretty_print(netloc_edge_t* edge); + +void netloc_edge_reset_uid(void); + +int netloc_edge_is_in_partition(netloc_edge_t *edge, int partition); + +#define netloc_edge_get_num_links(edge) \ + utarray_len((edge)->physical_links) + +#define netloc_edge_get_link(edge,i) \ + (*(netloc_physical_link_t **)utarray_eltptr((edge)->physical_links, (i))) + +#define netloc_edge_get_num_subedges(edge) \ + utarray_len((edge)->subnode_edges) + +#define netloc_edge_get_subedge(edge,i) \ + (*(netloc_edge_t **)utarray_eltptr((edge)->subnode_edges, (i))) + +/*************************************************/ + + +/** + * Constructor for netloc_physical_link_t + * + * User is responsible for calling the destructor on the handle. + * + * Returns + * A newly allocated pointer to the physical link information. + */ +netloc_physical_link_t * netloc_physical_link_construct(void); + +/** + * Destructor for netloc_physical_link_t + * + * Returns + * NETLOC_SUCCESS on success + * NETLOC_ERROR on error + */ +int netloc_physical_link_destruct(netloc_physical_link_t *link); + +char * netloc_link_pretty_print(netloc_physical_link_t* link); + +/*************************************************/ + + +netloc_path_t *netloc_path_construct(void); +int netloc_path_destruct(netloc_path_t *path); + + +/********************************************************************** + * Architecture functions + **********************************************************************/ + +netloc_arch_t * netloc_arch_construct(void); + +int netloc_arch_destruct(netloc_arch_t *arch); + +int netloc_arch_build(netloc_arch_t *arch, int add_slots); + +int netloc_arch_set_current_resources(netloc_arch_t *arch); + +int netloc_arch_set_global_resources(netloc_arch_t *arch); + +int netloc_arch_node_get_hwloc_info(netloc_arch_node_t *arch); + +void netloc_arch_tree_complete(netloc_arch_tree_t *tree, UT_array **down_degrees_by_level, + int num_hosts, int **parch_idx); + +NETLOC_int netloc_arch_tree_num_leaves(netloc_arch_tree_t *tree); + + +/********************************************************************** + * Access functions of various elements of the topology + **********************************************************************/ + +#define netloc_get_num_partitions(object) \ + utarray_len((object)->partitions) + +#define netloc_get_partition(object,i) \ + (*(int *)utarray_eltptr((object)->partitions, (i))) + + +#define netloc_path_iter_links(path,link) \ + for ((link) = (netloc_physical_link_t **)utarray_front(path->links); \ + (link) != NULL; \ + (link) = (netloc_physical_link_t **)utarray_next(path->links, link)) + +/********************************************************************** + * Misc functions + **********************************************************************/ + +/** + * Decode the network type + * + * \param net_type A valid member of the \ref netloc_network_type_t type + * + * \returns NULL if the type is invalid + * \returns A string for that \ref netloc_network_type_t type + */ +static inline const char * netloc_network_type_decode(netloc_network_type_t net_type) { + if( NETLOC_NETWORK_TYPE_ETHERNET == net_type ) { + return "ETH"; + } + else if( NETLOC_NETWORK_TYPE_INFINIBAND == net_type ) { + return "IB"; + } + else { + return NULL; + } +} + +/** + 
* Decode the node type + * + * \param node_type A valid member of the \ref netloc_node_type_t type + * + * \returns NULL if the type is invalid + * \returns A string for that \ref netloc_node_type_t type + */ +static inline const char * netloc_node_type_decode(netloc_node_type_t node_type) { + if( NETLOC_NODE_TYPE_SWITCH == node_type ) { + return "SW"; + } + else if( NETLOC_NODE_TYPE_HOST == node_type ) { + return "CA"; + } + else { + return NULL; + } +} + +ssize_t netloc_line_get(char **lineptr, size_t *n, FILE *stream); + +char *netloc_line_get_next_token(char **string, char c); + +int netloc_build_comm_mat(char *filename, int *pn, double ***pmat); + +#define STRDUP_IF_NOT_NULL(str) (NULL == str ? NULL : strdup(str)) +#define STR_EMPTY_IF_NULL(str) (NULL == str ? "" : str) + + +#endif // _NETLOC_PRIVATE_H_ diff --git a/src/3rdparty/hwloc/include/private/private.h b/src/3rdparty/hwloc/include/private/private.h new file mode 100644 index 000000000..8e3964ab2 --- /dev/null +++ b/src/3rdparty/hwloc/include/private/private.h @@ -0,0 +1,417 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * + * See COPYING in top-level directory. + */ + +/* Internal types and helpers. */ + + +#ifdef HWLOC_INSIDE_PLUGIN +/* + * these declarations are internal only, they are not available to plugins + * (many functions below are internal static symbols). + */ +#error This file should not be used in plugins +#endif + + +#ifndef HWLOC_PRIVATE_H +#define HWLOC_PRIVATE_H + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <hwloc/bitmap.h> +#include <private/components.h> +#include <private/misc.h> +#include <sys/types.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif +#ifdef HAVE_SYS_UTSNAME_H +#include <sys/utsname.h> +#endif +#include <string.h> + +#define HWLOC_TOPOLOGY_ABI 0x20000 /* version of the layout of struct topology */ + +/***************************************************** + * WARNING: + * changes below in this structure (and its children) + * should cause a bump of HWLOC_TOPOLOGY_ABI. + *****************************************************/ + +struct hwloc_topology { + unsigned topology_abi; + + unsigned nb_levels; /* Number of horizontal levels */ + unsigned nb_levels_allocated; /* Number of levels allocated and zeroed in level_nbobjects and levels below */ + unsigned *level_nbobjects; /* Number of objects on each horizontal level */ + struct hwloc_obj ***levels; /* Direct access to levels, levels[l = 0 .. 
nblevels-1][0..level_nbobjects[l]] */
+  unsigned long flags;
+  int type_depth[HWLOC_OBJ_TYPE_MAX];
+  enum hwloc_type_filter_e type_filter[HWLOC_OBJ_TYPE_MAX];
+  int is_thissystem;
+  int is_loaded;
+  int modified; /* >0 if objects were added/removed recently, which means a reconnect is needed */
+  hwloc_pid_t pid; /* Process ID the topology is viewed from, 0 for self */
+  void *userdata;
+  uint64_t next_gp_index;
+
+  void *adopted_shmem_addr;
+  size_t adopted_shmem_length;
+
+#define HWLOC_NR_SLEVELS 5
+#define HWLOC_SLEVEL_NUMANODE 0
+#define HWLOC_SLEVEL_BRIDGE 1
+#define HWLOC_SLEVEL_PCIDEV 2
+#define HWLOC_SLEVEL_OSDEV 3
+#define HWLOC_SLEVEL_MISC 4
+  /* order must match negative depth, it's asserted in setup_defaults() */
+#define HWLOC_SLEVEL_FROM_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x))
+#define HWLOC_SLEVEL_TO_DEPTH(x) (HWLOC_TYPE_DEPTH_NUMANODE-(x))
+  struct hwloc_special_level_s {
+    unsigned nbobjs;
+    struct hwloc_obj **objs;
+    struct hwloc_obj *first, *last; /* Temporarily used while listing object before building the objs array */
+  } slevels[HWLOC_NR_SLEVELS];
+
+  hwloc_bitmap_t allowed_cpuset;
+  hwloc_bitmap_t allowed_nodeset;
+
+  struct hwloc_binding_hooks {
+    int (*set_thisproc_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
+    int (*get_thisproc_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*set_thisthread_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
+    int (*get_thisthread_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*set_proc_cpubind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags);
+    int (*get_proc_cpubind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+#ifdef hwloc_thread_t
+    int (*set_thread_cpubind)(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_const_cpuset_t set, int flags);
+    int (*get_thread_cpubind)(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_cpuset_t set, int flags);
+#endif
+
+    int (*get_thisproc_last_cpu_location)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*get_thisthread_last_cpu_location)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*get_proc_last_cpu_location)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+
+    int (*set_thisproc_membind)(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_thisproc_membind)(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*set_thisthread_membind)(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_thisthread_membind)(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*set_proc_membind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_proc_membind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*set_area_membind)(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_area_membind)(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*get_area_memlocation)(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t
nodeset, int flags); + /* This has to return the same kind of pointer as alloc_membind, so that free_membind can be used on it */ + void *(*alloc)(hwloc_topology_t topology, size_t len); + /* alloc_membind has to always succeed if !(flags & HWLOC_MEMBIND_STRICT). + * see hwloc_alloc_or_fail which is convenient for that. */ + void *(*alloc_membind)(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); + int (*free_membind)(hwloc_topology_t topology, void *addr, size_t len); + + int (*get_allowed_resources)(hwloc_topology_t topology); + } binding_hooks; + + struct hwloc_topology_support support; + + void (*userdata_export_cb)(void *reserved, struct hwloc_topology *topology, struct hwloc_obj *obj); + void (*userdata_import_cb)(struct hwloc_topology *topology, struct hwloc_obj *obj, const char *name, const void *buffer, size_t length); + int userdata_not_decoded; + + struct hwloc_internal_distances_s { + hwloc_obj_type_t type; + /* add union hwloc_obj_attr_u if we ever support groups */ + unsigned nbobjs; + uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs. */ + uint64_t *values; /* distance matrices, ordered according to the above indexes/objs array. + * distance from i to j is stored in slot i*nbnodes+j. + */ + unsigned long kind; + + /* objects are currently stored in physical_index order */ + hwloc_obj_t *objs; /* array of objects */ + int objs_are_valid; /* set to 1 if the array objs is still valid, 0 if needs refresh */ + + unsigned id; /* to match the container id field of public distances structure */ + struct hwloc_internal_distances_s *prev, *next; + } *first_dist, *last_dist; + unsigned next_dist_id; + + int grouping; + int grouping_verbose; + unsigned grouping_nbaccuracies; + float grouping_accuracies[5]; + unsigned grouping_next_subkind; + + /* list of enabled backends. */ + struct hwloc_backend * backends; + struct hwloc_backend * get_pci_busid_cpuset_backend; + unsigned backend_excludes; + + /* memory allocator for topology objects */ + struct hwloc_tma * tma; + +/***************************************************** + * WARNING: + * changes above in this structure (and its children) + * should cause a bump of HWLOC_TOPOLOGY_ABI. + *****************************************************/ + + /* + * temporary variables during discovery + */ + + /* machine-wide memory. + * temporarily stored there by OSes that only provide this without NUMA information, + * and actually used later by the core. 
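+   *
+   * (Editor's note, an assumption about intent: a backend that can only
+   * report total RAM stores it here, and the core later attaches that
+   * memory to the NUMA node(s) it creates.)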
+ */
+  struct hwloc_numanode_attr_s machine_memory;
+
+  /* pci stuff */
+  int need_pci_belowroot_apply_locality;
+  int pci_has_forced_locality;
+  unsigned pci_forced_locality_nr;
+  struct hwloc_pci_forced_locality_s {
+    unsigned domain;
+    unsigned bus_first, bus_last;
+    hwloc_bitmap_t cpuset;
+  } * pci_forced_locality;
+
+};
+
+extern void hwloc_alloc_root_sets(hwloc_obj_t root);
+extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus);
+extern int hwloc_get_sysctlbyname(const char *name, int64_t *n);
+extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n);
+extern int hwloc_fallback_nbprocessors(struct hwloc_topology *topology);
+
+extern int hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2);
+extern void hwloc__reorder_children(hwloc_obj_t parent);
+
+extern void hwloc_topology_setup_defaults(struct hwloc_topology *topology);
+extern void hwloc_topology_clear(struct hwloc_topology *topology);
+
+/* insert memory object as memory child of normal parent */
+extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
+                                                      hwloc_obj_t obj,
+                                                      hwloc_report_error_t report_error);
+
+extern void hwloc_pci_discovery_init(struct hwloc_topology *topology);
+extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology);
+extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology);
+
+/* Look for an object matching complete cpuset exactly, or insert one.
+ * Return NULL on failure.
+ * Return a good fallback (object above) on failure to insert.
+ */
+extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_cpuset_t cpuset);
+
+/* Move PCI objects currently attached to the root object to their actual location.
+ * Called by the core at the end of hwloc_topology_load().
+ * Prior to this call, all PCI objects may be found below the root object.
+ * After this call and a reconnect of levels, all PCI objects are available through levels.
+ */ +extern int hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology); + +extern int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value); +extern int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value, int replace); +extern int hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s **src_infosp, unsigned *src_countp); +extern void hwloc__free_infos(struct hwloc_info_s *infos, unsigned count); + +/* set native OS binding hooks */ +extern void hwloc_set_native_binding_hooks(struct hwloc_binding_hooks *hooks, struct hwloc_topology_support *support); +/* set either native OS binding hooks (if thissystem), or dummy ones */ +extern void hwloc_set_binding_hooks(struct hwloc_topology *topology); + +#if defined(HWLOC_LINUX_SYS) +extern void hwloc_set_linuxfs_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); +#endif /* HWLOC_LINUX_SYS */ + +#if defined(HWLOC_BGQ_SYS) +extern void hwloc_set_bgq_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); +#endif /* HWLOC_BGQ_SYS */ + +#ifdef HWLOC_SOLARIS_SYS +extern void hwloc_set_solaris_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); +#endif /* HWLOC_SOLARIS_SYS */ + +#ifdef HWLOC_AIX_SYS +extern void hwloc_set_aix_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); +#endif /* HWLOC_AIX_SYS */ + +#ifdef HWLOC_WIN_SYS +extern void hwloc_set_windows_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); +#endif /* HWLOC_WIN_SYS */ + +#ifdef HWLOC_DARWIN_SYS +extern void hwloc_set_darwin_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); +#endif /* HWLOC_DARWIN_SYS */ + +#ifdef HWLOC_FREEBSD_SYS +extern void hwloc_set_freebsd_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); +#endif /* HWLOC_FREEBSD_SYS */ + +#ifdef HWLOC_NETBSD_SYS +extern void hwloc_set_netbsd_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); +#endif /* HWLOC_NETBSD_SYS */ + +#ifdef HWLOC_HPUX_SYS +extern void hwloc_set_hpux_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support); +#endif /* HWLOC_HPUX_SYS */ + +extern int hwloc_look_hardwired_fujitsu_k(struct hwloc_topology *topology); +extern int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology); +extern int hwloc_look_hardwired_fujitsu_fx100(struct hwloc_topology *topology); + +/* Insert uname-specific names/values in the object infos array. + * If cached_uname isn't NULL, it is used as a struct utsname instead of recalling uname. + * Any field that starts with \0 is ignored. 
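+ * (Editor's note: e.g. a caller that already filled a struct utsname via
+ * uname(2) may pass it here to avoid a second system call.)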
+ */
+extern void hwloc_add_uname_info(struct hwloc_topology *topology, void *cached_uname);
+
+/* Free obj and its attributes assuming it's not linked to a parent and doesn't have any child */
+extern void hwloc_free_unlinked_object(hwloc_obj_t obj);
+
+/* Free obj and its children, assuming it's not linked to a parent */
+extern void hwloc_free_object_and_children(hwloc_obj_t obj);
+
+/* Free obj, its next siblings, and their children, assuming they're not linked to a parent */
+extern void hwloc_free_object_siblings_and_children(hwloc_obj_t obj);
+
+/* This can be used for the alloc field to get allocated data that can be freed by free() */
+void *hwloc_alloc_heap(hwloc_topology_t topology, size_t len);
+
+/* This can be used for the alloc field to get allocated data that can be freed by munmap() */
+void *hwloc_alloc_mmap(hwloc_topology_t topology, size_t len);
+
+/* This can be used for the free_membind field to free data using free() */
+int hwloc_free_heap(hwloc_topology_t topology, void *addr, size_t len);
+
+/* This can be used for the free_membind field to free data using munmap() */
+int hwloc_free_mmap(hwloc_topology_t topology, void *addr, size_t len);
+
+/* Allocates unbound memory or fails, depending on whether STRICT is requested
+ * or not */
+static __hwloc_inline void *
+hwloc_alloc_or_fail(hwloc_topology_t topology, size_t len, int flags)
+{
+  if (flags & HWLOC_MEMBIND_STRICT)
+    return NULL;
+  return hwloc_alloc(topology, len);
+}
+
+extern void hwloc_internal_distances_init(hwloc_topology_t topology);
+extern void hwloc_internal_distances_prepare(hwloc_topology_t topology);
+extern void hwloc_internal_distances_destroy(hwloc_topology_t topology);
+extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old);
+extern void hwloc_internal_distances_refresh(hwloc_topology_t topology);
+extern int hwloc_internal_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags);
+extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
+extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology);
+
+/* encode src buffer into target buffer.
+ * targsize must be at least 4*((srclength+2)/3)+1.
+ * target will be 0-terminated.
+ */
+extern int hwloc_encode_to_base64(const char *src, size_t srclength, char *target, size_t targsize);
+/* decode src buffer into target buffer.
+ * src is 0-terminated.
+ * targsize must be at least srclength*3/4+1 (srclength not including \0)
+ * but only srclength*3/4 characters will be meaningful
+ * (the next one may be partially written during decoding, but it should be ignored).
+ */
+extern int hwloc_decode_from_base64(char const *src, char *target, size_t targsize);
+
+/* Check whether needle matches the beginning of haystack, at least n, and up
+ * to a colon or \0 */
+extern int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n);
+
+/* On some systems, snprintf returns the size of written data, not the actually
+ * required size. hwloc_snprintf always reports the actually required size. */
+extern int hwloc_snprintf(char *str, size_t size, const char *format, ...) __hwloc_attribute_format(printf, 3, 4);
+
+/* Return the name of the currently running program, if supported.
+ * If not NULL, must be freed by the caller.
+ */ +extern char * hwloc_progname(struct hwloc_topology *topology); + +/* obj->attr->group.kind internal values. + * the core will keep the smallest ones when merging two groups, + * that's why user-given kinds are first. + */ +/* first, user-given groups, should remain as long as possible */ +#define HWLOC_GROUP_KIND_USER 0 /* user-given, user may use subkind too */ +#define HWLOC_GROUP_KIND_SYNTHETIC 10 /* subkind is group depth within synthetic description */ +/* then, hardware-specific groups */ +#define HWLOC_GROUP_KIND_INTEL_KNL_SUBNUMA_CLUSTER 100 /* no subkind */ +#define HWLOC_GROUP_KIND_INTEL_EXTTOPOENUM_UNKNOWN 101 /* subkind is unknown level */ +#define HWLOC_GROUP_KIND_INTEL_MODULE 102 /* no subkind */ +#define HWLOC_GROUP_KIND_INTEL_TILE 103 /* no subkind */ +#define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */ +#define HWLOC_GROUP_KIND_S390_BOOK 110 /* no subkind */ +#define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */ +/* then, OS-specific groups */ +#define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */ +#define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */ +#define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220 /* no subkind */ +#define HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN 221 /* no subkind */ +/* distance groups */ +#define HWLOC_GROUP_KIND_DISTANCE 900 /* subkind is round of adding these groups during distance based grouping */ +/* finally, hwloc-specific groups required to insert something else, should disappear as soon as possible */ +#define HWLOC_GROUP_KIND_IO 1000 /* no subkind */ +#define HWLOC_GROUP_KIND_MEMORY 1001 /* no subkind */ + +/* memory allocator for topology objects */ +struct hwloc_tma { + void * (*malloc)(struct hwloc_tma *, size_t); + void *data; + int dontfree; /* when set, free() or realloc() cannot be used, and tma->malloc() cannot fail */ +}; + +static __hwloc_inline void * +hwloc_tma_malloc(struct hwloc_tma *tma, + size_t size) +{ + if (tma) { + return tma->malloc(tma, size); + } else { + return malloc(size); + } +} + +static __hwloc_inline void * +hwloc_tma_calloc(struct hwloc_tma *tma, + size_t size) +{ + char *ptr = hwloc_tma_malloc(tma, size); + if (ptr) + memset(ptr, 0, size); + return ptr; +} + +static __hwloc_inline char * +hwloc_tma_strdup(struct hwloc_tma *tma, + const char *src) +{ + size_t len = strlen(src); + char *ptr = hwloc_tma_malloc(tma, len+1); + if (ptr) + memcpy(ptr, src, len+1); + return ptr; +} + +/* bitmap allocator to be used inside hwloc */ +extern hwloc_bitmap_t hwloc_bitmap_tma_dup(struct hwloc_tma *tma, hwloc_const_bitmap_t old); + +extern int hwloc__topology_dup(hwloc_topology_t *newp, hwloc_topology_t old, struct hwloc_tma *tma); +extern void hwloc__topology_disadopt(hwloc_topology_t topology); + +#endif /* HWLOC_PRIVATE_H */ diff --git a/src/3rdparty/hwloc/include/private/solaris-chiptype.h b/src/3rdparty/hwloc/include/private/solaris-chiptype.h new file mode 100644 index 000000000..4ad2130a0 --- /dev/null +++ b/src/3rdparty/hwloc/include/private/solaris-chiptype.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2009-2010 Oracle and/or its affiliates. All rights reserved. + * + * Copyright © 2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#ifdef HWLOC_INSIDE_PLUGIN +/* + * these declarations are internal only, they are not available to plugins + * (functions below are internal static symbols). 
+ */
+#error This file should not be used in plugins
+#endif
+
+
+#ifndef HWLOC_PRIVATE_SOLARIS_CHIPTYPE_H
+#define HWLOC_PRIVATE_SOLARIS_CHIPTYPE_H
+
+struct hwloc_solaris_chip_info_s {
+  char *model;
+  char *type;
+  /* L1i, L1d, L2i, L2d, L3 */
+#define HWLOC_SOLARIS_CHIP_INFO_L1I 0
+#define HWLOC_SOLARIS_CHIP_INFO_L1D 1
+#define HWLOC_SOLARIS_CHIP_INFO_L2I 2
+#define HWLOC_SOLARIS_CHIP_INFO_L2D 3
+#define HWLOC_SOLARIS_CHIP_INFO_L3 4
+  long cache_size[5]; /* cleared to -1 if that cache is not wanted */
+  unsigned cache_linesize[5];
+  unsigned cache_associativity[5];
+  int l2_unified;
+};
+
+/* fills the structure with 0 on error */
+extern void hwloc_solaris_get_chip_info(struct hwloc_solaris_chip_info_s *info);
+
+#endif /* HWLOC_PRIVATE_SOLARIS_CHIPTYPE_H */
diff --git a/src/3rdparty/hwloc/include/private/xml.h b/src/3rdparty/hwloc/include/private/xml.h
new file mode 100644
index 000000000..7c73384d9
--- /dev/null
+++ b/src/3rdparty/hwloc/include/private/xml.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2009-2019 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#ifndef PRIVATE_XML_H
+#define PRIVATE_XML_H 1
+
+#include <hwloc.h>
+
+#include <sys/types.h>
+
+HWLOC_DECLSPEC int hwloc__xml_verbose(void);
+
+/**************
+ * XML import *
+ **************/
+
+typedef struct hwloc__xml_import_state_s {
+  struct hwloc__xml_import_state_s *parent;
+
+  /* globals shared across the entire stack of states during import */
+  struct hwloc_xml_backend_data_s *global;
+
+  /* opaque data used to store backend-specific data.
+   * statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
+   */
+  char data[32];
+} * hwloc__xml_import_state_t;
+
+struct hwloc__xml_imported_v1distances_s {
+  unsigned long kind;
+  unsigned nbobjs;
+  float *floats;
+  struct hwloc__xml_imported_v1distances_s *prev, *next;
+};
+
+HWLOC_DECLSPEC int hwloc__xml_import_diff(hwloc__xml_import_state_t state, hwloc_topology_diff_t *firstdiffp);
+
+struct hwloc_xml_backend_data_s {
+  /* xml backend parameters */
+  int (*look_init)(struct hwloc_xml_backend_data_s *bdata, struct hwloc__xml_import_state_s *state);
+  void (*look_done)(struct hwloc_xml_backend_data_s *bdata, int result);
+  void (*backend_exit)(struct hwloc_xml_backend_data_s *bdata);
+  int (*next_attr)(struct hwloc__xml_import_state_s * state, char **namep, char **valuep);
+  int (*find_child)(struct hwloc__xml_import_state_s * state, struct hwloc__xml_import_state_s * childstate, char **tagp);
+  int (*close_tag)(struct hwloc__xml_import_state_s * state); /* look for an explicit closing tag </name> */
+  void (*close_child)(struct hwloc__xml_import_state_s * state);
+  int (*get_content)(struct hwloc__xml_import_state_s * state, char **beginp, size_t expected_length); /* return 0 on empty content (and sets beginp to empty string), 1 on actual content, -1 on error or unexpected content length */
+  void (*close_content)(struct hwloc__xml_import_state_s * state);
+  char * msgprefix;
+  void *data; /* libxml2 doc, or nolibxml buffer */
+  unsigned version_major, version_minor;
+  unsigned nbnumanodes;
+  hwloc_obj_t first_numanode, last_numanode; /* temporary cousin-list for handling v1distances */
+  struct hwloc__xml_imported_v1distances_s *first_v1dist, *last_v1dist;
+  int dont_merge_die_groups;
+};
+
+/**************
+ * XML export *
+ **************/
+
+typedef struct hwloc__xml_export_state_s {
+  struct hwloc__xml_export_state_s *parent;
+
+  void (*new_child)(struct hwloc__xml_export_state_s *parentstate, struct
hwloc__xml_export_state_s *state, const char *name); + void (*new_prop)(struct hwloc__xml_export_state_s *state, const char *name, const char *value); + void (*add_content)(struct hwloc__xml_export_state_s *state, const char *buffer, size_t length); + void (*end_object)(struct hwloc__xml_export_state_s *state, const char *name); + + struct hwloc__xml_export_data_s { + hwloc_obj_t v1_memory_group; /* if we need to insert intermediate group above memory children when exporting to v1 */ + } *global; + + /* opaque data used to store backend-specific data. + * statically allocated to allow stack-allocation by the common code without knowing actual backend needs. + */ + char data[40]; +} * hwloc__xml_export_state_t; + +HWLOC_DECLSPEC void hwloc__xml_export_topology(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, unsigned long flags); + +HWLOC_DECLSPEC void hwloc__xml_export_diff(hwloc__xml_export_state_t parentstate, hwloc_topology_diff_t diff); + +/****************** + * XML components * + ******************/ + +struct hwloc_xml_callbacks { + int (*backend_init)(struct hwloc_xml_backend_data_s *bdata, const char *xmlpath, const char *xmlbuffer, int xmlbuflen); + int (*export_file)(struct hwloc_topology *topology, struct hwloc__xml_export_data_s *edata, const char *filename, unsigned long flags); + int (*export_buffer)(struct hwloc_topology *topology, struct hwloc__xml_export_data_s *edata, char **xmlbuffer, int *buflen, unsigned long flags); + void (*free_buffer)(void *xmlbuffer); + int (*import_diff)(struct hwloc__xml_import_state_s *state, const char *xmlpath, const char *xmlbuffer, int xmlbuflen, hwloc_topology_diff_t *diff, char **refnamep); + int (*export_diff_file)(union hwloc_topology_diff_u *diff, const char *refname, const char *filename); + int (*export_diff_buffer)(union hwloc_topology_diff_u *diff, const char *refname, char **xmlbuffer, int *buflen); +}; + +struct hwloc_xml_component { + struct hwloc_xml_callbacks *nolibxml_callbacks; + struct hwloc_xml_callbacks *libxml_callbacks; +}; + +HWLOC_DECLSPEC void hwloc_xml_callbacks_register(struct hwloc_xml_component *component); +HWLOC_DECLSPEC void hwloc_xml_callbacks_reset(void); + +#endif /* PRIVATE_XML_H */ diff --git a/src/3rdparty/hwloc/src/base64.c b/src/3rdparty/hwloc/src/base64.c new file mode 100644 index 000000000..7b3e12101 --- /dev/null +++ b/src/3rdparty/hwloc/src/base64.c @@ -0,0 +1,309 @@ +/* + * Copyright © 2012-2018 Inria. All rights reserved. + * See COPYING in top-level directory. + * + * Modifications after import: + * - removed all #if + * - updated prototypes + * - updated #include + */ + +/* include hwloc's config before anything else + * so that extensions and features are properly enabled + */ +#include <private/private.h> + +/* $OpenBSD: base64.c,v 1.5 2006/10/21 09:55:03 otto Exp $ */ + +/* + * Copyright (c) 1996 by Internet Software Consortium. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS + * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL INTERNET SOFTWARE + * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +/* + * Portions Copyright (c) 1995 by International Business Machines, Inc. + * + * International Business Machines, Inc. (hereinafter called IBM) grants + * permission under its copyrights to use, copy, modify, and distribute this + * Software with or without fee, provided that the above copyright notice and + * all paragraphs of this notice appear in all copies, and that the name of IBM + * not be used in connection with the marketing of any product incorporating + * the Software or modifications thereof, without specific, written prior + * permission. + * + * To the extent it has a right to do so, IBM grants an immunity from suit + * under its patents, if any, for the use, sale or manufacture of products to + * the extent that such products are used for performing Domain Name System + * dynamic updates in TCP/IP networks by means of the Software. No immunity is + * granted for any product per se or for any other function of any product. + * + * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, + * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN + * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES. + */ + +/* OPENBSD ORIGINAL: lib/libc/net/base64.c */ + +static const char Base64[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char Pad64 = '='; + +/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt) + The following encoding technique is taken from RFC 1521 by Borenstein + and Freed. It is reproduced here in a slightly edited form for + convenience. + + A 65-character subset of US-ASCII is used, enabling 6 bits to be + represented per printable character. (The extra 65th character, "=", + is used to signify a special processing function.) + + The encoding process represents 24-bit groups of input bits as output + strings of 4 encoded characters. Proceeding from left to right, a + 24-bit input group is formed by concatenating 3 8-bit input groups. + These 24 bits are then treated as 4 concatenated 6-bit groups, each + of which is translated into a single digit in the base64 alphabet. + + Each 6-bit group is used as an index into an array of 64 printable + characters. The character referenced by the index is placed in the + output string. + + Table 1: The Base64 Alphabet + + Value Encoding Value Encoding Value Encoding Value Encoding + 0 A 17 R 34 i 51 z + 1 B 18 S 35 j 52 0 + 2 C 19 T 36 k 53 1 + 3 D 20 U 37 l 54 2 + 4 E 21 V 38 m 55 3 + 5 F 22 W 39 n 56 4 + 6 G 23 X 40 o 57 5 + 7 H 24 Y 41 p 58 6 + 8 I 25 Z 42 q 59 7 + 9 J 26 a 43 r 60 8 + 10 K 27 b 44 s 61 9 + 11 L 28 c 45 t 62 + + 12 M 29 d 46 u 63 / + 13 N 30 e 47 v + 14 O 31 f 48 w (pad) = + 15 P 32 g 49 x + 16 Q 33 h 50 y + + Special processing is performed if fewer than 24 bits are available + at the end of the data being encoded. A full encoding quantum is + always completed at the end of a quantity. 
When fewer than 24 input + bits are available in an input group, zero bits are added (on the + right) to form an integral number of 6-bit groups. Padding at the + end of the data is performed using the '=' character. + + Since all base64 input is an integral number of octets, only the + ------------------------------------------------- + following cases can arise: + + (1) the final quantum of encoding input is an integral + multiple of 24 bits; here, the final unit of encoded + output will be an integral multiple of 4 characters + with no "=" padding, + (2) the final quantum of encoding input is exactly 8 bits; + here, the final unit of encoded output will be two + characters followed by two "=" padding characters, or + (3) the final quantum of encoding input is exactly 16 bits; + here, the final unit of encoded output will be three + characters followed by one "=" padding character. + */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +int +hwloc_encode_to_base64(const char *src, size_t srclength, char *target, size_t targsize) +{ + size_t datalength = 0; + unsigned char input[3]; + unsigned char output[4]; + unsigned int i; + + while (2 < srclength) { + input[0] = *src++; + input[1] = *src++; + input[2] = *src++; + srclength -= 3; + + output[0] = input[0] >> 2; + output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4); + output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6); + output[3] = input[2] & 0x3f; + + if (datalength + 4 > targsize) + return (-1); + target[datalength++] = Base64[output[0]]; + target[datalength++] = Base64[output[1]]; + target[datalength++] = Base64[output[2]]; + target[datalength++] = Base64[output[3]]; + } + + /* Now we worry about padding. */ + if (0 != srclength) { + /* Get what's left. */ + input[0] = input[1] = input[2] = '\0'; + for (i = 0; i < srclength; i++) + input[i] = *src++; + + output[0] = input[0] >> 2; + output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4); + output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6); + + if (datalength + 4 > targsize) + return (-1); + target[datalength++] = Base64[output[0]]; + target[datalength++] = Base64[output[1]]; + if (srclength == 1) + target[datalength++] = Pad64; + else + target[datalength++] = Base64[output[2]]; + target[datalength++] = Pad64; + } + if (datalength >= targsize) + return (-1); + target[datalength] = '\0'; /* Returned value doesn't count \0. */ + return (int)(datalength); +} + +/* skips all whitespace anywhere. + converts characters, four at a time, starting at (or after) + src from base - 64 numbers into three 8 bit bytes in the target area. + it returns the number of data bytes stored at the target, or -1 on error. + */ + +int +hwloc_decode_from_base64(char const *src, char *target, size_t targsize) +{ + unsigned int tarindex, state; + int ch; + char *pos; + + state = 0; + tarindex = 0; + + while ((ch = *src++) != '\0') { + if (isspace(ch)) /* Skip whitespace anywhere. */ + continue; + + if (ch == Pad64) + break; + + pos = strchr(Base64, ch); + if (pos == 0) /* A non-base64 character. 
*/ + return (-1); + + switch (state) { + case 0: + if (target) { + if (tarindex >= targsize) + return (-1); + target[tarindex] = (char)(pos - Base64) << 2; + } + state = 1; + break; + case 1: + if (target) { + if (tarindex + 1 >= targsize) + return (-1); + target[tarindex] |= (pos - Base64) >> 4; + target[tarindex+1] = ((pos - Base64) & 0x0f) + << 4 ; + } + tarindex++; + state = 2; + break; + case 2: + if (target) { + if (tarindex + 1 >= targsize) + return (-1); + target[tarindex] |= (pos - Base64) >> 2; + target[tarindex+1] = ((pos - Base64) & 0x03) + << 6; + } + tarindex++; + state = 3; + break; + case 3: + if (target) { + if (tarindex >= targsize) + return (-1); + target[tarindex] |= (pos - Base64); + } + tarindex++; + state = 0; + break; + } + } + + /* + * We are done decoding Base-64 chars. Let's see if we ended + * on a byte boundary, and/or with erroneous trailing characters. + */ + + if (ch == Pad64) { /* We got a pad char. */ + ch = *src++; /* Skip it, get next. */ + switch (state) { + case 0: /* Invalid = in first position */ + case 1: /* Invalid = in second position */ + return (-1); + + case 2: /* Valid, means one byte of info */ + /* Skip any number of spaces. */ + for (; ch != '\0'; ch = *src++) + if (!isspace(ch)) + break; + /* Make sure there is another trailing = sign. */ + if (ch != Pad64) + return (-1); + ch = *src++; /* Skip the = */ + /* Fall through to "single trailing =" case. */ + /* FALLTHROUGH */ + + case 3: /* Valid, means two bytes of info */ + /* + * We know this char is an =. Is there anything but + * whitespace after it? + */ + for (; ch != '\0'; ch = *src++) + if (!isspace(ch)) + return (-1); + + /* + * Now make sure for cases 2 and 3 that the "extra" + * bits that slopped past the last full byte were + * zeros. If we don't check them, they become a + * subliminal channel. + */ + if (target && target[tarindex] != 0) + return (-1); + } + } else { + /* + * We ended by seeing the end of the string. Make sure we + * have no partial bytes lying around. + */ + if (state != 0) + return (-1); + } + + return (tarindex); +} diff --git a/src/3rdparty/hwloc/src/bind.c b/src/3rdparty/hwloc/src/bind.c new file mode 100644 index 000000000..b3457bc76 --- /dev/null +++ b/src/3rdparty/hwloc/src/bind.c @@ -0,0 +1,922 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2010, 2012 Université Bordeaux + * Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/private.h> +#include <hwloc/helper.h> +#ifdef HAVE_SYS_MMAN_H +# include <sys/mman.h> +#endif +/* <malloc.h> is only needed if we don't have posix_memalign() */ +#if defined(hwloc_getpagesize) && !defined(HAVE_POSIX_MEMALIGN) && defined(HAVE_MEMALIGN) && defined(HAVE_MALLOC_H) +#include <malloc.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <stdlib.h> +#include <errno.h> + +/* TODO: HWLOC_GNU_SYS, + * + * We could use glibc's sched_setaffinity generically when it is available + * + * Darwin and OpenBSD don't seem to have binding facilities. 
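+ *
+ * Editor's sketch (not upstream code): the usual caller-side pattern for the
+ * binding entry points implemented below, binding the current thread to the
+ * first PU of the topology:
+ *
+ *   hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
+ *   if (pu && hwloc_set_cpubind(topology, pu->cpuset, HWLOC_CPUBIND_THREAD) < 0)
+ *     perror("hwloc_set_cpubind");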
+ */ + +#define HWLOC_CPUBIND_ALLFLAGS (HWLOC_CPUBIND_PROCESS|HWLOC_CPUBIND_THREAD|HWLOC_CPUBIND_STRICT|HWLOC_CPUBIND_NOMEMBIND) + +static hwloc_const_bitmap_t +hwloc_fix_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t set) +{ + hwloc_const_bitmap_t topology_set = hwloc_topology_get_topology_cpuset(topology); + hwloc_const_bitmap_t complete_set = hwloc_topology_get_complete_cpuset(topology); + + if (hwloc_bitmap_iszero(set)) { + errno = EINVAL; + return NULL; + } + + if (!hwloc_bitmap_isincluded(set, complete_set)) { + errno = EINVAL; + return NULL; + } + + if (hwloc_bitmap_isincluded(topology_set, set)) + set = complete_set; + + return set; +} + +int +hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t set, int flags) +{ + if (flags & ~HWLOC_CPUBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + set = hwloc_fix_cpubind(topology, set); + if (!set) + return -1; + + if (flags & HWLOC_CPUBIND_PROCESS) { + if (topology->binding_hooks.set_thisproc_cpubind) + return topology->binding_hooks.set_thisproc_cpubind(topology, set, flags); + } else if (flags & HWLOC_CPUBIND_THREAD) { + if (topology->binding_hooks.set_thisthread_cpubind) + return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags); + } else { + if (topology->binding_hooks.set_thisproc_cpubind) { + int err = topology->binding_hooks.set_thisproc_cpubind(topology, set, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.set_thisthread_cpubind) + return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags); + } + + errno = ENOSYS; + return -1; +} + +int +hwloc_get_cpubind(hwloc_topology_t topology, hwloc_bitmap_t set, int flags) +{ + if (flags & ~HWLOC_CPUBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + if (flags & HWLOC_CPUBIND_PROCESS) { + if (topology->binding_hooks.get_thisproc_cpubind) + return topology->binding_hooks.get_thisproc_cpubind(topology, set, flags); + } else if (flags & HWLOC_CPUBIND_THREAD) { + if (topology->binding_hooks.get_thisthread_cpubind) + return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags); + } else { + if (topology->binding_hooks.get_thisproc_cpubind) { + int err = topology->binding_hooks.get_thisproc_cpubind(topology, set, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.get_thisthread_cpubind) + return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags); + } + + errno = ENOSYS; + return -1; +} + +int +hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t set, int flags) +{ + if (flags & ~HWLOC_CPUBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + set = hwloc_fix_cpubind(topology, set); + if (!set) + return -1; + + if (topology->binding_hooks.set_proc_cpubind) + return topology->binding_hooks.set_proc_cpubind(topology, pid, set, flags); + + errno = ENOSYS; + return -1; +} + +int +hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, int flags) +{ + if (flags & ~HWLOC_CPUBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + if (topology->binding_hooks.get_proc_cpubind) + return topology->binding_hooks.get_proc_cpubind(topology, pid, set, flags); + + errno = ENOSYS; + return -1; +} + +#ifdef hwloc_thread_t +int +hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_const_bitmap_t set, int flags) +{ + if (flags & ~HWLOC_CPUBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + set = 
hwloc_fix_cpubind(topology, set); + if (!set) + return -1; + + if (topology->binding_hooks.set_thread_cpubind) + return topology->binding_hooks.set_thread_cpubind(topology, tid, set, flags); + + errno = ENOSYS; + return -1; +} + +int +hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_bitmap_t set, int flags) +{ + if (flags & ~HWLOC_CPUBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + if (topology->binding_hooks.get_thread_cpubind) + return topology->binding_hooks.get_thread_cpubind(topology, tid, set, flags); + + errno = ENOSYS; + return -1; +} +#endif + +int +hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t set, int flags) +{ + if (flags & ~HWLOC_CPUBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + if (flags & HWLOC_CPUBIND_PROCESS) { + if (topology->binding_hooks.get_thisproc_last_cpu_location) + return topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags); + } else if (flags & HWLOC_CPUBIND_THREAD) { + if (topology->binding_hooks.get_thisthread_last_cpu_location) + return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags); + } else { + if (topology->binding_hooks.get_thisproc_last_cpu_location) { + int err = topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.get_thisthread_last_cpu_location) + return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags); + } + + errno = ENOSYS; + return -1; +} + +int +hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, int flags) +{ + if (flags & ~HWLOC_CPUBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + if (topology->binding_hooks.get_proc_last_cpu_location) + return topology->binding_hooks.get_proc_last_cpu_location(topology, pid, set, flags); + + errno = ENOSYS; + return -1; +} + +#define HWLOC_MEMBIND_ALLFLAGS (HWLOC_MEMBIND_PROCESS|HWLOC_MEMBIND_THREAD|HWLOC_MEMBIND_STRICT|HWLOC_MEMBIND_MIGRATE|HWLOC_MEMBIND_NOCPUBIND|HWLOC_MEMBIND_BYNODESET) + +static hwloc_const_nodeset_t +hwloc_fix_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) +{ + hwloc_const_bitmap_t topology_nodeset = hwloc_topology_get_topology_nodeset(topology); + hwloc_const_bitmap_t complete_nodeset = hwloc_topology_get_complete_nodeset(topology); + + if (hwloc_bitmap_iszero(nodeset)) { + errno = EINVAL; + return NULL; + } + + if (!hwloc_bitmap_isincluded(nodeset, complete_nodeset)) { + errno = EINVAL; + return NULL; + } + + if (hwloc_bitmap_isincluded(topology_nodeset, nodeset)) + return complete_nodeset; + + return nodeset; +} + +static int +hwloc_fix_membind_cpuset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_const_cpuset_t cpuset) +{ + hwloc_const_bitmap_t topology_set = hwloc_topology_get_topology_cpuset(topology); + hwloc_const_bitmap_t complete_set = hwloc_topology_get_complete_cpuset(topology); + hwloc_const_bitmap_t complete_nodeset = hwloc_topology_get_complete_nodeset(topology); + + if (hwloc_bitmap_iszero(cpuset)) { + errno = EINVAL; + return -1; + } + + if (!hwloc_bitmap_isincluded(cpuset, complete_set)) { + errno = EINVAL; + return -1; + } + + if (hwloc_bitmap_isincluded(topology_set, cpuset)) { + hwloc_bitmap_copy(nodeset, complete_nodeset); + return 0; + } + + hwloc_cpuset_to_nodeset(topology, cpuset, nodeset); + return 0; +} + +static __hwloc_inline int hwloc__check_membind_policy(hwloc_membind_policy_t policy) +{ + if 
(policy == HWLOC_MEMBIND_DEFAULT + || policy == HWLOC_MEMBIND_FIRSTTOUCH + || policy == HWLOC_MEMBIND_BIND + || policy == HWLOC_MEMBIND_INTERLEAVE + || policy == HWLOC_MEMBIND_NEXTTOUCH) + return 0; + return -1; +} + +static int +hwloc_set_membind_by_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + if ((flags & ~HWLOC_MEMBIND_ALLFLAGS) || hwloc__check_membind_policy(policy) < 0) { + errno = EINVAL; + return -1; + } + + nodeset = hwloc_fix_membind(topology, nodeset); + if (!nodeset) + return -1; + + if (flags & HWLOC_MEMBIND_PROCESS) { + if (topology->binding_hooks.set_thisproc_membind) + return topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags); + } else if (flags & HWLOC_MEMBIND_THREAD) { + if (topology->binding_hooks.set_thisthread_membind) + return topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags); + } else { + if (topology->binding_hooks.set_thisproc_membind) { + int err = topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.set_thisthread_membind) + return topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags); + } + + errno = ENOSYS; + return -1; +} + +int +hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) +{ + int ret; + + if (flags & HWLOC_MEMBIND_BYNODESET) { + ret = hwloc_set_membind_by_nodeset(topology, set, policy, flags); + } else { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + if (hwloc_fix_membind_cpuset(topology, nodeset, set)) + ret = -1; + else + ret = hwloc_set_membind_by_nodeset(topology, nodeset, policy, flags); + hwloc_bitmap_free(nodeset); + } + return ret; +} + +static int +hwloc_get_membind_by_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + if (flags & ~HWLOC_MEMBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + if (flags & HWLOC_MEMBIND_PROCESS) { + if (topology->binding_hooks.get_thisproc_membind) + return topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags); + } else if (flags & HWLOC_MEMBIND_THREAD) { + if (topology->binding_hooks.get_thisthread_membind) + return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags); + } else { + if (topology->binding_hooks.get_thisproc_membind) { + int err = topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags); + if (err >= 0 || errno != ENOSYS) + return err; + /* ENOSYS, fallback */ + } + if (topology->binding_hooks.get_thisthread_membind) + return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags); + } + + errno = ENOSYS; + return -1; +} + +int +hwloc_get_membind(hwloc_topology_t topology, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags) +{ + int ret; + + if (flags & HWLOC_MEMBIND_BYNODESET) { + ret = hwloc_get_membind_by_nodeset(topology, set, policy, flags); + } else { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + ret = hwloc_get_membind_by_nodeset(topology, nodeset, policy, flags); + if (!ret) + hwloc_cpuset_from_nodeset(topology, set, nodeset); + hwloc_bitmap_free(nodeset); + } + + return ret; +} + +static int +hwloc_set_proc_membind_by_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + if ((flags & 
~HWLOC_MEMBIND_ALLFLAGS) || hwloc__check_membind_policy(policy) < 0) { + errno = EINVAL; + return -1; + } + + nodeset = hwloc_fix_membind(topology, nodeset); + if (!nodeset) + return -1; + + if (topology->binding_hooks.set_proc_membind) + return topology->binding_hooks.set_proc_membind(topology, pid, nodeset, policy, flags); + + errno = ENOSYS; + return -1; +} + + +int +hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) +{ + int ret; + + if (flags & HWLOC_MEMBIND_BYNODESET) { + ret = hwloc_set_proc_membind_by_nodeset(topology, pid, set, policy, flags); + } else { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + if (hwloc_fix_membind_cpuset(topology, nodeset, set)) + ret = -1; + else + ret = hwloc_set_proc_membind_by_nodeset(topology, pid, nodeset, policy, flags); + hwloc_bitmap_free(nodeset); + } + + return ret; +} + +static int +hwloc_get_proc_membind_by_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + if (flags & ~HWLOC_MEMBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + if (topology->binding_hooks.get_proc_membind) + return topology->binding_hooks.get_proc_membind(topology, pid, nodeset, policy, flags); + + errno = ENOSYS; + return -1; +} + +int +hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags) +{ + int ret; + + if (flags & HWLOC_MEMBIND_BYNODESET) { + ret = hwloc_get_proc_membind_by_nodeset(topology, pid, set, policy, flags); + } else { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + ret = hwloc_get_proc_membind_by_nodeset(topology, pid, nodeset, policy, flags); + if (!ret) + hwloc_cpuset_from_nodeset(topology, set, nodeset); + hwloc_bitmap_free(nodeset); + } + + return ret; +} + +static int +hwloc_set_area_membind_by_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + if ((flags & ~HWLOC_MEMBIND_ALLFLAGS) || hwloc__check_membind_policy(policy) < 0) { + errno = EINVAL; + return -1; + } + + if (!len) + /* nothing to do */ + return 0; + + nodeset = hwloc_fix_membind(topology, nodeset); + if (!nodeset) + return -1; + + if (topology->binding_hooks.set_area_membind) + return topology->binding_hooks.set_area_membind(topology, addr, len, nodeset, policy, flags); + + errno = ENOSYS; + return -1; +} + +int +hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) +{ + int ret; + + if (flags & HWLOC_MEMBIND_BYNODESET) { + ret = hwloc_set_area_membind_by_nodeset(topology, addr, len, set, policy, flags); + } else { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + if (hwloc_fix_membind_cpuset(topology, nodeset, set)) + ret = -1; + else + ret = hwloc_set_area_membind_by_nodeset(topology, addr, len, nodeset, policy, flags); + hwloc_bitmap_free(nodeset); + } + + return ret; +} + +static int +hwloc_get_area_membind_by_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + if (flags & ~HWLOC_MEMBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + if (!len) { + /* nothing to query */ + errno = EINVAL; + return -1; + } + + if (topology->binding_hooks.get_area_membind) + return topology->binding_hooks.get_area_membind(topology, addr, len, nodeset, policy, flags); + + errno = ENOSYS; + 
return -1; +} + +int +hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags) +{ + int ret; + + if (flags & HWLOC_MEMBIND_BYNODESET) { + ret = hwloc_get_area_membind_by_nodeset(topology, addr, len, set, policy, flags); + } else { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + ret = hwloc_get_area_membind_by_nodeset(topology, addr, len, nodeset, policy, flags); + if (!ret) + hwloc_cpuset_from_nodeset(topology, set, nodeset); + hwloc_bitmap_free(nodeset); + } + + return ret; +} + +static int +hwloc_get_area_memlocation_by_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, int flags) +{ + if (flags & ~HWLOC_MEMBIND_ALLFLAGS) { + errno = EINVAL; + return -1; + } + + if (!len) + /* nothing to do */ + return 0; + + if (topology->binding_hooks.get_area_memlocation) + return topology->binding_hooks.get_area_memlocation(topology, addr, len, nodeset, flags); + + errno = ENOSYS; + return -1; +} + +int +hwloc_get_area_memlocation(hwloc_topology_t topology, const void *addr, size_t len, hwloc_cpuset_t set, int flags) +{ + int ret; + + if (flags & HWLOC_MEMBIND_BYNODESET) { + ret = hwloc_get_area_memlocation_by_nodeset(topology, addr, len, set, flags); + } else { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + ret = hwloc_get_area_memlocation_by_nodeset(topology, addr, len, nodeset, flags); + if (!ret) + hwloc_cpuset_from_nodeset(topology, set, nodeset); + hwloc_bitmap_free(nodeset); + } + + return ret; +} + +void * +hwloc_alloc_heap(hwloc_topology_t topology __hwloc_attribute_unused, size_t len) +{ + void *p = NULL; +#if defined(hwloc_getpagesize) && defined(HAVE_POSIX_MEMALIGN) + errno = posix_memalign(&p, hwloc_getpagesize(), len); + if (errno) + p = NULL; +#elif defined(hwloc_getpagesize) && defined(HAVE_MEMALIGN) + p = memalign(hwloc_getpagesize(), len); +#else + p = malloc(len); +#endif + return p; +} + +#ifdef MAP_ANONYMOUS +void * +hwloc_alloc_mmap(hwloc_topology_t topology __hwloc_attribute_unused, size_t len) +{ + void * buffer = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + return buffer == MAP_FAILED ? 
NULL : buffer; +} +#endif + +int +hwloc_free_heap(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len __hwloc_attribute_unused) +{ + free(addr); + return 0; +} + +#ifdef MAP_ANONYMOUS +int +hwloc_free_mmap(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len) +{ + if (!addr) + return 0; + return munmap(addr, len); +} +#endif + +void * +hwloc_alloc(hwloc_topology_t topology, size_t len) +{ + if (topology->binding_hooks.alloc) + return topology->binding_hooks.alloc(topology, len); + return hwloc_alloc_heap(topology, len); +} + +static void * +hwloc_alloc_membind_by_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + void *p; + + if ((flags & ~HWLOC_MEMBIND_ALLFLAGS) || hwloc__check_membind_policy(policy) < 0) { + errno = EINVAL; + return NULL; + } + + nodeset = hwloc_fix_membind(topology, nodeset); + if (!nodeset) + goto fallback; + if (flags & HWLOC_MEMBIND_MIGRATE) { + errno = EINVAL; + goto fallback; + } + + if (topology->binding_hooks.alloc_membind) + return topology->binding_hooks.alloc_membind(topology, len, nodeset, policy, flags); + else if (topology->binding_hooks.set_area_membind) { + p = hwloc_alloc(topology, len); + if (!p) + return NULL; + if (topology->binding_hooks.set_area_membind(topology, p, len, nodeset, policy, flags) && flags & HWLOC_MEMBIND_STRICT) { + int error = errno; + free(p); + errno = error; + return NULL; + } + return p; + } else { + errno = ENOSYS; + } + +fallback: + if (flags & HWLOC_MEMBIND_STRICT) + /* Report error */ + return NULL; + /* Never mind, allocate anyway */ + return hwloc_alloc(topology, len); +} + +void * +hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) +{ + void *ret; + + if (flags & HWLOC_MEMBIND_BYNODESET) { + ret = hwloc_alloc_membind_by_nodeset(topology, len, set, policy, flags); + } else { + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); + if (hwloc_fix_membind_cpuset(topology, nodeset, set)) { + if (flags & HWLOC_MEMBIND_STRICT) + ret = NULL; + else + ret = hwloc_alloc(topology, len); + } else + ret = hwloc_alloc_membind_by_nodeset(topology, len, nodeset, policy, flags); + hwloc_bitmap_free(nodeset); + } + + return ret; +} + +int +hwloc_free(hwloc_topology_t topology, void *addr, size_t len) +{ + if (topology->binding_hooks.free_membind) + return topology->binding_hooks.free_membind(topology, addr, len); + return hwloc_free_heap(topology, addr, len); +} + +/* + * Empty binding hooks always returning success + */ + +static int dontset_return_complete_cpuset(hwloc_topology_t topology, hwloc_cpuset_t set) +{ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); + return 0; +} + +static int dontset_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused) +{ + return 0; +} +static int dontget_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused) +{ + return dontset_return_complete_cpuset(topology, set); +} +static int dontset_thisproc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused) +{ + return 0; +} +static int dontget_thisproc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused) +{ + return 
dontset_return_complete_cpuset(topology, set); +} +static int dontset_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused) +{ + return 0; +} +static int dontget_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_bitmap_t cpuset, int flags __hwloc_attribute_unused) +{ + return dontset_return_complete_cpuset(topology, cpuset); +} +#ifdef hwloc_thread_t +static int dontset_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused) +{ + return 0; +} +static int dontget_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid __hwloc_attribute_unused, hwloc_bitmap_t cpuset, int flags __hwloc_attribute_unused) +{ + return dontset_return_complete_cpuset(topology, cpuset); +} +#endif + +static int dontset_return_complete_nodeset(hwloc_topology_t topology, hwloc_nodeset_t set, hwloc_membind_policy_t *policy) +{ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_nodeset(topology)); + *policy = HWLOC_MEMBIND_MIXED; + return 0; +} + +static int dontset_thisproc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused) +{ + return 0; +} +static int dontget_thisproc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused) +{ + return dontset_return_complete_nodeset(topology, set, policy); +} + +static int dontset_thisthread_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused) +{ + return 0; +} +static int dontget_thisthread_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused) +{ + return dontset_return_complete_nodeset(topology, set, policy); +} + +static int dontset_proc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused) +{ + return 0; +} +static int dontget_proc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused) +{ + return dontset_return_complete_nodeset(topology, set, policy); +} + +static int dontset_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused) +{ + return 0; +} +static int dontget_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused) +{ + return dontset_return_complete_nodeset(topology, set, policy); +} +static 
int dontget_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused) +{ + hwloc_membind_policy_t policy; + return dontset_return_complete_nodeset(topology, set, &policy); +} + +static void * dontalloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused) +{ + return malloc(size); +} +static int dontfree_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused) +{ + free(addr); + return 0; +} + +static void hwloc_set_dummy_hooks(struct hwloc_binding_hooks *hooks, + struct hwloc_topology_support *support __hwloc_attribute_unused) +{ + hooks->set_thisproc_cpubind = dontset_thisproc_cpubind; + hooks->get_thisproc_cpubind = dontget_thisproc_cpubind; + hooks->set_thisthread_cpubind = dontset_thisthread_cpubind; + hooks->get_thisthread_cpubind = dontget_thisthread_cpubind; + hooks->set_proc_cpubind = dontset_proc_cpubind; + hooks->get_proc_cpubind = dontget_proc_cpubind; +#ifdef hwloc_thread_t + hooks->set_thread_cpubind = dontset_thread_cpubind; + hooks->get_thread_cpubind = dontget_thread_cpubind; +#endif + hooks->get_thisproc_last_cpu_location = dontget_thisproc_cpubind; /* cpubind instead of last_cpu_location is ok */ + hooks->get_thisthread_last_cpu_location = dontget_thisthread_cpubind; /* cpubind instead of last_cpu_location is ok */ + hooks->get_proc_last_cpu_location = dontget_proc_cpubind; /* cpubind instead of last_cpu_location is ok */ + /* TODO: get_thread_last_cpu_location */ + hooks->set_thisproc_membind = dontset_thisproc_membind; + hooks->get_thisproc_membind = dontget_thisproc_membind; + hooks->set_thisthread_membind = dontset_thisthread_membind; + hooks->get_thisthread_membind = dontget_thisthread_membind; + hooks->set_proc_membind = dontset_proc_membind; + hooks->get_proc_membind = dontget_proc_membind; + hooks->set_area_membind = dontset_area_membind; + hooks->get_area_membind = dontget_area_membind; + hooks->get_area_memlocation = dontget_area_memlocation; + hooks->alloc_membind = dontalloc_membind; + hooks->free_membind = dontfree_membind; +} + +void +hwloc_set_native_binding_hooks(struct hwloc_binding_hooks *hooks, struct hwloc_topology_support *support) +{ +# ifdef HWLOC_LINUX_SYS + hwloc_set_linuxfs_hooks(hooks, support); +# endif /* HWLOC_LINUX_SYS */ + +# ifdef HWLOC_BGQ_SYS + hwloc_set_bgq_hooks(hooks, support); +# endif /* HWLOC_BGQ_SYS */ + +# ifdef HWLOC_AIX_SYS + hwloc_set_aix_hooks(hooks, support); +# endif /* HWLOC_AIX_SYS */ + +# ifdef HWLOC_SOLARIS_SYS + hwloc_set_solaris_hooks(hooks, support); +# endif /* HWLOC_SOLARIS_SYS */ + +# ifdef HWLOC_WIN_SYS + hwloc_set_windows_hooks(hooks, support); +# endif /* HWLOC_WIN_SYS */ + +# ifdef HWLOC_DARWIN_SYS + hwloc_set_darwin_hooks(hooks, support); +# endif /* HWLOC_DARWIN_SYS */ + +# ifdef HWLOC_FREEBSD_SYS + hwloc_set_freebsd_hooks(hooks, support); +# endif /* HWLOC_FREEBSD_SYS */ + +# ifdef HWLOC_NETBSD_SYS + hwloc_set_netbsd_hooks(hooks, support); +# endif /* HWLOC_NETBSD_SYS */ + +# ifdef HWLOC_HPUX_SYS + hwloc_set_hpux_hooks(hooks, support); +# endif /* HWLOC_HPUX_SYS */ +} + +/* If the represented system is actually not this system, use dummy binding hooks. 
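They pretend that binding succeeds and report the complete cpuset/nodeset on queries, so that code manipulating a topology that does not describe the current machine (e.g. loaded from XML or built synthetically) keeps working.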
*/ +void +hwloc_set_binding_hooks(struct hwloc_topology *topology) +{ + if (topology->is_thissystem) { + hwloc_set_native_binding_hooks(&topology->binding_hooks, &topology->support); + /* every hook not set above will return ENOSYS */ + } else { + /* not this system, use dummy binding hooks that do nothing (but don't return ENOSYS) */ + hwloc_set_dummy_hooks(&topology->binding_hooks, &topology->support); + } + + /* if not is_thissystem, set_cpubind is fake + * and get_cpubind returns the whole system cpuset, + * so don't report that set/get_cpubind as supported + */ + if (topology->is_thissystem) { +#define DO(which,kind) \ + if (topology->binding_hooks.kind) \ + topology->support.which##bind->kind = 1; + DO(cpu,set_thisproc_cpubind); + DO(cpu,get_thisproc_cpubind); + DO(cpu,set_proc_cpubind); + DO(cpu,get_proc_cpubind); + DO(cpu,set_thisthread_cpubind); + DO(cpu,get_thisthread_cpubind); +#ifdef hwloc_thread_t + DO(cpu,set_thread_cpubind); + DO(cpu,get_thread_cpubind); +#endif + DO(cpu,get_thisproc_last_cpu_location); + DO(cpu,get_proc_last_cpu_location); + DO(cpu,get_thisthread_last_cpu_location); + DO(mem,set_thisproc_membind); + DO(mem,get_thisproc_membind); + DO(mem,set_thisthread_membind); + DO(mem,get_thisthread_membind); + DO(mem,set_proc_membind); + DO(mem,get_proc_membind); + DO(mem,set_area_membind); + DO(mem,get_area_membind); + DO(mem,get_area_memlocation); + DO(mem,alloc_membind); + } +} diff --git a/src/3rdparty/hwloc/src/bitmap.c b/src/3rdparty/hwloc/src/bitmap.c new file mode 100644 index 000000000..ea1264afc --- /dev/null +++ b/src/3rdparty/hwloc/src/bitmap.c @@ -0,0 +1,1676 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2011 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> +#include <hwloc/autogen/config.h> +#include <hwloc.h> +#include <private/misc.h> +#include <private/private.h> +#include <private/debug.h> +#include <hwloc/bitmap.h> + +#include <stdarg.h> +#include <stdio.h> +#include <assert.h> +#include <errno.h> +#include <ctype.h> + +/* + * possible improvements: + * - have a way to change the initial allocation size: + * add hwloc_bitmap_set_foo() to change a global here, + * and make the hwloc core call it based on the early number of PUs + * - make HWLOC_BITMAP_PREALLOC_BITS configurable, and detectable + * by parsing /proc/cpuinfo during configure on Linux. + * - preallocate inside the bitmap structure (so that the whole structure is a cacheline for instance) + * and allocate a dedicated array only later when reallocating larger + * - add a bitmap->ulongs_empty_first which guarantees that some first ulongs are empty, + * making tests much faster for big bitmaps since there's no need to look at first ulongs. + * no need for ulongs_empty_first to be exactly the max number of empty ulongs, + * clearing bits that were set earlier isn't very common.
+ */ + +/* magic number */ +#define HWLOC_BITMAP_MAGIC 0x20091007 + +/* preallocated bits in every bitmap */ +#define HWLOC_BITMAP_PREALLOC_BITS 512 +#define HWLOC_BITMAP_PREALLOC_ULONGS (HWLOC_BITMAP_PREALLOC_BITS/HWLOC_BITS_PER_LONG) + +/* actual opaque type internals */ +struct hwloc_bitmap_s { + unsigned ulongs_count; /* how many ulong bitmasks are valid, >= 1 */ + unsigned ulongs_allocated; /* how many ulong bitmasks are allocated, >= ulongs_count */ + unsigned long *ulongs; + int infinite; /* set to 1 if all bits beyond ulongs are set */ +#ifdef HWLOC_DEBUG + int magic; +#endif +}; + +/* overzealous check in debug-mode, not as powerful as valgrind but still useful */ +#ifdef HWLOC_DEBUG +#define HWLOC__BITMAP_CHECK(set) do { \ + assert((set)->magic == HWLOC_BITMAP_MAGIC); \ + assert((set)->ulongs_count >= 1); \ + assert((set)->ulongs_allocated >= (set)->ulongs_count); \ +} while (0) +#else +#define HWLOC__BITMAP_CHECK(set) +#endif + +/* extract a subset from a set using an index or a cpu */ +#define HWLOC_SUBBITMAP_INDEX(cpu) ((cpu)/(HWLOC_BITS_PER_LONG)) +#define HWLOC_SUBBITMAP_CPU_ULBIT(cpu) ((cpu)%(HWLOC_BITS_PER_LONG)) +/* Read from a bitmap ulong without knowing whether x is valid. + * Writers should make sure that x is valid and modify set->ulongs[x] directly. + */ +#define HWLOC_SUBBITMAP_READULONG(set,x) ((x) < (set)->ulongs_count ? (set)->ulongs[x] : (set)->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO) + +/* predefined subset values */ +#define HWLOC_SUBBITMAP_ZERO 0UL +#define HWLOC_SUBBITMAP_FULL (~0UL) +#define HWLOC_SUBBITMAP_ULBIT(bit) (1UL<<(bit)) +#define HWLOC_SUBBITMAP_CPU(cpu) HWLOC_SUBBITMAP_ULBIT(HWLOC_SUBBITMAP_CPU_ULBIT(cpu)) +#define HWLOC_SUBBITMAP_ULBIT_TO(bit) (HWLOC_SUBBITMAP_FULL>>(HWLOC_BITS_PER_LONG-1-(bit))) +#define HWLOC_SUBBITMAP_ULBIT_FROM(bit) (HWLOC_SUBBITMAP_FULL<<(bit)) +#define HWLOC_SUBBITMAP_ULBIT_FROMTO(begin,end) (HWLOC_SUBBITMAP_ULBIT_TO(end) & HWLOC_SUBBITMAP_ULBIT_FROM(begin)) + +struct hwloc_bitmap_s * hwloc_bitmap_alloc(void) +{ + struct hwloc_bitmap_s * set; + + set = malloc(sizeof(struct hwloc_bitmap_s)); + if (!set) + return NULL; + + set->ulongs_count = 1; + set->ulongs_allocated = HWLOC_BITMAP_PREALLOC_ULONGS; + set->ulongs = malloc(HWLOC_BITMAP_PREALLOC_ULONGS * sizeof(unsigned long)); + if (!set->ulongs) { + free(set); + return NULL; + } + + set->ulongs[0] = HWLOC_SUBBITMAP_ZERO; + set->infinite = 0; +#ifdef HWLOC_DEBUG + set->magic = HWLOC_BITMAP_MAGIC; +#endif + return set; +} + +struct hwloc_bitmap_s * hwloc_bitmap_alloc_full(void) +{ + struct hwloc_bitmap_s * set = hwloc_bitmap_alloc(); + if (set) { + set->infinite = 1; + set->ulongs[0] = HWLOC_SUBBITMAP_FULL; + } + return set; +} + +void hwloc_bitmap_free(struct hwloc_bitmap_s * set) +{ + if (!set) + return; + + HWLOC__BITMAP_CHECK(set); +#ifdef HWLOC_DEBUG + set->magic = 0; +#endif + + free(set->ulongs); + free(set); +} + +/* enlarge until it contains at least needed_count ulongs. 
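+ * the new allocation is rounded up to the next power of two, so repeated enlargements stay cheap.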
+ */ +static int +hwloc_bitmap_enlarge_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count) __hwloc_attribute_warn_unused_result; +static int +hwloc_bitmap_enlarge_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count) +{ + unsigned tmp = 1U << hwloc_flsl((unsigned long) needed_count - 1); + if (tmp > set->ulongs_allocated) { + unsigned long *tmpulongs; + tmpulongs = realloc(set->ulongs, tmp * sizeof(unsigned long)); + if (!tmpulongs) + return -1; + set->ulongs = tmpulongs; + set->ulongs_allocated = tmp; + } + return 0; +} + +/* enlarge until it contains at least needed_count ulongs, + * and update new ulongs according to the infinite field. + */ +static int +hwloc_bitmap_realloc_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count) __hwloc_attribute_warn_unused_result; +static int +hwloc_bitmap_realloc_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count) +{ + unsigned i; + + HWLOC__BITMAP_CHECK(set); + + if (needed_count <= set->ulongs_count) + return 0; + + /* realloc larger if needed */ + if (hwloc_bitmap_enlarge_by_ulongs(set, needed_count) < 0) + return -1; + + /* fill the newly allocated subset depending on the infinite flag */ + for(i=set->ulongs_count; i<needed_count; i++) + set->ulongs[i] = set->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO; + set->ulongs_count = needed_count; + return 0; +} + +/* realloc until it contains at least cpu+1 bits */ +#define hwloc_bitmap_realloc_by_cpu_index(set, cpu) hwloc_bitmap_realloc_by_ulongs(set, ((cpu)/HWLOC_BITS_PER_LONG)+1) + +/* reset a bitmap to exactly the needed size. + * the caller must reinitialize all ulongs and the infinite flag later. + */ +static int +hwloc_bitmap_reset_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count) __hwloc_attribute_warn_unused_result; +static int +hwloc_bitmap_reset_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count) +{ + if (hwloc_bitmap_enlarge_by_ulongs(set, needed_count)) + return -1; + set->ulongs_count = needed_count; + return 0; +} + +/* reset until it contains exactly cpu+1 bits (rounded up to a ulong). + * the caller must reinitialize all ulongs and the infinite flag later.
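+ * (the underlying allocation is never shrunk, only ulongs_count changes)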
+ */ +#define hwloc_bitmap_reset_by_cpu_index(set, cpu) hwloc_bitmap_reset_by_ulongs(set, ((cpu)/HWLOC_BITS_PER_LONG)+1) + +struct hwloc_bitmap_s * hwloc_bitmap_tma_dup(struct hwloc_tma *tma, const struct hwloc_bitmap_s * old) +{ + struct hwloc_bitmap_s * new; + + if (!old) + return NULL; + + HWLOC__BITMAP_CHECK(old); + + new = hwloc_tma_malloc(tma, sizeof(struct hwloc_bitmap_s)); + if (!new) + return NULL; + + new->ulongs = hwloc_tma_malloc(tma, old->ulongs_allocated * sizeof(unsigned long)); + if (!new->ulongs) { + free(new); + return NULL; + } + new->ulongs_allocated = old->ulongs_allocated; + new->ulongs_count = old->ulongs_count; + memcpy(new->ulongs, old->ulongs, new->ulongs_count * sizeof(unsigned long)); + new->infinite = old->infinite; +#ifdef HWLOC_DEBUG + new->magic = HWLOC_BITMAP_MAGIC; +#endif + return new; +} + +struct hwloc_bitmap_s * hwloc_bitmap_dup(const struct hwloc_bitmap_s * old) +{ + return hwloc_bitmap_tma_dup(NULL, old); +} + +int hwloc_bitmap_copy(struct hwloc_bitmap_s * dst, const struct hwloc_bitmap_s * src) +{ + HWLOC__BITMAP_CHECK(dst); + HWLOC__BITMAP_CHECK(src); + + if (hwloc_bitmap_reset_by_ulongs(dst, src->ulongs_count) < 0) + return -1; + + memcpy(dst->ulongs, src->ulongs, src->ulongs_count * sizeof(unsigned long)); + dst->infinite = src->infinite; + return 0; +} + +/* Strings always use 32bit groups */ +#define HWLOC_PRIxSUBBITMAP "%08lx" +#define HWLOC_BITMAP_SUBSTRING_SIZE 32 +#define HWLOC_BITMAP_SUBSTRING_LENGTH (HWLOC_BITMAP_SUBSTRING_SIZE/4) +#define HWLOC_BITMAP_STRING_PER_LONG (HWLOC_BITS_PER_LONG/HWLOC_BITMAP_SUBSTRING_SIZE) + +int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s * __hwloc_restrict set) +{ + ssize_t size = buflen; + char *tmp = buf; + int res, ret = 0; + int needcomma = 0; + int i; + unsigned long accum = 0; + int accumed = 0; +#if HWLOC_BITS_PER_LONG == HWLOC_BITMAP_SUBSTRING_SIZE + const unsigned long accum_mask = ~0UL; +#else /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */ + const unsigned long accum_mask = ((1UL << HWLOC_BITMAP_SUBSTRING_SIZE) - 1) << (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE); +#endif /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */ + + HWLOC__BITMAP_CHECK(set); + + /* mark the end in case we do nothing later */ + if (buflen > 0) + tmp[0] = '\0'; + + if (set->infinite) { + res = hwloc_snprintf(tmp, size, "0xf...f"); + needcomma = 1; + if (res < 0) + return -1; + ret += res; + if (res >= size) + res = size>0 ? (int)size - 1 : 0; + tmp += res; + size -= res; + } + + i=(int) set->ulongs_count-1; + + if (set->infinite) { + /* ignore starting FULL since we have 0xf...f already */ + while (i>=0 && set->ulongs[i] == HWLOC_SUBBITMAP_FULL) + i--; + } else { + /* ignore starting ZERO except the last one */ + while (i>=0 && set->ulongs[i] == HWLOC_SUBBITMAP_ZERO) + i--; + } + + while (i>=0 || accumed) { + /* Refill accumulator */ + if (!accumed) { + accum = set->ulongs[i--]; + accumed = HWLOC_BITS_PER_LONG; + } + + if (accum & accum_mask) { + /* print the whole subset if not empty */ + res = hwloc_snprintf(tmp, size, needcomma ? ",0x" HWLOC_PRIxSUBBITMAP : "0x" HWLOC_PRIxSUBBITMAP, + (accum & accum_mask) >> (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE)); + needcomma = 1; + } else if (i == -1 && accumed == HWLOC_BITMAP_SUBSTRING_SIZE) { + /* print a single 0 to mark the last subset */ + res = hwloc_snprintf(tmp, size, needcomma ? 
",0x0" : "0x0"); + } else if (needcomma) { + res = hwloc_snprintf(tmp, size, ","); + } else { + res = 0; + } + if (res < 0) + return -1; + ret += res; + +#if HWLOC_BITS_PER_LONG == HWLOC_BITMAP_SUBSTRING_SIZE + accum = 0; + accumed = 0; +#else + accum <<= HWLOC_BITMAP_SUBSTRING_SIZE; + accumed -= HWLOC_BITMAP_SUBSTRING_SIZE; +#endif + + if (res >= size) + res = size>0 ? (int)size - 1 : 0; + + tmp += res; + size -= res; + } + + /* if didn't display anything, display 0x0 */ + if (!ret) { + res = hwloc_snprintf(tmp, size, "0x0"); + if (res < 0) + return -1; + ret += res; + } + + return ret; +} + +int hwloc_bitmap_asprintf(char ** strp, const struct hwloc_bitmap_s * __hwloc_restrict set) +{ + int len; + char *buf; + + HWLOC__BITMAP_CHECK(set); + + len = hwloc_bitmap_snprintf(NULL, 0, set); + buf = malloc(len+1); + if (!buf) + return -1; + *strp = buf; + return hwloc_bitmap_snprintf(buf, len+1, set); +} + +int hwloc_bitmap_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restrict string) +{ + const char * current = string; + unsigned long accum = 0; + int count=0; + int infinite = 0; + + /* count how many substrings there are */ + count++; + while ((current = strchr(current+1, ',')) != NULL) + count++; + + current = string; + if (!strncmp("0xf...f", current, 7)) { + current += 7; + if (*current != ',') { + /* special case for infinite/full bitmap */ + hwloc_bitmap_fill(set); + return 0; + } + current++; + infinite = 1; + count--; + } + + if (hwloc_bitmap_reset_by_ulongs(set, (count + HWLOC_BITMAP_STRING_PER_LONG - 1) / HWLOC_BITMAP_STRING_PER_LONG) < 0) + return -1; + set->infinite = 0; + + while (*current != '\0') { + unsigned long val; + char *next; + val = strtoul(current, &next, 16); + + assert(count > 0); + count--; + + accum |= (val << ((count * HWLOC_BITMAP_SUBSTRING_SIZE) % HWLOC_BITS_PER_LONG)); + if (!(count % HWLOC_BITMAP_STRING_PER_LONG)) { + set->ulongs[count / HWLOC_BITMAP_STRING_PER_LONG] = accum; + accum = 0; + } + + if (*next != ',') { + if (*next || count > 0) + goto failed; + else + break; + } + current = (const char*) next+1; + } + + set->infinite = infinite; /* set at the end, to avoid spurious realloc with filled new ulongs */ + + return 0; + + failed: + /* failure to parse */ + hwloc_bitmap_zero(set); + return -1; +} + +int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s * __hwloc_restrict set) +{ + int prev = -1; + ssize_t size = buflen; + char *tmp = buf; + int res, ret = 0; + int needcomma = 0; + + HWLOC__BITMAP_CHECK(set); + + /* mark the end in case we do nothing later */ + if (buflen > 0) + tmp[0] = '\0'; + + while (1) { + int begin, end; + + begin = hwloc_bitmap_next(set, prev); + if (begin == -1) + break; + end = hwloc_bitmap_next_unset(set, begin); + + if (end == begin+1) { + res = hwloc_snprintf(tmp, size, needcomma ? ",%d" : "%d", begin); + } else if (end == -1) { + res = hwloc_snprintf(tmp, size, needcomma ? ",%d-" : "%d-", begin); + } else { + res = hwloc_snprintf(tmp, size, needcomma ? ",%d-%d" : "%d-%d", begin, end-1); + } + if (res < 0) + return -1; + ret += res; + + if (res >= size) + res = size>0 ? 
(int)size - 1 : 0; + + tmp += res; + size -= res; + needcomma = 1; + + if (end == -1) + break; + else + prev = end - 1; + } + + return ret; +} + +int hwloc_bitmap_list_asprintf(char ** strp, const struct hwloc_bitmap_s * __hwloc_restrict set) +{ + int len; + char *buf; + + HWLOC__BITMAP_CHECK(set); + + len = hwloc_bitmap_list_snprintf(NULL, 0, set); + buf = malloc(len+1); + if (!buf) + return -1; + *strp = buf; + return hwloc_bitmap_list_snprintf(buf, len+1, set); +} + +int hwloc_bitmap_list_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restrict string) +{ + const char * current = string; + char *next; + long begin = -1, val; + + hwloc_bitmap_zero(set); + + while (*current != '\0') { + + /* ignore empty ranges */ + while (*current == ',' || *current == ' ') + current++; + + val = strtoul(current, &next, 0); + /* make sure we got at least one digit */ + if (next == current) + goto failed; + + if (begin != -1) { + /* finishing a range */ + hwloc_bitmap_set_range(set, begin, val); + begin = -1; + + } else if (*next == '-') { + /* starting a new range */ + if (*(next+1) == '\0') { + /* infinite range */ + hwloc_bitmap_set_range(set, val, -1); + break; + } else { + /* normal range */ + begin = val; + } + + } else if (*next == ',' || *next == ' ' || *next == '\0') { + /* single digit */ + hwloc_bitmap_set(set, val); + } + + if (*next == '\0') + break; + current = next+1; + } + + return 0; + + failed: + /* failure to parse */ + hwloc_bitmap_zero(set); + return -1; +} + +int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s * __hwloc_restrict set) +{ + ssize_t size = buflen; + char *tmp = buf; + int res, ret = 0; + int started = 0; + int i; + + HWLOC__BITMAP_CHECK(set); + + /* mark the end in case we do nothing later */ + if (buflen > 0) + tmp[0] = '\0'; + + if (set->infinite) { + res = hwloc_snprintf(tmp, size, "0xf...f"); + started = 1; + if (res < 0) + return -1; + ret += res; + if (res >= size) + res = size>0 ? (int)size - 1 : 0; + tmp += res; + size -= res; + } + + i=set->ulongs_count-1; + + if (set->infinite) { + /* ignore starting FULL since we have 0xf...f already */ + while (i>=0 && set->ulongs[i] == HWLOC_SUBBITMAP_FULL) + i--; + } else { + /* ignore starting ZERO except the last one */ + while (i>=1 && set->ulongs[i] == HWLOC_SUBBITMAP_ZERO) + i--; + } + + while (i>=0) { + unsigned long val = set->ulongs[i--]; + if (started) { + /* print the whole subset */ +#if HWLOC_BITS_PER_LONG == 64 + res = hwloc_snprintf(tmp, size, "%016lx", val); +#else + res = hwloc_snprintf(tmp, size, "%08lx", val); +#endif + } else if (val || i == -1) { + res = hwloc_snprintf(tmp, size, "0x%lx", val); + started = 1; + } else { + res = 0; + } + if (res < 0) + return -1; + ret += res; + if (res >= size) + res = size>0 ? 
(int)size - 1 : 0; + tmp += res; + size -= res; + } + + /* if didn't display anything, display 0x0 */ + if (!ret) { + res = hwloc_snprintf(tmp, size, "0x0"); + if (res < 0) + return -1; + ret += res; + } + + return ret; +} + +int hwloc_bitmap_taskset_asprintf(char ** strp, const struct hwloc_bitmap_s * __hwloc_restrict set) +{ + int len; + char *buf; + + HWLOC__BITMAP_CHECK(set); + + len = hwloc_bitmap_taskset_snprintf(NULL, 0, set); + buf = malloc(len+1); + if (!buf) + return -1; + *strp = buf; + return hwloc_bitmap_taskset_snprintf(buf, len+1, set); +} + +int hwloc_bitmap_taskset_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restrict string) +{ + const char * current = string; + int chars; + int count; + int infinite = 0; + + if (!strncmp("0xf...f", current, 7)) { + /* infinite bitmap */ + infinite = 1; + current += 7; + if (*current == '\0') { + /* special case for infinite/full bitmap */ + hwloc_bitmap_fill(set); + return 0; + } + } else { + /* finite bitmap */ + if (!strncmp("0x", current, 2)) + current += 2; + if (*current == '\0') { + /* special case for empty bitmap */ + hwloc_bitmap_zero(set); + return 0; + } + } + /* we know there are other characters now */ + + chars = (int)strlen(current); + count = (chars * 4 + HWLOC_BITS_PER_LONG - 1) / HWLOC_BITS_PER_LONG; + + if (hwloc_bitmap_reset_by_ulongs(set, count) < 0) + return -1; + set->infinite = 0; + + while (*current != '\0') { + int tmpchars; + char ustr[17]; + unsigned long val; + char *next; + + tmpchars = chars % (HWLOC_BITS_PER_LONG/4); + if (!tmpchars) + tmpchars = (HWLOC_BITS_PER_LONG/4); + + memcpy(ustr, current, tmpchars); + ustr[tmpchars] = '\0'; + val = strtoul(ustr, &next, 16); + if (*next != '\0') + goto failed; + + set->ulongs[count-1] = val; + + current += tmpchars; + chars -= tmpchars; + count--; + } + + set->infinite = infinite; /* set at the end, to avoid spurious realloc with filled new ulongs */ + + return 0; + + failed: + /* failure to parse */ + hwloc_bitmap_zero(set); + return -1; +} + +static void hwloc_bitmap__zero(struct hwloc_bitmap_s *set) +{ + unsigned i; + for(i=0; i<set->ulongs_count; i++) + set->ulongs[i] = HWLOC_SUBBITMAP_ZERO; + set->infinite = 0; +} + +void hwloc_bitmap_zero(struct hwloc_bitmap_s * set) +{ + HWLOC__BITMAP_CHECK(set); + + HWLOC_BUILD_ASSERT(HWLOC_BITMAP_PREALLOC_ULONGS >= 1); + if (hwloc_bitmap_reset_by_ulongs(set, 1) < 0) { + /* cannot fail since we preallocate some ulongs. + * if we ever preallocate nothing, we'll reset to 0 ulongs. + */ + } + hwloc_bitmap__zero(set); +} + +static void hwloc_bitmap__fill(struct hwloc_bitmap_s * set) +{ + unsigned i; + for(i=0; i<set->ulongs_count; i++) + set->ulongs[i] = HWLOC_SUBBITMAP_FULL; + set->infinite = 1; +} + +void hwloc_bitmap_fill(struct hwloc_bitmap_s * set) +{ + HWLOC__BITMAP_CHECK(set); + + HWLOC_BUILD_ASSERT(HWLOC_BITMAP_PREALLOC_ULONGS >= 1); + if (hwloc_bitmap_reset_by_ulongs(set, 1) < 0) { + /* cannot fail since we pre-allocate some ulongs. + * if we ever pre-allocate nothing, we'll reset to 0 ulongs. + */ + } + hwloc_bitmap__fill(set); +} + +int hwloc_bitmap_from_ulong(struct hwloc_bitmap_s *set, unsigned long mask) +{ + HWLOC__BITMAP_CHECK(set); + + HWLOC_BUILD_ASSERT(HWLOC_BITMAP_PREALLOC_ULONGS >= 1); + if (hwloc_bitmap_reset_by_ulongs(set, 1) < 0) { + /* cannot fail since we pre-allocate some ulongs. + * if ever pre-allocate nothing, we may have to return a failure. 
+ */ + } + set->ulongs[0] = mask; /* there's always at least one ulong allocated */ + set->infinite = 0; + return 0; +} + +int hwloc_bitmap_from_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned long mask) +{ + unsigned j; + + HWLOC__BITMAP_CHECK(set); + + if (hwloc_bitmap_reset_by_ulongs(set, i+1) < 0) + return -1; + + set->ulongs[i] = mask; + for(j=0; j<i; j++) + set->ulongs[j] = HWLOC_SUBBITMAP_ZERO; + set->infinite = 0; + return 0; +} + +unsigned long hwloc_bitmap_to_ulong(const struct hwloc_bitmap_s *set) +{ + HWLOC__BITMAP_CHECK(set); + + return set->ulongs[0]; /* there's always at least one ulong allocated */ +} + +unsigned long hwloc_bitmap_to_ith_ulong(const struct hwloc_bitmap_s *set, unsigned i) +{ + HWLOC__BITMAP_CHECK(set); + + return HWLOC_SUBBITMAP_READULONG(set, i); +} + +int hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu) +{ + unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu); + + HWLOC__BITMAP_CHECK(set); + + if (hwloc_bitmap_reset_by_cpu_index(set, cpu) < 0) + return -1; + + hwloc_bitmap__zero(set); + set->ulongs[index_] |= HWLOC_SUBBITMAP_CPU(cpu); + return 0; +} + +int hwloc_bitmap_allbut(struct hwloc_bitmap_s * set, unsigned cpu) +{ + unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu); + + HWLOC__BITMAP_CHECK(set); + + if (hwloc_bitmap_reset_by_cpu_index(set, cpu) < 0) + return -1; + + hwloc_bitmap__fill(set); + set->ulongs[index_] &= ~HWLOC_SUBBITMAP_CPU(cpu); + return 0; +} + +int hwloc_bitmap_set(struct hwloc_bitmap_s * set, unsigned cpu) +{ + unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu); + + HWLOC__BITMAP_CHECK(set); + + /* nothing to do if setting inside the infinite part of the bitmap */ + if (set->infinite && cpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + return 0; + + if (hwloc_bitmap_realloc_by_cpu_index(set, cpu) < 0) + return -1; + + set->ulongs[index_] |= HWLOC_SUBBITMAP_CPU(cpu); + return 0; +} + +int hwloc_bitmap_set_range(struct hwloc_bitmap_s * set, unsigned begincpu, int _endcpu) +{ + unsigned i; + unsigned beginset,endset; + unsigned endcpu = (unsigned) _endcpu; + + HWLOC__BITMAP_CHECK(set); + + if (endcpu < begincpu) + return 0; + if (set->infinite && begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + /* setting only in the already-set infinite part, nothing to do */ + return 0; + + if (_endcpu == -1) { + /* infinite range */ + + /* make sure we can play with the ulong that contains begincpu */ + if (hwloc_bitmap_realloc_by_cpu_index(set, begincpu) < 0) + return -1; + + /* update the ulong that contains begincpu */ + beginset = HWLOC_SUBBITMAP_INDEX(begincpu); + set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu)); + /* set ulongs after begincpu if any already allocated */ + for(i=beginset+1; i<set->ulongs_count; i++) + set->ulongs[i] = HWLOC_SUBBITMAP_FULL; + /* mark the infinity as set */ + set->infinite = 1; + } else { + /* finite range */ + + /* ignore the part of the range that overlaps with the already-set infinite part */ + if (set->infinite && endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1; + /* make sure we can play with the ulongs that contain begincpu and endcpu */ + if (hwloc_bitmap_realloc_by_cpu_index(set, endcpu) < 0) + return -1; + + /* update first and last ulongs */ + beginset = HWLOC_SUBBITMAP_INDEX(begincpu); + endset = HWLOC_SUBBITMAP_INDEX(endcpu); + if (beginset == endset) { + set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + } else { + 
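/* the range spans several ulongs: set the upper bits of the first one and the lower bits of the last one */ +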
set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu)); + set->ulongs[endset] |= HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + } + /* set ulongs in the middle of the range */ + for(i=beginset+1; i<endset; i++) + set->ulongs[i] = HWLOC_SUBBITMAP_FULL; + } + + return 0; +} + +int hwloc_bitmap_set_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned long mask) +{ + HWLOC__BITMAP_CHECK(set); + + if (hwloc_bitmap_realloc_by_ulongs(set, i+1) < 0) + return -1; + + set->ulongs[i] = mask; + return 0; +} + +int hwloc_bitmap_clr(struct hwloc_bitmap_s * set, unsigned cpu) +{ + unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu); + + HWLOC__BITMAP_CHECK(set); + + /* nothing to do if clearing inside the infinitely-unset part of the bitmap */ + if (!set->infinite && cpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + return 0; + + if (hwloc_bitmap_realloc_by_cpu_index(set, cpu) < 0) + return -1; + + set->ulongs[index_] &= ~HWLOC_SUBBITMAP_CPU(cpu); + return 0; +} + +int hwloc_bitmap_clr_range(struct hwloc_bitmap_s * set, unsigned begincpu, int _endcpu) +{ + unsigned i; + unsigned beginset,endset; + unsigned endcpu = (unsigned) _endcpu; + + HWLOC__BITMAP_CHECK(set); + + if (endcpu < begincpu) + return 0; + + if (!set->infinite && begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + /* clearing only in the already-unset infinite part, nothing to do */ + return 0; + + if (_endcpu == -1) { + /* infinite range */ + + /* make sure we can play with the ulong that contains begincpu */ + if (hwloc_bitmap_realloc_by_cpu_index(set, begincpu) < 0) + return -1; + + /* update the ulong that contains begincpu */ + beginset = HWLOC_SUBBITMAP_INDEX(begincpu); + set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu)); + /* clear ulong after begincpu if any already allocated */ + for(i=beginset+1; i<set->ulongs_count; i++) + set->ulongs[i] = HWLOC_SUBBITMAP_ZERO; + /* mark the infinity as unset */ + set->infinite = 0; + } else { + /* finite range */ + + /* ignore the part of the range that overlaps with the already-unset infinite part */ + if (!set->infinite && endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1; + /* make sure we can play with the ulongs that contain begincpu and endcpu */ + if (hwloc_bitmap_realloc_by_cpu_index(set, endcpu) < 0) + return -1; + + /* update first and last ulongs */ + beginset = HWLOC_SUBBITMAP_INDEX(begincpu); + endset = HWLOC_SUBBITMAP_INDEX(endcpu); + if (beginset == endset) { + set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + } else { + set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu)); + set->ulongs[endset] &= ~HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + } + /* clear ulongs in the middle of the range */ + for(i=beginset+1; i<endset; i++) + set->ulongs[i] = HWLOC_SUBBITMAP_ZERO; + } + + return 0; +} + +int hwloc_bitmap_isset(const struct hwloc_bitmap_s * set, unsigned cpu) +{ + unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu); + + HWLOC__BITMAP_CHECK(set); + + return (HWLOC_SUBBITMAP_READULONG(set, index_) & HWLOC_SUBBITMAP_CPU(cpu)) != 0; +} + +int hwloc_bitmap_iszero(const struct hwloc_bitmap_s *set) +{ + unsigned i; + + HWLOC__BITMAP_CHECK(set); + + if (set->infinite) + return 0; + for(i=0; i<set->ulongs_count; i++) + if (set->ulongs[i] != HWLOC_SUBBITMAP_ZERO) + return 0; + return 1; +} + +int 
hwloc_bitmap_isfull(const struct hwloc_bitmap_s *set) +{ + unsigned i; + + HWLOC__BITMAP_CHECK(set); + + if (!set->infinite) + return 0; + for(i=0; i<set->ulongs_count; i++) + if (set->ulongs[i] != HWLOC_SUBBITMAP_FULL) + return 0; + return 1; +} + +int hwloc_bitmap_isequal (const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2) +{ + unsigned count1 = set1->ulongs_count; + unsigned count2 = set2->ulongs_count; + unsigned min_count = count1 < count2 ? count1 : count2; + unsigned i; + + HWLOC__BITMAP_CHECK(set1); + HWLOC__BITMAP_CHECK(set2); + + for(i=0; i<min_count; i++) + if (set1->ulongs[i] != set2->ulongs[i]) + return 0; + + if (count1 != count2) { + unsigned long w1 = set1->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO; + unsigned long w2 = set2->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO; + for(i=min_count; i<count1; i++) { + if (set1->ulongs[i] != w2) + return 0; + } + for(i=min_count; i<count2; i++) { + if (set2->ulongs[i] != w1) + return 0; + } + } + + if (set1->infinite != set2->infinite) + return 0; + + return 1; +} + +int hwloc_bitmap_intersects (const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2) +{ + unsigned count1 = set1->ulongs_count; + unsigned count2 = set2->ulongs_count; + unsigned min_count = count1 < count2 ? count1 : count2; + unsigned i; + + HWLOC__BITMAP_CHECK(set1); + HWLOC__BITMAP_CHECK(set2); + + for(i=0; i<min_count; i++) + if (set1->ulongs[i] & set2->ulongs[i]) + return 1; + + if (count1 != count2) { + if (set2->infinite) { + for(i=min_count; i<set1->ulongs_count; i++) + if (set1->ulongs[i]) + return 1; + } + if (set1->infinite) { + for(i=min_count; i<set2->ulongs_count; i++) + if (set2->ulongs[i]) + return 1; + } + } + + if (set1->infinite && set2->infinite) + return 1; + + return 0; +} + +int hwloc_bitmap_isincluded (const struct hwloc_bitmap_s *sub_set, const struct hwloc_bitmap_s *super_set) +{ + unsigned super_count = super_set->ulongs_count; + unsigned sub_count = sub_set->ulongs_count; + unsigned min_count = super_count < sub_count ? super_count : sub_count; + unsigned i; + + HWLOC__BITMAP_CHECK(sub_set); + HWLOC__BITMAP_CHECK(super_set); + + for(i=0; i<min_count; i++) + if (super_set->ulongs[i] != (super_set->ulongs[i] | sub_set->ulongs[i])) + return 0; + + if (super_count != sub_count) { + if (!super_set->infinite) + for(i=min_count; i<sub_count; i++) + if (sub_set->ulongs[i]) + return 0; + if (sub_set->infinite) + for(i=min_count; i<super_count; i++) + if (super_set->ulongs[i] != HWLOC_SUBBITMAP_FULL) + return 0; + } + + if (sub_set->infinite && !super_set->infinite) + return 0; + + return 1; +} + +int hwloc_bitmap_or (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2) +{ + /* cache counts so that we can reset res even if it's also set1 or set2 */ + unsigned count1 = set1->ulongs_count; + unsigned count2 = set2->ulongs_count; + unsigned max_count = count1 > count2 ? 
count1 : count2; + unsigned min_count = count1 + count2 - max_count; + unsigned i; + + HWLOC__BITMAP_CHECK(res); + HWLOC__BITMAP_CHECK(set1); + HWLOC__BITMAP_CHECK(set2); + + if (hwloc_bitmap_reset_by_ulongs(res, max_count) < 0) + return -1; + + for(i=0; i<min_count; i++) + res->ulongs[i] = set1->ulongs[i] | set2->ulongs[i]; + + if (count1 != count2) { + if (min_count < count1) { + if (set2->infinite) { + res->ulongs_count = min_count; + } else { + for(i=min_count; i<max_count; i++) + res->ulongs[i] = set1->ulongs[i]; + } + } else { + if (set1->infinite) { + res->ulongs_count = min_count; + } else { + for(i=min_count; i<max_count; i++) + res->ulongs[i] = set2->ulongs[i]; + } + } + } + + res->infinite = set1->infinite || set2->infinite; + return 0; +} + +int hwloc_bitmap_and (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2) +{ + /* cache counts so that we can reset res even if it's also set1 or set2 */ + unsigned count1 = set1->ulongs_count; + unsigned count2 = set2->ulongs_count; + unsigned max_count = count1 > count2 ? count1 : count2; + unsigned min_count = count1 + count2 - max_count; + unsigned i; + + HWLOC__BITMAP_CHECK(res); + HWLOC__BITMAP_CHECK(set1); + HWLOC__BITMAP_CHECK(set2); + + if (hwloc_bitmap_reset_by_ulongs(res, max_count) < 0) + return -1; + + for(i=0; i<min_count; i++) + res->ulongs[i] = set1->ulongs[i] & set2->ulongs[i]; + + if (count1 != count2) { + if (min_count < count1) { + if (set2->infinite) { + for(i=min_count; i<max_count; i++) + res->ulongs[i] = set1->ulongs[i]; + } else { + res->ulongs_count = min_count; + } + } else { + if (set1->infinite) { + for(i=min_count; i<max_count; i++) + res->ulongs[i] = set2->ulongs[i]; + } else { + res->ulongs_count = min_count; + } + } + } + + res->infinite = set1->infinite && set2->infinite; + return 0; +} + +int hwloc_bitmap_andnot (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2) +{ + /* cache counts so that we can reset res even if it's also set1 or set2 */ + unsigned count1 = set1->ulongs_count; + unsigned count2 = set2->ulongs_count; + unsigned max_count = count1 > count2 ? count1 : count2; + unsigned min_count = count1 + count2 - max_count; + unsigned i; + + HWLOC__BITMAP_CHECK(res); + HWLOC__BITMAP_CHECK(set1); + HWLOC__BITMAP_CHECK(set2); + + if (hwloc_bitmap_reset_by_ulongs(res, max_count) < 0) + return -1; + + for(i=0; i<min_count; i++) + res->ulongs[i] = set1->ulongs[i] & ~set2->ulongs[i]; + + if (count1 != count2) { + if (min_count < count1) { + if (!set2->infinite) { + for(i=min_count; i<max_count; i++) + res->ulongs[i] = set1->ulongs[i]; + } else { + res->ulongs_count = min_count; + } + } else { + if (set1->infinite) { + for(i=min_count; i<max_count; i++) + res->ulongs[i] = ~set2->ulongs[i]; + } else { + res->ulongs_count = min_count; + } + } + } + + res->infinite = set1->infinite && !set2->infinite; + return 0; +} + +int hwloc_bitmap_xor (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2) +{ + /* cache counts so that we can reset res even if it's also set1 or set2 */ + unsigned count1 = set1->ulongs_count; + unsigned count2 = set2->ulongs_count; + unsigned max_count = count1 > count2 ? 
count1 : count2; + unsigned min_count = count1 + count2 - max_count; + unsigned i; + + HWLOC__BITMAP_CHECK(res); + HWLOC__BITMAP_CHECK(set1); + HWLOC__BITMAP_CHECK(set2); + + if (hwloc_bitmap_reset_by_ulongs(res, max_count) < 0) + return -1; + + for(i=0; i<min_count; i++) + res->ulongs[i] = set1->ulongs[i] ^ set2->ulongs[i]; + + if (count1 != count2) { + if (min_count < count1) { + unsigned long w2 = set2->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO; + for(i=min_count; i<max_count; i++) + res->ulongs[i] = set1->ulongs[i] ^ w2; + } else { + unsigned long w1 = set1->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO; + for(i=min_count; i<max_count; i++) + res->ulongs[i] = set2->ulongs[i] ^ w1; + } + } + + res->infinite = (!set1->infinite) != (!set2->infinite); + return 0; +} + +int hwloc_bitmap_not (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set) +{ + unsigned count = set->ulongs_count; + unsigned i; + + HWLOC__BITMAP_CHECK(res); + HWLOC__BITMAP_CHECK(set); + + if (hwloc_bitmap_reset_by_ulongs(res, count) < 0) + return -1; + + for(i=0; i<count; i++) + res->ulongs[i] = ~set->ulongs[i]; + + res->infinite = !set->infinite; + return 0; +} + +int hwloc_bitmap_first(const struct hwloc_bitmap_s * set) +{ + unsigned i; + + HWLOC__BITMAP_CHECK(set); + + for(i=0; i<set->ulongs_count; i++) { + /* subsets are unsigned longs, use ffsl */ + unsigned long w = set->ulongs[i]; + if (w) + return hwloc_ffsl(w) - 1 + HWLOC_BITS_PER_LONG*i; + } + + if (set->infinite) + return set->ulongs_count * HWLOC_BITS_PER_LONG; + + return -1; +} + +int hwloc_bitmap_first_unset(const struct hwloc_bitmap_s * set) +{ + unsigned i; + + HWLOC__BITMAP_CHECK(set); + + for(i=0; i<set->ulongs_count; i++) { + /* subsets are unsigned longs, use ffsl */ + unsigned long w = ~set->ulongs[i]; + if (w) + return hwloc_ffsl(w) - 1 + HWLOC_BITS_PER_LONG*i; + } + + if (!set->infinite) + return set->ulongs_count * HWLOC_BITS_PER_LONG; + + return -1; +} + +int hwloc_bitmap_last(const struct hwloc_bitmap_s * set) +{ + int i; + + HWLOC__BITMAP_CHECK(set); + + if (set->infinite) + return -1; + + for(i=(int)set->ulongs_count-1; i>=0; i--) { + /* subsets are unsigned longs, use flsl */ + unsigned long w = set->ulongs[i]; + if (w) + return hwloc_flsl(w) - 1 + HWLOC_BITS_PER_LONG*i; + } + + return -1; +} + +int hwloc_bitmap_last_unset(const struct hwloc_bitmap_s * set) +{ + int i; + + HWLOC__BITMAP_CHECK(set); + + if (!set->infinite) + return -1; + + for(i=(int)set->ulongs_count-1; i>=0; i--) { + /* subsets are unsigned longs, use flsl */ + unsigned long w = ~set->ulongs[i]; + if (w) + return hwloc_flsl(w) - 1 + HWLOC_BITS_PER_LONG*i; + } + + return -1; +} + +int hwloc_bitmap_next(const struct hwloc_bitmap_s * set, int prev_cpu) +{ + unsigned i = HWLOC_SUBBITMAP_INDEX(prev_cpu + 1); + + HWLOC__BITMAP_CHECK(set); + + if (i >= set->ulongs_count) { + if (set->infinite) + return prev_cpu + 1; + else + return -1; + } + + for(; i<set->ulongs_count; i++) { + /* subsets are unsigned longs, use ffsl */ + unsigned long w = set->ulongs[i]; + + /* if the prev cpu is in the same word as the possible next one, + we need to mask out previous cpus */ + if (prev_cpu >= 0 && HWLOC_SUBBITMAP_INDEX((unsigned) prev_cpu) == i) + w &= ~HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(prev_cpu)); + + if (w) + return hwloc_ffsl(w) - 1 + HWLOC_BITS_PER_LONG*i; + } + + if (set->infinite) + return set->ulongs_count * HWLOC_BITS_PER_LONG; + + return -1; +} + +int hwloc_bitmap_next_unset(const struct hwloc_bitmap_s * set, int prev_cpu) +{ + unsigned i 
= HWLOC_SUBBITMAP_INDEX(prev_cpu + 1); + + HWLOC__BITMAP_CHECK(set); + + if (i >= set->ulongs_count) { + if (!set->infinite) + return prev_cpu + 1; + else + return -1; + } + + for(; i<set->ulongs_count; i++) { + /* subsets are unsigned longs, use ffsl */ + unsigned long w = ~set->ulongs[i]; + + /* if the prev cpu is in the same word as the possible next one, + we need to mask out previous cpus */ + if (prev_cpu >= 0 && HWLOC_SUBBITMAP_INDEX((unsigned) prev_cpu) == i) + w &= ~HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(prev_cpu)); + + if (w) + return hwloc_ffsl(w) - 1 + HWLOC_BITS_PER_LONG*i; + } + + if (!set->infinite) + return set->ulongs_count * HWLOC_BITS_PER_LONG; + + return -1; +} + +int hwloc_bitmap_singlify(struct hwloc_bitmap_s * set) +{ + unsigned i; + int found = 0; + + HWLOC__BITMAP_CHECK(set); + + for(i=0; i<set->ulongs_count; i++) { + if (found) { + set->ulongs[i] = HWLOC_SUBBITMAP_ZERO; + continue; + } else { + /* subsets are unsigned longs, use ffsl */ + unsigned long w = set->ulongs[i]; + if (w) { + int _ffs = hwloc_ffsl(w); + set->ulongs[i] = HWLOC_SUBBITMAP_CPU(_ffs-1); + found = 1; + } + } + } + + if (set->infinite) { + if (found) { + set->infinite = 0; + } else { + /* set the first non allocated bit */ + unsigned first = set->ulongs_count * HWLOC_BITS_PER_LONG; + set->infinite = 0; /* do not let realloc fill the newly allocated sets */ + return hwloc_bitmap_set(set, first); + } + } + + return 0; +} + +int hwloc_bitmap_compare_first(const struct hwloc_bitmap_s * set1, const struct hwloc_bitmap_s * set2) +{ + unsigned count1 = set1->ulongs_count; + unsigned count2 = set2->ulongs_count; + unsigned max_count = count1 > count2 ? count1 : count2; + unsigned min_count = count1 + count2 - max_count; + unsigned i; + + HWLOC__BITMAP_CHECK(set1); + HWLOC__BITMAP_CHECK(set2); + + for(i=0; i<min_count; i++) { + unsigned long w1 = set1->ulongs[i]; + unsigned long w2 = set2->ulongs[i]; + if (w1 || w2) { + int _ffs1 = hwloc_ffsl(w1); + int _ffs2 = hwloc_ffsl(w2); + /* if both have a bit set, compare for real */ + if (_ffs1 && _ffs2) + return _ffs1-_ffs2; + /* one is empty, and it is considered higher, so reverse-compare them */ + return _ffs2-_ffs1; + } + } + + if (count1 != count2) { + if (min_count < count2) { + for(i=min_count; i<count2; i++) { + unsigned long w2 = set2->ulongs[i]; + if (set1->infinite) + return -!(w2 & 1); + else if (w2) + return 1; + } + } else { + for(i=min_count; i<count1; i++) { + unsigned long w1 = set1->ulongs[i]; + if (set2->infinite) + return !(w1 & 1); + else if (w1) + return -1; + } + } + } + + return !!set1->infinite - !!set2->infinite; +} + +int hwloc_bitmap_compare(const struct hwloc_bitmap_s * set1, const struct hwloc_bitmap_s * set2) +{ + unsigned count1 = set1->ulongs_count; + unsigned count2 = set2->ulongs_count; + unsigned max_count = count1 > count2 ? count1 : count2; + unsigned min_count = count1 + count2 - max_count; + int i; + + HWLOC__BITMAP_CHECK(set1); + HWLOC__BITMAP_CHECK(set2); + + if ((!set1->infinite) != (!set2->infinite)) + return !!set1->infinite - !!set2->infinite; + + if (count1 != count2) { + if (min_count < count2) { + unsigned long val1 = set1->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO; + for(i=(int)max_count-1; i>=(int) min_count; i--) { + unsigned long val2 = set2->ulongs[i]; + if (val1 == val2) + continue; + return val1 < val2 ? -1 : 1; + } + } else { + unsigned long val2 = set2->infinite ? 
HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO; + for(i=(int)max_count-1; i>=(int) min_count; i--) { + unsigned long val1 = set1->ulongs[i]; + if (val1 == val2) + continue; + return val1 < val2 ? -1 : 1; + } + } + } + + for(i=(int)min_count-1; i>=0; i--) { + unsigned long val1 = set1->ulongs[i]; + unsigned long val2 = set2->ulongs[i]; + if (val1 == val2) + continue; + return val1 < val2 ? -1 : 1; + } + + return 0; +} + +int hwloc_bitmap_weight(const struct hwloc_bitmap_s * set) +{ + int weight = 0; + unsigned i; + + HWLOC__BITMAP_CHECK(set); + + if (set->infinite) + return -1; + + for(i=0; i<set->ulongs_count; i++) + weight += hwloc_weight_long(set->ulongs[i]); + return weight; +} + +int hwloc_bitmap_compare_inclusion(const struct hwloc_bitmap_s * set1, const struct hwloc_bitmap_s * set2) +{ + unsigned max_count = set1->ulongs_count > set2->ulongs_count ? set1->ulongs_count : set2->ulongs_count; + int result = HWLOC_BITMAP_EQUAL; /* means empty sets return equal */ + int empty1 = 1; + int empty2 = 1; + unsigned i; + + HWLOC__BITMAP_CHECK(set1); + HWLOC__BITMAP_CHECK(set2); + + for(i=0; i<max_count; i++) { + unsigned long val1 = HWLOC_SUBBITMAP_READULONG(set1, (unsigned) i); + unsigned long val2 = HWLOC_SUBBITMAP_READULONG(set2, (unsigned) i); + + if (!val1) { + if (!val2) + /* both empty, no change */ + continue; + + /* val1 empty, val2 not */ + if (result == HWLOC_BITMAP_CONTAINS) { + if (!empty2) + return HWLOC_BITMAP_INTERSECTS; + result = HWLOC_BITMAP_DIFFERENT; + } else if (result == HWLOC_BITMAP_EQUAL) { + result = HWLOC_BITMAP_INCLUDED; + } + /* no change otherwise */ + + } else if (!val2) { + /* val2 empty, val1 not */ + if (result == HWLOC_BITMAP_INCLUDED) { + if (!empty1) + return HWLOC_BITMAP_INTERSECTS; + result = HWLOC_BITMAP_DIFFERENT; + } else if (result == HWLOC_BITMAP_EQUAL) { + result = HWLOC_BITMAP_CONTAINS; + } + /* no change otherwise */ + + } else if (val1 == val2) { + /* equal and not empty */ + if (result == HWLOC_BITMAP_DIFFERENT) + return HWLOC_BITMAP_INTERSECTS; + /* equal/contains/included unchanged */ + + } else if ((val1 & val2) == val1) { + /* included and not empty */ + if (result == HWLOC_BITMAP_CONTAINS || result == HWLOC_BITMAP_DIFFERENT) + return HWLOC_BITMAP_INTERSECTS; + /* equal/included unchanged */ + result = HWLOC_BITMAP_INCLUDED; + + } else if ((val1 & val2) == val2) { + /* contains and not empty */ + if (result == HWLOC_BITMAP_INCLUDED || result == HWLOC_BITMAP_DIFFERENT) + return HWLOC_BITMAP_INTERSECTS; + /* equal/contains unchanged */ + result = HWLOC_BITMAP_CONTAINS; + + } else if ((val1 & val2) != 0) { + /* intersects and not empty */ + return HWLOC_BITMAP_INTERSECTS; + + } else { + /* different and not empty */ + + /* equal/included/contains with non-empty sets means intersects */ + if (result == HWLOC_BITMAP_EQUAL && !empty1 /* implies !empty2 */) + return HWLOC_BITMAP_INTERSECTS; + if (result == HWLOC_BITMAP_INCLUDED && !empty1) + return HWLOC_BITMAP_INTERSECTS; + if (result == HWLOC_BITMAP_CONTAINS && !empty2) + return HWLOC_BITMAP_INTERSECTS; + /* otherwise means different */ + result = HWLOC_BITMAP_DIFFERENT; + } + + empty1 &= !val1; + empty2 &= !val2; + } + + if (!set1->infinite) { + if (set2->infinite) { + /* set2 infinite only */ + if (result == HWLOC_BITMAP_CONTAINS) { + if (!empty2) + return HWLOC_BITMAP_INTERSECTS; + result = HWLOC_BITMAP_DIFFERENT; + } else if (result == HWLOC_BITMAP_EQUAL) { + result = HWLOC_BITMAP_INCLUDED; + } + /* no change otherwise */ + } + } else if (!set2->infinite) { + /* set1 infinite only */ + if (result 
== HWLOC_BITMAP_INCLUDED) { + if (!empty1) + return HWLOC_BITMAP_INTERSECTS; + result = HWLOC_BITMAP_DIFFERENT; + } else if (result == HWLOC_BITMAP_EQUAL) { + result = HWLOC_BITMAP_CONTAINS; + } + /* no change otherwise */ + } else { + /* both infinite */ + if (result == HWLOC_BITMAP_DIFFERENT) + return HWLOC_BITMAP_INTERSECTS; + /* equal/contains/included unchanged */ + } + + return result; +} diff --git a/src/3rdparty/hwloc/src/components.c b/src/3rdparty/hwloc/src/components.c new file mode 100644 index 000000000..bd7c00e36 --- /dev/null +++ b/src/3rdparty/hwloc/src/components.c @@ -0,0 +1,785 @@ +/* + * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2012 Université Bordeaux + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/private.h> +#include <private/xml.h> +#include <private/misc.h> + +#define HWLOC_COMPONENT_STOP_NAME "stop" +#define HWLOC_COMPONENT_EXCLUDE_CHAR '-' +#define HWLOC_COMPONENT_SEPS "," + +/* list of all registered discovery components, sorted by priority, higher priority first. + * noos is last because its priority is 0. + * others' priority is 10. + */ +static struct hwloc_disc_component * hwloc_disc_components = NULL; + +static unsigned hwloc_components_users = 0; /* first one initializes, last ones destroys */ + +static int hwloc_components_verbose = 0; +#ifdef HWLOC_HAVE_PLUGINS +static int hwloc_plugins_verbose = 0; +static const char * hwloc_plugins_blacklist = NULL; +#endif + +/* hwloc_components_mutex serializes: + * - loading/unloading plugins, and modifications of the hwloc_plugins list + * - calls to ltdl, including in hwloc_check_plugin_namespace() + * - registration of components with hwloc_disc_component_register() + * and hwloc_xml_callbacks_register() + */ +#ifdef HWLOC_WIN_SYS +/* Basic mutex on top of InterlockedCompareExchange() on windows, + * Far from perfect, but easy to maintain, and way enough given that this code will never be needed for real. 
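+ * In short: HWLOC_COMPONENTS_LOCK() spins on
+ * InterlockedCompareExchange(&hwloc_components_mutex, 1, 0) until it
+ * atomically swaps 0 -> 1, calling SwitchToThread() to yield between
+ * attempts, and HWLOC_COMPONENTS_UNLOCK() releases it by storing 0 back.
+ * No fairness and no error checking, which is acceptable for the few
+ * short critical sections in this file.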
*/ +#include <windows.h> +static LONG hwloc_components_mutex = 0; +#define HWLOC_COMPONENTS_LOCK() do { \ + while (InterlockedCompareExchange(&hwloc_components_mutex, 1, 0) != 0) \ + SwitchToThread(); \ +} while (0) +#define HWLOC_COMPONENTS_UNLOCK() do { \ + assert(hwloc_components_mutex == 1); \ + hwloc_components_mutex = 0; \ +} while (0) + +#elif defined HWLOC_HAVE_PTHREAD_MUTEX +/* pthread mutex if available (except on windows) */ +#include <pthread.h> +static pthread_mutex_t hwloc_components_mutex = PTHREAD_MUTEX_INITIALIZER; +#define HWLOC_COMPONENTS_LOCK() pthread_mutex_lock(&hwloc_components_mutex) +#define HWLOC_COMPONENTS_UNLOCK() pthread_mutex_unlock(&hwloc_components_mutex) + +#else /* HWLOC_WIN_SYS || HWLOC_HAVE_PTHREAD_MUTEX */ +#error No mutex implementation available +#endif + + +#ifdef HWLOC_HAVE_PLUGINS + +#include <ltdl.h> + +/* array of pointers to dynamically loaded plugins */ +static struct hwloc__plugin_desc { + char *name; + struct hwloc_component *component; + char *filename; + lt_dlhandle handle; + struct hwloc__plugin_desc *next; +} *hwloc_plugins = NULL; + +static int +hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused) +{ + const char *basename; + lt_dlhandle handle; + struct hwloc_component *component; + struct hwloc__plugin_desc *desc, **prevdesc; + + if (hwloc_plugins_verbose) + fprintf(stderr, "Plugin dlforeach found `%s'\n", filename); + + basename = strrchr(filename, '/'); + if (!basename) + basename = filename; + else + basename++; + + if (hwloc_plugins_blacklist && strstr(hwloc_plugins_blacklist, basename)) { + if (hwloc_plugins_verbose) + fprintf(stderr, "Plugin `%s' is blacklisted in the environment\n", basename); + goto out; + } + + /* dlopen and get the component structure */ + handle = lt_dlopenext(filename); + if (!handle) { + if (hwloc_plugins_verbose) + fprintf(stderr, "Failed to load plugin: %s\n", lt_dlerror()); + goto out; + } + +{ + char componentsymbolname[strlen(basename)+10+1]; + sprintf(componentsymbolname, "%s_component", basename); + component = lt_dlsym(handle, componentsymbolname); + if (!component) { + if (hwloc_plugins_verbose) + fprintf(stderr, "Failed to find component symbol `%s'\n", + componentsymbolname); + goto out_with_handle; + } + if (component->abi != HWLOC_COMPONENT_ABI) { + if (hwloc_plugins_verbose) + fprintf(stderr, "Plugin symbol ABI %u instead of %d\n", + component->abi, HWLOC_COMPONENT_ABI); + goto out_with_handle; + } + if (hwloc_plugins_verbose) + fprintf(stderr, "Plugin contains expected symbol `%s'\n", + componentsymbolname); +} + + if (HWLOC_COMPONENT_TYPE_DISC == component->type) { + if (strncmp(basename, "hwloc_", 6)) { + if (hwloc_plugins_verbose) + fprintf(stderr, "Plugin name `%s' doesn't match its type DISCOVERY\n", basename); + goto out_with_handle; + } + } else if (HWLOC_COMPONENT_TYPE_XML == component->type) { + if (strncmp(basename, "hwloc_xml_", 10)) { + if (hwloc_plugins_verbose) + fprintf(stderr, "Plugin name `%s' doesn't match its type XML\n", basename); + goto out_with_handle; + } + } else { + if (hwloc_plugins_verbose) + fprintf(stderr, "Plugin name `%s' has invalid type %u\n", + basename, (unsigned) component->type); + goto out_with_handle; + } + + /* allocate a plugin_desc and queue it */ + desc = malloc(sizeof(*desc)); + if (!desc) + goto out_with_handle; + desc->name = strdup(basename); + desc->filename = strdup(filename); + desc->component = component; + desc->handle = handle; + desc->next = NULL; + if (hwloc_plugins_verbose) + fprintf(stderr, "Plugin 
descriptor `%s' ready\n", basename); + + /* append to the list */ + prevdesc = &hwloc_plugins; + while (*prevdesc) + prevdesc = &((*prevdesc)->next); + *prevdesc = desc; + if (hwloc_plugins_verbose) + fprintf(stderr, "Plugin descriptor `%s' queued\n", basename); + return 0; + + out_with_handle: + lt_dlclose(handle); + out: + return 0; +} + +static void +hwloc_plugins_exit(void) +{ + struct hwloc__plugin_desc *desc, *next; + + if (hwloc_plugins_verbose) + fprintf(stderr, "Closing all plugins\n"); + + desc = hwloc_plugins; + while (desc) { + next = desc->next; + lt_dlclose(desc->handle); + free(desc->name); + free(desc->filename); + free(desc); + desc = next; + } + hwloc_plugins = NULL; + + lt_dlexit(); +} + +static int +hwloc_plugins_init(void) +{ + const char *verboseenv; + const char *path = HWLOC_PLUGINS_PATH; + const char *env; + int err; + + verboseenv = getenv("HWLOC_PLUGINS_VERBOSE"); + hwloc_plugins_verbose = verboseenv ? atoi(verboseenv) : 0; + + hwloc_plugins_blacklist = getenv("HWLOC_PLUGINS_BLACKLIST"); + + err = lt_dlinit(); + if (err) + goto out; + + env = getenv("HWLOC_PLUGINS_PATH"); + if (env) + path = env; + + hwloc_plugins = NULL; + + if (hwloc_plugins_verbose) + fprintf(stderr, "Starting plugin dlforeach in %s\n", path); + err = lt_dlforeachfile(path, hwloc__dlforeach_cb, NULL); + if (err) + goto out_with_init; + + return 0; + + out_with_init: + hwloc_plugins_exit(); + out: + return -1; +} + +#endif /* HWLOC_HAVE_PLUGINS */ + +static const char * +hwloc_disc_component_type_string(hwloc_disc_component_type_t type) +{ + switch (type) { + case HWLOC_DISC_COMPONENT_TYPE_CPU: return "cpu"; + case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: return "global"; + case HWLOC_DISC_COMPONENT_TYPE_MISC: return "misc"; + default: return "**unknown**"; + } +} + +static int +hwloc_disc_component_register(struct hwloc_disc_component *component, + const char *filename) +{ + struct hwloc_disc_component **prev; + + /* check that the component name is valid */ + if (!strcmp(component->name, HWLOC_COMPONENT_STOP_NAME)) { + if (hwloc_components_verbose) + fprintf(stderr, "Cannot register discovery component with reserved name `" HWLOC_COMPONENT_STOP_NAME "'\n"); + return -1; + } + if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR) + || strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) { + if (hwloc_components_verbose) + fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n", + component->name, HWLOC_COMPONENT_EXCLUDE_CHAR); + return -1; + } + /* check that the component type is valid */ + switch ((unsigned) component->type) { + case HWLOC_DISC_COMPONENT_TYPE_CPU: + case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: + case HWLOC_DISC_COMPONENT_TYPE_MISC: + break; + default: + fprintf(stderr, "Cannot register discovery component `%s' with unknown type %u\n", + component->name, (unsigned) component->type); + return -1; + } + + prev = &hwloc_disc_components; + while (NULL != *prev) { + if (!strcmp((*prev)->name, component->name)) { + /* if two components have the same name, only keep the highest priority one */ + if ((*prev)->priority < component->priority) { + /* drop the existing component */ + if (hwloc_components_verbose) + fprintf(stderr, "Dropping previously registered discovery component `%s', priority %u lower than new one %u\n", + (*prev)->name, (*prev)->priority, component->priority); + *prev = (*prev)->next; + } else { + /* drop the new one */ + if (hwloc_components_verbose) + fprintf(stderr, "Ignoring 
new discovery component `%s', priority %u lower than previously registered one %u\n", + component->name, component->priority, (*prev)->priority); + return -1; + } + } + prev = &((*prev)->next); + } + if (hwloc_components_verbose) + fprintf(stderr, "Registered %s discovery component `%s' with priority %u (%s%s)\n", + hwloc_disc_component_type_string(component->type), component->name, component->priority, + filename ? "from plugin " : "statically build", filename ? filename : ""); + + prev = &hwloc_disc_components; + while (NULL != *prev) { + if ((*prev)->priority < component->priority) + break; + prev = &((*prev)->next); + } + component->next = *prev; + *prev = component; + return 0; +} + +#include <static-components.h> + +static void (**hwloc_component_finalize_cbs)(unsigned long); +static unsigned hwloc_component_finalize_cb_count; + +void +hwloc_components_init(void) +{ +#ifdef HWLOC_HAVE_PLUGINS + struct hwloc__plugin_desc *desc; +#endif + const char *verboseenv; + unsigned i; + + HWLOC_COMPONENTS_LOCK(); + assert((unsigned) -1 != hwloc_components_users); + if (0 != hwloc_components_users++) { + HWLOC_COMPONENTS_UNLOCK(); + return; + } + + verboseenv = getenv("HWLOC_COMPONENTS_VERBOSE"); + hwloc_components_verbose = verboseenv ? atoi(verboseenv) : 0; + +#ifdef HWLOC_HAVE_PLUGINS + hwloc_plugins_init(); +#endif + + hwloc_component_finalize_cbs = NULL; + hwloc_component_finalize_cb_count = 0; + /* count the max number of finalize callbacks */ + for(i=0; NULL != hwloc_static_components[i]; i++) + hwloc_component_finalize_cb_count++; +#ifdef HWLOC_HAVE_PLUGINS + for(desc = hwloc_plugins; NULL != desc; desc = desc->next) + hwloc_component_finalize_cb_count++; +#endif + if (hwloc_component_finalize_cb_count) { + hwloc_component_finalize_cbs = calloc(hwloc_component_finalize_cb_count, + sizeof(*hwloc_component_finalize_cbs)); + assert(hwloc_component_finalize_cbs); + /* forget that max number and recompute the real one below */ + hwloc_component_finalize_cb_count = 0; + } + + /* hwloc_static_components is created by configure in static-components.h */ + for(i=0; NULL != hwloc_static_components[i]; i++) { + if (hwloc_static_components[i]->flags) { + fprintf(stderr, "Ignoring static component with invalid flags %lx\n", + hwloc_static_components[i]->flags); + continue; + } + + /* initialize the component */ + if (hwloc_static_components[i]->init && hwloc_static_components[i]->init(0) < 0) { + if (hwloc_components_verbose) + fprintf(stderr, "Ignoring static component, failed to initialize\n"); + continue; + } + /* queue ->finalize() callback if any */ + if (hwloc_static_components[i]->finalize) + hwloc_component_finalize_cbs[hwloc_component_finalize_cb_count++] = hwloc_static_components[i]->finalize; + + /* register for real now */ + if (HWLOC_COMPONENT_TYPE_DISC == hwloc_static_components[i]->type) + hwloc_disc_component_register(hwloc_static_components[i]->data, NULL); + else if (HWLOC_COMPONENT_TYPE_XML == hwloc_static_components[i]->type) + hwloc_xml_callbacks_register(hwloc_static_components[i]->data); + else + assert(0); + } + + /* dynamic plugins */ +#ifdef HWLOC_HAVE_PLUGINS + for(desc = hwloc_plugins; NULL != desc; desc = desc->next) { + if (desc->component->flags) { + fprintf(stderr, "Ignoring plugin `%s' component with invalid flags %lx\n", + desc->name, desc->component->flags); + continue; + } + + /* initialize the component */ + if (desc->component->init && desc->component->init(0) < 0) { + if (hwloc_components_verbose) + fprintf(stderr, "Ignoring plugin `%s', failed to initialize\n", 
desc->name); + continue; + } + /* queue ->finalize() callback if any */ + if (desc->component->finalize) + hwloc_component_finalize_cbs[hwloc_component_finalize_cb_count++] = desc->component->finalize; + + /* register for real now */ + if (HWLOC_COMPONENT_TYPE_DISC == desc->component->type) + hwloc_disc_component_register(desc->component->data, desc->filename); + else if (HWLOC_COMPONENT_TYPE_XML == desc->component->type) + hwloc_xml_callbacks_register(desc->component->data); + else + assert(0); + } +#endif + + HWLOC_COMPONENTS_UNLOCK(); +} + +void +hwloc_backends_init(struct hwloc_topology *topology) +{ + topology->backends = NULL; + topology->backend_excludes = 0; +} + +static struct hwloc_disc_component * +hwloc_disc_component_find(int type /* hwloc_disc_component_type_t or -1 if any */, + const char *name /* name of NULL if any */) +{ + struct hwloc_disc_component *comp = hwloc_disc_components; + while (NULL != comp) { + if ((-1 == type || type == (int) comp->type) + && (NULL == name || !strcmp(name, comp->name))) + return comp; + comp = comp->next; + } + return NULL; +} + +/* used by set_xml(), set_synthetic(), ... environment variables, ... to force the first backend */ +int +hwloc_disc_component_force_enable(struct hwloc_topology *topology, + int envvar_forced, + int type, const char *name, + const void *data1, const void *data2, const void *data3) +{ + struct hwloc_disc_component *comp; + struct hwloc_backend *backend; + + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + + comp = hwloc_disc_component_find(type, name); + if (!comp) { + errno = ENOSYS; + return -1; + } + + backend = comp->instantiate(comp, data1, data2, data3); + if (backend) { + backend->envvar_forced = envvar_forced; + if (topology->backends) + hwloc_backends_disable_all(topology); + return hwloc_backend_enable(topology, backend); + } else + return -1; +} + +static int +hwloc_disc_component_try_enable(struct hwloc_topology *topology, + struct hwloc_disc_component *comp, + const char *comparg, + int envvar_forced) +{ + struct hwloc_backend *backend; + + if (topology->backend_excludes & comp->type) { + if (hwloc_components_verbose) + /* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc. + */ + fprintf(stderr, "Excluding %s discovery component `%s', conflicts with excludes 0x%x\n", + hwloc_disc_component_type_string(comp->type), comp->name, topology->backend_excludes); + return -1; + } + + backend = comp->instantiate(comp, comparg, NULL, NULL); + if (!backend) { + if (hwloc_components_verbose || envvar_forced) + fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name); + return -1; + } + + backend->envvar_forced = envvar_forced; + return hwloc_backend_enable(topology, backend); +} + +void +hwloc_disc_components_enable_others(struct hwloc_topology *topology) +{ + struct hwloc_disc_component *comp; + struct hwloc_backend *backend; + int tryall = 1; + const char *_env; + char *env; /* we'll to modify the env value, so duplicate it */ + + _env = getenv("HWLOC_COMPONENTS"); + env = _env ? 
strdup(_env) : NULL; + + /* enable explicitly listed components */ + if (env) { + char *curenv = env; + size_t s; + + while (*curenv) { + s = strcspn(curenv, HWLOC_COMPONENT_SEPS); + if (s) { + char c; + + /* replace linuxpci with linuxio for backward compatibility with pre-v2.0 */ + if (!strncmp(curenv, "linuxpci", 8) && s == 8) { + curenv[5] = 'i'; + curenv[6] = 'o'; + curenv[7] = *HWLOC_COMPONENT_SEPS; + } else if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, "linuxpci", 8) && s == 9) { + curenv[6] = 'i'; + curenv[7] = 'o'; + curenv[8] = *HWLOC_COMPONENT_SEPS; + /* skip this name, it's a negated one */ + goto nextname; + } + + if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR) + goto nextname; + + if (!strncmp(curenv, HWLOC_COMPONENT_STOP_NAME, s)) { + tryall = 0; + break; + } + + /* save the last char and replace with \0 */ + c = curenv[s]; + curenv[s] = '\0'; + + comp = hwloc_disc_component_find(-1, curenv); + if (comp) { + hwloc_disc_component_try_enable(topology, comp, NULL, 1 /* envvar forced */); + } else { + fprintf(stderr, "Cannot find discovery component `%s'\n", curenv); + } + + /* restore chars (the second loop below needs env to be unmodified) */ + curenv[s] = c; + } + +nextname: + curenv += s; + if (*curenv) + /* Skip comma */ + curenv++; + } + } + + /* env is still the same, the above loop didn't modify it */ + + /* now enable remaining components (except the explicitly '-'-listed ones) */ + if (tryall) { + comp = hwloc_disc_components; + while (NULL != comp) { + if (!comp->enabled_by_default) + goto nextcomp; + /* check if this component was explicitly excluded in env */ + if (env) { + char *curenv = env; + while (*curenv) { + size_t s = strcspn(curenv, HWLOC_COMPONENT_SEPS); + if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, comp->name, s-1) && strlen(comp->name) == s-1) { + if (hwloc_components_verbose) + fprintf(stderr, "Excluding %s discovery component `%s' because of HWLOC_COMPONENTS environment variable\n", + hwloc_disc_component_type_string(comp->type), comp->name); + goto nextcomp; + } + curenv += s; + if (*curenv) + /* Skip comma */ + curenv++; + } + } + hwloc_disc_component_try_enable(topology, comp, NULL, 0 /* defaults, not envvar forced */); +nextcomp: + comp = comp->next; + } + } + + if (hwloc_components_verbose) { + /* print a summary */ + int first = 1; + backend = topology->backends; + fprintf(stderr, "Final list of enabled discovery components: "); + while (backend != NULL) { + fprintf(stderr, "%s%s", first ? 
"" : ",", backend->component->name); + backend = backend->next; + first = 0; + } + fprintf(stderr, "\n"); + } + + free(env); +} + +void +hwloc_components_fini(void) +{ + unsigned i; + + HWLOC_COMPONENTS_LOCK(); + assert(0 != hwloc_components_users); + if (0 != --hwloc_components_users) { + HWLOC_COMPONENTS_UNLOCK(); + return; + } + + for(i=0; i<hwloc_component_finalize_cb_count; i++) + hwloc_component_finalize_cbs[hwloc_component_finalize_cb_count-i-1](0); + free(hwloc_component_finalize_cbs); + hwloc_component_finalize_cbs = NULL; + hwloc_component_finalize_cb_count = 0; + + /* no need to unlink/free the list of components, they'll be unloaded below */ + + hwloc_disc_components = NULL; + hwloc_xml_callbacks_reset(); + +#ifdef HWLOC_HAVE_PLUGINS + hwloc_plugins_exit(); +#endif + + HWLOC_COMPONENTS_UNLOCK(); +} + +struct hwloc_backend * +hwloc_backend_alloc(struct hwloc_disc_component *component) +{ + struct hwloc_backend * backend = malloc(sizeof(*backend)); + if (!backend) { + errno = ENOMEM; + return NULL; + } + backend->component = component; + backend->flags = 0; + backend->discover = NULL; + backend->get_pci_busid_cpuset = NULL; + backend->disable = NULL; + backend->is_thissystem = -1; + backend->next = NULL; + backend->envvar_forced = 0; + return backend; +} + +static void +hwloc_backend_disable(struct hwloc_backend *backend) +{ + if (backend->disable) + backend->disable(backend); + free(backend); +} + +int +hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend) +{ + struct hwloc_backend **pprev; + + /* check backend flags */ + if (backend->flags) { + fprintf(stderr, "Cannot enable %s discovery component `%s' with unknown flags %lx\n", + hwloc_disc_component_type_string(backend->component->type), backend->component->name, backend->flags); + return -1; + } + + /* make sure we didn't already enable this backend, we don't want duplicates */ + pprev = &topology->backends; + while (NULL != *pprev) { + if ((*pprev)->component == backend->component) { + if (hwloc_components_verbose) + fprintf(stderr, "Cannot enable %s discovery component `%s' twice\n", + hwloc_disc_component_type_string(backend->component->type), backend->component->name); + hwloc_backend_disable(backend); + errno = EBUSY; + return -1; + } + pprev = &((*pprev)->next); + } + + if (hwloc_components_verbose) + fprintf(stderr, "Enabling %s discovery component `%s'\n", + hwloc_disc_component_type_string(backend->component->type), backend->component->name); + + /* enqueue at the end */ + pprev = &topology->backends; + while (NULL != *pprev) + pprev = &((*pprev)->next); + backend->next = *pprev; + *pprev = backend; + + backend->topology = topology; + topology->backend_excludes |= backend->component->excludes; + return 0; +} + +void +hwloc_backends_is_thissystem(struct hwloc_topology *topology) +{ + struct hwloc_backend *backend; + const char *local_env; + + /* Apply is_thissystem topology flag before we enforce envvar backends. + * If the application changed the backend with set_foo(), + * it may use set_flags() update the is_thissystem flag here. + * If it changes the backend with environment variables below, + * it may use HWLOC_THISSYSTEM envvar below as well. 
+ */ + + topology->is_thissystem = 1; + + /* apply thissystem from normally-given backends (envvar_forced=0, either set_foo() or defaults) */ + backend = topology->backends; + while (backend != NULL) { + if (backend->envvar_forced == 0 && backend->is_thissystem != -1) { + assert(backend->is_thissystem == 0); + topology->is_thissystem = 0; + } + backend = backend->next; + } + + /* override set_foo() with flags */ + if (topology->flags & HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM) + topology->is_thissystem = 1; + + /* now apply envvar-forced backend (envvar_forced=1) */ + backend = topology->backends; + while (backend != NULL) { + if (backend->envvar_forced == 1 && backend->is_thissystem != -1) { + assert(backend->is_thissystem == 0); + topology->is_thissystem = 0; + } + backend = backend->next; + } + + /* override with envvar-given flag */ + local_env = getenv("HWLOC_THISSYSTEM"); + if (local_env) + topology->is_thissystem = atoi(local_env); +} + +void +hwloc_backends_find_callbacks(struct hwloc_topology *topology) +{ + struct hwloc_backend *backend = topology->backends; + /* use the first backend's get_pci_busid_cpuset callback */ + topology->get_pci_busid_cpuset_backend = NULL; + while (backend != NULL) { + if (backend->get_pci_busid_cpuset) { + topology->get_pci_busid_cpuset_backend = backend; + return; + } + backend = backend->next; + } + return; +} + +void +hwloc_backends_disable_all(struct hwloc_topology *topology) +{ + struct hwloc_backend *backend; + + while (NULL != (backend = topology->backends)) { + struct hwloc_backend *next = backend->next; + if (hwloc_components_verbose) + fprintf(stderr, "Disabling %s discovery component `%s'\n", + hwloc_disc_component_type_string(backend->component->type), backend->component->name); + hwloc_backend_disable(backend); + topology->backends = next; + } + topology->backends = NULL; + topology->backend_excludes = 0; +} diff --git a/src/3rdparty/hwloc/src/diff.c b/src/3rdparty/hwloc/src/diff.c new file mode 100644 index 000000000..00811a7b5 --- /dev/null +++ b/src/3rdparty/hwloc/src/diff.c @@ -0,0 +1,492 @@ +/* + * Copyright © 2013-2018 Inria. All rights reserved. + * See COPYING in top-level directory. 
+ */ + +#include <private/autogen/config.h> +#include <private/private.h> +#include <private/misc.h> + +int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff) +{ + hwloc_topology_diff_t next; + while (diff) { + next = diff->generic.next; + switch (diff->generic.type) { + default: + break; + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR: + switch (diff->obj_attr.diff.generic.type) { + default: + break; + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME: + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO: + free(diff->obj_attr.diff.string.name); + free(diff->obj_attr.diff.string.oldvalue); + free(diff->obj_attr.diff.string.newvalue); + break; + } + break; + } + free(diff); + diff = next; + } + return 0; +} + +/************************ + * Computing diffs + */ + +static void hwloc_append_diff(hwloc_topology_diff_t newdiff, + hwloc_topology_diff_t *firstdiffp, + hwloc_topology_diff_t *lastdiffp) +{ + if (*firstdiffp) + (*lastdiffp)->generic.next = newdiff; + else + *firstdiffp = newdiff; + *lastdiffp = newdiff; + newdiff->generic.next = NULL; +} + +static int hwloc_append_diff_too_complex(hwloc_obj_t obj1, + hwloc_topology_diff_t *firstdiffp, + hwloc_topology_diff_t *lastdiffp) +{ + hwloc_topology_diff_t newdiff; + newdiff = malloc(sizeof(*newdiff)); + if (!newdiff) + return -1; + + newdiff->too_complex.type = HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX; + newdiff->too_complex.obj_depth = obj1->depth; + newdiff->too_complex.obj_index = obj1->logical_index; + hwloc_append_diff(newdiff, firstdiffp, lastdiffp); + return 0; +} + +static int hwloc_append_diff_obj_attr_string(hwloc_obj_t obj, + hwloc_topology_diff_obj_attr_type_t type, + const char *name, + const char *oldvalue, + const char *newvalue, + hwloc_topology_diff_t *firstdiffp, + hwloc_topology_diff_t *lastdiffp) +{ + hwloc_topology_diff_t newdiff; + newdiff = malloc(sizeof(*newdiff)); + if (!newdiff) + return -1; + + newdiff->obj_attr.type = HWLOC_TOPOLOGY_DIFF_OBJ_ATTR; + newdiff->obj_attr.obj_depth = obj->depth; + newdiff->obj_attr.obj_index = obj->logical_index; + newdiff->obj_attr.diff.string.type = type; + newdiff->obj_attr.diff.string.name = name ? strdup(name) : NULL; + newdiff->obj_attr.diff.string.oldvalue = oldvalue ? strdup(oldvalue) : NULL; + newdiff->obj_attr.diff.string.newvalue = newvalue ? 
strdup(newvalue) : NULL; + hwloc_append_diff(newdiff, firstdiffp, lastdiffp); + return 0; +} + +static int hwloc_append_diff_obj_attr_uint64(hwloc_obj_t obj, + hwloc_topology_diff_obj_attr_type_t type, + hwloc_uint64_t idx, + hwloc_uint64_t oldvalue, + hwloc_uint64_t newvalue, + hwloc_topology_diff_t *firstdiffp, + hwloc_topology_diff_t *lastdiffp) +{ + hwloc_topology_diff_t newdiff; + newdiff = malloc(sizeof(*newdiff)); + if (!newdiff) + return -1; + + newdiff->obj_attr.type = HWLOC_TOPOLOGY_DIFF_OBJ_ATTR; + newdiff->obj_attr.obj_depth = obj->depth; + newdiff->obj_attr.obj_index = obj->logical_index; + newdiff->obj_attr.diff.uint64.type = type; + newdiff->obj_attr.diff.uint64.index = idx; + newdiff->obj_attr.diff.uint64.oldvalue = oldvalue; + newdiff->obj_attr.diff.uint64.newvalue = newvalue; + hwloc_append_diff(newdiff, firstdiffp, lastdiffp); + return 0; +} + +static int +hwloc_diff_trees(hwloc_topology_t topo1, hwloc_obj_t obj1, + hwloc_topology_t topo2, hwloc_obj_t obj2, + unsigned flags, + hwloc_topology_diff_t *firstdiffp, hwloc_topology_diff_t *lastdiffp) +{ + unsigned i; + int err; + hwloc_obj_t child1, child2; + + if (obj1->depth != obj2->depth) + goto out_too_complex; + + if (obj1->type != obj2->type) + goto out_too_complex; + if ((!obj1->subtype) != (!obj2->subtype) + || (obj1->subtype && strcmp(obj1->subtype, obj2->subtype))) + goto out_too_complex; + + if (obj1->os_index != obj2->os_index) + /* we could allow different os_index for non-PU non-NUMAnode objects + * but it's likely useless anyway */ + goto out_too_complex; + +#define _SETS_DIFFERENT(_set1, _set2) \ + ( ( !(_set1) != !(_set2) ) \ + || ( (_set1) && !hwloc_bitmap_isequal(_set1, _set2) ) ) +#define SETS_DIFFERENT(_set, _obj1, _obj2) _SETS_DIFFERENT((_obj1)->_set, (_obj2)->_set) + if (SETS_DIFFERENT(cpuset, obj1, obj2) + || SETS_DIFFERENT(complete_cpuset, obj1, obj2) + || SETS_DIFFERENT(nodeset, obj1, obj2) + || SETS_DIFFERENT(complete_nodeset, obj1, obj2)) + goto out_too_complex; + + /* no need to check logical_index, sibling_rank, symmetric_subtree, + * the parents did it */ + + /* gp_index don't have to be strictly identical */ + + if ((!obj1->name) != (!obj2->name) + || (obj1->name && strcmp(obj1->name, obj2->name))) { + err = hwloc_append_diff_obj_attr_string(obj1, + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME, + NULL, + obj1->name, + obj2->name, + firstdiffp, lastdiffp); + if (err < 0) + return err; + } + + /* type-specific attrs */ + switch (obj1->type) { + default: + break; + case HWLOC_OBJ_NUMANODE: + if (obj1->attr->numanode.local_memory != obj2->attr->numanode.local_memory) { + err = hwloc_append_diff_obj_attr_uint64(obj1, + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE, + 0, + obj1->attr->numanode.local_memory, + obj2->attr->numanode.local_memory, + firstdiffp, lastdiffp); + if (err < 0) + return err; + } + /* ignore memory page_types */ + break; + case HWLOC_OBJ_L1CACHE: + case HWLOC_OBJ_L2CACHE: + case HWLOC_OBJ_L3CACHE: + case HWLOC_OBJ_L4CACHE: + case HWLOC_OBJ_L5CACHE: + case HWLOC_OBJ_L1ICACHE: + case HWLOC_OBJ_L2ICACHE: + case HWLOC_OBJ_L3ICACHE: + if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->cache))) + goto out_too_complex; + break; + case HWLOC_OBJ_GROUP: + if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->group))) + goto out_too_complex; + break; + case HWLOC_OBJ_PCI_DEVICE: + if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->pcidev))) + goto out_too_complex; + break; + case HWLOC_OBJ_BRIDGE: + if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->bridge))) + goto out_too_complex; + break; + case 
HWLOC_OBJ_OS_DEVICE: + if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->osdev))) + goto out_too_complex; + break; + } + + /* infos */ + if (obj1->infos_count != obj2->infos_count) + goto out_too_complex; + for(i=0; i<obj1->infos_count; i++) { + struct hwloc_info_s *info1 = &obj1->infos[i], *info2 = &obj2->infos[i]; + if (strcmp(info1->name, info2->name)) + goto out_too_complex; + if (strcmp(obj1->infos[i].value, obj2->infos[i].value)) { + err = hwloc_append_diff_obj_attr_string(obj1, + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO, + info1->name, + info1->value, + info2->value, + firstdiffp, lastdiffp); + if (err < 0) + return err; + } + } + + /* ignore userdata */ + + /* children */ + for(child1 = obj1->first_child, child2 = obj2->first_child; + child1 != NULL && child2 != NULL; + child1 = child1->next_sibling, child2 = child2->next_sibling) { + err = hwloc_diff_trees(topo1, child1, + topo2, child2, + flags, + firstdiffp, lastdiffp); + if (err < 0) + return err; + } + if (child1 || child2) + goto out_too_complex; + + /* memory children */ + for(child1 = obj1->memory_first_child, child2 = obj2->memory_first_child; + child1 != NULL && child2 != NULL; + child1 = child1->next_sibling, child2 = child2->next_sibling) { + err = hwloc_diff_trees(topo1, child1, + topo2, child2, + flags, + firstdiffp, lastdiffp); + if (err < 0) + return err; + } + if (child1 || child2) + goto out_too_complex; + + /* I/O children */ + for(child1 = obj1->io_first_child, child2 = obj2->io_first_child; + child1 != NULL && child2 != NULL; + child1 = child1->next_sibling, child2 = child2->next_sibling) { + err = hwloc_diff_trees(topo1, child1, + topo2, child2, + flags, + firstdiffp, lastdiffp); + if (err < 0) + return err; + } + if (child1 || child2) + goto out_too_complex; + + /* misc children */ + for(child1 = obj1->misc_first_child, child2 = obj2->misc_first_child; + child1 != NULL && child2 != NULL; + child1 = child1->next_sibling, child2 = child2->next_sibling) { + err = hwloc_diff_trees(topo1, child1, + topo2, child2, + flags, + firstdiffp, lastdiffp); + if (err < 0) + return err; + } + if (child1 || child2) + goto out_too_complex; + + return 0; + +out_too_complex: + hwloc_append_diff_too_complex(obj1, firstdiffp, lastdiffp); + return 0; +} + +int hwloc_topology_diff_build(hwloc_topology_t topo1, + hwloc_topology_t topo2, + unsigned long flags, + hwloc_topology_diff_t *diffp) +{ + hwloc_topology_diff_t lastdiff, tmpdiff; + struct hwloc_internal_distances_s *dist1, *dist2; + unsigned i; + int err; + + if (!topo1->is_loaded || !topo2->is_loaded) { + errno = EINVAL; + return -1; + } + + if (flags != 0) { + errno = EINVAL; + return -1; + } + + *diffp = NULL; + err = hwloc_diff_trees(topo1, hwloc_get_root_obj(topo1), + topo2, hwloc_get_root_obj(topo2), + flags, + diffp, &lastdiff); + if (!err) { + tmpdiff = *diffp; + while (tmpdiff) { + if (tmpdiff->generic.type == HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX) { + err = 1; + break; + } + tmpdiff = tmpdiff->generic.next; + } + } + + if (!err) { + if (SETS_DIFFERENT(allowed_cpuset, topo1, topo2) + || SETS_DIFFERENT(allowed_nodeset, topo1, topo2)) { + hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); + err = 1; + } + } + + if (!err) { + /* distances */ + hwloc_internal_distances_refresh(topo1); + hwloc_internal_distances_refresh(topo2); + dist1 = topo1->first_dist; + dist2 = topo2->first_dist; + while (dist1 || dist2) { + if (!!dist1 != !!dist2) { + hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); + err = 1; + break; + } + if (dist1->type 
!= dist2->type + || dist1->nbobjs != dist2->nbobjs + || dist1->kind != dist2->kind + || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) { + hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); + err = 1; + break; + } + for(i=0; i<dist1->nbobjs; i++) + /* gp_index isn't enforced above. so compare logical_index instead, which is enforced. requires distances refresh() above */ + if (dist1->objs[i]->logical_index != dist2->objs[i]->logical_index) { + hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); + err = 1; + break; + } + dist1 = dist1->next; + dist2 = dist2->next; + } + } + + return err; +} + +/******************** + * Applying diffs + */ + +static int +hwloc_apply_diff_one(hwloc_topology_t topology, + hwloc_topology_diff_t diff, + unsigned long flags) +{ + int reverse = !!(flags & HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE); + + switch (diff->generic.type) { + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR: { + struct hwloc_topology_diff_obj_attr_s *obj_attr = &diff->obj_attr; + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, obj_attr->obj_depth, obj_attr->obj_index); + if (!obj) + return -1; + + switch (obj_attr->diff.generic.type) { + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE: { + hwloc_obj_t tmpobj; + hwloc_uint64_t oldvalue = reverse ? obj_attr->diff.uint64.newvalue : obj_attr->diff.uint64.oldvalue; + hwloc_uint64_t newvalue = reverse ? obj_attr->diff.uint64.oldvalue : obj_attr->diff.uint64.newvalue; + hwloc_uint64_t valuediff = newvalue - oldvalue; + if (obj->type != HWLOC_OBJ_NUMANODE) + return -1; + if (obj->attr->numanode.local_memory != oldvalue) + return -1; + obj->attr->numanode.local_memory = newvalue; + tmpobj = obj; + while (tmpobj) { + tmpobj->total_memory += valuediff; + tmpobj = tmpobj->parent; + } + break; + } + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME: { + const char *oldvalue = reverse ? obj_attr->diff.string.newvalue : obj_attr->diff.string.oldvalue; + const char *newvalue = reverse ? obj_attr->diff.string.oldvalue : obj_attr->diff.string.newvalue; + if (!obj->name || strcmp(obj->name, oldvalue)) + return -1; + free(obj->name); + obj->name = strdup(newvalue); + break; + } + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO: { + const char *name = obj_attr->diff.string.name; + const char *oldvalue = reverse ? obj_attr->diff.string.newvalue : obj_attr->diff.string.oldvalue; + const char *newvalue = reverse ? 
obj_attr->diff.string.oldvalue : obj_attr->diff.string.newvalue; + unsigned i; + int found = 0; + for(i=0; i<obj->infos_count; i++) { + struct hwloc_info_s *info = &obj->infos[i]; + if (!strcmp(info->name, name) + && !strcmp(info->value, oldvalue)) { + free(info->value); + info->value = strdup(newvalue); + found = 1; + break; + } + } + if (!found) + return -1; + break; + } + default: + return -1; + } + + break; + } + default: + return -1; + } + + return 0; +} + +int hwloc_topology_diff_apply(hwloc_topology_t topology, + hwloc_topology_diff_t diff, + unsigned long flags) +{ + hwloc_topology_diff_t tmpdiff, tmpdiff2; + int err, nr; + + if (!topology->is_loaded) { + errno = EINVAL; + return -1; + } + + if (flags & ~HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE) { + errno = EINVAL; + return -1; + } + + tmpdiff = diff; + nr = 0; + while (tmpdiff) { + nr++; + err = hwloc_apply_diff_one(topology, tmpdiff, flags); + if (err < 0) + goto cancel; + tmpdiff = tmpdiff->generic.next; + } + return 0; + +cancel: + tmpdiff2 = tmpdiff; + tmpdiff = diff; + while (tmpdiff != tmpdiff2) { + hwloc_apply_diff_one(topology, tmpdiff, flags ^ HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE); + tmpdiff = tmpdiff->generic.next; + } + errno = EINVAL; + return -nr; /* return the index (starting at 1) of the first element that couldn't be applied */ +} diff --git a/src/3rdparty/hwloc/src/distances.c b/src/3rdparty/hwloc/src/distances.c new file mode 100644 index 000000000..f0b91f019 --- /dev/null +++ b/src/3rdparty/hwloc/src/distances.c @@ -0,0 +1,920 @@ +/* + * Copyright © 2010-2018 Inria. All rights reserved. + * Copyright © 2011-2012 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/private.h> +#include <private/debug.h> +#include <private/misc.h> + +#include <float.h> +#include <math.h> + +/****************************************************** + * Global init, prepare, destroy, dup + */ + +/* called during topology init() */ +void hwloc_internal_distances_init(struct hwloc_topology *topology) +{ + topology->first_dist = topology->last_dist = NULL; + topology->next_dist_id = 0; +} + +/* called at the beginning of load() */ +void hwloc_internal_distances_prepare(struct hwloc_topology *topology) +{ + char *env; + hwloc_localeswitch_declare; + + topology->grouping = 1; + if (topology->type_filter[HWLOC_OBJ_GROUP] == HWLOC_TYPE_FILTER_KEEP_NONE) + topology->grouping = 0; + env = getenv("HWLOC_GROUPING"); + if (env && !atoi(env)) + topology->grouping = 0; + + if (topology->grouping) { + topology->grouping_next_subkind = 0; + + HWLOC_BUILD_ASSERT(sizeof(topology->grouping_accuracies)/sizeof(*topology->grouping_accuracies) == 5); + topology->grouping_accuracies[0] = 0.0f; + topology->grouping_accuracies[1] = 0.01f; + topology->grouping_accuracies[2] = 0.02f; + topology->grouping_accuracies[3] = 0.05f; + topology->grouping_accuracies[4] = 0.1f; + topology->grouping_nbaccuracies = 5; + + hwloc_localeswitch_init(); + env = getenv("HWLOC_GROUPING_ACCURACY"); + if (!env) { + /* only use 0.0 */ + topology->grouping_nbaccuracies = 1; + } else if (strcmp(env, "try")) { + /* use the given value */ + topology->grouping_nbaccuracies = 1; + topology->grouping_accuracies[0] = (float) atof(env); + } /* otherwise try all values */ + hwloc_localeswitch_fini(); + + topology->grouping_verbose = 0; + env = getenv("HWLOC_GROUPING_VERBOSE"); + if (env) + topology->grouping_verbose = atoi(env); + } +} + +static void 
hwloc_internal_distances_free(struct hwloc_internal_distances_s *dist) +{ + free(dist->indexes); + free(dist->objs); + free(dist->values); + free(dist); +} + +/* called during topology destroy */ +void hwloc_internal_distances_destroy(struct hwloc_topology * topology) +{ + struct hwloc_internal_distances_s *dist, *next = topology->first_dist; + while ((dist = next) != NULL) { + next = dist->next; + hwloc_internal_distances_free(dist); + } + topology->first_dist = topology->last_dist = NULL; +} + +static int hwloc_internal_distances_dup_one(struct hwloc_topology *new, struct hwloc_internal_distances_s *olddist) +{ + struct hwloc_tma *tma = new->tma; + struct hwloc_internal_distances_s *newdist; + unsigned nbobjs = olddist->nbobjs; + + newdist = hwloc_tma_malloc(tma, sizeof(*newdist)); + if (!newdist) + return -1; + + newdist->type = olddist->type; + newdist->nbobjs = nbobjs; + newdist->kind = olddist->kind; + newdist->id = olddist->id; + + newdist->indexes = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->indexes)); + newdist->objs = hwloc_tma_calloc(tma, nbobjs * sizeof(*newdist->objs)); + newdist->objs_are_valid = 0; + newdist->values = hwloc_tma_malloc(tma, nbobjs*nbobjs * sizeof(*newdist->values)); + if (!newdist->indexes || !newdist->objs || !newdist->values) { + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + hwloc_internal_distances_free(newdist); + return -1; + } + + memcpy(newdist->indexes, olddist->indexes, nbobjs * sizeof(*newdist->indexes)); + memcpy(newdist->values, olddist->values, nbobjs*nbobjs * sizeof(*newdist->values)); + + newdist->next = NULL; + newdist->prev = new->last_dist; + if (new->last_dist) + new->last_dist->next = newdist; + else + new->first_dist = newdist; + new->last_dist = newdist; + + return 0; +} + +/* This function may be called with topology->tma set, it cannot free() or realloc() */ +int hwloc_internal_distances_dup(struct hwloc_topology *new, struct hwloc_topology *old) +{ + struct hwloc_internal_distances_s *olddist; + int err; + new->next_dist_id = old->next_dist_id; + for(olddist = old->first_dist; olddist; olddist = olddist->next) { + err = hwloc_internal_distances_dup_one(new, olddist); + if (err < 0) + return err; + } + return 0; +} + +/****************************************************** + * Remove distances from the topology + */ + +int hwloc_distances_remove(hwloc_topology_t topology) +{ + if (!topology->is_loaded) { + errno = EINVAL; + return -1; + } + hwloc_internal_distances_destroy(topology); + return 0; +} + +int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth) +{ + struct hwloc_internal_distances_s *dist, *next; + hwloc_obj_type_t type; + + if (!topology->is_loaded) { + errno = EINVAL; + return -1; + } + + /* switch back to types since we don't support groups for now */ + type = hwloc_get_depth_type(topology, depth); + if (type == (hwloc_obj_type_t)-1) { + errno = EINVAL; + return -1; + } + + next = topology->first_dist; + while ((dist = next) != NULL) { + next = dist->next; + if (dist->type == type) { + if (next) + next->prev = dist->prev; + else + topology->last_dist = dist->prev; + if (dist->prev) + dist->prev->next = dist->next; + else + topology->first_dist = dist->next; + hwloc_internal_distances_free(dist); + } + } + + return 0; +} + +/****************************************************** + * Add distances to the topology + */ + +static void +hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, uint64_t *values, unsigned long kind, 
unsigned nbaccuracies, float *accuracies, int needcheck); + +/* insert a distance matrix in the topology. + * the caller gives us the distances and objs pointers, we'll free them later. + */ +static int +hwloc_internal_distances__add(hwloc_topology_t topology, + hwloc_obj_type_t type, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values, + unsigned long kind) +{ + struct hwloc_internal_distances_s *dist = calloc(1, sizeof(*dist)); + if (!dist) + goto err; + + dist->type = type; + dist->nbobjs = nbobjs; + dist->kind = kind; + + if (!objs) { + assert(indexes); + /* we only have indexes, we'll refresh objs from there */ + dist->indexes = indexes; + dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t)); + if (!dist->objs) + goto err_with_dist; + dist->objs_are_valid = 0; + + } else { + unsigned i; + assert(!indexes); + /* we only have objs, generate the indexes arrays so that we can refresh objs later */ + dist->objs = objs; + dist->objs_are_valid = 1; + dist->indexes = malloc(nbobjs * sizeof(*dist->indexes)); + if (!dist->indexes) + goto err_with_dist; + if (dist->type == HWLOC_OBJ_PU || dist->type == HWLOC_OBJ_NUMANODE) { + for(i=0; i<nbobjs; i++) + dist->indexes[i] = objs[i]->os_index; + } else { + for(i=0; i<nbobjs; i++) + dist->indexes[i] = objs[i]->gp_index; + } + } + + dist->values = values; + + dist->id = topology->next_dist_id++; + + if (topology->last_dist) + topology->last_dist->next = dist; + else + topology->first_dist = dist; + dist->prev = topology->last_dist; + dist->next = NULL; + topology->last_dist = dist; + return 0; + + err_with_dist: + free(dist); + err: + free(objs); + free(indexes); + free(values); + return -1; +} + +int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, + hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, + unsigned long kind, unsigned long flags) +{ + if (nbobjs < 2) { + errno = EINVAL; + goto err; + } + + /* cannot group without objects, + * and we don't group from XML anyway since the hwloc that generated the XML should have grouped already. + */ + if (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) { + errno = EINVAL; + goto err; + } + + return hwloc_internal_distances__add(topology, type, nbobjs, NULL, indexes, values, kind); + + err: + free(indexes); + free(values); + return -1; +} + +int hwloc_internal_distances_add(hwloc_topology_t topology, + unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, + unsigned long kind, unsigned long flags) +{ + if (nbobjs < 2) { + errno = EINVAL; + goto err; + } + + if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP)) { + float full_accuracy = 0.f; + float *accuracies; + unsigned nbaccuracies; + + if (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE) { + accuracies = topology->grouping_accuracies; + nbaccuracies = topology->grouping_nbaccuracies; + } else { + accuracies = &full_accuracy; + nbaccuracies = 1; + } + + if (topology->grouping_verbose) { + unsigned i, j; + int gp = (objs[0]->type != HWLOC_OBJ_NUMANODE && objs[0]->type != HWLOC_OBJ_PU); + fprintf(stderr, "Trying to group objects using distance matrix:\n"); + fprintf(stderr, "%s", gp ? "gp_index" : "os_index"); + for(j=0; j<nbobjs; j++) + fprintf(stderr, " % 5d", (int)(gp ? objs[j]->gp_index : objs[j]->os_index)); + fprintf(stderr, "\n"); + for(i=0; i<nbobjs; i++) { + fprintf(stderr, " % 5d", (int)(gp ? 
objs[i]->gp_index : objs[i]->os_index)); + for(j=0; j<nbobjs; j++) + fprintf(stderr, " % 5lld", (long long) values[i*nbobjs + j]); + fprintf(stderr, "\n"); + } + } + + hwloc__groups_by_distances(topology, nbobjs, objs, values, + kind, nbaccuracies, accuracies, 1 /* check the first matrice */); + } + + return hwloc_internal_distances__add(topology, objs[0]->type, nbobjs, objs, NULL, values, kind); + + err: + free(objs); + free(values); + return -1; +} + +#define HWLOC_DISTANCES_KIND_FROM_ALL (HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_FROM_USER) +#define HWLOC_DISTANCES_KIND_MEANS_ALL (HWLOC_DISTANCES_KIND_MEANS_LATENCY|HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH) +#define HWLOC_DISTANCES_KIND_ALL (HWLOC_DISTANCES_KIND_FROM_ALL|HWLOC_DISTANCES_KIND_MEANS_ALL) +#define HWLOC_DISTANCES_ADD_FLAG_ALL (HWLOC_DISTANCES_ADD_FLAG_GROUP|HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE) + +/* The actual function exported to the user + */ +int hwloc_distances_add(hwloc_topology_t topology, + unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, + unsigned long kind, unsigned long flags) +{ + hwloc_obj_type_t type; + unsigned i; + uint64_t *_values; + hwloc_obj_t *_objs; + int err; + + if (nbobjs < 2 || !objs || !values || !topology->is_loaded) { + errno = EINVAL; + return -1; + } + if ((kind & ~HWLOC_DISTANCES_KIND_ALL) + || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1 + || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1 + || (flags & ~HWLOC_DISTANCES_ADD_FLAG_ALL)) { + errno = EINVAL; + return -1; + } + + /* no strict need to check for duplicates, things shouldn't break */ + + type = objs[0]->type; + if (type == HWLOC_OBJ_GROUP) { + /* not supported yet, would require we save the subkind together with the type. */ + errno = EINVAL; + return -1; + } + + for(i=1; i<nbobjs; i++) + if (!objs[i] || objs[i]->type != type) { + errno = EINVAL; + return -1; + } + + /* copy the input arrays and give them to the topology */ + _objs = malloc(nbobjs*sizeof(hwloc_obj_t)); + _values = malloc(nbobjs*nbobjs*sizeof(*_values)); + if (!_objs || !_values) + goto out_with_arrays; + + memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t)); + memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values)); + err = hwloc_internal_distances_add(topology, nbobjs, _objs, _values, kind, flags); + if (err < 0) + goto out; /* _objs and _values freed in hwloc_internal_distances_add() */ + + /* in case we added some groups, see if we need to reconnect */ + hwloc_topology_reconnect(topology, 0); + + return 0; + + out_with_arrays: + free(_values); + free(_objs); + out: + return -1; +} + +/****************************************************** + * Refresh objects in distances + */ + +static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) +{ + hwloc_obj_t obj = hwloc_get_obj_by_type(topology, type, 0); + while (obj) { + if (obj->gp_index == gp_index) + return obj; + obj = obj->next_cousin; + } + return NULL; +} + +static void +hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist, + hwloc_obj_t *objs, + unsigned disappeared) +{ + unsigned nbobjs = dist->nbobjs; + unsigned i, newi; + unsigned j, newj; + + for(i=0, newi=0; i<nbobjs; i++) + if (objs[i]) { + for(j=0, newj=0; j<nbobjs; j++) + if (objs[j]) { + dist->values[newi*(nbobjs-disappeared)+newj] = dist->values[i*nbobjs+j]; + newj++; + } + newi++; + } + + for(i=0, newi=0; i<nbobjs; i++) + if (objs[i]) { + objs[newi] = objs[i]; + dist->indexes[newi] = dist->indexes[i]; + newi++; + } + 
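+ /* the surviving entries are now packed at the front of objs/indexes
+  * and the values matrix was compacted above, so shrinking nbobjs is
+  * enough to drop the objects that disappeared */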
+ dist->nbobjs -= disappeared; +} + +static int +hwloc_internal_distances_refresh_one(hwloc_topology_t topology, + struct hwloc_internal_distances_s *dist) +{ + hwloc_obj_type_t type = dist->type; + unsigned nbobjs = dist->nbobjs; + hwloc_obj_t *objs = dist->objs; + uint64_t *indexes = dist->indexes; + unsigned disappeared = 0; + unsigned i; + + if (dist->objs_are_valid) + return 0; + + for(i=0; i<nbobjs; i++) { + hwloc_obj_t obj; + /* TODO use cpuset/nodeset to find pus/numas from the root? + * faster than traversing the entire level? + */ + if (type == HWLOC_OBJ_PU) + obj = hwloc_get_pu_obj_by_os_index(topology, (unsigned) indexes[i]); + else if (type == HWLOC_OBJ_NUMANODE) + obj = hwloc_get_numanode_obj_by_os_index(topology, (unsigned) indexes[i]); + else + obj = hwloc_find_obj_by_type_and_gp_index(topology, type, indexes[i]); + objs[i] = obj; + if (!obj) + disappeared++; + } + + if (nbobjs-disappeared < 2) + /* became useless, drop */ + return -1; + + if (disappeared) + hwloc_internal_distances_restrict(dist, objs, disappeared); + + dist->objs_are_valid = 1; + return 0; +} + +/* This function may be called with topology->tma set, it cannot free() or realloc() */ +void +hwloc_internal_distances_refresh(hwloc_topology_t topology) +{ + struct hwloc_internal_distances_s *dist, *next; + + for(dist = topology->first_dist; dist; dist = next) { + next = dist->next; + + if (hwloc_internal_distances_refresh_one(topology, dist) < 0) { + assert(!topology->tma || !topology->tma->dontfree); /* this tma cannot fail to allocate */ + if (dist->prev) + dist->prev->next = next; + else + topology->first_dist = next; + if (next) + next->prev = dist->prev; + else + topology->last_dist = dist->prev; + hwloc_internal_distances_free(dist); + continue; + } + } +} + +void +hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology) +{ + struct hwloc_internal_distances_s *dist; + for(dist = topology->first_dist; dist; dist = dist->next) + dist->objs_are_valid = 0; +} + +/****************************************************** + * User API for getting distances + */ + +void +hwloc_distances_release(hwloc_topology_t topology __hwloc_attribute_unused, + struct hwloc_distances_s *distances) +{ + free(distances->values); + free(distances->objs); + free(distances); +} + +static struct hwloc_distances_s * +hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused, + struct hwloc_internal_distances_s *dist) +{ + struct hwloc_distances_s *distances; + unsigned nbobjs; + + distances = malloc(sizeof(*distances)); + if (!distances) + return NULL; + + nbobjs = distances->nbobjs = dist->nbobjs; + + distances->objs = malloc(nbobjs * sizeof(hwloc_obj_t)); + if (!distances->objs) + goto out; + memcpy(distances->objs, dist->objs, nbobjs * sizeof(hwloc_obj_t)); + + distances->values = malloc(nbobjs * nbobjs * sizeof(*distances->values)); + if (!distances->values) + goto out_with_objs; + memcpy(distances->values, dist->values, nbobjs*nbobjs*sizeof(*distances->values)); + + distances->kind = dist->kind; + return distances; + + out_with_objs: + free(distances->objs); + out: + free(distances); + return NULL; +} + +static int +hwloc__distances_get(hwloc_topology_t topology, + hwloc_obj_type_t type, + unsigned *nrp, struct hwloc_distances_s **distancesp, + unsigned long kind, unsigned long flags __hwloc_attribute_unused) +{ + struct hwloc_internal_distances_s *dist; + unsigned nr = 0, i; + + /* We could return the internal arrays (as const), + * but it would require to prevent removing distances between 
get() and free(). + * Not performance critical anyway. + */ + + if (flags) { + errno = EINVAL; + return -1; + } + + /* we could refresh only the distances that match, but we won't have many distances anyway, + * so performance is totally negligible. + * + * This is also useful in multithreaded apps that modify the topology. + * They can call any valid hwloc_distances_get() to force a refresh after + * changing the topology, so that future concurrent get() won't cause + * concurrent refresh(). + */ + hwloc_internal_distances_refresh(topology); + + for(dist = topology->first_dist; dist; dist = dist->next) { + unsigned long kind_from = kind & HWLOC_DISTANCES_KIND_FROM_ALL; + unsigned long kind_means = kind & HWLOC_DISTANCES_KIND_MEANS_ALL; + + if (type != HWLOC_OBJ_TYPE_NONE && type != dist->type) + continue; + + if (kind_from && !(kind_from & dist->kind)) + continue; + if (kind_means && !(kind_means & dist->kind)) + continue; + + if (nr < *nrp) { + struct hwloc_distances_s *distances = hwloc_distances_get_one(topology, dist); + if (!distances) + goto error; + distancesp[nr] = distances; + } + nr++; + } + + for(i=nr; i<*nrp; i++) + distancesp[i] = NULL; + *nrp = nr; + return 0; + + error: + for(i=0; i<nr; i++) + hwloc_distances_release(topology, distancesp[i]); + return -1; +} + +int +hwloc_distances_get(hwloc_topology_t topology, + unsigned *nrp, struct hwloc_distances_s **distancesp, + unsigned long kind, unsigned long flags) +{ + if (flags || !topology->is_loaded) { + errno = EINVAL; + return -1; + } + + return hwloc__distances_get(topology, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags); +} + +int +hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, + unsigned *nrp, struct hwloc_distances_s **distancesp, + unsigned long kind, unsigned long flags) +{ + hwloc_obj_type_t type; + + if (flags || !topology->is_loaded) { + errno = EINVAL; + return -1; + } + + /* switch back to types since we don't support groups for now */ + type = hwloc_get_depth_type(topology, depth); + if (type == (hwloc_obj_type_t)-1) { + errno = EINVAL; + return -1; + } + + return hwloc__distances_get(topology, type, nrp, distancesp, kind, flags); +} + +/****************************************************** + * Grouping objects according to distances + */ + +static void hwloc_report_user_distance_error(const char *msg, int line) +{ + static int reported = 0; + + if (!reported && !hwloc_hide_errors()) { + fprintf(stderr, "****************************************************************************\n"); + fprintf(stderr, "* hwloc %s was given invalid distances by the user.\n", HWLOC_VERSION); + fprintf(stderr, "*\n"); + fprintf(stderr, "* %s\n", msg); + fprintf(stderr, "* Error occurred in topology.c line %d\n", line); + fprintf(stderr, "*\n"); + fprintf(stderr, "* Please make sure that distances given through the programming API\n"); + fprintf(stderr, "* do not contradict any other topology information.\n"); + fprintf(stderr, "* \n"); + fprintf(stderr, "* hwloc will now ignore this invalid topology information and continue.\n"); + fprintf(stderr, "****************************************************************************\n"); + reported = 1; + } +} + +static int hwloc_compare_values(uint64_t a, uint64_t b, float accuracy) +{ + if (accuracy != 0.0f && fabsf((float)a-(float)b) < (float)a * accuracy) + return 0; + return a < b ? -1 : a == b ? 0 : 1; +} + +/* + * Place objects in groups if they are in a transitive graph of minimal values. 
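+ * For instance, given a hypothetical 4x4 latency matrix
+ *   10 20 40 40
+ *   20 10 40 40
+ *   40 40 10 20
+ *   40 40 20 10
+ * the minimal off-diagonal value is 20; objects {0,1} and {2,3} are each
+ * transitively connected at that distance, so two groups are created.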
+ * Return how many groups were created, or 0 if some incomplete distance graphs were found. + */ +static unsigned +hwloc__find_groups_by_min_distance(unsigned nbobjs, + uint64_t *_values, + float accuracy, + unsigned *groupids, + int verbose) +{ + uint64_t min_distance = UINT64_MAX; + unsigned groupid = 1; + unsigned i,j,k; + unsigned skipped = 0; + +#define VALUE(i, j) _values[(i) * nbobjs + (j)] + + memset(groupids, 0, nbobjs*sizeof(*groupids)); + + /* find the minimal distance */ + for(i=0; i<nbobjs; i++) + for(j=0; j<nbobjs; j++) /* check the entire matrix, it may not be perfectly symmetric depending on the accuracy */ + if (i != j && VALUE(i, j) < min_distance) /* no accuracy here, we want the real minimal */ + min_distance = VALUE(i, j); + hwloc_debug(" found minimal distance %llu between objects\n", (unsigned long long) min_distance); + + if (min_distance == UINT64_MAX) + return 0; + + /* build groups of objects connected with this distance */ + for(i=0; i<nbobjs; i++) { + unsigned size; + unsigned firstfound; + + /* if already grouped, skip */ + if (groupids[i]) + continue; + + /* start a new group */ + groupids[i] = groupid; + size = 1; + firstfound = i; + + while (firstfound != (unsigned)-1) { + /* we added new objects to the group, the first one was firstfound. + * rescan all connections from these new objects (starting at first found) to any other objects, + * so as to find new objects minimally-connected by transivity. + */ + unsigned newfirstfound = (unsigned)-1; + for(j=firstfound; j<nbobjs; j++) + if (groupids[j] == groupid) + for(k=0; k<nbobjs; k++) + if (!groupids[k] && !hwloc_compare_values(VALUE(j, k), min_distance, accuracy)) { + groupids[k] = groupid; + size++; + if (newfirstfound == (unsigned)-1) + newfirstfound = k; + if (i == j) + hwloc_debug(" object %u is minimally connected to %u\n", k, i); + else + hwloc_debug(" object %u is minimally connected to %u through %u\n", k, i, j); + } + firstfound = newfirstfound; + } + + if (size == 1) { + /* cancel this useless group, ignore this object and try from the next one */ + groupids[i] = 0; + skipped++; + continue; + } + + /* valid this group */ + groupid++; + if (verbose) + fprintf(stderr, " Found transitive graph with %u objects with minimal distance %llu accuracy %f\n", + size, (unsigned long long) min_distance, accuracy); + } + + if (groupid == 2 && !skipped) + /* we created a single group containing all objects, ignore it */ + return 0; + + /* return the last id, since it's also the number of used group ids */ + return groupid-1; +} + +/* check that the matrix is ok */ +static int +hwloc__check_grouping_matrix(unsigned nbobjs, uint64_t *_values, float accuracy, int verbose) +{ + unsigned i,j; + for(i=0; i<nbobjs; i++) { + for(j=i+1; j<nbobjs; j++) { + /* should be symmetric */ + if (hwloc_compare_values(VALUE(i, j), VALUE(j, i), accuracy)) { + if (verbose) + fprintf(stderr, " Distance matrix asymmetric ([%u,%u]=%llu != [%u,%u]=%llu), aborting\n", + i, j, (unsigned long long) VALUE(i, j), j, i, (unsigned long long) VALUE(j, i)); + return -1; + } + /* diagonal is smaller than everything else */ + if (hwloc_compare_values(VALUE(i, j), VALUE(i, i), accuracy) <= 0) { + if (verbose) + fprintf(stderr, " Distance to self not strictly minimal ([%u,%u]=%llu <= [%u,%u]=%llu), aborting\n", + i, j, (unsigned long long) VALUE(i, j), i, i, (unsigned long long) VALUE(i, i)); + return -1; + } + } + } + return 0; +} + +/* + * Look at object physical distances to group them. 
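+ * A rough sketch of the strategy: try each accuracy in turn until
+ * hwloc__find_groups_by_min_distance() finds at least one group, insert
+ * one Group object per group found, then build the group-to-group
+ * distance matrix by averaging the member-to-member values, and recurse
+ * on that smaller matrix to possibly create further levels of groups.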
+ */
+static void
+hwloc__groups_by_distances(struct hwloc_topology *topology,
+                           unsigned nbobjs,
+                           struct hwloc_obj **objs,
+                           uint64_t *_values,
+                           unsigned long kind,
+                           unsigned nbaccuracies,
+                           float *accuracies,
+                           int needcheck)
+{
+  HWLOC_VLA(unsigned, groupids, nbobjs);
+  unsigned nbgroups = 0;
+  unsigned i,j;
+  int verbose = topology->grouping_verbose;
+
+  if (nbobjs <= 2)
+    return;
+
+  if (!(kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY))
+    /* don't know how to use those for grouping */
+    /* TODO hwloc__find_groups_by_max_distance() for bandwidth */
+    return;
+
+  for(i=0; i<nbaccuracies; i++) {
+    if (verbose)
+      fprintf(stderr, "Trying to group %u %s objects according to physical distances with accuracy %f\n",
+              nbobjs, hwloc_obj_type_string(objs[0]->type), accuracies[i]);
+    if (needcheck && hwloc__check_grouping_matrix(nbobjs, _values, accuracies[i], verbose) < 0)
+      continue;
+    nbgroups = hwloc__find_groups_by_min_distance(nbobjs, _values, accuracies[i], groupids, verbose);
+    if (nbgroups)
+      break;
+  }
+  if (!nbgroups)
+    return;
+
+  {
+    HWLOC_VLA(hwloc_obj_t, groupobjs, nbgroups);
+    HWLOC_VLA(unsigned, groupsizes, nbgroups);
+    HWLOC_VLA(uint64_t, groupvalues, nbgroups*nbgroups);
+    unsigned failed = 0;
+
+    /* create new Group objects and record their size */
+    memset(&(groupsizes[0]), 0, sizeof(groupsizes[0]) * nbgroups);
+    for(i=0; i<nbgroups; i++) {
+      /* create the Group object */
+      hwloc_obj_t group_obj, res_obj;
+      group_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX);
+      group_obj->cpuset = hwloc_bitmap_alloc();
+      group_obj->attr->group.kind = HWLOC_GROUP_KIND_DISTANCE;
+      group_obj->attr->group.subkind = topology->grouping_next_subkind;
+      for (j=0; j<nbobjs; j++)
+        if (groupids[j] == i+1) {
+          /* assemble the group sets */
+          hwloc_obj_add_other_obj_sets(group_obj, objs[j]);
+          groupsizes[i]++;
+        }
+      hwloc_debug_1arg_bitmap("adding Group object with %u objects and cpuset %s\n",
+                              groupsizes[i], group_obj->cpuset);
+      res_obj = hwloc__insert_object_by_cpuset(topology, NULL, group_obj,
+                                               (kind & HWLOC_DISTANCES_KIND_FROM_USER) ? hwloc_report_user_distance_error : hwloc_report_os_error);
+      /* res_obj may be NULL on failure to insert.
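+       * (Editor's note, not upstream text: insertion typically fails when the
+       * assembled Group cpuset conflicts with objects already in the tree in a
+       * way the core cannot reconcile.)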
*/ + if (!res_obj) + failed++; + /* or it may be different from groupobjs if we got groups from XML import before grouping */ + groupobjs[i] = res_obj; + } + topology->grouping_next_subkind++; + + if (failed) + /* don't try to group above if we got a NULL group here, just keep this incomplete level */ + return; + + /* factorize values */ + memset(&(groupvalues[0]), 0, sizeof(groupvalues[0]) * nbgroups * nbgroups); +#undef VALUE +#define VALUE(i, j) _values[(i) * nbobjs + (j)] +#define GROUP_VALUE(i, j) groupvalues[(i) * nbgroups + (j)] + for(i=0; i<nbobjs; i++) + if (groupids[i]) + for(j=0; j<nbobjs; j++) + if (groupids[j]) + GROUP_VALUE(groupids[i]-1, groupids[j]-1) += VALUE(i, j); + for(i=0; i<nbgroups; i++) + for(j=0; j<nbgroups; j++) { + unsigned groupsize = groupsizes[i]*groupsizes[j]; + GROUP_VALUE(i, j) /= groupsize; + } +#ifdef HWLOC_DEBUG + hwloc_debug("%s", "generated new distance matrix between groups:\n"); + hwloc_debug("%s", " index"); + for(j=0; j<nbgroups; j++) + hwloc_debug(" % 5d", (int) j); /* print index because os_index is -1 for Groups */ + hwloc_debug("%s", "\n"); + for(i=0; i<nbgroups; i++) { + hwloc_debug(" % 5d", (int) i); + for(j=0; j<nbgroups; j++) + hwloc_debug(" %llu", (unsigned long long) GROUP_VALUE(i, j)); + hwloc_debug("%s", "\n"); + } +#endif + + hwloc__groups_by_distances(topology, nbgroups, groupobjs, groupvalues, kind, nbaccuracies, accuracies, 0 /* no need to check generated matrix */); + } +} diff --git a/src/3rdparty/hwloc/src/misc.c b/src/3rdparty/hwloc/src/misc.c new file mode 100644 index 000000000..16dacf623 --- /dev/null +++ b/src/3rdparty/hwloc/src/misc.c @@ -0,0 +1,166 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009-2010 Université Bordeaux + * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> +#include <private/private.h> +#include <private/misc.h> + +#include <stdarg.h> +#ifdef HAVE_SYS_UTSNAME_H +#include <sys/utsname.h> +#endif +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <ctype.h> + +#ifdef HAVE_PROGRAM_INVOCATION_NAME +#include <errno.h> +extern char *program_invocation_name; +#endif +#ifdef HAVE___PROGNAME +extern char *__progname; +#endif + +int hwloc_snprintf(char *str, size_t size, const char *format, ...) +{ + int ret; + va_list ap; + static char bin; + size_t fakesize; + char *fakestr; + + /* Some systems crash on str == NULL */ + if (!size) { + str = &bin; + size = 1; + } + + va_start(ap, format); + ret = vsnprintf(str, size, format, ap); + va_end(ap); + + if (ret >= 0 && (size_t) ret != size-1) + return ret; + + /* vsnprintf returned size-1 or -1. That could be a system which reports the + * written data and not the actually required room. Try increasing buffer + * size to get the latter. 
*/ + + fakesize = size; + fakestr = NULL; + do { + fakesize *= 2; + free(fakestr); + fakestr = malloc(fakesize); + if (NULL == fakestr) + return -1; + va_start(ap, format); + errno = 0; + ret = vsnprintf(fakestr, fakesize, format, ap); + va_end(ap); + } while ((size_t) ret == fakesize-1 || (ret < 0 && (!errno || errno == ERANGE))); + + if (ret >= 0 && size) { + if (size > (size_t) ret+1) + size = ret+1; + memcpy(str, fakestr, size-1); + str[size-1] = 0; + } + free(fakestr); + + return ret; +} + +int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n) +{ + size_t i = 0; + while (*haystack && *haystack != ':') { + int ha = *haystack++; + int low_h = tolower(ha); + int ne = *needle++; + int low_n = tolower(ne); + if (low_h != low_n) + return 1; + i++; + } + return i < n; +} + +void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unused, + void *cached_uname __hwloc_attribute_unused) +{ +#ifdef HAVE_UNAME + struct utsname _utsname, *utsname; + + if (hwloc_obj_get_info_by_name(topology->levels[0][0], "OSName")) + /* don't annotate twice */ + return; + + if (cached_uname) + utsname = (struct utsname *) cached_uname; + else { + utsname = &_utsname; + if (uname(utsname) < 0) + return; + } + + if (*utsname->sysname) + hwloc_obj_add_info(topology->levels[0][0], "OSName", utsname->sysname); + if (*utsname->release) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", utsname->release); + if (*utsname->version) + hwloc_obj_add_info(topology->levels[0][0], "OSVersion", utsname->version); + if (*utsname->nodename) + hwloc_obj_add_info(topology->levels[0][0], "HostName", utsname->nodename); + if (*utsname->machine) + hwloc_obj_add_info(topology->levels[0][0], "Architecture", utsname->machine); +#endif /* HAVE_UNAME */ +} + +char * +hwloc_progname(struct hwloc_topology *topology __hwloc_attribute_unused) +{ +#if HAVE_DECL_GETMODULEFILENAME + char name[256], *local_basename; + unsigned res = GetModuleFileName(NULL, name, sizeof(name)); + if (res == sizeof(name) || !res) + return NULL; + local_basename = strrchr(name, '\\'); + if (!local_basename) + local_basename = name; + else + local_basename++; + return strdup(local_basename); +#else /* !HAVE_GETMODULEFILENAME */ + const char *name, *local_basename; +#if HAVE_DECL_GETPROGNAME + name = getprogname(); /* FreeBSD, NetBSD, some Solaris */ +#elif HAVE_DECL_GETEXECNAME + name = getexecname(); /* Solaris */ +#elif defined HAVE_PROGRAM_INVOCATION_NAME + name = program_invocation_name; /* Glibc. BGQ CNK. */ + /* could use program_invocation_short_name directly, but we have the code to remove the path below anyway */ +#elif defined HAVE___PROGNAME + name = __progname; /* fallback for most unix, used for OpenBSD */ +#else + /* TODO: _NSGetExecutablePath(path, &size) on Darwin */ + /* TODO: AIX, HPUX */ + name = NULL; +#endif + if (!name) + return NULL; + local_basename = strrchr(name, '/'); + if (!local_basename) + local_basename = name; + else + local_basename++; + return strdup(local_basename); +#endif /* !HAVE_GETMODULEFILENAME */ +} diff --git a/src/3rdparty/hwloc/src/pci-common.c b/src/3rdparty/hwloc/src/pci-common.c new file mode 100644 index 000000000..00f08a9e7 --- /dev/null +++ b/src/3rdparty/hwloc/src/pci-common.c @@ -0,0 +1,941 @@ +/* + * Copyright © 2009-2018 Inria. All rights reserved. + * See COPYING in top-level directory. 
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/plugins.h>
+#include <private/private.h>
+#include <private/debug.h>
+#include <private/misc.h>
+
+#include <fcntl.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <sys/stat.h>
+
+#if defined(HWLOC_WIN_SYS) && !defined(__CYGWIN__)
+#include <io.h>
+#define open _open
+#define read _read
+#define close _close
+#endif
+
+static void
+hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology,
+                                    const char *string /* must contain a ' ' */,
+                                    unsigned *allocated)
+{
+  unsigned nr = topology->pci_forced_locality_nr;
+  unsigned domain, bus_first, bus_last, dummy;
+  hwloc_bitmap_t set;
+  char *tmp;
+
+  if (sscanf(string, "%x:%x-%x %x", &domain, &bus_first, &bus_last, &dummy) == 4) {
+    /* fine */
+  } else if (sscanf(string, "%x:%x %x", &domain, &bus_first, &dummy) == 3) {
+    bus_last = bus_first;
+  } else if (sscanf(string, "%x %x", &domain, &dummy) == 2) {
+    bus_first = 0;
+    bus_last = 255;
+  } else
+    return;
+
+  tmp = strchr(string, ' ');
+  if (!tmp)
+    return;
+  tmp++;
+
+  set = hwloc_bitmap_alloc();
+  hwloc_bitmap_sscanf(set, tmp);
+
+  if (!*allocated) {
+    topology->pci_forced_locality = malloc(sizeof(*topology->pci_forced_locality));
+    if (!topology->pci_forced_locality)
+      goto out_with_set; /* failed to allocate, ignore this forced locality */
+    *allocated = 1;
+  } else if (nr >= *allocated) {
+    struct hwloc_pci_forced_locality_s *tmplocs;
+    tmplocs = realloc(topology->pci_forced_locality,
+                      2 * *allocated * sizeof(*topology->pci_forced_locality));
+    if (!tmplocs)
+      goto out_with_set; /* failed to allocate, ignore this forced locality */
+    topology->pci_forced_locality = tmplocs;
+    *allocated *= 2;
+  }
+
+  topology->pci_forced_locality[nr].domain = domain;
+  topology->pci_forced_locality[nr].bus_first = bus_first;
+  topology->pci_forced_locality[nr].bus_last = bus_last;
+  topology->pci_forced_locality[nr].cpuset = set;
+  topology->pci_forced_locality_nr++;
+  return;
+
+ out_with_set:
+  hwloc_bitmap_free(set);
+  return;
+}
+
+static void
+hwloc_pci_forced_locality_parse(struct hwloc_topology *topology, const char *_env)
+{
+  char *env = strdup(_env);
+  unsigned allocated = 0;
+  char *tmp = env;
+
+  while (1) {
+    size_t len = strcspn(tmp, ";\r\n");
+    char *next = NULL;
+
+    if (tmp[len] != '\0') {
+      tmp[len] = '\0';
+      if (tmp[len+1] != '\0')
+        next = &tmp[len]+1;
+    }
+
+    hwloc_pci_forced_locality_parse_one(topology, tmp, &allocated);
+
+    if (next)
+      tmp = next;
+    else
+      break;
+  }
+
+  free(env);
+}
+
+void
+hwloc_pci_discovery_init(struct hwloc_topology *topology)
+{
+  topology->need_pci_belowroot_apply_locality = 0;
+
+  topology->pci_has_forced_locality = 0;
+  topology->pci_forced_locality_nr = 0;
+  topology->pci_forced_locality = NULL;
+}
+
+void
+hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
+{
+  char *env;
+
+  env = getenv("HWLOC_PCI_LOCALITY");
+  if (env) {
+    int fd;
+
+    topology->pci_has_forced_locality = 1;
+
+    fd = open(env, O_RDONLY);
+    if (fd >= 0) {
+      struct stat st;
+      char *buffer;
+      int err = fstat(fd, &st);
+      if (!err) {
+        if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */
+          buffer = malloc(st.st_size+1);
+          /* only read if the allocation succeeded */
+          if (buffer) {
+            if (read(fd, buffer, st.st_size) == st.st_size) {
+              buffer[st.st_size] = '\0';
+              hwloc_pci_forced_locality_parse(topology, buffer);
+            }
+            free(buffer);
+          }
+        } else {
+          fprintf(stderr, "Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n",
+                  env, (unsigned long) st.st_size);
+        }
+      }
+      close(fd);
+    }
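+    /* (Editor's illustration, not upstream text, derived from the parser
+     * above; values are made up: the variable or file holds a ';'- or
+     * newline-separated list of entries such as
+     *   0000:00-3f 0x000000ff; 0000:40-7f 0x0000ff00
+     * i.e. domain:first_bus[-last_bus] followed by a cpuset string.) */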
else + hwloc_pci_forced_locality_parse(topology, env); + } +} + +void +hwloc_pci_discovery_exit(struct hwloc_topology *topology __hwloc_attribute_unused) +{ + unsigned i; + for(i=0; i<topology->pci_forced_locality_nr; i++) + hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset); + free(topology->pci_forced_locality); + + hwloc_pci_discovery_init(topology); +} + +#ifdef HWLOC_DEBUG +static void +hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused, + struct hwloc_obj *pcidev) +{ + char busid[14]; + hwloc_obj_t parent; + + /* indent */ + parent = pcidev->parent; + while (parent) { + hwloc_debug("%s", " "); + parent = parent->parent; + } + + snprintf(busid, sizeof(busid), "%04x:%02x:%02x.%01x", + pcidev->attr->pcidev.domain, pcidev->attr->pcidev.bus, pcidev->attr->pcidev.dev, pcidev->attr->pcidev.func); + + if (pcidev->type == HWLOC_OBJ_BRIDGE) { + if (pcidev->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) + hwloc_debug("HostBridge"); + else + hwloc_debug("%s Bridge [%04x:%04x]", busid, + pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id); + hwloc_debug(" to %04x:[%02x:%02x]\n", + pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus); + } else + hwloc_debug("%s Device [%04x:%04x (%04x:%04x) rev=%02x class=%04x]\n", busid, + pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id, + pcidev->attr->pcidev.subvendor_id, pcidev->attr->pcidev.subdevice_id, + pcidev->attr->pcidev.revision, pcidev->attr->pcidev.class_id); +} + +static void +hwloc_pci_traverse(void * cbdata, struct hwloc_obj *tree, + void (*cb)(void * cbdata, struct hwloc_obj *)) +{ + hwloc_obj_t child; + cb(cbdata, tree); + for_each_io_child(child, tree) { + if (child->type == HWLOC_OBJ_BRIDGE) + hwloc_pci_traverse(cbdata, child, cb); + } +} +#endif /* HWLOC_DEBUG */ + +enum hwloc_pci_busid_comparison_e { + HWLOC_PCI_BUSID_LOWER, + HWLOC_PCI_BUSID_HIGHER, + HWLOC_PCI_BUSID_INCLUDED, + HWLOC_PCI_BUSID_SUPERSET +}; + +static enum hwloc_pci_busid_comparison_e +hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b) +{ +#ifdef HWLOC_DEBUG + if (a->type == HWLOC_OBJ_BRIDGE) + assert(a->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI); + if (b->type == HWLOC_OBJ_BRIDGE) + assert(b->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI); +#endif + + if (a->attr->pcidev.domain < b->attr->pcidev.domain) + return HWLOC_PCI_BUSID_LOWER; + if (a->attr->pcidev.domain > b->attr->pcidev.domain) + return HWLOC_PCI_BUSID_HIGHER; + + if (a->type == HWLOC_OBJ_BRIDGE + && b->attr->pcidev.bus >= a->attr->bridge.downstream.pci.secondary_bus + && b->attr->pcidev.bus <= a->attr->bridge.downstream.pci.subordinate_bus) + return HWLOC_PCI_BUSID_SUPERSET; + if (b->type == HWLOC_OBJ_BRIDGE + && a->attr->pcidev.bus >= b->attr->bridge.downstream.pci.secondary_bus + && a->attr->pcidev.bus <= b->attr->bridge.downstream.pci.subordinate_bus) + return HWLOC_PCI_BUSID_INCLUDED; + + if (a->attr->pcidev.bus < b->attr->pcidev.bus) + return HWLOC_PCI_BUSID_LOWER; + if (a->attr->pcidev.bus > b->attr->pcidev.bus) + return HWLOC_PCI_BUSID_HIGHER; + + if (a->attr->pcidev.dev < b->attr->pcidev.dev) + return HWLOC_PCI_BUSID_LOWER; + if (a->attr->pcidev.dev > b->attr->pcidev.dev) + return HWLOC_PCI_BUSID_HIGHER; + + if (a->attr->pcidev.func < b->attr->pcidev.func) + return HWLOC_PCI_BUSID_LOWER; + if (a->attr->pcidev.func > b->attr->pcidev.func) + return HWLOC_PCI_BUSID_HIGHER; + + /* Should never reach here. 
Abort on both debug builds and + non-debug builds */ + assert(0); + fprintf(stderr, "Bad assertion in hwloc %s:%d (aborting)\n", __FILE__, __LINE__); + exit(1); +} + +static void +hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_first_child_p, struct hwloc_obj *new) +{ + struct hwloc_obj **curp, **childp; + + curp = parent_io_first_child_p; + while (*curp) { + enum hwloc_pci_busid_comparison_e comp = hwloc_pci_compare_busids(new, *curp); + switch (comp) { + case HWLOC_PCI_BUSID_HIGHER: + /* go further */ + curp = &(*curp)->next_sibling; + continue; + case HWLOC_PCI_BUSID_INCLUDED: + /* insert new below current bridge */ + hwloc_pci_add_object(*curp, &(*curp)->io_first_child, new); + return; + case HWLOC_PCI_BUSID_LOWER: + case HWLOC_PCI_BUSID_SUPERSET: { + /* insert new before current */ + new->next_sibling = *curp; + *curp = new; + new->parent = parent; + if (new->type == HWLOC_OBJ_BRIDGE) { + /* look at remaining siblings and move some below new */ + childp = &new->io_first_child; + curp = &new->next_sibling; + while (*curp) { + hwloc_obj_t cur = *curp; + if (hwloc_pci_compare_busids(new, cur) == HWLOC_PCI_BUSID_LOWER) { + /* this sibling remains under root, after new. */ + if (cur->attr->pcidev.domain > new->attr->pcidev.domain + || cur->attr->pcidev.bus > new->attr->bridge.downstream.pci.subordinate_bus) + /* this sibling is even above new's subordinate bus, no other sibling could go below new */ + return; + curp = &cur->next_sibling; + } else { + /* this sibling goes under new */ + *childp = cur; + *curp = cur->next_sibling; + (*childp)->parent = new; + (*childp)->next_sibling = NULL; + childp = &(*childp)->next_sibling; + } + } + } + return; + } + } + } + /* add to the end of the list if higher than everybody */ + new->parent = parent; + new->next_sibling = NULL; + *curp = new; +} + +void +hwloc_pcidisc_tree_insert_by_busid(struct hwloc_obj **treep, + struct hwloc_obj *obj) +{ + hwloc_pci_add_object(NULL /* no parent on top of tree */, treep, obj); +} + +int +hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *old_tree) +{ + struct hwloc_obj **next_hb_p; + enum hwloc_type_filter_e bfilter; + + if (!old_tree) + /* found nothing, exit */ + return 0; + +#ifdef HWLOC_DEBUG + hwloc_debug("%s", "\nPCI hierarchy:\n"); + hwloc_pci_traverse(NULL, old_tree, hwloc_pci_traverse_print_cb); + hwloc_debug("%s", "\n"); +#endif + + next_hb_p = &hwloc_get_root_obj(topology)->io_first_child; + while (*next_hb_p) + next_hb_p = &((*next_hb_p)->next_sibling); + + bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE]; + if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE) { + *next_hb_p = old_tree; + topology->modified = 1; + goto done; + } + + /* + * tree points to all objects connected to any upstream bus in the machine. + * We now create one real hostbridge object per upstream bus. + * It's not actually a PCI device so we have to create it. 
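+   * (Editor's illustration, not upstream text: if devices sit on root buses
+   * 0000:00 and 0000:80, the loop below creates two hostbridges, one per
+   * domain/secondary-bus pair, and moves the matching subtrees under them.)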
+   */
+  while (old_tree) {
+    /* start a new host bridge */
+    struct hwloc_obj *hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, HWLOC_UNKNOWN_INDEX);
+    struct hwloc_obj **dstnextp = &hostbridge->io_first_child;
+    struct hwloc_obj **srcnextp = &old_tree;
+    struct hwloc_obj *child = *srcnextp;
+    unsigned short current_domain = child->attr->pcidev.domain;
+    unsigned char current_bus = child->attr->pcidev.bus;
+    unsigned char current_subordinate = current_bus;
+
+    hwloc_debug("Starting new PCI hostbridge %04x:%02x\n", current_domain, current_bus);
+
+  next_child:
+    /* remove next child from tree */
+    *srcnextp = child->next_sibling;
+    /* append it to hostbridge */
+    *dstnextp = child;
+    child->parent = hostbridge;
+    child->next_sibling = NULL;
+    dstnextp = &child->next_sibling;
+
+    /* compute hostbridge secondary/subordinate buses */
+    if (child->type == HWLOC_OBJ_BRIDGE
+        && child->attr->bridge.downstream.pci.subordinate_bus > current_subordinate)
+      current_subordinate = child->attr->bridge.downstream.pci.subordinate_bus;
+
+    /* use next child if it has the same domain/bus */
+    child = *srcnextp;
+    if (child
+        && child->attr->pcidev.domain == current_domain
+        && child->attr->pcidev.bus == current_bus)
+      goto next_child;
+
+    /* finish setting up this hostbridge */
+    hostbridge->attr->bridge.upstream_type = HWLOC_OBJ_BRIDGE_HOST;
+    hostbridge->attr->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI;
+    hostbridge->attr->bridge.downstream.pci.domain = current_domain;
+    hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus;
+    hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate;
+    hwloc_debug("New PCI hostbridge %04x:[%02x-%02x]\n",
+                current_domain, current_bus, current_subordinate);
+
+    *next_hb_p = hostbridge;
+    next_hb_p = &hostbridge->next_sibling;
+    topology->modified = 1; /* needed in case somebody reconnects levels before the core calls hwloc_pci_belowroot_apply_locality()
+                             * or if hwloc_pci_belowroot_apply_locality() keeps hostbridges below root.
+                             */
+  }
+
+ done:
+  topology->need_pci_belowroot_apply_locality = 1;
+  return 0;
+}
+
+static struct hwloc_obj *
+hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused,
+                             struct hwloc_pcidev_attr_s *busid,
+                             struct hwloc_obj *parent)
+{
+  /* Xeon E5v3 in cluster-on-die mode only has PCI on the first NUMA node of each package,
+   * but many dual-processor hosts report the second PCI hierarchy on the 2nd NUMA node of the first package.
+   */
+  if (parent->depth >= 2
+      && parent->type == HWLOC_OBJ_NUMANODE
+      && parent->sibling_rank == 1 && parent->parent->arity == 2
+      && parent->parent->type == HWLOC_OBJ_PACKAGE
+      && parent->parent->sibling_rank == 0 && parent->parent->parent->arity == 2) {
+    const char *cpumodel = hwloc_obj_get_info_by_name(parent->parent, "CPUModel");
+    if (cpumodel && strstr(cpumodel, "Xeon")) {
+      if (!hwloc_hide_errors()) {
+        fprintf(stderr, "****************************************************************************\n");
+        fprintf(stderr, "* hwloc %s has encountered incorrect PCI locality information.\n", HWLOC_VERSION);
+        fprintf(stderr, "* PCI bus %04x:%02x is supposedly close to 2nd NUMA node of 1st package,\n",
+                busid->domain, busid->bus);
+        fprintf(stderr, "* however hwloc believes this is impossible on this architecture.\n");
+        fprintf(stderr, "* Therefore the PCI bus will be moved to 1st NUMA node of 2nd package.\n");
+        fprintf(stderr, "*\n");
+        fprintf(stderr, "* If you feel this fixup is wrong, disable it by setting in your environment\n");
+        fprintf(stderr, "* HWLOC_PCI_%04x_%02x_LOCALCPUS= (empty value), and report the problem\n",
+                busid->domain, busid->bus);
+        fprintf(stderr, "* to hwloc's user mailing list together with the XML output of lstopo.\n");
+        fprintf(stderr, "*\n");
+        fprintf(stderr, "* You may silence this message by setting HWLOC_HIDE_ERRORS=1 in your environment.\n");
+        fprintf(stderr, "****************************************************************************\n");
+      }
+      return parent->parent->next_sibling->first_child;
+    }
+  }
+
+  return parent;
+}
+
+static struct hwloc_obj *
+hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcidev_attr_s *busid)
+{
+  hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
+  hwloc_obj_t parent;
+  int forced = 0;
+  int noquirks = 0;
+  unsigned i;
+  int err;
+
+  /* try to match a forced locality */
+  if (topology->pci_has_forced_locality) {
+    for(i=0; i<topology->pci_forced_locality_nr; i++) {
+      if (busid->domain == topology->pci_forced_locality[i].domain
+          && busid->bus >= topology->pci_forced_locality[i].bus_first
+          && busid->bus <= topology->pci_forced_locality[i].bus_last) {
+        hwloc_bitmap_copy(cpuset, topology->pci_forced_locality[i].cpuset);
+        forced = 1;
+        break;
+      }
+    }
+    /* if pci locality was forced, even empty, don't let quirks change what the OS reports */
+    noquirks = 1;
+  }
+
+  /* deprecated force locality variables */
+  if (!forced) {
+    const char *env;
+    char envname[256];
+    /* override the cpuset with the environment if given */
+    snprintf(envname, sizeof(envname), "HWLOC_PCI_%04x_%02x_LOCALCPUS",
+             busid->domain, busid->bus);
+    env = getenv(envname);
+    if (env) {
+      static int reported = 0;
+      if (!topology->pci_has_forced_locality && !reported) {
+        /* print the variable name (not its value) in the deprecation warning */
+        fprintf(stderr, "Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", envname);
+        reported = 1;
+      }
+      if (*env) {
+        /* force the cpuset */
+        hwloc_debug("Overriding localcpus using %s in the environment\n", envname);
+        hwloc_bitmap_sscanf(cpuset, env);
+        forced = 1;
+      }
+      /* if env exists, even empty, don't let quirks change what the OS reports */
+      noquirks = 1;
+    }
+  }
+
+  if (!forced) {
+    /* get the cpuset by asking the OS backend.
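+     * (Editor's note, not upstream text: on Linux this typically comes from
+     * sysfs, e.g. /sys/bus/pci/devices/<busid>/local_cpus; when no backend
+     * provides it, the code below falls back to the whole topology cpuset.)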
+     */
+    struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend;
+    if (backend)
+      err = backend->get_pci_busid_cpuset(backend, busid, cpuset);
+    else
+      err = -1;
+    if (err < 0)
+      /* if we got nothing, assume this PCI bus is attached to the top of hierarchy */
+      hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology));
+  }
+
+  hwloc_debug_bitmap("Attaching PCI tree to cpuset %s\n", cpuset);
+
+  parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
+  if (parent) {
+    if (!noquirks)
+      /* We found a valid parent. Check that the OS didn't report invalid locality */
+      parent = hwloc_pci_fixup_busid_parent(topology, busid, parent);
+  } else {
+    /* Fallback to root */
+    parent = hwloc_get_root_obj(topology);
+  }
+
+  hwloc_bitmap_free(cpuset);
+  return parent;
+}
+
+struct hwloc_obj *
+hwloc_pcidisc_find_busid_parent(struct hwloc_topology *topology,
+                                unsigned domain, unsigned bus, unsigned dev, unsigned func)
+{
+  struct hwloc_pcidev_attr_s busid;
+  busid.domain = domain;
+  busid.bus = bus;
+  busid.dev = dev;
+  busid.func = func;
+  return hwloc__pci_find_busid_parent(topology, &busid);
+}
+
+int
+hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology)
+{
+  struct hwloc_obj *root = hwloc_get_root_obj(topology);
+  struct hwloc_obj **listp, *obj;
+
+  if (!topology->need_pci_belowroot_apply_locality)
+    return 0;
+  topology->need_pci_belowroot_apply_locality = 0;
+
+  /* root->io_first_child contains some PCI hierarchies, and maybe some non-PCI things.
+   * insert the PCI trees according to their PCI-locality.
+   */
+  listp = &root->io_first_child;
+  while ((obj = *listp) != NULL) {
+    struct hwloc_pcidev_attr_s *busid;
+    struct hwloc_obj *parent;
+
+    /* skip non-PCI objects */
+    if (obj->type != HWLOC_OBJ_PCI_DEVICE
+        && !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)
+        && !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
+      listp = &obj->next_sibling;
+      continue;
+    }
+
+    if (obj->type == HWLOC_OBJ_PCI_DEVICE
+        || (obj->type == HWLOC_OBJ_BRIDGE
+            && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))
+      busid = &obj->attr->pcidev;
+    else {
+      /* hostbridges don't have a PCI busid for looking up locality, use their first child if PCI */
+      hwloc_obj_t child = obj->io_first_child;
+      if (child && (child->type == HWLOC_OBJ_PCI_DEVICE
+                    || (child->type == HWLOC_OBJ_BRIDGE
+                        && child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)))
+        busid = &obj->io_first_child->attr->pcidev;
+      else
+        continue;
+    }
+
+    /* attach the object (and children) where it belongs */
+    parent = hwloc__pci_find_busid_parent(topology, busid);
+    if (parent == root) {
+      /* keep this object here */
+      listp = &obj->next_sibling;
+    } else {
+      /* dequeue this object */
+      *listp = obj->next_sibling;
+      obj->next_sibling = NULL;
+      hwloc_insert_object_by_parent(topology, parent, obj);
+    }
+  }
+
+  return 0;
+}
+
+static struct hwloc_obj *
+hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
+                                   unsigned domain, unsigned bus, unsigned dev, unsigned func)
+{
+  hwloc_obj_t child;
+
+  for_each_io_child(child, parent) {
+    if (child->type == HWLOC_OBJ_PCI_DEVICE
+        || (child->type == HWLOC_OBJ_BRIDGE
+            && child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
+      if (child->attr->pcidev.domain == domain
+          && child->attr->pcidev.bus == bus
+          && child->attr->pcidev.dev == dev
+          && child->attr->pcidev.func == func)
+        /* that's the right bus id */
+        return child;
+      if (child->attr->pcidev.domain >
domain + || (child->attr->pcidev.domain == domain + && child->attr->pcidev.bus > bus)) + /* bus id too high, won't find anything later, return parent */ + return parent; + if (child->type == HWLOC_OBJ_BRIDGE + && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI + && child->attr->bridge.downstream.pci.domain == domain + && child->attr->bridge.downstream.pci.secondary_bus <= bus + && child->attr->bridge.downstream.pci.subordinate_bus >= bus) + /* not the right bus id, but it's included in the bus below that bridge */ + return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func); + + } else if (child->type == HWLOC_OBJ_BRIDGE + && child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI + && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI + /* non-PCI to PCI bridge, just look at the subordinate bus */ + && child->attr->bridge.downstream.pci.domain == domain + && child->attr->bridge.downstream.pci.secondary_bus <= bus + && child->attr->bridge.downstream.pci.subordinate_bus >= bus) { + /* contains our bus, recurse */ + return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func); + } + } + /* didn't find anything, return parent */ + return parent; +} + +struct hwloc_obj * +hwloc_pcidisc_find_by_busid(struct hwloc_topology *topology, + unsigned domain, unsigned bus, unsigned dev, unsigned func) +{ + hwloc_obj_t root = hwloc_get_root_obj(topology); + hwloc_obj_t parent = hwloc__pci_belowroot_find_by_busid(root, domain, bus, dev, func); + if (parent == root) + return NULL; + else + return parent; +} + +#define HWLOC_PCI_STATUS 0x06 +#define HWLOC_PCI_STATUS_CAP_LIST 0x10 +#define HWLOC_PCI_CAPABILITY_LIST 0x34 +#define HWLOC_PCI_CAP_LIST_ID 0 +#define HWLOC_PCI_CAP_LIST_NEXT 1 + +unsigned +hwloc_pcidisc_find_cap(const unsigned char *config, unsigned cap) +{ + unsigned char seen[256] = { 0 }; + unsigned char ptr; /* unsigned char to make sure we stay within the 256-byte config space */ + + if (!(config[HWLOC_PCI_STATUS] & HWLOC_PCI_STATUS_CAP_LIST)) + return 0; + + for (ptr = config[HWLOC_PCI_CAPABILITY_LIST] & ~3; + ptr; /* exit if next is 0 */ + ptr = config[ptr + HWLOC_PCI_CAP_LIST_NEXT] & ~3) { + unsigned char id; + + /* Looped around! 
*/ + if (seen[ptr]) + break; + seen[ptr] = 1; + + id = config[ptr + HWLOC_PCI_CAP_LIST_ID]; + if (id == cap) + return ptr; + if (id == 0xff) /* exit if id is 0 or 0xff */ + break; + } + return 0; +} + +#define HWLOC_PCI_EXP_LNKSTA 0x12 +#define HWLOC_PCI_EXP_LNKSTA_SPEED 0x000f +#define HWLOC_PCI_EXP_LNKSTA_WIDTH 0x03f0 + +int +hwloc_pcidisc_find_linkspeed(const unsigned char *config, + unsigned offset, float *linkspeed) +{ + unsigned linksta, speed, width; + float lanespeed; + + memcpy(&linksta, &config[offset + HWLOC_PCI_EXP_LNKSTA], 4); + speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */ + width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */ + /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane + * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane + * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane + * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane + */ + + /* lanespeed in Gbit/s */ + if (speed <= 2) + lanespeed = 2.5f * speed * 0.8f; + else + lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen5 will be 32 GT/s and so on */ + + /* linkspeed in GB/s */ + *linkspeed = lanespeed * width / 8; + return 0; +} + +#define HWLOC_PCI_HEADER_TYPE 0x0e +#define HWLOC_PCI_HEADER_TYPE_BRIDGE 1 +#define HWLOC_PCI_CLASS_BRIDGE_PCI 0x0604 + +hwloc_obj_type_t +hwloc_pcidisc_check_bridge_type(unsigned device_class, const unsigned char *config) +{ + unsigned char headertype; + + if (device_class != HWLOC_PCI_CLASS_BRIDGE_PCI) + return HWLOC_OBJ_PCI_DEVICE; + + headertype = config[HWLOC_PCI_HEADER_TYPE] & 0x7f; + return (headertype == HWLOC_PCI_HEADER_TYPE_BRIDGE) + ? HWLOC_OBJ_BRIDGE : HWLOC_OBJ_PCI_DEVICE; +} + +#define HWLOC_PCI_PRIMARY_BUS 0x18 +#define HWLOC_PCI_SECONDARY_BUS 0x19 +#define HWLOC_PCI_SUBORDINATE_BUS 0x1a + +int +hwloc_pcidisc_setup_bridge_attr(hwloc_obj_t obj, + const unsigned char *config) +{ + struct hwloc_bridge_attr_s *battr = &obj->attr->bridge; + struct hwloc_pcidev_attr_s *pattr = &battr->upstream.pci; + + if (config[HWLOC_PCI_PRIMARY_BUS] != pattr->bus) { + /* Sometimes the config space contains 00 instead of the actual primary bus number. + * Always trust the bus ID because it was built by the system which has more information + * to workaround such problems (e.g. ACPI information about PCI parent/children). + */ + hwloc_debug(" %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n", + pattr->domain, pattr->bus, pattr->dev, pattr->func, config[HWLOC_PCI_PRIMARY_BUS]); + } + + battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI; + battr->downstream_type = HWLOC_OBJ_BRIDGE_PCI; + battr->downstream.pci.domain = pattr->domain; + battr->downstream.pci.secondary_bus = config[HWLOC_PCI_SECONDARY_BUS]; + battr->downstream.pci.subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS]; + + if (battr->downstream.pci.secondary_bus <= pattr->bus + || battr->downstream.pci.subordinate_bus <= pattr->bus + || battr->downstream.pci.secondary_bus > battr->downstream.pci.subordinate_bus) { + /* This should catch most cases of invalid bridge information + * (e.g. 00 for secondary and subordinate). + * Ideally we would also check that [secondary-subordinate] is included + * in the parent bridge [secondary+1:subordinate]. But that's hard to do + * because objects may be discovered out of order (especially in the fsroot case). 
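+     * (Editor's illustration, not upstream text: a bridge that itself sits on
+     * bus 0x02 but whose config space claims secondary=0x00/subordinate=0x00
+     * fails the test above, since a downstream bus can never be <= the
+     * bridge's own bus.)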
+ */ + hwloc_debug(" %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n", + pattr->domain, pattr->bus, pattr->dev, pattr->func, + battr->downstream.pci.secondary_bus, battr->downstream.pci.subordinate_bus); + hwloc_free_unlinked_object(obj); + return -1; + } + + return 0; +} + +const char * +hwloc_pci_class_string(unsigned short class_id) +{ + /* See https://pci-ids.ucw.cz/read/PD/ */ + switch ((class_id & 0xff00) >> 8) { + case 0x00: + switch (class_id) { + case 0x0001: return "VGA"; + } + break; + case 0x01: + switch (class_id) { + case 0x0100: return "SCSI"; + case 0x0101: return "IDE"; + case 0x0102: return "Floppy"; + case 0x0103: return "IPI"; + case 0x0104: return "RAID"; + case 0x0105: return "ATA"; + case 0x0106: return "SATA"; + case 0x0107: return "SAS"; + case 0x0108: return "NVMExp"; + } + return "Storage"; + case 0x02: + switch (class_id) { + case 0x0200: return "Ethernet"; + case 0x0201: return "TokenRing"; + case 0x0202: return "FDDI"; + case 0x0203: return "ATM"; + case 0x0204: return "ISDN"; + case 0x0205: return "WorldFip"; + case 0x0206: return "PICMG"; + case 0x0207: return "InfiniBand"; + case 0x0208: return "Fabric"; + } + return "Network"; + case 0x03: + switch (class_id) { + case 0x0300: return "VGA"; + case 0x0301: return "XGA"; + case 0x0302: return "3D"; + } + return "Display"; + case 0x04: + switch (class_id) { + case 0x0400: return "MultimediaVideo"; + case 0x0401: return "MultimediaAudio"; + case 0x0402: return "Telephony"; + case 0x0403: return "AudioDevice"; + } + return "Multimedia"; + case 0x05: + switch (class_id) { + case 0x0500: return "RAM"; + case 0x0501: return "Flash"; + } + return "Memory"; + case 0x06: + switch (class_id) { + case 0x0600: return "HostBridge"; + case 0x0601: return "ISABridge"; + case 0x0602: return "EISABridge"; + case 0x0603: return "MicroChannelBridge"; + case 0x0604: return "PCIBridge"; + case 0x0605: return "PCMCIABridge"; + case 0x0606: return "NubusBridge"; + case 0x0607: return "CardBusBridge"; + case 0x0608: return "RACEwayBridge"; + case 0x0609: return "SemiTransparentPCIBridge"; + case 0x060a: return "InfiniBandPCIHostBridge"; + } + return "Bridge"; + case 0x07: + switch (class_id) { + case 0x0700: return "Serial"; + case 0x0701: return "Parallel"; + case 0x0702: return "MultiportSerial"; + case 0x0703: return "Model"; + case 0x0704: return "GPIB"; + case 0x0705: return "SmartCard"; + } + return "Communication"; + case 0x08: + switch (class_id) { + case 0x0800: return "PIC"; + case 0x0801: return "DMA"; + case 0x0802: return "Timer"; + case 0x0803: return "RTC"; + case 0x0804: return "PCIHotPlug"; + case 0x0805: return "SDHost"; + case 0x0806: return "IOMMU"; + } + return "SystemPeripheral"; + case 0x09: + switch (class_id) { + case 0x0900: return "Keyboard"; + case 0x0901: return "DigitizerPen"; + case 0x0902: return "Mouse"; + case 0x0903: return "Scanern"; + case 0x0904: return "Gameport"; + } + return "Input"; + case 0x0a: + return "DockingStation"; + case 0x0b: + switch (class_id) { + case 0x0b00: return "386"; + case 0x0b01: return "486"; + case 0x0b02: return "Pentium"; +/* 0x0b03 and 0x0b04 might be Pentium and P6 ? 
*/ + case 0x0b10: return "Alpha"; + case 0x0b20: return "PowerPC"; + case 0x0b30: return "MIPS"; + case 0x0b40: return "Co-Processor"; + } + return "Processor"; + case 0x0c: + switch (class_id) { + case 0x0c00: return "FireWire"; + case 0x0c01: return "ACCESS"; + case 0x0c02: return "SSA"; + case 0x0c03: return "USB"; + case 0x0c04: return "FibreChannel"; + case 0x0c05: return "SMBus"; + case 0x0c06: return "InfiniBand"; + case 0x0c07: return "IPMI-SMIC"; + case 0x0c08: return "SERCOS"; + case 0x0c09: return "CANBUS"; + } + return "SerialBus"; + case 0x0d: + switch (class_id) { + case 0x0d00: return "IRDA"; + case 0x0d01: return "ConsumerIR"; + case 0x0d10: return "RF"; + case 0x0d11: return "Bluetooth"; + case 0x0d12: return "Broadband"; + case 0x0d20: return "802.1a"; + case 0x0d21: return "802.1b"; + } + return "Wireless"; + case 0x0e: + switch (class_id) { + case 0x0e00: return "I2O"; + } + return "Intelligent"; + case 0x0f: + return "Satellite"; + case 0x10: + return "Encryption"; + case 0x11: + return "SignalProcessing"; + case 0x12: + return "ProcessingAccelerator"; + case 0x13: + return "Instrumentation"; + case 0x40: + return "Co-Processor"; + } + return "Other"; +} diff --git a/src/3rdparty/hwloc/src/shmem.c b/src/3rdparty/hwloc/src/shmem.c new file mode 100644 index 000000000..6c507f522 --- /dev/null +++ b/src/3rdparty/hwloc/src/shmem.c @@ -0,0 +1,287 @@ +/* + * Copyright © 2017-2018 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <hwloc/shmem.h> +#include <private/private.h> + +#ifndef HWLOC_WIN_SYS + +#include <sys/mman.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <assert.h> + +#define HWLOC_SHMEM_HEADER_VERSION 1 + +struct hwloc_shmem_header { + uint32_t header_version; /* sanity check */ + uint32_t header_length; /* where the actual topology starts in the file/mapping */ + uint64_t mmap_address; /* virtual address to pass to mmap */ + uint64_t mmap_length; /* length to pass to mmap (includes the header) */ +}; + +#define HWLOC_SHMEM_MALLOC_ALIGN 8UL + +static void * +tma_shmem_malloc(struct hwloc_tma * tma, + size_t length) +{ + void *current = tma->data; + tma->data = (char*)tma->data + ((length + HWLOC_SHMEM_MALLOC_ALIGN - 1) & ~(HWLOC_SHMEM_MALLOC_ALIGN - 1)); + return current; + +} + +static void * +tma_get_length_malloc(struct hwloc_tma * tma, + size_t length) +{ + size_t *tma_length = tma->data; + *tma_length += (length + HWLOC_SHMEM_MALLOC_ALIGN - 1) & ~(HWLOC_SHMEM_MALLOC_ALIGN - 1); + return malloc(length); + +} + +int +hwloc_shmem_topology_get_length(hwloc_topology_t topology, + size_t *lengthp, + unsigned long flags) +{ + hwloc_topology_t new; + struct hwloc_tma tma; + size_t length = 0; + unsigned long pagesize = hwloc_getpagesize(); /* round-up to full page for mmap() */ + int err; + + if (flags) { + errno = EINVAL; + return -1; + } + + tma.malloc = tma_get_length_malloc; + tma.dontfree = 0; + tma.data = &length; + + err = hwloc__topology_dup(&new, topology, &tma); + if (err < 0) + return err; + hwloc_topology_destroy(new); + + *lengthp = (sizeof(struct hwloc_shmem_header) + length + pagesize - 1) & ~(pagesize - 1); + return 0; +} + +int +hwloc_shmem_topology_write(hwloc_topology_t topology, + int fd, hwloc_uint64_t fileoffset, + void *mmap_address, size_t length, + unsigned long flags) +{ + hwloc_topology_t new; + struct hwloc_tma tma; + struct hwloc_shmem_header header; + void *mmap_res; + int err; + + if (flags) { + errno = EINVAL; + return -1; + } 
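+
+  /* (Editor's sketch of typical use of this shmem API, not upstream text;
+   * a writer process W and a reader process R share a file, and addr is a
+   * virtual address range available in both processes:
+   *   W: hwloc_shmem_topology_get_length(topo, &len, 0);
+   *      hwloc_shmem_topology_write(topo, fd, 0, addr, len, 0);
+   *   R: hwloc_shmem_topology_adopt(&topo2, fd, 0, addr, len, 0);
+   * R's mapping is read-only; see hwloc_shmem_topology_adopt() below.) */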
+
+  /* refresh old topology distances so that we don't uselessly duplicate invalid distances
+   * without being able to free() them.
+   */
+  hwloc_internal_distances_refresh(topology);
+
+  header.header_version = HWLOC_SHMEM_HEADER_VERSION;
+  header.header_length = sizeof(header);
+  header.mmap_address = (uintptr_t) mmap_address;
+  header.mmap_length = length;
+
+  err = lseek(fd, fileoffset, SEEK_SET);
+  if (err < 0)
+    return -1;
+
+  err = write(fd, &header, sizeof(header));
+  if (err != sizeof(header))
+    return -1;
+
+  err = ftruncate(fd, fileoffset + length);
+  if (err < 0)
+    return -1;
+
+  mmap_res = mmap(mmap_address, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, fileoffset);
+  if (mmap_res == MAP_FAILED)
+    return -1;
+  if (mmap_res != mmap_address) {
+    munmap(mmap_res, length);
+    errno = EBUSY;
+    return -1;
+  }
+
+  tma.malloc = tma_shmem_malloc;
+  tma.dontfree = 1;
+  tma.data = (char *)mmap_res + sizeof(header);
+  err = hwloc__topology_dup(&new, topology, &tma);
+  if (err < 0)
+    return err;
+  assert((char*)new == (char*)mmap_address + sizeof(header));
+
+  assert((char *)mmap_res <= (char *)mmap_address + length);
+
+  /* now refresh the new distances so that adopters can use them without refreshing the R/O shmem mapping */
+  hwloc_internal_distances_refresh(new);
+
+  /* topology is saved, release resources now */
+  munmap(mmap_address, length);
+  hwloc_components_fini();
+
+  return 0;
+}
+
+int
+hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
+                           int fd, hwloc_uint64_t fileoffset,
+                           void *mmap_address, size_t length,
+                           unsigned long flags)
+{
+  hwloc_topology_t new, old;
+  struct hwloc_shmem_header header;
+  void *mmap_res;
+  int err;
+
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  err = lseek(fd, fileoffset, SEEK_SET);
+  if (err < 0)
+    return -1;
+
+  err = read(fd, &header, sizeof(header));
+  if (err != sizeof(header))
+    return -1;
+
+  if (header.header_version != HWLOC_SHMEM_HEADER_VERSION
+      || header.header_length != sizeof(header)
+      || header.mmap_address != (uintptr_t) mmap_address
+      || header.mmap_length != length) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  mmap_res = mmap(mmap_address, length, PROT_READ, MAP_SHARED, fd, fileoffset);
+  if (mmap_res == MAP_FAILED)
+    return -1;
+  if (mmap_res != mmap_address) {
+    errno = EBUSY;
+    goto out_with_mmap;
+  }
+
+  old = (hwloc_topology_t)((char*)mmap_address + sizeof(header));
+  if (hwloc_topology_abi_check(old) < 0) {
+    errno = EINVAL;
+    goto out_with_mmap;
+  }
+
+  /* enforced by dup() inside shmem_topology_write() */
+  assert(old->is_loaded);
+  assert(old->backends == NULL);
+  assert(old->get_pci_busid_cpuset_backend == NULL);
+
+  hwloc_components_init();
+
+  /* duplicate the topology object so that we can use local binding_hooks
+   * (those are likely not mapped at the same location in both processes).
+   */
+  new = malloc(sizeof(struct hwloc_topology));
+  if (!new)
+    goto out_with_components;
+  memcpy(new, old, sizeof(*old));
+  new->tma = NULL;
+  new->adopted_shmem_addr = mmap_address;
+  new->adopted_shmem_length = length;
+  new->topology_abi = HWLOC_TOPOLOGY_ABI;
+  /* setting binding hooks will touch support arrays, so duplicate them too.
+ * could avoid that by requesting a R/W mmap + */ + new->support.discovery = malloc(sizeof(*new->support.discovery)); + new->support.cpubind = malloc(sizeof(*new->support.cpubind)); + new->support.membind = malloc(sizeof(*new->support.membind)); + memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery)); + memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind)); + memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind)); + hwloc_set_binding_hooks(new); + /* clear userdata callbacks pointing to the writer process' functions */ + new->userdata_export_cb = NULL; + new->userdata_import_cb = NULL; + +#ifndef HWLOC_DEBUG + if (getenv("HWLOC_DEBUG_CHECK")) +#endif + hwloc_topology_check(new); + + *topologyp = new; + return 0; + + out_with_components: + hwloc_components_fini(); + out_with_mmap: + munmap(mmap_res, length); + return -1; +} + +void +hwloc__topology_disadopt(hwloc_topology_t topology) +{ + hwloc_components_fini(); + munmap(topology->adopted_shmem_addr, topology->adopted_shmem_length); + free(topology->support.discovery); + free(topology->support.cpubind); + free(topology->support.membind); + free(topology); +} + +#else /* HWLOC_WIN_SYS */ + +int +hwloc_shmem_topology_get_length(hwloc_topology_t topology __hwloc_attribute_unused, + size_t *lengthp __hwloc_attribute_unused, + unsigned long flags __hwloc_attribute_unused) +{ + errno = ENOSYS; + return -1; +} + +int +hwloc_shmem_topology_write(hwloc_topology_t topology __hwloc_attribute_unused, + int fd __hwloc_attribute_unused, hwloc_uint64_t fileoffset __hwloc_attribute_unused, + void *mmap_address __hwloc_attribute_unused, size_t length __hwloc_attribute_unused, + unsigned long flags __hwloc_attribute_unused) +{ + errno = ENOSYS; + return -1; +} + +int +hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp __hwloc_attribute_unused, + int fd __hwloc_attribute_unused, hwloc_uint64_t fileoffset __hwloc_attribute_unused, + void *mmap_address __hwloc_attribute_unused, size_t length __hwloc_attribute_unused, + unsigned long flags __hwloc_attribute_unused) +{ + errno = ENOSYS; + return -1; +} + +void +hwloc__topology_disadopt(hwloc_topology_t topology __hwloc_attribute_unused) +{ +} + +#endif /* HWLOC_WIN_SYS */ diff --git a/src/3rdparty/hwloc/src/static-components.h b/src/3rdparty/hwloc/src/static-components.h new file mode 100644 index 000000000..dac227a60 --- /dev/null +++ b/src/3rdparty/hwloc/src/static-components.h @@ -0,0 +1,15 @@ +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_noos_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_synthetic_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_nolibxml_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_windows_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component; +static const struct hwloc_component * hwloc_static_components[] = { + &hwloc_noos_component, + &hwloc_xml_component, + &hwloc_synthetic_component, + &hwloc_xml_nolibxml_component, + &hwloc_windows_component, + &hwloc_x86_component, + NULL +}; diff --git a/src/3rdparty/hwloc/src/topology-noos.c b/src/3rdparty/hwloc/src/topology-noos.c new file mode 100644 index 000000000..77871eb17 --- /dev/null +++ b/src/3rdparty/hwloc/src/topology-noos.c @@ -0,0 +1,65 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2017 Inria. All rights reserved. 
+ * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/private.h> + +static int +hwloc_look_noos(struct hwloc_backend *backend) +{ + struct hwloc_topology *topology = backend->topology; + int nbprocs; + + if (topology->levels[0][0]->cpuset) + /* somebody discovered things */ + return -1; + + nbprocs = hwloc_fallback_nbprocessors(topology); + if (nbprocs >= 1) + topology->support.discovery->pu = 1; + else + nbprocs = 1; + + hwloc_alloc_root_sets(topology->levels[0][0]); + hwloc_setup_pu_level(topology, nbprocs); + hwloc_add_uname_info(topology, NULL); + return 0; +} + +static struct hwloc_backend * +hwloc_noos_component_instantiate(struct hwloc_disc_component *component, + const void *_data1 __hwloc_attribute_unused, + const void *_data2 __hwloc_attribute_unused, + const void *_data3 __hwloc_attribute_unused) +{ + struct hwloc_backend *backend; + backend = hwloc_backend_alloc(component); + if (!backend) + return NULL; + backend->discover = hwloc_look_noos; + return backend; +} + +static struct hwloc_disc_component hwloc_noos_disc_component = { + HWLOC_DISC_COMPONENT_TYPE_CPU, + "no_os", + HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + hwloc_noos_component_instantiate, + 40, /* lower than native OS component, higher than globals */ + 1, + NULL +}; + +const struct hwloc_component hwloc_noos_component = { + HWLOC_COMPONENT_ABI, + NULL, NULL, + HWLOC_COMPONENT_TYPE_DISC, + 0, + &hwloc_noos_disc_component +}; diff --git a/src/3rdparty/hwloc/src/topology-synthetic.c b/src/3rdparty/hwloc/src/topology-synthetic.c new file mode 100644 index 000000000..1fe334d1c --- /dev/null +++ b/src/3rdparty/hwloc/src/topology-synthetic.c @@ -0,0 +1,1521 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2010 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. 
+ */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/private.h> +#include <private/misc.h> +#include <private/debug.h> + +#include <limits.h> +#include <assert.h> +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif + +struct hwloc_synthetic_attr_s { + hwloc_obj_type_t type; + unsigned depth; /* For caches/groups */ + hwloc_obj_cache_type_t cachetype; /* For caches */ + hwloc_uint64_t memorysize; /* For caches/memory */ +}; + +struct hwloc_synthetic_indexes_s { + /* the indexes= attribute before parsing */ + const char *string; + unsigned long string_length; + /* the array of explicit indexes after parsing */ + unsigned *array; + + /* used while filling the topology */ + unsigned next; /* id of the next object for that level */ +}; + +struct hwloc_synthetic_level_data_s { + unsigned arity; + unsigned long totalwidth; + + struct hwloc_synthetic_attr_s attr; + struct hwloc_synthetic_indexes_s indexes; + + struct hwloc_synthetic_attached_s { + struct hwloc_synthetic_attr_s attr; + + struct hwloc_synthetic_attached_s *next; + } *attached; +}; + +struct hwloc_synthetic_backend_data_s { + /* synthetic backend parameters */ + char *string; + + unsigned long numa_attached_nr; + struct hwloc_synthetic_indexes_s numa_attached_indexes; + +#define HWLOC_SYNTHETIC_MAX_DEPTH 128 + struct hwloc_synthetic_level_data_s level[HWLOC_SYNTHETIC_MAX_DEPTH]; +}; + +struct hwloc_synthetic_intlv_loop_s { + unsigned step; + unsigned nb; + unsigned level_depth; +}; + +static void +hwloc_synthetic_process_indexes(struct hwloc_synthetic_backend_data_s *data, + struct hwloc_synthetic_indexes_s *indexes, + unsigned long total, + int verbose) +{ + const char *attr = indexes->string; + unsigned long length = indexes->string_length; + unsigned *array = NULL; + size_t i; + + if (!attr) + return; + + array = calloc(total, sizeof(*array)); + if (!array) { + if (verbose) + fprintf(stderr, "Failed to allocate synthetic index array of size %lu\n", total); + goto out; + } + + i = strspn(attr, "0123456789,"); + if (i == length) { + /* explicit array of indexes */ + + for(i=0; i<total; i++) { + const char *next; + unsigned idx = strtoul(attr, (char **) &next, 10); + if (next == attr) { + if (verbose) + fprintf(stderr, "Failed to read synthetic index #%lu at '%s'\n", (unsigned long) i, attr); + goto out_with_array; + } + + array[i] = idx; + if (i != total-1) { + if (*next != ',') { + if (verbose) + fprintf(stderr, "Missing comma after synthetic index #%lu at '%s'\n", (unsigned long) i, attr); + goto out_with_array; + } + attr = next+1; + } else { + attr = next; + } + } + indexes->array = array; + + } else { + /* interleaving */ + unsigned nr_loops = 1, cur_loop; + unsigned minstep = total; + unsigned long nbs = 1; + unsigned j, mul; + const char *tmp; + + tmp = attr; + while (tmp) { + tmp = strchr(tmp, ':'); + if (!tmp || tmp >= attr+length) + break; + nr_loops++; + tmp++; + } + + { + /* nr_loops colon-separated fields, but we may need one more at the end */ + HWLOC_VLA(struct hwloc_synthetic_intlv_loop_s, loops, nr_loops+1); + + if (*attr >= '0' && *attr <= '9') { + /* interleaving as x*y:z*t:... 
*/ + unsigned step, nb; + + tmp = attr; + cur_loop = 0; + while (tmp) { + char *tmp2, *tmp3; + step = (unsigned) strtol(tmp, &tmp2, 0); + if (tmp2 == tmp || *tmp2 != '*') { + if (verbose) + fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number before '*'\n", tmp); + goto out_with_array; + } + if (!step) { + if (verbose) + fprintf(stderr, "Invalid interleaving loop with step 0 at '%s'\n", tmp); + goto out_with_array; + } + tmp2++; + nb = (unsigned) strtol(tmp2, &tmp3, 0); + if (tmp3 == tmp2 || (*tmp3 && *tmp3 != ':' && *tmp3 != ')' && *tmp3 != ' ')) { + if (verbose) + fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number between '*' and ':'\n", tmp); + goto out_with_array; + } + if (!nb) { + if (verbose) + fprintf(stderr, "Invalid interleaving loop with number 0 at '%s'\n", tmp2); + goto out_with_array; + } + loops[cur_loop].step = step; + loops[cur_loop].nb = nb; + if (step < minstep) + minstep = step; + nbs *= nb; + cur_loop++; + if (*tmp3 == ')' || *tmp3 == ' ') + break; + tmp = (const char*) (tmp3+1); + } + + } else { + /* interleaving as type1:type2:... */ + hwloc_obj_type_t type; + union hwloc_obj_attr_u attrs; + int err; + + /* find level depths for each interleaving loop */ + tmp = attr; + cur_loop = 0; + while (tmp) { + err = hwloc_type_sscanf(tmp, &type, &attrs, sizeof(attrs)); + if (err < 0) { + if (verbose) + fprintf(stderr, "Failed to read synthetic index interleaving loop type '%s'\n", tmp); + goto out_with_array; + } + if (type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) { + if (verbose) + fprintf(stderr, "Misc object type disallowed in synthetic index interleaving loop type '%s'\n", tmp); + goto out_with_array; + } + for(i=0; ; i++) { + if (!data->level[i].arity) { + loops[cur_loop].level_depth = (unsigned)-1; + break; + } + if (type != data->level[i].attr.type) + continue; + if (type == HWLOC_OBJ_GROUP + && attrs.group.depth != (unsigned) -1 + && attrs.group.depth != data->level[i].attr.depth) + continue; + loops[cur_loop].level_depth = (unsigned)i; + break; + } + if (loops[cur_loop].level_depth == (unsigned)-1) { + if (verbose) + fprintf(stderr, "Failed to find level for synthetic index interleaving loop type '%s'\n", + tmp); + goto out_with_array; + } + tmp = strchr(tmp, ':'); + if (!tmp || tmp > attr+length) + break; + tmp++; + cur_loop++; + } + + /* compute actual loop step/nb */ + for(cur_loop=0; cur_loop<nr_loops; cur_loop++) { + unsigned mydepth = loops[cur_loop].level_depth; + unsigned prevdepth = 0; + unsigned step, nb; + for(i=0; i<nr_loops; i++) { + if (loops[i].level_depth == mydepth && i != cur_loop) { + if (verbose) + fprintf(stderr, "Invalid duplicate interleaving loop type in synthetic index '%s'\n", attr); + goto out_with_array; + } + if (loops[i].level_depth < mydepth + && loops[i].level_depth > prevdepth) + prevdepth = loops[i].level_depth; + } + step = total / data->level[mydepth].totalwidth; /* number of objects below us */ + nb = data->level[mydepth].totalwidth / data->level[prevdepth].totalwidth; /* number of us within parent */ + + loops[cur_loop].step = step; + loops[cur_loop].nb = nb; + assert(nb); + assert(step); + if (step < minstep) + minstep = step; + nbs *= nb; + } + } + assert(nbs); + + if (nbs != total) { + /* one loop of total/nbs steps is missing, add it if it's just the smallest one */ + if (minstep == total/nbs) { + loops[nr_loops].step = 1; + loops[nr_loops].nb = total/nbs; + nr_loops++; + } else { + if 
(verbose) + fprintf(stderr, "Invalid index interleaving total width %lu instead of %lu\n", nbs, total); + goto out_with_array; + } + } + + /* generate the array of indexes */ + mul = 1; + for(i=0; i<nr_loops; i++) { + unsigned step = loops[i].step; + unsigned nb = loops[i].nb; + for(j=0; j<total; j++) + array[j] += ((j / step) % nb) * mul; + mul *= nb; + } + + /* check that we have the right values (cannot pass total, cannot give duplicate 0) */ + for(j=0; j<total; j++) { + if (array[j] >= total) { + if (verbose) + fprintf(stderr, "Invalid index interleaving generates out-of-range index %u\n", array[j]); + goto out_with_array; + } + if (!array[j] && j) { + if (verbose) + fprintf(stderr, "Invalid index interleaving generates duplicate index values\n"); + goto out_with_array; + } + } + + indexes->array = array; + } + } + + return; + + out_with_array: + free(array); + out: + return; +} + +static hwloc_uint64_t +hwloc_synthetic_parse_memory_attr(const char *attr, const char **endp) +{ + const char *endptr; + hwloc_uint64_t size; + size = strtoull(attr, (char **) &endptr, 0); + if (!hwloc_strncasecmp(endptr, "TB", 2)) { + size <<= 40; + endptr += 2; + } else if (!hwloc_strncasecmp(endptr, "GB", 2)) { + size <<= 30; + endptr += 2; + } else if (!hwloc_strncasecmp(endptr, "MB", 2)) { + size <<= 20; + endptr += 2; + } else if (!hwloc_strncasecmp(endptr, "kB", 2)) { + size <<= 10; + endptr += 2; + } + *endp = endptr; + return size; +} + +static int +hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp, + struct hwloc_synthetic_attr_s *sattr, + struct hwloc_synthetic_indexes_s *sind, + int verbose) +{ + hwloc_obj_type_t type = sattr->type; + const char *next_pos; + hwloc_uint64_t memorysize = 0; + const char *index_string = NULL; + size_t index_string_length = 0; + + next_pos = (const char *) strchr(attrs, ')'); + if (!next_pos) { + if (verbose) + fprintf(stderr, "Missing attribute closing bracket in synthetic string doesn't have a number of objects at '%s'\n", attrs); + errno = EINVAL; + return -1; + } + + while (')' != *attrs) { + int iscache = hwloc__obj_type_is_cache(type); + + if (iscache && !strncmp("size=", attrs, 5)) { + memorysize = hwloc_synthetic_parse_memory_attr(attrs+5, &attrs); + + } else if (!iscache && !strncmp("memory=", attrs, 7)) { + memorysize = hwloc_synthetic_parse_memory_attr(attrs+7, &attrs); + + } else if (!strncmp("indexes=", attrs, 8)) { + index_string = attrs+8; + attrs += 8; + index_string_length = strcspn(attrs, " )"); + attrs += index_string_length; + + } else { + if (verbose) + fprintf(stderr, "Unknown attribute at '%s'\n", attrs); + errno = EINVAL; + return -1; + } + + if (' ' == *attrs) + attrs++; + else if (')' != *attrs) { + if (verbose) + fprintf(stderr, "Missing parameter separator at '%s'\n", attrs); + errno = EINVAL; + return -1; + } + } + + sattr->memorysize = memorysize; + + if (index_string) { + if (sind->string && verbose) + fprintf(stderr, "Overwriting duplicate indexes attribute with last occurence\n"); + sind->string = index_string; + sind->string_length = (unsigned long)index_string_length; + } + + *next_posp = next_pos+1; + return 0; +} + +/* frees level until arity = 0 */ +static void +hwloc_synthetic_free_levels(struct hwloc_synthetic_backend_data_s *data) +{ + unsigned i; + for(i=0; i<HWLOC_SYNTHETIC_MAX_DEPTH; i++) { + struct hwloc_synthetic_level_data_s *curlevel = &data->level[i]; + struct hwloc_synthetic_attached_s **pprev = &curlevel->attached; + while (*pprev) { + struct hwloc_synthetic_attached_s *cur = *pprev; + *pprev = 
cur->next; + free(cur); + } + free(curlevel->indexes.array); + if (!curlevel->arity) + break; + } + free(data->numa_attached_indexes.array); +} + +/* Read from description a series of integers describing a symmetrical + topology and update the hwloc_synthetic_backend_data_s accordingly. On + success, return zero. */ +static int +hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, + const char *description) +{ + const char *pos, *next_pos; + unsigned long item, count; + unsigned i; + int type_count[HWLOC_OBJ_TYPE_MAX]; + unsigned unset; + int verbose = 0; + const char *env = getenv("HWLOC_SYNTHETIC_VERBOSE"); + int err; + unsigned long totalarity = 1; + + if (env) + verbose = atoi(env); + + data->numa_attached_nr = 0; + data->numa_attached_indexes.array = NULL; + + /* default values before we add root attributes */ + data->level[0].totalwidth = 1; + data->level[0].attr.type = HWLOC_OBJ_MACHINE; + data->level[0].indexes.string = NULL; + data->level[0].indexes.array = NULL; + data->level[0].attr.memorysize = 0; + data->level[0].attached = NULL; + type_count[HWLOC_OBJ_MACHINE] = 1; + if (*description == '(') { + err = hwloc_synthetic_parse_attrs(description+1, &description, &data->level[0].attr, &data->level[0].indexes, verbose); + if (err < 0) + return err; + } + + data->numa_attached_indexes.string = NULL; + data->numa_attached_indexes.array = NULL; + + for (pos = description, count = 1; *pos; pos = next_pos) { + hwloc_obj_type_t type = HWLOC_OBJ_TYPE_NONE; + union hwloc_obj_attr_u attrs; + + /* initialize parent arity to 0 so that the levels are not infinite */ + data->level[count-1].arity = 0; + + while (*pos == ' ') + pos++; + + if (!*pos) + break; + + if (*pos == '[') { + /* attached */ + struct hwloc_synthetic_attached_s *attached, **pprev; + char *attr; + + pos++; + + if (hwloc_type_sscanf(pos, &type, &attrs, sizeof(attrs)) < 0) { + if (verbose) + fprintf(stderr, "Synthetic string with unknown attached object type at '%s'\n", pos); + errno = EINVAL; + goto error; + } + if (type != HWLOC_OBJ_NUMANODE) { + if (verbose) + fprintf(stderr, "Synthetic string with disallowed attached object type at '%s'\n", pos); + errno = EINVAL; + goto error; + } + data->numa_attached_nr += data->level[count-1].totalwidth; + + attached = malloc(sizeof(*attached)); + if (attached) { + attached->attr.type = type; + attached->attr.memorysize = 0; + /* attached->attr.depth and .cachetype unused */ + attached->next = NULL; + pprev = &data->level[count-1].attached; + while (*pprev) + pprev = &((*pprev)->next); + *pprev = attached; + } + + next_pos = strchr(pos, ']'); + if (!next_pos) { + if (verbose) + fprintf(stderr,"Synthetic string doesn't have a closing `]' after attached object type at '%s'\n", pos); + errno = EINVAL; + goto error; + } + + attr = strchr(pos, '('); + if (attr && attr < next_pos && attached) { + const char *dummy; + err = hwloc_synthetic_parse_attrs(attr+1, &dummy, &attached->attr, &data->numa_attached_indexes, verbose); + if (err < 0) + goto error; + } + + next_pos++; + continue; + } + + /* normal level */ + + /* reset defaults */ + data->level[count].indexes.string = NULL; + data->level[count].indexes.array = NULL; + data->level[count].attached = NULL; + + if (*pos < '0' || *pos > '9') { + if (hwloc_type_sscanf(pos, &type, &attrs, sizeof(attrs)) < 0) { + if (!strncmp(pos, "Die", 3) || !strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) { + type = HWLOC_OBJ_GROUP; + } else { + /* FIXME: allow generic "Cache" string? 
would require to deal with possibly duplicate cache levels */ + if (verbose) + fprintf(stderr, "Synthetic string with unknown object type at '%s'\n", pos); + errno = EINVAL; + goto error; + } + } + if (type == HWLOC_OBJ_MACHINE || type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) { + if (verbose) + fprintf(stderr, "Synthetic string with disallowed object type at '%s'\n", pos); + errno = EINVAL; + goto error; + } + + next_pos = strchr(pos, ':'); + if (!next_pos) { + if (verbose) + fprintf(stderr,"Synthetic string doesn't have a `:' after object type at '%s'\n", pos); + errno = EINVAL; + goto error; + } + pos = next_pos + 1; + } + + data->level[count].attr.type = type; + data->level[count].attr.depth = (unsigned) -1; + data->level[count].attr.cachetype = (hwloc_obj_cache_type_t) -1; + if (hwloc__obj_type_is_cache(type)) { + /* these are always initialized */ + data->level[count].attr.depth = attrs.cache.depth; + data->level[count].attr.cachetype = attrs.cache.type; + } else if (type == HWLOC_OBJ_GROUP) { + /* could be -1 but will be set below */ + data->level[count].attr.depth = attrs.group.depth; + } + + /* number of normal children */ + item = strtoul(pos, (char **)&next_pos, 0); + if (next_pos == pos) { + if (verbose) + fprintf(stderr,"Synthetic string doesn't have a number of objects at '%s'\n", pos); + errno = EINVAL; + goto error; + } + if (!item) { + if (verbose) + fprintf(stderr,"Synthetic string with disallowed 0 number of objects at '%s'\n", pos); + errno = EINVAL; + goto error; + } + + totalarity *= item; + data->level[count].totalwidth = totalarity; + data->level[count].indexes.string = NULL; + data->level[count].indexes.array = NULL; + data->level[count].attr.memorysize = 0; + if (*next_pos == '(') { + err = hwloc_synthetic_parse_attrs(next_pos+1, &next_pos, &data->level[count].attr, &data->level[count].indexes, verbose); + if (err < 0) + goto error; + } + + if (count + 1 >= HWLOC_SYNTHETIC_MAX_DEPTH) { + if (verbose) + fprintf(stderr,"Too many synthetic levels, max %d\n", HWLOC_SYNTHETIC_MAX_DEPTH); + errno = EINVAL; + goto error; + } + if (item > UINT_MAX) { + if (verbose) + fprintf(stderr,"Too big arity, max %u\n", UINT_MAX); + errno = EINVAL; + goto error; + } + + data->level[count-1].arity = (unsigned)item; + count++; + } + + if (data->level[count-1].attr.type != HWLOC_OBJ_TYPE_NONE && data->level[count-1].attr.type != HWLOC_OBJ_PU) { + if (verbose) + fprintf(stderr, "Synthetic string cannot use non-PU type for last level\n"); + errno = EINVAL; + return -1; + } + data->level[count-1].attr.type = HWLOC_OBJ_PU; + + for(i=HWLOC_OBJ_TYPE_MIN; i<HWLOC_OBJ_TYPE_MAX; i++) { + type_count[i] = 0; + } + for(i=count-1; i>0; i--) { + hwloc_obj_type_t type = data->level[i].attr.type; + if (type != HWLOC_OBJ_TYPE_NONE) { + type_count[type]++; + } + } + + /* sanity checks */ + if (!type_count[HWLOC_OBJ_PU]) { + if (verbose) + fprintf(stderr, "Synthetic string missing ending number of PUs\n"); + errno = EINVAL; + return -1; + } else if (type_count[HWLOC_OBJ_PU] > 1) { + if (verbose) + fprintf(stderr, "Synthetic string cannot have several PU levels\n"); + errno = EINVAL; + return -1; + } + if (type_count[HWLOC_OBJ_PACKAGE] > 1) { + if (verbose) + fprintf(stderr, "Synthetic string cannot have several package levels\n"); + errno = EINVAL; + return -1; + } + if (type_count[HWLOC_OBJ_NUMANODE] > 1) { + if (verbose) + fprintf(stderr, "Synthetic string cannot have several NUMA node levels\n"); + errno = EINVAL; + return -1; + } + if 
(type_count[HWLOC_OBJ_NUMANODE] && data->numa_attached_nr) { + if (verbose) + fprintf(stderr,"Synthetic string cannot have NUMA nodes both as a level and attached\n"); + errno = EINVAL; + return -1; + } + if (type_count[HWLOC_OBJ_CORE] > 1) { + if (verbose) + fprintf(stderr, "Synthetic string cannot have several core levels\n"); + errno = EINVAL; + return -1; + } + + /* deal with missing intermediate levels */ + unset = 0; + for(i=1; i<count-1; i++) { + if (data->level[i].attr.type == HWLOC_OBJ_TYPE_NONE) + unset++; + } + if (unset && unset != count-2) { + if (verbose) + fprintf(stderr, "Synthetic string cannot mix unspecified and specified types for levels\n"); + errno = EINVAL; + return -1; + } + if (unset) { + /* we want in priority: numa, package, core, up to 4 caches, groups */ + unsigned _count = count; + unsigned neednuma = 0; + unsigned needpack = 0; + unsigned needcore = 0; + unsigned needcaches = 0; + unsigned needgroups = 0; + /* 2 levels for machine and PU */ + _count -= 2; + + neednuma = (_count >= 1 && !data->numa_attached_nr); + _count -= neednuma; + + needpack = (_count >= 1); + _count -= needpack; + + needcore = (_count >= 1); + _count -= needcore; + + needcaches = (_count > 4 ? 4 : _count); + _count -= needcaches; + + needgroups = _count; + + /* we place them in order: groups, package, numa, caches, core */ + for(i = 0; i < needgroups; i++) { + unsigned depth = 1 + i; + data->level[depth].attr.type = HWLOC_OBJ_GROUP; + type_count[HWLOC_OBJ_GROUP]++; + } + if (needpack) { + unsigned depth = 1 + needgroups; + data->level[depth].attr.type = HWLOC_OBJ_PACKAGE; + type_count[HWLOC_OBJ_PACKAGE] = 1; + } + if (neednuma) { + unsigned depth = 1 + needgroups + needpack; + data->level[depth].attr.type = HWLOC_OBJ_NUMANODE; + type_count[HWLOC_OBJ_NUMANODE] = 1; + } + if (needcaches) { + /* priority: l2, l1, l3, l1i */ + /* order: l3, l2, l1, l1i */ + unsigned l3depth = 1 + needgroups + needpack + neednuma; + unsigned l2depth = l3depth + (needcaches >= 3); + unsigned l1depth = l2depth + 1; + unsigned l1idepth = l1depth + 1; + if (needcaches >= 3) { + data->level[l3depth].attr.type = HWLOC_OBJ_L3CACHE; + data->level[l3depth].attr.depth = 3; + data->level[l3depth].attr.cachetype = HWLOC_OBJ_CACHE_UNIFIED; + type_count[HWLOC_OBJ_L3CACHE] = 1; + } + data->level[l2depth].attr.type = HWLOC_OBJ_L2CACHE; + data->level[l2depth].attr.depth = 2; + data->level[l2depth].attr.cachetype = HWLOC_OBJ_CACHE_UNIFIED; + type_count[HWLOC_OBJ_L2CACHE] = 1; + if (needcaches >= 2) { + data->level[l1depth].attr.type = HWLOC_OBJ_L1CACHE; + data->level[l1depth].attr.depth = 1; + data->level[l1depth].attr.cachetype = HWLOC_OBJ_CACHE_DATA; + type_count[HWLOC_OBJ_L1CACHE] = 1; + } + if (needcaches >= 4) { + data->level[l1idepth].attr.type = HWLOC_OBJ_L1ICACHE; + data->level[l1idepth].attr.depth = 1; + data->level[l1idepth].attr.cachetype = HWLOC_OBJ_CACHE_INSTRUCTION; + type_count[HWLOC_OBJ_L1ICACHE] = 1; + } + } + if (needcore) { + unsigned depth = 1 + needgroups + needpack + neednuma + needcaches; + data->level[depth].attr.type = HWLOC_OBJ_CORE; + type_count[HWLOC_OBJ_CORE] = 1; + } + } + + /* enforce a NUMA level */ + if (!type_count[HWLOC_OBJ_NUMANODE] && !data->numa_attached_nr) { + /* insert a NUMA level below the automatic machine root */ + if (verbose) + fprintf(stderr, "Inserting a NUMA level with a single object at depth 1\n"); + /* move existing levels by one */ + memmove(&data->level[2], &data->level[1], count*sizeof(struct hwloc_synthetic_level_data_s)); + data->level[1].attr.type = HWLOC_OBJ_NUMANODE;
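+ /* illustrative example: a description such as "pu:4" has no NUMA level, so Machine -> 4*PU becomes Machine -> 1*NUMANode -> 4*PU after this insertion */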
+ data->level[1].indexes.string = NULL; + data->level[1].indexes.array = NULL; + data->level[1].attr.memorysize = 0; + data->level[1].totalwidth = data->level[0].totalwidth; + /* update arity to insert a single NUMA node per parent */ + data->level[1].arity = data->level[0].arity; + data->level[0].arity = 1; + count++; + } + + for (i=0; i<count; i++) { + struct hwloc_synthetic_level_data_s *curlevel = &data->level[i]; + hwloc_obj_type_t type = curlevel->attr.type; + + if (type == HWLOC_OBJ_GROUP) { + if (curlevel->attr.depth == (unsigned)-1) + curlevel->attr.depth = type_count[HWLOC_OBJ_GROUP]--; + + } else if (hwloc__obj_type_is_cache(type)) { + if (!curlevel->attr.memorysize) { + if (1 == curlevel->attr.depth) + /* 32Kb in L1 */ + curlevel->attr.memorysize = 32*1024; + else + /* *4 at each level, starting from 1MB for L2, unified */ + curlevel->attr.memorysize = 256ULL*1024 << (2*curlevel->attr.depth); + } + + } else if (type == HWLOC_OBJ_NUMANODE && !curlevel->attr.memorysize) { + /* 1GB in memory nodes. */ + curlevel->attr.memorysize = 1024*1024*1024; + } + + hwloc_synthetic_process_indexes(data, &data->level[i].indexes, data->level[i].totalwidth, verbose); + } + + hwloc_synthetic_process_indexes(data, &data->numa_attached_indexes, data->numa_attached_nr, verbose); + + data->string = strdup(description); + data->level[count-1].arity = 0; + return 0; + + error: + hwloc_synthetic_free_levels(data); + return -1; +} + +static void +hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr, + hwloc_obj_t obj) +{ + switch (obj->type) { + case HWLOC_OBJ_GROUP: + obj->attr->group.kind = HWLOC_GROUP_KIND_SYNTHETIC; + obj->attr->group.subkind = sattr->depth-1; + break; + case HWLOC_OBJ_MACHINE: + break; + case HWLOC_OBJ_NUMANODE: + obj->attr->numanode.local_memory = sattr->memorysize; + obj->attr->numanode.page_types_len = 1; + obj->attr->numanode.page_types = malloc(sizeof(*obj->attr->numanode.page_types)); + memset(obj->attr->numanode.page_types, 0, sizeof(*obj->attr->numanode.page_types)); + obj->attr->numanode.page_types[0].size = 4096; + obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096; + break; + case HWLOC_OBJ_PACKAGE: + break; + case HWLOC_OBJ_L1CACHE: + case HWLOC_OBJ_L2CACHE: + case HWLOC_OBJ_L3CACHE: + case HWLOC_OBJ_L4CACHE: + case HWLOC_OBJ_L5CACHE: + case HWLOC_OBJ_L1ICACHE: + case HWLOC_OBJ_L2ICACHE: + case HWLOC_OBJ_L3ICACHE: + obj->attr->cache.depth = sattr->depth; + obj->attr->cache.linesize = 64; + obj->attr->cache.type = sattr->cachetype; + obj->attr->cache.size = sattr->memorysize; + break; + case HWLOC_OBJ_CORE: + break; + case HWLOC_OBJ_PU: + break; + default: + /* Should never happen */ + assert(0); + break; + } +} + +static unsigned +hwloc_synthetic_next_index(struct hwloc_synthetic_indexes_s *indexes, hwloc_obj_type_t type) +{ + unsigned os_index = indexes->next++; + + if (indexes->array) + os_index = indexes->array[os_index]; + else if (hwloc__obj_type_is_cache(type) || type == HWLOC_OBJ_GROUP) + /* don't enforce useless os_indexes for Caches and Groups */ + os_index = HWLOC_UNKNOWN_INDEX; + + return os_index; +} + +static void +hwloc_synthetic_insert_attached(struct hwloc_topology *topology, + struct hwloc_synthetic_backend_data_s *data, + struct hwloc_synthetic_attached_s *attached, + hwloc_bitmap_t set) +{ + hwloc_obj_t child; + unsigned attached_os_index; + + if (!attached) + return; + + assert(attached->attr.type == HWLOC_OBJ_NUMANODE); + + attached_os_index = hwloc_synthetic_next_index(&data->numa_attached_indexes, HWLOC_OBJ_NUMANODE); + + 
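/* the attached NUMA node shares the parent level's cpuset (duplicated just below) and owns exactly one nodeset bit, its OS index; the rest of the attached list is handled by the tail recursion on attached->next */ +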
child = hwloc_alloc_setup_object(topology, attached->attr.type, attached_os_index); + child->cpuset = hwloc_bitmap_dup(set); + + child->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(child->nodeset, attached_os_index); + + hwloc_synthetic_set_attr(&attached->attr, child); + + hwloc_insert_object_by_cpuset(topology, child); + + hwloc_synthetic_insert_attached(topology, data, attached->next, set); +} + +/* + * Recursively build objects for one level of the synthetic description + * - level gives where to look in data->level[] for the type, arity and attributes + * - each level's indexes structure hands out unique OS indexes for that level + * - generated cpus are added to parent_cpuset + */ +static void +hwloc__look_synthetic(struct hwloc_topology *topology, + struct hwloc_synthetic_backend_data_s *data, + int level, + hwloc_bitmap_t parent_cpuset) +{ + hwloc_obj_t obj; + unsigned i; + struct hwloc_synthetic_level_data_s *curlevel = &data->level[level]; + hwloc_obj_type_t type = curlevel->attr.type; + hwloc_bitmap_t set; + unsigned os_index; + + assert(hwloc__obj_type_is_normal(type) || type == HWLOC_OBJ_NUMANODE); + assert(type != HWLOC_OBJ_MACHINE); + + os_index = hwloc_synthetic_next_index(&curlevel->indexes, type); + + set = hwloc_bitmap_alloc(); + if (!curlevel->arity) { + hwloc_bitmap_set(set, os_index); + } else { + for (i = 0; i < curlevel->arity; i++) + hwloc__look_synthetic(topology, data, level + 1, set); + } + + hwloc_bitmap_or(parent_cpuset, parent_cpuset, set); + + if (hwloc_filter_check_keep_object_type(topology, type)) { + obj = hwloc_alloc_setup_object(topology, type, os_index); + obj->cpuset = hwloc_bitmap_dup(set); + + if (type == HWLOC_OBJ_NUMANODE) { + obj->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(obj->nodeset, os_index); + } + + hwloc_synthetic_set_attr(&curlevel->attr, obj); + + hwloc_insert_object_by_cpuset(topology, obj); + } + + hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set); + + hwloc_bitmap_free(set); +} + +static int +hwloc_look_synthetic(struct hwloc_backend *backend) +{ + struct hwloc_topology *topology = backend->topology; + struct hwloc_synthetic_backend_data_s *data = backend->private_data; + hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); + unsigned i; + + assert(!topology->levels[0][0]->cpuset); + + hwloc_alloc_root_sets(topology->levels[0][0]); + + topology->support.discovery->pu = 1; + topology->support.discovery->numa = 1; /* we add a single NUMA node if none is given */ + topology->support.discovery->numa_memory = 1; /* specified or default size */ + + /* start with os_index 0 for each level */ + for (i = 0; data->level[i].arity > 0; i++) + data->level[i].indexes.next = 0; + data->numa_attached_indexes.next = 0; + /* ... 
including the last one */ + data->level[i].indexes.next = 0; + + /* update first level type according to the synthetic type array */ + topology->levels[0][0]->type = data->level[0].attr.type; + hwloc_synthetic_set_attr(&data->level[0].attr, topology->levels[0][0]); + + for (i = 0; i < data->level[0].arity; i++) + hwloc__look_synthetic(topology, data, 1, cpuset); + + hwloc_synthetic_insert_attached(topology, data, data->level[0].attached, cpuset); + + hwloc_bitmap_free(cpuset); + + hwloc_obj_add_info(topology->levels[0][0], "Backend", "Synthetic"); + hwloc_obj_add_info(topology->levels[0][0], "SyntheticDescription", data->string); + return 0; +} + +static void +hwloc_synthetic_backend_disable(struct hwloc_backend *backend) +{ + struct hwloc_synthetic_backend_data_s *data = backend->private_data; + hwloc_synthetic_free_levels(data); + free(data->string); + free(data); +} + +static struct hwloc_backend * +hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component, + const void *_data1, + const void *_data2 __hwloc_attribute_unused, + const void *_data3 __hwloc_attribute_unused) +{ + struct hwloc_backend *backend; + struct hwloc_synthetic_backend_data_s *data; + int err; + + if (!_data1) { + const char *env = getenv("HWLOC_SYNTHETIC"); + if (env) { + /* 'synthetic' was given in HWLOC_COMPONENTS without a description */ + _data1 = env; + } else { + errno = EINVAL; + goto out; + } + } + + backend = hwloc_backend_alloc(component); + if (!backend) + goto out; + + data = malloc(sizeof(*data)); + if (!data) { + errno = ENOMEM; + goto out_with_backend; + } + + err = hwloc_backend_synthetic_init(data, (const char *) _data1); + if (err < 0) + goto out_with_data; + + backend->private_data = data; + backend->discover = hwloc_look_synthetic; + backend->disable = hwloc_synthetic_backend_disable; + backend->is_thissystem = 0; + + return backend; + + out_with_data: + free(data); + out_with_backend: + free(backend); + out: + return NULL; +} + +static struct hwloc_disc_component hwloc_synthetic_disc_component = { + HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + "synthetic", + ~0, + hwloc_synthetic_component_instantiate, + 30, + 1, + NULL +}; + +const struct hwloc_component hwloc_synthetic_component = { + HWLOC_COMPONENT_ABI, + NULL, NULL, + HWLOC_COMPONENT_TYPE_DISC, + 0, + &hwloc_synthetic_disc_component +}; + +static __hwloc_inline int +hwloc__export_synthetic_update_status(int *ret, char **tmp, ssize_t *tmplen, int res) +{ + if (res < 0) + return -1; + *ret += res; + if (res >= *tmplen) + res = *tmplen>0 ? 
(int)(*tmplen) - 1 : 0; + *tmp += res; + *tmplen -= res; + return 0; +} + +static __hwloc_inline void +hwloc__export_synthetic_add_char(int *ret, char **tmp, ssize_t *tmplen, char c) +{ + if (*tmplen > 1) { + (*tmp)[0] = c; + (*tmp)[1] = '\0'; + (*tmp)++; + (*tmplen)--; + } + (*ret)++; +} + +static int +hwloc__export_synthetic_indexes(hwloc_obj_t *level, unsigned total, + char *buffer, size_t buflen) +{ + unsigned step = 1; + unsigned nr_loops = 0; + struct hwloc_synthetic_intlv_loop_s *loops = NULL, *tmploops; + hwloc_obj_t cur; + unsigned i, j; + ssize_t tmplen = buflen; + char *tmp = buffer; + int res, ret = 0; + + /* must start with 0 */ + if (level[0]->os_index) + goto exportall; + + while (step != total) { + /* must be a divider of the total */ + if (total % step) + goto exportall; + + /* look for os_index == step */ + for(i=1; i<total; i++) + if (level[i]->os_index == step) + break; + if (i == total) + goto exportall; + for(j=2; j<total/i; j++) + if (level[i*j]->os_index != step*j) + break; + + nr_loops++; + tmploops = realloc(loops, nr_loops*sizeof(*loops)); + if (!tmploops) + goto exportall; + loops = tmploops; + loops[nr_loops-1].step = i; + loops[nr_loops-1].nb = j; + step *= j; + } + + /* check this interleaving */ + for(i=0; i<total; i++) { + unsigned ind = 0; + unsigned mul = 1; + for(j=0; j<nr_loops; j++) { + ind += (i / loops[j].step) % loops[j].nb * mul; + mul *= loops[j].nb; + } + if (level[i]->os_index != ind) + goto exportall; + } + + /* success, print it */ + for(j=0; j<nr_loops; j++) { + res = hwloc_snprintf(tmp, tmplen, "%u*%u%s", loops[j].step, loops[j].nb, + j == nr_loops-1 ? ")" : ":"); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) { + free(loops); + return -1; + } + } + + free(loops); + return ret; + + exportall: + free(loops); + + /* dump all indexes */ + cur = level[0]; + while (cur) { + res = hwloc_snprintf(tmp, tmplen, "%u%s", cur->os_index, + cur->next_cousin ? "," : ")"); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + cur = cur->next_cousin; + } + return ret; +} + +static int +hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology, + hwloc_obj_t obj, + char *buffer, size_t buflen) +{ + const char * separator = " "; + const char * prefix = "("; + char cachesize[64] = ""; + char memsize[64] = ""; + int needindexes = 0; + + if (hwloc__obj_type_is_cache(obj->type) && obj->attr->cache.size) { + snprintf(cachesize, sizeof(cachesize), "%ssize=%llu", + prefix, (unsigned long long) obj->attr->cache.size); + prefix = separator; + } + if (obj->type == HWLOC_OBJ_NUMANODE && obj->attr->numanode.local_memory) { + snprintf(memsize, sizeof(memsize), "%smemory=%llu", + prefix, (unsigned long long) obj->attr->numanode.local_memory); + prefix = separator; + } + if (!obj->logical_index /* only display indexes once per level (not for non-first NUMA children, etc.) */ + && (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE)) { + hwloc_obj_t cur = obj; + while (cur) { + if (cur->os_index != cur->logical_index) { + needindexes = 1; + break; + } + cur = cur->next_cousin; + } + } + if (*cachesize || *memsize || needindexes) { + ssize_t tmplen = buflen; + char *tmp = buffer; + int res, ret = 0; + + res = hwloc_snprintf(tmp, tmplen, "%s%s%s", cachesize, memsize, needindexes ? 
"" : ")"); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + + if (needindexes) { + unsigned total; + hwloc_obj_t *level; + + if (obj->depth < 0) { + assert(obj->depth == HWLOC_TYPE_DEPTH_NUMANODE); + total = topology->slevels[HWLOC_SLEVEL_NUMANODE].nbobjs; + level = topology->slevels[HWLOC_SLEVEL_NUMANODE].objs; + } else { + total = topology->level_nbobjects[obj->depth]; + level = topology->levels[obj->depth]; + } + + res = hwloc_snprintf(tmp, tmplen, "%sindexes=", prefix); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + + res = hwloc__export_synthetic_indexes(level, total, tmp, tmplen); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + } + return ret; + } else { + return 0; + } +} + +static int +hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flags, + hwloc_obj_t obj, unsigned arity, + char *buffer, size_t buflen) +{ + char aritys[12] = ""; + ssize_t tmplen = buflen; + char *tmp = buffer; + int res, ret = 0; + + /* <type>:<arity>, except for root */ + if (arity != (unsigned)-1) + snprintf(aritys, sizeof(aritys), ":%u", arity); + if (hwloc__obj_type_is_cache(obj->type) + && (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES)) { + /* v1 uses generic "Cache" for non-extended type name */ + res = hwloc_snprintf(tmp, tmplen, "Cache%s", aritys); + + } else if (obj->type == HWLOC_OBJ_PACKAGE + && (flags & (HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES + |HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1))) { + /* if exporting to v1 or without extended-types, use all-v1-compatible Socket name */ + res = hwloc_snprintf(tmp, tmplen, "Socket%s", aritys); + + } else if (obj->type == HWLOC_OBJ_GROUP /* don't export group depth */ + || flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) { + res = hwloc_snprintf(tmp, tmplen, "%s%s", hwloc_obj_type_string(obj->type), aritys); + } else { + char types[64]; + hwloc_obj_type_snprintf(types, sizeof(types), obj, 1); + res = hwloc_snprintf(tmp, tmplen, "%s%s", types, aritys); + } + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + + if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) { + /* obj attributes */ + res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + } + + return ret; +} + +static int +hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsigned long flags, + hwloc_obj_t parent, + char *buffer, size_t buflen, + int needprefix, int verbose) +{ + hwloc_obj_t mchild; + ssize_t tmplen = buflen; + char *tmp = buffer; + int res, ret = 0; + + mchild = parent->memory_first_child; + if (!mchild) + return 0; + + if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) { + /* v1: export a single NUMA child */ + if (parent->memory_arity > 1 || mchild->type != HWLOC_OBJ_NUMANODE) { + /* not supported */ + if (verbose) + fprintf(stderr, "Cannot export to synthetic v1 if multiple memory children are attached to the same location.\n"); + errno = EINVAL; + return -1; + } + + if (needprefix) + hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' '); + + res = hwloc__export_synthetic_obj(topology, flags, mchild, 1, tmp, tmplen); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + return ret; + } + + while (mchild) { + /* v2: export all NUMA children */ + + assert(mchild->type == 
HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */ + + if (needprefix) + hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' '); + + hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, '['); + + res = hwloc__export_synthetic_obj(topology, flags, mchild, (unsigned)-1, tmp, tmplen); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + + hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ']'); + + needprefix = 1; + mchild = mchild->next_sibling; + } + + return ret; +} + +static int +hwloc_check_memory_symmetric(struct hwloc_topology * topology) +{ + hwloc_bitmap_t remaining_nodes; + + remaining_nodes = hwloc_bitmap_dup(hwloc_get_root_obj(topology)->nodeset); + if (!remaining_nodes) + /* assume asymmetric */ + return -1; + + while (!hwloc_bitmap_iszero(remaining_nodes)) { + unsigned idx; + hwloc_obj_t node; + hwloc_obj_t first_parent; + unsigned i; + + idx = hwloc_bitmap_first(remaining_nodes); + node = hwloc_get_numanode_obj_by_os_index(topology, idx); + assert(node); + + first_parent = node->parent; + assert(hwloc__obj_type_is_normal(first_parent->type)); /* only depth-1 memory children for now */ + + /* check whether all objects on the parent's level have the same number of NUMA children */ + for(i=0; i<hwloc_get_nbobjs_by_depth(topology, first_parent->depth); i++) { + hwloc_obj_t parent, mchild; + + parent = hwloc_get_obj_by_depth(topology, first_parent->depth, i); + assert(parent); + + /* must have same memory arity */ + if (parent->memory_arity != first_parent->memory_arity) + goto out_with_bitmap; + + /* clear these NUMA children from remaining_nodes */ + mchild = parent->memory_first_child; + while (mchild) { + assert(mchild->type == HWLOC_OBJ_NUMANODE); /* only NUMA node memory children for now */ + hwloc_bitmap_clr(remaining_nodes, mchild->os_index); /* cannot use parent->nodeset, some normal children may have other NUMA nodes */ + mchild = mchild->next_sibling; + } + } + } + + hwloc_bitmap_free(remaining_nodes); + return 0; + + out_with_bitmap: + hwloc_bitmap_free(remaining_nodes); + return -1; +} + +int +hwloc_topology_export_synthetic(struct hwloc_topology * topology, + char *buffer, size_t buflen, + unsigned long flags) +{ + hwloc_obj_t obj = hwloc_get_root_obj(topology); + ssize_t tmplen = buflen; + char *tmp = buffer; + int res, ret = 0; + unsigned arity; + int needprefix = 0; + int verbose = 0; + const char *env = getenv("HWLOC_SYNTHETIC_VERBOSE"); + + if (env) + verbose = atoi(env); + + if (!topology->is_loaded) { + errno = EINVAL; + return -1; + } + + if (flags & ~(HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES + |HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS + |HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1 + |HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_IGNORE_MEMORY)) { + errno = EINVAL; + return -1; + } + + /* TODO: add a flag to ignore symmetric_subtree and I/Os. + * just assume things are symmetric with the left branches of the tree. + * but the number of objects per level may be wrong, what to do with OS index array in this case? + * only allow ignoring symmetric_subtree if the level width remains OK? + */ + + /* TODO: add a root object by default, with a prefix such as tree= + * so that we can backward-compatibly recognize whether there's a root or not. + * and add a flag to disable it. + */ + + /* TODO: flag to force all indexes, not only for PU and NUMA?
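+ * (as currently written, hwloc__export_synthetic_obj_attr() above only emits an indexes= attribute for PU and NUMANODE levels.)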
*/ + + if (!obj->symmetric_subtree) { + if (verbose) + fprintf(stderr, "Cannot export to synthetic unless topology is symmetric (root->symmetric_subtree must be set).\n"); + errno = EINVAL; + return -1; + } + + if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_IGNORE_MEMORY) + && hwloc_check_memory_symmetric(topology) < 0) { + if (verbose) + fprintf(stderr, "Cannot export to synthetic unless memory is attached symmetrically.\n"); + errno = EINVAL; + return -1; + } + + if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) { + /* v1 requires all NUMA at the same level */ + hwloc_obj_t node; + signed pdepth; + + node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); + assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */ + pdepth = node->parent->depth; + + while ((node = node->next_cousin) != NULL) { + assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */ + if (node->parent->depth != pdepth) { + if (verbose) + fprintf(stderr, "Cannot export to synthetic v1 if memory is attached to parents at different depths.\n"); + errno = EINVAL; + return -1; + } + } + } + + /* we're good, start exporting */ + + if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) { + /* obj attributes */ + res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen); + if (res > 0) + needprefix = 1; + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + } + + if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_IGNORE_MEMORY)) { + res = hwloc__export_synthetic_memory_children(topology, flags, obj, tmp, tmplen, needprefix, verbose); + if (res > 0) + needprefix = 1; + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + } + + arity = obj->arity; + while (arity) { + /* for each level */ + obj = obj->first_child; + + if (needprefix) + hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' '); + + res = hwloc__export_synthetic_obj(topology, flags, obj, arity, tmp, tmplen); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + + if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_IGNORE_MEMORY)) { + res = hwloc__export_synthetic_memory_children(topology, flags, obj, tmp, tmplen, 1, verbose); + if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) + return -1; + } + + /* next level */ + needprefix = 1; + arity = obj->arity; + } + + return ret; +} diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c new file mode 100644 index 000000000..d03645c0f --- /dev/null +++ b/src/3rdparty/hwloc/src/topology-windows.c @@ -0,0 +1,1189 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +/* To try to get all declarations duplicated below. 
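+ * (_WIN32_WINNT 0x0601 requests the Windows 7 API surface; the HAVE_*-guarded typedefs below re-declare the same structures for toolchains whose headers lack them.)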
*/ +#define _WIN32_WINNT 0x0601 + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/private.h> +#include <private/debug.h> + +#include <windows.h> + +#ifndef HAVE_KAFFINITY +typedef ULONG_PTR KAFFINITY, *PKAFFINITY; +#endif + +#ifndef HAVE_PROCESSOR_CACHE_TYPE +typedef enum _PROCESSOR_CACHE_TYPE { + CacheUnified, + CacheInstruction, + CacheData, + CacheTrace +} PROCESSOR_CACHE_TYPE; +#endif + +#ifndef CACHE_FULLY_ASSOCIATIVE +#define CACHE_FULLY_ASSOCIATIVE 0xFF +#endif + +#ifndef MAXIMUM_PROC_PER_GROUP /* missing in MinGW */ +#define MAXIMUM_PROC_PER_GROUP 64 +#endif + +#ifndef HAVE_CACHE_DESCRIPTOR +typedef struct _CACHE_DESCRIPTOR { + BYTE Level; + BYTE Associativity; + WORD LineSize; + DWORD Size; /* in bytes */ + PROCESSOR_CACHE_TYPE Type; +} CACHE_DESCRIPTOR, *PCACHE_DESCRIPTOR; +#endif + +#ifndef HAVE_LOGICAL_PROCESSOR_RELATIONSHIP +typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP { + RelationProcessorCore, + RelationNumaNode, + RelationCache, + RelationProcessorPackage, + RelationGroup, + RelationAll = 0xffff +} LOGICAL_PROCESSOR_RELATIONSHIP; +#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ +# ifndef HAVE_RELATIONPROCESSORPACKAGE +# define RelationProcessorPackage 3 +# define RelationGroup 4 +# define RelationAll 0xffff +# endif /* HAVE_RELATIONPROCESSORPACKAGE */ +#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ + +#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION +typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION { + ULONG_PTR ProcessorMask; + LOGICAL_PROCESSOR_RELATIONSHIP Relationship; + _ANONYMOUS_UNION + union { + struct { + BYTE flags; + } ProcessorCore; + struct { + DWORD NodeNumber; + } NumaNode; + CACHE_DESCRIPTOR Cache; + ULONGLONG Reserved[2]; + } DUMMYUNIONNAME; +} SYSTEM_LOGICAL_PROCESSOR_INFORMATION, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION; +#endif + +/* Extended interface, for group support */ + +#ifndef HAVE_GROUP_AFFINITY +typedef struct _GROUP_AFFINITY { + KAFFINITY Mask; + WORD Group; + WORD Reserved[3]; +} GROUP_AFFINITY, *PGROUP_AFFINITY; +#endif + +#ifndef HAVE_PROCESSOR_RELATIONSHIP +typedef struct _PROCESSOR_RELATIONSHIP { + BYTE Flags; + BYTE Reserved[21]; + WORD GroupCount; + GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY]; +} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP; +#endif + +#ifndef HAVE_NUMA_NODE_RELATIONSHIP +typedef struct _NUMA_NODE_RELATIONSHIP { + DWORD NodeNumber; + BYTE Reserved[20]; + GROUP_AFFINITY GroupMask; +} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP; +#endif + +#ifndef HAVE_CACHE_RELATIONSHIP +typedef struct _CACHE_RELATIONSHIP { + BYTE Level; + BYTE Associativity; + WORD LineSize; + DWORD CacheSize; + PROCESSOR_CACHE_TYPE Type; + BYTE Reserved[20]; + GROUP_AFFINITY GroupMask; +} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP; +#endif + +#ifndef HAVE_PROCESSOR_GROUP_INFO +typedef struct _PROCESSOR_GROUP_INFO { + BYTE MaximumProcessorCount; + BYTE ActiveProcessorCount; + BYTE Reserved[38]; + KAFFINITY ActiveProcessorMask; +} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO; +#endif + +#ifndef HAVE_GROUP_RELATIONSHIP +typedef struct _GROUP_RELATIONSHIP { + WORD MaximumGroupCount; + WORD ActiveGroupCount; + ULONGLONG Reserved[2]; + PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY]; +} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP; +#endif + +#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX +typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX { + LOGICAL_PROCESSOR_RELATIONSHIP Relationship; + DWORD Size; + _ANONYMOUS_UNION + union { + PROCESSOR_RELATIONSHIP Processor; + NUMA_NODE_RELATIONSHIP NumaNode; + 
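/* the Relationship field above selects which union member is valid */ +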
CACHE_RELATIONSHIP Cache; + GROUP_RELATIONSHIP Group; + /* Odd: no member to tell the cpu mask of the package... */ + } DUMMYUNIONNAME; +} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX; +#endif + +#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK +typedef union _PSAPI_WORKING_SET_EX_BLOCK { + ULONG_PTR Flags; + struct { + unsigned Valid :1; + unsigned ShareCount :3; + unsigned Win32Protection :11; + unsigned Shared :1; + unsigned Node :6; + unsigned Locked :1; + unsigned LargePage :1; + }; +} PSAPI_WORKING_SET_EX_BLOCK; +#endif + +#ifndef HAVE_PSAPI_WORKING_SET_EX_INFORMATION +typedef struct _PSAPI_WORKING_SET_EX_INFORMATION { + PVOID VirtualAddress; + PSAPI_WORKING_SET_EX_BLOCK VirtualAttributes; +} PSAPI_WORKING_SET_EX_INFORMATION; +#endif + +#ifndef HAVE_PROCESSOR_NUMBER +typedef struct _PROCESSOR_NUMBER { + WORD Group; + BYTE Number; + BYTE Reserved; +} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; +#endif + +/* Function pointers */ + +typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORGROUPCOUNT)(void); +static PFN_GETACTIVEPROCESSORGROUPCOUNT GetActiveProcessorGroupCountProc; + +static unsigned long nr_processor_groups = 1; +static unsigned long max_numanode_index = 0; + +typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORCOUNT)(WORD); +static PFN_GETACTIVEPROCESSORCOUNT GetActiveProcessorCountProc; + +typedef DWORD (WINAPI *PFN_GETCURRENTPROCESSORNUMBER)(void); +static PFN_GETCURRENTPROCESSORNUMBER GetCurrentProcessorNumberProc; + +typedef VOID (WINAPI *PFN_GETCURRENTPROCESSORNUMBEREX)(PPROCESSOR_NUMBER); +static PFN_GETCURRENTPROCESSORNUMBEREX GetCurrentProcessorNumberExProc; + +typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATION)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer, PDWORD ReturnLength); +static PFN_GETLOGICALPROCESSORINFORMATION GetLogicalProcessorInformationProc; + +typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnLength); +static PFN_GETLOGICALPROCESSORINFORMATIONEX GetLogicalProcessorInformationExProc; + +typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE hThread, const GROUP_AFFINITY *GroupAffinity, PGROUP_AFFINITY PreviousGroupAffinity); +static PFN_SETTHREADGROUPAFFINITY SetThreadGroupAffinityProc; + +typedef BOOL (WINAPI *PFN_GETTHREADGROUPAFFINITY)(HANDLE hThread, PGROUP_AFFINITY GroupAffinity); +static PFN_GETTHREADGROUPAFFINITY GetThreadGroupAffinityProc; + +typedef BOOL (WINAPI *PFN_GETNUMAAVAILABLEMEMORYNODE)(UCHAR Node, PULONGLONG AvailableBytes); +static PFN_GETNUMAAVAILABLEMEMORYNODE GetNumaAvailableMemoryNodeProc; + +typedef BOOL (WINAPI *PFN_GETNUMAAVAILABLEMEMORYNODEEX)(USHORT Node, PULONGLONG AvailableBytes); +static PFN_GETNUMAAVAILABLEMEMORYNODEEX GetNumaAvailableMemoryNodeExProc; + +typedef LPVOID (WINAPI *PFN_VIRTUALALLOCEXNUMA)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, DWORD nndPreferred); +static PFN_VIRTUALALLOCEXNUMA VirtualAllocExNumaProc; + +typedef BOOL (WINAPI *PFN_VIRTUALFREEEX)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD dwFreeType); +static PFN_VIRTUALFREEEX VirtualFreeExProc; + +typedef BOOL (WINAPI *PFN_QUERYWORKINGSETEX)(HANDLE hProcess, PVOID pv, DWORD cb); +static PFN_QUERYWORKINGSETEX QueryWorkingSetExProc; + +static void hwloc_win_get_function_ptrs(void) +{ + HMODULE kernel32; + + kernel32 = LoadLibrary("kernel32.dll"); + if (kernel32) { + GetActiveProcessorGroupCountProc = + (PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, 
"GetActiveProcessorGroupCount"); + GetActiveProcessorCountProc = + (PFN_GETACTIVEPROCESSORCOUNT) GetProcAddress(kernel32, "GetActiveProcessorCount"); + GetLogicalProcessorInformationProc = + (PFN_GETLOGICALPROCESSORINFORMATION) GetProcAddress(kernel32, "GetLogicalProcessorInformation"); + GetCurrentProcessorNumberProc = + (PFN_GETCURRENTPROCESSORNUMBER) GetProcAddress(kernel32, "GetCurrentProcessorNumber"); + GetCurrentProcessorNumberExProc = + (PFN_GETCURRENTPROCESSORNUMBEREX) GetProcAddress(kernel32, "GetCurrentProcessorNumberEx"); + SetThreadGroupAffinityProc = + (PFN_SETTHREADGROUPAFFINITY) GetProcAddress(kernel32, "SetThreadGroupAffinity"); + GetThreadGroupAffinityProc = + (PFN_GETTHREADGROUPAFFINITY) GetProcAddress(kernel32, "GetThreadGroupAffinity"); + GetNumaAvailableMemoryNodeProc = + (PFN_GETNUMAAVAILABLEMEMORYNODE) GetProcAddress(kernel32, "GetNumaAvailableMemoryNode"); + GetNumaAvailableMemoryNodeExProc = + (PFN_GETNUMAAVAILABLEMEMORYNODEEX) GetProcAddress(kernel32, "GetNumaAvailableMemoryNodeEx"); + GetLogicalProcessorInformationExProc = + (PFN_GETLOGICALPROCESSORINFORMATIONEX)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx"); + QueryWorkingSetExProc = + (PFN_QUERYWORKINGSETEX) GetProcAddress(kernel32, "K32QueryWorkingSetEx"); + VirtualAllocExNumaProc = + (PFN_VIRTUALALLOCEXNUMA) GetProcAddress(kernel32, "VirtualAllocExNuma"); + VirtualFreeExProc = + (PFN_VIRTUALFREEEX) GetProcAddress(kernel32, "VirtualFreeEx"); + } + + if (GetActiveProcessorGroupCountProc) + nr_processor_groups = GetActiveProcessorGroupCountProc(); + + if (!QueryWorkingSetExProc) { + HMODULE psapi = LoadLibrary("psapi.dll"); + if (psapi) + QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx"); + } +} + +/* + * ULONG_PTR and DWORD_PTR are 64/32bits depending on the arch + * while bitmaps use unsigned long (always 32bits) + */ + +static void hwloc_bitmap_from_ULONG_PTR(hwloc_bitmap_t set, ULONG_PTR mask) +{ +#if SIZEOF_VOID_P == 8 + hwloc_bitmap_from_ulong(set, mask & 0xffffffff); + hwloc_bitmap_set_ith_ulong(set, 1, mask >> 32); +#else + hwloc_bitmap_from_ulong(set, mask); +#endif +} + +static void hwloc_bitmap_from_ith_ULONG_PTR(hwloc_bitmap_t set, unsigned i, ULONG_PTR mask) +{ +#if SIZEOF_VOID_P == 8 + hwloc_bitmap_from_ith_ulong(set, 2*i, mask & 0xffffffff); + hwloc_bitmap_set_ith_ulong(set, 2*i+1, mask >> 32); +#else + hwloc_bitmap_from_ith_ulong(set, i, mask); +#endif +} + +static void hwloc_bitmap_set_ith_ULONG_PTR(hwloc_bitmap_t set, unsigned i, ULONG_PTR mask) +{ +#if SIZEOF_VOID_P == 8 + hwloc_bitmap_set_ith_ulong(set, 2*i, mask & 0xffffffff); + hwloc_bitmap_set_ith_ulong(set, 2*i+1, mask >> 32); +#else + hwloc_bitmap_set_ith_ulong(set, i, mask); +#endif +} + +static ULONG_PTR hwloc_bitmap_to_ULONG_PTR(hwloc_const_bitmap_t set) +{ +#if SIZEOF_VOID_P == 8 + ULONG_PTR up = hwloc_bitmap_to_ith_ulong(set, 1); + up <<= 32; + up |= hwloc_bitmap_to_ulong(set); + return up; +#else + return hwloc_bitmap_to_ulong(set); +#endif +} + +static ULONG_PTR hwloc_bitmap_to_ith_ULONG_PTR(hwloc_const_bitmap_t set, unsigned i) +{ +#if SIZEOF_VOID_P == 8 + ULONG_PTR up = hwloc_bitmap_to_ith_ulong(set, 2*i+1); + up <<= 32; + up |= hwloc_bitmap_to_ith_ulong(set, 2*i); + return up; +#else + return hwloc_bitmap_to_ith_ulong(set, i); +#endif +} + +/* convert set into index+mask if all set bits are in the same ULONG. + * otherwise return -1. 
+ */ +static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *index, ULONG_PTR *mask) +{ + unsigned first_ulp, last_ulp; + if (hwloc_bitmap_weight(set) == -1) + return -1; + first_ulp = hwloc_bitmap_first(set) / (sizeof(ULONG_PTR)*8); + last_ulp = hwloc_bitmap_last(set) / (sizeof(ULONG_PTR)*8); + if (first_ulp != last_ulp) + return -1; + *mask = hwloc_bitmap_to_ith_ULONG_PTR(set, first_ulp); + *index = first_ulp; + return 0; +} + +/************************************************************** + * hwloc PU numbering with respect to Windows processor groups + * + * Everywhere below we reserve 64 physical indexes per processor group because that's + * the maximum (MAXIMUM_PROC_PER_GROUP). Windows may actually use fewer bits than that + * in some groups (either to avoid splitting NUMA nodes across groups, or because of OS + * tweaks such as "bcdedit /set groupsize 8") but we keep some unused indexes for simplicity. + * That means PU physical indexes and cpusets may be non-contiguous. + * That also means hwloc_fallback_nbprocessors() below must return the last PU index + 1 + * instead of the actual number of processors. + */ + +/******************** + * last_cpu_location + */ + +static int +hwloc_win_get_thisthread_last_cpu_location(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t set, int flags __hwloc_attribute_unused) +{ + assert(GetCurrentProcessorNumberExProc || (GetCurrentProcessorNumberProc && nr_processor_groups == 1)); + + if (nr_processor_groups > 1 || !GetCurrentProcessorNumberProc) { + PROCESSOR_NUMBER num; + GetCurrentProcessorNumberExProc(&num); + hwloc_bitmap_from_ith_ULONG_PTR(set, num.Group, ((ULONG_PTR)1) << num.Number); + return 0; + } + + hwloc_bitmap_from_ith_ULONG_PTR(set, 0, ((ULONG_PTR)1) << GetCurrentProcessorNumberProc()); + return 0; +} + +/* TODO: hwloc_win_get_thisproc_last_cpu_location() using + * CreateToolhelp32Snapshot(), Thread32First/Next() + * th.th32OwnerProcessID == GetCurrentProcessId() for filtering within process + * OpenThread(THREAD_SET_INFORMATION|THREAD_QUERY_INFORMATION, FALSE, te32.th32ThreadID) to get a handle.
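+ * (any such implementation would only be approximate: threads may migrate between the snapshot and the query.)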
+ */ + + +/****************************** + * set cpu/membind for threads + */ + +/* TODO: SetThreadIdealProcessor{,Ex} */ + +static int +hwloc_win_set_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_const_bitmap_t hwloc_set, int flags) +{ + DWORD_PTR mask; + unsigned group; + + if (flags & HWLOC_CPUBIND_NOMEMBIND) { + errno = ENOSYS; + return -1; + } + + if (hwloc_bitmap_to_single_ULONG_PTR(hwloc_set, &group, &mask) < 0) { + errno = ENOSYS; + return -1; + } + + assert(nr_processor_groups == 1 || SetThreadGroupAffinityProc); + + if (nr_processor_groups > 1) { + GROUP_AFFINITY aff; + memset(&aff, 0, sizeof(aff)); /* we get Invalid Parameter error if Reserved field isn't cleared */ + aff.Group = group; + aff.Mask = mask; + if (!SetThreadGroupAffinityProc(thread, &aff, NULL)) + return -1; + + } else { + /* SetThreadAffinityMask() only changes the mask inside the current processor group */ + /* The resulting binding is always strict */ + if (!SetThreadAffinityMask(thread, mask)) + return -1; + } + return 0; +} + +static int +hwloc_win_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags) +{ + return hwloc_win_set_thread_cpubind(topology, GetCurrentThread(), hwloc_set, flags); +} + +static int +hwloc_win_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + int ret; + hwloc_const_cpuset_t cpuset; + hwloc_cpuset_t _cpuset = NULL; + + if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND) + || flags & HWLOC_MEMBIND_NOCPUBIND) { + errno = ENOSYS; + return -1; + } + + if (policy == HWLOC_MEMBIND_DEFAULT) { + cpuset = hwloc_topology_get_complete_cpuset(topology); + } else { + cpuset = _cpuset = hwloc_bitmap_alloc(); + hwloc_cpuset_from_nodeset(topology, _cpuset, nodeset); + } + + ret = hwloc_win_set_thisthread_cpubind(topology, cpuset, + (flags & HWLOC_MEMBIND_STRICT) ? 
HWLOC_CPUBIND_STRICT : 0); + hwloc_bitmap_free(_cpuset); + return ret; +} + + +/****************************** + * get cpu/membind for threads + */ + +static int +hwloc_win_get_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_cpuset_t set, int flags __hwloc_attribute_unused) +{ + GROUP_AFFINITY aff; + + assert(GetThreadGroupAffinityProc); + + if (!GetThreadGroupAffinityProc(thread, &aff)) + return -1; + hwloc_bitmap_from_ith_ULONG_PTR(set, aff.Group, aff.Mask); + return 0; +} + +static int +hwloc_win_get_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t set, int flags __hwloc_attribute_unused) +{ + return hwloc_win_get_thread_cpubind(topology, GetCurrentThread(), set, flags); +} + +static int +hwloc_win_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + int ret; + hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); + ret = hwloc_win_get_thread_cpubind(topology, GetCurrentThread(), cpuset, flags); + if (!ret) { + *policy = HWLOC_MEMBIND_BIND; + hwloc_cpuset_to_nodeset(topology, cpuset, nodeset); + } + hwloc_bitmap_free(cpuset); + return ret; +} + + +/******************************** + * set cpu/membind for processes + */ + +static int +hwloc_win_set_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_const_bitmap_t hwloc_set, int flags) +{ + DWORD_PTR mask; + + assert(nr_processor_groups == 1); + + if (flags & HWLOC_CPUBIND_NOMEMBIND) { + errno = ENOSYS; + return -1; + } + + /* TODO: SetThreadGroupAffinity() for all threads doesn't enforce the whole process affinity, + * maybe because of process-specific resource locality */ + /* TODO: if we are in a single group (check with GetProcessGroupAffinity()), + * SetProcessAffinityMask() changes the binding within that same group. + */ + /* TODO: NtSetInformationProcess() works very well for binding to any mask in a single group, + * but it's an internal routine. + */ + /* TODO: checks whether hwloc-bind.c needs to pass INHERIT_PARENT_AFFINITY to CreateProcess() instead of execvp(). */ + + /* The resulting binding is always strict */ + mask = hwloc_bitmap_to_ULONG_PTR(hwloc_set); + if (!SetProcessAffinityMask(proc, mask)) + return -1; + return 0; +} + +static int +hwloc_win_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags) +{ + return hwloc_win_set_proc_cpubind(topology, GetCurrentProcess(), hwloc_set, flags); +} + +static int +hwloc_win_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + int ret; + hwloc_const_cpuset_t cpuset; + hwloc_cpuset_t _cpuset = NULL; + + if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND) + || flags & HWLOC_MEMBIND_NOCPUBIND) { + errno = ENOSYS; + return -1; + } + + if (policy == HWLOC_MEMBIND_DEFAULT) { + cpuset = hwloc_topology_get_complete_cpuset(topology); + } else { + cpuset = _cpuset = hwloc_bitmap_alloc(); + hwloc_cpuset_from_nodeset(topology, _cpuset, nodeset); + } + + ret = hwloc_win_set_proc_cpubind(topology, pid, cpuset, + (flags & HWLOC_MEMBIND_STRICT) ? 
HWLOC_CPUBIND_STRICT : 0); + hwloc_bitmap_free(_cpuset); + return ret; +} + +static int +hwloc_win_set_thisproc_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) +{ + return hwloc_win_set_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags); +} + + +/******************************** + * get cpu/membind for processes + */ + +static int +hwloc_win_get_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_bitmap_t hwloc_set, int flags) +{ + DWORD_PTR proc_mask, sys_mask; + + assert(nr_processor_groups == 1); + + if (flags & HWLOC_CPUBIND_NOMEMBIND) { + errno = ENOSYS; + return -1; + } + + /* TODO: if we are in a single group (check with GetProcessGroupAffinity()), + * GetProcessAffinityMask() gives the mask within that group. + */ + /* TODO: if we are in multiple groups, GetProcessGroupAffinity() gives their IDs, + * but we don't know their masks. + */ + /* TODO: GetThreadGroupAffinity() for all threads can be smaller than the whole process affinity, + * maybe because of process-specific resource locality. + */ + + if (!GetProcessAffinityMask(proc, &proc_mask, &sys_mask)) + return -1; + hwloc_bitmap_from_ULONG_PTR(hwloc_set, proc_mask); + return 0; +} + +static int +hwloc_win_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + int ret; + hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); + ret = hwloc_win_get_proc_cpubind(topology, pid, cpuset, + (flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0); + if (!ret) { + *policy = HWLOC_MEMBIND_BIND; + hwloc_cpuset_to_nodeset(topology, cpuset, nodeset); + } + hwloc_bitmap_free(cpuset); + return ret; +} + +static int +hwloc_win_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags) +{ + return hwloc_win_get_proc_cpubind(topology, GetCurrentProcess(), hwloc_cpuset, flags); +} + +static int +hwloc_win_get_thisproc_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) +{ + return hwloc_win_get_proc_membind(topology, GetCurrentProcess(), nodeset, policy, flags); +} + + +/************************ + * membind alloc/free + */ + +static void * +hwloc_win_alloc(hwloc_topology_t topology __hwloc_attribute_unused, size_t len) { + return VirtualAlloc(NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE); +} + +static void * +hwloc_win_alloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) { + int node; + + switch (policy) { + case HWLOC_MEMBIND_DEFAULT: + case HWLOC_MEMBIND_BIND: + break; + default: + errno = ENOSYS; + return hwloc_alloc_or_fail(topology, len, flags); + } + + if (flags & HWLOC_MEMBIND_STRICT) { + errno = ENOSYS; + return NULL; + } + + if (policy == HWLOC_MEMBIND_DEFAULT + || hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology))) + return hwloc_win_alloc(topology, len); + + if (hwloc_bitmap_weight(nodeset) != 1) { + /* Not a single node, can't do this */ + errno = EXDEV; + return hwloc_alloc_or_fail(topology, len, flags); + } + + node = hwloc_bitmap_first(nodeset); + return VirtualAllocExNumaProc(GetCurrentProcess(), NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE, node); +} + +static int +hwloc_win_free_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len __hwloc_attribute_unused) { + if (!addr) + return 0; + if 
(!VirtualFreeExProc(GetCurrentProcess(), addr, 0, MEM_RELEASE)) + return -1; + return 0; +} + + +/********************** + * membind for areas + */ + +static int +hwloc_win_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr, size_t len, hwloc_nodeset_t nodeset, int flags __hwloc_attribute_unused) +{ + SYSTEM_INFO SystemInfo; + DWORD page_size; + uintptr_t start; + unsigned nb; + PSAPI_WORKING_SET_EX_INFORMATION *pv; + unsigned i; + + GetSystemInfo(&SystemInfo); + page_size = SystemInfo.dwPageSize; + + start = (((uintptr_t) addr) / page_size) * page_size; + nb = (unsigned)((((uintptr_t) addr + len - start) + page_size - 1) / page_size); + + if (!nb) + nb = 1; + + pv = calloc(nb, sizeof(*pv)); + if (!pv) + return -1; + + for (i = 0; i < nb; i++) + pv[i].VirtualAddress = (void*) (start + i * page_size); + if (!QueryWorkingSetExProc(GetCurrentProcess(), pv, nb * sizeof(*pv))) { + free(pv); + return -1; + } + + for (i = 0; i < nb; i++) { + if (pv[i].VirtualAttributes.Valid) + hwloc_bitmap_set(nodeset, pv[i].VirtualAttributes.Node); + } + + free(pv); + return 0; +} + + +/************************* + * discovery + */ + +static int +hwloc_look_windows(struct hwloc_backend *backend) +{ + struct hwloc_topology *topology = backend->topology; + hwloc_bitmap_t groups_pu_set = NULL; + SYSTEM_INFO SystemInfo; + DWORD length; + int gotnuma = 0; + int gotnumamemory = 0; + + if (topology->levels[0][0]->cpuset) + /* somebody discovered things */ + return -1; + + hwloc_alloc_root_sets(topology->levels[0][0]); + + GetSystemInfo(&SystemInfo); + + if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo, tmpprocInfo; + unsigned id; + unsigned i; + struct hwloc_obj *obj; + hwloc_obj_type_t type; + + length = 0; + procInfo = NULL; + + while (1) { + if (GetLogicalProcessorInformationProc(procInfo, &length)) + break; + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) + return -1; + tmpprocInfo = realloc(procInfo, length); + if (!tmpprocInfo) { + free(procInfo); + goto out; + } + procInfo = tmpprocInfo; + } + + assert(!length || procInfo); + + for (i = 0; i < length / sizeof(*procInfo); i++) { + + /* Ignore unknown caches */ + if (procInfo[i].Relationship == RelationCache + && procInfo[i].Cache.Type != CacheUnified + && procInfo[i].Cache.Type != CacheData + && procInfo[i].Cache.Type != CacheInstruction) + continue; + + id = HWLOC_UNKNOWN_INDEX; + switch (procInfo[i].Relationship) { + case RelationNumaNode: + type = HWLOC_OBJ_NUMANODE; + id = procInfo[i].NumaNode.NodeNumber; + gotnuma++; + if (id > max_numanode_index) + max_numanode_index = id; + break; + case RelationProcessorPackage: + type = HWLOC_OBJ_PACKAGE; + break; + case RelationCache: + type = (procInfo[i].Cache.Type == CacheInstruction ? 
HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo[i].Cache.Level - 1; + break; + case RelationProcessorCore: + type = HWLOC_OBJ_CORE; + break; + case RelationGroup: + default: + type = HWLOC_OBJ_GROUP; + break; + } + + if (!hwloc_filter_check_keep_object_type(topology, type)) + continue; + + obj = hwloc_alloc_setup_object(topology, type, id); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_debug("%s#%u mask %llx\n", hwloc_obj_type_string(type), id, (unsigned long long) procInfo[i].ProcessorMask); + /* ProcessorMask is a ULONG_PTR */ + hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, 0, procInfo[i].ProcessorMask); + hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset); + + switch (type) { + case HWLOC_OBJ_NUMANODE: + { + ULONGLONG avail; + obj->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(obj->nodeset, id); + if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail)) + || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) { + obj->attr->numanode.local_memory = avail; + gotnumamemory++; + } + obj->attr->numanode.page_types_len = 2; + obj->attr->numanode.page_types = malloc(2 * sizeof(*obj->attr->numanode.page_types)); + memset(obj->attr->numanode.page_types, 0, 2 * sizeof(*obj->attr->numanode.page_types)); + obj->attr->numanode.page_types_len = 1; + obj->attr->numanode.page_types[0].size = SystemInfo.dwPageSize; +#if HAVE_DECL__SC_LARGE_PAGESIZE + obj->attr->numanode.page_types_len++; + obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); +#endif + break; + } + case HWLOC_OBJ_L1CACHE: + case HWLOC_OBJ_L2CACHE: + case HWLOC_OBJ_L3CACHE: + case HWLOC_OBJ_L4CACHE: + case HWLOC_OBJ_L5CACHE: + case HWLOC_OBJ_L1ICACHE: + case HWLOC_OBJ_L2ICACHE: + case HWLOC_OBJ_L3ICACHE: + obj->attr->cache.size = procInfo[i].Cache.Size; + obj->attr->cache.associativity = procInfo[i].Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo[i].Cache.Associativity ; + obj->attr->cache.linesize = procInfo[i].Cache.LineSize; + obj->attr->cache.depth = procInfo[i].Cache.Level; + switch (procInfo->Cache.Type) { + case CacheUnified: + obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + break; + case CacheData: + obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; + break; + case CacheInstruction: + obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; + break; + default: + hwloc_free_unlinked_object(obj); + continue; + } + break; + case HWLOC_OBJ_GROUP: + obj->attr->group.kind = procInfo[i].Relationship == RelationGroup ? 
HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP : HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN; + break; + default: + break; + } + hwloc_insert_object_by_cpuset(topology, obj); + } + + free(procInfo); + } + + if (GetLogicalProcessorInformationExProc) { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo; + unsigned id; + struct hwloc_obj *obj; + hwloc_obj_type_t type; + + length = 0; + procInfoTotal = NULL; + + while (1) { + if (GetLogicalProcessorInformationExProc(RelationAll, procInfoTotal, &length)) + break; + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) + return -1; + tmpprocInfoTotal = realloc(procInfoTotal, length); + if (!tmpprocInfoTotal) { + free(procInfoTotal); + goto out; + } + procInfoTotal = tmpprocInfoTotal; + } + + for (procInfo = procInfoTotal; + (void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length); + procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) { + unsigned num, i; + GROUP_AFFINITY *GroupMask; + + /* Ignore unknown caches */ + if (procInfo->Relationship == RelationCache + && procInfo->Cache.Type != CacheUnified + && procInfo->Cache.Type != CacheData + && procInfo->Cache.Type != CacheInstruction) + continue; + + id = HWLOC_UNKNOWN_INDEX; + switch (procInfo->Relationship) { + case RelationNumaNode: + type = HWLOC_OBJ_NUMANODE; + num = 1; + GroupMask = &procInfo->NumaNode.GroupMask; + id = procInfo->NumaNode.NodeNumber; + gotnuma++; + if (id > max_numanode_index) + max_numanode_index = id; + break; + case RelationProcessorPackage: + type = HWLOC_OBJ_PACKAGE; + num = procInfo->Processor.GroupCount; + GroupMask = procInfo->Processor.GroupMask; + break; + case RelationCache: + type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1; + num = 1; + GroupMask = &procInfo->Cache.GroupMask; + break; + case RelationProcessorCore: + type = HWLOC_OBJ_CORE; + num = procInfo->Processor.GroupCount; + GroupMask = procInfo->Processor.GroupMask; + break; + case RelationGroup: + /* So strange an interface... */ + for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) { + KAFFINITY mask; + hwloc_bitmap_t set; + + set = hwloc_bitmap_alloc(); + mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask; + hwloc_debug("group %u %d cpus mask %lx\n", id, + procInfo->Group.GroupInfo[id].ActiveProcessorCount, mask); + /* KAFFINITY is ULONG_PTR */ + hwloc_bitmap_set_ith_ULONG_PTR(set, id, mask); + /* FIXME: what if running 32bits on a 64bits windows with 64-processor groups? + * ULONG_PTR is 32bits, so half the group is invisible? + * maybe scale id to id*8/sizeof(ULONG_PTR) so that groups are 64-PU aligned? + */ + hwloc_debug_2args_bitmap("group %u %d bitmap %s\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, set); + + /* save the set of PUs so that we can create them at the end */ + if (!groups_pu_set) + groups_pu_set = hwloc_bitmap_alloc(); + hwloc_bitmap_or(groups_pu_set, groups_pu_set, set); + + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { + obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id); + obj->cpuset = set; + obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP; + hwloc_insert_object_by_cpuset(topology, obj); + } else + hwloc_bitmap_free(set); + } + continue; + default: + /* Don't know how to get the mask. 
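+ * (Illustrative note, not from upstream: newer Windows releases may return
+ * relationship kinds this code does not know about; they are merely logged
+ * via hwloc_debug() below and then skipped.)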
*/ + hwloc_debug("unknown relation %d\n", procInfo->Relationship); + continue; + } + + if (!hwloc_filter_check_keep_object_type(topology, type)) + continue; + + obj = hwloc_alloc_setup_object(topology, type, id); + obj->cpuset = hwloc_bitmap_alloc(); + for (i = 0; i < num; i++) { + hwloc_debug("%s#%u %d: mask %d:%lx\n", hwloc_obj_type_string(type), id, i, GroupMask[i].Group, GroupMask[i].Mask); + /* GROUP_AFFINITY.Mask is KAFFINITY, which is ULONG_PTR */ + hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, GroupMask[i].Group, GroupMask[i].Mask); + /* FIXME: scale id to id*8/sizeof(ULONG_PTR) as above? */ + } + hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset); + switch (type) { + case HWLOC_OBJ_NUMANODE: + { + ULONGLONG avail; + obj->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(obj->nodeset, id); + if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail)) + || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) { + obj->attr->numanode.local_memory = avail; + gotnumamemory++; + } + obj->attr->numanode.page_types = malloc(2 * sizeof(*obj->attr->numanode.page_types)); + memset(obj->attr->numanode.page_types, 0, 2 * sizeof(*obj->attr->numanode.page_types)); + obj->attr->numanode.page_types_len = 1; + obj->attr->numanode.page_types[0].size = SystemInfo.dwPageSize; +#if HAVE_DECL__SC_LARGE_PAGESIZE + obj->attr->numanode.page_types_len++; + obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); +#endif + break; + } + case HWLOC_OBJ_L1CACHE: + case HWLOC_OBJ_L2CACHE: + case HWLOC_OBJ_L3CACHE: + case HWLOC_OBJ_L4CACHE: + case HWLOC_OBJ_L5CACHE: + case HWLOC_OBJ_L1ICACHE: + case HWLOC_OBJ_L2ICACHE: + case HWLOC_OBJ_L3ICACHE: + obj->attr->cache.size = procInfo->Cache.CacheSize; + obj->attr->cache.associativity = procInfo->Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo->Cache.Associativity ; + obj->attr->cache.linesize = procInfo->Cache.LineSize; + obj->attr->cache.depth = procInfo->Cache.Level; + switch (procInfo->Cache.Type) { + case CacheUnified: + obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + break; + case CacheData: + obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; + break; + case CacheInstruction: + obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; + break; + default: + hwloc_free_unlinked_object(obj); + continue; + } + break; + default: + break; + } + hwloc_insert_object_by_cpuset(topology, obj); + } + free(procInfoTotal); + } + + topology->support.discovery->pu = 1; + topology->support.discovery->numa = gotnuma; + topology->support.discovery->numa_memory = gotnumamemory; + + if (groups_pu_set) { + /* the system supports multiple Groups. + * PU indexes may be discontiguous, especially if Groups contain less than 64 procs. 
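+ * For example (illustrative, not from upstream): since each group occupies
+ * one ULONG_PTR word of the bitmap, two 48-PU groups on 64-bit Windows yield
+ * PU indexes 0-47 and 64-111 with a hole in between, which is why the PU
+ * objects are created from groups_pu_set rather than from a plain 0..N-1 range.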
+ */ + hwloc_obj_t obj; + unsigned idx; + hwloc_bitmap_foreach_begin(idx, groups_pu_set) { + obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, idx); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_only(obj->cpuset, idx); + hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n", + idx, obj->cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + } hwloc_bitmap_foreach_end(); + hwloc_bitmap_free(groups_pu_set); + } else { + /* no processor groups */ + SYSTEM_INFO sysinfo; + hwloc_obj_t obj; + unsigned idx; + GetSystemInfo(&sysinfo); + for(idx=0; idx<32; idx++) + if (sysinfo.dwActiveProcessorMask & (((DWORD_PTR)1)<<idx)) { + obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, idx); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_only(obj->cpuset, idx); + hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n", + idx, obj->cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + } + } + + out: + hwloc_obj_add_info(topology->levels[0][0], "Backend", "Windows"); + hwloc_add_uname_info(topology, NULL); + return 0; +} + +void +hwloc_set_windows_hooks(struct hwloc_binding_hooks *hooks, + struct hwloc_topology_support *support) +{ + if (GetCurrentProcessorNumberExProc || (GetCurrentProcessorNumberProc && nr_processor_groups == 1)) + hooks->get_thisthread_last_cpu_location = hwloc_win_get_thisthread_last_cpu_location; + + if (nr_processor_groups == 1) { + hooks->set_proc_cpubind = hwloc_win_set_proc_cpubind; + hooks->get_proc_cpubind = hwloc_win_get_proc_cpubind; + hooks->set_thisproc_cpubind = hwloc_win_set_thisproc_cpubind; + hooks->get_thisproc_cpubind = hwloc_win_get_thisproc_cpubind; + hooks->set_proc_membind = hwloc_win_set_proc_membind; + hooks->get_proc_membind = hwloc_win_get_proc_membind; + hooks->set_thisproc_membind = hwloc_win_set_thisproc_membind; + hooks->get_thisproc_membind = hwloc_win_get_thisproc_membind; + } + if (nr_processor_groups == 1 || SetThreadGroupAffinityProc) { + hooks->set_thread_cpubind = hwloc_win_set_thread_cpubind; + hooks->set_thisthread_cpubind = hwloc_win_set_thisthread_cpubind; + hooks->set_thisthread_membind = hwloc_win_set_thisthread_membind; + } + if (GetThreadGroupAffinityProc) { + hooks->get_thread_cpubind = hwloc_win_get_thread_cpubind; + hooks->get_thisthread_cpubind = hwloc_win_get_thisthread_cpubind; + hooks->get_thisthread_membind = hwloc_win_get_thisthread_membind; + } + + if (VirtualAllocExNumaProc) { + hooks->alloc_membind = hwloc_win_alloc_membind; + hooks->alloc = hwloc_win_alloc; + hooks->free_membind = hwloc_win_free_membind; + support->membind->bind_membind = 1; + } + + if (QueryWorkingSetExProc && max_numanode_index <= 63 /* PSAPI_WORKING_SET_EX_BLOCK.Node is 6 bits only */) + hooks->get_area_memlocation = hwloc_win_get_area_memlocation; +} + +static int hwloc_windows_component_init(unsigned long flags __hwloc_attribute_unused) +{ + hwloc_win_get_function_ptrs(); + return 0; +} + +static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribute_unused) +{ +} + +static struct hwloc_backend * +hwloc_windows_component_instantiate(struct hwloc_disc_component *component, + const void *_data1 __hwloc_attribute_unused, + const void *_data2 __hwloc_attribute_unused, + const void *_data3 __hwloc_attribute_unused) +{ + struct hwloc_backend *backend; + backend = hwloc_backend_alloc(component); + if (!backend) + return NULL; + backend->discover = hwloc_look_windows; + return backend; +} + +static struct hwloc_disc_component hwloc_windows_disc_component = { + HWLOC_DISC_COMPONENT_TYPE_CPU, + "windows", + 
HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + hwloc_windows_component_instantiate, + 50, + 1, + NULL +}; + +const struct hwloc_component hwloc_windows_component = { + HWLOC_COMPONENT_ABI, + hwloc_windows_component_init, hwloc_windows_component_finalize, + HWLOC_COMPONENT_TYPE_DISC, + 0, + &hwloc_windows_disc_component +}; + +int +hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) { + int n; + SYSTEM_INFO sysinfo; + + /* by default, ignore groups (return only the number in the current group) */ + GetSystemInfo(&sysinfo); + n = sysinfo.dwNumberOfProcessors; /* FIXME could be non-contigous, rather return a mask from dwActiveProcessorMask? */ + + if (nr_processor_groups > 1) { + /* assume n-1 groups are complete, since that's how we store things in cpusets */ + if (GetActiveProcessorCountProc) + n = MAXIMUM_PROC_PER_GROUP*(nr_processor_groups-1) + + GetActiveProcessorCountProc((WORD)nr_processor_groups-1); + else + n = MAXIMUM_PROC_PER_GROUP*nr_processor_groups; + } + + return n; +} diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c new file mode 100644 index 000000000..4aefdcf1f --- /dev/null +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -0,0 +1,1583 @@ +/* + * Copyright © 2010-2019 Inria. All rights reserved. + * Copyright © 2010-2013 Université Bordeaux + * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + * + * + * This backend is only used when the operating system does not export + * the necessary hardware topology information to user-space applications. + * Currently, only the FreeBSD backend relies on this x86 backend. + * + * Other backends such as Linux have their own way to retrieve various + * pieces of hardware topology information from the operating system + * on various architectures, without having to use this x86-specific code. 
+ */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/private.h> +#include <private/debug.h> +#include <private/misc.h> + +#include <private/cpuid-x86.h> + +#include <sys/types.h> +#ifdef HAVE_DIRENT_H +#include <dirent.h> +#endif +#ifdef HAVE_VALGRIND_VALGRIND_H +#include <valgrind/valgrind.h> +#endif + +struct hwloc_x86_backend_data_s { + unsigned nbprocs; + hwloc_bitmap_t apicid_set; + int apicid_unique; + char *src_cpuiddump_path; + int is_knl; +}; + +/************************************ + * Management of cpuid dump as input + */ + +struct cpuiddump { + unsigned nr; + struct cpuiddump_entry { + unsigned inmask; /* which of ine[abcd]x are set on input */ + unsigned ineax; + unsigned inebx; + unsigned inecx; + unsigned inedx; + unsigned outeax; + unsigned outebx; + unsigned outecx; + unsigned outedx; + } *entries; +}; + +static void +cpuiddump_free(struct cpuiddump *cpuiddump) +{ + if (cpuiddump->nr) + free(cpuiddump->entries); + free(cpuiddump); +} + +static struct cpuiddump * +cpuiddump_read(const char *dirpath, unsigned idx) +{ + struct cpuiddump *cpuiddump; + struct cpuiddump_entry *cur; + FILE *file; + char line[128]; + unsigned nr; + + cpuiddump = malloc(sizeof(*cpuiddump)); + if (!cpuiddump) { + fprintf(stderr, "Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx); + goto out; + } + + { + size_t filenamelen = strlen(dirpath) + 15; + HWLOC_VLA(char, filename, filenamelen); + snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx); + file = fopen(filename, "r"); + if (!file) { + fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename); + goto out_with_dump; + } + } + + nr = 0; + while (fgets(line, sizeof(line), file)) + nr++; + cpuiddump->entries = malloc(nr * sizeof(struct cpuiddump_entry)); + if (!cpuiddump->entries) { + fprintf(stderr, "Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx); + goto out_with_file; + } + + fseek(file, 0, SEEK_SET); + cur = &cpuiddump->entries[0]; + nr = 0; + while (fgets(line, sizeof(line), file)) { + if (*line == '#') + continue; + if (sscanf(line, "%x %x %x %x %x => %x %x %x %x", + &cur->inmask, + &cur->ineax, &cur->inebx, &cur->inecx, &cur->inedx, + &cur->outeax, &cur->outebx, &cur->outecx, &cur->outedx) == 9) { + cur++; + nr++; + } + } + + cpuiddump->nr = nr; + fclose(file); + return cpuiddump; + + out_with_file: + fclose(file); + out_with_dump: + free(cpuiddump); + out: + return NULL; +} + +static void +cpuiddump_find_by_input(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx, struct cpuiddump *cpuiddump) +{ + unsigned i; + + for(i=0; i<cpuiddump->nr; i++) { + struct cpuiddump_entry *entry = &cpuiddump->entries[i]; + if ((entry->inmask & 0x1) && *eax != entry->ineax) + continue; + if ((entry->inmask & 0x2) && *ebx != entry->inebx) + continue; + if ((entry->inmask & 0x4) && *ecx != entry->inecx) + continue; + if ((entry->inmask & 0x8) && *edx != entry->inedx) + continue; + *eax = entry->outeax; + *ebx = entry->outebx; + *ecx = entry->outecx; + *edx = entry->outedx; + return; + } + + fprintf(stderr, "Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n", + *eax, *ebx, *ecx, *edx); + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; +} + +static void cpuid_or_from_dump(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx, struct cpuiddump *src_cpuiddump) +{ + if (src_cpuiddump) { + cpuiddump_find_by_input(eax, ebx, ecx, edx, src_cpuiddump); + } else { + hwloc_x86_cpuid(eax, ebx, ecx, edx); + } +} + 
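+
+/* Illustrative note (not part of the upstream sources): cpuiddump_read()
+ * above parses dumps produced by hwloc-gather-cpuid, one "pu%u" file per PU,
+ * where each non-comment line follows "%x %x %x %x %x => %x %x %x %x", e.g.
+ *
+ *   5 1 0 0 0 => 40651 1100800 7ffafbff bfebfbff   (hypothetical values)
+ *
+ * The first field is the input mask (bit 0 = match EAX, bit 1 = EBX,
+ * bit 2 = ECX, bit 3 = EDX): 0x5 means this entry answers queries with
+ * EAX=0x1 and ECX=0x0, and cpuiddump_find_by_input() then copies the four
+ * output registers back to the caller.
+ */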
+/******************************* + * Core detection routines and structures + */ + +#define has_topoext(features) ((features)[6] & (1 << 22)) +#define has_x2apic(features) ((features)[4] & (1 << 21)) + +struct cacheinfo { + hwloc_obj_cache_type_t type; + unsigned level; + unsigned nbthreads_sharing; + unsigned cacheid; + + unsigned linesize; + unsigned linepart; + int inclusive; + int ways; + unsigned sets; + unsigned long size; +}; + +struct procinfo { + unsigned present; + unsigned apicid; + unsigned packageid; + unsigned dieid; + unsigned nodeid; + unsigned unitid; + unsigned threadid; + unsigned coreid; + unsigned *otherids; + unsigned levels; + unsigned numcaches; + struct cacheinfo *cache; + char cpuvendor[13]; + char cpumodel[3*4*4+1]; + unsigned cpustepping; + unsigned cpumodelnumber; + unsigned cpufamilynumber; +}; + +enum cpuid_type { + intel, + amd, + zhaoxin, + hygon, + unknown +}; + +static void fill_amd_cache(struct procinfo *infos, unsigned level, hwloc_obj_cache_type_t type, unsigned nbthreads_sharing, unsigned cpuid) +{ + struct cacheinfo *cache, *tmpcaches; + unsigned cachenum; + unsigned long size = 0; + + if (level == 1) + size = ((cpuid >> 24)) << 10; + else if (level == 2) + size = ((cpuid >> 16)) << 10; + else if (level == 3) + size = ((cpuid >> 18)) << 19; + if (!size) + return; + + tmpcaches = realloc(infos->cache, (infos->numcaches+1)*sizeof(*infos->cache)); + if (!tmpcaches) + /* failed to allocated, ignore that cache */ + return; + infos->cache = tmpcaches; + cachenum = infos->numcaches++; + + cache = &infos->cache[cachenum]; + + cache->type = type; + cache->level = level; + cache->nbthreads_sharing = nbthreads_sharing; + cache->linesize = cpuid & 0xff; + cache->linepart = 0; + cache->inclusive = 0; /* old AMD (K8-K10) supposed to have exclusive caches */ + + if (level == 1) { + cache->ways = (cpuid >> 16) & 0xff; + if (cache->ways == 0xff) + /* Fully associative */ + cache->ways = -1; + } else { + static const unsigned ways_tab[] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, -1 }; + unsigned ways = (cpuid >> 12) & 0xf; + cache->ways = ways_tab[ways]; + } + cache->size = size; + cache->sets = 0; + + hwloc_debug("cache L%u t%u linesize %u ways %d size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10); +} + +static void look_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump) +{ + unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id; + unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */ + unsigned eax, ebx, ecx = 0, edx; + int apic_packageshift = 0; + + for (level = 0; ; level++) { + ecx = level; + eax = leaf; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if (!eax && !ebx) + break; + apic_packageshift = eax & 0x1f; + } + + if (level) { + infos->otherids = malloc(level * sizeof(*infos->otherids)); + if (infos->otherids) { + infos->levels = level; + for (level = 0; ; level++) { + ecx = level; + eax = leaf; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if (!eax && !ebx) + break; + apic_nextshift = eax & 0x1f; + apic_number = ebx & 0xffff; + apic_type = (ecx & 0xff00) >> 8; + apic_id = edx; + id = (apic_id >> apic_shift) & ((1 << (apic_packageshift - apic_shift)) - 1); + hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id); + infos->apicid = apic_id; + infos->otherids[level] = UINT_MAX; + switch (apic_type) { + case 
1: + threadid = id; + /* apic_number is the actual number of threads per core */ + break; + case 2: + infos->coreid = id; + /* apic_number is the actual number of threads per module */ + break; + case 5: + infos->dieid = id; + /* apic_number is the actual number of threads per package */ + break; + default: + hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type); + infos->otherids[level] = apic_id >> apic_shift; + break; + } + apic_shift = apic_nextshift; + } + infos->apicid = apic_id; + infos->packageid = apic_id >> apic_shift; + hwloc_debug("x2APIC remainder: %u\n", infos->packageid); + hwloc_debug("this is thread %u of core %u\n", threadid, infos->coreid); + } + } +} + +/* Fetch information from the processor itself thanks to cpuid and store it in + * infos for summarize to analyze them globally */ +static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump) +{ + struct hwloc_x86_backend_data_s *data = backend->private_data; + unsigned eax, ebx, ecx = 0, edx; + unsigned cachenum; + struct cacheinfo *cache; + unsigned regs[4]; + unsigned legacy_max_log_proc; /* not valid on Intel processors with > 256 threads, or when cpuid 0x80000008 is supported */ + unsigned legacy_log_proc_id; + unsigned _model, _extendedmodel, _family, _extendedfamily; + + infos->present = 1; + + /* Get apicid, legacy_max_log_proc, packageid, legacy_log_proc_id from cpuid 0x01 */ + eax = 0x01; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + infos->apicid = ebx >> 24; + if (edx & (1 << 28)) + legacy_max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1); + else + legacy_max_log_proc = 1; + hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc); + infos->packageid = infos->apicid / legacy_max_log_proc; + legacy_log_proc_id = infos->apicid % legacy_max_log_proc; + hwloc_debug("phys %u legacy thread %u\n", infos->packageid, legacy_log_proc_id); + + /* Get cpu model/family/stepping numbers from same cpuid */ + _model = (eax>>4) & 0xf; + _extendedmodel = (eax>>16) & 0xf; + _family = (eax>>8) & 0xf; + _extendedfamily = (eax>>20) & 0xff; + if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == hygon) && _family == 0xf) { + infos->cpufamilynumber = _family + _extendedfamily; + } else { + infos->cpufamilynumber = _family; + } + if ((cpuid_type == intel && (_family == 0x6 || _family == 0xf)) + || ((cpuid_type == amd || cpuid_type == hygon) && _family == 0xf) + || (cpuid_type == zhaoxin && (_family == 0x6 || _family == 0x7))) { + infos->cpumodelnumber = _model + (_extendedmodel << 4); + } else { + infos->cpumodelnumber = _model; + } + infos->cpustepping = eax & 0xf; + + if (cpuid_type == intel && infos->cpufamilynumber == 0x6 && + (infos->cpumodelnumber == 0x57 || infos->cpumodelnumber == 0x85)) + data->is_knl = 1; /* KNM is the same as KNL */ + + /* Get cpu vendor string from cpuid 0x00 */ + memset(regs, 0, sizeof(regs)); + regs[0] = 0; + cpuid_or_from_dump(®s[0], ®s[1], ®s[3], ®s[2], src_cpuiddump); + memcpy(infos->cpuvendor, regs+1, 4*3); + /* infos was calloc'ed, already ends with \0 */ + + /* Get cpu model string from cpuid 0x80000002-4 */ + if (highest_ext_cpuid >= 0x80000004) { + memset(regs, 0, sizeof(regs)); + regs[0] = 0x80000002; + cpuid_or_from_dump(®s[0], ®s[1], ®s[2], ®s[3], src_cpuiddump); + memcpy(infos->cpumodel, regs, 4*4); + regs[0] = 0x80000003; + cpuid_or_from_dump(®s[0], ®s[1], ®s[2], ®s[3], 
src_cpuiddump); + memcpy(infos->cpumodel + 4*4, regs, 4*4); + regs[0] = 0x80000004; + cpuid_or_from_dump(®s[0], ®s[1], ®s[2], ®s[3], src_cpuiddump); + memcpy(infos->cpumodel + 4*4*2, regs, 4*4); + /* infos was calloc'ed, already ends with \0 */ + } + + /* Get core/thread information from cpuid 0x80000008 + * (not supported on Intel) + */ + if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008) { + unsigned max_nbcores; + unsigned max_nbthreads; + unsigned coreidsize; + unsigned logprocid; + eax = 0x80000008; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + coreidsize = (ecx >> 12) & 0xf; + hwloc_debug("core ID size: %u\n", coreidsize); + if (!coreidsize) { + max_nbcores = (ecx & 0xff) + 1; + } else + max_nbcores = 1 << coreidsize; + hwloc_debug("Thus max # of cores: %u\n", max_nbcores); + /* Still no multithreaded AMD */ + max_nbthreads = 1 ; + hwloc_debug("and max # of threads: %u\n", max_nbthreads); + /* legacy_max_log_proc is deprecated, it can be smaller than max_nbcores, + * which is the maximum number of cores that the processor could theoretically support + * (see "Multiple Core Calculation" in the AMD CPUID specification). + * Recompute packageid/threadid/coreid accordingly. + */ + infos->packageid = infos->apicid / max_nbcores; + logprocid = infos->apicid % max_nbcores; + infos->threadid = logprocid % max_nbthreads; + infos->coreid = logprocid / max_nbthreads; + hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid); + } + + infos->numcaches = 0; + infos->cache = NULL; + + /* Get apicid, nodeid, unitid from cpuid 0x8000001e + * and cache information from cpuid 0x8000001d + * (AMD topology extension) + */ + if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) { + unsigned apic_id, node_id, nodes_per_proc; + + /* the code below doesn't want any other cache yet */ + assert(!infos->numcaches); + + eax = 0x8000001e; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + infos->apicid = apic_id = eax; + + if (infos->cpufamilynumber == 0x16) { + /* ecx is reserved */ + node_id = 0; + nodes_per_proc = 1; + } else { + /* AMD other families or Hygon family 18h */ + node_id = ecx & 0xff; + nodes_per_proc = ((ecx >> 8) & 7) + 1; + } + infos->nodeid = node_id; + if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2) + || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) { + hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc); + } + + if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */ + unsigned unit_id, cores_per_unit; + infos->unitid = unit_id = ebx & 0xff; + cores_per_unit = ((ebx >> 8) & 0xff) + 1; + hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id); + /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor). + * The Linux kernel reduces theses to NUMA-node-wide (by applying %core_per_node and %unit_per node respectively). + * It's not clear if we should do this as well. 
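+ * (Illustrative, not from upstream: with such a reduction, the 16-core
+ * two-node package above would renumber cores 8-15 as node-local cores 0-7
+ * and units 4-7 as node-local units 0-3.)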
+ */ + } else { + unsigned core_id, threads_per_core; + infos->coreid = core_id = ebx & 0xff; + threads_per_core = ((ebx >> 8) & 0xff) + 1; + hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id); + } + + for (cachenum = 0; ; cachenum++) { + eax = 0x8000001d; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if ((eax & 0x1f) == 0) + break; + infos->numcaches++; + } + + cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); + if (cache) { + for (cachenum = 0; ; cachenum++) { + unsigned long linesize, linepart, ways, sets; + eax = 0x8000001d; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + if ((eax & 0x1f) == 0) + break; + switch (eax & 0x1f) { + case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; + case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; + default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; + } + + cache->level = (eax >> 5) & 0x7; + /* Note: actually number of cores */ + cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; + + cache->linesize = linesize = (ebx & 0xfff) + 1; + cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; + ways = ((ebx >> 22) & 0x3ff) + 1; + + if (eax & (1 << 9)) + /* Fully associative */ + cache->ways = -1; + else + cache->ways = ways; + cache->sets = sets = ecx + 1; + cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; + + hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", + cachenum, cache->level, + cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u', + cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); + + cache++; + } + } else { + infos->numcaches = 0; + } + } else { + /* If there's no topoext, + * get cache information from cpuid 0x80000005 and 0x80000006 + * (not supported on Intel) + */ + if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000005) { + eax = 0x80000005; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_DATA, 1, ecx); /* private L1d */ + fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_INSTRUCTION, 1, edx); /* private L1i */ + } + if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) { + eax = 0x80000006; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + if (ecx & 0xf000) + /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11. + * Could be useful if some Intels (at least before Core micro-architecture) + * support this leaf without leaf 0x4. 
+ */ + fill_amd_cache(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx); /* private L2u */ + if (edx & 0xf000) + fill_amd_cache(infos, 3, HWLOC_OBJ_CACHE_UNIFIED, legacy_max_log_proc, edx); /* package-wide L3u */ + } + } + + /* Get thread/core + cache information from cpuid 0x04 + * (not supported on AMD) + */ + if ((cpuid_type != amd && cpuid_type != hygon) && highest_cpuid >= 0x04) { + unsigned max_nbcores; + unsigned max_nbthreads; + unsigned level; + struct cacheinfo *tmpcaches; + unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */ + + for (cachenum = 0; ; cachenum++) { + eax = 0x04; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + hwloc_debug("cache %u type %u\n", cachenum, eax & 0x1f); + if ((eax & 0x1f) == 0) + break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; + infos->numcaches++; + + if (!cachenum) { + /* by the way, get thread/core information from the first cache */ + max_nbcores = ((eax >> 26) & 0x3f) + 1; + max_nbthreads = legacy_max_log_proc / max_nbcores; + hwloc_debug("thus %u threads\n", max_nbthreads); + infos->threadid = legacy_log_proc_id % max_nbthreads; + infos->coreid = legacy_log_proc_id / max_nbthreads; + hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid); + } + } + + tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache)); + if (!tmpcaches) { + infos->numcaches = oldnumcaches; + } else { + infos->cache = tmpcaches; + cache = &infos->cache[oldnumcaches]; + + for (cachenum = 0; ; cachenum++) { + unsigned long linesize, linepart, ways, sets; + eax = 0x04; + ecx = cachenum; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + + if ((eax & 0x1f) == 0) + break; + level = (eax >> 5) & 0x7; + if (data->is_knl && level == 3) + /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine, ignore it */ + break; + switch (eax & 0x1f) { + case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break; + case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break; + default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break; + } + + cache->level = level; + cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; + + cache->linesize = linesize = (ebx & 0xfff) + 1; + cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; + ways = ((ebx >> 22) & 0x3ff) + 1; + if (eax & (1 << 9)) + /* Fully associative */ + cache->ways = -1; + else + cache->ways = ways; + cache->sets = sets = ecx + 1; + cache->size = linesize * linepart * ways * sets; + cache->inclusive = edx & 0x2; + + hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", + cachenum, cache->level, + cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 
'i' : 'u', + cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10); + cache++; + } + } + } + + if ((cpuid_type == intel) && highest_cpuid >= 0x1f) { + /* Get package/die/module/tile/core/thread information from cpuid 0x1f + * (Intel v2 Extended Topology Enumeration) + */ + look_exttopoenum(infos, 0x1f, src_cpuiddump); + + } else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x0b && has_x2apic(features)) { + /* Get package/core/thread information from cpuid 0x0b + * (Intel v1 Extended Topology Enumeration) + */ + look_exttopoenum(infos, 0x0b, src_cpuiddump); + } + + /* Now that we have all info, compute cacheids and apply quirks */ + for (cachenum = 0; cachenum < infos->numcaches; cachenum++) { + cache = &infos->cache[cachenum]; + + /* default cacheid value */ + cache->cacheid = infos->apicid / cache->nbthreads_sharing; + + if (cpuid_type == amd) { + /* AMD quirks */ + if (infos->cpufamilynumber == 0x17 + && cache->level == 3 && cache->nbthreads_sharing == 6) { + /* AMD family 0x17 always shares L3 between 8 APIC ids, + * even when only 6 APIC ids are enabled and reported in nbthreads_sharing + * (on 24-core CPUs). + */ + cache->cacheid = infos->apicid / 8; + + } else if (infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9 + && cache->level == 3 + && (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) { + /* Fix AMD family 0x10 model 0x9 (Magny-Cours) with 8 or 12 cores. + * The L3 (and its associativity) is actually split into two halves). + */ + if (cache->nbthreads_sharing == 16) + cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */ + cache->nbthreads_sharing /= 2; + cache->size /= 2; + if (cache->ways != -1) + cache->ways /= 2; + /* AMD Magny-Cours 12-cores processor reserve APIC ids as AAAAAABBBBBB.... + * among first L3 (A), second L3 (B), and unexisting cores (.). + * On multi-socket servers, L3 in non-first sockets may have APIC id ranges + * such as [16-21] that are not aligned on multiple of nbthreads_sharing (6). + * That means, we can't just compare apicid/nbthreads_sharing to identify siblings. + */ + cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */ + + 2 * (infos->apicid / legacy_max_log_proc); /* add 2 caches per previous package */ + + } else if (infos->cpufamilynumber == 0x15 + && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */) + && cache->level == 3 && cache->nbthreads_sharing == 6) { + /* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours below, + * but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here. + */ + cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */ + + 2 * (infos->apicid / legacy_max_log_proc); /* add 2 cache per previous package */ + } + } else if (cpuid_type == hygon) { + if (infos->cpufamilynumber == 0x18 + && cache->level == 3 && cache->nbthreads_sharing == 6) { + /* Hygon family 0x18 always shares L3 between 8 APIC ids, + * even when only 6 APIC ids are enabled and reported in nbthreads_sharing + * (on 24-core CPUs). 
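+ * (Illustrative: with 6 of each 8 APIC ids enabled, ids 0-5 map to cacheid 0
+ * and ids 8-13 to cacheid 1 via the infos->apicid / 8 computation below.)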
+ */ + cache->cacheid = infos->apicid / 8; + } + } + } + + if (hwloc_bitmap_isset(data->apicid_set, infos->apicid)) + data->apicid_unique = 0; + else + hwloc_bitmap_set(data->apicid_set, infos->apicid); +} + +static void +hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int replace) +{ + char number[12]; + if (info->cpuvendor[0]) + hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUVendor", info->cpuvendor, replace); + snprintf(number, sizeof(number), "%u", info->cpufamilynumber); + hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUFamilyNumber", number, replace); + snprintf(number, sizeof(number), "%u", info->cpumodelnumber); + hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUModelNumber", number, replace); + if (info->cpumodel[0]) { + const char *c = info->cpumodel; + while (*c == ' ') + c++; + hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUModel", c, replace); + } + snprintf(number, sizeof(number), "%u", info->cpustepping); + hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUStepping", number, replace); +} + +/* Analyse information stored in infos, and build/annotate topology levels accordingly */ +static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery) +{ + struct hwloc_topology *topology = backend->topology; + struct hwloc_x86_backend_data_s *data = backend->private_data; + unsigned nbprocs = data->nbprocs; + hwloc_bitmap_t complete_cpuset = hwloc_bitmap_alloc(); + unsigned i, j, l, level; + int one = -1; + hwloc_bitmap_t remaining_cpuset; + int gotnuma = 0; + + for (i = 0; i < nbprocs; i++) + if (infos[i].present) { + hwloc_bitmap_set(complete_cpuset, i); + one = i; + } + + if (one == -1) { + hwloc_bitmap_free(complete_cpuset); + return; + } + + remaining_cpuset = hwloc_bitmap_alloc(); + + /* Ideally, when fulldiscovery=0, we could add any object that doesn't exist yet. + * But what if the x86 and the native backends disagree because one is buggy? Which one to trust? + * We only add missing caches, and annotate other existing objects for now. 
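+ * (Cross-reference: look_procs() additionally skips summarize() altogether
+ * when duplicate APIC ids were observed, since the cpusets built here would
+ * then be meaningless.)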
+ */ + + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) { + /* Look for packages */ + hwloc_obj_t package; + + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + if (fulldiscovery) { + unsigned packageid = infos[i].packageid; + hwloc_bitmap_t package_cpuset = hwloc_bitmap_alloc(); + + for (j = i; j < nbprocs; j++) { + if (infos[j].packageid == packageid) { + hwloc_bitmap_set(package_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + package = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, packageid); + package->cpuset = package_cpuset; + + hwloc_x86_add_cpuinfos(package, &infos[i], 0); + + hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n", + packageid, package_cpuset); + hwloc_insert_object_by_cpuset(topology, package); + + } else { + /* Annotate packages previously-existing packages */ + hwloc_bitmap_t set = hwloc_bitmap_alloc(); + hwloc_bitmap_set(set, i); + package = hwloc_get_next_obj_covering_cpuset_by_type(topology, set, HWLOC_OBJ_PACKAGE, NULL); + hwloc_bitmap_free(set); + if (package) { + /* Found package above that PU, annotate if no such attribute yet */ + hwloc_x86_add_cpuinfos(package, &infos[i], 1); + hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, package->cpuset); + } else { + /* No package, annotate the root object */ + hwloc_x86_add_cpuinfos(hwloc_get_root_obj(topology), &infos[i], 1); + break; + } + } + } + } + + /* Look for Numa nodes inside packages (cannot be filtered-out) */ + if (fulldiscovery && getenv("HWLOC_X86_TOPOEXT_NUMANODES")) { + hwloc_bitmap_t node_cpuset; + hwloc_obj_t node; + + /* FIXME: if there's memory inside the root object, divide it into NUMA nodes? */ + + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned packageid = infos[i].packageid; + unsigned nodeid = infos[i].nodeid; + + if (nodeid == (unsigned)-1) { + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + node_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].nodeid == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + + if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) { + hwloc_bitmap_set(node_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + node = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, nodeid); + node->cpuset = node_cpuset; + node->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(node->nodeset, nodeid); + hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n", + nodeid, node_cpuset); + hwloc_insert_object_by_cpuset(topology, node); + gotnuma++; + } + } + + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { + if (fulldiscovery) { + char *env; + int dont_merge; + hwloc_bitmap_t unit_cpuset, die_cpuset; + hwloc_obj_t unit, die; + + /* Look for Compute units inside packages */ + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned packageid = infos[i].packageid; + unsigned unitid = infos[i].unitid; + + if (unitid == (unsigned)-1) { + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + unit_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].unitid == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + + if (infos[j].packageid == packageid && infos[j].unitid == unitid) { + hwloc_bitmap_set(unit_cpuset, j); + 
hwloc_bitmap_clr(remaining_cpuset, j); + } + } + unit = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, unitid); + unit->cpuset = unit_cpuset; + unit->subtype = strdup("ComputeUnit"); + unit->attr->group.kind = HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT; + hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n", + unitid, unit_cpuset); + hwloc_insert_object_by_cpuset(topology, unit); + } + + /* Look for Dies inside packages */ + env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS"); + dont_merge = env && atoi(env); + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned packageid = infos[i].packageid; + unsigned dieid = infos[i].dieid; + + if (dieid == (unsigned)-1) { + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + die_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].dieid == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + + if (infos[j].packageid == packageid && infos[j].dieid == dieid) { + hwloc_bitmap_set(die_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + die = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, dieid); + die->cpuset = die_cpuset; + die->subtype = strdup("Die"); + die->attr->group.kind = HWLOC_GROUP_KIND_INTEL_DIE; + die->attr->group.dont_merge = dont_merge; + hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n", + dieid, die_cpuset); + hwloc_insert_object_by_cpuset(topology, die); + } + + /* Look for unknown objects */ + if (infos[one].otherids) { + for (level = infos[one].levels-1; level <= infos[one].levels-1; level--) { + if (infos[one].otherids[level] != UINT_MAX) { + hwloc_bitmap_t unknown_cpuset; + hwloc_obj_t unknown_obj; + + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned unknownid = infos[i].otherids[level]; + + unknown_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].otherids[level] == unknownid) { + hwloc_bitmap_set(unknown_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + unknown_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, unknownid); + unknown_obj->cpuset = unknown_cpuset; + unknown_obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_EXTTOPOENUM_UNKNOWN; + unknown_obj->attr->group.subkind = level; + hwloc_debug_2args_bitmap("os unknown%u %u has cpuset %s\n", + level, unknownid, unknown_cpuset); + hwloc_insert_object_by_cpuset(topology, unknown_obj); + } + } + } + } + } + } + + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { + /* Look for cores */ + if (fulldiscovery) { + hwloc_bitmap_t core_cpuset; + hwloc_obj_t core; + + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned packageid = infos[i].packageid; + unsigned nodeid = infos[i].nodeid; + unsigned coreid = infos[i].coreid; + + if (coreid == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + core_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].coreid == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + + if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) { + hwloc_bitmap_set(core_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + core = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, coreid); + core->cpuset = core_cpuset; + hwloc_debug_1arg_bitmap("os core %u has cpuset 
%s\n", + coreid, core_cpuset); + hwloc_insert_object_by_cpuset(topology, core); + } + } + } + + /* Look for PUs (cannot be filtered-out) */ + if (fulldiscovery) { + hwloc_debug("%s", "\n\n * CPU cpusets *\n\n"); + for (i=0; i<nbprocs; i++) + if(infos[i].present) { /* Only add present PU. We don't know if others actually exist */ + struct hwloc_obj *obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, i); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_only(obj->cpuset, i); + hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + } + } + + /* Look for caches */ + /* First find max level */ + level = 0; + for (i = 0; i < nbprocs; i++) + for (j = 0; j < infos[i].numcaches; j++) + if (infos[i].cache[j].level > level) + level = infos[i].cache[j].level; + while (level > 0) { + hwloc_obj_cache_type_t type; + HWLOC_BUILD_ASSERT(HWLOC_OBJ_CACHE_DATA == HWLOC_OBJ_CACHE_UNIFIED+1); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_CACHE_INSTRUCTION == HWLOC_OBJ_CACHE_DATA+1); + for (type = HWLOC_OBJ_CACHE_UNIFIED; type <= HWLOC_OBJ_CACHE_INSTRUCTION; type++) { + /* Look for caches of that type at level level */ + hwloc_obj_type_t otype; + hwloc_obj_t cache; + + otype = hwloc_cache_type_by_depth_type(level, type); + if (otype == HWLOC_OBJ_TYPE_NONE) + continue; + if (!hwloc_filter_check_keep_object_type(topology, otype)) + continue; + + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + hwloc_bitmap_t puset; + + for (l = 0; l < infos[i].numcaches; l++) { + if (infos[i].cache[l].level == level && infos[i].cache[l].type == type) + break; + } + if (l == infos[i].numcaches) { + /* no cache Llevel of that type in i */ + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + puset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(puset, i); + cache = hwloc_get_next_obj_covering_cpuset_by_type(topology, puset, otype, NULL); + hwloc_bitmap_free(puset); + + if (cache) { + /* Found cache above that PU, annotate if no such attribute yet */ + if (!hwloc_obj_get_info_by_name(cache, "Inclusive")) + hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0"); + hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, cache->cpuset); + } else { + /* Add the missing cache */ + hwloc_bitmap_t cache_cpuset; + unsigned packageid = infos[i].packageid; + unsigned cacheid = infos[i].cache[l].cacheid; + /* Now look for others sharing it */ + cache_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + unsigned l2; + for (l2 = 0; l2 < infos[j].numcaches; l2++) { + if (infos[j].cache[l2].level == level && infos[j].cache[l2].type == type) + break; + } + if (l2 == infos[j].numcaches) { + /* no cache Llevel of that type in j */ + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) { + hwloc_bitmap_set(cache_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + cache = hwloc_alloc_setup_object(topology, otype, HWLOC_UNKNOWN_INDEX); + cache->attr->cache.depth = level; + cache->attr->cache.size = infos[i].cache[l].size; + cache->attr->cache.linesize = infos[i].cache[l].linesize; + cache->attr->cache.associativity = infos[i].cache[l].ways; + cache->attr->cache.type = infos[i].cache[l].type; + cache->cpuset = cache_cpuset; + hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? 
"1" : "0"); + hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n", + level, cacheid, cache_cpuset); + hwloc_insert_object_by_cpuset(topology, cache); + } + } + } + level--; + } + + /* FIXME: if KNL and L2 disabled, add tiles instead of L2 */ + + hwloc_bitmap_free(remaining_cpuset); + hwloc_bitmap_free(complete_cpuset); + + if (gotnuma) + topology->support.discovery->numa = 1; +} + +static int +look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery, + unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, + int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags), + int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags)) +{ + struct hwloc_x86_backend_data_s *data = backend->private_data; + struct hwloc_topology *topology = backend->topology; + unsigned nbprocs = data->nbprocs; + hwloc_bitmap_t orig_cpuset = NULL; + hwloc_bitmap_t set = NULL; + unsigned i; + + if (!data->src_cpuiddump_path) { + orig_cpuset = hwloc_bitmap_alloc(); + if (get_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) { + hwloc_bitmap_free(orig_cpuset); + return -1; + } + set = hwloc_bitmap_alloc(); + } + + for (i = 0; i < nbprocs; i++) { + struct cpuiddump *src_cpuiddump = NULL; + if (data->src_cpuiddump_path) { + src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, i); + if (!src_cpuiddump) + continue; + } else { + hwloc_bitmap_only(set, i); + hwloc_debug("binding to CPU%u\n", i); + if (set_cpubind(topology, set, HWLOC_CPUBIND_STRICT)) { + hwloc_debug("could not bind to CPU%u: %s\n", i, strerror(errno)); + continue; + } + } + + look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); + + if (data->src_cpuiddump_path) { + cpuiddump_free(src_cpuiddump); + } + } + + if (!data->src_cpuiddump_path) { + set_cpubind(topology, orig_cpuset, 0); + hwloc_bitmap_free(set); + hwloc_bitmap_free(orig_cpuset); + } + + if (!data->apicid_unique) + fulldiscovery = 0; + else + summarize(backend, infos, fulldiscovery); + return 0; +} + +#if defined HWLOC_FREEBSD_SYS && defined HAVE_CPUSET_SETID +#include <sys/param.h> +#include <sys/cpuset.h> +typedef cpusetid_t hwloc_x86_os_state_t; +static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state, struct cpuiddump *src_cpuiddump) +{ + if (!src_cpuiddump) { + /* temporary make all cpus available during discovery */ + cpuset_getid(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, state); + cpuset_setid(CPU_WHICH_PID, -1, 0); + } +} +static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state, struct cpuiddump *src_cpuiddump) +{ + if (!src_cpuiddump) { + /* restore initial cpuset */ + cpuset_setid(CPU_WHICH_PID, -1, *state); + } +} +#else /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */ +typedef void * hwloc_x86_os_state_t; +static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump __hwloc_attribute_unused) { } +static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump __hwloc_attribute_unused) { } +#endif /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */ + +/* GenuineIntel */ +#define INTEL_EBX ('G' | ('e'<<8) | ('n'<<16) | ('u'<<24)) +#define INTEL_EDX ('i' | ('n'<<8) | ('e'<<16) | ('I'<<24)) +#define INTEL_ECX ('n' | ('t'<<8) | ('e'<<16) | ('l'<<24)) + +/* AuthenticAMD */ +#define AMD_EBX ('A' | ('u'<<8) | ('t'<<16) | ('h'<<24)) +#define AMD_EDX ('e' | ('n'<<8) 
| ('t'<<16) | ('i'<<24)) +#define AMD_ECX ('c' | ('A'<<8) | ('M'<<16) | ('D'<<24)) + +/* HYGON "HygonGenuine" */ +#define HYGON_EBX ('H' | ('y'<<8) | ('g'<<16) | ('o'<<24)) +#define HYGON_EDX ('n' | ('G'<<8) | ('e'<<16) | ('n'<<24)) +#define HYGON_ECX ('u' | ('i'<<8) | ('n'<<16) | ('e'<<24)) + +/* (Zhaoxin) CentaurHauls */ +#define ZX_EBX ('C' | ('e'<<8) | ('n'<<16) | ('t'<<24)) +#define ZX_EDX ('a' | ('u'<<8) | ('r'<<16) | ('H'<<24)) +#define ZX_ECX ('a' | ('u'<<8) | ('l'<<16) | ('s'<<24)) +/* (Zhaoxin) Shanghai */ +#define SH_EBX (' ' | (' '<<8) | ('S'<<16) | ('h'<<24)) +#define SH_EDX ('a' | ('n'<<8) | ('g'<<16) | ('h'<<24)) +#define SH_ECX ('a' | ('i'<<8) | (' '<<16) | (' '<<24)) + +/* fake cpubind for when nbprocs=1 and no binding support */ +static int fake_get_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, + hwloc_cpuset_t set __hwloc_attribute_unused, + int flags __hwloc_attribute_unused) +{ + return 0; +} +static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, + hwloc_const_cpuset_t set __hwloc_attribute_unused, + int flags __hwloc_attribute_unused) +{ + return 0; +} + +static +int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery) +{ + struct hwloc_x86_backend_data_s *data = backend->private_data; + unsigned nbprocs = data->nbprocs; + unsigned eax, ebx, ecx = 0, edx; + unsigned i; + unsigned highest_cpuid; + unsigned highest_ext_cpuid; + /* This stores cpuid features with the same indexing as Linux */ + unsigned features[10] = { 0 }; + struct procinfo *infos = NULL; + enum cpuid_type cpuid_type = unknown; + hwloc_x86_os_state_t os_state; + struct hwloc_binding_hooks hooks; + struct hwloc_topology_support support; + struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused; + int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags) = NULL; + int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags) = NULL; + struct cpuiddump *src_cpuiddump = NULL; + int ret = -1; + + if (data->src_cpuiddump_path) { + /* just read cpuid from the dump */ + src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0); + if (!src_cpuiddump) + goto out; + + } else { + /* otherwise check if binding works */ + memset(&hooks, 0, sizeof(hooks)); + support.membind = &memsupport; + hwloc_set_native_binding_hooks(&hooks, &support); + if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) { + get_cpubind = hooks.get_thisthread_cpubind; + set_cpubind = hooks.set_thisthread_cpubind; + } else if (hooks.get_thisproc_cpubind && hooks.set_thisproc_cpubind) { + /* FIXME: if called by a multithreaded program, we will restore the original process binding + * for each thread instead of their own original thread binding. + * See issue #158. 
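+ * (Illustrative consequence: a thread that was pinned to PU2 before discovery
+ * would come back bound to the whole original process cpuset, not to PU2.)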
+ */ + get_cpubind = hooks.get_thisproc_cpubind; + set_cpubind = hooks.set_thisproc_cpubind; + } else { + /* we need binding support if there are multiple PUs */ + if (nbprocs > 1) + goto out; + get_cpubind = fake_get_cpubind; + set_cpubind = fake_set_cpubind; + } + } + + if (!src_cpuiddump && !hwloc_have_x86_cpuid()) + goto out; + + infos = calloc(nbprocs, sizeof(struct procinfo)); + if (NULL == infos) + goto out; + for (i = 0; i < nbprocs; i++) { + infos[i].nodeid = (unsigned) -1; + infos[i].packageid = (unsigned) -1; + infos[i].dieid = (unsigned) -1; + infos[i].unitid = (unsigned) -1; + infos[i].coreid = (unsigned) -1; + infos[i].threadid = (unsigned) -1; + } + + eax = 0x00; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + highest_cpuid = eax; + if (ebx == INTEL_EBX && ecx == INTEL_ECX && edx == INTEL_EDX) + cpuid_type = intel; + else if (ebx == AMD_EBX && ecx == AMD_ECX && edx == AMD_EDX) + cpuid_type = amd; + else if ((ebx == ZX_EBX && ecx == ZX_ECX && edx == ZX_EDX) + || (ebx == SH_EBX && ecx == SH_ECX && edx == SH_EDX)) + cpuid_type = zhaoxin; + else if (ebx == HYGON_EBX && ecx == HYGON_ECX && edx == HYGON_EDX) + cpuid_type = hygon; + + hwloc_debug("highest cpuid %x, cpuid type %u\n", highest_cpuid, cpuid_type); + if (highest_cpuid < 0x01) { + goto out_with_infos; + } + + eax = 0x01; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + features[0] = edx; + features[4] = ecx; + + eax = 0x80000000; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + highest_ext_cpuid = eax; + + hwloc_debug("highest extended cpuid %x\n", highest_ext_cpuid); + + if (highest_cpuid >= 0x7) { + eax = 0x7; + ecx = 0; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + features[9] = ebx; + } + + if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) { + eax = 0x80000001; + cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); + features[1] = edx; + features[6] = ecx; + } + + hwloc_x86_os_state_save(&os_state, src_cpuiddump); + + ret = look_procs(backend, infos, fulldiscovery, + highest_cpuid, highest_ext_cpuid, features, cpuid_type, + get_cpubind, set_cpubind); + if (!ret) + /* success, we're done */ + goto out_with_os_state; + + if (nbprocs == 1) { + /* only one processor, no need to bind */ + look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump); + summarize(backend, infos, fulldiscovery); + ret = 0; + } + +out_with_os_state: + hwloc_x86_os_state_restore(&os_state, src_cpuiddump); + +out_with_infos: + if (NULL != infos) { + for (i = 0; i < nbprocs; i++) { + free(infos[i].cache); + free(infos[i].otherids); + } + free(infos); + } + +out: + if (src_cpuiddump) + cpuiddump_free(src_cpuiddump); + return ret; +} + +static int +hwloc_x86_discover(struct hwloc_backend *backend) +{ + struct hwloc_x86_backend_data_s *data = backend->private_data; + struct hwloc_topology *topology = backend->topology; + int alreadypus = 0; + int ret; + +#if HAVE_DECL_RUNNING_ON_VALGRIND + if (RUNNING_ON_VALGRIND && !data->src_cpuiddump_path) { + fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n" + "May be reenabled by dumping CPUIDs with hwloc-gather-cpuid\n" + "and reloading them under Valgrind with HWLOC_CPUID_PATH.\n"); + return 0; + } +#endif + + if (data->src_cpuiddump_path) { + assert(data->nbprocs > 0); /* enforced by hwloc_x86_component_instantiate() */ + topology->support.discovery->pu = 1; + } else { + int nbprocs = hwloc_fallback_nbprocessors(topology); + if (nbprocs >= 1) + 
topology->support.discovery->pu = 1; + else + nbprocs = 1; + data->nbprocs = (unsigned) nbprocs; + } + + if (topology->levels[0][0]->cpuset) { + /* somebody else discovered things */ + if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) { + /* only PUs were discovered, exactly as many as we would discover ourselves; complete the topology with everything else */ + alreadypus = 1; + goto fulldiscovery; + } + + /* several object types were added, we can't easily complete, just do partial discovery */ + hwloc_topology_reconnect(topology, 0); + ret = hwloc_look_x86(backend, 0); + if (ret) + hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86"); + return 0; + } else { + /* topology is empty, initialize it */ + hwloc_alloc_root_sets(topology->levels[0][0]); + } + +fulldiscovery: + if (hwloc_look_x86(backend, 1) < 0) { + /* if failed, create PUs */ + if (!alreadypus) + hwloc_setup_pu_level(topology, data->nbprocs); + } + + hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86"); + + if (!data->src_cpuiddump_path) { /* CPUID dump works for both x86 and x86_64 */ +#ifdef HAVE_UNAME + hwloc_add_uname_info(topology, NULL); /* we already know is_thissystem() is true */ +#else + /* uname isn't available, manually set up the "Architecture" info */ +#ifdef HWLOC_X86_64_ARCH + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64"); +#else + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86"); +#endif +#endif + } + + return 1; +} + +static int +hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t set) +{ + +#if !(defined HWLOC_WIN_SYS && !defined __MINGW32__ && !defined __CYGWIN__) /* needs a lot of work */ + struct dirent *dirent; + DIR *dir; + FILE *file; + char line [32]; + + dir = opendir(src_cpuiddump_path); + if (!dir) + return -1; + + char path[strlen(src_cpuiddump_path) + strlen("/hwloc-cpuid-info") + 1]; + sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path); + file = fopen(path, "r"); + if (!file) { + fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path); + goto out_with_dir; + } + if (!fgets(line, sizeof(line), file)) { + fprintf(stderr, "Failed to read dumped cpuid summary in %s\n", path); + fclose(file); + goto out_with_dir; + } + fclose(file); + if (strcmp(line, "Architecture: x86\n")) { + fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line); + goto out_with_dir; + } + + while ((dirent = readdir(dir)) != NULL) { + if (!strncmp(dirent->d_name, "pu", 2)) { + char *end; + unsigned long idx = strtoul(dirent->d_name+2, &end, 10); + if (!*end) + hwloc_bitmap_set(set, idx); + else + fprintf(stderr, "Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n", + dirent->d_name, src_cpuiddump_path); + } + } + closedir(dir); + + if (hwloc_bitmap_iszero(set)) { + fprintf(stderr, "Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n", + src_cpuiddump_path); + return -1; + } else if (hwloc_bitmap_last(set) != hwloc_bitmap_weight(set) - 1) { + /* The x86 backend enforces a contiguous set of PUs starting at 0 so far */ + fprintf(stderr, "Found non-contiguous pu%%u range in dumped cpuid directory `%s'\n", + src_cpuiddump_path); + return -1; + } + + return 0; + +out_with_dir: + closedir(dir); +#endif /* HWLOC_WIN_SYS & !__MINGW32__ needs a lot of work */ + return -1; +} + +static void +hwloc_x86_backend_disable(struct hwloc_backend *backend) +{ + struct hwloc_x86_backend_data_s *data = backend->private_data; + hwloc_bitmap_free(data->apicid_set); + free(data->src_cpuiddump_path); + free(data); +}
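+/* Usage sketch (not part of hwloc itself): the dump directory validated by
+ * hwloc_x86_check_cpuiddump_input() above is what hwloc-gather-cpuid produces,
+ * and a consumer selects it through the HWLOC_CPUID_PATH environment variable
+ * before loading a topology. The dump path below is a hypothetical example:
+ *
+ *   setenv("HWLOC_CPUID_PATH", "/tmp/cpuid-dump", 1);
+ *   hwloc_topology_t topo;
+ *   hwloc_topology_init(&topo);
+ *   hwloc_topology_load(topo);    // x86 backend replays the dumped CPUIDs,
+ *                                 // with backend->is_thissystem = 0
+ *   hwloc_topology_destroy(topo);
+ */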
+ +static struct hwloc_backend * +hwloc_x86_component_instantiate(struct hwloc_disc_component *component, + const void *_data1 __hwloc_attribute_unused, + const void *_data2 __hwloc_attribute_unused, + const void *_data3 __hwloc_attribute_unused) +{ + struct hwloc_backend *backend; + struct hwloc_x86_backend_data_s *data; + const char *src_cpuiddump_path; + + backend = hwloc_backend_alloc(component); + if (!backend) + goto out; + + data = malloc(sizeof(*data)); + if (!data) { + errno = ENOMEM; + goto out_with_backend; + } + + backend->private_data = data; + backend->discover = hwloc_x86_discover; + backend->disable = hwloc_x86_backend_disable; + + /* default values */ + data->is_knl = 0; + data->apicid_set = hwloc_bitmap_alloc(); + data->apicid_unique = 1; + data->src_cpuiddump_path = NULL; + + src_cpuiddump_path = getenv("HWLOC_CPUID_PATH"); + if (src_cpuiddump_path) { + hwloc_bitmap_t set = hwloc_bitmap_alloc(); + if (!hwloc_x86_check_cpuiddump_input(src_cpuiddump_path, set)) { + backend->is_thissystem = 0; + data->src_cpuiddump_path = strdup(src_cpuiddump_path); + assert(!hwloc_bitmap_iszero(set)); /* enforced by hwloc_x86_check_cpuiddump_input() */ + data->nbprocs = hwloc_bitmap_weight(set); + } else { + fprintf(stderr, "Ignoring dumped cpuid directory.\n"); + } + hwloc_bitmap_free(set); + } + + return backend; + + out_with_backend: + free(backend); + out: + return NULL; +} + +static struct hwloc_disc_component hwloc_x86_disc_component = { + HWLOC_DISC_COMPONENT_TYPE_CPU, + "x86", + HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + hwloc_x86_component_instantiate, + 45, /* between native and no_os */ + 1, + NULL +}; + +const struct hwloc_component hwloc_x86_component = { + HWLOC_COMPONENT_ABI, + NULL, NULL, + HWLOC_COMPONENT_TYPE_DISC, + 0, + &hwloc_x86_disc_component +}; diff --git a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c new file mode 100644 index 000000000..5a0d02da4 --- /dev/null +++ b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c @@ -0,0 +1,919 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2011 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. 
+ */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <hwloc/plugins.h> +#include <private/private.h> +#include <private/misc.h> +#include <private/xml.h> +#include <private/debug.h> + +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +/******************* + * Import routines * + *******************/ + +struct hwloc__nolibxml_backend_data_s { + size_t buflen; /* size of both buffer and copy buffers, set during backend_init() */ + char *buffer; /* allocated and filled during backend_init() */ + char *copy; /* allocated during backend_init(), used later during actual parsing */ +}; + +typedef struct hwloc__nolibxml_import_state_data_s { + char *tagbuffer; /* buffer containing the next tag */ + char *attrbuffer; /* buffer containing the next attribute of the current node */ + char *tagname; /* tag name of the current node */ + int closed; /* set if the current node is auto-closing */ +} __hwloc_attribute_may_alias * hwloc__nolibxml_import_state_data_t; + +static char * +hwloc__nolibxml_import_ignore_spaces(char *buffer) +{ + return buffer + strspn(buffer, " \t\n"); +} + +static int +hwloc__nolibxml_import_next_attr(hwloc__xml_import_state_t state, char **namep, char **valuep) +{ + hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; + size_t namelen; + size_t len, escaped; + char *buffer, *value, *end; + + if (!nstate->attrbuffer) + return -1; + + /* find the beginning of an attribute */ + buffer = hwloc__nolibxml_import_ignore_spaces(nstate->attrbuffer); + namelen = strspn(buffer, "abcdefghijklmnopqrstuvwxyz_"); + if (buffer[namelen] != '=' || buffer[namelen+1] != '\"') + return -1; + buffer[namelen] = '\0'; + *namep = buffer; + + /* find the beginning of its value, and unescape it */ + *valuep = value = buffer+namelen+2; + len = 0; escaped = 0; + while (value[len+escaped] != '\"') { + if (value[len+escaped] == '&') { + if (!strncmp(&value[1+len+escaped], "#10;", 4)) { + escaped += 4; + value[len] = '\n'; + } else if (!strncmp(&value[1+len+escaped], "#13;", 4)) { + escaped += 4; + value[len] = '\r'; + } else if (!strncmp(&value[1+len+escaped], "#9;", 3)) { + escaped += 3; + value[len] = '\t'; + } else if (!strncmp(&value[1+len+escaped], "quot;", 5)) { + escaped += 5; + value[len] = '\"'; + } else if (!strncmp(&value[1+len+escaped], "lt;", 3)) { + escaped += 3; + value[len] = '<'; + } else if (!strncmp(&value[1+len+escaped], "gt;", 3)) { + escaped += 3; + value[len] = '>'; + } else if (!strncmp(&value[1+len+escaped], "amp;", 4)) { + escaped += 4; + value[len] = '&'; + } else { + return -1; + } + } else { + value[len] = value[len+escaped]; + } + len++; + if (value[len+escaped] == '\0') + return -1; + } + value[len] = '\0'; + + /* find next attribute */ + end = &value[len+escaped+1]; /* skip the ending " */ + nstate->attrbuffer = hwloc__nolibxml_import_ignore_spaces(end); + return 0; +} + +static int +hwloc__nolibxml_import_find_child(hwloc__xml_import_state_t state, + hwloc__xml_import_state_t childstate, + char **tagp) +{ + hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; + hwloc__nolibxml_import_state_data_t nchildstate = (void*) childstate->data; + char *buffer = nstate->tagbuffer; + char *end; + char *tag; + size_t namelen; + + childstate->parent = state; + childstate->global = state->global; + + /* auto-closed tags have no children */ + if (nstate->closed) + return 0; + + /* find the beginning of the tag */ + buffer = 
hwloc__nolibxml_import_ignore_spaces(buffer); + if (buffer[0] != '<') + return -1; + buffer++; + + /* if closing tag, return nothing and do not advance */ + if (buffer[0] == '/') + return 0; + + /* normal tag */ + tag = nchildstate->tagname = buffer; + + /* find the end, mark it and return it */ + end = strchr(buffer, '>'); + if (!end) + return -1; + end[0] = '\0'; + nchildstate->tagbuffer = end+1; + + /* handle auto-closing tags */ + if (end[-1] == '/') { + nchildstate->closed = 1; + end[-1] = '\0'; + } else + nchildstate->closed = 0; + + /* find attributes */ + namelen = strspn(buffer, "abcdefghijklmnopqrstuvwxyz1234567890_"); + + if (buffer[namelen] == '\0') { + /* no attributes */ + nchildstate->attrbuffer = NULL; + *tagp = tag; + return 1; + } + + if (buffer[namelen] != ' ') + return -1; + + /* found a space, likely starting attributes */ + buffer[namelen] = '\0'; + nchildstate->attrbuffer = buffer+namelen+1; + *tagp = tag; + return 1; +} + +static int +hwloc__nolibxml_import_close_tag(hwloc__xml_import_state_t state) +{ + hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; + char *buffer = nstate->tagbuffer; + char *end; + + /* auto-closed tags need nothing */ + if (nstate->closed) + return 0; + + /* find the beginning of the tag */ + buffer = hwloc__nolibxml_import_ignore_spaces(buffer); + if (buffer[0] != '<') + return -1; + buffer++; + + /* find the end, mark it and return it to the parent */ + end = strchr(buffer, '>'); + if (!end) + return -1; + end[0] = '\0'; + nstate->tagbuffer = end+1; + + /* if closing tag, return nothing */ + if (buffer[0] != '/' || strcmp(buffer+1, nstate->tagname) ) + return -1; + return 0; +} + +static void +hwloc__nolibxml_import_close_child(hwloc__xml_import_state_t state) +{ + hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; + hwloc__nolibxml_import_state_data_t nparent = (void*) state->parent->data; + nparent->tagbuffer = nstate->tagbuffer; +} + +static int +hwloc__nolibxml_import_get_content(hwloc__xml_import_state_t state, + char **beginp, size_t expected_length) +{ + hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; + char *buffer = nstate->tagbuffer; + size_t length; + char *end; + + /* auto-closed tags have no content */ + if (nstate->closed) { + if (expected_length) + return -1; + *beginp = (char *) ""; + return 0; + } + + /* find the next tag, where the content ends */ + end = strchr(buffer, '<'); + if (!end) + return -1; + + length = (size_t) (end-buffer); + if (length != expected_length) + return -1; + nstate->tagbuffer = end; + *end = '\0'; /* mark as 0-terminated for now */ + *beginp = buffer; + return 1; +} + +static void +hwloc__nolibxml_import_close_content(hwloc__xml_import_state_t state) +{ + /* put back the '<' that we overwrote to 0-terminate the content */ + hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; + if (!nstate->closed) + *nstate->tagbuffer = '<'; +} + +static int +hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata, + struct hwloc__xml_import_state_s *state) +{ + hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; + struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data; + unsigned major, minor; + char *end; + char *buffer; + + HWLOC_BUILD_ASSERT(sizeof(*nstate) <= sizeof(state->data)); + + /* use a copy in the temporary buffer, we may modify during parsing */ + buffer = nbdata->copy; + memcpy(buffer, nbdata->buffer, nbdata->buflen); + + /* skip headers */ + while (!strncmp(buffer, "<?xml ", 6) || !strncmp(buffer, "<!DOCTYPE ", 
10)) { + buffer = strchr(buffer, '\n'); + if (!buffer) + goto failed; + buffer++; + } + + /* find topology tag */ + if (sscanf(buffer, "<topology version=\"%u.%u\">", &major, &minor) == 2) { + bdata->version_major = major; + bdata->version_minor = minor; + end = strchr(buffer, '>') + 1; + } else if (!strncmp(buffer, "<topology>", 10)) { + bdata->version_major = 1; + bdata->version_minor = 0; + end = buffer + 10; + } else if (!strncmp(buffer, "<root>", 6)) { + bdata->version_major = 0; + bdata->version_minor = 9; + end = buffer + 6; + } else + goto failed; + + state->global->next_attr = hwloc__nolibxml_import_next_attr; + state->global->find_child = hwloc__nolibxml_import_find_child; + state->global->close_tag = hwloc__nolibxml_import_close_tag; + state->global->close_child = hwloc__nolibxml_import_close_child; + state->global->get_content = hwloc__nolibxml_import_get_content; + state->global->close_content = hwloc__nolibxml_import_close_content; + state->parent = NULL; + nstate->closed = 0; + nstate->tagbuffer = end; + nstate->tagname = (char *) "topology"; + nstate->attrbuffer = NULL; + return 0; /* success */ + + failed: + return -1; /* failed */ +} + +/* can be called at the end of the import (to cleanup things early), + * or by backend_exit() if load failed for other reasons. + */ +static void +hwloc_nolibxml_free_buffers(struct hwloc_xml_backend_data_s *bdata) +{ + struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data; + if (nbdata->buffer) { + free(nbdata->buffer); + nbdata->buffer = NULL; + } + if (nbdata->copy) { + free(nbdata->copy); + nbdata->copy = NULL; + } +} + +static void +hwloc_nolibxml_look_done(struct hwloc_xml_backend_data_s *bdata, int result) +{ + hwloc_nolibxml_free_buffers(bdata); + + if (result < 0 && hwloc__xml_verbose()) + fprintf(stderr, "Failed to parse XML input with the minimalistic parser. 
If it was not\n" + "generated by hwloc, try enabling full XML support with libxml2.\n"); +} + +/******************** + * Backend routines * + ********************/ + +static void +hwloc_nolibxml_backend_exit(struct hwloc_xml_backend_data_s *bdata) +{ + struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data; + hwloc_nolibxml_free_buffers(bdata); + free(nbdata); +} + +static int +hwloc_nolibxml_read_file(const char *xmlpath, char **bufferp, size_t *buflenp) +{ + FILE * file; + size_t buflen, offset, readlen; + struct stat statbuf; + char *buffer, *tmp; + size_t ret; + + if (!strcmp(xmlpath, "-")) + xmlpath = "/dev/stdin"; + + file = fopen(xmlpath, "r"); + if (!file) + goto out; + + /* find the required buffer size for regular files, or use 4k when unknown, we'll realloc later if needed */ + buflen = 4096; + if (!stat(xmlpath, &statbuf)) + if (S_ISREG(statbuf.st_mode)) + buflen = statbuf.st_size+1; /* one additional byte so that the first fread() gets EOF too */ + + buffer = malloc(buflen+1); /* one more byte for the ending \0 */ + if (!buffer) + goto out_with_file; + + offset = 0; readlen = buflen; + while (1) { + ret = fread(buffer+offset, 1, readlen, file); + + offset += ret; + buffer[offset] = 0; + + if (ret != readlen) + break; + + buflen *= 2; + tmp = realloc(buffer, buflen+1); + if (!tmp) + goto out_with_buffer; + buffer = tmp; + readlen = buflen/2; + } + + fclose(file); + *bufferp = buffer; + *buflenp = offset+1; + return 0; + + out_with_buffer: + free(buffer); + out_with_file: + fclose(file); + out: + return -1; +} + +static int +hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata, + const char *xmlpath, const char *xmlbuffer, int xmlbuflen) +{ + struct hwloc__nolibxml_backend_data_s *nbdata = malloc(sizeof(*nbdata)); + + if (!nbdata) + goto out; + bdata->data = nbdata; + + if (xmlbuffer) { + nbdata->buffer = malloc(xmlbuflen+1); + if (!nbdata->buffer) + goto out_with_nbdata; + nbdata->buflen = xmlbuflen+1; + memcpy(nbdata->buffer, xmlbuffer, xmlbuflen); + nbdata->buffer[xmlbuflen] = '\0'; + + } else { + int err = hwloc_nolibxml_read_file(xmlpath, &nbdata->buffer, &nbdata->buflen); + if (err < 0) + goto out_with_nbdata; + } + + /* allocate a temporary copy buffer that we may modify during parsing */ + nbdata->copy = malloc(nbdata->buflen+1); + if (!nbdata->copy) + goto out_with_buffer; + nbdata->copy[nbdata->buflen] = '\0'; + + bdata->look_init = hwloc_nolibxml_look_init; + bdata->look_done = hwloc_nolibxml_look_done; + bdata->backend_exit = hwloc_nolibxml_backend_exit; + return 0; + +out_with_buffer: + free(nbdata->buffer); +out_with_nbdata: + free(nbdata); +out: + return -1; +} + +static int +hwloc_nolibxml_import_diff(struct hwloc__xml_import_state_s *state, + const char *xmlpath, const char *xmlbuffer, int xmlbuflen, + hwloc_topology_diff_t *firstdiffp, char **refnamep) +{ + hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; + struct hwloc__xml_import_state_s childstate; + char *refname = NULL; + char *buffer, *tmp, *tag; + size_t buflen; + int ret; + + HWLOC_BUILD_ASSERT(sizeof(*nstate) <= sizeof(state->data)); + + if (xmlbuffer) { + buffer = malloc(xmlbuflen); + if (!buffer) + goto out; + memcpy(buffer, xmlbuffer, xmlbuflen); + buflen = xmlbuflen; + + } else { + ret = hwloc_nolibxml_read_file(xmlpath, &buffer, &buflen); + if (ret < 0) + goto out; + } + + /* skip headers */ + tmp = buffer; + while (!strncmp(tmp, "<?xml ", 6) || !strncmp(tmp, "<!DOCTYPE ", 10)) { + tmp = strchr(tmp, '\n'); + if (!tmp) + goto out_with_buffer; + tmp++; + } + + 
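+ /* Reuse the same minimalistic-parser callbacks as regular topology import;
+  * the generic hwloc__xml_import_diff() driver below walks the
+  * <topologydiff> tree through them. */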
state->global->next_attr = hwloc__nolibxml_import_next_attr; + state->global->find_child = hwloc__nolibxml_import_find_child; + state->global->close_tag = hwloc__nolibxml_import_close_tag; + state->global->close_child = hwloc__nolibxml_import_close_child; + state->global->get_content = hwloc__nolibxml_import_get_content; + state->global->close_content = hwloc__nolibxml_import_close_content; + state->parent = NULL; + nstate->closed = 0; + nstate->tagbuffer = tmp; + nstate->tagname = NULL; + nstate->attrbuffer = NULL; + + /* find root */ + ret = hwloc__nolibxml_import_find_child(state, &childstate, &tag); + if (ret < 0) + goto out_with_buffer; + if (!tag || strcmp(tag, "topologydiff")) + goto out_with_buffer; + + while (1) { + char *attrname, *attrvalue; + if (hwloc__nolibxml_import_next_attr(&childstate, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "refname")) { + free(refname); + refname = strdup(attrvalue); + } else + goto out_with_buffer; + } + + ret = hwloc__xml_import_diff(&childstate, firstdiffp); + if (refnamep && !ret) + *refnamep = refname; + else + free(refname); + + free(buffer); + return ret; + +out_with_buffer: + free(buffer); + free(refname); +out: + return -1; +} + +/******************* + * Export routines * + *******************/ + +typedef struct hwloc__nolibxml_export_state_data_s { + char *buffer; /* (moving) buffer to write into */ + size_t written; /* how many bytes were written (or would have been written if not truncated) */ + size_t remaining; /* how many bytes are still available in the buffer */ + unsigned indent; /* indentation level for the next line */ + unsigned nr_children; + unsigned has_content; +} __hwloc_attribute_may_alias * hwloc__nolibxml_export_state_data_t; + +static void +hwloc__nolibxml_export_update_buffer(hwloc__nolibxml_export_state_data_t ndata, int res) +{ + if (res >= 0) { + ndata->written += res; + if (res >= (int) ndata->remaining) + res = ndata->remaining>0 ?
(int)ndata->remaining-1 : 0; + ndata->buffer += res; + ndata->remaining -= res; + } +} + +static char * +hwloc__nolibxml_export_escape_string(const char *src) +{ + size_t fulllen, sublen; + char *escaped, *dst; + + fulllen = strlen(src); + + sublen = strcspn(src, "\n\r\t\"<>&"); + if (sublen == fulllen) + return NULL; /* nothing to escape */ + + escaped = malloc(fulllen*6+1); /* escaped chars are replaced by at most 6 chars */ + dst = escaped; + + memcpy(dst, src, sublen); + src += sublen; + dst += sublen; + + while (*src) { + int replen; + switch (*src) { + case '\n': strcpy(dst, "&#10;"); replen=5; break; + case '\r': strcpy(dst, "&#13;"); replen=5; break; + case '\t': strcpy(dst, "&#9;"); replen=4; break; + case '\"': strcpy(dst, "&quot;"); replen=6; break; + case '<': strcpy(dst, "&lt;"); replen=4; break; + case '>': strcpy(dst, "&gt;"); replen=4; break; + case '&': strcpy(dst, "&amp;"); replen=5; break; + default: replen=0; break; + } + dst+=replen; src++; + + sublen = strcspn(src, "\n\r\t\"<>&"); + memcpy(dst, src, sublen); + src += sublen; + dst += sublen; + } + + *dst = 0; + return escaped; +} + +static void +hwloc__nolibxml_export_new_child(hwloc__xml_export_state_t parentstate, + hwloc__xml_export_state_t state, + const char *name) +{ + hwloc__nolibxml_export_state_data_t npdata = (void *) parentstate->data; + hwloc__nolibxml_export_state_data_t ndata = (void *) state->data; + int res; + + assert(!npdata->has_content); + if (!npdata->nr_children) { + res = hwloc_snprintf(npdata->buffer, npdata->remaining, ">\n"); + hwloc__nolibxml_export_update_buffer(npdata, res); + } + npdata->nr_children++; + + state->parent = parentstate; + state->new_child = parentstate->new_child; + state->new_prop = parentstate->new_prop; + state->add_content = parentstate->add_content; + state->end_object = parentstate->end_object; + state->global = parentstate->global; + + ndata->buffer = npdata->buffer; + ndata->written = npdata->written; + ndata->remaining = npdata->remaining; + ndata->indent = npdata->indent + 2; + + ndata->nr_children = 0; + ndata->has_content = 0; + + res = hwloc_snprintf(ndata->buffer, ndata->remaining, "%*s<%s", (int) npdata->indent, "", name); + hwloc__nolibxml_export_update_buffer(ndata, res); +} + +static void +hwloc__nolibxml_export_new_prop(hwloc__xml_export_state_t state, const char *name, const char *value) +{ + hwloc__nolibxml_export_state_data_t ndata = (void *) state->data; + char *escaped = hwloc__nolibxml_export_escape_string(value); + int res = hwloc_snprintf(ndata->buffer, ndata->remaining, " %s=\"%s\"", name, escaped ?
(const char *) escaped : value); + hwloc__nolibxml_export_update_buffer(ndata, res); + free(escaped); +} + +static void +hwloc__nolibxml_export_end_object(hwloc__xml_export_state_t state, const char *name) +{ + hwloc__nolibxml_export_state_data_t ndata = (void *) state->data; + hwloc__nolibxml_export_state_data_t npdata = (void *) state->parent->data; + int res; + + assert (!(ndata->has_content && ndata->nr_children)); + if (ndata->has_content) { + res = hwloc_snprintf(ndata->buffer, ndata->remaining, "</%s>\n", name); + } else if (ndata->nr_children) { + res = hwloc_snprintf(ndata->buffer, ndata->remaining, "%*s</%s>\n", (int) npdata->indent, "", name); + } else { + res = hwloc_snprintf(ndata->buffer, ndata->remaining, "/>\n"); + } + hwloc__nolibxml_export_update_buffer(ndata, res); + + npdata->buffer = ndata->buffer; + npdata->written = ndata->written; + npdata->remaining = ndata->remaining; +} + +static void +hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length) +{ + hwloc__nolibxml_export_state_data_t ndata = (void *) state->data; + int res; + + assert(!ndata->nr_children); + if (!ndata->has_content) { + res = hwloc_snprintf(ndata->buffer, ndata->remaining, ">"); + hwloc__nolibxml_export_update_buffer(ndata, res); + } + ndata->has_content = 1; + + res = hwloc_snprintf(ndata->buffer, ndata->remaining, buffer, length); + hwloc__nolibxml_export_update_buffer(ndata, res); +} + +static size_t +hwloc___nolibxml_prepare_export(hwloc_topology_t topology, struct hwloc__xml_export_data_s *edata, + char *xmlbuffer, int buflen, unsigned long flags) +{ + struct hwloc__xml_export_state_s state, childstate; + hwloc__nolibxml_export_state_data_t ndata = (void *) &state.data; + int v1export = flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1; + int res; + + HWLOC_BUILD_ASSERT(sizeof(*ndata) <= sizeof(state.data)); + + state.new_child = hwloc__nolibxml_export_new_child; + state.new_prop = hwloc__nolibxml_export_new_prop; + state.add_content = hwloc__nolibxml_export_add_content; + state.end_object = hwloc__nolibxml_export_end_object; + state.global = edata; + + ndata->indent = 0; + ndata->written = 0; + ndata->buffer = xmlbuffer; + ndata->remaining = buflen; + + ndata->nr_children = 1; /* don't close a non-existing previous tag when opening the topology tag */ + ndata->has_content = 0; + + res = hwloc_snprintf(ndata->buffer, ndata->remaining, + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE topology SYSTEM \"%s\">\n", v1export ? 
"hwloc.dtd" : "hwloc2.dtd"); + hwloc__nolibxml_export_update_buffer(ndata, res); + hwloc__nolibxml_export_new_child(&state, &childstate, "topology"); + if (!(flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1)) + hwloc__nolibxml_export_new_prop(&childstate, "version", "2.0"); + hwloc__xml_export_topology (&childstate, topology, flags); + hwloc__nolibxml_export_end_object(&childstate, "topology"); + + return ndata->written+1; /* ending \0 */ +} + +static int +hwloc_nolibxml_export_buffer(hwloc_topology_t topology, struct hwloc__xml_export_data_s *edata, + char **bufferp, int *buflenp, unsigned long flags) +{ + char *buffer; + size_t bufferlen, res; + + bufferlen = 16384; /* random guess for large enough default */ + buffer = malloc(bufferlen); + if (!buffer) + return -1; + res = hwloc___nolibxml_prepare_export(topology, edata, buffer, (int)bufferlen, flags); + + if (res > bufferlen) { + char *tmp = realloc(buffer, res); + if (!tmp) { + free(buffer); + return -1; + } + buffer = tmp; + hwloc___nolibxml_prepare_export(topology, edata, buffer, (int)res, flags); + } + + *bufferp = buffer; + *buflenp = (int)res; + return 0; +} + +static int +hwloc_nolibxml_export_file(hwloc_topology_t topology, struct hwloc__xml_export_data_s *edata, + const char *filename, unsigned long flags) +{ + FILE *file; + char *buffer; + int bufferlen; + int ret; + + ret = hwloc_nolibxml_export_buffer(topology, edata, &buffer, &bufferlen, flags); + if (ret < 0) + return -1; + + if (!strcmp(filename, "-")) { + file = stdout; + } else { + file = fopen(filename, "w"); + if (!file) { + free(buffer); + return -1; + } + } + + ret = (int)fwrite(buffer, 1, bufferlen-1 /* don't write the ending \0 */, file); + if (ret == bufferlen-1) { + ret = 0; + } else { + errno = ferror(file); + ret = -1; + } + + free(buffer); + + if (file != stdout) + fclose(file); + return ret; +} + +static size_t +hwloc___nolibxml_prepare_export_diff(hwloc_topology_diff_t diff, const char *refname, char *xmlbuffer, int buflen) +{ + struct hwloc__xml_export_state_s state, childstate; + hwloc__nolibxml_export_state_data_t ndata = (void *) &state.data; + int res; + + HWLOC_BUILD_ASSERT(sizeof(*ndata) <= sizeof(state.data)); + + state.new_child = hwloc__nolibxml_export_new_child; + state.new_prop = hwloc__nolibxml_export_new_prop; + state.add_content = hwloc__nolibxml_export_add_content; + state.end_object = hwloc__nolibxml_export_end_object; + + ndata->indent = 0; + ndata->written = 0; + ndata->buffer = xmlbuffer; + ndata->remaining = buflen; + + ndata->nr_children = 1; /* don't close a non-existing previous tag when opening the topology tag */ + ndata->has_content = 0; + + res = hwloc_snprintf(ndata->buffer, ndata->remaining, + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE topologydiff SYSTEM \"hwloc2-diff.dtd\">\n"); + hwloc__nolibxml_export_update_buffer(ndata, res); + hwloc__nolibxml_export_new_child(&state, &childstate, "topologydiff"); + if (refname) + hwloc__nolibxml_export_new_prop(&childstate, "refname", refname); + hwloc__xml_export_diff (&childstate, diff); + hwloc__nolibxml_export_end_object(&childstate, "topologydiff"); + + return ndata->written+1; +} + +static int +hwloc_nolibxml_export_diff_buffer(hwloc_topology_diff_t diff, const char *refname, char **bufferp, int *buflenp) +{ + char *buffer; + size_t bufferlen, res; + + bufferlen = 16384; /* random guess for large enough default */ + buffer = malloc(bufferlen); + if (!buffer) + return -1; + res = hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, (int)bufferlen); + + if (res > 
bufferlen) { + char *tmp = realloc(buffer, res); + if (!tmp) { + free(buffer); + return -1; + } + buffer = tmp; + hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, (int)res); + } + + *bufferp = buffer; + *buflenp = (int)res; + return 0; +} + +static int +hwloc_nolibxml_export_diff_file(hwloc_topology_diff_t diff, const char *refname, const char *filename) +{ + FILE *file; + char *buffer; + int bufferlen; + int ret; + + ret = hwloc_nolibxml_export_diff_buffer(diff, refname, &buffer, &bufferlen); + if (ret < 0) + return -1; + + if (!strcmp(filename, "-")) { + file = stdout; + } else { + file = fopen(filename, "w"); + if (!file) { + free(buffer); + return -1; + } + } + + ret = (int)fwrite(buffer, 1, bufferlen-1 /* don't write the ending \0 */, file); + if (ret == bufferlen-1) { + ret = 0; + } else { + errno = ferror(file); + ret = -1; + } + + free(buffer); + + if (file != stdout) + fclose(file); + return ret; +} + +static void +hwloc_nolibxml_free_buffer(void *xmlbuffer) +{ + free(xmlbuffer); +} + +/************* + * Callbacks * + *************/ + +static struct hwloc_xml_callbacks hwloc_xml_nolibxml_callbacks = { + hwloc_nolibxml_backend_init, + hwloc_nolibxml_export_file, + hwloc_nolibxml_export_buffer, + hwloc_nolibxml_free_buffer, + hwloc_nolibxml_import_diff, + hwloc_nolibxml_export_diff_file, + hwloc_nolibxml_export_diff_buffer +}; + +static struct hwloc_xml_component hwloc_nolibxml_xml_component = { + &hwloc_xml_nolibxml_callbacks, + NULL +}; + +const struct hwloc_component hwloc_xml_nolibxml_component = { + HWLOC_COMPONENT_ABI, + NULL, NULL, + HWLOC_COMPONENT_TYPE_XML, + 0, + &hwloc_nolibxml_xml_component +}; diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c new file mode 100644 index 000000000..e7c5ef621 --- /dev/null +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -0,0 +1,2886 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2011 Université Bordeaux + * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/xml.h> +#include <private/private.h> +#include <private/misc.h> +#include <private/debug.h> + +#include <math.h> + +int +hwloc__xml_verbose(void) +{ + static int checked = 0; + static int verbose = 0; + if (!checked) { + const char *env = getenv("HWLOC_XML_VERBOSE"); + if (env) + verbose = atoi(env); + checked = 1; + } + return verbose; +} + +static int +hwloc_nolibxml_import(void) +{ + static int checked = 0; + static int nolibxml = 0; + if (!checked) { + const char *env = getenv("HWLOC_LIBXML"); + if (env) { + nolibxml = !atoi(env); + } else { + env = getenv("HWLOC_LIBXML_IMPORT"); + if (env) + nolibxml = !atoi(env); + } + checked = 1; + } + return nolibxml; +} + +static int +hwloc_nolibxml_export(void) +{ + static int checked = 0; + static int nolibxml = 0; + if (!checked) { + const char *env = getenv("HWLOC_LIBXML"); + if (env) { + nolibxml = !atoi(env); + } else { + env = getenv("HWLOC_LIBXML_EXPORT"); + if (env) + nolibxml = !atoi(env); + } + checked = 1; + } + return nolibxml; +} + +#define BASE64_ENCODED_LENGTH(length) (4*(((length)+2)/3)) + +/********************************* + ********* XML callbacks ********* + *********************************/ + +/* set when registering nolibxml and libxml components. + * modifications protected by the components mutex. 
+ * read by the common XML code in topology-xml.c to jump to the right XML backend. + */ +static struct hwloc_xml_callbacks *hwloc_nolibxml_callbacks = NULL, *hwloc_libxml_callbacks = NULL; + +void +hwloc_xml_callbacks_register(struct hwloc_xml_component *comp) +{ + if (!hwloc_nolibxml_callbacks) + hwloc_nolibxml_callbacks = comp->nolibxml_callbacks; + if (!hwloc_libxml_callbacks) + hwloc_libxml_callbacks = comp->libxml_callbacks; +} + +void +hwloc_xml_callbacks_reset(void) +{ + hwloc_nolibxml_callbacks = NULL; + hwloc_libxml_callbacks = NULL; +} + +/************************************************ + ********* XML import (common routines) ********* + ************************************************/ + +#define _HWLOC_OBJ_CACHE_OLD (HWLOC_OBJ_TYPE_MAX+1) /* temporarily used when importing pre-v2.0 attribute-less cache types */ +#define _HWLOC_OBJ_FUTURE (HWLOC_OBJ_TYPE_MAX+2) /* temporarily used when ignoring future types */ + +static void +hwloc__xml_import_object_attr(struct hwloc_topology *topology, + struct hwloc_xml_backend_data_s *data, + struct hwloc_obj *obj, + const char *name, const char *value, + hwloc__xml_import_state_t state) +{ + if (!strcmp(name, "type")) { + /* already handled */ + return; + } + + else if (!strcmp(name, "os_index")) + obj->os_index = strtoul(value, NULL, 10); + else if (!strcmp(name, "gp_index")) { + obj->gp_index = strtoull(value, NULL, 10); + if (!obj->gp_index && hwloc__xml_verbose()) + fprintf(stderr, "%s: unexpected zero gp_index, topology may be invalid\n", state->global->msgprefix); + if (obj->gp_index >= topology->next_gp_index) + topology->next_gp_index = obj->gp_index + 1; + } else if (!strcmp(name, "cpuset")) { + if (!obj->cpuset) + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_sscanf(obj->cpuset, value); + } else if (!strcmp(name, "complete_cpuset")) { + if (!obj->complete_cpuset) + obj->complete_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_sscanf(obj->complete_cpuset, value); + } else if (!strcmp(name, "allowed_cpuset")) { + /* ignored except for root */ + if (!obj->parent) + hwloc_bitmap_sscanf(topology->allowed_cpuset, value); + } else if (!strcmp(name, "nodeset")) { + if (!obj->nodeset) + obj->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_sscanf(obj->nodeset, value); + } else if (!strcmp(name, "complete_nodeset")) { + if (!obj->complete_nodeset) + obj->complete_nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_sscanf(obj->complete_nodeset, value); + } else if (!strcmp(name, "allowed_nodeset")) { + /* ignored except for root */ + if (!obj->parent) + hwloc_bitmap_sscanf(topology->allowed_nodeset, value); + } else if (!strcmp(name, "name")) { + if (obj->name) + free(obj->name); + obj->name = strdup(value); + } else if (!strcmp(name, "subtype")) { + if (obj->subtype) + free(obj->subtype); + obj->subtype = strdup(value); + } + + else if (!strcmp(name, "cache_size")) { + unsigned long long lvalue = strtoull(value, NULL, 10); + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) + obj->attr->cache.size = lvalue; + else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring cache_size attribute for non-cache object type\n", + state->global->msgprefix); + } + + else if (!strcmp(name, "cache_linesize")) { + unsigned long lvalue = strtoul(value, NULL, 10); + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) + obj->attr->cache.linesize = lvalue; + else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring cache_linesize attribute for non-cache object type\n", + state->global->msgprefix); + 
} + + else if (!strcmp(name, "cache_associativity")) { + int lvalue = atoi(value); + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) + obj->attr->cache.associativity = lvalue; + else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring cache_associativity attribute for non-cache object type\n", + state->global->msgprefix); + } + + else if (!strcmp(name, "cache_type")) { + unsigned long lvalue = strtoul(value, NULL, 10); + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) { + if (lvalue == HWLOC_OBJ_CACHE_UNIFIED + || lvalue == HWLOC_OBJ_CACHE_DATA + || lvalue == HWLOC_OBJ_CACHE_INSTRUCTION) + obj->attr->cache.type = (hwloc_obj_cache_type_t) lvalue; + else + fprintf(stderr, "%s: ignoring invalid cache_type attribute %lu\n", + state->global->msgprefix, lvalue); + } else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring cache_type attribute for non-cache object type\n", + state->global->msgprefix); + } + + else if (!strcmp(name, "local_memory")) { + unsigned long long lvalue = strtoull(value, NULL, 10); + if (obj->type == HWLOC_OBJ_NUMANODE) + obj->attr->numanode.local_memory = lvalue; + else if (!obj->parent) + topology->machine_memory.local_memory = lvalue; + else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring local_memory attribute for non-NUMAnode non-root object\n", + state->global->msgprefix); + } + + else if (!strcmp(name, "depth")) { + unsigned long lvalue = strtoul(value, NULL, 10); + if (hwloc__obj_type_is_cache(obj->type) || obj->type == _HWLOC_OBJ_CACHE_OLD) { + obj->attr->cache.depth = lvalue; + } else if (obj->type == HWLOC_OBJ_GROUP || obj->type == HWLOC_OBJ_BRIDGE) { + /* will be overwritten by the core */ + } else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring depth attribute for object type without depth\n", + state->global->msgprefix); + } + + else if (!strcmp(name, "kind")) { + unsigned long lvalue = strtoul(value, NULL, 10); + if (obj->type == HWLOC_OBJ_GROUP) + obj->attr->group.kind = lvalue; + else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring kind attribute for non-group object type\n", + state->global->msgprefix); + } + + else if (!strcmp(name, "subkind")) { + unsigned long lvalue = strtoul(value, NULL, 10); + if (obj->type == HWLOC_OBJ_GROUP) + obj->attr->group.subkind = lvalue; + else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring subkind attribute for non-group object type\n", + state->global->msgprefix); + } + + else if (!strcmp(name, "dont_merge")) { + unsigned long lvalue = strtoul(value, NULL, 10); + if (obj->type == HWLOC_OBJ_GROUP) + obj->attr->group.dont_merge = lvalue; + else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring dont_merge attribute for non-group object type\n", + state->global->msgprefix); + } + + else if (!strcmp(name, "pci_busid")) { + switch (obj->type) { + case HWLOC_OBJ_PCI_DEVICE: + case HWLOC_OBJ_BRIDGE: { + unsigned domain, bus, dev, func; + if (sscanf(value, "%04x:%02x:%02x.%01x", + &domain, &bus, &dev, &func) != 4) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring invalid pci_busid format string %s\n", + state->global->msgprefix, value); + } else { + obj->attr->pcidev.domain = domain; + obj->attr->pcidev.bus = bus; + obj->attr->pcidev.dev = dev; + obj->attr->pcidev.func = func; + } + break; + } + default: + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring pci_busid attribute for non-PCI object\n", + state->global->msgprefix); + break; + } + } + + else if (!strcmp(name, "pci_type")) { + 
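+ /* Illustrative example (not from the source): a pci_type string such as
+  * "0200 [8086:10fb] [8086:0003] 01" encodes class 0x0200 (Ethernet),
+  * vendor:device 8086:10fb, subvendor:subdevice 8086:0003 and revision
+  * 0x01, matching the sscanf format below. */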
switch (obj->type) { + case HWLOC_OBJ_PCI_DEVICE: + case HWLOC_OBJ_BRIDGE: { + unsigned classid, vendor, device, subvendor, subdevice, revision; + if (sscanf(value, "%04x [%04x:%04x] [%04x:%04x] %02x", + &classid, &vendor, &device, &subvendor, &subdevice, &revision) != 6) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring invalid pci_type format string %s\n", + state->global->msgprefix, value); + } else { + obj->attr->pcidev.class_id = classid; + obj->attr->pcidev.vendor_id = vendor; + obj->attr->pcidev.device_id = device; + obj->attr->pcidev.subvendor_id = subvendor; + obj->attr->pcidev.subdevice_id = subdevice; + obj->attr->pcidev.revision = revision; + } + break; + } + default: + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring pci_type attribute for non-PCI object\n", + state->global->msgprefix); + break; + } + } + + else if (!strcmp(name, "pci_link_speed")) { + switch (obj->type) { + case HWLOC_OBJ_PCI_DEVICE: + case HWLOC_OBJ_BRIDGE: { + obj->attr->pcidev.linkspeed = (float) atof(value); + break; + } + default: + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring pci_link_speed attribute for non-PCI object\n", + state->global->msgprefix); + break; + } + } + + else if (!strcmp(name, "bridge_type")) { + switch (obj->type) { + case HWLOC_OBJ_BRIDGE: { + unsigned upstream_type, downstream_type; + if (sscanf(value, "%u-%u", &upstream_type, &downstream_type) != 2) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring invalid bridge_type format string %s\n", + state->global->msgprefix, value); + } else { + obj->attr->bridge.upstream_type = (hwloc_obj_bridge_type_t) upstream_type; + obj->attr->bridge.downstream_type = (hwloc_obj_bridge_type_t) downstream_type; + }; + break; + } + default: + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring bridge_type attribute for non-bridge object\n", + state->global->msgprefix); + break; + } + } + + else if (!strcmp(name, "bridge_pci")) { + switch (obj->type) { + case HWLOC_OBJ_BRIDGE: { + unsigned domain, secbus, subbus; + if (sscanf(value, "%04x:[%02x-%02x]", + &domain, &secbus, &subbus) != 3) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring invalid bridge_pci format string %s\n", + state->global->msgprefix, value); + } else { + obj->attr->bridge.downstream.pci.domain = domain; + obj->attr->bridge.downstream.pci.secondary_bus = secbus; + obj->attr->bridge.downstream.pci.subordinate_bus = subbus; + } + break; + } + default: + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring bridge_pci attribute for non-bridge object\n", + state->global->msgprefix); + break; + } + } + + else if (!strcmp(name, "osdev_type")) { + switch (obj->type) { + case HWLOC_OBJ_OS_DEVICE: { + unsigned osdev_type; + if (sscanf(value, "%u", &osdev_type) != 1) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring invalid osdev_type format string %s\n", + state->global->msgprefix, value); + } else + obj->attr->osdev.type = (hwloc_obj_osdev_type_t) osdev_type; + break; + } + default: + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring osdev_type attribute for non-osdev object\n", + state->global->msgprefix); + break; + } + } + + else if (data->version_major < 2) { + /************************ + * deprecated from 1.x + */ + if (!strcmp(name, "os_level") + || !strcmp(name, "online_cpuset")) + { /* ignored */ } + + /************************* + * deprecated from 1.0 + */ + else if (!strcmp(name, "dmi_board_vendor")) { + if (value[0]) + hwloc_obj_add_info(obj, "DMIBoardVendor", value); + } + else if (!strcmp(name, 
"dmi_board_name")) { + if (value[0]) + hwloc_obj_add_info(obj, "DMIBoardName", value); + } + + else if (data->version_major < 1) { + /************************* + * deprecated from 0.9 + */ + if (!strcmp(name, "memory_kB")) { + unsigned long long lvalue = strtoull(value, NULL, 10); + if (obj->type == _HWLOC_OBJ_CACHE_OLD) + obj->attr->cache.size = lvalue << 10; + else if (obj->type == HWLOC_OBJ_NUMANODE) + obj->attr->numanode.local_memory = lvalue << 10; + else if (!obj->parent) + topology->machine_memory.local_memory = lvalue << 10; + else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring memory_kB attribute for non-NUMAnode non-root object\n", + state->global->msgprefix); + } + else if (!strcmp(name, "huge_page_size_kB")) { + unsigned long lvalue = strtoul(value, NULL, 10); + if (obj->type == HWLOC_OBJ_NUMANODE || !obj->parent) { + struct hwloc_numanode_attr_s *memory = obj->type == HWLOC_OBJ_NUMANODE ? &obj->attr->numanode : &topology->machine_memory; + if (!memory->page_types) { + memory->page_types = malloc(sizeof(*memory->page_types)); + memory->page_types_len = 1; + } + memory->page_types[0].size = lvalue << 10; + } else if (hwloc__xml_verbose()) { + fprintf(stderr, "%s: ignoring huge_page_size_kB attribute for non-NUMAnode non-root object\n", + state->global->msgprefix); + } + } + else if (!strcmp(name, "huge_page_free")) { + unsigned long lvalue = strtoul(value, NULL, 10); + if (obj->type == HWLOC_OBJ_NUMANODE || !obj->parent) { + struct hwloc_numanode_attr_s *memory = obj->type == HWLOC_OBJ_NUMANODE ? &obj->attr->numanode : &topology->machine_memory; + if (!memory->page_types) { + memory->page_types = malloc(sizeof(*memory->page_types)); + memory->page_types_len = 1; + } + memory->page_types[0].count = lvalue; + } else if (hwloc__xml_verbose()) { + fprintf(stderr, "%s: ignoring huge_page_free attribute for non-NUMAnode non-root object\n", + state->global->msgprefix); + } + } + /* end of deprecated from 0.9 */ + else goto unknown; + } + /* end of deprecated from 1.0 */ + else goto unknown; + } + else { + unknown: + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown object attribute %s\n", + state->global->msgprefix, name); + } +} + + +static int +hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data, + hwloc_obj_t obj, + hwloc__xml_import_state_t state) +{ + char *infoname = NULL; + char *infovalue = NULL; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "name")) + infoname = attrvalue; + else if (!strcmp(attrname, "value")) + infovalue = attrvalue; + else + return -1; + } + + if (infoname) { + /* empty strings are ignored by libxml */ + if (data->version_major < 2 && + (!strcmp(infoname, "Type") || !strcmp(infoname, "CoProcType"))) { + /* 1.x stored subtype in Type or CoProcType */ + if (infovalue) { + if (obj->subtype) + free(obj->subtype); + obj->subtype = strdup(infovalue); + } + } else { + if (infovalue) + hwloc_obj_add_info(obj, infoname, infovalue); + } + } + + return state->global->close_tag(state); +} + +static int +hwloc__xml_import_pagetype(hwloc_topology_t topology __hwloc_attribute_unused, struct hwloc_numanode_attr_s *memory, + hwloc__xml_import_state_t state) +{ + uint64_t size = 0, count = 0; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "size")) + size = strtoull(attrvalue, NULL, 10); + else if (!strcmp(attrname, "count")) + count = 
strtoull(attrvalue, NULL, 10); + else + return -1; + } + + if (size) { + unsigned idx = memory->page_types_len; + struct hwloc_memory_page_type_s *tmp; + tmp = realloc(memory->page_types, (idx+1)*sizeof(*memory->page_types)); + if (tmp) { /* if failed to allocate, ignore this page_type entry */ + memory->page_types = tmp; + memory->page_types_len = idx+1; + memory->page_types[idx].size = size; + memory->page_types[idx].count = count; + } + } + + return state->global->close_tag(state); +} + +static int +hwloc__xml_v1import_distances(struct hwloc_xml_backend_data_s *data, + hwloc_obj_t obj, + hwloc__xml_import_state_t state) +{ + unsigned long reldepth = 0, nbobjs = 0; + float latbase = 0; + char *tag; + int ret; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "nbobjs")) + nbobjs = strtoul(attrvalue, NULL, 10); + else if (!strcmp(attrname, "relative_depth")) + reldepth = strtoul(attrvalue, NULL, 10); + else if (!strcmp(attrname, "latency_base")) + latbase = (float) atof(attrvalue); + else + return -1; + } + + if (nbobjs && reldepth && latbase) { + unsigned i; + float *matrix; + struct hwloc__xml_imported_v1distances_s *v1dist; + + matrix = malloc(nbobjs*nbobjs*sizeof(float)); + v1dist = malloc(sizeof(*v1dist)); + if (!matrix || !v1dist) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: failed to allocate v1distance matrix for %lu objects\n", + state->global->msgprefix, nbobjs); + free(v1dist); + free(matrix); + return -1; + } + + v1dist->kind = HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_MEANS_LATENCY; + /* TODO: we can't know for sure if it comes from the OS. + * On Linux/x86, it would be 10 on the diagonal. + * On Solaris/T5, 15 on the diagonal. + * Just check whether all values are integers, and that all values on the diagonal are minimal and identical? + */ + + v1dist->nbobjs = nbobjs; + v1dist->floats = matrix; + + for(i=0; i<nbobjs*nbobjs; i++) { + struct hwloc__xml_import_state_s childstate; + char *attrname, *attrvalue; + float val; + + ret = state->global->find_child(state, &childstate, &tag); + if (ret <= 0 || strcmp(tag, "latency")) { + /* a latency child is needed */ + free(matrix); + free(v1dist); + return -1; + } + + ret = state->global->next_attr(&childstate, &attrname, &attrvalue); + if (ret < 0 || strcmp(attrname, "value")) { + free(matrix); + free(v1dist); + return -1; + } + + val = (float) atof((char *) attrvalue); + matrix[i] = val * latbase; + + ret = state->global->close_tag(&childstate); + if (ret < 0) { + free(matrix); + free(v1dist); + return -1; + } + + state->global->close_child(&childstate); + } + + if (nbobjs < 2) { + /* distances with a single object are useless, even if the XML isn't invalid */ + assert(nbobjs == 1); + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring invalid distance matrix with only 1 object\n", + state->global->msgprefix); + free(matrix); + free(v1dist); + + } else if (obj->parent) { + /* we currently only import distances attached to root. + * we can't save obj in v1dist because obj could be dropped during insert if ignored. + * we could save its complete_cpu/nodeset instead to find it back later. + * but it doesn't matter much since only NUMA distances attached to root matter. 
+ */ + free(matrix); + free(v1dist); + + } else { + /* queue the distance for real */ + v1dist->prev = data->last_v1dist; + v1dist->next = NULL; + if (data->last_v1dist) + data->last_v1dist->next = v1dist; + else + data->first_v1dist = v1dist; + data->last_v1dist = v1dist; + } + } + + return state->global->close_tag(state); +} + +static int +hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, + hwloc__xml_import_state_t state) +{ + size_t length = 0; + int encoded = 0; + char *name = NULL; /* optional */ + int ret; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "length")) + length = strtoul(attrvalue, NULL, 10); + else if (!strcmp(attrname, "encoding")) + encoded = !strcmp(attrvalue, "base64"); + else if (!strcmp(attrname, "name")) + name = attrvalue; + else + return -1; + } + + if (!topology->userdata_import_cb) { + char *buffer; + size_t reallength = encoded ? BASE64_ENCODED_LENGTH(length) : length; + ret = state->global->get_content(state, &buffer, reallength); + if (ret < 0) + return -1; + + } else if (topology->userdata_not_decoded) { + char *buffer, *fakename; + size_t reallength = encoded ? BASE64_ENCODED_LENGTH(length) : length; + ret = state->global->get_content(state, &buffer, reallength); + if (ret < 0) + return -1; + fakename = malloc(6 + 1 + (name ? strlen(name) : 4) + 1); + if (!fakename) + return -1; + sprintf(fakename, encoded ? "base64%c%s" : "normal%c%s", name ? ':' : '-', name ? name : "anon"); + topology->userdata_import_cb(topology, obj, fakename, buffer, length); + free(fakename); + + } else if (encoded && length) { + char *encoded_buffer; + size_t encoded_length = BASE64_ENCODED_LENGTH(length); + ret = state->global->get_content(state, &encoded_buffer, encoded_length); + if (ret < 0) + return -1; + if (ret) { + char *decoded_buffer = malloc(length+1); + if (!decoded_buffer) + return -1; + assert(encoded_buffer[encoded_length] == 0); + ret = hwloc_decode_from_base64(encoded_buffer, decoded_buffer, length+1); + if (ret != (int) length) { + free(decoded_buffer); + return -1; + } + topology->userdata_import_cb(topology, obj, name, decoded_buffer, length); + free(decoded_buffer); + } + + } else { /* always handle length==0 in the non-encoded case */ + char *buffer = (char *) ""; + if (length) { + ret = state->global->get_content(state, &buffer, length); + if (ret < 0) + return -1; + } + topology->userdata_import_cb(topology, obj, name, buffer, length); + } + + state->global->close_content(state); + return state->global->close_tag(state); +} + +static void hwloc__xml_import_report_outoforder(hwloc_topology_t topology, hwloc_obj_t new, hwloc_obj_t old) +{ + char *progname = hwloc_progname(topology); + const char *origversion = hwloc_obj_get_info_by_name(topology->levels[0][0], "hwlocVersion"); + const char *origprogname = hwloc_obj_get_info_by_name(topology->levels[0][0], "ProcessName"); + char *c1, *cc1, t1[64]; + char *c2 = NULL, *cc2 = NULL, t2[64]; + + hwloc_bitmap_asprintf(&c1, new->cpuset); + hwloc_bitmap_asprintf(&cc1, new->complete_cpuset); + hwloc_obj_type_snprintf(t1, sizeof(t1), new, 0); + + if (old->cpuset) + hwloc_bitmap_asprintf(&c2, old->cpuset); + if (old->complete_cpuset) + hwloc_bitmap_asprintf(&cc2, old->complete_cpuset); + hwloc_obj_type_snprintf(t2, sizeof(t2), old, 0); + + fprintf(stderr, "****************************************************************************\n"); + fprintf(stderr, "* hwloc has encountered 
an out-of-order XML topology load.\n"); + fprintf(stderr, "* Object %s cpuset %s complete %s\n", + t1, c1, cc1); + fprintf(stderr, "* was inserted after object %s with %s and %s.\n", + t2, c2 ? c2 : "none", cc2 ? cc2 : "none"); + fprintf(stderr, "* The error occurred in hwloc %s inside process `%s', while\n", + HWLOC_VERSION, + progname ? progname : "<unknown>"); + if (origversion || origprogname) + fprintf(stderr, "* the input XML was generated by hwloc %s inside process `%s'.\n", + origversion ? origversion : "(unknown version)", + origprogname ? origprogname : "<unknown>"); + else + fprintf(stderr, "* the input XML was generated by an unspecified ancient hwloc release.\n"); + fprintf(stderr, "* Please check that your input topology XML file is valid.\n"); + fprintf(stderr, "* Set HWLOC_DEBUG_CHECK=1 in the environment to detect further issues.\n"); + fprintf(stderr, "****************************************************************************\n"); + + free(c1); + free(cc1); + free(c2); + free(cc2); + free(progname); +} + +static int +hwloc__xml_import_object(hwloc_topology_t topology, + struct hwloc_xml_backend_data_s *data, + hwloc_obj_t parent, hwloc_obj_t obj, int *gotignored, + hwloc__xml_import_state_t state) +{ + int ignored = 0; + int childrengotignored = 0; + int attribute_less_cache = 0; + int numa_was_root = 0; + char *tag; + struct hwloc__xml_import_state_s childstate; + + /* set parent now since it's used during import below or in subfunctions */ + obj->parent = parent; + + /* process attributes */ + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "type")) { + if (hwloc_type_sscanf(attrvalue, &obj->type, NULL, 0) < 0) { + if (!strcasecmp(attrvalue, "Cache")) { + obj->type = _HWLOC_OBJ_CACHE_OLD; /* will be fixed below */ + attribute_less_cache = 1; + } else if (!strcasecmp(attrvalue, "System")) { + if (!parent) + obj->type = HWLOC_OBJ_MACHINE; + else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: obsolete System object only allowed at root\n", + state->global->msgprefix); + goto error_with_object; + } + } else if (!strcasecmp(attrvalue, "Die")) { + /* deal with possible future type */ + obj->type = HWLOC_OBJ_GROUP; + obj->subtype = strdup("Die"); + obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_DIE; + obj->attr->group.dont_merge = data->dont_merge_die_groups; + } else if (!strcasecmp(attrvalue, "Tile")) { + /* deal with possible future type */ + obj->type = HWLOC_OBJ_GROUP; + obj->subtype = strdup("Tile"); + obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_TILE; + } else if (!strcasecmp(attrvalue, "Module")) { + /* deal with possible future type */ + obj->type = HWLOC_OBJ_GROUP; + obj->subtype = strdup("Module"); + obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE; + } else if (!strcasecmp(attrvalue, "MemCache")) { + /* ignore possible future type */ + obj->type = _HWLOC_OBJ_FUTURE; + ignored = 1; + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: %s object not supported, will be ignored\n", + state->global->msgprefix, attrvalue); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: unrecognized object type string %s\n", + state->global->msgprefix, attrvalue); + goto error_with_object; + } + } + } else { + /* type needed first */ + if (obj->type == HWLOC_OBJ_TYPE_NONE) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: object attribute %s found before type\n", + state->global->msgprefix, attrname); + goto error_with_object; + } + 
hwloc__xml_import_object_attr(topology, data, obj, attrname, attrvalue, state); + } + } + + /* process non-object subnodes to get info attrs (as well as page_types, etc) */ + while (1) { + int ret; + + tag = NULL; + ret = state->global->find_child(state, &childstate, &tag); + if (ret < 0) + goto error; + if (!ret) + break; + + if (!strcmp(tag, "object")) { + /* we'll handle children later */ + break; + + } else if (!strcmp(tag, "page_type")) { + if (obj->type == HWLOC_OBJ_NUMANODE) { + ret = hwloc__xml_import_pagetype(topology, &obj->attr->numanode, &childstate); + } else if (!parent) { + ret = hwloc__xml_import_pagetype(topology, &topology->machine_memory, &childstate); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid non-NUMAnode object child %s\n", + state->global->msgprefix, tag); + ret = -1; + } + + } else if (!strcmp(tag, "info")) { + ret = hwloc__xml_import_info(data, obj, &childstate); + } else if (data->version_major < 2 && !strcmp(tag, "distances")) { + ret = hwloc__xml_v1import_distances(data, obj, &childstate); + } else if (!strcmp(tag, "userdata")) { + ret = hwloc__xml_import_userdata(topology, obj, &childstate); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid special object child %s\n", + state->global->msgprefix, tag); + ret = -1; + } + + if (ret < 0) + goto error; + + state->global->close_child(&childstate); + } + + if (parent && obj->type == HWLOC_OBJ_MACHINE) { + /* replace non-root Machine with Groups */ + obj->type = HWLOC_OBJ_GROUP; + } + + if (parent && data->version_major >= 2) { + /* check parent/child types for 2.x */ + if (hwloc__obj_type_is_normal(obj->type)) { + if (!hwloc__obj_type_is_normal(parent->type)) { + if (hwloc__xml_verbose()) + fprintf(stderr, "normal object %s cannot be child of non-normal parent %s\n", + hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); + goto error_with_object; + } + } else if (hwloc__obj_type_is_memory(obj->type)) { + if (hwloc__obj_type_is_io(parent->type) || HWLOC_OBJ_MISC == parent->type) { + if (hwloc__xml_verbose()) + fprintf(stderr, "Memory object %s cannot be child of non-normal-or-memory parent %s\n", + hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); + goto error_with_object; + } + } else if (hwloc__obj_type_is_io(obj->type)) { + if (hwloc__obj_type_is_memory(parent->type) || HWLOC_OBJ_MISC == parent->type) { + if (hwloc__xml_verbose()) + fprintf(stderr, "I/O object %s cannot be child of non-normal-or-I/O parent %s\n", + hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); + goto error_with_object; + } + } + + } else if (parent && data->version_major < 2) { + /* check parent/child types for pre-v2.0 */ + if (hwloc__obj_type_is_normal(obj->type) || HWLOC_OBJ_NUMANODE == obj->type) { + if (hwloc__obj_type_is_special(parent->type)) { + if (hwloc__xml_verbose()) + fprintf(stderr, "normal v1.x object %s cannot be child of special parent %s\n", + hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); + goto error_with_object; + } + } else if (hwloc__obj_type_is_io(obj->type)) { + if (HWLOC_OBJ_MISC == parent->type) { + if (hwloc__xml_verbose()) + fprintf(stderr, "I/O object %s cannot be child of Misc parent\n", + hwloc_obj_type_string(obj->type)); + goto error_with_object; + } + } + } + + if (data->version_major < 2) { + /*************************** + * 1.x specific checks + */ + + /* attach pre-v2.0 children of NUMA nodes to normal parent */ + if (parent && parent->type == HWLOC_OBJ_NUMANODE) { + 
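/* this object's parent is a v1 NUMA node; reattach it to the NUMA node's own parent instead */ +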
parent = parent->parent; + assert(parent); + } + + /* insert a group above pre-v2.0 NUMA nodes if needed */ + if (obj->type == HWLOC_OBJ_NUMANODE) { + if (!parent) { + /* crazy case of NUMA node root (only possible when filtering Machine keep_structure in v1.x), + * reinsert a Machine object + */ + hwloc_obj_t machine = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MACHINE, HWLOC_UNKNOWN_INDEX); + machine->cpuset = hwloc_bitmap_dup(obj->cpuset); + machine->complete_cpuset = hwloc_bitmap_dup(obj->cpuset); + machine->nodeset = hwloc_bitmap_dup(obj->nodeset); + machine->complete_nodeset = hwloc_bitmap_dup(obj->complete_nodeset); + topology->levels[0][0] = machine; + parent = machine; + numa_was_root = 1; + + } else if (!hwloc_bitmap_isequal(obj->complete_cpuset, parent->complete_cpuset)) { + /* This NUMA node has a different locality from its parent. + * Don't attach it to this parent, or it will get its parent cpusets. + * Add an intermediate Group with the desired locality. + */ + int needgroup = 1; + hwloc_obj_t sibling; + + sibling = parent->memory_first_child; + if (sibling && !sibling->subtype + && !sibling->next_sibling + && obj->subtype && !strcmp(obj->subtype, "MCDRAM") + && hwloc_bitmap_iszero(obj->complete_cpuset)) { + /* this is KNL MCDRAM, we want to attach it near its DDR sibling */ + needgroup = 0; + } + /* Ideally we would also detect similar cases on future non-KNL platforms with multiple local NUMA nodes. + * That's unlikely to occur with v1.x. + * And we have no way to be sure if this CPU-less node is desired or not. + */ + + if (needgroup + && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { + hwloc_obj_t group = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX); + group->gp_index = 0; /* will be initialized at the end of the discovery once we know the max */ + group->cpuset = hwloc_bitmap_dup(obj->cpuset); + group->complete_cpuset = hwloc_bitmap_dup(obj->cpuset); + group->nodeset = hwloc_bitmap_dup(obj->nodeset); + group->complete_nodeset = hwloc_bitmap_dup(obj->complete_nodeset); + group->attr->group.kind = HWLOC_GROUP_KIND_MEMORY; + hwloc_insert_object_by_parent(topology, parent, group); + parent = group; + } + } + } + + /* fixup attribute-less caches imported from pre-v2.0 XMLs */ + if (attribute_less_cache) { + assert(obj->type == _HWLOC_OBJ_CACHE_OLD); + obj->type = hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type); + } + + /* fixup Misc objects inserted by cpusets in pre-v2.0 XMLs */ + if (obj->type == HWLOC_OBJ_MISC && obj->cpuset) + obj->type = HWLOC_OBJ_GROUP; + + /* check set consistency. + * 1.7.2 and earlier reported I/O Groups with only a cpuset, we don't want to reject those XMLs yet. + * Ignore those Groups since fixing the missing sets is hard (would need to look at children sets which are not available yet). + * Just abort the XML for non-Groups. 
+ */ + if (!obj->cpuset != !obj->complete_cpuset) { + /* has some cpuset without others */ + if (obj->type == HWLOC_OBJ_GROUP) { + ignored = 1; + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid object %s P#%u with some missing cpusets\n", + state->global->msgprefix, hwloc_obj_type_string(obj->type), obj->os_index); + goto error_with_object; + } + } else if (!obj->nodeset != !obj->complete_nodeset) { + /* has some nodeset without others */ + if (obj->type == HWLOC_OBJ_GROUP) { + ignored = 1; + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid object %s P#%u with some missing nodesets\n", + state->global->msgprefix, hwloc_obj_type_string(obj->type), obj->os_index); + goto error_with_object; + } + } else if (obj->nodeset && !obj->cpuset) { + /* has nodesets without cpusets (the contrary is allowed in pre-2.0) */ + if (obj->type == HWLOC_OBJ_GROUP) { + ignored = 1; + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid object %s P#%u with either cpuset or nodeset missing\n", + state->global->msgprefix, hwloc_obj_type_string(obj->type), obj->os_index); + goto error_with_object; + } + } + /* end of 1.x specific checks */ + } + + /* check that cache attributes are coherent with the actual type */ + if (hwloc__obj_type_is_cache(obj->type) + && obj->type != hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type)) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid cache type %s with attribute depth %u and type %d\n", + state->global->msgprefix, hwloc_obj_type_string(obj->type), obj->attr->cache.depth, (int) obj->attr->cache.type); + goto error_with_object; + } + + /* check special types vs cpuset */ + if (!obj->cpuset && !hwloc__obj_type_is_special(obj->type)) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid normal object %s P#%u without cpuset\n", + state->global->msgprefix, hwloc_obj_type_string(obj->type), obj->os_index); + goto error_with_object; + } + if (obj->cpuset && hwloc__obj_type_is_special(obj->type)) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid special object %s with cpuset\n", + state->global->msgprefix, hwloc_obj_type_string(obj->type)); + goto error_with_object; + } + + /* check parent vs child sets */ + if (obj->cpuset && parent && !parent->cpuset) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid object %s P#%u with cpuset while parent has none\n", + state->global->msgprefix, hwloc_obj_type_string(obj->type), obj->os_index); + goto error_with_object; + } + if (obj->nodeset && parent && !parent->nodeset) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid object %s P#%u with nodeset while parent has none\n", + state->global->msgprefix, hwloc_obj_type_string(obj->type), obj->os_index); + goto error_with_object; + } + + /* check NUMA nodes */ + if (obj->type == HWLOC_OBJ_NUMANODE) { + if (!obj->nodeset) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid NUMA node object P#%u without nodeset\n", + state->global->msgprefix, obj->os_index); + goto error_with_object; + } + data->nbnumanodes++; + obj->prev_cousin = data->last_numanode; + obj->next_cousin = NULL; + if (data->last_numanode) + data->last_numanode->next_cousin = obj; + else + data->first_numanode = obj; + data->last_numanode = obj; + } + + if (!hwloc_filter_check_keep_object(topology, obj)) { + /* Ignore this object instead of inserting it. + * + * Well, let the core ignore the root object later + * because we don't know yet if root has more than one child. 
+ */ + if (parent) + ignored = 1; + } + + if (parent && !ignored) { + /* root->parent is NULL, and root is already inserted */ + hwloc_insert_object_by_parent(topology, parent, obj); + /* insert_object_by_parent() doesn't merge during insert, so obj is still valid */ + } + + /* process object subnodes, if we found one in the above loop */ + while (tag) { + int ret; + + if (!strcmp(tag, "object")) { + hwloc_obj_t childobj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_TYPE_MAX, HWLOC_UNKNOWN_INDEX); + childobj->parent = ignored ? parent : obj; + ret = hwloc__xml_import_object(topology, data, ignored ? parent : obj, childobj, + &childrengotignored, + &childstate); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid special object child %s while looking for objects\n", + state->global->msgprefix, tag); + ret = -1; + } + + if (ret < 0) + goto error; + + state->global->close_child(&childstate); + + tag = NULL; + ret = state->global->find_child(state, &childstate, &tag); + if (ret < 0) + goto error; + if (!ret) + break; + } + + if (numa_was_root) { + /* duplicate NUMA infos to root, most of them are likely root-specific */ + unsigned i; + for(i=0; i<obj->infos_count; i++) { + struct hwloc_info_s *info = &obj->infos[i]; + hwloc_obj_add_info(parent, info->name, info->value); + } + /* TODO some infos are root-only (hwlocVersion, ProcessName, etc), remove them from obj? */ + } + + if (ignored) { + /* drop that object, and tell the parent that one child got ignored */ + hwloc_free_unlinked_object(obj); + *gotignored = 1; + + } else if (obj->first_child) { + /* now that all children are inserted, make sure they are in-order, + * so that the core doesn't have to deal with a crappy children list. + */ + hwloc_obj_t cur, next; + for(cur = obj->first_child, next = cur->next_sibling; + next; + cur = next, next = next->next_sibling) { + /* If reordering is needed, at least one pair of consecutive children will be out-of-order. + * So just check pairs of consecutive children. + * + * We checked above that complete_cpuset is always set. + */ + if (hwloc_bitmap_compare_first(next->complete_cpuset, cur->complete_cpuset) < 0) { + /* next should be before cur */ + if (!childrengotignored) { + static int reported = 0; + if (!reported && !hwloc_hide_errors()) { + hwloc__xml_import_report_outoforder(topology, next, cur); + reported = 1; + } + } + hwloc__reorder_children(obj); + break; + } + } + /* no need to reorder memory children as long as there are no intermediate memory objects + * that could cause reordering when filtered-out. + */ + } + + return state->global->close_tag(state); + + error_with_object: + if (parent) + /* root->parent is NULL, and root is already inserted. The caller will clean up that root. 
*/ + hwloc_free_unlinked_object(obj); + error: + return -1; +} + +static int +hwloc__xml_v2import_distances(hwloc_topology_t topology, + hwloc__xml_import_state_t state) +{ + hwloc_obj_type_t type = HWLOC_OBJ_TYPE_NONE; + unsigned nbobjs = 0; + int indexing = 0; + int os_indexing = 0; + int gp_indexing = 0; + unsigned long kind = 0; + unsigned nr_indexes, nr_u64values; + uint64_t *indexes; + uint64_t *u64values; + int ret; + + /* process attributes */ + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "nbobjs")) + nbobjs = strtoul(attrvalue, NULL, 10); + else if (!strcmp(attrname, "type")) { + if (hwloc_type_sscanf(attrvalue, &type, NULL, 0) < 0) + goto out; + } + else if (!strcmp(attrname, "indexing")) { + indexing = 1; + if (!strcmp(attrvalue, "os")) + os_indexing = 1; + else if (!strcmp(attrvalue, "gp")) + gp_indexing = 1; + } + else if (!strcmp(attrname, "kind")) { + kind = strtoul(attrvalue, NULL, 10); + } + else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown distance attribute %s\n", + state->global->msgprefix, attrname); + } + } + + /* abort if missing attribute */ + if (!nbobjs || type == HWLOC_OBJ_TYPE_NONE || !indexing || !kind) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: distance2 missing some attributes\n", + state->global->msgprefix); + goto out; + } + + indexes = malloc(nbobjs*sizeof(*indexes)); + u64values = malloc(nbobjs*nbobjs*sizeof(*u64values)); + if (!indexes || !u64values) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: failed to allocate distances arrays for %u objects\n", + state->global->msgprefix, nbobjs); + goto out_with_arrays; + } + + /* process children */ + nr_indexes = 0; + nr_u64values = 0; + while (1) { + struct hwloc__xml_import_state_s childstate; + char *attrname, *attrvalue, *tag, *buffer; + int length; + int is_index = 0; + int is_u64values = 0; + + ret = state->global->find_child(state, &childstate, &tag); + if (ret <= 0) + break; + + if (!strcmp(tag, "indexes")) + is_index = 1; + else if (!strcmp(tag, "u64values")) + is_u64values = 1; + if (!is_index && !is_u64values) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: distance2 with unrecognized child %s\n", + state->global->msgprefix, tag); + goto out_with_arrays; + } + + if (state->global->next_attr(&childstate, &attrname, &attrvalue) < 0 + || strcmp(attrname, "length")) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: distance2 child must have length attribute\n", + state->global->msgprefix); + goto out_with_arrays; + } + length = atoi(attrvalue); + + ret = state->global->get_content(&childstate, &buffer, length); + if (ret < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: distance2 child needs content of length %d\n", + state->global->msgprefix, length); + goto out_with_arrays; + } + + if (is_index) { + /* get indexes */ + char *tmp; + if (nr_indexes >= nbobjs) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: distance2 with more than %u indexes\n", + state->global->msgprefix, nbobjs); + goto out_with_arrays; + } + tmp = buffer; + while (1) { + char *next; + unsigned long long u = strtoull(tmp, &next, 0); + if (next == tmp) + break; + indexes[nr_indexes++] = u; + if (*next != ' ') + break; + if (nr_indexes == nbobjs) + break; + tmp = next+1; + } + + } else if (is_u64values) { + /* get uint64_t values */ + char *tmp; + if (nr_u64values >= nbobjs*nbobjs) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: distance2 with more than %u u64values\n", + 
state->global->msgprefix, nbobjs*nbobjs); + goto out_with_arrays; + } + tmp = buffer; + while (1) { + char *next; + unsigned long long u = strtoull(tmp, &next, 0); + if (next == tmp) + break; + u64values[nr_u64values++] = u; + if (*next != ' ') + break; + if (nr_u64values == nbobjs*nbobjs) + break; + tmp = next+1; + } + } + + state->global->close_content(&childstate); + + ret = state->global->close_tag(&childstate); + if (ret < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: distance2 child end tag missing\n", + state->global->msgprefix); + goto out_with_arrays; + } + + state->global->close_child(&childstate); + } + + if (nr_indexes != nbobjs) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: distance2 with less than %u indexes\n", + state->global->msgprefix, nbobjs); + goto out_with_arrays; + } + if (nr_u64values != nbobjs*nbobjs) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: distance2 with less than %u u64values\n", + state->global->msgprefix, nbobjs*nbobjs); + goto out_with_arrays; + } + + if (nbobjs < 2) { + /* distances with a single object are useless, even if the XML isn't invalid */ + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring distances2 with only %u objects\n", + state->global->msgprefix, nbobjs); + goto out_ignore; + } + if (type == HWLOC_OBJ_PU || type == HWLOC_OBJ_NUMANODE) { + if (!os_indexing) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring PU or NUMA distances2 without os_indexing\n", + state->global->msgprefix); + goto out_ignore; + } + } else { + if (!gp_indexing) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring !PU or !NUMA distances2 without gp_indexing\n", + state->global->msgprefix); + goto out_ignore; + } + } + + hwloc_internal_distances_add_by_index(topology, type, nbobjs, indexes, u64values, kind, 0); + + /* prevent freeing below */ + indexes = NULL; + u64values = NULL; + + out_ignore: + free(indexes); + free(u64values); + return state->global->close_tag(state); + + out_with_arrays: + free(indexes); + free(u64values); + out: + return -1; +} + +static int +hwloc__xml_import_diff_one(hwloc__xml_import_state_t state, + hwloc_topology_diff_t *firstdiffp, + hwloc_topology_diff_t *lastdiffp) +{ + char *type_s = NULL; + char *obj_depth_s = NULL; + char *obj_index_s = NULL; + char *obj_attr_type_s = NULL; +/* char *obj_attr_index_s = NULL; unused for now */ + char *obj_attr_name_s = NULL; + char *obj_attr_oldvalue_s = NULL; + char *obj_attr_newvalue_s = NULL; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "type")) + type_s = attrvalue; + else if (!strcmp(attrname, "obj_depth")) + obj_depth_s = attrvalue; + else if (!strcmp(attrname, "obj_index")) + obj_index_s = attrvalue; + else if (!strcmp(attrname, "obj_attr_type")) + obj_attr_type_s = attrvalue; + else if (!strcmp(attrname, "obj_attr_index")) + { /* obj_attr_index_s = attrvalue; unused for now */ } + else if (!strcmp(attrname, "obj_attr_name")) + obj_attr_name_s = attrvalue; + else if (!strcmp(attrname, "obj_attr_oldvalue")) + obj_attr_oldvalue_s = attrvalue; + else if (!strcmp(attrname, "obj_attr_newvalue")) + obj_attr_newvalue_s = attrvalue; + else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: unrecognized diff attribute %s\n", + state->global->msgprefix, attrname); + return -1; + } + } + + if (type_s) { + switch (atoi(type_s)) { + default: + break; + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR: { + /* object attribute diff */ + 
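/* check the mandatory attributes gathered above before allocating the diff entry */ +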
hwloc_topology_diff_obj_attr_type_t obj_attr_type; + hwloc_topology_diff_t diff; + + /* obj_attr mandatory generic attributes */ + if (!obj_depth_s || !obj_index_s || !obj_attr_type_s) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: missing mandatory obj attr generic attributes\n", + state->global->msgprefix); + break; + } + + /* obj_attr mandatory attributes common to all subtypes */ + if (!obj_attr_oldvalue_s || !obj_attr_newvalue_s) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: missing mandatory obj attr value attributes\n", + state->global->msgprefix); + break; + } + + /* mandatory attributes for obj_attr_info subtype */ + obj_attr_type = atoi(obj_attr_type_s); + if (obj_attr_type == HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO && !obj_attr_name_s) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: missing mandatory obj attr info name attribute\n", + state->global->msgprefix); + break; + } + + /* now we know we have everything we need */ + diff = malloc(sizeof(*diff)); + if (!diff) + return -1; + diff->obj_attr.type = HWLOC_TOPOLOGY_DIFF_OBJ_ATTR; + diff->obj_attr.obj_depth = atoi(obj_depth_s); + diff->obj_attr.obj_index = atoi(obj_index_s); + memset(&diff->obj_attr.diff, 0, sizeof(diff->obj_attr.diff)); + diff->obj_attr.diff.generic.type = obj_attr_type; + + switch (atoi(obj_attr_type_s)) { + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE: + diff->obj_attr.diff.uint64.oldvalue = strtoull(obj_attr_oldvalue_s, NULL, 0); + diff->obj_attr.diff.uint64.newvalue = strtoull(obj_attr_newvalue_s, NULL, 0); + break; + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO: + diff->obj_attr.diff.string.name = strdup(obj_attr_name_s); + /* FALLTHRU */ + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME: + diff->obj_attr.diff.string.oldvalue = strdup(obj_attr_oldvalue_s); + diff->obj_attr.diff.string.newvalue = strdup(obj_attr_newvalue_s); + break; + } + + if (*firstdiffp) + (*lastdiffp)->generic.next = diff; + else + *firstdiffp = diff; + *lastdiffp = diff; + diff->generic.next = NULL; + } + } + } + + return state->global->close_tag(state); +} + +int +hwloc__xml_import_diff(hwloc__xml_import_state_t state, + hwloc_topology_diff_t *firstdiffp) +{ + hwloc_topology_diff_t firstdiff = NULL, lastdiff = NULL; + *firstdiffp = NULL; + + while (1) { + struct hwloc__xml_import_state_s childstate; + char *tag; + int ret; + + ret = state->global->find_child(state, &childstate, &tag); + if (ret < 0) + return -1; + if (!ret) + break; + + if (!strcmp(tag, "diff")) { + ret = hwloc__xml_import_diff_one(&childstate, &firstdiff, &lastdiff); + } else + ret = -1; + + if (ret < 0) + return ret; + + state->global->close_child(&childstate); + } + + *firstdiffp = firstdiff; + return 0; +} + +/*********************************** + ********* main XML import ********* + ***********************************/ + +static void +hwloc_convert_from_v1dist_floats(hwloc_topology_t topology, unsigned nbobjs, float *floats, uint64_t *u64s) +{ + unsigned i; + int is_uint; + char *env; + float scale = 1000.f; + char scalestring[20]; + + env = getenv("HWLOC_XML_V1DIST_SCALE"); + if (env) { + scale = (float) atof(env); + goto scale; + } + + is_uint = 1; + /* find out if all values are integers */ + for(i=0; i<nbobjs*nbobjs; i++) { + float f, iptr, fptr; + f = floats[i]; + if (f < 0.f) { + is_uint = 0; + break; + } + fptr = modff(f, &iptr); + if (fptr > .001f && fptr < .999f) { + is_uint = 0; + break; + } + u64s[i] = (int)(f+.5f); + } + if (is_uint) + return; + + scale: + /* TODO heuristic to find a good scale */ + for(i=0; i<nbobjs*nbobjs; i++) + u64s[i] = 
(uint64_t)(scale * floats[i]); + + /* save the scale in root info attrs. + * Not perfect since we may have multiple of them, + * and some distances might disappear in case of restrict, etc. + */ + sprintf(scalestring, "%f", scale); + hwloc_obj_add_info(hwloc_get_root_obj(topology), "xmlv1DistancesScale", scalestring); +} + +/* this canNOT be the first XML call */ +static int +hwloc_look_xml(struct hwloc_backend *backend) +{ + struct hwloc_topology *topology = backend->topology; + struct hwloc_xml_backend_data_s *data = backend->private_data; + struct hwloc__xml_import_state_s state, childstate; + struct hwloc_obj *root = topology->levels[0][0]; + char *tag; + int gotignored = 0; + hwloc_localeswitch_declare; + char *env; + int ret; + + state.global = data; + + assert(!root->cpuset); + + hwloc_localeswitch_init(); + + data->nbnumanodes = 0; + data->first_numanode = data->last_numanode = NULL; + data->first_v1dist = data->last_v1dist = NULL; + + env = getenv("HWLOC_DONT_MERGE_DIE_GROUPS"); + data->dont_merge_die_groups = env && atoi(env); + + ret = data->look_init(data, &state); + if (ret < 0) + goto failed; + + if (data->version_major > 2) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: cannot import XML version %u.%u > 2\n", + data->msgprefix, data->version_major, data->version_minor); + goto err; + } + + /* find root object tag and import it */ + ret = state.global->find_child(&state, &childstate, &tag); + if (ret < 0 || !ret || strcmp(tag, "object")) + goto failed; + ret = hwloc__xml_import_object(topology, data, NULL /* no parent */, root, + &gotignored, + &childstate); + if (ret < 0) + goto failed; + state.global->close_child(&childstate); + assert(!gotignored); + + /* the root may have changed if we had to reinsert a Machine */ + root = topology->levels[0][0]; + + if (data->version_major >= 2) { + /* find v2 distances */ + while (1) { + ret = state.global->find_child(&state, &childstate, &tag); + if (ret < 0) + goto failed; + if (!ret) + break; + if (strcmp(tag, "distances2")) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown tag `%s' after root object, expected `distances2'\n", + data->msgprefix, tag); + goto done; + } + ret = hwloc__xml_v2import_distances(topology, &childstate); + if (ret < 0) + goto failed; + state.global->close_child(&childstate); + } + } + + /* find end of topology tag */ + state.global->close_tag(&state); + +done: + if (!root->cpuset) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid root object without cpuset\n", + data->msgprefix); + goto err; + } + + /* update pre-v2.0 memory group gp_index */ + if (data->version_major < 2 && data->first_numanode) { + hwloc_obj_t node = data->first_numanode; + do { + if (node->parent->type == HWLOC_OBJ_GROUP + && !node->parent->gp_index) + node->parent->gp_index = topology->next_gp_index++; + node = node->next_cousin; + } while (node); + } + + if (data->version_major < 2 && data->first_v1dist) { + /* handle v1 distances */ + struct hwloc__xml_imported_v1distances_s *v1dist, *v1next = data->first_v1dist; + while ((v1dist = v1next) != NULL) { + unsigned nbobjs = v1dist->nbobjs; + v1next = v1dist->next; + /* Handle distances as NUMA node distances if nbobjs matches. + * Otherwise drop them; only NUMA distances really matter. + * + * We could also attach to a random level with the right nbobjs, + * but it would require having those objects in the original XML order (like the first_numanode cousin-list), + * because the topology order can be different if some parents are ignored during load. 
+ */ + if (nbobjs == data->nbnumanodes) { + hwloc_obj_t *objs = malloc(nbobjs*sizeof(hwloc_obj_t)); + uint64_t *values = malloc(nbobjs*nbobjs*sizeof(*values)); + if (objs && values) { + hwloc_obj_t node; + unsigned i; + for(i=0, node = data->first_numanode; + i<nbobjs; + i++, node = node->next_cousin) + objs[i] = node; + hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); + hwloc_internal_distances_add(topology, nbobjs, objs, values, v1dist->kind, 0); + } else { + free(objs); + free(values); + } + } + free(v1dist->floats); + free(v1dist); + } + data->first_v1dist = data->last_v1dist = NULL; + } + + /* FIXME: + * We should check that the existing object sets are consistent: + * no intersection between objects of a same level, + * object sets included in parent sets. + * hwloc never generated such buggy XML, but users could create one. + * + * We want to add these checks to the existing core code that + * adds missing sets and propagates parent/children sets + * (in case another backend ever generates buggy object sets as well). + */ + + if (data->version_major >= 2) { + /* v2 must have non-empty nodesets since at least one NUMA node is required */ + if (!root->nodeset) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid root object without nodeset\n", + data->msgprefix); + goto err; + } + if (hwloc_bitmap_iszero(root->nodeset)) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: invalid root object with empty nodeset\n", + data->msgprefix); + goto err; + } + } else { + /* if v1 without nodeset, the core will add a default NUMA node and nodesets */ + } + + /* allocate default cpusets and nodesets if missing; the core will restrict them */ + hwloc_alloc_root_sets(root); + + /* keep the "Backend" information intact */ + /* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */ + + topology->support.discovery->pu = 1; + if (data->nbnumanodes) { + topology->support.discovery->numa = 1; + topology->support.discovery->numa_memory = 1; // FIXME + } + + if (data->look_done) + data->look_done(data, 0); + + hwloc_localeswitch_fini(); + return 0; + + failed: + if (data->look_done) + data->look_done(data, -1); + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: XML component discovery failed.\n", + data->msgprefix); + err: + hwloc_free_object_siblings_and_children(root->first_child); + root->first_child = NULL; + hwloc_free_object_siblings_and_children(root->memory_first_child); + root->memory_first_child = NULL; + hwloc_free_object_siblings_and_children(root->io_first_child); + root->io_first_child = NULL; + hwloc_free_object_siblings_and_children(root->misc_first_child); + root->misc_first_child = NULL; + + /* make sure the core will abort */ + if (root->cpuset) + hwloc_bitmap_zero(root->cpuset); + if (root->nodeset) + hwloc_bitmap_zero(root->nodeset); + + hwloc_localeswitch_fini(); + return -1; +} + +/* this can be the first XML call */ +int +hwloc_topology_diff_load_xml(const char *xmlpath, + hwloc_topology_diff_t *firstdiffp, char **refnamep) +{ + struct hwloc__xml_import_state_s state; + struct hwloc_xml_backend_data_s fakedata; /* only for storing global info during parsing */ + hwloc_localeswitch_declare; + const char *local_basename; + int force_nolibxml; + int ret; + + state.global = &fakedata; + + local_basename = strrchr(xmlpath, '/'); + if (local_basename) + local_basename++; + else + local_basename = xmlpath; + fakedata.msgprefix = strdup(local_basename); + + hwloc_components_init(); + 
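/* hwloc_components_init() registers the always-available no-libxml XML callbacks, hence the assertion below */ +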
assert(hwloc_nolibxml_callbacks); + + hwloc_localeswitch_init(); + + *firstdiffp = NULL; + + force_nolibxml = hwloc_nolibxml_import(); +retry: + if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml)) + ret = hwloc_nolibxml_callbacks->import_diff(&state, xmlpath, NULL, 0, firstdiffp, refnamep); + else { + ret = hwloc_libxml_callbacks->import_diff(&state, xmlpath, NULL, 0, firstdiffp, refnamep); + if (ret < 0 && errno == ENOSYS) { + hwloc_libxml_callbacks = NULL; + goto retry; + } + } + + hwloc_localeswitch_fini(); + hwloc_components_fini(); + free(fakedata.msgprefix); + return ret; +} + +/* this can be the first XML call */ +int +hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int buflen, + hwloc_topology_diff_t *firstdiffp, char **refnamep) +{ + struct hwloc__xml_import_state_s state; + struct hwloc_xml_backend_data_s fakedata; /* only for storing global info during parsing */ + hwloc_localeswitch_declare; + int force_nolibxml; + int ret; + + state.global = &fakedata; + fakedata.msgprefix = strdup("xmldiffbuffer"); + + hwloc_components_init(); + assert(hwloc_nolibxml_callbacks); + + hwloc_localeswitch_init(); + + *firstdiffp = NULL; + + force_nolibxml = hwloc_nolibxml_import(); + retry: + if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml)) + ret = hwloc_nolibxml_callbacks->import_diff(&state, NULL, xmlbuffer, buflen, firstdiffp, refnamep); + else { + ret = hwloc_libxml_callbacks->import_diff(&state, NULL, xmlbuffer, buflen, firstdiffp, refnamep); + if (ret < 0 && errno == ENOSYS) { + hwloc_libxml_callbacks = NULL; + goto retry; + } + } + + hwloc_localeswitch_fini(); + hwloc_components_fini(); + free(fakedata.msgprefix); + return ret; +} + +/************************************************ + ********* XML export (common routines) ********* + ************************************************/ + +#define HWLOC_XML_CHAR_VALID(c) (((c) >= 32 && (c) <= 126) || (c) == '\t' || (c) == '\n' || (c) == '\r') + +static int +hwloc__xml_export_check_buffer(const char *buf, size_t length) +{ + unsigned i; + for(i=0; i<length; i++) + if (!HWLOC_XML_CHAR_VALID(buf[i])) + return -1; + return 0; +} + +/* strdup and remove ugly chars from random string */ +static char* +hwloc__xml_export_safestrdup(const char *old) +{ + char *new = malloc(strlen(old)+1); + char *dst = new; + const char *src = old; + while (*src) { + if (HWLOC_XML_CHAR_VALID(*src)) + *(dst++) = *src; + src++; + } + *dst = '\0'; + return new; +} + +static void +hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags) +{ + char *setstring = NULL, *setstring2 = NULL; + char tmp[255]; + int v1export = flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1; + unsigned i,j; + + if (v1export && obj->type == HWLOC_OBJ_PACKAGE) + state->new_prop(state, "type", "Socket"); + else if (v1export && hwloc__obj_type_is_cache(obj->type)) + state->new_prop(state, "type", "Cache"); + else + state->new_prop(state, "type", hwloc_obj_type_string(obj->type)); + + if (obj->os_index != HWLOC_UNKNOWN_INDEX) { + sprintf(tmp, "%u", obj->os_index); + state->new_prop(state, "os_index", tmp); + } + + if (obj->cpuset) { + if (v1export && obj->type == HWLOC_OBJ_NUMANODE && obj->sibling_rank > 0) { + /* v1 non-first NUMA nodes have empty cpusets */ + state->new_prop(state, "cpuset", "0x0"); + state->new_prop(state, "online_cpuset", "0x0"); + state->new_prop(state, "complete_cpuset", "0x0"); + state->new_prop(state, "allowed_cpuset", "0x0"); + + } else { + /* 
normal case */ + hwloc_bitmap_asprintf(&setstring, obj->cpuset); + state->new_prop(state, "cpuset", setstring); + + hwloc_bitmap_asprintf(&setstring2, obj->complete_cpuset); + state->new_prop(state, "complete_cpuset", setstring2); + free(setstring2); + + if (v1export) + state->new_prop(state, "online_cpuset", setstring); + free(setstring); + + if (v1export || !obj->parent) { + hwloc_bitmap_t allowed_cpuset = hwloc_bitmap_dup(obj->cpuset); + hwloc_bitmap_and(allowed_cpuset, allowed_cpuset, topology->allowed_cpuset); + hwloc_bitmap_asprintf(&setstring, allowed_cpuset); + state->new_prop(state, "allowed_cpuset", setstring); + free(setstring); + hwloc_bitmap_free(allowed_cpuset); + } + } + + /* If exporting v1, we should clear second local NUMA bits from nodeset, + * but the importer will clear them anyway. + */ + hwloc_bitmap_asprintf(&setstring, obj->nodeset); + state->new_prop(state, "nodeset", setstring); + free(setstring); + + hwloc_bitmap_asprintf(&setstring, obj->complete_nodeset); + state->new_prop(state, "complete_nodeset", setstring); + free(setstring); + + if (v1export || !obj->parent) { + hwloc_bitmap_t allowed_nodeset = hwloc_bitmap_dup(obj->nodeset); + hwloc_bitmap_and(allowed_nodeset, allowed_nodeset, topology->allowed_nodeset); + hwloc_bitmap_asprintf(&setstring, allowed_nodeset); + state->new_prop(state, "allowed_nodeset", setstring); + free(setstring); + hwloc_bitmap_free(allowed_nodeset); + } + } + + if (!v1export) { + sprintf(tmp, "%llu", (unsigned long long) obj->gp_index); + state->new_prop(state, "gp_index", tmp); + } + + if (obj->name) { + char *name = hwloc__xml_export_safestrdup(obj->name); + state->new_prop(state, "name", name); + free(name); + } + if (!v1export && obj->subtype) { + char *subtype = hwloc__xml_export_safestrdup(obj->subtype); + state->new_prop(state, "subtype", subtype); + free(subtype); + } + + switch (obj->type) { + case HWLOC_OBJ_NUMANODE: + if (obj->attr->numanode.local_memory) { + sprintf(tmp, "%llu", (unsigned long long) obj->attr->numanode.local_memory); + state->new_prop(state, "local_memory", tmp); + } + for(i=0; i<obj->attr->numanode.page_types_len; i++) { + struct hwloc__xml_export_state_s childstate; + state->new_child(state, &childstate, "page_type"); + sprintf(tmp, "%llu", (unsigned long long) obj->attr->numanode.page_types[i].size); + childstate.new_prop(&childstate, "size", tmp); + sprintf(tmp, "%llu", (unsigned long long) obj->attr->numanode.page_types[i].count); + childstate.new_prop(&childstate, "count", tmp); + childstate.end_object(&childstate, "page_type"); + } + break; + case HWLOC_OBJ_L1CACHE: + case HWLOC_OBJ_L2CACHE: + case HWLOC_OBJ_L3CACHE: + case HWLOC_OBJ_L4CACHE: + case HWLOC_OBJ_L5CACHE: + case HWLOC_OBJ_L1ICACHE: + case HWLOC_OBJ_L2ICACHE: + case HWLOC_OBJ_L3ICACHE: + sprintf(tmp, "%llu", (unsigned long long) obj->attr->cache.size); + state->new_prop(state, "cache_size", tmp); + sprintf(tmp, "%u", obj->attr->cache.depth); + state->new_prop(state, "depth", tmp); + sprintf(tmp, "%u", (unsigned) obj->attr->cache.linesize); + state->new_prop(state, "cache_linesize", tmp); + sprintf(tmp, "%d", obj->attr->cache.associativity); + state->new_prop(state, "cache_associativity", tmp); + sprintf(tmp, "%d", (int) obj->attr->cache.type); + state->new_prop(state, "cache_type", tmp); + break; + case HWLOC_OBJ_GROUP: + if (v1export) { + sprintf(tmp, "%u", obj->attr->group.depth); + state->new_prop(state, "depth", tmp); + if (obj->attr->group.dont_merge) + state->new_prop(state, "dont_merge", "1"); + } else { + sprintf(tmp, "%u", 
obj->attr->group.kind); + state->new_prop(state, "kind", tmp); + sprintf(tmp, "%u", obj->attr->group.subkind); + state->new_prop(state, "subkind", tmp); + if (obj->attr->group.dont_merge) + state->new_prop(state, "dont_merge", "1"); + } + break; + case HWLOC_OBJ_BRIDGE: + sprintf(tmp, "%d-%d", (int) obj->attr->bridge.upstream_type, (int) obj->attr->bridge.downstream_type); + state->new_prop(state, "bridge_type", tmp); + sprintf(tmp, "%u", obj->attr->bridge.depth); + state->new_prop(state, "depth", tmp); + if (obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) { + sprintf(tmp, "%04x:[%02x-%02x]", + (unsigned) obj->attr->bridge.downstream.pci.domain, + (unsigned) obj->attr->bridge.downstream.pci.secondary_bus, + (unsigned) obj->attr->bridge.downstream.pci.subordinate_bus); + state->new_prop(state, "bridge_pci", tmp); + } + if (obj->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI) + break; + /* FALLTHRU */ + case HWLOC_OBJ_PCI_DEVICE: + sprintf(tmp, "%04x:%02x:%02x.%01x", + (unsigned) obj->attr->pcidev.domain, + (unsigned) obj->attr->pcidev.bus, + (unsigned) obj->attr->pcidev.dev, + (unsigned) obj->attr->pcidev.func); + state->new_prop(state, "pci_busid", tmp); + sprintf(tmp, "%04x [%04x:%04x] [%04x:%04x] %02x", + (unsigned) obj->attr->pcidev.class_id, + (unsigned) obj->attr->pcidev.vendor_id, (unsigned) obj->attr->pcidev.device_id, + (unsigned) obj->attr->pcidev.subvendor_id, (unsigned) obj->attr->pcidev.subdevice_id, + (unsigned) obj->attr->pcidev.revision); + state->new_prop(state, "pci_type", tmp); + sprintf(tmp, "%f", obj->attr->pcidev.linkspeed); + state->new_prop(state, "pci_link_speed", tmp); + break; + case HWLOC_OBJ_OS_DEVICE: + sprintf(tmp, "%d", (int) obj->attr->osdev.type); + state->new_prop(state, "osdev_type", tmp); + break; + default: + break; + } + + for(i=0; i<obj->infos_count; i++) { + char *name = hwloc__xml_export_safestrdup(obj->infos[i].name); + char *value = hwloc__xml_export_safestrdup(obj->infos[i].value); + struct hwloc__xml_export_state_s childstate; + state->new_child(state, &childstate, "info"); + childstate.new_prop(&childstate, "name", name); + childstate.new_prop(&childstate, "value", value); + childstate.end_object(&childstate, "info"); + free(name); + free(value); + } + if (v1export && obj->subtype) { + char *subtype = hwloc__xml_export_safestrdup(obj->subtype); + struct hwloc__xml_export_state_s childstate; + int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); + state->new_child(state, &childstate, "info"); + childstate.new_prop(&childstate, "name", is_coproctype ? 
"CoProcType" : "Type"); + childstate.new_prop(&childstate, "value", subtype); + childstate.end_object(&childstate, "info"); + free(subtype); + } + + if (v1export && !obj->parent) { + /* only latency matrices covering the entire machine can be exported to v1 */ + struct hwloc_internal_distances_s *dist; + /* refresh distances since we need objects below */ + hwloc_internal_distances_refresh(topology); + for(dist = topology->first_dist; dist; dist = dist->next) { + struct hwloc__xml_export_state_s childstate; + unsigned nbobjs = dist->nbobjs; + int depth; + + if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->type)) + continue; + if (!(dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY)) + continue; + { + HWLOC_VLA(unsigned, logical_to_v2array, nbobjs); + for(i=0; i<nbobjs; i++) + logical_to_v2array[dist->objs[i]->logical_index] = i; + + /* compute the relative depth */ + if (dist->type == HWLOC_OBJ_NUMANODE) { + /* for NUMA nodes, use the highest normal-parent depth + 1 */ + depth = -1; + for(i=0; i<nbobjs; i++) { + hwloc_obj_t parent = dist->objs[i]->parent; + while (hwloc__obj_type_is_memory(parent->type)) + parent = parent->parent; + if (parent->depth+1 > depth) + depth = parent->depth+1; + } + } else { + /* for non-NUMA nodes, increase the object depth if any of them has memory above */ + int parent_with_memory = 0; + for(i=0; i<nbobjs; i++) { + hwloc_obj_t parent = dist->objs[i]->parent; + while (parent) { + if (parent->memory_first_child) { + parent_with_memory = 1; + goto done; + } + parent = parent->parent; + } + } + done: + depth = hwloc_get_type_depth(topology, dist->type) + parent_with_memory; + } + + state->new_child(state, &childstate, "distances"); + sprintf(tmp, "%u", nbobjs); + childstate.new_prop(&childstate, "nbobjs", tmp); + sprintf(tmp, "%d", depth); + childstate.new_prop(&childstate, "relative_depth", tmp); + sprintf(tmp, "%f", 1.f); + childstate.new_prop(&childstate, "latency_base", tmp); + for(i=0; i<nbobjs; i++) { + for(j=0; j<nbobjs; j++) { + /* we should export i*nbobjs+j, we translate using logical_to_v2array[] */ + unsigned k = logical_to_v2array[i]*nbobjs+logical_to_v2array[j]; + struct hwloc__xml_export_state_s greatchildstate; + childstate.new_child(&childstate, &greatchildstate, "latency"); + sprintf(tmp, "%f", (float) dist->values[k]); + greatchildstate.new_prop(&greatchildstate, "value", tmp); + greatchildstate.end_object(&greatchildstate, "latency"); + } + } + childstate.end_object(&childstate, "distances"); + } + } + } + + if (obj->userdata && topology->userdata_export_cb) + topology->userdata_export_cb((void*) state, topology, obj); +} + +static void +hwloc__xml_v2export_object (hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags) +{ + struct hwloc__xml_export_state_s state; + hwloc_obj_t child; + + parentstate->new_child(parentstate, &state, "object"); + + hwloc__xml_export_object_contents(&state, topology, obj, flags); + + for_each_memory_child(child, obj) + hwloc__xml_v2export_object (&state, topology, child, flags); + for_each_child(child, obj) + hwloc__xml_v2export_object (&state, topology, child, flags); + for_each_io_child(child, obj) + hwloc__xml_v2export_object (&state, topology, child, flags); + for_each_misc_child(child, obj) + hwloc__xml_v2export_object (&state, topology, child, flags); + + state.end_object(&state, "object"); +} + +static void +hwloc__xml_v1export_object (hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags); + 
+static void +hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags) +{ + struct hwloc__xml_export_state_s gstate, mstate, ostate, *state = parentstate; + hwloc_obj_t child; + + if (obj->parent->arity > 1 && obj->memory_arity > 1 && parentstate->global->v1_memory_group) { + /* child has sibling, we must add a Group around those memory children */ + hwloc_obj_t group = parentstate->global->v1_memory_group; + parentstate->new_child(parentstate, &gstate, "object"); + group->cpuset = obj->cpuset; + group->complete_cpuset = obj->complete_cpuset; + group->nodeset = obj->nodeset; + group->complete_nodeset = obj->complete_nodeset; + hwloc__xml_export_object_contents (&gstate, topology, group, flags); + group->cpuset = NULL; + group->complete_cpuset = NULL; + group->nodeset = NULL; + group->complete_nodeset = NULL; + state = &gstate; + } + + /* export first memory child */ + child = obj->memory_first_child; + assert(child->type == HWLOC_OBJ_NUMANODE); + state->new_child(state, &mstate, "object"); + hwloc__xml_export_object_contents (&mstate, topology, child, flags); + + /* then the actual object */ + mstate.new_child(&mstate, &ostate, "object"); + hwloc__xml_export_object_contents (&ostate, topology, obj, flags); + + /* then its normal/io/misc children */ + for_each_child(child, obj) + hwloc__xml_v1export_object (&ostate, topology, child, flags); + for_each_io_child(child, obj) + hwloc__xml_v1export_object (&ostate, topology, child, flags); + for_each_misc_child(child, obj) + hwloc__xml_v1export_object (&ostate, topology, child, flags); + + /* close object and first memory child */ + ostate.end_object(&ostate, "object"); + mstate.end_object(&mstate, "object"); + + /* now other memory children */ + for_each_memory_child(child, obj) + if (child->sibling_rank > 0) + hwloc__xml_v1export_object (state, topology, child, flags); + + if (state == &gstate) { + /* close group if any */ + gstate.end_object(&gstate, "object"); + } +} + +static void +hwloc__xml_v1export_object (hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags) +{ + struct hwloc__xml_export_state_s state; + hwloc_obj_t child; + + parentstate->new_child(parentstate, &state, "object"); + + hwloc__xml_export_object_contents(&state, topology, obj, flags); + + for_each_child(child, obj) { + if (!child->memory_arity) { + /* no memory child, just export normally */ + hwloc__xml_v1export_object (&state, topology, child, flags); + } else { + hwloc__xml_v1export_object_with_memory(&state, topology, child, flags); + } + } + + for_each_io_child(child, obj) + hwloc__xml_v1export_object (&state, topology, child, flags); + for_each_misc_child(child, obj) + hwloc__xml_v1export_object (&state, topology, child, flags); + + state.end_object(&state, "object"); +} + +#define EXPORT_ARRAY(state, type, nr, values, tagname, format, maxperline) do { \ + unsigned _i = 0; \ + while (_i<(nr)) { \ + char _tmp[255]; /* enough for (snprintf(format)+space) x maxperline */ \ + char _tmp2[16]; \ + size_t _len = 0; \ + unsigned _j; \ + struct hwloc__xml_export_state_s _childstate; \ + (state)->new_child(state, &_childstate, tagname); \ + for(_j=0; \ + _i+_j<(nr) && _j<maxperline; \ + _j++) \ + _len += sprintf(_tmp+_len, format " ", (type) (values)[_i+_j]); \ + _i += _j; \ + sprintf(_tmp2, "%lu", (unsigned long) _len); \ + _childstate.new_prop(&_childstate, "length", _tmp2); \ + _childstate.add_content(&_childstate, _tmp, _len); \ + 
_childstate.end_object(&_childstate, tagname); \ + } \ +} while (0) + +static void +hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology) +{ + struct hwloc_internal_distances_s *dist; + for(dist = topology->first_dist; dist; dist = dist->next) { + char tmp[255]; + unsigned nbobjs = dist->nbobjs; + struct hwloc__xml_export_state_s state; + + parentstate->new_child(parentstate, &state, "distances2"); + + state.new_prop(&state, "type", hwloc_obj_type_string(dist->type)); + sprintf(tmp, "%u", nbobjs); + state.new_prop(&state, "nbobjs", tmp); + sprintf(tmp, "%lu", dist->kind); + state.new_prop(&state, "kind", tmp); + + state.new_prop(&state, "indexing", + (dist->type == HWLOC_OBJ_NUMANODE || dist->type == HWLOC_OBJ_PU) ? "os" : "gp"); + /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */ + EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10); + EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10); + state.end_object(&state, "distances2"); + } +} + +void +hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t topology, unsigned long flags) +{ + hwloc_obj_t root = hwloc_get_root_obj(topology); + + if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) { + if (root->memory_first_child) { + /* we don't use hwloc__xml_v1export_object_with_memory() because we want/can keep root above the numa node */ + struct hwloc__xml_export_state_s rstate, mstate; + hwloc_obj_t child; + /* export the root */ + state->new_child(state, &rstate, "object"); + hwloc__xml_export_object_contents (&rstate, topology, root, flags); + /* export first memory child */ + child = root->memory_first_child; + assert(child->type == HWLOC_OBJ_NUMANODE); + rstate.new_child(&rstate, &mstate, "object"); + hwloc__xml_export_object_contents (&mstate, topology, child, flags); + /* then its normal/io/misc children */ + for_each_child(child, root) + hwloc__xml_v1export_object (&mstate, topology, child, flags); + for_each_io_child(child, root) + hwloc__xml_v1export_object (&mstate, topology, child, flags); + for_each_misc_child(child, root) + hwloc__xml_v1export_object (&mstate, topology, child, flags); + /* close first memory child */ + mstate.end_object(&mstate, "object"); + /* now other memory children */ + for_each_memory_child(child, root) + if (child->sibling_rank > 0) + hwloc__xml_v1export_object (&rstate, topology, child, flags); + /* close the root */ + rstate.end_object(&rstate, "object"); + } else { + hwloc__xml_v1export_object(state, topology, root, flags); + } + + } else { + hwloc__xml_v2export_object (state, topology, root, flags); + hwloc__xml_v2export_distances (state, topology); + } +} + +void +hwloc__xml_export_diff(hwloc__xml_export_state_t parentstate, hwloc_topology_diff_t diff) +{ + while (diff) { + struct hwloc__xml_export_state_s state; + char tmp[255]; + + parentstate->new_child(parentstate, &state, "diff"); + + sprintf(tmp, "%d", (int) diff->generic.type); + state.new_prop(&state, "type", tmp); + + switch (diff->generic.type) { + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR: + sprintf(tmp, "%d", diff->obj_attr.obj_depth); + state.new_prop(&state, "obj_depth", tmp); + sprintf(tmp, "%u", diff->obj_attr.obj_index); + state.new_prop(&state, "obj_index", tmp); + + sprintf(tmp, "%d", (int) diff->obj_attr.diff.generic.type); + state.new_prop(&state, "obj_attr_type", tmp); + + switch (diff->obj_attr.diff.generic.type) { + case 
HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE: + sprintf(tmp, "%llu", (unsigned long long) diff->obj_attr.diff.uint64.index); + state.new_prop(&state, "obj_attr_index", tmp); + sprintf(tmp, "%llu", (unsigned long long) diff->obj_attr.diff.uint64.oldvalue); + state.new_prop(&state, "obj_attr_oldvalue", tmp); + sprintf(tmp, "%llu", (unsigned long long) diff->obj_attr.diff.uint64.newvalue); + state.new_prop(&state, "obj_attr_newvalue", tmp); + break; + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME: + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO: + if (diff->obj_attr.diff.string.name) + state.new_prop(&state, "obj_attr_name", diff->obj_attr.diff.string.name); + state.new_prop(&state, "obj_attr_oldvalue", diff->obj_attr.diff.string.oldvalue); + state.new_prop(&state, "obj_attr_newvalue", diff->obj_attr.diff.string.newvalue); + break; + } + + break; + default: + assert(0); + } + state.end_object(&state, "diff"); + + diff = diff->generic.next; + } +} + +/********************************** + ********* main XML export ******** + **********************************/ + +/* this can be the first XML call */ +int hwloc_topology_export_xml(hwloc_topology_t topology, const char *filename, unsigned long flags) +{ + hwloc_localeswitch_declare; + struct hwloc__xml_export_data_s edata; + int force_nolibxml; + int ret; + + if (!topology->is_loaded) { + errno = EINVAL; + return -1; + } + + assert(hwloc_nolibxml_callbacks); /* the core called components_init() for the topology */ + + if (flags & ~HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) { + errno = EINVAL; + return -1; + } + + hwloc_internal_distances_refresh(topology); + + hwloc_localeswitch_init(); + + edata.v1_memory_group = NULL; + if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) + /* temporary group to be used during v1 export of memory children */ + edata.v1_memory_group = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX); + + force_nolibxml = hwloc_nolibxml_export(); +retry: + if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml)) + ret = hwloc_nolibxml_callbacks->export_file(topology, &edata, filename, flags); + else { + ret = hwloc_libxml_callbacks->export_file(topology, &edata, filename, flags); + if (ret < 0 && errno == ENOSYS) { + hwloc_libxml_callbacks = NULL; + goto retry; + } + } + + if (edata.v1_memory_group) + hwloc_free_unlinked_object(edata.v1_memory_group); + + hwloc_localeswitch_fini(); + return ret; +} + +/* this can be the first XML call */ +int hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen, unsigned long flags) +{ + hwloc_localeswitch_declare; + struct hwloc__xml_export_data_s edata; + int force_nolibxml; + int ret; + + if (!topology->is_loaded) { + errno = EINVAL; + return -1; + } + + assert(hwloc_nolibxml_callbacks); /* the core called components_init() for the topology */ + + if (flags & ~HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) { + errno = EINVAL; + return -1; + } + + hwloc_internal_distances_refresh(topology); + + hwloc_localeswitch_init(); + + edata.v1_memory_group = NULL; + if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) + /* temporary group to be used during v1 export of memory children */ + edata.v1_memory_group = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX); + + force_nolibxml = hwloc_nolibxml_export(); +retry: + if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml)) + ret = hwloc_nolibxml_callbacks->export_buffer(topology, &edata, xmlbuffer, buflen, flags); + else { + ret = hwloc_libxml_callbacks->export_buffer(topology, &edata, 
xmlbuffer, buflen, flags); + if (ret < 0 && errno == ENOSYS) { + hwloc_libxml_callbacks = NULL; + goto retry; + } + } + + if (edata.v1_memory_group) + hwloc_free_unlinked_object(edata.v1_memory_group); + + hwloc_localeswitch_fini(); + return ret; +} + +/* this can be the first XML call */ +int +hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname, + const char *filename) +{ + hwloc_localeswitch_declare; + hwloc_topology_diff_t tmpdiff; + int force_nolibxml; + int ret; + + tmpdiff = diff; + while (tmpdiff) { + if (tmpdiff->generic.type == HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX) { + errno = EINVAL; + return -1; + } + tmpdiff = tmpdiff->generic.next; + } + + hwloc_components_init(); + assert(hwloc_nolibxml_callbacks); + + hwloc_localeswitch_init(); + + force_nolibxml = hwloc_nolibxml_export(); +retry: + if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml)) + ret = hwloc_nolibxml_callbacks->export_diff_file(diff, refname, filename); + else { + ret = hwloc_libxml_callbacks->export_diff_file(diff, refname, filename); + if (ret < 0 && errno == ENOSYS) { + hwloc_libxml_callbacks = NULL; + goto retry; + } + } + + hwloc_localeswitch_fini(); + hwloc_components_fini(); + return ret; +} + +/* this can be the first XML call */ +int +hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname, + char **xmlbuffer, int *buflen) +{ + hwloc_localeswitch_declare; + hwloc_topology_diff_t tmpdiff; + int force_nolibxml; + int ret; + + tmpdiff = diff; + while (tmpdiff) { + if (tmpdiff->generic.type == HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX) { + errno = EINVAL; + return -1; + } + tmpdiff = tmpdiff->generic.next; + } + + hwloc_components_init(); + assert(hwloc_nolibxml_callbacks); + + hwloc_localeswitch_init(); + + force_nolibxml = hwloc_nolibxml_export(); +retry: + if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml)) + ret = hwloc_nolibxml_callbacks->export_diff_buffer(diff, refname, xmlbuffer, buflen); + else { + ret = hwloc_libxml_callbacks->export_diff_buffer(diff, refname, xmlbuffer, buflen); + if (ret < 0 && errno == ENOSYS) { + hwloc_libxml_callbacks = NULL; + goto retry; + } + } + + hwloc_localeswitch_fini(); + hwloc_components_fini(); + return ret; +} + +void hwloc_free_xmlbuffer(hwloc_topology_t topology __hwloc_attribute_unused, char *xmlbuffer) +{ + int force_nolibxml; + + assert(hwloc_nolibxml_callbacks); /* the core called components_init() for the topology */ + + force_nolibxml = hwloc_nolibxml_export(); + if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml)) + hwloc_nolibxml_callbacks->free_buffer(xmlbuffer); + else + hwloc_libxml_callbacks->free_buffer(xmlbuffer); +} + +void +hwloc_topology_set_userdata_export_callback(hwloc_topology_t topology, + void (*export)(void *reserved, struct hwloc_topology *topology, struct hwloc_obj *obj)) +{ + topology->userdata_export_cb = export; +} + +static void +hwloc__export_obj_userdata(hwloc__xml_export_state_t parentstate, int encoded, + const char *name, size_t length, const void *buffer, size_t encoded_length) +{ + struct hwloc__xml_export_state_s state; + char tmp[255]; + parentstate->new_child(parentstate, &state, "userdata"); + if (name) + state.new_prop(&state, "name", name); + sprintf(tmp, "%lu", (unsigned long) length); + state.new_prop(&state, "length", tmp); + if (encoded) + state.new_prop(&state, "encoding", "base64"); + if (encoded_length) + state.add_content(&state, buffer, encoded ? 
encoded_length : length); + state.end_object(&state, "userdata"); +} + +int +hwloc_export_obj_userdata(void *reserved, + struct hwloc_topology *topology, struct hwloc_obj *obj __hwloc_attribute_unused, + const char *name, const void *buffer, size_t length) +{ + hwloc__xml_export_state_t state = reserved; + + if (!buffer) { + errno = EINVAL; + return -1; + } + + if ((name && hwloc__xml_export_check_buffer(name, strlen(name)) < 0) + || hwloc__xml_export_check_buffer(buffer, length) < 0) { + errno = EINVAL; + return -1; + } + + if (topology->userdata_not_decoded) { + int encoded; + size_t encoded_length; + const char *realname; + if (!strncmp(name, "base64", 6)) { + encoded = 1; + encoded_length = BASE64_ENCODED_LENGTH(length); + } else { + assert(!strncmp(name, "normal", 6)); + encoded = 0; + encoded_length = length; + } + if (name[6] == ':') + realname = name+7; + else { + assert(!strcmp(name+6, "-anon")); + realname = NULL; + } + hwloc__export_obj_userdata(state, encoded, realname, length, buffer, encoded_length); + + } else + hwloc__export_obj_userdata(state, 0, name, length, buffer, length); + + return 0; +} + +int +hwloc_export_obj_userdata_base64(void *reserved, + struct hwloc_topology *topology __hwloc_attribute_unused, struct hwloc_obj *obj __hwloc_attribute_unused, + const char *name, const void *buffer, size_t length) +{ + hwloc__xml_export_state_t state = reserved; + size_t encoded_length; + char *encoded_buffer; + int ret __hwloc_attribute_unused; + + if (!buffer) { + errno = EINVAL; + return -1; + } + + assert(!topology->userdata_not_decoded); + + if (name && hwloc__xml_export_check_buffer(name, strlen(name)) < 0) { + errno = EINVAL; + return -1; + } + + encoded_length = BASE64_ENCODED_LENGTH(length); + encoded_buffer = malloc(encoded_length+1); + if (!encoded_buffer) { + errno = ENOMEM; + return -1; + } + + ret = hwloc_encode_to_base64(buffer, length, encoded_buffer, encoded_length+1); + assert(ret == (int) encoded_length); + + hwloc__export_obj_userdata(state, 1, name, length, encoded_buffer, encoded_length); + + free(encoded_buffer); + return 0; +} + +void +hwloc_topology_set_userdata_import_callback(hwloc_topology_t topology, + void (*import)(struct hwloc_topology *topology, struct hwloc_obj *obj, const char *name, const void *buffer, size_t length)) +{ + topology->userdata_import_cb = import; +} + +/*************************************** + ************ XML component ************ + ***************************************/ + +static void +hwloc_xml_backend_disable(struct hwloc_backend *backend) +{ + struct hwloc_xml_backend_data_s *data = backend->private_data; + data->backend_exit(data); + free(data->msgprefix); + free(data); +} + +static struct hwloc_backend * +hwloc_xml_component_instantiate(struct hwloc_disc_component *component, + const void *_data1, + const void *_data2, + const void *_data3) +{ + struct hwloc_xml_backend_data_s *data; + struct hwloc_backend *backend; + const char *env; + int force_nolibxml; + const char * xmlpath = (const char *) _data1; + const char * xmlbuffer = (const char *) _data2; + int xmlbuflen = (int)(uintptr_t) _data3; + const char *local_basename; + int err; + + assert(hwloc_nolibxml_callbacks); /* the core called components_init() for the component's topology */ + + if (!xmlpath && !xmlbuffer) { + env = getenv("HWLOC_XMLFILE"); + if (env) { + /* 'xml' was given in HWLOC_COMPONENTS without a filename */ + xmlpath = env; + } else { + errno = EINVAL; + goto out; + } + } + + backend = hwloc_backend_alloc(component); + if (!backend) + goto out; 
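+ /* The libxml/nolibxml choice below follows the same convention as the XML export paths earlier in this file: prefer the embedded (nolibxml) implementation unless libxml callbacks are available and not overridden, and fall back to the embedded one if a libxml callback fails with ENOSYS. */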
+ + data = malloc(sizeof(*data)); + if (!data) { + errno = ENOMEM; + goto out_with_backend; + } + + backend->private_data = data; + backend->discover = hwloc_look_xml; + backend->disable = hwloc_xml_backend_disable; + backend->is_thissystem = 0; + + if (xmlpath) { + local_basename = strrchr(xmlpath, '/'); + if (local_basename) + local_basename++; + else + local_basename = xmlpath; + } else { + local_basename = "xmlbuffer"; + } + data->msgprefix = strdup(local_basename); + + force_nolibxml = hwloc_nolibxml_import(); +retry: + if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml)) + err = hwloc_nolibxml_callbacks->backend_init(data, xmlpath, xmlbuffer, xmlbuflen); + else { + err = hwloc_libxml_callbacks->backend_init(data, xmlpath, xmlbuffer, xmlbuflen); + if (err < 0 && errno == ENOSYS) { + hwloc_libxml_callbacks = NULL; + goto retry; + } + } + if (err < 0) + goto out_with_data; + + return backend; + + out_with_data: + free(data->msgprefix); + free(data); + out_with_backend: + free(backend); + out: + return NULL; +} + +static struct hwloc_disc_component hwloc_xml_disc_component = { + HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + "xml", + ~0, + hwloc_xml_component_instantiate, + 30, + 1, + NULL +}; + +const struct hwloc_component hwloc_xml_component = { + HWLOC_COMPONENT_ABI, + NULL, NULL, + HWLOC_COMPONENT_TYPE_DISC, + 0, + &hwloc_xml_disc_component +}; diff --git a/src/3rdparty/hwloc/src/topology.c b/src/3rdparty/hwloc/src/topology.c new file mode 100644 index 000000000..55678a084 --- /dev/null +++ b/src/3rdparty/hwloc/src/topology.c @@ -0,0 +1,4484 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2012 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> + +#define _ATFILE_SOURCE +#include <assert.h> +#include <sys/types.h> +#ifdef HAVE_DIRENT_H +#include <dirent.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <limits.h> +#include <float.h> + +#include <hwloc.h> +#include <private/private.h> +#include <private/debug.h> +#include <private/misc.h> + +#ifdef HAVE_MACH_MACH_INIT_H +#include <mach/mach_init.h> +#endif +#ifdef HAVE_MACH_MACH_HOST_H +#include <mach/mach_host.h> +#endif + +#ifdef HAVE_SYS_PARAM_H +#include <sys/param.h> +#endif + +#ifdef HAVE_SYS_SYSCTL_H +#include <sys/sysctl.h> +#endif + +#ifdef HWLOC_WIN_SYS +#include <windows.h> +#endif + +unsigned hwloc_get_api_version(void) +{ + return HWLOC_API_VERSION; +} + +int hwloc_topology_abi_check(hwloc_topology_t topology) +{ + return topology->topology_abi != HWLOC_TOPOLOGY_ABI ? 
-1 : 0; +} + +int hwloc_hide_errors(void) +{ + static int hide = 0; + static int checked = 0; + if (!checked) { + const char *envvar = getenv("HWLOC_HIDE_ERRORS"); + if (envvar) + hide = atoi(envvar); + checked = 1; + } + return hide; +} + +void hwloc_report_os_error(const char *msg, int line) +{ + static int reported = 0; + + if (!reported && !hwloc_hide_errors()) { + fprintf(stderr, "****************************************************************************\n"); + fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION); + fprintf(stderr, "*\n"); + fprintf(stderr, "* %s\n", msg); + fprintf(stderr, "* Error occurred in topology.c line %d\n", line); + fprintf(stderr, "*\n"); + fprintf(stderr, "* The following FAQ entry in the hwloc documentation may help:\n"); + fprintf(stderr, "* What should I do when hwloc reports \"operating system\" warnings?\n"); + fprintf(stderr, "* Otherwise please report this error message to the hwloc user's mailing list,\n"); +#ifdef HWLOC_LINUX_SYS + fprintf(stderr, "* along with the files generated by the hwloc-gather-topology script.\n"); +#else + fprintf(stderr, "* along with any relevant topology information from your platform.\n"); +#endif + fprintf(stderr, "* \n"); + fprintf(stderr, "* hwloc will now ignore this invalid topology information and continue.\n"); + fprintf(stderr, "****************************************************************************\n"); + reported = 1; + } +} + +#if defined(HAVE_SYSCTLBYNAME) +int hwloc_get_sysctlbyname(const char *name, int64_t *ret) +{ + union { + int32_t i32; + int64_t i64; + } n; + size_t size = sizeof(n); + if (sysctlbyname(name, &n, &size, NULL, 0)) + return -1; + switch (size) { + case sizeof(n.i32): + *ret = n.i32; + break; + case sizeof(n.i64): + *ret = n.i64; + break; + default: + return -1; + } + return 0; +} +#endif + +#if defined(HAVE_SYSCTL) +int hwloc_get_sysctl(int name[], unsigned namelen, int *ret) +{ + int n; + size_t size = sizeof(n); + if (sysctl(name, namelen, &n, &size, NULL, 0)) + return -1; + if (size != sizeof(n)) + return -1; + *ret = n; + return 0; +} +#endif + +/* Return the OS-provided number of processors. Unlike other methods such as + reading sysfs on Linux, this method is not virtualizable; thus it's only + used as a fall-back method, allowing virtual backends (FSROOT, etc) to + have the desired effect. 
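+ (For example, a virtual backend describing another machine's topology must not be overridden by the local processor count.)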
*/ +#ifndef HWLOC_WIN_SYS /* The windows implementation is in topology-windows.c */ +int +hwloc_fallback_nbprocessors(struct hwloc_topology *topology __hwloc_attribute_unused) { + int n; +#if HAVE_DECL__SC_NPROCESSORS_ONLN + n = sysconf(_SC_NPROCESSORS_ONLN); +#elif HAVE_DECL__SC_NPROC_ONLN + n = sysconf(_SC_NPROC_ONLN); +#elif HAVE_DECL__SC_NPROCESSORS_CONF + n = sysconf(_SC_NPROCESSORS_CONF); +#elif HAVE_DECL__SC_NPROC_CONF + n = sysconf(_SC_NPROC_CONF); +#elif defined(HAVE_HOST_INFO) && HAVE_HOST_INFO + struct host_basic_info info; + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; + host_info(mach_host_self(), HOST_BASIC_INFO, (integer_t*) &info, &count); + n = info.avail_cpus; +#elif defined(HAVE_SYSCTLBYNAME) + int64_t nn; + if (hwloc_get_sysctlbyname("hw.ncpu", &nn)) + nn = -1; + n = nn; +#elif defined(HAVE_SYSCTL) && HAVE_DECL_CTL_HW && HAVE_DECL_HW_NCPU + static int name[2] = {CTL_HW, HW_NCPU}; + if (hwloc_get_sysctl(name, sizeof(name)/sizeof(*name), &n)) + n = -1; +#else +#ifdef __GNUC__ +#warning No known way to discover number of available processors on this system +#endif + n = -1; +#endif + return n; +} +#endif /* !HWLOC_WIN_SYS */ + +/* + * Use the given number of processors to set a PU level. + */ +void +hwloc_setup_pu_level(struct hwloc_topology *topology, + unsigned nb_pus) +{ + struct hwloc_obj *obj; + unsigned oscpu,cpu; + + hwloc_debug("%s", "\n\n * CPU cpusets *\n\n"); + for (cpu=0,oscpu=0; cpu<nb_pus; oscpu++) + { + obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, oscpu); + obj->cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_only(obj->cpuset, oscpu); + + hwloc_debug_2args_bitmap("cpu %u (os %u) has cpuset %s\n", + cpu, oscpu, obj->cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + + cpu++; + } +} + +/* Traverse children of a parent in a safe way: reread the next pointer as + * appropriate to prevent crash on child deletion: */ +#define for_each_child_safe(child, parent, pchild) \ + for (pchild = &(parent)->first_child, child = *pchild; \ + child; \ + /* Check whether the current child was not dropped. */ \ + (*pchild == child ? pchild = &(child->next_sibling) : NULL), \ + /* Get pointer to next child. */ \ + child = *pchild) +#define for_each_memory_child_safe(child, parent, pchild) \ + for (pchild = &(parent)->memory_first_child, child = *pchild; \ + child; \ + /* Check whether the current child was not dropped. */ \ + (*pchild == child ? pchild = &(child->next_sibling) : NULL), \ + /* Get pointer to next child. */ \ + child = *pchild) +#define for_each_io_child_safe(child, parent, pchild) \ + for (pchild = &(parent)->io_first_child, child = *pchild; \ + child; \ + /* Check whether the current child was not dropped. */ \ + (*pchild == child ? pchild = &(child->next_sibling) : NULL), \ + /* Get pointer to next child. */ \ + child = *pchild) +#define for_each_misc_child_safe(child, parent, pchild) \ + for (pchild = &(parent)->misc_first_child, child = *pchild; \ + child; \ + /* Check whether the current child was not dropped. */ \ + (*pchild == child ? pchild = &(child->next_sibling) : NULL), \ + /* Get pointer to next child. */ \ + child = *pchild) + +#ifdef HWLOC_DEBUG +/* Just for debugging. 
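+ * It dumps each object with its attributes and cpusets/nodesets; everything in this block is compiled to no-ops below when HWLOC_DEBUG is not defined.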
*/ +static void +hwloc_debug_print_object(int indent __hwloc_attribute_unused, hwloc_obj_t obj) +{ + char type[64], idx[12], attr[1024], *cpuset = NULL; + hwloc_debug("%*s", 2*indent, ""); + hwloc_obj_type_snprintf(type, sizeof(type), obj, 1); + if (obj->os_index != HWLOC_UNKNOWN_INDEX) + snprintf(idx, sizeof(idx), "#%u", obj->os_index); + else + *idx = '\0'; + hwloc_obj_attr_snprintf(attr, sizeof(attr), obj, " ", 1); + hwloc_debug("%s%s%s%s%s", type, idx, *attr ? "(" : "", attr, *attr ? ")" : ""); + if (obj->name) + hwloc_debug(" name \"%s\"", obj->name); + if (obj->subtype) + hwloc_debug(" subtype \"%s\"", obj->subtype); + if (obj->cpuset) { + hwloc_bitmap_asprintf(&cpuset, obj->cpuset); + hwloc_debug(" cpuset %s", cpuset); + free(cpuset); + } + if (obj->complete_cpuset) { + hwloc_bitmap_asprintf(&cpuset, obj->complete_cpuset); + hwloc_debug(" complete %s", cpuset); + free(cpuset); + } + if (obj->nodeset) { + hwloc_bitmap_asprintf(&cpuset, obj->nodeset); + hwloc_debug(" nodeset %s", cpuset); + free(cpuset); + } + if (obj->complete_nodeset) { + hwloc_bitmap_asprintf(&cpuset, obj->complete_nodeset); + hwloc_debug(" completeN %s", cpuset); + free(cpuset); + } + if (obj->arity) + hwloc_debug(" arity %u", obj->arity); + hwloc_debug("%s", "\n"); +} + +static void +hwloc_debug_print_objects(int indent __hwloc_attribute_unused, hwloc_obj_t obj) +{ + hwloc_obj_t child; + hwloc_debug_print_object(indent, obj); + for_each_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + for_each_memory_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + for_each_io_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + for_each_misc_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); +} +#else /* !HWLOC_DEBUG */ +#define hwloc_debug_print_object(indent, obj) do { /* nothing */ } while (0) +#define hwloc_debug_print_objects(indent, obj) do { /* nothing */ } while (0) +#endif /* !HWLOC_DEBUG */ + +void hwloc__free_infos(struct hwloc_info_s *infos, unsigned count) +{ + unsigned i; + for(i=0; i<count; i++) { + free(infos[i].name); + free(infos[i].value); + } + free(infos); +} + +int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value) +{ + unsigned count = *countp; + struct hwloc_info_s *infos = *infosp; +#define OBJECT_INFO_ALLOC 8 + /* nothing allocated initially, (re-)allocate by multiple of 8 */ + unsigned alloccount = (count + 1 + (OBJECT_INFO_ALLOC-1)) & ~(OBJECT_INFO_ALLOC-1); + if (count != alloccount) { + struct hwloc_info_s *tmpinfos = realloc(infos, alloccount*sizeof(*infos)); + if (!tmpinfos) + /* failed to allocate, ignore this info */ + goto out_with_array; + *infosp = infos = tmpinfos; + } + infos[count].name = strdup(name); + if (!infos[count].name) + goto out_with_array; + infos[count].value = strdup(value); + if (!infos[count].value) + goto out_with_name; + *countp = count+1; + return 0; + + out_with_name: + free(infos[count].name); + out_with_array: + /* don't bother reducing the array */ + return -1; +} + +int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, + const char *name, const char *value, + int replace) +{ + struct hwloc_info_s *infos = *infosp; + unsigned count = *countp; + unsigned i; + for(i=0; i<count; i++) { + if (!strcmp(infos[i].name, name)) { + if (replace) { + char *new = strdup(value); + if (!new) + return -1; + free(infos[i].value); + infos[i].value = new; + } + return 0; + } + } + return hwloc__add_info(infosp, countp, name, value); +} + +int 
hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, + struct hwloc_info_s **src_infosp, unsigned *src_countp) +{ + unsigned dst_count = *dst_countp; + struct hwloc_info_s *dst_infos = *dst_infosp; + unsigned src_count = *src_countp; + struct hwloc_info_s *src_infos = *src_infosp; + unsigned i; +#define OBJECT_INFO_ALLOC 8 + /* nothing allocated initially, (re-)allocate by multiple of 8 */ + unsigned alloccount = (dst_count + src_count + (OBJECT_INFO_ALLOC-1)) & ~(OBJECT_INFO_ALLOC-1); + if (dst_count != alloccount) { + struct hwloc_info_s *tmp_infos = realloc(dst_infos, alloccount*sizeof(*dst_infos)); + if (!tmp_infos) + /* Failed to realloc, ignore the appended infos */ + goto drop; + dst_infos = tmp_infos; + } + for(i=0; i<src_count; i++, dst_count++) { + dst_infos[dst_count].name = src_infos[i].name; + dst_infos[dst_count].value = src_infos[i].value; + } + *dst_infosp = dst_infos; + *dst_countp = dst_count; + free(src_infos); + *src_infosp = NULL; + *src_countp = 0; + return 0; + + drop: + /* drop src infos, don't modify dst_infos at all */ + for(i=0; i<src_count; i++) { + free(src_infos[i].name); + free(src_infos[i].value); + } + free(src_infos); + *src_infosp = NULL; + *src_countp = 0; + return -1; +} + +int hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value) +{ + return hwloc__add_info(&obj->infos, &obj->infos_count, name, value); +} + +/* This function may be called with topology->tma set, it cannot free() or realloc() */ +static int hwloc__tma_dup_infos(struct hwloc_tma *tma, hwloc_obj_t new, hwloc_obj_t src) +{ + unsigned i, j; + new->infos = hwloc_tma_calloc(tma, src->infos_count * sizeof(*src->infos)); + if (!new->infos) + return -1; + for(i=0; i<src->infos_count; i++) { + new->infos[i].name = hwloc_tma_strdup(tma, src->infos[i].name); + new->infos[i].value = hwloc_tma_strdup(tma, src->infos[i].value); + if (!new->infos[i].name || !new->infos[i].value) + goto failed; + } + new->infos_count = src->infos_count; + return 0; + + failed: + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + /* free every entry duplicated so far; at index i one of the two strings may be NULL, free(NULL) is fine */ + for(j=0; j<=i; j++) { + free(new->infos[j].name); + free(new->infos[j].value); + } + free(new->infos); + new->infos = NULL; + return -1; +} + +static void +hwloc__free_object_contents(hwloc_obj_t obj) +{ + switch (obj->type) { + case HWLOC_OBJ_NUMANODE: + free(obj->attr->numanode.page_types); + break; + default: + break; + } + hwloc__free_infos(obj->infos, obj->infos_count); + free(obj->attr); + free(obj->children); + free(obj->subtype); + free(obj->name); + hwloc_bitmap_free(obj->cpuset); + hwloc_bitmap_free(obj->complete_cpuset); + hwloc_bitmap_free(obj->nodeset); + hwloc_bitmap_free(obj->complete_nodeset); +} + +/* Free an object and all its content. */ +void +hwloc_free_unlinked_object(hwloc_obj_t obj) +{ + hwloc__free_object_contents(obj); + free(obj); +} + +/* Replace old with contents of new object, and make new freeable by the caller. + * Only updates next_sibling/first_child pointers, + * so may only be used during early discovery.
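+ * (at that point prev_sibling, cousin and level pointers are not set up yet, so only the first_child/next_sibling links need to stay consistent)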
+ */ +static void +hwloc_replace_linked_object(hwloc_obj_t old, hwloc_obj_t new) +{ + /* drop old fields */ + hwloc__free_object_contents(old); + /* copy old tree pointers to new */ + new->parent = old->parent; + new->next_sibling = old->next_sibling; + new->first_child = old->first_child; + new->memory_first_child = old->memory_first_child; + new->io_first_child = old->io_first_child; + new->misc_first_child = old->misc_first_child; + /* copy new contents to old now that tree pointers are OK */ + memcpy(old, new, sizeof(*old)); + /* clear new so that the caller may free it */ + memset(new, 0, sizeof(*new)); +} + +/* Remove an object and its children from its parent and free them. + * Only updates next_sibling/first_child pointers, + * so may only be used during early discovery or during destroy. + */ +static void +unlink_and_free_object_and_children(hwloc_obj_t *pobj) +{ + hwloc_obj_t obj = *pobj, child, *pchild; + + for_each_child_safe(child, obj, pchild) + unlink_and_free_object_and_children(pchild); + for_each_memory_child_safe(child, obj, pchild) + unlink_and_free_object_and_children(pchild); + for_each_io_child_safe(child, obj, pchild) + unlink_and_free_object_and_children(pchild); + for_each_misc_child_safe(child, obj, pchild) + unlink_and_free_object_and_children(pchild); + + *pobj = obj->next_sibling; + hwloc_free_unlinked_object(obj); +} + +/* Free an object and its children without unlinking from parent. + */ +void +hwloc_free_object_and_children(hwloc_obj_t obj) +{ + unlink_and_free_object_and_children(&obj); +} + +/* Free an object, its next siblings and their children without unlinking from parent. + */ +void +hwloc_free_object_siblings_and_children(hwloc_obj_t obj) +{ + while (obj) + unlink_and_free_object_and_children(&obj); +} + +/* insert the (non-empty) list of siblings starting at firstnew as new children of newparent, + * and return the address of the pointer to the next one + */ +static hwloc_obj_t * +insert_siblings_list(hwloc_obj_t *firstp, hwloc_obj_t firstnew, hwloc_obj_t newparent) +{ + hwloc_obj_t tmp; + assert(firstnew); + *firstp = tmp = firstnew; + tmp->parent = newparent; + while (tmp->next_sibling) { + tmp = tmp->next_sibling; + tmp->parent = newparent; + } + return &tmp->next_sibling; +} + +/* Take the new list starting at firstnew and prepend it to the old list starting at *firstp, + * and mark the new children as children of newparent. + * May be used during early or late discovery (updates prev_sibling and sibling_rank). + * List firstnew must be non-NULL. + */ +static void +prepend_siblings_list(hwloc_obj_t *firstp, hwloc_obj_t firstnew, hwloc_obj_t newparent) +{ + hwloc_obj_t *tmpp, tmp, last; + unsigned length; + + /* update parent pointers and find the length and end of the new list */ + for(length = 0, tmpp = &firstnew, last = NULL ; *tmpp; length++, last = *tmpp, tmpp = &((*tmpp)->next_sibling)) + (*tmpp)->parent = newparent; + + /* update sibling_rank */ + for(tmp = *firstp; tmp; tmp = tmp->next_sibling) + tmp->sibling_rank += length; /* if it wasn't initialized yet, it'll be overwritten later */ + + /* place the existing list at the end of the new one */ + *tmpp = *firstp; + if (*firstp) + (*firstp)->prev_sibling = last; + + /* use the beginning of the new list now */ + *firstp = firstnew; +} + +/* Take the new list starting at firstnew and append it to the old list starting at *firstp, + * and mark the new children as children of newparent. + * May be used during early or late discovery (updates prev_sibling and sibling_rank).
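+ * (unlike prepend_siblings_list() above, firstnew may be NULL here, in which case the existing list is left unchanged)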
+ */ +static void +append_siblings_list(hwloc_obj_t *firstp, hwloc_obj_t firstnew, hwloc_obj_t newparent) +{ + hwloc_obj_t *tmpp, tmp, last; + unsigned length; + + /* find the length and end of the existing list */ + for(length = 0, tmpp = firstp, last = NULL ; *tmpp; length++, last = *tmpp, tmpp = &((*tmpp)->next_sibling)); + + /* update parent pointers and sibling_rank */ + for(tmp = firstnew; tmp; tmp = tmp->next_sibling) { + tmp->parent = newparent; + tmp->sibling_rank += length; /* if it wasn't set yet, it'll be overwritten later */ + } + + /* place new list at the end of the old one */ + *tmpp = firstnew; + if (firstnew) + firstnew->prev_sibling = last; +} + +/* Remove an object from its parent and free it. + * Only updates next_sibling/first_child pointers, + * so may only be used during early discovery. + * + * Children are inserted in the parent. + * If children should be inserted somewhere else (e.g. when merging with a child), + * the caller should move them before calling this function. + */ +static void +unlink_and_free_single_object(hwloc_obj_t *pparent) +{ + hwloc_obj_t old = *pparent; + hwloc_obj_t *lastp; + + if (old->type == HWLOC_OBJ_MISC) { + /* Misc object */ + + /* no normal children */ + assert(!old->first_child); + /* no memory children */ + assert(!old->memory_first_child); + /* no I/O children */ + assert(!old->io_first_child); + + if (old->misc_first_child) + /* insert old misc object children as new siblings below parent instead of old */ + lastp = insert_siblings_list(pparent, old->misc_first_child, old->parent); + else + lastp = pparent; + /* append old siblings back */ + *lastp = old->next_sibling; + + } else if (hwloc__obj_type_is_io(old->type)) { + /* I/O object */ + + /* no normal children */ + assert(!old->first_child); + /* no memory children */ + assert(!old->memory_first_child); + + if (old->io_first_child) + /* insert old I/O object children as new siblings below parent instead of old */ + lastp = insert_siblings_list(pparent, old->io_first_child, old->parent); + else + lastp = pparent; + /* append old siblings back */ + *lastp = old->next_sibling; + + /* append old Misc children to parent */ + if (old->misc_first_child) + append_siblings_list(&old->parent->misc_first_child, old->misc_first_child, old->parent); + + } else if (hwloc__obj_type_is_memory(old->type)) { + /* memory object */ + + /* no normal children */ + assert(!old->first_child); + /* no I/O children */ + assert(!old->io_first_child); + + if (old->memory_first_child) + /* insert old memory object children as new siblings below parent instead of old */ + lastp = insert_siblings_list(pparent, old->memory_first_child, old->parent); + else + lastp = pparent; + /* append old siblings back */ + *lastp = old->next_sibling; + + /* append old Misc children to parent */ + if (old->misc_first_child) + append_siblings_list(&old->parent->misc_first_child, old->misc_first_child, old->parent); + + } else { + /* Normal object */ + + if (old->first_child) + /* insert old object children as new siblings below parent instead of old */ + lastp = insert_siblings_list(pparent, old->first_child, old->parent); + else + lastp = pparent; + /* append old siblings back */ + *lastp = old->next_sibling; + + /* append old memory, I/O and Misc children to parent + * old->parent cannot be NULL (removing root), misc children should have been moved by the caller earlier. 
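+ * (i.e. this function is never used to remove the root object itself)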
+ */ + if (old->memory_first_child) + append_siblings_list(&old->parent->memory_first_child, old->memory_first_child, old->parent); + if (old->io_first_child) + append_siblings_list(&old->parent->io_first_child, old->io_first_child, old->parent); + if (old->misc_first_child) + append_siblings_list(&old->parent->misc_first_child, old->misc_first_child, old->parent); + } + + hwloc_free_unlinked_object(old); +} + +/* This function may use a tma, it cannot free() or realloc() */ +static int +hwloc__duplicate_object(struct hwloc_topology *newtopology, + struct hwloc_obj *newparent, + struct hwloc_obj *newobj, + struct hwloc_obj *src) +{ + struct hwloc_tma *tma = newtopology->tma; + hwloc_obj_t *level; + unsigned level_width; + size_t len; + unsigned i; + hwloc_obj_t child, prev; + int err = 0; + + /* either we're duplicating to an already allocated new root, which has no newparent, + * or we're duplicating to a non-yet allocated new non-root, which will have a newparent. + */ + assert(!newparent == !!newobj); + + if (!newobj) { + newobj = hwloc_alloc_setup_object(newtopology, src->type, src->os_index); + if (!newobj) + return -1; + } + + /* duplicate all non-object-pointer fields */ + newobj->logical_index = src->logical_index; + newobj->depth = src->depth; + newobj->sibling_rank = src->sibling_rank; + + newobj->type = src->type; + newobj->os_index = src->os_index; + newobj->gp_index = src->gp_index; + newobj->symmetric_subtree = src->symmetric_subtree; + + if (src->name) + newobj->name = hwloc_tma_strdup(tma, src->name); + if (src->subtype) + newobj->subtype = hwloc_tma_strdup(tma, src->subtype); + newobj->userdata = src->userdata; + + newobj->total_memory = src->total_memory; + + memcpy(newobj->attr, src->attr, sizeof(*newobj->attr)); + + if (src->type == HWLOC_OBJ_NUMANODE && src->attr->numanode.page_types_len) { + len = src->attr->numanode.page_types_len * sizeof(struct hwloc_memory_page_type_s); + newobj->attr->numanode.page_types = hwloc_tma_malloc(tma, len); + memcpy(newobj->attr->numanode.page_types, src->attr->numanode.page_types, len); + } + + newobj->cpuset = hwloc_bitmap_tma_dup(tma, src->cpuset); + newobj->complete_cpuset = hwloc_bitmap_tma_dup(tma, src->complete_cpuset); + newobj->nodeset = hwloc_bitmap_tma_dup(tma, src->nodeset); + newobj->complete_nodeset = hwloc_bitmap_tma_dup(tma, src->complete_nodeset); + + hwloc__tma_dup_infos(tma, newobj, src); + + /* find our level */ + if (src->depth < 0) { + i = HWLOC_SLEVEL_FROM_DEPTH(src->depth); + level = newtopology->slevels[i].objs; + level_width = newtopology->slevels[i].nbobjs; + /* deal with first/last pointers of special levels, even if not really needed */ + if (!newobj->logical_index) + newtopology->slevels[i].first = newobj; + if (newobj->logical_index == newtopology->slevels[i].nbobjs - 1) + newtopology->slevels[i].last = newobj; + } else { + level = newtopology->levels[src->depth]; + level_width = newtopology->level_nbobjects[src->depth]; + } + /* place us for real */ + assert(newobj->logical_index < level_width); + level[newobj->logical_index] = newobj; + /* link to already-inserted cousins + * (hwloc_pci_belowroot_apply_locality() can cause out-of-order logical indexes) + */ + if (newobj->logical_index > 0 && level[newobj->logical_index-1]) { + newobj->prev_cousin = level[newobj->logical_index-1]; + level[newobj->logical_index-1]->next_cousin = newobj; + } + if (newobj->logical_index < level_width-1 && level[newobj->logical_index+1]) { + newobj->next_cousin = level[newobj->logical_index+1]; + 
level[newobj->logical_index+1]->prev_cousin = newobj; + } + + /* prepare for children */ + if (src->arity) { + newobj->children = hwloc_tma_malloc(tma, src->arity * sizeof(*newobj->children)); + if (!newobj->children) + return -1; + } + newobj->arity = src->arity; + newobj->memory_arity = src->memory_arity; + newobj->io_arity = src->io_arity; + newobj->misc_arity = src->misc_arity; + + /* actually insert children now */ + for_each_child(child, src) { + err = hwloc__duplicate_object(newtopology, newobj, NULL, child); + if (err < 0) + goto out_with_children; + } + for_each_memory_child(child, src) { + err = hwloc__duplicate_object(newtopology, newobj, NULL, child); + if (err < 0) + goto out_with_children; + } + for_each_io_child(child, src) { + err = hwloc__duplicate_object(newtopology, newobj, NULL, child); + if (err < 0) + goto out_with_children; + } + for_each_misc_child(child, src) { + err = hwloc__duplicate_object(newtopology, newobj, NULL, child); + if (err < 0) + goto out_with_children; + } + + out_with_children: + + /* link children if all of them were inserted */ + if (!err) { + /* only next_sibling is set by insert_by_parent(). + * sibling_rank was set above. + */ + if (newobj->arity) { + newobj->children[0]->prev_sibling = NULL; + for(i=1; i<newobj->arity; i++) + newobj->children[i]->prev_sibling = newobj->children[i-1]; + newobj->last_child = newobj->children[newobj->arity-1]; + } + if (newobj->memory_arity) { + child = newobj->memory_first_child; + prev = NULL; + while (child) { + child->prev_sibling = prev; + prev = child; + child = child->next_sibling; + } + } + if (newobj->io_arity) { + child = newobj->io_first_child; + prev = NULL; + while (child) { + child->prev_sibling = prev; + prev = child; + child = child->next_sibling; + } + } + if (newobj->misc_arity) { + child = newobj->misc_first_child; + prev = NULL; + while (child) { + child->prev_sibling = prev; + prev = child; + child = child->next_sibling; + } + } + } + + /* some children insertion may have failed, but some children may have been inserted below us already. + * keep inserting ourself and let the caller clean the entire tree if we return an error. + */ + + if (newparent) { + /* no need to check the children insert order here, the source topology + * is supposed to be OK already, and we have debug asserts.
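+ * (newparent->children was allocated when newparent itself was duplicated, so the sibling_rank slot written below is valid)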
+ */ + hwloc_insert_object_by_parent(newtopology, newparent, newobj); + + /* place us inside our parent children array */ + if (hwloc__obj_type_is_normal(newobj->type)) + newparent->children[newobj->sibling_rank] = newobj; + } + + return err; +} + +static int +hwloc__topology_init (struct hwloc_topology **topologyp, unsigned nblevels, struct hwloc_tma *tma); + +/* This function may use a tma, it cannot free() or realloc() */ +int +hwloc__topology_dup(hwloc_topology_t *newp, + hwloc_topology_t old, + struct hwloc_tma *tma) +{ + hwloc_topology_t new; + hwloc_obj_t newroot; + hwloc_obj_t oldroot = hwloc_get_root_obj(old); + unsigned i; + int err; + + if (!old->is_loaded) { + errno = EINVAL; + return -1; + } + + err = hwloc__topology_init(&new, old->nb_levels_allocated, tma); + if (err < 0) + goto out; + + new->flags = old->flags; + memcpy(new->type_filter, old->type_filter, sizeof(old->type_filter)); + new->is_thissystem = old->is_thissystem; + new->is_loaded = 1; + new->pid = old->pid; + new->next_gp_index = old->next_gp_index; + + memcpy(&new->binding_hooks, &old->binding_hooks, sizeof(old->binding_hooks)); + + memcpy(new->support.discovery, old->support.discovery, sizeof(*old->support.discovery)); + memcpy(new->support.cpubind, old->support.cpubind, sizeof(*old->support.cpubind)); + memcpy(new->support.membind, old->support.membind, sizeof(*old->support.membind)); + + new->allowed_cpuset = hwloc_bitmap_tma_dup(tma, old->allowed_cpuset); + new->allowed_nodeset = hwloc_bitmap_tma_dup(tma, old->allowed_nodeset); + + new->userdata_export_cb = old->userdata_export_cb; + new->userdata_import_cb = old->userdata_import_cb; + new->userdata_not_decoded = old->userdata_not_decoded; + + assert(!old->machine_memory.local_memory); + assert(!old->machine_memory.page_types_len); + assert(!old->machine_memory.page_types); + + for(i = HWLOC_OBJ_TYPE_MIN; i < HWLOC_OBJ_TYPE_MAX; i++) + new->type_depth[i] = old->type_depth[i]; + + /* duplicate levels and we'll place objects there when duplicating objects */ + new->nb_levels = old->nb_levels; + assert(new->nb_levels_allocated >= new->nb_levels); + for(i=1 /* root level already allocated */ ; i<new->nb_levels; i++) { + new->level_nbobjects[i] = old->level_nbobjects[i]; + new->levels[i] = hwloc_tma_calloc(tma, new->level_nbobjects[i] * sizeof(*new->levels[i])); + } + for(i=0; i<HWLOC_NR_SLEVELS; i++) { + new->slevels[i].nbobjs = old->slevels[i].nbobjs; + if (new->slevels[i].nbobjs) + new->slevels[i].objs = hwloc_tma_calloc(tma, new->slevels[i].nbobjs * sizeof(*new->slevels[i].objs)); + } + + /* recursively duplicate object children */ + newroot = hwloc_get_root_obj(new); + err = hwloc__duplicate_object(new, NULL, newroot, oldroot); + if (err < 0) + goto out_with_topology; + + err = hwloc_internal_distances_dup(new, old); + if (err < 0) + goto out_with_topology; + + /* we connected everything during duplication */ + new->modified = 0; + + /* no need to duplicate backends, topology is already loaded */ + new->backends = NULL; + new->get_pci_busid_cpuset_backend = NULL; + +#ifndef HWLOC_DEBUG + if (getenv("HWLOC_DEBUG_CHECK")) +#endif + hwloc_topology_check(new); + + *newp = new; + return 0; + + out_with_topology: + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + hwloc_topology_destroy(new); + out: + return -1; +} + +int +hwloc_topology_dup(hwloc_topology_t *newp, + hwloc_topology_t old) +{ + return hwloc__topology_dup(newp, old, NULL); +} + +/* WARNING: The indexes of this array MUST match the ordering of + the obj_order_type[] array,
below. Specifically, the values must + be laid out such that: + + obj_order_type[obj_type_order[N]] = N + + for all HWLOC_OBJ_* values of N. Put differently: + + obj_type_order[A] = B + + where the A values are in order of the hwloc_obj_type_t enum, and + the B values are the corresponding indexes of obj_order_type. + + We can't use C99 syntax to initialize this in a little safer manner + -- bummer. :-( + + Correctness is asserted in hwloc_topology_init() when debug is enabled. + */ +/***** Make sure you update obj_type_priority[] below as well. *****/ +static const unsigned obj_type_order[] = { + /* first entry is HWLOC_OBJ_MACHINE */ 0, + /* next entry is HWLOC_OBJ_PACKAGE */ 3, + /* next entry is HWLOC_OBJ_CORE */ 12, + /* next entry is HWLOC_OBJ_PU */ 16, + /* next entry is HWLOC_OBJ_L1CACHE */ 10, + /* next entry is HWLOC_OBJ_L2CACHE */ 8, + /* next entry is HWLOC_OBJ_L3CACHE */ 6, + /* next entry is HWLOC_OBJ_L4CACHE */ 5, + /* next entry is HWLOC_OBJ_L5CACHE */ 4, + /* next entry is HWLOC_OBJ_L1ICACHE */ 11, + /* next entry is HWLOC_OBJ_L2ICACHE */ 9, + /* next entry is HWLOC_OBJ_L3ICACHE */ 7, + /* next entry is HWLOC_OBJ_GROUP */ 1, + /* next entry is HWLOC_OBJ_NUMANODE */ 2, + /* next entry is HWLOC_OBJ_BRIDGE */ 13, + /* next entry is HWLOC_OBJ_PCI_DEVICE */ 14, + /* next entry is HWLOC_OBJ_OS_DEVICE */ 15, + /* next entry is HWLOC_OBJ_MISC */ 17 +}; + +#ifndef NDEBUG /* only used in debug check assert if !NDEBUG */ +static const hwloc_obj_type_t obj_order_type[] = { + HWLOC_OBJ_MACHINE, + HWLOC_OBJ_GROUP, + HWLOC_OBJ_NUMANODE, + HWLOC_OBJ_PACKAGE, + HWLOC_OBJ_L5CACHE, + HWLOC_OBJ_L4CACHE, + HWLOC_OBJ_L3CACHE, + HWLOC_OBJ_L3ICACHE, + HWLOC_OBJ_L2CACHE, + HWLOC_OBJ_L2ICACHE, + HWLOC_OBJ_L1CACHE, + HWLOC_OBJ_L1ICACHE, + HWLOC_OBJ_CORE, + HWLOC_OBJ_BRIDGE, + HWLOC_OBJ_PCI_DEVICE, + HWLOC_OBJ_OS_DEVICE, + HWLOC_OBJ_PU, + HWLOC_OBJ_MISC /* Misc is always a leaf */ +}; +#endif +/***** Make sure you update obj_type_priority[] below as well. *****/ + +/* priority to be used when merging identical parent/child objects + * (in merge_useless_child); keep the one with the highest priority. + * + * Always keep Machine/NUMANode/PU/PCIDev/OSDev + * then Core + * then Package + * then Cache, + * then Instruction Caches + * then always drop Group/Misc/Bridge. + * + * Some types won't actually ever be involved in such merging. + */ +/***** Make sure you update this array when changing the list of types. *****/ +static const int obj_type_priority[] = { + /* first entry is HWLOC_OBJ_MACHINE */ 90, + /* next entry is HWLOC_OBJ_PACKAGE */ 40, + /* next entry is HWLOC_OBJ_CORE */ 60, + /* next entry is HWLOC_OBJ_PU */ 100, + /* next entry is HWLOC_OBJ_L1CACHE */ 20, + /* next entry is HWLOC_OBJ_L2CACHE */ 20, + /* next entry is HWLOC_OBJ_L3CACHE */ 20, + /* next entry is HWLOC_OBJ_L4CACHE */ 20, + /* next entry is HWLOC_OBJ_L5CACHE */ 20, + /* next entry is HWLOC_OBJ_L1ICACHE */ 19, + /* next entry is HWLOC_OBJ_L2ICACHE */ 19, + /* next entry is HWLOC_OBJ_L3ICACHE */ 19, + /* next entry is HWLOC_OBJ_GROUP */ 0, + /* next entry is HWLOC_OBJ_NUMANODE */ 100, + /* next entry is HWLOC_OBJ_BRIDGE */ 0, + /* next entry is HWLOC_OBJ_PCI_DEVICE */ 100, + /* next entry is HWLOC_OBJ_OS_DEVICE */ 100, + /* next entry is HWLOC_OBJ_MISC */ 0 +}; + +int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) +{ + unsigned order1 = obj_type_order[type1]; + unsigned order2 = obj_type_order[type2]; + + /* only normal objects are comparable.
Others are only comparable with Machine */ + if (!hwloc__obj_type_is_normal(type1) + && hwloc__obj_type_is_normal(type2) && type2 != HWLOC_OBJ_MACHINE) + return HWLOC_TYPE_UNORDERED; + if (!hwloc__obj_type_is_normal(type2) + && hwloc__obj_type_is_normal(type1) && type1 != HWLOC_OBJ_MACHINE) + return HWLOC_TYPE_UNORDERED; + + return order1 - order2; +} + +enum hwloc_obj_cmp_e { + HWLOC_OBJ_EQUAL = HWLOC_BITMAP_EQUAL, /**< \brief Equal */ + HWLOC_OBJ_INCLUDED = HWLOC_BITMAP_INCLUDED, /**< \brief Strictly included into */ + HWLOC_OBJ_CONTAINS = HWLOC_BITMAP_CONTAINS, /**< \brief Strictly contains */ + HWLOC_OBJ_INTERSECTS = HWLOC_BITMAP_INTERSECTS, /**< \brief Intersects, but no inclusion! */ + HWLOC_OBJ_DIFFERENT = HWLOC_BITMAP_DIFFERENT /**< \brief No intersection */ +}; + +static enum hwloc_obj_cmp_e +hwloc_type_cmp(hwloc_obj_t obj1, hwloc_obj_t obj2) +{ + hwloc_obj_type_t type1 = obj1->type; + hwloc_obj_type_t type2 = obj2->type; + int compare; + + compare = hwloc_compare_types(type1, type2); + if (compare == HWLOC_TYPE_UNORDERED) + return HWLOC_OBJ_DIFFERENT; /* we cannot do better */ + if (compare > 0) + return HWLOC_OBJ_INCLUDED; + if (compare < 0) + return HWLOC_OBJ_CONTAINS; + + if (obj1->type == HWLOC_OBJ_GROUP + && (obj1->attr->group.kind != obj2->attr->group.kind + || obj1->attr->group.subkind != obj2->attr->group.subkind)) + return HWLOC_OBJ_DIFFERENT; /* we cannot do better */ + + return HWLOC_OBJ_EQUAL; +} + +/* + * How to compare objects based on cpusets. + */ + +static int +hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2) +{ + hwloc_bitmap_t set1, set2; + int res = HWLOC_OBJ_DIFFERENT; + + assert(!hwloc__obj_type_is_special(obj1->type)); + assert(!hwloc__obj_type_is_special(obj2->type)); + + /* compare cpusets first */ + if (obj1->complete_cpuset && obj2->complete_cpuset) { + set1 = obj1->complete_cpuset; + set2 = obj2->complete_cpuset; + } else { + set1 = obj1->cpuset; + set2 = obj2->cpuset; + } + if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { + res = hwloc_bitmap_compare_inclusion(set1, set2); + if (res == HWLOC_OBJ_INTERSECTS) + return HWLOC_OBJ_INTERSECTS; + } + + /* then compare nodesets, and combine the results */ + if (obj1->complete_nodeset && obj2->complete_nodeset) { + set1 = obj1->complete_nodeset; + set2 = obj2->complete_nodeset; + } else { + set1 = obj1->nodeset; + set2 = obj2->nodeset; + } + if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) { + int noderes = hwloc_bitmap_compare_inclusion(set1, set2); + /* deal with conflicting cpusets/nodesets inclusions */ + if (noderes == HWLOC_OBJ_INCLUDED) { + if (res == HWLOC_OBJ_CONTAINS) + /* contradicting order for cpusets and nodesets */ + return HWLOC_OBJ_INTERSECTS; + res = HWLOC_OBJ_INCLUDED; + + } else if (noderes == HWLOC_OBJ_CONTAINS) { + if (res == HWLOC_OBJ_INCLUDED) + /* contradicting order for cpusets and nodesets */ + return HWLOC_OBJ_INTERSECTS; + res = HWLOC_OBJ_CONTAINS; + + } else if (noderes == HWLOC_OBJ_INTERSECTS) { + return HWLOC_OBJ_INTERSECTS; + + } else { + /* nodesets are different, keep the cpuset order */ + + } + } + + return res; +} + +/* Compare object cpusets based on complete_cpuset if defined (always correctly ordered), + * or fall back to the main cpusets (only correctly ordered during early insert before disallowed bits are cleared). + * + * This is the sane way to compare objects within a horizontal level.
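+ * (it effectively orders siblings by the smallest index in their sets, i.e. by their first PU or NUMA node)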
+ */ +int +hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2) +{ + if (obj1->complete_cpuset && obj2->complete_cpuset) + return hwloc_bitmap_compare_first(obj1->complete_cpuset, obj2->complete_cpuset); + else if (obj1->cpuset && obj2->cpuset) + return hwloc_bitmap_compare_first(obj1->cpuset, obj2->cpuset); + else if (obj1->complete_nodeset && obj2->complete_nodeset) + return hwloc_bitmap_compare_first(obj1->complete_nodeset, obj2->complete_nodeset); + else if (obj1->nodeset && obj2->nodeset) + return hwloc_bitmap_compare_first(obj1->nodeset, obj2->nodeset); + return 0; +} + +/* format the obj info to print in error messages */ +static void +hwloc__report_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj) +{ + char typestr[64]; + char *cpusetstr; + char *nodesetstr = NULL; + hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0); + hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset); + if (obj->nodeset) /* may be missing during insert */ + hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset); + if (obj->os_index != HWLOC_UNKNOWN_INDEX) + snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)", + typestr, obj->os_index, cpusetstr, + nodesetstr ? " nodeset " : "", + nodesetstr ? nodesetstr : ""); + else + snprintf(buf, buflen, "%s (cpuset %s%s%s)", + typestr, cpusetstr, + nodesetstr ? " nodeset " : "", + nodesetstr ? nodesetstr : ""); + free(cpusetstr); + free(nodesetstr); +} + +/* + * How to insert objects into the topology. + * + * Note: during detection, only the first_child and next_sibling pointers are + * kept up to date. Others are computed only once topology detection is + * complete. + */ + +/* merge new object attributes in old. + * use old if defined, otherwise use new. + */ +static void +merge_insert_equal(hwloc_obj_t new, hwloc_obj_t old) +{ + if (old->os_index == HWLOC_UNKNOWN_INDEX) + old->os_index = new->os_index; + + if (new->infos_count) { + /* FIXME: dedup */ + hwloc__move_infos(&old->infos, &old->infos_count, + &new->infos, &new->infos_count); + } + + if (new->name && !old->name) { + old->name = new->name; + new->name = NULL; + } + if (new->subtype && !old->subtype) { + old->subtype = new->subtype; + new->subtype = NULL; + } + + /* Ignore userdata. It will be NULL before load(). + * It may be non-NULL if alloc+insert_group() after load(). 
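+ * (either way the old object's userdata is left untouched here)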
+ */ + + switch(new->type) { + case HWLOC_OBJ_NUMANODE: + if (new->attr->numanode.local_memory && !old->attr->numanode.local_memory) { + /* no memory in old, use new memory */ + old->attr->numanode.local_memory = new->attr->numanode.local_memory; + free(old->attr->numanode.page_types); + old->attr->numanode.page_types_len = new->attr->numanode.page_types_len; + old->attr->numanode.page_types = new->attr->numanode.page_types; + new->attr->numanode.page_types = NULL; + new->attr->numanode.page_types_len = 0; + } + /* old->attr->numanode.total_memory will be updated by propagate_total_memory() */ + break; + case HWLOC_OBJ_L1CACHE: + case HWLOC_OBJ_L2CACHE: + case HWLOC_OBJ_L3CACHE: + case HWLOC_OBJ_L4CACHE: + case HWLOC_OBJ_L5CACHE: + case HWLOC_OBJ_L1ICACHE: + case HWLOC_OBJ_L2ICACHE: + case HWLOC_OBJ_L3ICACHE: + /* for each cache attribute, fill in the old value from the new object if it was unset */ + if (!old->attr->cache.size) + old->attr->cache.size = new->attr->cache.size; + if (!old->attr->cache.linesize) + old->attr->cache.linesize = new->attr->cache.linesize; + if (!old->attr->cache.associativity) + old->attr->cache.associativity = new->attr->cache.associativity; + break; + default: + break; + } +} + +/* returns the result of merge, or NULL if not merged */ +static __hwloc_inline hwloc_obj_t +hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) +{ + if (new->type == HWLOC_OBJ_GROUP && old->type == HWLOC_OBJ_GROUP) { + /* which group do we keep? */ + if (new->attr->group.dont_merge) { + if (old->attr->group.dont_merge) + /* nobody wants to be merged */ + return NULL; + + /* keep the new one, it doesn't want to be merged */ + hwloc_replace_linked_object(old, new); + return new; + + } else { + if (old->attr->group.dont_merge) + /* keep the old one, it doesn't want to be merged */ + return old; + + /* compare kinds to decide which one to keep */ + if (new->attr->group.kind < old->attr->group.kind) + hwloc_replace_linked_object(old, new); + return old; + } + } + + if (new->type == HWLOC_OBJ_GROUP && !new->attr->group.dont_merge) { + + if (old->type == HWLOC_OBJ_PU && new->attr->group.kind == HWLOC_GROUP_KIND_MEMORY) + /* Never merge Memory groups with PU, we don't want to attach Memory under PU */ + return NULL; + + /* Remove the Group now. The normal ignore code path wouldn't tell us whether the Group was removed or not, + * while some callers need to know (at least hwloc_topology_insert_group()). + */ + return old; + + } else if (old->type == HWLOC_OBJ_GROUP && !old->attr->group.dont_merge) { + + if (new->type == HWLOC_OBJ_PU && old->attr->group.kind == HWLOC_GROUP_KIND_MEMORY) + /* Never merge Memory groups with PU, we don't want to attach Memory under PU */ + return NULL; + + /* Replace the Group with the new object contents + * and let the caller free the new object + */ + hwloc_replace_linked_object(old, new); + return old; + + } else { + /* cannot merge */ + return NULL; + } +} + +/* Try to insert OBJ in CUR, recurse if needed. + * Returns the object if it was inserted, + * the remaining object if it was merged, + * NULL if failed to insert. + */ +static struct hwloc_obj * +hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur, hwloc_obj_t obj, + hwloc_report_error_t report_error) +{ + hwloc_obj_t child, next_child = NULL; + /* These always point to the pointer slot where the next child will be appended.
*/ + hwloc_obj_t *cur_children = &cur->first_child; + hwloc_obj_t *obj_children = &obj->first_child; + /* Pointer where OBJ should be put */ + hwloc_obj_t *putp = NULL; /* OBJ position isn't found yet */ + + assert(!hwloc__obj_type_is_memory(obj->type)); + + /* Iteration with prefetching to be completely safe against CHILD removal. + * The list is already sorted by cpuset, and there's no intersection between siblings. + */ + for (child = cur->first_child, child ? next_child = child->next_sibling : NULL; + child; + child = next_child, child ? next_child = child->next_sibling : NULL) { + + int res = hwloc_obj_cmp_sets(obj, child); + int setres = res; + + if (res == HWLOC_OBJ_EQUAL) { + hwloc_obj_t merged = hwloc__insert_try_merge_group(child, obj); + if (merged) + return merged; + /* otherwise compare actual types to decide on the inclusion */ + res = hwloc_type_cmp(obj, child); + } + + switch (res) { + case HWLOC_OBJ_EQUAL: + /* Two objects with same type. + * Groups are handled above. + */ + merge_insert_equal(obj, child); + /* Already present, no need to insert. */ + return child; + + case HWLOC_OBJ_INCLUDED: + /* OBJ is strictly contained in some child of CUR, go deeper. */ + return hwloc___insert_object_by_cpuset(topology, child, obj, report_error); + + case HWLOC_OBJ_INTERSECTS: + if (report_error) { + char childstr[512]; + char objstr[512]; + char msg[1100]; + hwloc__report_error_format_obj(objstr, sizeof(objstr), obj); + hwloc__report_error_format_obj(childstr, sizeof(childstr), child); + snprintf(msg, sizeof(msg), "%s intersects with %s without inclusion!", objstr, childstr); + report_error(msg, __LINE__); + } + goto putback; + + case HWLOC_OBJ_DIFFERENT: + /* OBJ should be a child of CUR before CHILD, mark its position if not found yet. */ + if (!putp && hwloc__object_cpusets_compare_first(obj, child) < 0) + /* Don't insert yet, there could be intersect errors later */ + putp = cur_children; + /* Advance cur_children. */ + cur_children = &child->next_sibling; + break; + + case HWLOC_OBJ_CONTAINS: + /* OBJ contains CHILD, remove CHILD from CUR */ + *cur_children = child->next_sibling; + child->next_sibling = NULL; + /* Put CHILD in OBJ */ + *obj_children = child; + obj_children = &child->next_sibling; + child->parent = obj; + if (setres == HWLOC_OBJ_EQUAL) { + obj->memory_first_child = child->memory_first_child; + child->memory_first_child = NULL; + } + break; + } + } + /* cur_children/obj_children now point to the last CUR/OBJ child's next_sibling pointer, which must be NULL. */ + assert(!*obj_children); + assert(!*cur_children); + + /* Put OBJ where it belongs, or last among CUR's children. */ + if (!putp) + putp = cur_children; + obj->next_sibling = *putp; + *putp = obj; + obj->parent = cur; + + topology->modified = 1; + return obj; + + putback: + /* Put-back OBJ children in CUR and return an error. */ + if (putp) + cur_children = putp; /* No need to try to insert before where OBJ was supposed to go */ + else + cur_children = &cur->first_child; /* Start from the beginning */ + /* We can insert in order, but there can be holes in the middle. */ + while ((child = obj->first_child) != NULL) { + /* Remove from OBJ and re-attach below CUR */ + obj->first_child = child->next_sibling; + child->parent = cur; + /* Find child position in CUR, and insert.
*/ + while (*cur_children && hwloc__object_cpusets_compare_first(*cur_children, child) < 0) + cur_children = &(*cur_children)->next_sibling; + child->next_sibling = *cur_children; + *cur_children = child; + } + return NULL; +} + +/* this differs from hwloc_get_obj_covering_cpuset() by: + * - not looking at the parent cpuset first, which means we can insert + * below root even if root PU bits are not set yet (PUs are inserted later). + * - returning the first child that exactly matches instead of walking down in case + * of identical children. + */ +static struct hwloc_obj * +hwloc__find_obj_covering_memory_cpuset(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_bitmap_t cpuset) +{ + hwloc_obj_t child = hwloc_get_child_covering_cpuset(topology, cpuset, parent); + if (!child) + return parent; + if (hwloc_bitmap_isequal(child->cpuset, cpuset)) + return child; + return hwloc__find_obj_covering_memory_cpuset(topology, child, cpuset); +} + +static struct hwloc_obj * +hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t obj, + hwloc_report_error_t report_error) +{ + hwloc_obj_t parent, group, result; + + if (hwloc_bitmap_iszero(obj->cpuset)) { + /* CPU-less NUMA nodes go in a dedicated group below the root */ + parent = topology->levels[0][0]; + + } else { + /* find the highest obj covering the cpuset */ + parent = hwloc__find_obj_covering_memory_cpuset(topology, topology->levels[0][0], obj->cpuset); + if (!parent) { + /* fall back to the root */ + parent = hwloc_get_root_obj(topology); + } + + if (parent->type == HWLOC_OBJ_PU) { + /* Never attach to PU, try parent */ + parent = parent->parent; + assert(parent); + } + + /* TODO: if root->cpuset was updated earlier, we would be sure whether the group will remain identical to root */ + if (parent != topology->levels[0][0] && hwloc_bitmap_isequal(parent->cpuset, obj->cpuset)) + /* that parent is fine */ + return parent; + } + + if (!hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) + /* even if parent isn't perfect, we don't want an intermediate group */ + return parent; + + /* need to insert an intermediate group for attaching the NUMA node */ + group = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX); + if (!group) + /* failed to create the group, fall back to the larger parent */ + return parent; + + group->attr->group.kind = HWLOC_GROUP_KIND_MEMORY; + group->cpuset = hwloc_bitmap_dup(obj->cpuset); + group->complete_cpuset = hwloc_bitmap_dup(obj->complete_cpuset); + /* we could duplicate nodesets too but hwloc__insert_object_by_cpuset() + * doesn't actually need it, and it could prevent future calls from reusing + * that group for other NUMA nodes. + */ + if (!group->cpuset != !obj->cpuset + || !group->complete_cpuset != !obj->complete_cpuset) { + /* failed to duplicate the cpusets, fall back to the larger parent */ + hwloc_free_unlinked_object(group); + return parent; + } + + result = hwloc__insert_object_by_cpuset(topology, parent, group, report_error); + if (!result) { + /* failed to insert, fall back to the larger parent */ + return parent; + } + + assert(result == group); + return group; +} + +/* attach the given memory object below the given normal parent.
*/ +struct hwloc_obj * +hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, + hwloc_obj_t obj, + hwloc_report_error_t report_error __hwloc_attribute_unused) +{ + hwloc_obj_t *cur_children; + + assert(parent); + assert(hwloc__obj_type_is_normal(parent->type)); + +#if 0 + /* TODO: enable this instead of hack in fixup_sets once NUMA nodes are inserted late */ + /* copy the parent cpuset in case it's larger than expected. + * we could also keep the cpuset smaller than the parent and say that a normal-parent + * can have multiple memory children with smaller cpusets. + * However, the user decided to ignore Groups, so hierarchy/locality loss is expected. + */ + hwloc_bitmap_copy(obj->cpuset, parent->cpuset); +#endif + + /* only NUMA nodes are memory for now, just append to the end of the list */ + assert(obj->type == HWLOC_OBJ_NUMANODE); + assert(obj->nodeset); + cur_children = &parent->memory_first_child; + while (*cur_children) { + /* TODO check that things are inserted in order. + * it's OK for KNL, the only user so far + */ + cur_children = &(*cur_children)->next_sibling; + } + *cur_children = obj; + obj->next_sibling = NULL; + + /* Initialize the complete nodeset if needed */ + if (!obj->complete_nodeset) { + obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset); + } + + /* Add the bit to the top sets, and to the parent CPU-side object */ + if (obj->type == HWLOC_OBJ_NUMANODE) { + if (hwloc_bitmap_isset(obj->nodeset, obj->os_index)) + hwloc_bitmap_set(topology->levels[0][0]->nodeset, obj->os_index); + hwloc_bitmap_set(topology->levels[0][0]->complete_nodeset, obj->os_index); + } + + topology->modified = 1; + return obj; +} + +/* insertion routine that lets you change the error reporting callback */ +struct hwloc_obj * +hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root, + hwloc_obj_t obj, + hwloc_report_error_t report_error) +{ + struct hwloc_obj *result; + +#ifdef HWLOC_DEBUG + assert(!hwloc__obj_type_is_special(obj->type)); + + /* we need at least one non-NULL set (normal or complete, cpuset or nodeset) */ + assert(obj->cpuset || obj->complete_cpuset || obj->nodeset || obj->complete_nodeset); + /* we support the case where all of them are empty. + * it may happen when hwloc__find_insert_memory_parent() + * inserts a Group for a CPU-less NUMA-node. + */ +#endif + + if (hwloc__obj_type_is_memory(obj->type)) { + if (!root) { + root = hwloc__find_insert_memory_parent(topology, obj, report_error); + if (!root) { + hwloc_free_unlinked_object(obj); + return NULL; + } + } + return hwloc__attach_memory_object(topology, root, obj, report_error); + } + + if (!root) + /* Start at the top. */ + root = topology->levels[0][0]; + + result = hwloc___insert_object_by_cpuset(topology, root, obj, report_error); + if (result && result->type == HWLOC_OBJ_PU) { + /* Add the bit to the top sets */ + if (hwloc_bitmap_isset(result->cpuset, result->os_index)) + hwloc_bitmap_set(topology->levels[0][0]->cpuset, result->os_index); + hwloc_bitmap_set(topology->levels[0][0]->complete_cpuset, result->os_index); + } + if (result != obj) { + /* either failed to insert, or got merged, free the original object */ + hwloc_free_unlinked_object(obj); + } + return result; +} + +/* the default insertion routine warns in case of error.
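+ * (through hwloc_report_os_error() above, which prints at most once and honors the HWLOC_HIDE_ERRORS environment variable)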
+ * it's used by most backends */ +struct hwloc_obj * +hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj) +{ + return hwloc__insert_object_by_cpuset(topology, NULL, obj, hwloc_report_os_error); +} + +void +hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj) +{ + hwloc_obj_t *current; + + if (obj->type == HWLOC_OBJ_MISC) { + /* Append to the end of the Misc list */ + for (current = &parent->misc_first_child; *current; current = &(*current)->next_sibling); + } else if (hwloc__obj_type_is_io(obj->type)) { + /* Append to the end of the I/O list */ + for (current = &parent->io_first_child; *current; current = &(*current)->next_sibling); + } else if (hwloc__obj_type_is_memory(obj->type)) { + /* Append to the end of the memory list */ + for (current = &parent->memory_first_child; *current; current = &(*current)->next_sibling); + /* Add the bit to the top sets */ + if (obj->type == HWLOC_OBJ_NUMANODE) { + if (hwloc_bitmap_isset(obj->nodeset, obj->os_index)) + hwloc_bitmap_set(topology->levels[0][0]->nodeset, obj->os_index); + hwloc_bitmap_set(topology->levels[0][0]->complete_nodeset, obj->os_index); + } + } else { + /* Append to the end of the list. + * The caller takes care of inserting children in the right cpuset order, without intersection between them. + * Duplicating doesn't need to check the order since the source topology is supposed to be OK already. + * XML reorders if needed, and fails on intersecting siblings. + * Other callers just insert random objects such as I/O or Misc, no cpuset issue there. + */ + for (current = &parent->first_child; *current; current = &(*current)->next_sibling); + /* Add the bit to the top sets */ + if (obj->type == HWLOC_OBJ_PU) { + if (hwloc_bitmap_isset(obj->cpuset, obj->os_index)) + hwloc_bitmap_set(topology->levels[0][0]->cpuset, obj->os_index); + hwloc_bitmap_set(topology->levels[0][0]->complete_cpuset, obj->os_index); + } + } + + *current = obj; + obj->parent = parent; + obj->next_sibling = NULL; + topology->modified = 1; +} + +hwloc_obj_t +hwloc_alloc_setup_object(hwloc_topology_t topology, + hwloc_obj_type_t type, unsigned os_index) +{ + struct hwloc_obj *obj = hwloc_tma_malloc(topology->tma, sizeof(*obj)); + memset(obj, 0, sizeof(*obj)); + obj->type = type; + obj->os_index = os_index; + obj->gp_index = topology->next_gp_index++; + obj->attr = hwloc_tma_malloc(topology->tma, sizeof(*obj->attr)); + memset(obj->attr, 0, sizeof(*obj->attr)); + /* do not allocate the cpuset here, let the caller do it */ + return obj; +} + +hwloc_obj_t +hwloc_topology_alloc_group_object(struct hwloc_topology *topology) +{ + if (!topology->is_loaded) { + /* this could actually work, see insert() below */ + errno = EINVAL; + return NULL; + } + return hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX); +} + +static void hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root); +static void propagate_total_memory(hwloc_obj_t obj); +static void hwloc_set_group_depth(hwloc_topology_t topology); + +hwloc_obj_t +hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj) +{ + hwloc_obj_t res, root; + int cmp; + + if (!topology->is_loaded) { + /* this could actually work, we would just need to disable connect_children/levels below */ + hwloc_free_unlinked_object(obj); + errno = EINVAL; + return NULL; + } + + if (topology->type_filter[HWLOC_OBJ_GROUP] == HWLOC_TYPE_FILTER_KEEP_NONE) { + hwloc_free_unlinked_object(obj); + errno = EINVAL; + 
return NULL; + } + + root = hwloc_get_root_obj(topology); + if (obj->cpuset) + hwloc_bitmap_and(obj->cpuset, obj->cpuset, root->cpuset); + if (obj->complete_cpuset) + hwloc_bitmap_and(obj->complete_cpuset, obj->complete_cpuset, root->complete_cpuset); + if (obj->nodeset) + hwloc_bitmap_and(obj->nodeset, obj->nodeset, root->nodeset); + if (obj->complete_nodeset) + hwloc_bitmap_and(obj->complete_nodeset, obj->complete_nodeset, root->complete_nodeset); + + if ((!obj->cpuset || hwloc_bitmap_iszero(obj->cpuset)) + && (!obj->complete_cpuset || hwloc_bitmap_iszero(obj->complete_cpuset)) + && (!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset)) + && (!obj->complete_nodeset || hwloc_bitmap_iszero(obj->complete_nodeset))) { + hwloc_free_unlinked_object(obj); + errno = EINVAL; + return NULL; + } + + cmp = hwloc_obj_cmp_sets(obj, root); + if (cmp == HWLOC_OBJ_INCLUDED) { + res = hwloc__insert_object_by_cpuset(topology, NULL, obj, NULL /* do not show errors on stdout */); + } else { + /* just merge root */ + res = root; + } + + if (!res) + return NULL; + if (res != obj) + /* merged */ + return res; + + /* properly inserted */ + hwloc_obj_add_children_sets(obj); + if (hwloc_topology_reconnect(topology, 0) < 0) + return NULL; + + hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]); + hwloc_set_group_depth(topology); + +#ifndef HWLOC_DEBUG + if (getenv("HWLOC_DEBUG_CHECK")) +#endif + hwloc_topology_check(topology); + + return obj; +} + +hwloc_obj_t +hwloc_topology_insert_misc_object(struct hwloc_topology *topology, hwloc_obj_t parent, const char *name) +{ + hwloc_obj_t obj; + + if (topology->type_filter[HWLOC_OBJ_MISC] == HWLOC_TYPE_FILTER_KEEP_NONE) { + errno = EINVAL; + return NULL; + } + + if (!topology->is_loaded) { + errno = EINVAL; + return NULL; + } + + obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MISC, HWLOC_UNKNOWN_INDEX); + if (name) + obj->name = strdup(name); + + hwloc_insert_object_by_parent(topology, parent, obj); + + /* FIXME: only connect misc parent children and misc level, + * but this API is likely not performance critical anyway + */ + hwloc_topology_reconnect(topology, 0); + +#ifndef HWLOC_DEBUG + if (getenv("HWLOC_DEBUG_CHECK")) +#endif + hwloc_topology_check(topology); + + return obj; +} + +/* assuming set is included in the topology complete_cpuset + * and all objects have a proper complete_cpuset, + * return the best one containing set. + * if some object are equivalent (same complete_cpuset), return the highest one. 
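A usage sketch for the two public entry points just defined, on an already-loaded topology (the PU indexes and the annotation name are illustrative):

    /* group PUs 0 and 2 together, then hang a Misc note off the result */
    static void group_example(hwloc_topology_t topology)
    {
      hwloc_obj_t res, group = hwloc_topology_alloc_group_object(topology);
      if (!group)
        return;
      group->cpuset = hwloc_bitmap_alloc();
      hwloc_bitmap_set(group->cpuset, 0);
      hwloc_bitmap_set(group->cpuset, 2);
      /* returns the group, an existing identical object, or NULL on error */
      res = hwloc_topology_insert_group_object(topology, group);
      if (res)
        hwloc_topology_insert_misc_object(topology, res, "MyAnnotation");
    }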
+ */ +static hwloc_obj_t +hwloc_get_highest_obj_covering_complete_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) +{ + hwloc_obj_t current = hwloc_get_root_obj(topology); + hwloc_obj_t child; + + if (hwloc_bitmap_isequal(set, current->complete_cpuset)) + /* root cpuset is exactly what we want, no need to look at children, we want the highest */ + return current; + + recurse: + /* find the right child */ + for_each_child(child, current) { + if (hwloc_bitmap_isequal(set, child->complete_cpuset)) + /* child puset is exactly what we want, no need to look at children, we want the highest */ + return child; + if (!hwloc_bitmap_iszero(child->complete_cpuset) && hwloc_bitmap_isincluded(set, child->complete_cpuset)) + break; + } + + if (child) { + current = child; + goto recurse; + } + + /* no better child */ + return current; +} + +hwloc_obj_t +hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_cpuset_t cpuset) +{ + hwloc_obj_t group_obj, largeparent, parent; + + /* restrict to the existing complete cpuset to avoid errors later */ + hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_complete_cpuset(topology)); + if (hwloc_bitmap_iszero(cpuset)) + /* remaining cpuset is empty, invalid */ + return NULL; + + largeparent = hwloc_get_highest_obj_covering_complete_cpuset(topology, cpuset); + if (hwloc_bitmap_isequal(largeparent->complete_cpuset, cpuset) + || !hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) + /* Found a valid object (normal case) */ + return largeparent; + + /* we need to insert an intermediate group */ + group_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX); + if (!group_obj) + /* Failed to insert the exact Group, fallback to largeparent */ + return largeparent; + + group_obj->complete_cpuset = hwloc_bitmap_dup(cpuset); + hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_topology_cpuset(topology)); + group_obj->cpuset = hwloc_bitmap_dup(cpuset); + group_obj->attr->group.kind = HWLOC_GROUP_KIND_IO; + parent = hwloc__insert_object_by_cpuset(topology, largeparent, group_obj, hwloc_report_os_error); + if (!parent) + /* Failed to insert the Group, maybe a conflicting cpuset */ + return largeparent; + + /* Group couldn't get merged or we would have gotten the right largeparent earlier */ + assert(parent == group_obj); + + /* Group inserted without being merged, everything OK, setup its sets */ + hwloc_obj_add_children_sets(group_obj); + + return parent; +} + +static int hwloc_memory_page_type_compare(const void *_a, const void *_b) +{ + const struct hwloc_memory_page_type_s *a = _a; + const struct hwloc_memory_page_type_s *b = _b; + /* consider 0 as larger so that 0-size page_type go to the end */ + if (!b->size) + return -1; + /* don't cast a-b in int since those are ullongs */ + if (b->size == a->size) + return 0; + return a->size < b->size ? -1 : 1; +} + +/* Propagate memory counts */ +static void +propagate_total_memory(hwloc_obj_t obj) +{ + hwloc_obj_t child; + unsigned i; + + /* reset total before counting local and children memory */ + obj->total_memory = 0; + + /* Propagate memory up. 
*/ + for_each_child(child, obj) { + propagate_total_memory(child); + obj->total_memory += child->total_memory; + } + for_each_memory_child(child, obj) { + propagate_total_memory(child); + obj->total_memory += child->total_memory; + } + /* No memory under I/O or Misc */ + + if (obj->type == HWLOC_OBJ_NUMANODE) { + obj->total_memory += obj->attr->numanode.local_memory; + + /* By the way, sort the page_type array. + * Cannot do it on insert since some backends (e.g. XML) add page_types after inserting the object. + */ + qsort(obj->attr->numanode.page_types, obj->attr->numanode.page_types_len, sizeof(*obj->attr->numanode.page_types), hwloc_memory_page_type_compare); + /* Ignore 0-size page_types, they are at the end */ + for(i=obj->attr->numanode.page_types_len; i>=1; i--) + if (obj->attr->numanode.page_types[i-1].size) + break; + obj->attr->numanode.page_types_len = i; + } +} + +/* Now that root sets are ready, propagate them to children + * by allocating missing sets and restricting existing ones. + */ +static void +fixup_sets(hwloc_obj_t obj) +{ + int in_memory_list; + hwloc_obj_t child; + + child = obj->first_child; + in_memory_list = 0; + /* iterate over normal children first, we'll come back for memory children later */ + + iterate: + while (child) { + /* our cpuset must be included in our parent's one */ + hwloc_bitmap_and(child->cpuset, child->cpuset, obj->cpuset); + hwloc_bitmap_and(child->nodeset, child->nodeset, obj->nodeset); + /* our complete_cpuset must be included in our parent's one, but can be larger than our cpuset */ + if (child->complete_cpuset) { + hwloc_bitmap_and(child->complete_cpuset, child->complete_cpuset, obj->complete_cpuset); + } else { + child->complete_cpuset = hwloc_bitmap_dup(child->cpuset); + } + if (child->complete_nodeset) { + hwloc_bitmap_and(child->complete_nodeset, child->complete_nodeset, obj->complete_nodeset); + } else { + child->complete_nodeset = hwloc_bitmap_dup(child->nodeset); + } + + fixup_sets(child); + child = child->next_sibling; + } + + /* switch to memory children list if any */ + if (!in_memory_list && obj->memory_first_child) { + child = obj->memory_first_child; + in_memory_list = 1; + goto iterate; + } + + /* No sets in I/O or Misc */ +} + +/* Setup object cpusets/nodesets by OR'ing its children. */ +int +hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src) +{ +#define ADD_OTHER_OBJ_SET(_dst, _src, _set) \ + if ((_src)->_set) { \ + if (!(_dst)->_set) \ + (_dst)->_set = hwloc_bitmap_alloc(); \ + hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set); \ + } + ADD_OTHER_OBJ_SET(dst, src, cpuset); + ADD_OTHER_OBJ_SET(dst, src, complete_cpuset); + ADD_OTHER_OBJ_SET(dst, src, nodeset); + ADD_OTHER_OBJ_SET(dst, src, complete_nodeset); + return 0; +} + +int +hwloc_obj_add_children_sets(hwloc_obj_t obj) +{ + hwloc_obj_t child; + for_each_child(child, obj) { + hwloc_obj_add_other_obj_sets(obj, child); + } + /* No need to look at Misc children, they contain no PU. */ + return 0; +} + +/* CPU objects are inserted by cpusets, we know their cpusets are properly included. + * We just need fixup_sets() to make sure they aren't too wide. + * + * Memory objects are inserted by cpusets to find their CPU parent, + * but nodesets are only used inside the memory hierarchy below that parent. + * Thus we need to propagate nodesets to CPU-side parents and children. + * + * A memory object nodeset consists of NUMA nodes below it. + * A normal object nodeset consists in NUMA nodes attached to any + * of its children or parents. 
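Once propagate_total_memory() has run, the accumulated sizes are readable straight off the objects; a minimal sketch of the usual accounting idiom:

    #include <hwloc.h>
    #include <stdio.h>

    int main(void)
    {
      hwloc_topology_t topology;
      hwloc_topology_init(&topology);
      hwloc_topology_load(topology);
      /* total_memory sums local_memory of every NUMA node below root */
      printf("machine memory: %llu bytes\n",
             (unsigned long long) hwloc_get_root_obj(topology)->total_memory);
      hwloc_topology_destroy(topology);
      return 0;
    }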
+ */ +static void +propagate_nodeset(hwloc_obj_t obj) +{ + hwloc_obj_t child; + + /* Start our nodeset from the parent one. + * It was emptied at root, and it's being filled with local nodes + * in that branch of the tree as we recurse down. + */ + if (!obj->nodeset) + obj->nodeset = hwloc_bitmap_alloc(); + if (obj->parent) + hwloc_bitmap_copy(obj->nodeset, obj->parent->nodeset); + else + hwloc_bitmap_zero(obj->nodeset); + + /* Don't clear complete_nodeset, just make sure it contains nodeset. + * We cannot clear the complete_nodeset at root and rebuild it down because + * some bits may correspond to offline/disallowed NUMA nodes missing in the topology. + */ + if (!obj->complete_nodeset) + obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset); + else + hwloc_bitmap_or(obj->complete_nodeset, obj->complete_nodeset, obj->nodeset); + + /* now add our local nodeset */ + for_each_memory_child(child, obj) { + /* FIXME rather recurse in the memory hierarchy */ + + /* first, update children complete_nodeset if needed */ + if (!child->complete_nodeset) + child->complete_nodeset = hwloc_bitmap_dup(child->nodeset); + else + hwloc_bitmap_or(child->complete_nodeset, child->complete_nodeset, child->nodeset); + + /* add memory children nodesets to ours */ + hwloc_bitmap_or(obj->nodeset, obj->nodeset, child->nodeset); + hwloc_bitmap_or(obj->complete_nodeset, obj->complete_nodeset, child->complete_nodeset); + + /* by the way, copy our cpusets to memory children */ + if (child->cpuset) + hwloc_bitmap_copy(child->cpuset, obj->cpuset); + else + child->cpuset = hwloc_bitmap_dup(obj->cpuset); + if (child->complete_cpuset) + hwloc_bitmap_copy(child->complete_cpuset, obj->complete_cpuset); + else + child->complete_cpuset = hwloc_bitmap_dup(obj->complete_cpuset); + } + + /* Propagate our nodeset to CPU children. */ + for_each_child(child, obj) { + propagate_nodeset(child); + } + + /* Propagate CPU children's specific nodesets back to us. + * + * We cannot merge these two loops because we don't want the first child's + * nodeset to be propagated back to us and then down to the second child. + * Each child may have its own local nodeset; + * each of them is propagated to us, but not to other children.
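The ordering constraint described above (complete the downward pass before the upward pass) can be seen on a toy model that uses plain bitmasks instead of hwloc_bitmap_t; merging the two loops would leak the first child's node into the second child:

    #include <stdio.h>

    struct toy { unsigned nodeset; struct toy *child[2]; };

    static void propagate(struct toy *o, unsigned parent_set)
    {
      int i;
      o->nodeset |= parent_set;               /* start from the parent set */
      for (i = 0; i < 2; i++)                 /* down: parent to children */
        if (o->child[i])
          propagate(o->child[i], o->nodeset);
      for (i = 0; i < 2; i++)                 /* up: children back to parent */
        if (o->child[i])
          o->nodeset |= o->child[i]->nodeset;
    }

    int main(void)
    {
      struct toy a = { 0x1, { NULL, NULL } }; /* owns local node #0 */
      struct toy b = { 0x2, { NULL, NULL } }; /* owns local node #1 */
      struct toy root = { 0, { &a, &b } };
      propagate(&root, 0);
      printf("root=%x a=%x b=%x\n", root.nodeset, a.nodeset, b.nodeset);
      /* prints root=3 a=1 b=2: each child keeps only its own locality */
      return 0;
    }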
+ */ + for_each_child(child, obj) { + hwloc_bitmap_or(obj->nodeset, obj->nodeset, child->nodeset); + hwloc_bitmap_or(obj->complete_nodeset, obj->complete_nodeset, child->complete_nodeset); + } + + /* No nodeset under I/O or Misc */ + +} + +static void +remove_unused_sets(hwloc_topology_t topology, hwloc_obj_t obj) +{ + hwloc_obj_t child; + + hwloc_bitmap_and(obj->cpuset, obj->cpuset, topology->allowed_cpuset); + hwloc_bitmap_and(obj->nodeset, obj->nodeset, topology->allowed_nodeset); + + for_each_child(child, obj) + remove_unused_sets(topology, child); + for_each_memory_child(child, obj) + remove_unused_sets(topology, child); + /* No cpuset under I/O or Misc */ +} + +static void +hwloc__filter_bridges(hwloc_topology_t topology, hwloc_obj_t root, unsigned depth) +{ + hwloc_obj_t child, *pchild; + + /* filter I/O children and recurse */ + for_each_io_child_safe(child, root, pchild) { + enum hwloc_type_filter_e filter = topology->type_filter[child->type]; + + /* recurse into grand-children */ + hwloc__filter_bridges(topology, child, depth+1); + + child->attr->bridge.depth = depth; + + if (child->type == HWLOC_OBJ_BRIDGE + && filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT + && !child->io_first_child) { + unlink_and_free_single_object(pchild); + topology->modified = 1; + } + } +} + +static void +hwloc_filter_bridges(hwloc_topology_t topology, hwloc_obj_t parent) +{ + hwloc_obj_t child = parent->first_child; + while (child) { + hwloc_filter_bridges(topology, child); + child = child->next_sibling; + } + + hwloc__filter_bridges(topology, parent, 0); +} + +void +hwloc__reorder_children(hwloc_obj_t parent) +{ + /* move the children list on the side */ + hwloc_obj_t *prev, child, children = parent->first_child; + parent->first_child = NULL; + while (children) { + /* dequeue child */ + child = children; + children = child->next_sibling; + /* find where to enqueue it */ + prev = &parent->first_child; + while (*prev && hwloc__object_cpusets_compare_first(child, *prev) > 0) + prev = &((*prev)->next_sibling); + /* enqueue */ + child->next_sibling = *prev; + *prev = child; + } + /* No ordering to enforce for Misc or I/O children. */ +} + +/* Remove all normal children whose cpuset is empty, + * and memory children whose nodeset is empty. + * Also don't remove objects that have I/O children, but ignore Misc. 
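hwloc__reorder_children() above is a plain insertion sort on a singly-linked sibling list; the same pattern in isolation, sketched on an int-keyed list rather than hwloc objects:

    struct item { int key; struct item *next; };

    /* detach the list, then re-enqueue each element at its sorted position,
     * mirroring how children are re-enqueued in cpuset order */
    static void reorder(struct item **head)
    {
      struct item *todo = *head, *cur, **prev;
      *head = NULL;
      while (todo) {
        cur = todo;
        todo = cur->next;
        prev = head;
        while (*prev && (*prev)->key <= cur->key)
          prev = &(*prev)->next;
        cur->next = *prev;
        *prev = cur;
      }
    }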
+ */ +static void +remove_empty(hwloc_topology_t topology, hwloc_obj_t *pobj) +{ + hwloc_obj_t obj = *pobj, child, *pchild; + + for_each_child_safe(child, obj, pchild) + remove_empty(topology, pchild); + for_each_memory_child_safe(child, obj, pchild) + remove_empty(topology, pchild); + /* No cpuset under I/O or Misc */ + + if (obj->first_child /* only remove if all children were removed above, so that we don't remove parents of NUMAnode */ + || obj->memory_first_child /* only remove if no memory attached there */ + || obj->io_first_child /* only remove if no I/O is attached there */) + /* ignore Misc */ + return; + + if (hwloc__obj_type_is_normal(obj->type)) { + if (!hwloc_bitmap_iszero(obj->cpuset)) + return; + } else { + assert(hwloc__obj_type_is_memory(obj->type)); + if (!hwloc_bitmap_iszero(obj->nodeset)) + return; + } + + hwloc_debug("%s", "\nRemoving empty object "); + hwloc_debug_print_object(0, obj); + unlink_and_free_single_object(pobj); + topology->modified = 1; +} + +/* reset type depth before modifying levels (either reconnecting or filtering/keep_structure) */ +static void +hwloc_reset_normal_type_depths(hwloc_topology_t topology) +{ + unsigned i; + for (i=HWLOC_OBJ_TYPE_MIN; i<=HWLOC_OBJ_GROUP; i++) + topology->type_depth[i] = HWLOC_TYPE_DEPTH_UNKNOWN; + /* type contiguity is asserted in topology_check() */ +} + +static int +hwloc_dont_merge_group_level(hwloc_topology_t topology, unsigned i) +{ + unsigned j; + + /* Don't merge some groups in that level? */ + for(j=0; j<topology->level_nbobjects[i]; j++) + if (topology->levels[i][j]->attr->group.dont_merge) + return 1; + + return 0; +} + +/* compare i-th and i-1-th levels structure */ +static int +hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i) +{ + int checkmemory = (topology->levels[i][0]->type == HWLOC_OBJ_PU); + unsigned j; + + if (topology->level_nbobjects[i-1] != topology->level_nbobjects[i]) + return -1; + + for(j=0; j<topology->level_nbobjects[i]; j++) { + if (topology->levels[i-1][j]->arity != 1) + return -1; + if (checkmemory && topology->levels[i-1][j]->memory_arity) + /* don't merge PUs if there's memory above */ + return -1; + } + /* same number of objects with arity 1 above, no problem */ + return 0; +} + +/* return > 0 if any level was removed, which means reconnect is needed */ +static void +hwloc_filter_levels_keep_structure(hwloc_topology_t topology) +{ + unsigned i, j; + int res = 0; + + /* start from the bottom since we'll remove intermediate levels */ + for(i=topology->nb_levels-1; i>0; i--) { + int replacechild = 0, replaceparent = 0; + hwloc_obj_t obj1 = topology->levels[i-1][0]; + hwloc_obj_t obj2 = topology->levels[i][0]; + hwloc_obj_type_t type1 = obj1->type; + hwloc_obj_type_t type2 = obj2->type; + + /* Check whether parents and/or children can be replaced */ + if (topology->type_filter[type1] == HWLOC_TYPE_FILTER_KEEP_STRUCTURE) { + /* Parents can be ignored in favor of children. */ + replaceparent = 1; + if (type1 == HWLOC_OBJ_GROUP && hwloc_dont_merge_group_level(topology, i-1)) + replaceparent = 0; + } + if (topology->type_filter[type2] == HWLOC_TYPE_FILTER_KEEP_STRUCTURE) { + /* Children can be ignored in favor of parents. 
*/ + replacechild = 1; + if (type1 == HWLOC_OBJ_GROUP && hwloc_dont_merge_group_level(topology, i)) + replacechild = 0; + } + if (!replacechild && !replaceparent) + /* no ignoring */ + continue; + /* Decide which one to actually replace */ + if (replaceparent && replacechild) { + /* If both may be replaced, look at obj_type_priority */ + if (obj_type_priority[type1] >= obj_type_priority[type2]) + replaceparent = 0; + else + replacechild = 0; + } + /* Are these levels actually identical? */ + if (hwloc_compare_levels_structure(topology, i) < 0) + continue; + hwloc_debug("may merge levels #%u=%s and #%u=%s\n", + i-1, hwloc_obj_type_string(type1), i, hwloc_obj_type_string(type2)); + + /* OK, remove intermediate objects from the tree. */ + for(j=0; j<topology->level_nbobjects[i]; j++) { + hwloc_obj_t parent = topology->levels[i-1][j]; + hwloc_obj_t child = topology->levels[i][j]; + unsigned k; + if (replacechild) { + /* move child's children to parent */ + parent->first_child = child->first_child; + parent->last_child = child->last_child; + parent->arity = child->arity; + free(parent->children); + parent->children = child->children; + child->children = NULL; + /* update children parent */ + for(k=0; k<parent->arity; k++) + parent->children[k]->parent = parent; + /* append child memory/io/misc children to parent */ + if (child->memory_first_child) { + append_siblings_list(&parent->memory_first_child, child->memory_first_child, parent); + parent->memory_arity += child->memory_arity; + } + if (child->io_first_child) { + append_siblings_list(&parent->io_first_child, child->io_first_child, parent); + parent->io_arity += child->io_arity; + } + if (child->misc_first_child) { + append_siblings_list(&parent->misc_first_child, child->misc_first_child, parent); + parent->misc_arity += child->misc_arity; + } + hwloc_free_unlinked_object(child); + } else { + /* replace parent with child in grand-parent */ + if (parent->parent) { + parent->parent->children[parent->sibling_rank] = child; + child->sibling_rank = parent->sibling_rank; + if (!parent->sibling_rank) { + parent->parent->first_child = child; + /* child->prev_sibling was already NULL, child was single */ + } else { + child->prev_sibling = parent->parent->children[parent->sibling_rank-1]; + child->prev_sibling->next_sibling = child; + } + if (parent->sibling_rank == parent->parent->arity-1) { + parent->parent->last_child = child; + /* child->next_sibling was already NULL, child was single */ + } else { + child->next_sibling = parent->parent->children[parent->sibling_rank+1]; + child->next_sibling->prev_sibling = child; + } + /* update child parent */ + child->parent = parent->parent; + } else { + /* make child the new root */ + topology->levels[0][0] = child; + child->parent = NULL; + } + /* prepend parent memory/io/misc children to child */ + if (parent->memory_first_child) { + prepend_siblings_list(&child->memory_first_child, parent->memory_first_child, child); + child->memory_arity += parent->memory_arity; + } + if (parent->io_first_child) { + prepend_siblings_list(&child->io_first_child, parent->io_first_child, child); + child->io_arity += parent->io_arity; + } + if (parent->misc_first_child) { + prepend_siblings_list(&child->misc_first_child, parent->misc_first_child, child); + child->misc_arity += parent->misc_arity; + } + hwloc_free_unlinked_object(parent); + /* prev/next_sibling will be updated below in another loop */ + } + } + if (replaceparent && i>1) { + /* Update sibling list within modified parent->parent arrays */ + for(j=0; 
j<topology->level_nbobjects[i]; j++) { + hwloc_obj_t child = topology->levels[i][j]; + unsigned rank = child->sibling_rank; + child->prev_sibling = rank > 0 ? child->parent->children[rank-1] : NULL; + child->next_sibling = rank < child->parent->arity-1 ? child->parent->children[rank+1] : NULL; + } + } + + /* Update levels so that the next reconnect isn't confused */ + if (replaceparent) { + /* Removing level i-1, so move levels [i..nb_levels-1] to [i-1..] */ + free(topology->levels[i-1]); + memmove(&topology->levels[i-1], + &topology->levels[i], + (topology->nb_levels-i)*sizeof(topology->levels[i])); + memmove(&topology->level_nbobjects[i-1], + &topology->level_nbobjects[i], + (topology->nb_levels-i)*sizeof(topology->level_nbobjects[i])); + hwloc_debug("removed parent level %s at depth %u\n", + hwloc_obj_type_string(type1), i-1); + } else { + /* Removing level i, so move levels [i+1..nb_levels-1] and later to [i..] */ + free(topology->levels[i]); + memmove(&topology->levels[i], + &topology->levels[i+1], + (topology->nb_levels-1-i)*sizeof(topology->levels[i])); + memmove(&topology->level_nbobjects[i], + &topology->level_nbobjects[i+1], + (topology->nb_levels-1-i)*sizeof(topology->level_nbobjects[i])); + hwloc_debug("removed child level %s at depth %u\n", + hwloc_obj_type_string(type2), i); + } + topology->level_nbobjects[topology->nb_levels-1] = 0; + topology->levels[topology->nb_levels-1] = NULL; + topology->nb_levels--; + + res++; + } + + if (res > 0) { + /* Update object and type depths if some levels were removed */ + hwloc_reset_normal_type_depths(topology); + for(i=0; i<topology->nb_levels; i++) { + hwloc_obj_type_t type = topology->levels[i][0]->type; + for(j=0; j<topology->level_nbobjects[i]; j++) + topology->levels[i][j]->depth = (int)i; + if (topology->type_depth[type] == HWLOC_TYPE_DEPTH_UNKNOWN) + topology->type_depth[type] = (int)i; + else + topology->type_depth[type] = HWLOC_TYPE_DEPTH_MULTIPLE; + } + } +} + +static void +hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root) +{ + hwloc_obj_t child; + unsigned arity = root->arity; + int ok; + + /* assume we're not symmetric by default */ + root->symmetric_subtree = 0; + + /* if no child, we are symmetric */ + if (!arity) + goto good; + + /* FIXME ignore memory just like I/O and Misc? */ + + /* look at normal children only, I/O and Misc are ignored. + * return if any child is not symmetric. + */ + ok = 1; + for_each_child(child, root) { + hwloc_propagate_symmetric_subtree(topology, child); + if (!child->symmetric_subtree) + ok = 0; + } + if (!ok) + return; + /* Misc and I/O children do not care about symmetric_subtree */ + + /* if single child is symmetric, we're good */ + if (arity == 1) + goto good; + + /* now check that children subtrees are identical. 
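Since level merging can leave one type at several depths, the type_depth values updated above surface through hwloc_get_type_depth(), whose sentinel values callers must handle; the usual lookup idiom:

    #include <hwloc.h>
    #include <stdio.h>

    static void show_core_depth(hwloc_topology_t topology)
    {
      int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_CORE);
      if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
        printf("no Core level\n");
      else if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
        printf("Core objects exist at several depths\n");
      else
        printf("%u cores\n", hwloc_get_nbobjs_by_depth(topology, depth));
    }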
+ * just walk down the first child in each tree and compare their depth and arities + */ +{ + HWLOC_VLA(hwloc_obj_t, array, arity); + memcpy(array, root->children, arity * sizeof(*array)); + while (1) { + unsigned i; + /* check current level arities and depth */ + for(i=1; i<arity; i++) + if (array[i]->depth != array[0]->depth + || array[i]->arity != array[0]->arity) { + return; + } + if (!array[0]->arity) + /* no more children level, we're ok */ + break; + /* look at first child of each element now */ + for(i=0; i<arity; i++) + array[i] = array[i]->first_child; + } +} + + /* everything went fine, we're symmetric */ + good: + root->symmetric_subtree = 1; +} + +static void hwloc_set_group_depth(hwloc_topology_t topology) +{ + unsigned groupdepth = 0; + unsigned i, j; + for(i=0; i<topology->nb_levels; i++) + if (topology->levels[i][0]->type == HWLOC_OBJ_GROUP) { + for (j = 0; j < topology->level_nbobjects[i]; j++) + topology->levels[i][j]->attr->group.depth = groupdepth; + groupdepth++; + } +} + +/* + * Initialize handy pointers in the whole topology. + * The topology only had first_child and next_sibling pointers. + * When this function returns, all parent/children pointers are initialized. + * The remaining fields (levels, cousins, logical_index, depth, ...) will + * be set up later in hwloc_connect_levels(). + * + * Can be called several times, so may have to update the array. + */ +static void +hwloc_connect_children(hwloc_obj_t parent) +{ + unsigned n, oldn = parent->arity; + hwloc_obj_t child, prev_child; + int ok; + + /* Main children list */ + + ok = 1; + prev_child = NULL; + for (n = 0, child = parent->first_child; + child; + n++, prev_child = child, child = child->next_sibling) { + child->sibling_rank = n; + child->prev_sibling = prev_child; + /* already OK in the array?
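Downstream consumers read the flag computed by hwloc_propagate_symmetric_subtree() directly off the object; a minimal sketch:

    #include <hwloc.h>
    #include <stdio.h>

    static void check_symmetry(hwloc_topology_t topology)
    {
      hwloc_obj_t root = hwloc_get_root_obj(topology);
      /* set at load time: all subtrees below root are identical */
      if (root->symmetric_subtree)
        printf("balanced tree below root\n");
      else
        printf("asymmetric topology\n");
    }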
*/ + if (n >= oldn || parent->children[n] != child) + ok = 0; + /* recurse */ + hwloc_connect_children(child); + } + parent->last_child = prev_child; + parent->arity = n; + if (!n) { + /* no need for an array anymore */ + free(parent->children); + parent->children = NULL; + goto memory; + } + if (ok) + /* array is already OK (even if too large) */ + goto memory; + + /* alloc a larger array if needed */ + if (oldn < n) { + free(parent->children); + parent->children = malloc(n * sizeof(*parent->children)); + } + /* refill */ + for (n = 0, child = parent->first_child; + child; + n++, child = child->next_sibling) { + parent->children[n] = child; + } + + + + memory: + /* Memory children list */ + + prev_child = NULL; + for (n = 0, child = parent->memory_first_child; + child; + n++, prev_child = child, child = child->next_sibling) { + child->parent = parent; + child->sibling_rank = n; + child->prev_sibling = prev_child; + hwloc_connect_children(child); + } + parent->memory_arity = n; + + /* I/O children list */ + + prev_child = NULL; + for (n = 0, child = parent->io_first_child; + child; + n++, prev_child = child, child = child->next_sibling) { + child->parent = parent; + child->sibling_rank = n; + child->prev_sibling = prev_child; + hwloc_connect_children(child); + } + parent->io_arity = n; + + /* Misc children list */ + + prev_child = NULL; + for (n = 0, child = parent->misc_first_child; + child; + n++, prev_child = child, child = child->next_sibling) { + child->parent = parent; + child->sibling_rank = n; + child->prev_sibling = prev_child; + hwloc_connect_children(child); + } + parent->misc_arity = n; +} + +/* + * Check whether there is an object below ROOT that has the same type as OBJ + */ +static int +find_same_type(hwloc_obj_t root, hwloc_obj_t obj) +{ + hwloc_obj_t child; + + if (hwloc_type_cmp(root, obj) == HWLOC_OBJ_EQUAL) + return 1; + + for_each_child (child, root) + if (find_same_type(child, obj)) + return 1; + + return 0; +} + +/* traverse the array of current object and compare them with top_obj. + * if equal, take the object and put its children into the remaining objs. + * if not equal, put the object into the remaining objs. + */ +static unsigned +hwloc_level_take_objects(hwloc_obj_t top_obj, + hwloc_obj_t *current_objs, unsigned n_current_objs, + hwloc_obj_t *taken_objs, unsigned n_taken_objs __hwloc_attribute_unused, + hwloc_obj_t *remaining_objs, unsigned n_remaining_objs __hwloc_attribute_unused) +{ + unsigned taken_i = 0; + unsigned new_i = 0; + unsigned i, j; + + for (i = 0; i < n_current_objs; i++) + if (hwloc_type_cmp(top_obj, current_objs[i]) == HWLOC_OBJ_EQUAL) { + /* Take it, add main children. */ + taken_objs[taken_i++] = current_objs[i]; + for (j = 0; j < current_objs[i]->arity; j++) + remaining_objs[new_i++] = current_objs[i]->children[j]; + } else { + /* Leave it. */ + remaining_objs[new_i++] = current_objs[i]; + } + +#ifdef HWLOC_DEBUG + /* Make sure we didn't mess up. 
*/ + assert(taken_i == n_taken_objs); + assert(new_i == n_current_objs - n_taken_objs + n_remaining_objs); +#endif + + return new_i; +} + +static int +hwloc_build_level_from_list(struct hwloc_special_level_s *slevel) +{ + unsigned i, nb; + struct hwloc_obj * obj; + + /* count */ + obj = slevel->first; + i = 0; + while (obj) { + i++; + obj = obj->next_cousin; + } + nb = i; + + if (nb) { + /* allocate and fill level */ + slevel->objs = malloc(nb * sizeof(struct hwloc_obj *)); + obj = slevel->first; + i = 0; + while (obj) { + obj->logical_index = i; + slevel->objs[i] = obj; + i++; + obj = obj->next_cousin; + } + } + + slevel->nbobjs = nb; + return 0; +} + +static void +hwloc_append_special_object(struct hwloc_special_level_s *level, hwloc_obj_t obj) +{ + if (level->first) { + obj->prev_cousin = level->last; + obj->prev_cousin->next_cousin = obj; + level->last = obj; + } else { + obj->prev_cousin = NULL; + level->first = level->last = obj; + } +} + +/* Append special objects to their lists */ +static void +hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj) +{ + hwloc_obj_t child; + + if (obj->type == HWLOC_OBJ_NUMANODE) { + obj->next_cousin = NULL; + obj->depth = HWLOC_TYPE_DEPTH_NUMANODE; + /* Insert the main NUMA node list */ + hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_NUMANODE], obj); + + /* Recurse */ + for_each_memory_child(child, obj) + hwloc_list_special_objects(topology, child); + for_each_misc_child(child, obj) + hwloc_list_special_objects(topology, child); + + } else if (obj->type == HWLOC_OBJ_MISC) { + obj->next_cousin = NULL; + obj->depth = HWLOC_TYPE_DEPTH_MISC; + /* Insert the main Misc list */ + hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_MISC], obj); + /* Recurse, Misc only have Misc children */ + for_each_misc_child(child, obj) + hwloc_list_special_objects(topology, child); + + } else if (hwloc__obj_type_is_io(obj->type)) { + obj->next_cousin = NULL; + + if (obj->type == HWLOC_OBJ_BRIDGE) { + obj->depth = HWLOC_TYPE_DEPTH_BRIDGE; + /* Insert in the main bridge list */ + hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_BRIDGE], obj); + + } else if (obj->type == HWLOC_OBJ_PCI_DEVICE) { + obj->depth = HWLOC_TYPE_DEPTH_PCI_DEVICE; + /* Insert in the main pcidev list */ + hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_PCIDEV], obj); + + } else if (obj->type == HWLOC_OBJ_OS_DEVICE) { + obj->depth = HWLOC_TYPE_DEPTH_OS_DEVICE; + /* Insert in the main osdev list */ + hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_OSDEV], obj); + } + /* Recurse, I/O only have I/O and Misc children */ + for_each_io_child(child, obj) + hwloc_list_special_objects(topology, child); + for_each_misc_child(child, obj) + hwloc_list_special_objects(topology, child); + + } else { + /* Recurse */ + for_each_child(child, obj) + hwloc_list_special_objects(topology, child); + for_each_memory_child(child, obj) + hwloc_list_special_objects(topology, child); + for_each_io_child(child, obj) + hwloc_list_special_objects(topology, child); + for_each_misc_child(child, obj) + hwloc_list_special_objects(topology, child); + } +} + +/* Build I/O levels */ +static void +hwloc_connect_io_misc_levels(hwloc_topology_t topology) +{ + unsigned i; + + for(i=0; i<HWLOC_NR_SLEVELS; i++) + free(topology->slevels[i].objs); + memset(&topology->slevels, 0, sizeof(topology->slevels)); + + hwloc_list_special_objects(topology, topology->levels[0][0]); + + for(i=0; i<HWLOC_NR_SLEVELS; i++) + hwloc_build_level_from_list(&topology->slevels[i]); +} + +/* + 
* Do the remaining work that hwloc_connect_children() did not do earlier. + * Requires object arity and children list to be properly initialized (by hwloc_connect_children()). + */ +static int +hwloc_connect_levels(hwloc_topology_t topology) +{ + unsigned l, i=0; + hwloc_obj_t *objs, *taken_objs, *new_objs, top_obj, root; + unsigned n_objs, n_taken_objs, n_new_objs; + + /* reset non-root levels (root was initialized during init and will not change here) */ + for(l=1; l<topology->nb_levels; l++) + free(topology->levels[l]); + memset(topology->levels+1, 0, (topology->nb_levels-1)*sizeof(*topology->levels)); + memset(topology->level_nbobjects+1, 0, (topology->nb_levels-1)*sizeof(*topology->level_nbobjects)); + topology->nb_levels = 1; + + /* initialize all non-IO/non-Misc depths to unknown */ + hwloc_reset_normal_type_depths(topology); + + /* initialize root type depth */ + root = topology->levels[0][0]; + root->depth = 0; + topology->type_depth[root->type] = 0; + /* root level */ + root->logical_index = 0; + root->prev_cousin = NULL; + root->next_cousin = NULL; + /* root as a child of nothing */ + root->parent = NULL; + root->sibling_rank = 0; + root->prev_sibling = NULL; + root->next_sibling = NULL; + + /* Start with children of the whole system. */ + n_objs = topology->levels[0][0]->arity; + objs = malloc(n_objs * sizeof(objs[0])); + if (!objs) { + errno = ENOMEM; + return -1; + } + memcpy(objs, topology->levels[0][0]->children, n_objs*sizeof(objs[0])); + + /* Keep building levels while there are objects left in OBJS. */ + while (n_objs) { + /* At this point, the objs array contains only objects that may go into levels */ + + /* First find which type of object is the topmost. + * Don't use PU if there are other types since we want to keep PU at the bottom. + */ + + /* Look for the first non-PU object, and use the first PU if we really find nothing else */ + for (i = 0; i < n_objs; i++) + if (objs[i]->type != HWLOC_OBJ_PU) + break; + top_obj = i == n_objs ? objs[0] : objs[i]; + + /* See if this is actually the topmost object */ + for (i = 0; i < n_objs; i++) { + if (hwloc_type_cmp(top_obj, objs[i]) != HWLOC_OBJ_EQUAL) { + if (find_same_type(objs[i], top_obj)) { + /* OBJS[i] is strictly above an object of the same type as TOP_OBJ, so it + * is above TOP_OBJ. */ + top_obj = objs[i]; + } + } + } + + /* Now peek all objects of the same type, build a level with that and + * replace them with their children. */ + + /* First count them. */ + n_taken_objs = 0; + n_new_objs = 0; + for (i = 0; i < n_objs; i++) + if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_OBJ_EQUAL) { + n_taken_objs++; + n_new_objs += objs[i]->arity; + } + + /* New level. */ + taken_objs = malloc((n_taken_objs + 1) * sizeof(taken_objs[0])); + /* New list of pending objects. */ + if (n_objs - n_taken_objs + n_new_objs) { + new_objs = malloc((n_objs - n_taken_objs + n_new_objs) * sizeof(new_objs[0])); + } else { +#ifdef HWLOC_DEBUG + assert(!n_new_objs); + assert(n_objs == n_taken_objs); +#endif + new_objs = NULL; + } + + n_new_objs = hwloc_level_take_objects(top_obj, + objs, n_objs, + taken_objs, n_taken_objs, + new_objs, n_new_objs); + + /* Ok, put numbers in the level and link cousins. 
*/ + for (i = 0; i < n_taken_objs; i++) { + taken_objs[i]->depth = (int) topology->nb_levels; + taken_objs[i]->logical_index = i; + if (i) { + taken_objs[i]->prev_cousin = taken_objs[i-1]; + taken_objs[i-1]->next_cousin = taken_objs[i]; + } + } + taken_objs[0]->prev_cousin = NULL; + taken_objs[n_taken_objs-1]->next_cousin = NULL; + + /* One more level! */ + hwloc_debug("--- %s level", hwloc_obj_type_string(top_obj->type)); + hwloc_debug(" has number %u\n\n", topology->nb_levels); + + if (topology->type_depth[top_obj->type] == HWLOC_TYPE_DEPTH_UNKNOWN) + topology->type_depth[top_obj->type] = (int) topology->nb_levels; + else + topology->type_depth[top_obj->type] = HWLOC_TYPE_DEPTH_MULTIPLE; /* mark as unknown */ + + taken_objs[n_taken_objs] = NULL; + + if (topology->nb_levels == topology->nb_levels_allocated) { + /* extend the arrays of levels */ + void *tmplevels, *tmpnbobjs; + tmplevels = realloc(topology->levels, + 2 * topology->nb_levels_allocated * sizeof(*topology->levels)); + tmpnbobjs = realloc(topology->level_nbobjects, + 2 * topology->nb_levels_allocated * sizeof(*topology->level_nbobjects)); + if (!tmplevels || !tmpnbobjs) { + fprintf(stderr, "hwloc failed to realloc level arrays to %u\n", topology->nb_levels_allocated * 2); + + /* if one realloc succeeded, make sure the caller will free the new buffer */ + if (tmplevels) + topology->levels = tmplevels; + if (tmpnbobjs) + topology->level_nbobjects = tmpnbobjs; + /* the realloc that failed left topology->level_foo untouched, will be freed by the caller */ + + free(objs); + free(taken_objs); + free(new_objs); + errno = ENOMEM; + return -1; + } + topology->levels = tmplevels; + topology->level_nbobjects = tmpnbobjs; + memset(topology->levels + topology->nb_levels_allocated, + 0, topology->nb_levels_allocated * sizeof(*topology->levels)); + memset(topology->level_nbobjects + topology->nb_levels_allocated, + 0, topology->nb_levels_allocated * sizeof(*topology->level_nbobjects)); + topology->nb_levels_allocated *= 2; + } + /* add the new level */ + topology->level_nbobjects[topology->nb_levels] = n_taken_objs; + topology->levels[topology->nb_levels] = taken_objs; + + topology->nb_levels++; + + free(objs); + + /* Switch to new_objs */ + objs = new_objs; + n_objs = n_new_objs; + } + + /* It's empty now. */ + free(objs); + + return 0; +} + +int +hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags) +{ + if (flags) { + errno = EINVAL; + return -1; + } + if (!topology->modified) + return 0; + + hwloc_connect_children(topology->levels[0][0]); + + if (hwloc_connect_levels(topology) < 0) + return -1; + + hwloc_connect_io_misc_levels(topology); + + topology->modified = 0; + + return 0; +} + +void hwloc_alloc_root_sets(hwloc_obj_t root) +{ + /* + * All sets are initially NULL. + * + * At least one backend should call this function to initialize all sets at once. + * XML uses it lazily in case only some sets were given in the XML import. + * + * Other backends can check root->cpuset != NULL to see if somebody + * discovered things before them. 
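The level arrays assembled by hwloc_connect_levels() are what the public depth iteration walks after hwloc_topology_reconnect(); e.g.:

    #include <hwloc.h>
    #include <stdio.h>

    static void print_levels(hwloc_topology_t topology)
    {
      int depth, topodepth = hwloc_topology_get_depth(topology);
      for (depth = 0; depth < topodepth; depth++) {
        hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0);
        printf("depth %d: %u x %s\n", depth,
               hwloc_get_nbobjs_by_depth(topology, depth),
               hwloc_obj_type_string(obj->type));
      }
    }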
+ */ + if (!root->cpuset) + root->cpuset = hwloc_bitmap_alloc(); + if (!root->complete_cpuset) + root->complete_cpuset = hwloc_bitmap_alloc(); + if (!root->nodeset) + root->nodeset = hwloc_bitmap_alloc(); + if (!root->complete_nodeset) + root->complete_nodeset = hwloc_bitmap_alloc(); +} + +/* Main discovery loop */ +static int +hwloc_discover(struct hwloc_topology *topology) +{ + struct hwloc_backend *backend; + + topology->modified = 0; /* no need to reconnect yet */ + + topology->allowed_cpuset = hwloc_bitmap_alloc_full(); + topology->allowed_nodeset = hwloc_bitmap_alloc_full(); + + /* discover() callbacks should use hwloc_insert to add objects initialized + * through hwloc_alloc_setup_object. + * For node levels, nodeset and memory must be initialized. + * For cache levels, memory and type/depth must be initialized. + * For group levels, depth must be initialized. + */ + + /* There must be at least a PU object for each logical processor, at worst + * produced by hwloc_setup_pu_level() + */ + + /* To be able to just use hwloc_insert_object_by_cpuset to insert the object + * in the topology according to the cpuset, the cpuset field must be + * initialized. + */ + + /* A priori, all processors are visible in the topology, and allowed + * for the application. + * + * - If some processors exist but topology information is unknown for them + * (and thus the backend couldn't create objects for them), they should be + * added to the complete_cpuset field of the lowest object where the object + * could reside. + * + * - If some processors are not allowed for the application (e.g. for + * administration reasons), they should be dropped from the allowed_cpuset + * field. + * + * The same applies to the node sets complete_nodeset and allowed_nodeset. + * + * If such a field doesn't exist yet, it can be allocated, and initialized to + * zero (for complete), or to full (for allowed). The values are + * automatically propagated to the whole tree after detection. + */ + + /* + * Discover CPUs first + */ + backend = topology->backends; + while (NULL != backend) { + if (backend->component->type != HWLOC_DISC_COMPONENT_TYPE_CPU + && backend->component->type != HWLOC_DISC_COMPONENT_TYPE_GLOBAL) + /* not yet */ + goto next_cpubackend; + if (!backend->discover) + goto next_cpubackend; + backend->discover(backend); + hwloc_debug_print_objects(0, topology->levels[0][0]); + +next_cpubackend: + backend = backend->next; + } + + /* One backend should have called hwloc_alloc_root_sets() + * and set bits during PU and NUMA insert. + */ + if (!topology->levels[0][0]->cpuset || hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) { + hwloc_debug("%s", "No PU added by any CPU or global backend\n"); + errno = EINVAL; + return -1; + } + + if (topology->binding_hooks.get_allowed_resources && topology->is_thissystem) { + const char *env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES"); + if ((env && atoi(env)) + || (topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) + topology->binding_hooks.get_allowed_resources(topology); + } + + /* If there's no NUMA node, add one with all the memory. + * root->complete_nodeset wouldn't be empty if any NUMA was ever added: + * - insert_by_cpuset() adds bits when PU/NUMA are added. + * - XML takes care of sanitizing nodesets.
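The complete/allowed distinction drawn in this contract is visible through the public set accessors; a sketch of checking whether anything was disallowed:

    #include <hwloc.h>
    #include <stdio.h>

    static void show_allowed(hwloc_topology_t topology)
    {
      hwloc_const_cpuset_t all = hwloc_topology_get_complete_cpuset(topology);
      hwloc_const_cpuset_t allowed = hwloc_topology_get_allowed_cpuset(topology);
      if (!hwloc_bitmap_isequal(all, allowed))
        printf("some PUs exist but are not allowed for this process\n");
    }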
+ */ + if (hwloc_bitmap_iszero(topology->levels[0][0]->complete_nodeset)) { + hwloc_obj_t node; + hwloc_debug("%s", "\nAdd missing single NUMA node\n"); + node = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, 0); + node->cpuset = hwloc_bitmap_dup(topology->levels[0][0]->cpuset); + node->nodeset = hwloc_bitmap_alloc(); + /* other nodesets will be filled below */ + hwloc_bitmap_set(node->nodeset, 0); + memcpy(&node->attr->numanode, &topology->machine_memory, sizeof(topology->machine_memory)); + memset(&topology->machine_memory, 0, sizeof(topology->machine_memory)); + hwloc_insert_object_by_cpuset(topology, node); + } else { + /* if we're sure we found all NUMA nodes without their sizes (x86 backend?), + * we could split topology->total_memory in all of them. + */ + free(topology->machine_memory.page_types); + memset(&topology->machine_memory, 0, sizeof(topology->machine_memory)); + } + + hwloc_debug("%s", "\nFixup root sets\n"); + hwloc_bitmap_and(topology->levels[0][0]->cpuset, topology->levels[0][0]->cpuset, topology->levels[0][0]->complete_cpuset); + hwloc_bitmap_and(topology->levels[0][0]->nodeset, topology->levels[0][0]->nodeset, topology->levels[0][0]->complete_nodeset); + + hwloc_bitmap_and(topology->allowed_cpuset, topology->allowed_cpuset, topology->levels[0][0]->cpuset); + hwloc_bitmap_and(topology->allowed_nodeset, topology->allowed_nodeset, topology->levels[0][0]->nodeset); + + hwloc_debug("%s", "\nPropagate sets\n"); + /* cpuset are already there thanks to the _by_cpuset insertion, + * but nodeset have to be propagated below and above NUMA nodes + */ + propagate_nodeset(topology->levels[0][0]); + /* now fixup parent/children sets */ + fixup_sets(topology->levels[0][0]); + + hwloc_debug_print_objects(0, topology->levels[0][0]); + + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) { + hwloc_debug("%s", "\nRemoving unauthorized sets from all sets\n"); + remove_unused_sets(topology, topology->levels[0][0]); + hwloc_debug_print_objects(0, topology->levels[0][0]); + } + + /* see if we should ignore the root now that we know how many children it has */ + if (!hwloc_filter_check_keep_object(topology, topology->levels[0][0]) + && topology->levels[0][0]->first_child && !topology->levels[0][0]->first_child->next_sibling) { + hwloc_obj_t oldroot = topology->levels[0][0]; + hwloc_obj_t newroot = oldroot->first_child; + /* switch to the new root */ + newroot->parent = NULL; + topology->levels[0][0] = newroot; + /* move oldroot memory/io/misc children before newroot children */ + if (oldroot->memory_first_child) + prepend_siblings_list(&newroot->memory_first_child, oldroot->memory_first_child, newroot); + if (oldroot->io_first_child) + prepend_siblings_list(&newroot->io_first_child, oldroot->io_first_child, newroot); + if (oldroot->misc_first_child) + prepend_siblings_list(&newroot->misc_first_child, oldroot->misc_first_child, newroot); + /* destroy oldroot and use the new one */ + hwloc_free_unlinked_object(oldroot); + } + + /* + * All object cpusets and nodesets are properly set now. + */ + + /* Now connect handy pointers to make remaining discovery easier. 
*/ + hwloc_debug("%s", "\nOk, finished tweaking, now connect\n"); + if (hwloc_topology_reconnect(topology, 0) < 0) + return -1; + hwloc_debug_print_objects(0, topology->levels[0][0]); + + /* + * Additional discovery with other backends + */ + + backend = topology->backends; + while (NULL != backend) { + if (backend->component->type == HWLOC_DISC_COMPONENT_TYPE_CPU + || backend->component->type == HWLOC_DISC_COMPONENT_TYPE_GLOBAL) + /* already done above */ + goto next_noncpubackend; + if (!backend->discover) + goto next_noncpubackend; + backend->discover(backend); + hwloc_debug_print_objects(0, topology->levels[0][0]); + +next_noncpubackend: + backend = backend->next; + } + + hwloc_pci_belowroot_apply_locality(topology); + + hwloc_debug("%s", "\nNow reconnecting\n"); + hwloc_debug_print_objects(0, topology->levels[0][0]); + + /* Remove some stuff */ + + hwloc_debug("%s", "\nRemoving bridge objects if needed\n"); + hwloc_filter_bridges(topology, topology->levels[0][0]); + hwloc_debug_print_objects(0, topology->levels[0][0]); + + hwloc_debug("%s", "\nRemoving empty objects\n"); + remove_empty(topology, &topology->levels[0][0]); + if (!topology->levels[0][0]) { + fprintf(stderr, "Topology became empty, aborting!\n"); + return -1; + } + if (hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) { + fprintf(stderr, "Topology does not contain any PU, aborting!\n"); + return -1; + } + if (hwloc_bitmap_iszero(topology->levels[0][0]->nodeset)) { + fprintf(stderr, "Topology does not contain any NUMA node, aborting!\n"); + return -1; + } + hwloc_debug_print_objects(0, topology->levels[0][0]); + + /* Reconnect things after all these changes. + * Often needed because of Groups inserted for I/Os. + * And required for KEEP_STRUCTURE below. + */ + if (hwloc_topology_reconnect(topology, 0) < 0) + return -1; + + hwloc_debug("%s", "\nRemoving levels with HWLOC_TYPE_FILTER_KEEP_STRUCTURE\n"); + hwloc_filter_levels_keep_structure(topology); + hwloc_debug_print_objects(0, topology->levels[0][0]); + + /* accumulate children memory in total_memory fields (only once parent is set) */ + hwloc_debug("%s", "\nPropagate total memory up\n"); + propagate_total_memory(topology->levels[0][0]); + + /* setup the symmetric_subtree attribute */ + hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]); + + /* apply group depths */ + hwloc_set_group_depth(topology); + + /* add some identification attributes if not loading from XML */ + if (topology->backends + && strcmp(topology->backends->component->name, "xml")) { + char *value; + /* add a hwlocVersion */ + hwloc_obj_add_info(topology->levels[0][0], "hwlocVersion", HWLOC_VERSION); + /* add a ProcessName */ + value = hwloc_progname(topology); + if (value) { + hwloc_obj_add_info(topology->levels[0][0], "ProcessName", value); + free(value); + } + } + + return 0; +} + +/* To be called before discovery is actually launched, + * Resets everything in case a previous load initialized some stuff. 
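End to end, hwloc_discover() runs inside hwloc_topology_load(); the canonical driver sequence, with illustrative filter/flag choices set between init and load:

    #include <hwloc.h>

    int main(void)
    {
      hwloc_topology_t topology;
      if (hwloc_topology_init(&topology) < 0)
        return 1;
      /* keep only the I/O objects that matter; include disallowed PUs too */
      hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
      hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);
      if (hwloc_topology_load(topology) < 0) {  /* runs hwloc_discover() */
        hwloc_topology_destroy(topology);
        return 1;
      }
      /* ... use the topology ... */
      hwloc_topology_destroy(topology);
      return 0;
    }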
+ */ +void +hwloc_topology_setup_defaults(struct hwloc_topology *topology) +{ + struct hwloc_obj *root_obj; + + /* reset support */ + memset(&topology->binding_hooks, 0, sizeof(topology->binding_hooks)); + memset(topology->support.discovery, 0, sizeof(*topology->support.discovery)); + memset(topology->support.cpubind, 0, sizeof(*topology->support.cpubind)); + memset(topology->support.membind, 0, sizeof(*topology->support.membind)); + + /* Only the System object on top by default */ + topology->next_gp_index = 1; /* keep 0 as an invalid value */ + topology->nb_levels = 1; /* there's at least SYSTEM */ + topology->levels[0] = hwloc_tma_malloc (topology->tma, sizeof (hwloc_obj_t)); + topology->level_nbobjects[0] = 1; + + /* Machine-wide memory */ + topology->machine_memory.local_memory = 0; + topology->machine_memory.page_types_len = 0; + topology->machine_memory.page_types = NULL; + + /* Allowed stuff */ + topology->allowed_cpuset = NULL; + topology->allowed_nodeset = NULL; + + /* NULLify other special levels */ + memset(&topology->slevels, 0, sizeof(topology->slevels)); + /* assert the indexes of special levels */ + HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_NUMANODE == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_NUMANODE)); + HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_MISC == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_MISC)); + HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_BRIDGE == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_BRIDGE)); + HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_PCIDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_PCI_DEVICE)); + HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_OSDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_OS_DEVICE)); + + /* sane values to type_depth */ + hwloc_reset_normal_type_depths(topology); + topology->type_depth[HWLOC_OBJ_NUMANODE] = HWLOC_TYPE_DEPTH_NUMANODE; + topology->type_depth[HWLOC_OBJ_MISC] = HWLOC_TYPE_DEPTH_MISC; + topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE; + topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE; + topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE; + + /* Create the actual machine object, but don't touch its attributes yet + * since the OS backend may still change the object into something else + * (for instance System) + */ + root_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MACHINE, 0); + topology->levels[0][0] = root_obj; +} + +static void hwloc__topology_filter_init(struct hwloc_topology *topology); + +/* This function may use a tma, it cannot free() or realloc() */ +static int +hwloc__topology_init (struct hwloc_topology **topologyp, + unsigned nblevels, + struct hwloc_tma *tma) +{ + struct hwloc_topology *topology; + + topology = hwloc_tma_malloc (tma, sizeof (struct hwloc_topology)); + if(!topology) + return -1; + + topology->tma = tma; + + hwloc_components_init(); /* uses malloc without tma, but won't need it since dup() caller already took a reference */ + hwloc_backends_init(topology); + hwloc_pci_discovery_init(topology); /* make sure both dup() and load() get sane variables */ + + /* Setup topology context */ + topology->is_loaded = 0; + topology->flags = 0; + topology->is_thissystem = 1; + topology->pid = 0; + topology->userdata = NULL; + topology->topology_abi = HWLOC_TOPOLOGY_ABI; + topology->adopted_shmem_addr = NULL; + topology->adopted_shmem_length = 0; + + topology->support.discovery = hwloc_tma_malloc(tma, sizeof(*topology->support.discovery)); + topology->support.cpubind = hwloc_tma_malloc(tma, sizeof(*topology->support.cpubind)); + topology->support.membind = hwloc_tma_malloc(tma, 
sizeof(*topology->support.membind)); + + topology->nb_levels_allocated = nblevels; /* enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */ + topology->levels = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->levels)); + topology->level_nbobjects = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->level_nbobjects)); + + hwloc__topology_filter_init(topology); + + hwloc_internal_distances_init(topology); + + topology->userdata_export_cb = NULL; + topology->userdata_import_cb = NULL; + topology->userdata_not_decoded = 0; + + /* Make the topology look like something coherent but empty */ + hwloc_topology_setup_defaults(topology); + + *topologyp = topology; + return 0; +} + +int +hwloc_topology_init (struct hwloc_topology **topologyp) +{ + return hwloc__topology_init(topologyp, + 16, /* 16 is enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */ + NULL); /* no TMA for normal topologies, too many allocations to fix */ +} + +int +hwloc_topology_set_pid(struct hwloc_topology *topology __hwloc_attribute_unused, + hwloc_pid_t pid __hwloc_attribute_unused) +{ + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + + /* this does *not* change the backend */ +#ifdef HWLOC_LINUX_SYS + topology->pid = pid; + return 0; +#else /* HWLOC_LINUX_SYS */ + errno = ENOSYS; + return -1; +#endif /* HWLOC_LINUX_SYS */ +} + +int +hwloc_topology_set_synthetic(struct hwloc_topology *topology, const char *description) +{ + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + + return hwloc_disc_component_force_enable(topology, + 0 /* api */, + -1, "synthetic", + description, NULL, NULL); +} + +int +hwloc_topology_set_xml(struct hwloc_topology *topology, + const char *xmlpath) +{ + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + + return hwloc_disc_component_force_enable(topology, + 0 /* api */, + -1, "xml", + xmlpath, NULL, NULL); +} + +int +hwloc_topology_set_xmlbuffer(struct hwloc_topology *topology, + const char *xmlbuffer, + int size) +{ + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + + return hwloc_disc_component_force_enable(topology, + 0 /* api */, + -1, "xml", NULL, + xmlbuffer, (void*) (uintptr_t) size); +} + +int +hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags) +{ + if (topology->is_loaded) { + /* actually harmless */ + errno = EBUSY; + return -1; + } + + if (flags & ~(HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) { + errno = EINVAL; + return -1; + } + + topology->flags = flags; + return 0; +} + +unsigned long +hwloc_topology_get_flags (struct hwloc_topology *topology) +{ + return topology->flags; +} + +static void +hwloc__topology_filter_init(struct hwloc_topology *topology) +{ + hwloc_obj_type_t type; + /* Only ignore useless cruft by default */ + for(type = HWLOC_OBJ_TYPE_MIN; type < HWLOC_OBJ_TYPE_MAX; type++) + topology->type_filter[type] = HWLOC_TYPE_FILTER_KEEP_ALL; + topology->type_filter[HWLOC_OBJ_L1ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; + topology->type_filter[HWLOC_OBJ_L2ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; + topology->type_filter[HWLOC_OBJ_L3ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE; + topology->type_filter[HWLOC_OBJ_GROUP] = HWLOC_TYPE_FILTER_KEEP_STRUCTURE; + topology->type_filter[HWLOC_OBJ_MISC] = HWLOC_TYPE_FILTER_KEEP_NONE; + topology->type_filter[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_FILTER_KEEP_NONE; + topology->type_filter[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_FILTER_KEEP_NONE; 
+ topology->type_filter[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_FILTER_KEEP_NONE; +} + +static int +hwloc__topology_set_type_filter(struct hwloc_topology *topology, hwloc_obj_type_t type, enum hwloc_type_filter_e filter) +{ + if (type == HWLOC_OBJ_PU || type == HWLOC_OBJ_NUMANODE || type == HWLOC_OBJ_MACHINE) { + if (filter != HWLOC_TYPE_FILTER_KEEP_ALL) { + /* we need the Machine, PU and NUMA levels */ + errno = EINVAL; + return -1; + } + } else if (hwloc__obj_type_is_special(type)) { + if (filter == HWLOC_TYPE_FILTER_KEEP_STRUCTURE) { + /* I/O and Misc are outside of the main topology structure, makes no sense. */ + errno = EINVAL; + return -1; + } + } else if (type == HWLOC_OBJ_GROUP) { + if (filter == HWLOC_TYPE_FILTER_KEEP_ALL) { + /* Groups are always ignored, at least keep_structure */ + errno = EINVAL; + return -1; + } + } + + /* "important" just means "all" for non-I/O non-Misc */ + if (!hwloc__obj_type_is_special(type) && filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT) + filter = HWLOC_TYPE_FILTER_KEEP_ALL; + + topology->type_filter[type] = filter; + return 0; +} + +int +hwloc_topology_set_type_filter(struct hwloc_topology *topology, hwloc_obj_type_t type, enum hwloc_type_filter_e filter) +{ + HWLOC_BUILD_ASSERT(HWLOC_OBJ_TYPE_MIN == 0); + if ((unsigned) type >= HWLOC_OBJ_TYPE_MAX) { + errno = EINVAL; + return -1; + } + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + return hwloc__topology_set_type_filter(topology, type, filter); +} + +int +hwloc_topology_set_all_types_filter(struct hwloc_topology *topology, enum hwloc_type_filter_e filter) +{ + hwloc_obj_type_t type; + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + for(type = HWLOC_OBJ_TYPE_MIN; type < HWLOC_OBJ_TYPE_MAX; type++) + hwloc__topology_set_type_filter(topology, type, filter); + return 0; +} + +int +hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter) +{ + unsigned i; + for(i=HWLOC_OBJ_L1CACHE; i<=HWLOC_OBJ_L3ICACHE; i++) + hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter); + return 0; +} + +int +hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter) +{ + unsigned i; + for(i=HWLOC_OBJ_L1ICACHE; i<=HWLOC_OBJ_L3ICACHE; i++) + hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter); + return 0; +} + +int +hwloc_topology_set_io_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter) +{ + hwloc_topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter); + hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter); + hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter); + return 0; +} + +int +hwloc_topology_get_type_filter(struct hwloc_topology *topology, hwloc_obj_type_t type, enum hwloc_type_filter_e *filterp) +{ + HWLOC_BUILD_ASSERT(HWLOC_OBJ_TYPE_MIN == 0); + if ((unsigned) type >= HWLOC_OBJ_TYPE_MAX) { + errno = EINVAL; + return -1; + } + *filterp = topology->type_filter[type]; + return 0; +} + +void +hwloc_topology_clear (struct hwloc_topology *topology) +{ + /* no need to set to NULL after free() since callers will call setup_defaults() or just destroy the rest of the topology */ + unsigned l; + hwloc_internal_distances_destroy(topology); + hwloc_free_object_and_children(topology->levels[0][0]); + hwloc_bitmap_free(topology->allowed_cpuset); + hwloc_bitmap_free(topology->allowed_nodeset); + for (l=0; l<topology->nb_levels; l++) + free(topology->levels[l]); + for(l=0; l<HWLOC_NR_SLEVELS; l++) + free(topology->slevels[l].objs); +
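 /* note: the topology->levels and topology->level_nbobjects arrays themselves are kept here; hwloc_topology_destroy() frees them, while hwloc_topology_setup_defaults() reuses them */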
free(topology->machine_memory.page_types); +} + +void +hwloc_topology_destroy (struct hwloc_topology *topology) +{ + if (topology->adopted_shmem_addr) { + hwloc__topology_disadopt(topology); + return; + } + + hwloc_backends_disable_all(topology); + hwloc_components_fini(); + + hwloc_topology_clear(topology); + + free(topology->levels); + free(topology->level_nbobjects); + + free(topology->support.discovery); + free(topology->support.cpubind); + free(topology->support.membind); + free(topology); +} + +int +hwloc_topology_load (struct hwloc_topology *topology) +{ + int err; + + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + + hwloc_internal_distances_prepare(topology); + + if (getenv("HWLOC_XML_USERDATA_NOT_DECODED")) + topology->userdata_not_decoded = 1; + + /* Ignore variables if HWLOC_COMPONENTS is set. It will be processed later */ + if (!getenv("HWLOC_COMPONENTS")) { + /* Only apply variables if we have not changed the backend yet. + * Only the first one will be kept. + * Check for FSROOT first since it's for debugging so likely needs to override everything else. + * Check for XML last (that's the one that may be set system-wide by administrators) + * so that it's only used if other variables are not set, + * to allow users to override easily. + */ + if (!topology->backends) { + const char *fsroot_path_env = getenv("HWLOC_FSROOT"); + if (fsroot_path_env) + hwloc_disc_component_force_enable(topology, + 1 /* env force */, + HWLOC_DISC_COMPONENT_TYPE_CPU, "linux", + NULL /* backend will getenv again */, NULL, NULL); + } + if (!topology->backends) { + const char *cpuid_path_env = getenv("HWLOC_CPUID_PATH"); + if (cpuid_path_env) + hwloc_disc_component_force_enable(topology, + 1 /* env force */, + HWLOC_DISC_COMPONENT_TYPE_CPU, "x86", + NULL /* backend will getenv again */, NULL, NULL); + } + if (!topology->backends) { + const char *synthetic_env = getenv("HWLOC_SYNTHETIC"); + if (synthetic_env) + hwloc_disc_component_force_enable(topology, + 1 /* env force */, + -1, "synthetic", + synthetic_env, NULL, NULL); + } + if (!topology->backends) { + const char *xmlpath_env = getenv("HWLOC_XMLFILE"); + if (xmlpath_env) + hwloc_disc_component_force_enable(topology, + 1 /* env force */, + -1, "xml", + xmlpath_env, NULL, NULL); + } + } + + /* instantiate all possible other backends now */ + hwloc_disc_components_enable_others(topology); + /* now that backends are enabled, update the thissystem flag and some callbacks */ + hwloc_backends_is_thissystem(topology); + hwloc_backends_find_callbacks(topology); + /* + * Now set binding hooks according to topology->is_thissystem + * and what the native OS backend offers. + */ + hwloc_set_binding_hooks(topology); + + hwloc_pci_discovery_prepare(topology); + + /* actual topology discovery */ + err = hwloc_discover(topology); + if (err < 0) + goto out; + + hwloc_pci_discovery_exit(topology); + +#ifndef HWLOC_DEBUG + if (getenv("HWLOC_DEBUG_CHECK")) +#endif + hwloc_topology_check(topology); + + /* Mark distances objs arrays as invalid since we may have removed objects + * from the topology after adding the distances (remove_empty, etc). + * It would be hard to actually verify whether it's needed. + */ + hwloc_internal_distances_invalidate_cached_objs(topology); + /* And refresh distances so that multithreaded concurrent distances_get() + * don't refresh() concurrently (disallowed). 
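+ * Doing the refresh here, while hwloc_topology_load() still has exclusive ownership of the topology, means later concurrent readers find the caches already up to date.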
+ */ + hwloc_internal_distances_refresh(topology); + + topology->is_loaded = 1; + return 0; + + out: + hwloc_pci_discovery_exit(topology); + hwloc_topology_clear(topology); + hwloc_topology_setup_defaults(topology); + hwloc_backends_disable_all(topology); + return -1; +} + +/* adjust object cpusets according to the given droppedcpuset, + * drop objects whose cpuset becomes empty and that have no children, + * and propagate NUMA node removal as nodeset changes in parents. + */ +static void +restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_obj_t *pobj, + hwloc_bitmap_t droppedcpuset, hwloc_bitmap_t droppednodeset) +{ + hwloc_obj_t obj = *pobj, child, *pchild; + int modified = 0; + + if (hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset)) { + hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset); + hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset); + modified = 1; + } else { + if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) + && hwloc_bitmap_iszero(obj->complete_cpuset)) { + /* we're empty, there's a NUMAnode below us, it'll be removed this time */ + modified = 1; + } + /* nodeset cannot intersect unless cpuset intersects or is empty */ + if (droppednodeset) + assert(!hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset) + || hwloc_bitmap_iszero(obj->complete_cpuset)); + } + if (droppednodeset) { + hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset); + hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset); + } + + if (modified) { + for_each_child_safe(child, obj, pchild) + restrict_object_by_cpuset(topology, flags, pchild, droppedcpuset, droppednodeset); + /* if some hwloc_bitmap_first(child->complete_cpuset) changed, children might need to be reordered */ + hwloc__reorder_children(obj); + + for_each_memory_child_safe(child, obj, pchild) + restrict_object_by_cpuset(topology, flags, pchild, droppedcpuset, droppednodeset); + /* local NUMA nodes have the same cpusets, no need to reorder them */ + + /* Nothing to restrict under I/O or Misc */ + } + + if (!obj->first_child && !obj->memory_first_child /* arity not updated before connect_children() */ + && hwloc_bitmap_iszero(obj->cpuset) + && (obj->type != HWLOC_OBJ_NUMANODE || (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS))) { + /* remove object */ + hwloc_debug("%s", "\nRemoving object during restrict"); + hwloc_debug_print_object(0, obj); + + if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_IO)) { + hwloc_free_object_siblings_and_children(obj->io_first_child); + obj->io_first_child = NULL; + } + if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_MISC)) { + hwloc_free_object_siblings_and_children(obj->misc_first_child); + obj->misc_first_child = NULL; + } + assert(!obj->first_child); + assert(!obj->memory_first_child); + unlink_and_free_single_object(pobj); + topology->modified = 1; + } +} + +int +hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cpuset, unsigned long flags) +{ + hwloc_bitmap_t droppedcpuset, droppednodeset; + + if (!topology->is_loaded) { + errno = EINVAL; + return -1; + } + + if (flags & ~(HWLOC_RESTRICT_FLAG_REMOVE_CPULESS + |HWLOC_RESTRICT_FLAG_ADAPT_MISC|HWLOC_RESTRICT_FLAG_ADAPT_IO)) { + errno = EINVAL; + return -1; + } + + /* make sure we'll keep something in the topology */ + if (!hwloc_bitmap_intersects(cpuset, topology->allowed_cpuset)) { + errno = EINVAL; /* easy failure, just don't touch the topology */ + return -1; + } + + droppedcpuset = hwloc_bitmap_alloc(); + droppednodeset =
hwloc_bitmap_alloc(); + if (!droppedcpuset || !droppednodeset) { + hwloc_bitmap_free(droppedcpuset); + hwloc_bitmap_free(droppednodeset); + return -1; + } + + /* cpuset to clear */ + hwloc_bitmap_not(droppedcpuset, cpuset); + /* nodeset to clear */ + if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) { + hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); + do { + /* the node will be removed if its cpuset was already empty or is entirely dropped */ + if (hwloc_bitmap_iszero(node->cpuset) + || hwloc_bitmap_isincluded(node->cpuset, droppedcpuset)) + hwloc_bitmap_set(droppednodeset, node->os_index); + node = node->next_cousin; + } while (node); + + /* check we're not removing all NUMA nodes */ + if (hwloc_bitmap_isincluded(topology->allowed_nodeset, droppednodeset)) { + errno = EINVAL; /* easy failure, just don't touch the topology */ + hwloc_bitmap_free(droppedcpuset); + hwloc_bitmap_free(droppednodeset); + return -1; + } + } + /* discard droppednodeset if it is unused or empty */ + if (!(flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) + || hwloc_bitmap_iszero(droppednodeset)) { + hwloc_bitmap_free(droppednodeset); + droppednodeset = NULL; + } + + /* now recurse to filter sets and drop things */ + restrict_object_by_cpuset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset); + hwloc_bitmap_andnot(topology->allowed_cpuset, topology->allowed_cpuset, droppedcpuset); + if (droppednodeset) + hwloc_bitmap_andnot(topology->allowed_nodeset, topology->allowed_nodeset, droppednodeset); + + hwloc_bitmap_free(droppedcpuset); + hwloc_bitmap_free(droppednodeset); + + if (hwloc_topology_reconnect(topology, 0) < 0) + goto out; + + /* some objects may have disappeared, we need to update distances objs arrays */ + hwloc_internal_distances_invalidate_cached_objs(topology); + + hwloc_filter_levels_keep_structure(topology); + hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]); + propagate_total_memory(topology->levels[0][0]); + +#ifndef HWLOC_DEBUG + if (getenv("HWLOC_DEBUG_CHECK")) +#endif + hwloc_topology_check(topology); + + return 0; + + out: + /* unrecoverable failure, re-init the topology */ + hwloc_topology_clear(topology); + hwloc_topology_setup_defaults(topology); + return -1; +} + +int +hwloc_topology_is_thissystem(struct hwloc_topology *topology) +{ + return topology->is_thissystem; +} + +int +hwloc_topology_get_depth(struct hwloc_topology *topology) +{ + return (int) topology->nb_levels; +} + +const struct hwloc_topology_support * +hwloc_topology_get_support(struct hwloc_topology * topology) +{ + return &topology->support; +} + +void hwloc_topology_set_userdata(struct hwloc_topology * topology, const void *userdata) +{ + topology->userdata = (void *) userdata; +} + +void * hwloc_topology_get_userdata(struct hwloc_topology * topology) +{ + return topology->userdata; +} + +hwloc_const_cpuset_t +hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->complete_cpuset; +} + +hwloc_const_cpuset_t +hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->cpuset; +} + +hwloc_const_cpuset_t +hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) +{ + return topology->allowed_cpuset; +} + +hwloc_const_nodeset_t +hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->complete_nodeset; +} + +hwloc_const_nodeset_t +hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) +{ + return hwloc_get_root_obj(topology)->nodeset; +} + +hwloc_const_nodeset_t
+hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology) +{ + return topology->allowed_nodeset; +} + + +/**************** + * Debug Checks * + ****************/ + +#ifndef NDEBUG /* assert only enabled if !NDEBUG */ + +static void +hwloc__check_child_siblings(hwloc_obj_t parent, hwloc_obj_t *array, + unsigned arity, unsigned i, + hwloc_obj_t child, hwloc_obj_t prev) +{ + assert(child->parent == parent); + + assert(child->sibling_rank == i); + if (array) + assert(child == array[i]); + + if (prev) + assert(prev->next_sibling == child); + assert(child->prev_sibling == prev); + + if (!i) + assert(child->prev_sibling == NULL); + else + assert(child->prev_sibling != NULL); + + if (i == arity-1) + assert(child->next_sibling == NULL); + else + assert(child->next_sibling != NULL); +} + +static void +hwloc__check_object(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_obj_t obj); + +/* check the children of a parent object */ +static void +hwloc__check_normal_children(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_obj_t parent) +{ + hwloc_obj_t child, prev; + unsigned j; + + if (!parent->arity) { + /* check whether that parent has no children for real */ + assert(!parent->children); + assert(!parent->first_child); + assert(!parent->last_child); + return; + } + /* check whether that parent has children for real */ + assert(parent->children); + assert(parent->first_child); + assert(parent->last_child); + + /* sibling checks */ + for(prev = NULL, child = parent->first_child, j = 0; + child; + prev = child, child = child->next_sibling, j++) { + /* normal child */ + assert(hwloc__obj_type_is_normal(child->type)); + /* check depth */ + assert(child->depth > parent->depth); + /* check siblings */ + hwloc__check_child_siblings(parent, parent->children, parent->arity, j, child, prev); + /* recurse */ + hwloc__check_object(topology, gp_indexes, child); + } + /* check arity */ + assert(j == parent->arity); + + assert(parent->first_child == parent->children[0]); + assert(parent->last_child == parent->children[parent->arity-1]); + + /* no normal children below a PU */ + if (parent->type == HWLOC_OBJ_PU) + assert(!parent->arity); +} + +static void +hwloc__check_children_cpusets(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) +{ + /* we already checked in the caller that objects have either all sets or none */ + hwloc_obj_t child; + int prev_first, prev_empty; + + if (obj->type == HWLOC_OBJ_PU) { + /* PU cpuset is just itself, with no normal children */ + assert(hwloc_bitmap_weight(obj->cpuset) == 1); + assert(hwloc_bitmap_first(obj->cpuset) == (int) obj->os_index); + assert(hwloc_bitmap_weight(obj->complete_cpuset) == 1); + assert(hwloc_bitmap_first(obj->complete_cpuset) == (int) obj->os_index); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) { + assert(hwloc_bitmap_isset(topology->allowed_cpuset, (int) obj->os_index)); + } + assert(!obj->arity); + } else if (hwloc__obj_type_is_memory(obj->type)) { + /* memory object cpuset is equal to its parent's */ + assert(hwloc_bitmap_isequal(obj->parent->cpuset, obj->cpuset)); + assert(!obj->arity); + } else if (!hwloc__obj_type_is_special(obj->type)) { + hwloc_bitmap_t set; + /* other obj cpuset is an exclusive OR of normal children, except for PUs */ + set = hwloc_bitmap_alloc(); + for_each_child(child, obj) { + assert(!hwloc_bitmap_intersects(set, child->cpuset)); + hwloc_bitmap_or(set, set, child->cpuset); + } + assert(hwloc_bitmap_isequal(set, obj->cpuset)); + hwloc_bitmap_free(set); + } + + /* check that
memory children have same cpuset */ + for_each_memory_child(child, obj) + assert(hwloc_bitmap_isequal(obj->cpuset, child->cpuset)); + + /* check that children complete_cpusets are properly ordered, empty ones may be anywhere + * (can be wrong for main cpuset since removed PUs can break the ordering). + */ + prev_first = -1; /* -1 works fine with first comparisons below */ + prev_empty = 0; /* no empty cpuset in previous children */ + for_each_child(child, obj) { + int first = hwloc_bitmap_first(child->complete_cpuset); + if (first >= 0) { + assert(!prev_empty); /* no objects with CPU after objects without CPU */ + assert(prev_first < first); + } else { + prev_empty = 1; + } + prev_first = first; + } +} + +static void +hwloc__check_memory_children(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_obj_t parent) +{ + unsigned j; + hwloc_obj_t child, prev; + + if (!parent->memory_arity) { + /* check whether that parent has no children for real */ + assert(!parent->memory_first_child); + return; + } + /* check whether that parent has children for real */ + assert(parent->memory_first_child); + + for(prev = NULL, child = parent->memory_first_child, j = 0; + child; + prev = child, child = child->next_sibling, j++) { + assert(hwloc__obj_type_is_memory(child->type)); + /* check siblings */ + hwloc__check_child_siblings(parent, NULL, parent->memory_arity, j, child, prev); + /* only Memory and Misc children, recurse */ + assert(!child->first_child); + assert(!child->io_first_child); + hwloc__check_object(topology, gp_indexes, child); + } + /* check arity */ + assert(j == parent->memory_arity); + + /* no memory children below a NUMA node */ + if (parent->type == HWLOC_OBJ_NUMANODE) + assert(!parent->memory_arity); +} + +static void +hwloc__check_io_children(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_obj_t parent) +{ + unsigned j; + hwloc_obj_t child, prev; + + if (!parent->io_arity) { + /* check whether that parent has no children for real */ + assert(!parent->io_first_child); + return; + } + /* check whether that parent has children for real */ + assert(parent->io_first_child); + + for(prev = NULL, child = parent->io_first_child, j = 0; + child; + prev = child, child = child->next_sibling, j++) { + /* all children must be I/O */ + assert(hwloc__obj_type_is_io(child->type)); + /* check siblings */ + hwloc__check_child_siblings(parent, NULL, parent->io_arity, j, child, prev); + /* only I/O and Misc children, recurse */ + assert(!child->first_child); + assert(!child->memory_first_child); + hwloc__check_object(topology, gp_indexes, child); + } + /* check arity */ + assert(j == parent->io_arity); +} + +static void +hwloc__check_misc_children(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_obj_t parent) +{ + unsigned j; + hwloc_obj_t child, prev; + + if (!parent->misc_arity) { + /* check whether that parent has no children for real */ + assert(!parent->misc_first_child); + return; + } + /* check whether that parent has children for real */ + assert(parent->misc_first_child); + + for(prev = NULL, child = parent->misc_first_child, j = 0; + child; + prev = child, child = child->next_sibling, j++) { + /* all children must be Misc */ + assert(child->type == HWLOC_OBJ_MISC); + /* check siblings */ + hwloc__check_child_siblings(parent, NULL, parent->misc_arity, j, child, prev); + /* only Misc children, recurse */ + assert(!child->first_child); + assert(!child->memory_first_child); + assert(!child->io_first_child); + hwloc__check_object(topology, gp_indexes, child); + } + /* 
check arity */ + assert(j == parent->misc_arity); +} + +static void +hwloc__check_object(hwloc_topology_t topology, hwloc_bitmap_t gp_indexes, hwloc_obj_t obj) +{ + assert(!hwloc_bitmap_isset(gp_indexes, obj->gp_index)); + hwloc_bitmap_set(gp_indexes, obj->gp_index); + + HWLOC_BUILD_ASSERT(HWLOC_OBJ_TYPE_MIN == 0); + assert((unsigned) obj->type < HWLOC_OBJ_TYPE_MAX); + + assert(hwloc_filter_check_keep_object(topology, obj)); + + /* check sets and depth against the object type */ + if (hwloc__obj_type_is_special(obj->type)) { + assert(!obj->cpuset); + if (obj->type == HWLOC_OBJ_BRIDGE) + assert(obj->depth == HWLOC_TYPE_DEPTH_BRIDGE); + else if (obj->type == HWLOC_OBJ_PCI_DEVICE) + assert(obj->depth == HWLOC_TYPE_DEPTH_PCI_DEVICE); + else if (obj->type == HWLOC_OBJ_OS_DEVICE) + assert(obj->depth == HWLOC_TYPE_DEPTH_OS_DEVICE); + else if (obj->type == HWLOC_OBJ_MISC) + assert(obj->depth == HWLOC_TYPE_DEPTH_MISC); + } else { + assert(obj->cpuset); + if (obj->type == HWLOC_OBJ_NUMANODE) + assert(obj->depth == HWLOC_TYPE_DEPTH_NUMANODE); + else + assert(obj->depth >= 0); + } + + /* group depth cannot be -1 anymore in v2.0+ */ + if (obj->type == HWLOC_OBJ_GROUP) { + assert(obj->attr->group.depth != (unsigned) -1); + } + + /* the other cpusets and nodesets exist if and only if the main cpuset does */ + assert(!!obj->cpuset == !!obj->complete_cpuset); + assert(!!obj->cpuset == !!obj->nodeset); + assert(!!obj->nodeset == !!obj->complete_nodeset); + + /* check that the complete sets contain the main sets */ + if (obj->cpuset) { + assert(hwloc_bitmap_isincluded(obj->cpuset, obj->complete_cpuset)); + assert(hwloc_bitmap_isincluded(obj->nodeset, obj->complete_nodeset)); + } + + /* check cache type/depth vs type */ + if (hwloc__obj_type_is_cache(obj->type)) { + if (hwloc__obj_type_is_icache(obj->type)) + assert(obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION); + else if (hwloc__obj_type_is_dcache(obj->type)) + assert(obj->attr->cache.type == HWLOC_OBJ_CACHE_DATA + || obj->attr->cache.type == HWLOC_OBJ_CACHE_UNIFIED); + else + assert(0); + assert(hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type) == obj->type); + } + + /* check children */ + hwloc__check_normal_children(topology, gp_indexes, obj); + hwloc__check_memory_children(topology, gp_indexes, obj); + hwloc__check_io_children(topology, gp_indexes, obj); + hwloc__check_misc_children(topology, gp_indexes, obj); + hwloc__check_children_cpusets(topology, obj); + /* nodesets are checked during another recursion with state below */ +} + +static void +hwloc__check_nodesets(hwloc_topology_t topology, hwloc_obj_t obj, hwloc_bitmap_t parentset) +{ + hwloc_obj_t child; + int prev_first; + + if (obj->type == HWLOC_OBJ_NUMANODE) { + /* NUMANODE nodeset is just itself, with no memory/normal children */ + assert(hwloc_bitmap_weight(obj->nodeset) == 1); + assert(hwloc_bitmap_first(obj->nodeset) == (int) obj->os_index); + assert(hwloc_bitmap_weight(obj->complete_nodeset) == 1); + assert(hwloc_bitmap_first(obj->complete_nodeset) == (int) obj->os_index); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) { + assert(hwloc_bitmap_isset(topology->allowed_nodeset, (int) obj->os_index)); + } + assert(!obj->arity); + assert(!obj->memory_arity); + assert(hwloc_bitmap_isincluded(obj->nodeset, parentset)); + } else { + hwloc_bitmap_t myset; + hwloc_bitmap_t childset; + + /* the local nodeset is an exclusive OR of memory children */ + myset = hwloc_bitmap_alloc(); + for_each_memory_child(child, obj) { + assert(!hwloc_bitmap_intersects(myset,
child->nodeset)); + hwloc_bitmap_or(myset, myset, child->nodeset); + } + /* the local nodeset cannot intersect with parents' local nodeset */ + assert(!hwloc_bitmap_intersects(myset, parentset)); + hwloc_bitmap_or(parentset, parentset, myset); + hwloc_bitmap_free(myset); + /* parentset now contains parent+local contribution */ + + /* for each child, recurse to check/get its contribution */ + childset = hwloc_bitmap_alloc(); + for_each_child(child, obj) { + hwloc_bitmap_t set = hwloc_bitmap_dup(parentset); /* don't touch parentset, we don't want to propagate the first child contribution to other children */ + hwloc__check_nodesets(topology, child, set); + /* extract this child contribution */ + hwloc_bitmap_andnot(set, set, parentset); + /* save it */ + assert(!hwloc_bitmap_intersects(childset, set)); + hwloc_bitmap_or(childset, childset, set); + hwloc_bitmap_free(set); + } + /* combine child contribution into parentset */ + assert(!hwloc_bitmap_intersects(parentset, childset)); + hwloc_bitmap_or(parentset, parentset, childset); + hwloc_bitmap_free(childset); + /* now check that our nodeset is the combination of parent, local and children */ + assert(hwloc_bitmap_isequal(obj->nodeset, parentset)); + } + + /* check that children complete_nodesets are properly ordered, empty ones may be anywhere + * (can be wrong for main nodeset since removed PUs can break the ordering). + */ + prev_first = -1; /* -1 works fine with first comparisons below */ + for_each_memory_child(child, obj) { + int first = hwloc_bitmap_first(child->complete_nodeset); + assert(prev_first < first); + prev_first = first; + } +} + +static void +hwloc__check_level(struct hwloc_topology *topology, int depth, + hwloc_obj_t first, hwloc_obj_t last) +{ + unsigned width = hwloc_get_nbobjs_by_depth(topology, depth); + struct hwloc_obj *prev = NULL; + hwloc_obj_t obj; + unsigned j; + + /* check each object of the level */ + for(j=0; j<width; j++) { + obj = hwloc_get_obj_by_depth(topology, depth, j); + /* check that the object is correctly placed horizontally and vertically */ + assert(obj); + assert(obj->depth == depth); + assert(obj->logical_index == j); + /* check that all objects in the level have the same type */ + if (prev) { + assert(hwloc_type_cmp(obj, prev) == HWLOC_OBJ_EQUAL); + assert(prev->next_cousin == obj); + } + assert(obj->prev_cousin == prev); + + /* check that PUs and NUMA nodes have correct cpuset/nodeset */ + if (obj->type == HWLOC_OBJ_NUMANODE) { + assert(hwloc_bitmap_weight(obj->complete_nodeset) == 1); + assert(hwloc_bitmap_first(obj->complete_nodeset) == (int) obj->os_index); + } + prev = obj; + } + if (prev) + assert(prev->next_cousin == NULL); + + if (width) { + /* check first object of the level */ + obj = hwloc_get_obj_by_depth(topology, depth, 0); + assert(obj); + assert(!obj->prev_cousin); + /* check type */ + assert(hwloc_get_depth_type(topology, depth) == obj->type); + assert(depth == hwloc_get_type_depth(topology, obj->type) + || HWLOC_TYPE_DEPTH_MULTIPLE == hwloc_get_type_depth(topology, obj->type)); + /* check last object of the level */ + obj = hwloc_get_obj_by_depth(topology, depth, width-1); + assert(obj); + assert(!obj->next_cousin); + } + + if (depth < 0) { + assert(first == hwloc_get_obj_by_depth(topology, depth, 0)); + assert(last == hwloc_get_obj_by_depth(topology, depth, width-1)); + } else { + assert(!first); + assert(!last); + } + + /* check last+1 object of the level */ + obj = hwloc_get_obj_by_depth(topology, depth, width); + assert(!obj); +} + +/* check a whole topology structure */
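+/* Note: hwloc_topology_load() and hwloc_topology_restrict() above only invoke this check when hwloc is built with HWLOC_DEBUG or when HWLOC_DEBUG_CHECK is set in the environment, and since the body is made of plain assert()s it compiles to a no-op under NDEBUG (see the stub at the end of this section). Applications may also call hwloc_topology_check() directly on a loaded topology. */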
+void +hwloc_topology_check(struct hwloc_topology *topology) +{ + struct hwloc_obj *obj; + hwloc_bitmap_t gp_indexes, set; + hwloc_obj_type_t type; + unsigned i; + int j, depth; + + /* make sure we can use ranges to check types */ + + /* hwloc__obj_type_is_{,d,i}cache() want cache types to be ordered like this */ + HWLOC_BUILD_ASSERT(HWLOC_OBJ_L2CACHE == HWLOC_OBJ_L1CACHE + 1); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_L3CACHE == HWLOC_OBJ_L2CACHE + 1); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_L4CACHE == HWLOC_OBJ_L3CACHE + 1); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_L5CACHE == HWLOC_OBJ_L4CACHE + 1); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_L1ICACHE == HWLOC_OBJ_L5CACHE + 1); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_L2ICACHE == HWLOC_OBJ_L1ICACHE + 1); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_L3ICACHE == HWLOC_OBJ_L2ICACHE + 1); + + /* hwloc__obj_type_is_normal(), hwloc__obj_type_is_memory(), hwloc__obj_type_is_io(), hwloc__obj_type_is_special() + * and hwloc_reset_normal_type_depths() + * want special types to be ordered like this, after all normal types. + */ + HWLOC_BUILD_ASSERT(HWLOC_OBJ_NUMANODE + 1 == HWLOC_OBJ_BRIDGE); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_BRIDGE + 1 == HWLOC_OBJ_PCI_DEVICE); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_PCI_DEVICE + 1 == HWLOC_OBJ_OS_DEVICE); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_OS_DEVICE + 1 == HWLOC_OBJ_MISC); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_MISC + 1 == HWLOC_OBJ_TYPE_MAX); + + /* make sure order and priority arrays have the right size */ + HWLOC_BUILD_ASSERT(sizeof(obj_type_order)/sizeof(*obj_type_order) == HWLOC_OBJ_TYPE_MAX); + HWLOC_BUILD_ASSERT(sizeof(obj_order_type)/sizeof(*obj_order_type) == HWLOC_OBJ_TYPE_MAX); + HWLOC_BUILD_ASSERT(sizeof(obj_type_priority)/sizeof(*obj_type_priority) == HWLOC_OBJ_TYPE_MAX); + + /* make sure Groups are never kept unconditionally (KEEP_ALL is rejected for Groups) */ + assert(topology->type_filter[HWLOC_OBJ_GROUP] != HWLOC_TYPE_FILTER_KEEP_ALL); + + /* make sure order arrays are coherent */ + for(type=HWLOC_OBJ_TYPE_MIN; type<HWLOC_OBJ_TYPE_MAX; type++) + assert(obj_order_type[obj_type_order[type]] == type); + for(i=HWLOC_OBJ_TYPE_MIN; i<HWLOC_OBJ_TYPE_MAX; i++) + assert(obj_type_order[obj_order_type[i]] == i); + + depth = hwloc_topology_get_depth(topology); + + assert(!topology->modified); + + /* check that first level is Machine. + * Root object cannot be ignored. And Machine can only be merged into PU, + * but there must be a NUMA node below Machine, and it cannot be below PU.
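+ * (This is why the asserts just below pin depth 0 to Machine and the last level to PU.)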
+ */ + assert(hwloc_get_depth_type(topology, 0) == HWLOC_OBJ_MACHINE); + + /* check that last level is PU and that it doesn't have memory */ + assert(hwloc_get_depth_type(topology, depth-1) == HWLOC_OBJ_PU); + assert(hwloc_get_nbobjs_by_depth(topology, depth-1) > 0); + for(i=0; i<hwloc_get_nbobjs_by_depth(topology, depth-1); i++) { + obj = hwloc_get_obj_by_depth(topology, depth-1, i); + assert(obj); + assert(obj->type == HWLOC_OBJ_PU); + assert(!obj->memory_first_child); + } + /* check that other levels are not PU or Machine */ + for(j=1; j<depth-1; j++) { + assert(hwloc_get_depth_type(topology, j) != HWLOC_OBJ_PU); + assert(hwloc_get_depth_type(topology, j) != HWLOC_OBJ_MACHINE); + } + + /* check normal levels */ + for(j=0; j<depth; j++) { + int d; + type = hwloc_get_depth_type(topology, j); + assert(type != HWLOC_OBJ_NUMANODE); + assert(type != HWLOC_OBJ_PCI_DEVICE); + assert(type != HWLOC_OBJ_BRIDGE); + assert(type != HWLOC_OBJ_OS_DEVICE); + assert(type != HWLOC_OBJ_MISC); + d = hwloc_get_type_depth(topology, type); + assert(d == j || d == HWLOC_TYPE_DEPTH_MULTIPLE); + } + + /* check type depths, even if there's no such level */ + for(type=HWLOC_OBJ_TYPE_MIN; type<HWLOC_OBJ_TYPE_MAX; type++) { + int d; + d = hwloc_get_type_depth(topology, type); + if (type == HWLOC_OBJ_NUMANODE) { + assert(d == HWLOC_TYPE_DEPTH_NUMANODE); + assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_NUMANODE); + } else if (type == HWLOC_OBJ_BRIDGE) { + assert(d == HWLOC_TYPE_DEPTH_BRIDGE); + assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_BRIDGE); + } else if (type == HWLOC_OBJ_PCI_DEVICE) { + assert(d == HWLOC_TYPE_DEPTH_PCI_DEVICE); + assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_PCI_DEVICE); + } else if (type == HWLOC_OBJ_OS_DEVICE) { + assert(d == HWLOC_TYPE_DEPTH_OS_DEVICE); + assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_OS_DEVICE); + } else if (type == HWLOC_OBJ_MISC) { + assert(d == HWLOC_TYPE_DEPTH_MISC); + assert(hwloc_get_depth_type(topology, d) == HWLOC_OBJ_MISC); + } else { + assert(d >=0 || d == HWLOC_TYPE_DEPTH_UNKNOWN || d == HWLOC_TYPE_DEPTH_MULTIPLE); + } + } + + /* top-level specific checks */ + assert(hwloc_get_nbobjs_by_depth(topology, 0) == 1); + obj = hwloc_get_root_obj(topology); + assert(obj); + assert(!obj->parent); + assert(obj->cpuset); + assert(!obj->depth); + + /* check that the main sets contain the allowed sets */ + if (topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) { + assert(hwloc_bitmap_isincluded(topology->allowed_cpuset, obj->cpuset)); + assert(hwloc_bitmap_isincluded(topology->allowed_nodeset, obj->nodeset)); + } else { + assert(hwloc_bitmap_isequal(topology->allowed_cpuset, obj->cpuset)); + assert(hwloc_bitmap_isequal(topology->allowed_nodeset, obj->nodeset)); + } + + /* check each level */ + for(j=0; j<depth; j++) + hwloc__check_level(topology, j, NULL, NULL); + for(j=0; j<HWLOC_NR_SLEVELS; j++) + hwloc__check_level(topology, HWLOC_SLEVEL_TO_DEPTH(j), topology->slevels[j].first, topology->slevels[j].last); + + /* recurse and check the tree of children, and type-specific checks */ + gp_indexes = hwloc_bitmap_alloc(); /* TODO prealloc to topology->next_gp_index */ + hwloc__check_object(topology, gp_indexes, obj); + hwloc_bitmap_free(gp_indexes); + + /* recurse and check the nodesets of children */ + set = hwloc_bitmap_alloc(); + hwloc__check_nodesets(topology, obj, set); + hwloc_bitmap_free(set); +} + +#else /* NDEBUG */ + +void +hwloc_topology_check(struct hwloc_topology *topology __hwloc_attribute_unused) +{ +} + +#endif /* NDEBUG */ diff
--git a/src/3rdparty/hwloc/src/traversal.c b/src/3rdparty/hwloc/src/traversal.c new file mode 100644 index 000000000..9c5e6268c --- /dev/null +++ b/src/3rdparty/hwloc/src/traversal.c @@ -0,0 +1,616 @@ +/* + * Copyright © 2009 CNRS + * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2010 Université Bordeaux + * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/private.h> +#include <private/misc.h> +#include <private/debug.h> +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif /* HAVE_STRINGS_H */ + +int +hwloc_get_type_depth (struct hwloc_topology *topology, hwloc_obj_type_t type) +{ + HWLOC_BUILD_ASSERT(HWLOC_OBJ_TYPE_MIN == 0); + if ((unsigned) type >= HWLOC_OBJ_TYPE_MAX) + return HWLOC_TYPE_DEPTH_UNKNOWN; + else + return topology->type_depth[type]; +} + +hwloc_obj_type_t +hwloc_get_depth_type (hwloc_topology_t topology, int depth) +{ + if ((unsigned)depth >= topology->nb_levels) + switch (depth) { + case HWLOC_TYPE_DEPTH_NUMANODE: + return HWLOC_OBJ_NUMANODE; + case HWLOC_TYPE_DEPTH_BRIDGE: + return HWLOC_OBJ_BRIDGE; + case HWLOC_TYPE_DEPTH_PCI_DEVICE: + return HWLOC_OBJ_PCI_DEVICE; + case HWLOC_TYPE_DEPTH_OS_DEVICE: + return HWLOC_OBJ_OS_DEVICE; + case HWLOC_TYPE_DEPTH_MISC: + return HWLOC_OBJ_MISC; + default: + return HWLOC_OBJ_TYPE_NONE; + } + return topology->levels[depth][0]->type; +} + +int +hwloc_get_memory_parents_depth (hwloc_topology_t topology) +{ + int depth = HWLOC_TYPE_DEPTH_UNKNOWN; + /* memory leaves are always NUMA nodes for now, no need to check parents of other memory types */ + hwloc_obj_t numa = hwloc_get_obj_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE, 0); + assert(numa); + while (numa) { + hwloc_obj_t parent = numa->parent; + /* walk-up the memory hierarchy */ + while (hwloc__obj_type_is_memory(parent->type)) + parent = parent->parent; + + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + depth = parent->depth; + else if (depth != parent->depth) + return HWLOC_TYPE_DEPTH_MULTIPLE; + + numa = numa->next_cousin; + } + + assert(depth >= 0); + return depth; +} + +unsigned +hwloc_get_nbobjs_by_depth (struct hwloc_topology *topology, int depth) +{ + if ((unsigned)depth >= topology->nb_levels) { + unsigned l = HWLOC_SLEVEL_FROM_DEPTH(depth); + if (l < HWLOC_NR_SLEVELS) + return topology->slevels[l].nbobjs; + else + return 0; + } + return topology->level_nbobjects[depth]; +} + +struct hwloc_obj * +hwloc_get_obj_by_depth (struct hwloc_topology *topology, int depth, unsigned idx) +{ + if ((unsigned)depth >= topology->nb_levels) { + unsigned l = HWLOC_SLEVEL_FROM_DEPTH(depth); + if (l < HWLOC_NR_SLEVELS) + return idx < topology->slevels[l].nbobjs ? 
topology->slevels[l].objs[idx] : NULL; + else + return NULL; + } + if (idx >= topology->level_nbobjects[depth]) + return NULL; + return topology->levels[depth][idx]; +} + +int +hwloc_obj_type_is_normal(hwloc_obj_type_t type) +{ + return hwloc__obj_type_is_normal(type); +} + +int +hwloc_obj_type_is_memory(hwloc_obj_type_t type) +{ + return hwloc__obj_type_is_memory(type); +} + +int +hwloc_obj_type_is_io(hwloc_obj_type_t type) +{ + return hwloc__obj_type_is_io(type); +} + +int +hwloc_obj_type_is_cache(hwloc_obj_type_t type) +{ + return hwloc__obj_type_is_cache(type); +} + +int +hwloc_obj_type_is_dcache(hwloc_obj_type_t type) +{ + return hwloc__obj_type_is_dcache(type); +} + +int +hwloc_obj_type_is_icache(hwloc_obj_type_t type) +{ + return hwloc__obj_type_is_icache(type); +} + +unsigned hwloc_get_closest_objs (struct hwloc_topology *topology, struct hwloc_obj *src, struct hwloc_obj **objs, unsigned max) +{ + struct hwloc_obj *parent, *nextparent, **src_objs; + unsigned i,src_nbobjects; + unsigned stored = 0; + + if (!src->cpuset) + return 0; + + src_nbobjects = topology->level_nbobjects[src->depth]; + src_objs = topology->levels[src->depth]; + + parent = src; + while (stored < max) { + while (1) { + nextparent = parent->parent; + if (!nextparent) + goto out; + if (!hwloc_bitmap_isequal(parent->cpuset, nextparent->cpuset)) + break; + parent = nextparent; + } + + /* traverse src's objects and find those that are in nextparent and were not in parent */ + for(i=0; i<src_nbobjects; i++) { + if (hwloc_bitmap_isincluded(src_objs[i]->cpuset, nextparent->cpuset) + && !hwloc_bitmap_isincluded(src_objs[i]->cpuset, parent->cpuset)) { + objs[stored++] = src_objs[i]; + if (stored == max) + goto out; + } + } + parent = nextparent; + } + + out: + return stored; +} + +static int +hwloc__get_largest_objs_inside_cpuset (struct hwloc_obj *current, hwloc_const_bitmap_t set, + struct hwloc_obj ***res, int *max) +{ + int gotten = 0; + unsigned i; + + /* the caller must ensure this */ + if (*max <= 0) + return 0; + + if (hwloc_bitmap_isequal(current->cpuset, set)) { + **res = current; + (*res)++; + (*max)--; + return 1; + } + + for (i=0; i<current->arity; i++) { + hwloc_bitmap_t subset; + int ret; + + /* split out the cpuset part corresponding to this child and see if there's anything to do */ + if (!hwloc_bitmap_intersects(set,current->children[i]->cpuset)) + continue; + + subset = hwloc_bitmap_dup(set); + hwloc_bitmap_and(subset, subset, current->children[i]->cpuset); + ret = hwloc__get_largest_objs_inside_cpuset (current->children[i], subset, res, max); + gotten += ret; + hwloc_bitmap_free(subset); + + /* if no more room to store remaining objects, return what we got so far */ + if (!*max) + break; + } + + return gotten; +} + +int +hwloc_get_largest_objs_inside_cpuset (struct hwloc_topology *topology, hwloc_const_bitmap_t set, + struct hwloc_obj **objs, int max) +{ + struct hwloc_obj *current = topology->levels[0][0]; + + if (!hwloc_bitmap_isincluded(set, current->cpuset)) + return -1; + + if (max <= 0) + return 0; + + return hwloc__get_largest_objs_inside_cpuset (current, set, &objs, &max); +} + +const char * +hwloc_obj_type_string (hwloc_obj_type_t obj) +{ + switch (obj) + { + case HWLOC_OBJ_MACHINE: return "Machine"; + case HWLOC_OBJ_MISC: return "Misc"; + case HWLOC_OBJ_GROUP: return "Group"; + case HWLOC_OBJ_NUMANODE: return "NUMANode"; + case HWLOC_OBJ_PACKAGE: return "Package"; + case HWLOC_OBJ_L1CACHE: return "L1Cache"; + case HWLOC_OBJ_L2CACHE: return "L2Cache"; + case HWLOC_OBJ_L3CACHE: return 
"L3Cache"; + case HWLOC_OBJ_L4CACHE: return "L4Cache"; + case HWLOC_OBJ_L5CACHE: return "L5Cache"; + case HWLOC_OBJ_L1ICACHE: return "L1iCache"; + case HWLOC_OBJ_L2ICACHE: return "L2iCache"; + case HWLOC_OBJ_L3ICACHE: return "L3iCache"; + case HWLOC_OBJ_CORE: return "Core"; + case HWLOC_OBJ_BRIDGE: return "Bridge"; + case HWLOC_OBJ_PCI_DEVICE: return "PCIDev"; + case HWLOC_OBJ_OS_DEVICE: return "OSDev"; + case HWLOC_OBJ_PU: return "PU"; + default: return "Unknown"; + } +} + +int +hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep, + union hwloc_obj_attr_u *attrp, size_t attrsize) +{ + hwloc_obj_type_t type = (hwloc_obj_type_t) -1; + unsigned depthattr = (unsigned) -1; + hwloc_obj_cache_type_t cachetypeattr = (hwloc_obj_cache_type_t) -1; /* unspecified */ + hwloc_obj_bridge_type_t ubtype = (hwloc_obj_bridge_type_t) -1; + hwloc_obj_osdev_type_t ostype = (hwloc_obj_osdev_type_t) -1; + char *end; + + /* never match the ending \0 since we want to match things like core:2 too. + * just use hwloc_strncasecmp() everywhere. + */ + + /* types without a custom depth */ + + /* osdev subtype first to avoid conflicts coproc/core etc */ + if (!hwloc_strncasecmp(string, "os", 2)) { + type = HWLOC_OBJ_OS_DEVICE; + } else if (!hwloc_strncasecmp(string, "bloc", 4)) { + type = HWLOC_OBJ_OS_DEVICE; + ostype = HWLOC_OBJ_OSDEV_BLOCK; + } else if (!hwloc_strncasecmp(string, "net", 3)) { + type = HWLOC_OBJ_OS_DEVICE; + ostype = HWLOC_OBJ_OSDEV_NETWORK; + } else if (!hwloc_strncasecmp(string, "openfab", 7)) { + type = HWLOC_OBJ_OS_DEVICE; + ostype = HWLOC_OBJ_OSDEV_OPENFABRICS; + } else if (!hwloc_strncasecmp(string, "dma", 3)) { + type = HWLOC_OBJ_OS_DEVICE; + ostype = HWLOC_OBJ_OSDEV_DMA; + } else if (!hwloc_strncasecmp(string, "gpu", 3)) { + type = HWLOC_OBJ_OS_DEVICE; + ostype = HWLOC_OBJ_OSDEV_GPU; + } else if (!hwloc_strncasecmp(string, "copro", 5) + || !hwloc_strncasecmp(string, "co-pro", 6)) { + type = HWLOC_OBJ_OS_DEVICE; + ostype = HWLOC_OBJ_OSDEV_COPROC; + + } else if (!hwloc_strncasecmp(string, "machine", 2)) { + type = HWLOC_OBJ_MACHINE; + } else if (!hwloc_strncasecmp(string, "node", 2) + || !hwloc_strncasecmp(string, "numa", 2)) { /* matches node and numanode */ + type = HWLOC_OBJ_NUMANODE; + } else if (!hwloc_strncasecmp(string, "package", 2) + || !hwloc_strncasecmp(string, "socket", 2)) { /* backward compat with v1.10 */ + type = HWLOC_OBJ_PACKAGE; + } else if (!hwloc_strncasecmp(string, "core", 2)) { + type = HWLOC_OBJ_CORE; + } else if (!hwloc_strncasecmp(string, "pu", 2)) { + type = HWLOC_OBJ_PU; + } else if (!hwloc_strncasecmp(string, "misc", 4)) { + type = HWLOC_OBJ_MISC; + + } else if (!hwloc_strncasecmp(string, "bridge", 4)) { + type = HWLOC_OBJ_BRIDGE; + } else if (!hwloc_strncasecmp(string, "hostbridge", 6)) { + type = HWLOC_OBJ_BRIDGE; + ubtype = HWLOC_OBJ_BRIDGE_HOST; + } else if (!hwloc_strncasecmp(string, "pcibridge", 5)) { + type = HWLOC_OBJ_BRIDGE; + ubtype = HWLOC_OBJ_BRIDGE_PCI; + + } else if (!hwloc_strncasecmp(string, "pci", 3)) { + type = HWLOC_OBJ_PCI_DEVICE; + + /* types with depthattr */ + } else if ((string[0] == 'l' || string[0] == 'L') && string[1] >= '0' && string[1] <= '9') { + depthattr = strtol(string+1, &end, 10); + if (*end == 'i') { + if (depthattr >= 1 && depthattr <= 3) { + type = HWLOC_OBJ_L1ICACHE + depthattr-1; + cachetypeattr = HWLOC_OBJ_CACHE_INSTRUCTION; + } else + return -1; + } else { + if (depthattr >= 1 && depthattr <= 5) { + type = HWLOC_OBJ_L1CACHE + depthattr-1; + cachetypeattr = *end == 'd' ? 
HWLOC_OBJ_CACHE_DATA : HWLOC_OBJ_CACHE_UNIFIED; + } else + return -1; + } + + } else if (!hwloc_strncasecmp(string, "group", 2)) { + size_t length; + type = HWLOC_OBJ_GROUP; + length = strcspn(string, "0123456789"); + if (length <= 5 && !hwloc_strncasecmp(string, "group", length) + && string[length] >= '0' && string[length] <= '9') { + depthattr = strtol(string+length, &end, 10); + } + + } else + return -1; + + *typep = type; + if (attrp) { + if (hwloc__obj_type_is_cache(type) && attrsize >= sizeof(attrp->cache)) { + attrp->cache.depth = depthattr; + attrp->cache.type = cachetypeattr; + } else if (type == HWLOC_OBJ_GROUP && attrsize >= sizeof(attrp->group)) { + attrp->group.depth = depthattr; + } else if (type == HWLOC_OBJ_BRIDGE && attrsize >= sizeof(attrp->bridge)) { + attrp->bridge.upstream_type = ubtype; + attrp->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI; /* nothing else so far */ + } else if (type == HWLOC_OBJ_OS_DEVICE && attrsize >= sizeof(attrp->osdev)) { + attrp->osdev.type = ostype; + } + } + return 0; +} + +int +hwloc_type_sscanf_as_depth(const char *string, hwloc_obj_type_t *typep, + hwloc_topology_t topology, int *depthp) +{ + union hwloc_obj_attr_u attr; + hwloc_obj_type_t type; + int depth; + int err; + + err = hwloc_type_sscanf(string, &type, &attr, sizeof(attr)); + if (err < 0) + return err; + + depth = hwloc_get_type_depth(topology, type); + if (type == HWLOC_OBJ_GROUP + && depth == HWLOC_TYPE_DEPTH_MULTIPLE + && attr.group.depth != (unsigned)-1) { + unsigned l; + depth = HWLOC_TYPE_DEPTH_UNKNOWN; + for(l=0; l<topology->nb_levels; l++) { + if (topology->levels[l][0]->type == HWLOC_OBJ_GROUP + && topology->levels[l][0]->attr->group.depth == attr.group.depth) { + depth = (int)l; + break; + } + } + } + + if (typep) + *typep = type; + *depthp = depth; + return 0; +} + +static const char* hwloc_obj_cache_type_letter(hwloc_obj_cache_type_t type) +{ + switch (type) { + case HWLOC_OBJ_CACHE_UNIFIED: return ""; + case HWLOC_OBJ_CACHE_DATA: return "d"; + case HWLOC_OBJ_CACHE_INSTRUCTION: return "i"; + default: return "unknown"; + } +} + +int +hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, int verbose) +{ + hwloc_obj_type_t type = obj->type; + switch (type) { + case HWLOC_OBJ_MISC: + case HWLOC_OBJ_MACHINE: + case HWLOC_OBJ_NUMANODE: + case HWLOC_OBJ_PACKAGE: + case HWLOC_OBJ_CORE: + case HWLOC_OBJ_PU: + return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type)); + case HWLOC_OBJ_L1CACHE: + case HWLOC_OBJ_L2CACHE: + case HWLOC_OBJ_L3CACHE: + case HWLOC_OBJ_L4CACHE: + case HWLOC_OBJ_L5CACHE: + case HWLOC_OBJ_L1ICACHE: + case HWLOC_OBJ_L2ICACHE: + case HWLOC_OBJ_L3ICACHE: + return hwloc_snprintf(string, size, "L%u%s%s", obj->attr->cache.depth, + hwloc_obj_cache_type_letter(obj->attr->cache.type), + verbose ? "Cache" : ""); + case HWLOC_OBJ_GROUP: + if (obj->attr->group.depth != (unsigned) -1) + return hwloc_snprintf(string, size, "%s%u", hwloc_obj_type_string(type), obj->attr->group.depth); + else + return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type)); + case HWLOC_OBJ_BRIDGE: + return hwloc_snprintf(string, size, obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI ? "PCIBridge" : "HostBridge"); + case HWLOC_OBJ_PCI_DEVICE: + return hwloc_snprintf(string, size, "PCI"); + case HWLOC_OBJ_OS_DEVICE: + switch (obj->attr->osdev.type) { + case HWLOC_OBJ_OSDEV_BLOCK: return hwloc_snprintf(string, size, "Block"); + case HWLOC_OBJ_OSDEV_NETWORK: return hwloc_snprintf(string, size, verbose ? 
"Network" : "Net"); + case HWLOC_OBJ_OSDEV_OPENFABRICS: return hwloc_snprintf(string, size, "OpenFabrics"); + case HWLOC_OBJ_OSDEV_DMA: return hwloc_snprintf(string, size, "DMA"); + case HWLOC_OBJ_OSDEV_GPU: return hwloc_snprintf(string, size, "GPU"); + case HWLOC_OBJ_OSDEV_COPROC: return hwloc_snprintf(string, size, verbose ? "Co-Processor" : "CoProc"); + default: + if (size > 0) + *string = '\0'; + return 0; + } + break; + default: + if (size > 0) + *string = '\0'; + return 0; + } +} + +int +hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, const char * separator, int verbose) +{ + const char *prefix = ""; + char *tmp = string; + ssize_t tmplen = size; + int ret = 0; + int res; + + /* make sure we output at least an empty string */ + if (size) + *string = '\0'; + + /* print memory attributes */ + res = 0; + if (verbose) { + if (obj->type == HWLOC_OBJ_NUMANODE && obj->attr->numanode.local_memory) + res = hwloc_snprintf(tmp, tmplen, "%slocal=%lu%s%stotal=%lu%s", + prefix, + (unsigned long) hwloc_memory_size_printf_value(obj->attr->numanode.local_memory, verbose), + hwloc_memory_size_printf_unit(obj->attr->numanode.local_memory, verbose), + separator, + (unsigned long) hwloc_memory_size_printf_value(obj->total_memory, verbose), + hwloc_memory_size_printf_unit(obj->total_memory, verbose)); + else if (obj->total_memory) + res = hwloc_snprintf(tmp, tmplen, "%stotal=%lu%s", + prefix, + (unsigned long) hwloc_memory_size_printf_value(obj->total_memory, verbose), + hwloc_memory_size_printf_unit(obj->total_memory, verbose)); + } else { + if (obj->type == HWLOC_OBJ_NUMANODE && obj->attr->numanode.local_memory) + res = hwloc_snprintf(tmp, tmplen, "%s%lu%s", + prefix, + (unsigned long) hwloc_memory_size_printf_value(obj->attr->numanode.local_memory, verbose), + hwloc_memory_size_printf_unit(obj->attr->numanode.local_memory, verbose)); + } + if (res < 0) + return -1; + ret += res; + if (ret > 0) + prefix = separator; + if (res >= tmplen) + res = tmplen>0 ? 
(int)tmplen - 1 : 0; + tmp += res; + tmplen -= res; + + /* printf type-specific attributes */ + res = 0; + switch (obj->type) { + case HWLOC_OBJ_L1CACHE: + case HWLOC_OBJ_L2CACHE: + case HWLOC_OBJ_L3CACHE: + case HWLOC_OBJ_L4CACHE: + case HWLOC_OBJ_L5CACHE: + case HWLOC_OBJ_L1ICACHE: + case HWLOC_OBJ_L2ICACHE: + case HWLOC_OBJ_L3ICACHE: + if (verbose) { + char assoc[32]; + if (obj->attr->cache.associativity == -1) + snprintf(assoc, sizeof(assoc), "%sfully-associative", separator); + else if (obj->attr->cache.associativity == 0) + *assoc = '\0'; + else + snprintf(assoc, sizeof(assoc), "%sways=%d", separator, obj->attr->cache.associativity); + res = hwloc_snprintf(tmp, tmplen, "%ssize=%lu%s%slinesize=%u%s", + prefix, + (unsigned long) hwloc_memory_size_printf_value(obj->attr->cache.size, verbose), + hwloc_memory_size_printf_unit(obj->attr->cache.size, verbose), + separator, obj->attr->cache.linesize, + assoc); + } else + res = hwloc_snprintf(tmp, tmplen, "%s%lu%s", + prefix, + (unsigned long) hwloc_memory_size_printf_value(obj->attr->cache.size, verbose), + hwloc_memory_size_printf_unit(obj->attr->cache.size, verbose)); + break; + case HWLOC_OBJ_BRIDGE: + if (verbose) { + char up[128], down[64]; + /* upstream is PCI or HOST */ + if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI) { + char linkspeed[64]= ""; + if (obj->attr->pcidev.linkspeed) + snprintf(linkspeed, sizeof(linkspeed), "%slink=%.2fGB/s", separator, obj->attr->pcidev.linkspeed); + snprintf(up, sizeof(up), "busid=%04x:%02x:%02x.%01x%sid=%04x:%04x%sclass=%04x(%s)%s", + obj->attr->pcidev.domain, obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func, separator, + obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id, separator, + obj->attr->pcidev.class_id, hwloc_pci_class_string(obj->attr->pcidev.class_id), linkspeed); + } else + *up = '\0'; + /* downstream is_PCI */ + snprintf(down, sizeof(down), "buses=%04x:[%02x-%02x]", + obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); + if (*up) + res = hwloc_snprintf(string, size, "%s%s%s", up, separator, down); + else + res = hwloc_snprintf(string, size, "%s", down); + } + break; + case HWLOC_OBJ_PCI_DEVICE: + if (verbose) { + char linkspeed[64]= ""; + if (obj->attr->pcidev.linkspeed) + snprintf(linkspeed, sizeof(linkspeed), "%slink=%.2fGB/s", separator, obj->attr->pcidev.linkspeed); + res = hwloc_snprintf(string, size, "busid=%04x:%02x:%02x.%01x%sid=%04x:%04x%sclass=%04x(%s)%s", + obj->attr->pcidev.domain, obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func, separator, + obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id, separator, + obj->attr->pcidev.class_id, hwloc_pci_class_string(obj->attr->pcidev.class_id), linkspeed); + } + break; + default: + break; + } + if (res < 0) + return -1; + ret += res; + if (ret > 0) + prefix = separator; + if (res >= tmplen) + res = tmplen>0 ? (int)tmplen - 1 : 0; + tmp += res; + tmplen -= res; + + /* printf infos */ + if (verbose) { + unsigned i; + for(i=0; i<obj->infos_count; i++) { + struct hwloc_info_s *info = &obj->infos[i]; + const char *quote = strchr(info->value, ' ') ? "\"" : ""; + res = hwloc_snprintf(tmp, tmplen, "%s%s=%s%s%s", + prefix, + info->name, + quote, info->value, quote); + if (res < 0) + return -1; + ret += res; + if (res >= tmplen) + res = tmplen>0 ? 
(int)tmplen - 1 : 0; + tmp += res; + tmplen -= res; + if (ret > 0) + prefix = separator; + } + } + + return ret; +} diff --git a/src/backend/cpu/cpu.cmake b/src/backend/cpu/cpu.cmake index b685d7e4b..1072df088 100644 --- a/src/backend/cpu/cpu.cmake +++ b/src/backend/cpu/cpu.cmake @@ -19,12 +19,18 @@ set(SOURCES_BACKEND_CPU if (WITH_HWLOC) - find_package(HWLOC REQUIRED) + if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) + add_subdirectory(src/3rdparty/hwloc) + include_directories(src/3rdparty/hwloc/include) + set(CPUID_LIB hwloc) + else() + find_package(HWLOC REQUIRED) + include_directories(${HWLOC_INCLUDE_DIR}) + set(CPUID_LIB ${HWLOC_LIBRARY}) + endif() set(WITH_LIBCPUID OFF) - include_directories(${HWLOC_INCLUDE_DIR}) - remove_definitions(/DXMRIG_FEATURE_LIBCPUID) add_definitions(/DXMRIG_FEATURE_HWLOC) @@ -32,7 +38,6 @@ if (WITH_HWLOC) add_definitions(/DXMRIG_HWLOC_DEBUG) endif() - set(CPUID_LIB "") set(SOURCES_CPUID src/backend/cpu/platform/BasicCpuInfo.cpp src/backend/cpu/platform/BasicCpuInfo.h