From 02d7c2f977c1fead92d561585b1e1a07708d0fc0 Mon Sep 17 00:00:00 2001
From: XMRig <support@xmrig.com>
Date: Wed, 24 Jul 2019 03:24:01 +0700
Subject: [PATCH] Implemented autoconfig via hwloc.

---
 src/backend/cpu/platform/HwlocCpuInfo.cpp | 153 ++++++++++++++++++++--
 src/backend/cpu/platform/HwlocCpuInfo.h   |   7 +
 2 files changed, 150 insertions(+), 10 deletions(-)
diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp
index 156f3ae44..cdfbdc599 100644
--- a/src/backend/cpu/platform/HwlocCpuInfo.cpp
+++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp
@@ -32,7 +32,7 @@
 namespace xmrig {
 
 
-inline bool isCacheObject(hwloc_obj_t obj)
+static inline bool isCacheObject(hwloc_obj_t obj)
 {
 #   if HWLOC_API_VERSION >= 0x20000
     return hwloc_obj_type_is_cache(obj->type);
@@ -43,23 +43,38 @@ inline bool isCacheObject(hwloc_obj_t obj)
 
 
 template <typename func>
-inline void findCache(hwloc_obj_t obj, func lambda)
+static inline void findCache(hwloc_obj_t obj, unsigned min, unsigned max, func lambda)
 {
     for (size_t i = 0; i < obj->arity; i++) {
         if (isCacheObject(obj->children[i])) {
-            if (obj->children[i]->attr->cache.depth < 2) {
+            const unsigned depth = obj->children[i]->attr->cache.depth;
+            if (depth < min || depth > max) {
                 continue;
             }
 
             lambda(obj->children[i]);
         }
 
-        findCache(obj->children[i], lambda);
+        findCache(obj->children[i], min, max, lambda);
     }
 }
 
 
-inline size_t countByType(hwloc_topology_t topology, hwloc_obj_type_t type)
+template <typename func>
+static inline void findByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambda)
+{
+    for (size_t idx = 0; idx < obj->arity; ++idx) { // a matching child goes to lambda and is not searched further
+        hwloc_obj_t child = obj->children[idx];
+        if (child->type != type) {
+            findByType(child, type, lambda);
+            continue;
+        }
+        lambda(child);
+    }
+}
+
+
+static inline size_t countByType(hwloc_topology_t topology, hwloc_obj_type_t type)
 {
     const int count = hwloc_get_nbobjs_by_type(topology, type);
 
@@ -67,6 +82,22 @@ inline size_t countByType(hwloc_topology_t topology, hwloc_obj_type_t type)
 }
 
 
+static inline size_t countByType(hwloc_obj_t obj, hwloc_obj_type_t type)
+{
+    // Tally how many objects of the requested type findByType() reports below obj.
+    size_t matches = 0;
+    findByType(obj, type, [&matches](hwloc_obj_t) { ++matches; });
+    return matches;
+}
+
+
+static inline bool isCacheExclusive(hwloc_obj_t obj)
+{
+    const char *inclusive = hwloc_obj_get_info_by_name(obj, "Inclusive"); // exclusive unless the value starts with '1'
+    return !(inclusive != nullptr && inclusive[0] == '1');
+}
+
+
 } // namespace xmrig
 
 
@@ -83,11 +114,7 @@ xmrig::HwlocCpuInfo::HwlocCpuInfo() : BasicCpuInfo(),
     hwloc_obj_t root = hwloc_get_root_obj(topology);
     snprintf(m_backend, sizeof m_backend, "hwloc/%s", hwloc_obj_get_info_by_name(root, "hwlocVersion"));
 
-    findCache(root, [this](hwloc_obj_t found) {
-        const unsigned depth = found->attr->cache.depth;
-
-        this->m_cache[depth] += found->attr->cache.size;
-    });
+    findCache(root, 2, 3, [this](hwloc_obj_t found) { this->m_cache[found->attr->cache.depth] += found->attr->cache.size; });
 
     m_threads   = countByType(topology, HWLOC_OBJ_PU);
     m_cores     = countByType(topology, HWLOC_OBJ_CORE);
@@ -96,3 +123,109 @@ xmrig::HwlocCpuInfo::HwlocCpuInfo() : BasicCpuInfo(),
 
     hwloc_topology_destroy(topology);
 }
+
+
+xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm) const
+{
+    // Falls back to BasicCpuInfo when no L2/L3 size is known, hwloc fails, or no thread gets selected.
+    if (L2() == 0 && L3() == 0) {
+        return BasicCpuInfo::threads(algorithm);
+    }
+
+    hwloc_topology_t topology;
+    if (hwloc_topology_init(&topology) != 0) {
+        return BasicCpuInfo::threads(algorithm);
+    }
+
+    CpuThreads threads;
+    threads.reserve(m_threads);
+
+    if (hwloc_topology_load(topology) == 0) {
+        const unsigned depth = L3() > 0 ? 3 : 2; // walk L3 caches when an L3 size is known, otherwise L2
+        std::vector<hwloc_obj_t> caches;
+        caches.reserve(16);
+        findCache(hwloc_get_root_obj(topology), depth, depth, [&caches](hwloc_obj_t found) { caches.emplace_back(found); });
+        for (hwloc_obj_t cache : caches) {
+            processTopLevelCache(cache, algorithm, threads);
+        }
+    }
+
+    hwloc_topology_destroy(topology); // the handle is released whether or not the load succeeded
+    return threads.empty() ? BasicCpuInfo::threads(algorithm) : threads;
+}
+
+
+void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads) const
+{
+    // Appends to threads one CpuThread(1, os_index) per PU chosen beneath cache; may append nothing.
+    size_t PUs = countByType(cache, HWLOC_OBJ_PU);
+    if (PUs == 0) {
+        return;
+    }
+
+    // A zero scratchpad size would make the division below divide by zero; append nothing instead.
+    const size_t scratchpad = algorithm.memory();
+    if (scratchpad == 0) {
+        return;
+    }
+
+    // When an L3 is not marked "Inclusive", every child cache able to hold a scratchpad adds one to the budget.
+    size_t size = cache->attr->cache.size;
+    if (cache->attr->cache.depth == 3 && isCacheExclusive(cache)) {
+        for (size_t i = 0; i < cache->arity; ++i) {
+            hwloc_obj_t l2 = cache->children[i];
+            if (isCacheObject(l2) && l2->attr != nullptr && l2->attr->cache.size >= scratchpad) {
+                size += scratchpad;
+            }
+        }
+    }
+
+    std::vector<hwloc_obj_t> cores;
+    cores.reserve(m_cores);
+    findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
+
+    // Number of scratchpads the budget holds, rounded to the nearest integer.
+    size_t cacheHashes = (size + (scratchpad / 2)) / scratchpad;
+
+#   ifdef XMRIG_ALGO_CN_GPU
+    if (algorithm == Algorithm::CN_GPU) {
+        cacheHashes = PUs;
+    }
+#   endif
+
+    // Budget covers every PU: take all PUs of every core (the inner loop already skips childless cores).
+    if (cacheHashes >= PUs) {
+        for (hwloc_obj_t core : cores) {
+            for (unsigned i = 0; i < core->arity; ++i) {
+                if (core->children[i]->type == HWLOC_OBJ_PU) {
+                    threads.push_back(CpuThread(1, core->children[i]->os_index));
+                }
+            }
+        }
+        return;
+    }
+
+    // Otherwise take child pu_id of each core in turn, then pu_id + 1, ..., until the budget is used up.
+    size_t pu_id = 0;
+    while (cacheHashes > 0 && PUs > 0) {
+        bool allocated_pu = false;
+        for (hwloc_obj_t core : cores) {
+            if (core->arity <= pu_id || core->children[pu_id]->type != HWLOC_OBJ_PU) {
+                continue;
+            }
+
+            cacheHashes--;
+            PUs--;
+            allocated_pu = true;
+            threads.push_back(CpuThread(1, core->children[pu_id]->os_index));
+            if (cacheHashes == 0) {
+                break;
+            }
+        }
+
+        if (!allocated_pu) {
+            break;
+        }
+        pu_id++;
+    }
+}
diff --git a/src/backend/cpu/platform/HwlocCpuInfo.h b/src/backend/cpu/platform/HwlocCpuInfo.h
index a67680665..a6cd6b55c 100644
--- a/src/backend/cpu/platform/HwlocCpuInfo.h
+++ b/src/backend/cpu/platform/HwlocCpuInfo.h
@@ -29,6 +29,9 @@
 #include "backend/cpu/platform/BasicCpuInfo.h"
 
 
+typedef struct hwloc_obj *hwloc_obj_t; // handle type named by processTopLevelCache() below; NOTE(review): presumably declared here so this header need not include <hwloc.h> - confirm
+
+
 namespace xmrig {
 
 
@@ -38,6 +41,8 @@ public:
     HwlocCpuInfo();
 
 protected:
+    CpuThreads threads(const Algorithm &algorithm) const override;
+
     inline const char *backend() const override     { return m_backend; }
     inline size_t cores() const override            { return m_cores; }
     inline size_t L2() const override               { return m_cache[2]; }
@@ -46,6 +51,8 @@ protected:
     inline size_t packages() const override         { return m_packages; }
 
 private:
+    void processTopLevelCache(hwloc_obj_t obj, const Algorithm &algorithm, CpuThreads &threads) const;
+
     char m_backend[20];
     size_t m_cache[5];
     size_t m_cores      = 0;