RandomX: added cache QoS support

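Adds the RandomX `cache_qos` config option, which is false by default. If set to true, all non-mining CPU cores will not have access to L3 cache. It only takes effect when the CPU supports cat_l3 and all mining threads have affinity set.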
2025-04-22 06:28:09 +00:00 · 2020-07-13 17:23:18 +02:00 · 2020-07-13 17:23:18 +02:00 · c83429c55c
commit c83429c55c
parent e5a2689052
10 changed files with 126 additions and 19 deletions
--- a/src/backend/cpu/interfaces/ICpuInfo.h
+++ b/src/backend/cpu/interfaces/ICpuInfo.h
@ -62,6 +62,7 @@ public:
        FLAG_SSSE3,
        FLAG_XOP,
        FLAG_POPCNT,
+        FLAG_CAT_L3,
        FLAG_MAX
    };

@ -79,6 +80,7 @@ public:
    virtual bool hasAVX2() const                                                    = 0;
    virtual bool hasBMI2() const                                                    = 0;
    virtual bool hasOneGbPages() const                                              = 0;
+    virtual bool hasCatL3() const                                                   = 0;
    virtual const char *backend() const                                             = 0;
    virtual const char *brand() const                                               = 0;
    virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const    = 0;
--- a/src/backend/cpu/platform/BasicCpuInfo.cpp
+++ b/src/backend/cpu/platform/BasicCpuInfo.cpp
@ -57,7 +57,7 @@
 namespace xmrig {


-static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames     = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt" };
+static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames     = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt", "cat_l3" };
 static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames   = { "none", "ryzen", "intel", "custom" };


@ -66,7 +66,7 @@ static inline void cpuid(uint32_t level, int32_t output[4])
    memset(output, 0, sizeof(int32_t) * 4);

 #   ifdef _MSC_VER
-    __cpuid(output, static_cast<int>(level));
+    __cpuidex(output, static_cast<int>(level), 0);
 #   else
    __cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
 #   endif
@ -143,6 +143,7 @@ static inline bool has_sse2()       { return has_feature(PROCESSOR_INFO,
 static inline bool has_ssse3()      { return has_feature(PROCESSOR_INFO,        ECX_Reg, 1 << 9); }
 static inline bool has_xop()        { return has_feature(0x80000001,            ECX_Reg, 1 << 11); }
 static inline bool has_popcnt()     { return has_feature(PROCESSOR_INFO,        ECX_Reg, 1 << 23); }
+static inline bool has_cat_l3()     { return has_feature(EXTENDED_FEATURES,     EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); }


 } // namespace xmrig
@ -178,6 +179,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
    m_flags.set(FLAG_SSSE3,   has_ssse3());
    m_flags.set(FLAG_XOP,     has_xop());
    m_flags.set(FLAG_POPCNT,  has_popcnt());
+    m_flags.set(FLAG_CAT_L3,  has_cat_l3());

 #   ifdef XMRIG_FEATURE_ASM
    if (hasAES()) {
--- a/src/backend/cpu/platform/BasicCpuInfo.h
+++ b/src/backend/cpu/platform/BasicCpuInfo.h
@ -51,6 +51,7 @@ protected:
    inline bool hasAVX2() const override            { return has(FLAG_AVX2); }
    inline bool hasBMI2() const override            { return has(FLAG_BMI2); }
    inline bool hasOneGbPages() const override      { return has(FLAG_PDPE1GB); }
+    inline bool hasCatL3() const override           { return has(FLAG_CAT_L3); }
    inline const char *brand() const override       { return m_brand; }
    inline MsrMod msrMod() const override           { return m_msrMod; }
    inline size_t cores() const override            { return 0; }
--- a/src/config.json
+++ b/src/config.json
@ -20,6 +20,7 @@
        "1gb-pages": false,
        "rdmsr": true,
        "wrmsr": true,
+        "cache_qos": false,
        "numa": true
    },
    "cpu": {
--- a/src/crypto/rx/Rx.cpp
+++ b/src/crypto/rx/Rx.cpp
@ -28,6 +28,7 @@
 #include "crypto/rx/Rx.h"
 #include "backend/common/Tags.h"
 #include "backend/cpu/CpuConfig.h"
+#include "backend/cpu/CpuThreads.h"
 #include "base/io/log/Log.h"
 #include "base/io/log/Tags.h"
 #include "crypto/rx/RxConfig.h"
@ -78,7 +79,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, const CpuConfig &cp
    }

    if (!msrInitialized) {
-        msrInit(config);
+        msrInit(config, cpu.threads().get(job.algorithm()).data());
        msrInitialized = true;
    }

--- a/src/crypto/rx/Rx.h
+++ b/src/crypto/rx/Rx.h
@ -30,6 +30,7 @@

 #include <cstdint>
 #include <utility>
+#include <vector>


 #include "crypto/common/HugePagesInfo.h"
@ -41,6 +42,7 @@ namespace xmrig

 class Algorithm;
 class CpuConfig;
+class CpuThread;
 class IRxListener;
 class Job;
 class RxConfig;
@ -62,7 +64,7 @@ public:
 #   endif

 private:
-    static void msrInit(const RxConfig &config);
+    static void msrInit(const RxConfig &config, const std::vector<CpuThread>& threads);
    static void msrDestroy();
    static void setupMainLoopExceptionFrame();
 };
--- a/src/crypto/rx/RxConfig.cpp
+++ b/src/crypto/rx/RxConfig.cpp
@ -51,6 +51,7 @@ static const char *kMode        = "mode";
 static const char *kOneGbPages  = "1gb-pages";
 static const char *kRdmsr       = "rdmsr";
 static const char *kWrmsr       = "wrmsr";
+static const char *kCacheQoS    = "cache_qos";

 #ifdef XMRIG_FEATURE_HWLOC
 static const char *kNUMA        = "numa";
@ -89,6 +90,8 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
        readMSR(Json::getValue(value, kWrmsr));
 #       endif

+        m_cacheQoS = Json::getBool(value, kCacheQoS, m_cacheQoS);
+
 #       ifdef XMRIG_OS_LINUX
        m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages);
 #       endif
@ -151,6 +154,8 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
    obj.AddMember(StringRef(kWrmsr), false, allocator);
 #   endif

+    obj.AddMember(StringRef(kCacheQoS), m_cacheQoS, allocator);
+
 #   ifdef XMRIG_FEATURE_HWLOC
    if (!m_nodeset.empty()) {
        Value numa(kArrayType);
--- a/src/crypto/rx/RxConfig.h
+++ b/src/crypto/rx/RxConfig.h
@ -65,6 +65,7 @@ public:
    inline bool isOneGbPages() const    { return m_oneGbPages; }
    inline bool rdmsr() const           { return m_rdmsr; }
    inline bool wrmsr() const           { return m_wrmsr; }
+    inline bool cacheQoS() const        { return m_cacheQoS; }
    inline Mode mode() const            { return m_mode; }

 #   ifdef XMRIG_FEATURE_MSR
@ -83,6 +84,8 @@ private:
    bool m_wrmsr = false;
 #   endif

+    bool m_cacheQoS = false;
+
    Mode readMode(const rapidjson::Value &value) const;

    bool m_numa         = true;
--- a/src/crypto/rx/Rx_linux.cpp
+++ b/src/crypto/rx/Rx_linux.cpp
@ -29,6 +29,7 @@

 #include "crypto/rx/Rx.h"
 #include "backend/cpu/Cpu.h"
+#include "backend/cpu/CpuThread.h"
 #include "base/io/log/Log.h"
 #include "base/tools/Chrono.h"
 #include "crypto/rx/RxConfig.h"
@ -123,14 +124,15 @@ static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value, uint64_t ma
 }


-static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask)
+template<typename T>
+static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask, T&& callback)
 {
    struct dirent **namelist;
    int dir_entries = scandir("/dev/cpu", &namelist, dir_filter, 0);
    int errors      = 0;

    while (dir_entries--) {
-        if (!wrmsr_on_cpu(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) {
+        if (!callback(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) {
            ++errors;
        }

@ -159,7 +161,7 @@ static bool wrmsr_modprobe()
 }


-static bool wrmsr(const MsrItems &preset, bool save)
+static bool wrmsr(const MsrItems& preset, const std::vector<CpuThread>& threads, bool cache_qos, bool save)
 {
    if (!wrmsr_modprobe()) {
        return false;
@ -177,12 +179,61 @@ static bool wrmsr(const MsrItems &preset, bool save)
    }

    for (const auto &i : preset) {
-        if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask())) {
+        if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask(), [](uint32_t reg, uint32_t cpu, uint64_t value, uint64_t mask) { return wrmsr_on_cpu(reg, cpu, value, mask); })) {
            return false;
        }
    }

-    return true;
+    const uint32_t n = Cpu::info()->threads();
+
+    // Which CPU cores will have access to the full L3 cache
+    std::vector<bool> cacheEnabled(n, false);
+    bool cacheQoSDisabled = threads.empty();
+
+    for (const CpuThread& t : threads) {
+        // If some thread has no affinity or wrong affinity, disable cache QoS
+        if ((t.affinity() < 0) || (t.affinity() >= n)) {
+            cacheQoSDisabled = true;
+            if (cache_qos) {
+                LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag);
+            }
+            break;
+        }
+
+        cacheEnabled[t.affinity()] = true;
+    }
+
+    if (cache_qos && !Cpu::info()->hasCatL3()) {
+        LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag);
+        cache_qos = false;
+    }
+
+    bool result = true;
+
+    if (cache_qos) {
+        result = wrmsr_on_all_cpus(0xC8F, 0, MsrItem::kNoMask, [&cacheEnabled, cacheQoSDisabled](uint32_t, uint32_t cpu, uint64_t, uint64_t) {
+            if (cacheQoSDisabled || (cpu >= cacheEnabled.size()) || cacheEnabled[cpu]) {
+                // Assign Class Of Service 0 to current CPU core (default, full L3 cache available)
+                if (!wrmsr_on_cpu(0xC8F, cpu, 0, MsrItem::kNoMask)) {
+                    return false;
+                }
+            }
+            else {
+                // Disable L3 cache for Class Of Service 1
+                if (!wrmsr_on_cpu(0xC91, cpu, 0, MsrItem::kNoMask)) {
+                    return false;
+                }
+
+                // Assign Class Of Service 1 to current CPU core
+                if (!wrmsr_on_cpu(0xC8F, cpu, 1ULL << 32, MsrItem::kNoMask)) {
+                    return false;
+                }
+            }
+            return true;
+        });
+    }
+
+    return result;
 }


@ -216,7 +267,7 @@ void Rx::setMainLoopBounds(const std::pair<const void*, const void*>& bounds)
 } // namespace xmrig


-void xmrig::Rx::msrInit(const RxConfig &config)
+void xmrig::Rx::msrInit(const RxConfig &config, const std::vector<CpuThread>& threads)
 {
    const auto &preset = config.msrPreset();
    if (preset.empty()) {
@ -225,7 +276,7 @@ void xmrig::Rx::msrInit(const RxConfig &config)

    const uint64_t ts = Chrono::steadyMSecs();

-    if (wrmsr(preset, config.rdmsr())) {
+    if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) {
        LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
    }
    else {
@ -242,7 +293,7 @@ void xmrig::Rx::msrDestroy()

    const uint64_t ts = Chrono::steadyMSecs();

-    if (!wrmsr(savedState, false)) {
+    if (!wrmsr(savedState, std::vector<CpuThread>(), true, false)) {
        LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
    }
 }
--- a/src/crypto/rx/Rx_win.cpp
+++ b/src/crypto/rx/Rx_win.cpp
@ -30,6 +30,7 @@

 #include "crypto/rx/Rx.h"
 #include "backend/cpu/Cpu.h"
+#include "backend/cpu/CpuThread.h"
 #include "base/io/log/Log.h"
 #include "base/kernel/Platform.h"
 #include "base/tools/Chrono.h"
@ -256,7 +257,7 @@ static bool wrmsr(HANDLE driver, uint32_t reg, uint64_t value, uint64_t mask)
 }


-static bool wrmsr(const MsrItems &preset, bool save)
+static bool wrmsr(const MsrItems &preset, const std::vector<CpuThread>& threads, bool cache_qos, bool save)
 {
    bool success = true;

@ -282,14 +283,52 @@ static bool wrmsr(const MsrItems &preset, bool save)
        }
    }

-    std::thread wrmsr_thread([driver, &preset, &success]() {
-        for (uint32_t i = 0, n = Cpu::info()->threads(); i < n; ++i) {
+    const uint32_t n = Cpu::info()->threads();
+
+    // Which CPU cores will have access to the full L3 cache
+    std::vector<bool> cacheEnabled(n, false);
+    bool cacheQoSDisabled = threads.empty();
+
+    for (const CpuThread& t : threads) {
+        // If some thread has no affinity or wrong affinity, disable cache QoS
+        if ((t.affinity() < 0) || (t.affinity() >= n)) {
+            cacheQoSDisabled = true;
+            if (cache_qos) {
+                LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag);
+            }
+            break;
+        }
+
+        cacheEnabled[t.affinity()] = true;
+    }
+
+    if (cache_qos && !Cpu::info()->hasCatL3()) {
+        LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag);
+        cache_qos = false;
+    }
+
+    std::thread wrmsr_thread([n, driver, &preset, &cacheEnabled, cache_qos, cacheQoSDisabled, &success]() {
+        for (uint32_t i = 0; i < n; ++i) {
            if (!Platform::setThreadAffinity(i)) {
                continue;
            }

            for (const auto &i : preset) {
-                success = wrmsr(driver, i.reg(), i.value(), i.mask());
+                success &= wrmsr(driver, i.reg(), i.value(), i.mask());
+            }
+
+            if (cache_qos) {
+                if (cacheQoSDisabled || cacheEnabled[i]) {
+                    // Assign Class Of Service 0 to current CPU core (default, full L3 cache available)
+                    success &= wrmsr(driver, 0xC8F, 0, MsrItem::kNoMask);
+                }
+                else {
+                    // Disable L3 cache for Class Of Service 1
+                    success &= wrmsr(driver, 0xC91, 0, MsrItem::kNoMask);
+
+                    // Assign Class Of Service 1 to current CPU core
+                    success &= wrmsr(driver, 0xC8F, 1ULL << 32, MsrItem::kNoMask);
+                }
            }

            if (!success) {
@ -349,7 +388,7 @@ void Rx::setMainLoopBounds(const std::pair<const void*, const void*>& bounds)
 } // namespace xmrig


-void xmrig::Rx::msrInit(const RxConfig &config)
+void xmrig::Rx::msrInit(const RxConfig &config, const std::vector<CpuThread>& threads)
 {
    const auto &preset = config.msrPreset();
    if (preset.empty()) {
@ -358,7 +397,7 @@ void xmrig::Rx::msrInit(const RxConfig &config)

    const uint64_t ts = Chrono::steadyMSecs();

-    if (wrmsr(preset, config.rdmsr())) {
+    if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) {
        LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
    }
    else {
@ -375,7 +414,7 @@ void xmrig::Rx::msrDestroy()

    const uint64_t ts = Chrono::steadyMSecs();

-    if (!wrmsr(savedState, false)) {
+    if (!wrmsr(savedState, std::vector<CpuThread>(), true, false)) {
        LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
    }
 }