From c83429c55ccaf481c927df14fb798c3341b69765 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Mon, 13 Jul 2020 17:23:18 +0200 Subject: [PATCH] RandomX: added cache QoS support False by default. If set to true, all non-mining CPU cores will not have access to L3 cache. --- src/backend/cpu/interfaces/ICpuInfo.h | 2 + src/backend/cpu/platform/BasicCpuInfo.cpp | 6 +- src/backend/cpu/platform/BasicCpuInfo.h | 1 + src/config.json | 1 + src/crypto/rx/Rx.cpp | 3 +- src/crypto/rx/Rx.h | 4 +- src/crypto/rx/RxConfig.cpp | 5 ++ src/crypto/rx/RxConfig.h | 3 + src/crypto/rx/Rx_linux.cpp | 67 ++++++++++++++++++++--- src/crypto/rx/Rx_win.cpp | 53 +++++++++++++++--- 10 files changed, 126 insertions(+), 19 deletions(-) diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h index bc73e75aa..c33d26daa 100644 --- a/src/backend/cpu/interfaces/ICpuInfo.h +++ b/src/backend/cpu/interfaces/ICpuInfo.h @@ -62,6 +62,7 @@ public: FLAG_SSSE3, FLAG_XOP, FLAG_POPCNT, + FLAG_CAT_L3, FLAG_MAX }; @@ -79,6 +80,7 @@ public: virtual bool hasAVX2() const = 0; virtual bool hasBMI2() const = 0; virtual bool hasOneGbPages() const = 0; + virtual bool hasCatL3() const = 0; virtual const char *backend() const = 0; virtual const char *brand() const = 0; virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0; diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index b7a867ea4..ca022b2ea 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -57,7 +57,7 @@ namespace xmrig { -static const std::array flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt" }; +static const std::array flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt", "cat_l3" }; static const std::array msrNames = { "none", "ryzen", "intel", "custom" }; @@ -66,7 +66,7 @@ static inline void cpuid(uint32_t level, int32_t output[4]) memset(output, 0, sizeof(int32_t) * 4); # ifdef _MSC_VER - __cpuid(output, static_cast(level)); + __cpuidex(output, static_cast(level), 0); # else __cpuid_count(level, 0, output[0], output[1], output[2], output[3]); # endif @@ -143,6 +143,7 @@ static inline bool has_sse2() { return has_feature(PROCESSOR_INFO, static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); } static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); } static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); } +static inline bool has_cat_l3() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); } } // namespace xmrig @@ -178,6 +179,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : m_flags.set(FLAG_SSSE3, has_ssse3()); m_flags.set(FLAG_XOP, has_xop()); m_flags.set(FLAG_POPCNT, has_popcnt()); + m_flags.set(FLAG_CAT_L3, has_cat_l3()); # ifdef XMRIG_FEATURE_ASM if (hasAES()) { diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h index ea478dbf1..05e5f442a 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.h +++ b/src/backend/cpu/platform/BasicCpuInfo.h @@ -51,6 +51,7 @@ protected: inline bool hasAVX2() const override { return has(FLAG_AVX2); } inline bool hasBMI2() const override { return has(FLAG_BMI2); } inline bool hasOneGbPages() const override { return has(FLAG_PDPE1GB); } + inline bool hasCatL3() const override { return has(FLAG_CAT_L3); } inline const char *brand() const override { return m_brand; } inline MsrMod msrMod() const override { return m_msrMod; } inline size_t cores() const override { return 0; } diff --git a/src/config.json b/src/config.json index 2ab891615..2df01a5b6 100644 --- a/src/config.json +++ b/src/config.json @@ -20,6 +20,7 @@ "1gb-pages": false, "rdmsr": true, "wrmsr": true, + "cache_qos": false, "numa": true }, "cpu": { diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index 129ae875f..ad4258af4 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -28,6 +28,7 @@ #include "crypto/rx/Rx.h" #include "backend/common/Tags.h" #include "backend/cpu/CpuConfig.h" +#include "backend/cpu/CpuThreads.h" #include "base/io/log/Log.h" #include "base/io/log/Tags.h" #include "crypto/rx/RxConfig.h" @@ -78,7 +79,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, const CpuConfig &cp } if (!msrInitialized) { - msrInit(config); + msrInit(config, cpu.threads().get(job.algorithm()).data()); msrInitialized = true; } diff --git a/src/crypto/rx/Rx.h b/src/crypto/rx/Rx.h index 75ba85e17..43be29cc2 100644 --- a/src/crypto/rx/Rx.h +++ b/src/crypto/rx/Rx.h @@ -30,6 +30,7 @@ #include #include +#include #include "crypto/common/HugePagesInfo.h" @@ -41,6 +42,7 @@ namespace xmrig class Algorithm; class CpuConfig; +class CpuThread; class IRxListener; class Job; class RxConfig; @@ -62,7 +64,7 @@ public: # endif private: - static void msrInit(const RxConfig &config); + static void msrInit(const RxConfig &config, const std::vector& threads); static void msrDestroy(); static void setupMainLoopExceptionFrame(); }; diff --git a/src/crypto/rx/RxConfig.cpp b/src/crypto/rx/RxConfig.cpp index 82336708b..f1cd09e97 100644 --- a/src/crypto/rx/RxConfig.cpp +++ b/src/crypto/rx/RxConfig.cpp @@ -51,6 +51,7 @@ static const char *kMode = "mode"; static const char *kOneGbPages = "1gb-pages"; static const char *kRdmsr = "rdmsr"; static const char *kWrmsr = "wrmsr"; +static const char *kCacheQoS = "cache_qos"; #ifdef XMRIG_FEATURE_HWLOC static const char *kNUMA = "numa"; @@ -89,6 +90,8 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value) readMSR(Json::getValue(value, kWrmsr)); # endif + m_cacheQoS = Json::getBool(value, kCacheQoS, m_cacheQoS); + # ifdef XMRIG_OS_LINUX m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages); # endif @@ -151,6 +154,8 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const obj.AddMember(StringRef(kWrmsr), false, allocator); # endif + obj.AddMember(StringRef(kCacheQoS), m_cacheQoS, allocator); + # ifdef XMRIG_FEATURE_HWLOC if (!m_nodeset.empty()) { Value numa(kArrayType); diff --git a/src/crypto/rx/RxConfig.h b/src/crypto/rx/RxConfig.h index af2ac9df8..e3e06326e 100644 --- a/src/crypto/rx/RxConfig.h +++ b/src/crypto/rx/RxConfig.h @@ -65,6 +65,7 @@ public: inline bool isOneGbPages() const { return m_oneGbPages; } inline bool rdmsr() const { return m_rdmsr; } inline bool wrmsr() const { return m_wrmsr; } + inline bool cacheQoS() const { return m_cacheQoS; } inline Mode mode() const { return m_mode; } # ifdef XMRIG_FEATURE_MSR @@ -83,6 +84,8 @@ private: bool m_wrmsr = false; # endif + bool m_cacheQoS = false; + Mode readMode(const rapidjson::Value &value) const; bool m_numa = true; diff --git a/src/crypto/rx/Rx_linux.cpp b/src/crypto/rx/Rx_linux.cpp index c919109a2..61f745848 100644 --- a/src/crypto/rx/Rx_linux.cpp +++ b/src/crypto/rx/Rx_linux.cpp @@ -29,6 +29,7 @@ #include "crypto/rx/Rx.h" #include "backend/cpu/Cpu.h" +#include "backend/cpu/CpuThread.h" #include "base/io/log/Log.h" #include "base/tools/Chrono.h" #include "crypto/rx/RxConfig.h" @@ -123,14 +124,15 @@ static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value, uint64_t ma } -static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask) +template +static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask, T&& callback) { struct dirent **namelist; int dir_entries = scandir("/dev/cpu", &namelist, dir_filter, 0); int errors = 0; while (dir_entries--) { - if (!wrmsr_on_cpu(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) { + if (!callback(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) { ++errors; } @@ -159,7 +161,7 @@ static bool wrmsr_modprobe() } -static bool wrmsr(const MsrItems &preset, bool save) +static bool wrmsr(const MsrItems& preset, const std::vector& threads, bool cache_qos, bool save) { if (!wrmsr_modprobe()) { return false; @@ -177,12 +179,61 @@ static bool wrmsr(const MsrItems &preset, bool save) } for (const auto &i : preset) { - if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask())) { + if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask(), [](uint32_t reg, uint32_t cpu, uint64_t value, uint64_t mask) { return wrmsr_on_cpu(reg, cpu, value, mask); })) { return false; } } - return true; + const uint32_t n = Cpu::info()->threads(); + + // Which CPU cores will have access to the full L3 cache + std::vector cacheEnabled(n, false); + bool cacheQoSDisabled = threads.empty(); + + for (const CpuThread& t : threads) { + // If some thread has no affinity or wrong affinity, disable cache QoS + if ((t.affinity() < 0) || (t.affinity() >= n)) { + cacheQoSDisabled = true; + if (cache_qos) { + LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag); + } + break; + } + + cacheEnabled[t.affinity()] = true; + } + + if (cache_qos && !Cpu::info()->hasCatL3()) { + LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag); + cache_qos = false; + } + + bool result = true; + + if (cache_qos) { + result = wrmsr_on_all_cpus(0xC8F, 0, MsrItem::kNoMask, [&cacheEnabled, cacheQoSDisabled](uint32_t, uint32_t cpu, uint64_t, uint64_t) { + if (cacheQoSDisabled || (cpu >= cacheEnabled.size()) || cacheEnabled[cpu]) { + // Assign Class Of Service 0 to current CPU core (default, full L3 cache available) + if (!wrmsr_on_cpu(0xC8F, cpu, 0, MsrItem::kNoMask)) { + return false; + } + } + else { + // Disable L3 cache for Class Of Service 1 + if (!wrmsr_on_cpu(0xC91, cpu, 0, MsrItem::kNoMask)) { + return false; + } + + // Assign Class Of Service 1 to current CPU core + if (!wrmsr_on_cpu(0xC8F, cpu, 1ULL << 32, MsrItem::kNoMask)) { + return false; + } + } + return true; + }); + } + + return result; } @@ -216,7 +267,7 @@ void Rx::setMainLoopBounds(const std::pair& bounds) } // namespace xmrig -void xmrig::Rx::msrInit(const RxConfig &config) +void xmrig::Rx::msrInit(const RxConfig &config, const std::vector& threads) { const auto &preset = config.msrPreset(); if (preset.empty()) { @@ -225,7 +276,7 @@ void xmrig::Rx::msrInit(const RxConfig &config) const uint64_t ts = Chrono::steadyMSecs(); - if (wrmsr(preset, config.rdmsr())) { + if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) { LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts); } else { @@ -242,7 +293,7 @@ void xmrig::Rx::msrDestroy() const uint64_t ts = Chrono::steadyMSecs(); - if (!wrmsr(savedState, false)) { + if (!wrmsr(savedState, std::vector(), true, false)) { LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts); } } diff --git a/src/crypto/rx/Rx_win.cpp b/src/crypto/rx/Rx_win.cpp index dc8a36fb2..bac3d5166 100644 --- a/src/crypto/rx/Rx_win.cpp +++ b/src/crypto/rx/Rx_win.cpp @@ -30,6 +30,7 @@ #include "crypto/rx/Rx.h" #include "backend/cpu/Cpu.h" +#include "backend/cpu/CpuThread.h" #include "base/io/log/Log.h" #include "base/kernel/Platform.h" #include "base/tools/Chrono.h" @@ -256,7 +257,7 @@ static bool wrmsr(HANDLE driver, uint32_t reg, uint64_t value, uint64_t mask) } -static bool wrmsr(const MsrItems &preset, bool save) +static bool wrmsr(const MsrItems &preset, const std::vector& threads, bool cache_qos, bool save) { bool success = true; @@ -282,14 +283,52 @@ static bool wrmsr(const MsrItems &preset, bool save) } } - std::thread wrmsr_thread([driver, &preset, &success]() { - for (uint32_t i = 0, n = Cpu::info()->threads(); i < n; ++i) { + const uint32_t n = Cpu::info()->threads(); + + // Which CPU cores will have access to the full L3 cache + std::vector cacheEnabled(n, false); + bool cacheQoSDisabled = threads.empty(); + + for (const CpuThread& t : threads) { + // If some thread has no affinity or wrong affinity, disable cache QoS + if ((t.affinity() < 0) || (t.affinity() >= n)) { + cacheQoSDisabled = true; + if (cache_qos) { + LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag); + } + break; + } + + cacheEnabled[t.affinity()] = true; + } + + if (cache_qos && !Cpu::info()->hasCatL3()) { + LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag); + cache_qos = false; + } + + std::thread wrmsr_thread([n, driver, &preset, &cacheEnabled, cache_qos, cacheQoSDisabled, &success]() { + for (uint32_t i = 0; i < n; ++i) { if (!Platform::setThreadAffinity(i)) { continue; } for (const auto &i : preset) { - success = wrmsr(driver, i.reg(), i.value(), i.mask()); + success &= wrmsr(driver, i.reg(), i.value(), i.mask()); + } + + if (cache_qos) { + if (cacheQoSDisabled || cacheEnabled[i]) { + // Assign Class Of Service 0 to current CPU core (default, full L3 cache available) + success &= wrmsr(driver, 0xC8F, 0, MsrItem::kNoMask); + } + else { + // Disable L3 cache for Class Of Service 1 + success &= wrmsr(driver, 0xC91, 0, MsrItem::kNoMask); + + // Assign Class Of Service 1 to current CPU core + success &= wrmsr(driver, 0xC8F, 1ULL << 32, MsrItem::kNoMask); + } } if (!success) { @@ -349,7 +388,7 @@ void Rx::setMainLoopBounds(const std::pair& bounds) } // namespace xmrig -void xmrig::Rx::msrInit(const RxConfig &config) +void xmrig::Rx::msrInit(const RxConfig &config, const std::vector& threads) { const auto &preset = config.msrPreset(); if (preset.empty()) { @@ -358,7 +397,7 @@ void xmrig::Rx::msrInit(const RxConfig &config) const uint64_t ts = Chrono::steadyMSecs(); - if (wrmsr(preset, config.rdmsr())) { + if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) { LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts); } else { @@ -375,7 +414,7 @@ void xmrig::Rx::msrDestroy() const uint64_t ts = Chrono::steadyMSecs(); - if (!wrmsr(savedState, false)) { + if (!wrmsr(savedState, std::vector(), true, false)) { LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts); } }