RandomX: added cache QoS support

The new `cache_qos` option is false by default. If it is set to true, CPU cores that are not running mining threads will not have access to the L3 cache.
This commit is contained in:
SChernykh 2020-07-13 17:23:18 +02:00
parent e5a2689052
commit c83429c55c
10 changed files with 126 additions and 19 deletions

View file

@ -62,6 +62,7 @@ public:
FLAG_SSSE3, FLAG_SSSE3,
FLAG_XOP, FLAG_XOP,
FLAG_POPCNT, FLAG_POPCNT,
FLAG_CAT_L3,
FLAG_MAX FLAG_MAX
}; };
@ -79,6 +80,7 @@ public:
virtual bool hasAVX2() const = 0; virtual bool hasAVX2() const = 0;
virtual bool hasBMI2() const = 0; virtual bool hasBMI2() const = 0;
virtual bool hasOneGbPages() const = 0; virtual bool hasOneGbPages() const = 0;
virtual bool hasCatL3() const = 0;
virtual const char *backend() const = 0; virtual const char *backend() const = 0;
virtual const char *brand() const = 0; virtual const char *brand() const = 0;
virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0; virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0;

View file

@ -57,7 +57,7 @@
namespace xmrig { namespace xmrig {
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt" }; static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt", "cat_l3" };
static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" }; static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" };
@ -66,7 +66,7 @@ static inline void cpuid(uint32_t level, int32_t output[4])
memset(output, 0, sizeof(int32_t) * 4); memset(output, 0, sizeof(int32_t) * 4);
# ifdef _MSC_VER # ifdef _MSC_VER
__cpuid(output, static_cast<int>(level)); __cpuidex(output, static_cast<int>(level), 0);
# else # else
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]); __cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
# endif # endif
@ -143,6 +143,7 @@ static inline bool has_sse2() { return has_feature(PROCESSOR_INFO,
static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); } static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); }
static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); } static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); }
static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); } static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); }
static inline bool has_cat_l3() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); }
} // namespace xmrig } // namespace xmrig
@ -178,6 +179,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
m_flags.set(FLAG_SSSE3, has_ssse3()); m_flags.set(FLAG_SSSE3, has_ssse3());
m_flags.set(FLAG_XOP, has_xop()); m_flags.set(FLAG_XOP, has_xop());
m_flags.set(FLAG_POPCNT, has_popcnt()); m_flags.set(FLAG_POPCNT, has_popcnt());
m_flags.set(FLAG_CAT_L3, has_cat_l3());
# ifdef XMRIG_FEATURE_ASM # ifdef XMRIG_FEATURE_ASM
if (hasAES()) { if (hasAES()) {

View file

@ -51,6 +51,7 @@ protected:
inline bool hasAVX2() const override { return has(FLAG_AVX2); } inline bool hasAVX2() const override { return has(FLAG_AVX2); }
inline bool hasBMI2() const override { return has(FLAG_BMI2); } inline bool hasBMI2() const override { return has(FLAG_BMI2); }
inline bool hasOneGbPages() const override { return has(FLAG_PDPE1GB); } inline bool hasOneGbPages() const override { return has(FLAG_PDPE1GB); }
inline bool hasCatL3() const override { return has(FLAG_CAT_L3); }
inline const char *brand() const override { return m_brand; } inline const char *brand() const override { return m_brand; }
inline MsrMod msrMod() const override { return m_msrMod; } inline MsrMod msrMod() const override { return m_msrMod; }
inline size_t cores() const override { return 0; } inline size_t cores() const override { return 0; }

View file

@ -20,6 +20,7 @@
"1gb-pages": false, "1gb-pages": false,
"rdmsr": true, "rdmsr": true,
"wrmsr": true, "wrmsr": true,
"cache_qos": false,
"numa": true "numa": true
}, },
"cpu": { "cpu": {

View file

@ -28,6 +28,7 @@
#include "crypto/rx/Rx.h" #include "crypto/rx/Rx.h"
#include "backend/common/Tags.h" #include "backend/common/Tags.h"
#include "backend/cpu/CpuConfig.h" #include "backend/cpu/CpuConfig.h"
#include "backend/cpu/CpuThreads.h"
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
#include "base/io/log/Tags.h" #include "base/io/log/Tags.h"
#include "crypto/rx/RxConfig.h" #include "crypto/rx/RxConfig.h"
@ -78,7 +79,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, const CpuConfig &cp
} }
if (!msrInitialized) { if (!msrInitialized) {
msrInit(config); msrInit(config, cpu.threads().get(job.algorithm()).data());
msrInitialized = true; msrInitialized = true;
} }

View file

@ -30,6 +30,7 @@
#include <cstdint> #include <cstdint>
#include <utility> #include <utility>
#include <vector>
#include "crypto/common/HugePagesInfo.h" #include "crypto/common/HugePagesInfo.h"
@ -41,6 +42,7 @@ namespace xmrig
class Algorithm; class Algorithm;
class CpuConfig; class CpuConfig;
class CpuThread;
class IRxListener; class IRxListener;
class Job; class Job;
class RxConfig; class RxConfig;
@ -62,7 +64,7 @@ public:
# endif # endif
private: private:
static void msrInit(const RxConfig &config); static void msrInit(const RxConfig &config, const std::vector<CpuThread>& threads);
static void msrDestroy(); static void msrDestroy();
static void setupMainLoopExceptionFrame(); static void setupMainLoopExceptionFrame();
}; };

View file

@ -51,6 +51,7 @@ static const char *kMode = "mode";
static const char *kOneGbPages = "1gb-pages"; static const char *kOneGbPages = "1gb-pages";
static const char *kRdmsr = "rdmsr"; static const char *kRdmsr = "rdmsr";
static const char *kWrmsr = "wrmsr"; static const char *kWrmsr = "wrmsr";
static const char *kCacheQoS = "cache_qos";
#ifdef XMRIG_FEATURE_HWLOC #ifdef XMRIG_FEATURE_HWLOC
static const char *kNUMA = "numa"; static const char *kNUMA = "numa";
@ -89,6 +90,8 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
readMSR(Json::getValue(value, kWrmsr)); readMSR(Json::getValue(value, kWrmsr));
# endif # endif
m_cacheQoS = Json::getBool(value, kCacheQoS, m_cacheQoS);
# ifdef XMRIG_OS_LINUX # ifdef XMRIG_OS_LINUX
m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages); m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages);
# endif # endif
@ -151,6 +154,8 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
obj.AddMember(StringRef(kWrmsr), false, allocator); obj.AddMember(StringRef(kWrmsr), false, allocator);
# endif # endif
obj.AddMember(StringRef(kCacheQoS), m_cacheQoS, allocator);
# ifdef XMRIG_FEATURE_HWLOC # ifdef XMRIG_FEATURE_HWLOC
if (!m_nodeset.empty()) { if (!m_nodeset.empty()) {
Value numa(kArrayType); Value numa(kArrayType);

View file

@ -65,6 +65,7 @@ public:
inline bool isOneGbPages() const { return m_oneGbPages; } inline bool isOneGbPages() const { return m_oneGbPages; }
inline bool rdmsr() const { return m_rdmsr; } inline bool rdmsr() const { return m_rdmsr; }
inline bool wrmsr() const { return m_wrmsr; } inline bool wrmsr() const { return m_wrmsr; }
inline bool cacheQoS() const { return m_cacheQoS; }
inline Mode mode() const { return m_mode; } inline Mode mode() const { return m_mode; }
# ifdef XMRIG_FEATURE_MSR # ifdef XMRIG_FEATURE_MSR
@ -83,6 +84,8 @@ private:
bool m_wrmsr = false; bool m_wrmsr = false;
# endif # endif
bool m_cacheQoS = false;
Mode readMode(const rapidjson::Value &value) const; Mode readMode(const rapidjson::Value &value) const;
bool m_numa = true; bool m_numa = true;

View file

@ -29,6 +29,7 @@
#include "crypto/rx/Rx.h" #include "crypto/rx/Rx.h"
#include "backend/cpu/Cpu.h" #include "backend/cpu/Cpu.h"
#include "backend/cpu/CpuThread.h"
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
#include "base/tools/Chrono.h" #include "base/tools/Chrono.h"
#include "crypto/rx/RxConfig.h" #include "crypto/rx/RxConfig.h"
@ -123,14 +124,15 @@ static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value, uint64_t ma
} }
static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask) template<typename T>
static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask, T&& callback)
{ {
struct dirent **namelist; struct dirent **namelist;
int dir_entries = scandir("/dev/cpu", &namelist, dir_filter, 0); int dir_entries = scandir("/dev/cpu", &namelist, dir_filter, 0);
int errors = 0; int errors = 0;
while (dir_entries--) { while (dir_entries--) {
if (!wrmsr_on_cpu(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) { if (!callback(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) {
++errors; ++errors;
} }
@ -159,7 +161,7 @@ static bool wrmsr_modprobe()
} }
static bool wrmsr(const MsrItems &preset, bool save) static bool wrmsr(const MsrItems& preset, const std::vector<CpuThread>& threads, bool cache_qos, bool save)
{ {
if (!wrmsr_modprobe()) { if (!wrmsr_modprobe()) {
return false; return false;
@ -177,12 +179,61 @@ static bool wrmsr(const MsrItems &preset, bool save)
} }
for (const auto &i : preset) { for (const auto &i : preset) {
if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask())) { if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask(), [](uint32_t reg, uint32_t cpu, uint64_t value, uint64_t mask) { return wrmsr_on_cpu(reg, cpu, value, mask); })) {
return false; return false;
} }
} }
return true; const uint32_t n = Cpu::info()->threads();
// Which CPU cores will have access to the full L3 cache
std::vector<bool> cacheEnabled(n, false);
bool cacheQoSDisabled = threads.empty();
for (const CpuThread& t : threads) {
// If some thread has no affinity or wrong affinity, disable cache QoS
if ((t.affinity() < 0) || (t.affinity() >= n)) {
cacheQoSDisabled = true;
if (cache_qos) {
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag);
}
break;
}
cacheEnabled[t.affinity()] = true;
}
if (cache_qos && !Cpu::info()->hasCatL3()) {
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag);
cache_qos = false;
}
bool result = true;
if (cache_qos) {
result = wrmsr_on_all_cpus(0xC8F, 0, MsrItem::kNoMask, [&cacheEnabled, cacheQoSDisabled](uint32_t, uint32_t cpu, uint64_t, uint64_t) {
if (cacheQoSDisabled || (cpu >= cacheEnabled.size()) || cacheEnabled[cpu]) {
// Assign Class Of Service 0 to current CPU core (default, full L3 cache available)
if (!wrmsr_on_cpu(0xC8F, cpu, 0, MsrItem::kNoMask)) {
return false;
}
}
else {
// Disable L3 cache for Class Of Service 1
if (!wrmsr_on_cpu(0xC91, cpu, 0, MsrItem::kNoMask)) {
return false;
}
// Assign Class Of Service 1 to current CPU core
if (!wrmsr_on_cpu(0xC8F, cpu, 1ULL << 32, MsrItem::kNoMask)) {
return false;
}
}
return true;
});
}
return result;
} }
@ -216,7 +267,7 @@ void Rx::setMainLoopBounds(const std::pair<const void*, const void*>& bounds)
} // namespace xmrig } // namespace xmrig
void xmrig::Rx::msrInit(const RxConfig &config) void xmrig::Rx::msrInit(const RxConfig &config, const std::vector<CpuThread>& threads)
{ {
const auto &preset = config.msrPreset(); const auto &preset = config.msrPreset();
if (preset.empty()) { if (preset.empty()) {
@ -225,7 +276,7 @@ void xmrig::Rx::msrInit(const RxConfig &config)
const uint64_t ts = Chrono::steadyMSecs(); const uint64_t ts = Chrono::steadyMSecs();
if (wrmsr(preset, config.rdmsr())) { if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) {
LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts); LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
} }
else { else {
@ -242,7 +293,7 @@ void xmrig::Rx::msrDestroy()
const uint64_t ts = Chrono::steadyMSecs(); const uint64_t ts = Chrono::steadyMSecs();
if (!wrmsr(savedState, false)) { if (!wrmsr(savedState, std::vector<CpuThread>(), true, false)) {
LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts); LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
} }
} }

View file

@ -30,6 +30,7 @@
#include "crypto/rx/Rx.h" #include "crypto/rx/Rx.h"
#include "backend/cpu/Cpu.h" #include "backend/cpu/Cpu.h"
#include "backend/cpu/CpuThread.h"
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
#include "base/kernel/Platform.h" #include "base/kernel/Platform.h"
#include "base/tools/Chrono.h" #include "base/tools/Chrono.h"
@ -256,7 +257,7 @@ static bool wrmsr(HANDLE driver, uint32_t reg, uint64_t value, uint64_t mask)
} }
static bool wrmsr(const MsrItems &preset, bool save) static bool wrmsr(const MsrItems &preset, const std::vector<CpuThread>& threads, bool cache_qos, bool save)
{ {
bool success = true; bool success = true;
@ -282,14 +283,52 @@ static bool wrmsr(const MsrItems &preset, bool save)
} }
} }
std::thread wrmsr_thread([driver, &preset, &success]() { const uint32_t n = Cpu::info()->threads();
for (uint32_t i = 0, n = Cpu::info()->threads(); i < n; ++i) {
// Which CPU cores will have access to the full L3 cache
std::vector<bool> cacheEnabled(n, false);
bool cacheQoSDisabled = threads.empty();
for (const CpuThread& t : threads) {
// If some thread has no affinity or wrong affinity, disable cache QoS
if ((t.affinity() < 0) || (t.affinity() >= n)) {
cacheQoSDisabled = true;
if (cache_qos) {
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag);
}
break;
}
cacheEnabled[t.affinity()] = true;
}
if (cache_qos && !Cpu::info()->hasCatL3()) {
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag);
cache_qos = false;
}
std::thread wrmsr_thread([n, driver, &preset, &cacheEnabled, cache_qos, cacheQoSDisabled, &success]() {
for (uint32_t i = 0; i < n; ++i) {
if (!Platform::setThreadAffinity(i)) { if (!Platform::setThreadAffinity(i)) {
continue; continue;
} }
for (const auto &i : preset) { for (const auto &i : preset) {
success = wrmsr(driver, i.reg(), i.value(), i.mask()); success &= wrmsr(driver, i.reg(), i.value(), i.mask());
}
if (cache_qos) {
if (cacheQoSDisabled || cacheEnabled[i]) {
// Assign Class Of Service 0 to current CPU core (default, full L3 cache available)
success &= wrmsr(driver, 0xC8F, 0, MsrItem::kNoMask);
}
else {
// Disable L3 cache for Class Of Service 1
success &= wrmsr(driver, 0xC91, 0, MsrItem::kNoMask);
// Assign Class Of Service 1 to current CPU core
success &= wrmsr(driver, 0xC8F, 1ULL << 32, MsrItem::kNoMask);
}
} }
if (!success) { if (!success) {
@ -349,7 +388,7 @@ void Rx::setMainLoopBounds(const std::pair<const void*, const void*>& bounds)
} // namespace xmrig } // namespace xmrig
void xmrig::Rx::msrInit(const RxConfig &config) void xmrig::Rx::msrInit(const RxConfig &config, const std::vector<CpuThread>& threads)
{ {
const auto &preset = config.msrPreset(); const auto &preset = config.msrPreset();
if (preset.empty()) { if (preset.empty()) {
@ -358,7 +397,7 @@ void xmrig::Rx::msrInit(const RxConfig &config)
const uint64_t ts = Chrono::steadyMSecs(); const uint64_t ts = Chrono::steadyMSecs();
if (wrmsr(preset, config.rdmsr())) { if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) {
LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts); LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
} }
else { else {
@ -375,7 +414,7 @@ void xmrig::Rx::msrDestroy()
const uint64_t ts = Chrono::steadyMSecs(); const uint64_t ts = Chrono::steadyMSecs();
if (!wrmsr(savedState, false)) { if (!wrmsr(savedState, std::vector<CpuThread>(), true, false)) {
LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts); LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
} }
} }