mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-23 20:19:23 +00:00
Merge branch 'dev'
This commit is contained in:
commit
028d6503aa
25 changed files with 5019 additions and 2258 deletions
|
@ -1,3 +1,9 @@
|
||||||
|
# v6.3.0
|
||||||
|
- [#1771](https://github.com/xmrig/xmrig/pull/1771) Adopted new SSE2NEON and reduced ARM-specific changes.
|
||||||
|
- [#1774](https://github.com/xmrig/xmrig/pull/1774) RandomX: Added new option `cache_qos` in `randomx` object for cache QoS support.
|
||||||
|
- [#1777](https://github.com/xmrig/xmrig/pull/1777) Added support for upcoming Haven offshore fork.
|
||||||
|
- [#1780](https://github.com/xmrig/xmrig/pull/1780) CryptoNight OpenCL: fix for long input data.
|
||||||
|
|
||||||
# v6.2.3
|
# v6.2.3
|
||||||
- [#1745](https://github.com/xmrig/xmrig/pull/1745) AstroBWT: fixed OpenCL compilation on some systems.
|
- [#1745](https://github.com/xmrig/xmrig/pull/1745) AstroBWT: fixed OpenCL compilation on some systems.
|
||||||
- [#1749](https://github.com/xmrig/xmrig/pull/1749) KawPow: optimized CPU share verification.
|
- [#1749](https://github.com/xmrig/xmrig/pull/1749) KawPow: optimized CPU share verification.
|
||||||
|
|
|
@ -62,6 +62,7 @@ public:
|
||||||
FLAG_SSSE3,
|
FLAG_SSSE3,
|
||||||
FLAG_XOP,
|
FLAG_XOP,
|
||||||
FLAG_POPCNT,
|
FLAG_POPCNT,
|
||||||
|
FLAG_CAT_L3,
|
||||||
FLAG_MAX
|
FLAG_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -79,6 +80,7 @@ public:
|
||||||
virtual bool hasAVX2() const = 0;
|
virtual bool hasAVX2() const = 0;
|
||||||
virtual bool hasBMI2() const = 0;
|
virtual bool hasBMI2() const = 0;
|
||||||
virtual bool hasOneGbPages() const = 0;
|
virtual bool hasOneGbPages() const = 0;
|
||||||
|
virtual bool hasCatL3() const = 0;
|
||||||
virtual const char *backend() const = 0;
|
virtual const char *backend() const = 0;
|
||||||
virtual const char *brand() const = 0;
|
virtual const char *brand() const = 0;
|
||||||
virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0;
|
virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0;
|
||||||
|
|
|
@ -57,7 +57,7 @@
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt" };
|
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt", "cat_l3" };
|
||||||
static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" };
|
static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" };
|
||||||
|
|
||||||
|
|
||||||
|
@ -66,7 +66,7 @@ static inline void cpuid(uint32_t level, int32_t output[4])
|
||||||
memset(output, 0, sizeof(int32_t) * 4);
|
memset(output, 0, sizeof(int32_t) * 4);
|
||||||
|
|
||||||
# ifdef _MSC_VER
|
# ifdef _MSC_VER
|
||||||
__cpuid(output, static_cast<int>(level));
|
__cpuidex(output, static_cast<int>(level), 0);
|
||||||
# else
|
# else
|
||||||
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
|
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
|
||||||
# endif
|
# endif
|
||||||
|
@ -143,6 +143,7 @@ static inline bool has_sse2() { return has_feature(PROCESSOR_INFO,
|
||||||
static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); }
|
static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); }
|
||||||
static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); }
|
static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); }
|
||||||
static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); }
|
static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); }
|
||||||
|
static inline bool has_cat_l3() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); }
|
||||||
|
|
||||||
|
|
||||||
} // namespace xmrig
|
} // namespace xmrig
|
||||||
|
@ -178,6 +179,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||||
m_flags.set(FLAG_SSSE3, has_ssse3());
|
m_flags.set(FLAG_SSSE3, has_ssse3());
|
||||||
m_flags.set(FLAG_XOP, has_xop());
|
m_flags.set(FLAG_XOP, has_xop());
|
||||||
m_flags.set(FLAG_POPCNT, has_popcnt());
|
m_flags.set(FLAG_POPCNT, has_popcnt());
|
||||||
|
m_flags.set(FLAG_CAT_L3, has_cat_l3());
|
||||||
|
|
||||||
# ifdef XMRIG_FEATURE_ASM
|
# ifdef XMRIG_FEATURE_ASM
|
||||||
if (hasAES()) {
|
if (hasAES()) {
|
||||||
|
|
|
@ -51,6 +51,7 @@ protected:
|
||||||
inline bool hasAVX2() const override { return has(FLAG_AVX2); }
|
inline bool hasAVX2() const override { return has(FLAG_AVX2); }
|
||||||
inline bool hasBMI2() const override { return has(FLAG_BMI2); }
|
inline bool hasBMI2() const override { return has(FLAG_BMI2); }
|
||||||
inline bool hasOneGbPages() const override { return has(FLAG_PDPE1GB); }
|
inline bool hasOneGbPages() const override { return has(FLAG_PDPE1GB); }
|
||||||
|
inline bool hasCatL3() const override { return has(FLAG_CAT_L3); }
|
||||||
inline const char *brand() const override { return m_brand; }
|
inline const char *brand() const override { return m_brand; }
|
||||||
inline MsrMod msrMod() const override { return m_msrMod; }
|
inline MsrMod msrMod() const override { return m_msrMod; }
|
||||||
inline size_t cores() const override { return 0; }
|
inline size_t cores() const override { return 0; }
|
||||||
|
|
|
@ -71,7 +71,7 @@ inline ulong getIdx()
|
||||||
|
|
||||||
|
|
||||||
__attribute__((reqd_work_group_size(8, 8, 1)))
|
__attribute__((reqd_work_group_size(8, 8, 1)))
|
||||||
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
__kernel void cn0(__global ulong *input, int inlen, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
||||||
{
|
{
|
||||||
uint ExpandedKey1[40];
|
uint ExpandedKey1[40];
|
||||||
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
|
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
|
||||||
|
@ -109,34 +109,25 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
||||||
if (get_local_id(1) == 0) {
|
if (get_local_id(1) == 0) {
|
||||||
__local ulong* State = State_buf + get_local_id(0) * 25;
|
__local ulong* State = State_buf + get_local_id(0) * 25;
|
||||||
|
|
||||||
((__local ulong8 *)State)[0] = vload8(0, input);
|
#pragma unroll
|
||||||
State[8] = input[8];
|
for (int i = 0; i < 25; ++i) {
|
||||||
State[9] = input[9];
|
State[i] = 0;
|
||||||
State[10] = input[10];
|
}
|
||||||
State[11] = input[11];
|
|
||||||
State[12] = input[12];
|
|
||||||
State[13] = input[13];
|
|
||||||
State[14] = input[14];
|
|
||||||
State[15] = input[15];
|
|
||||||
|
|
||||||
|
// Input length must be a multiple of 136 and padded on the host side
|
||||||
|
for (int i = 0; inlen > 0; i += 17, inlen -= 136) {
|
||||||
|
#pragma unroll
|
||||||
|
for (int j = 0; j < 17; ++j) {
|
||||||
|
State[j] ^= input[i + j];
|
||||||
|
}
|
||||||
|
if (i == 0) {
|
||||||
((__local uint *)State)[9] &= 0x00FFFFFFU;
|
((__local uint *)State)[9] &= 0x00FFFFFFU;
|
||||||
((__local uint *)State)[9] |= (((uint)get_global_id(0)) & 0xFF) << 24;
|
((__local uint *)State)[9] |= (((uint)get_global_id(0)) & 0xFF) << 24;
|
||||||
((__local uint *)State)[10] &= 0xFF000000U;
|
((__local uint *)State)[10] &= 0xFF000000U;
|
||||||
/* explicit cast to `uint` is required because some OpenCL implementations (e.g. NVIDIA)
|
|
||||||
* handle get_global_id and get_global_offset as signed long long int and add
|
|
||||||
* 0xFFFFFFFF... to `get_global_id` if we set on host side a 32bit offset where the first bit is `1`
|
|
||||||
* (even if it is correct casted to unsigned on the host)
|
|
||||||
*/
|
|
||||||
((__local uint *)State)[10] |= (((uint)get_global_id(0) >> 8));
|
((__local uint *)State)[10] |= (((uint)get_global_id(0) >> 8));
|
||||||
|
|
||||||
// Last bit of padding
|
|
||||||
State[16] = 0x8000000000000000UL;
|
|
||||||
|
|
||||||
for (int i = 17; i < 25; ++i) {
|
|
||||||
State[i] = 0x00UL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
keccakf1600_2(State);
|
keccakf1600_2(State);
|
||||||
|
}
|
||||||
|
|
||||||
#pragma unroll 1
|
#pragma unroll 1
|
||||||
for (int i = 0; i < 25; ++i) {
|
for (int i = 0; i < 25; ++i) {
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -38,10 +38,11 @@ void xmrig::Cn0Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t th
|
||||||
|
|
||||||
|
|
||||||
// __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
// __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
||||||
void xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
|
void xmrig::Cn0Kernel::setArgs(cl_mem input, int inlen, cl_mem scratchpads, cl_mem states, uint32_t threads)
|
||||||
{
|
{
|
||||||
setArg(0, sizeof(cl_mem), &input);
|
setArg(0, sizeof(cl_mem), &input);
|
||||||
setArg(1, sizeof(cl_mem), &scratchpads);
|
setArg(1, sizeof(int), &inlen);
|
||||||
setArg(2, sizeof(cl_mem), &states);
|
setArg(2, sizeof(cl_mem), &scratchpads);
|
||||||
setArg(3, sizeof(uint32_t), &threads);
|
setArg(3, sizeof(cl_mem), &states);
|
||||||
|
setArg(4, sizeof(uint32_t), &threads);
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,7 @@ public:
|
||||||
inline Cn0Kernel(cl_program program) : OclKernel(program, "cn0") {}
|
inline Cn0Kernel(cl_program program) : OclKernel(program, "cn0") {}
|
||||||
|
|
||||||
void enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
|
void enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
|
||||||
void setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
|
void setArgs(cl_mem input, int inlen, cl_mem scratchpads, cl_mem states, uint32_t threads);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -122,10 +122,16 @@ void xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
|
||||||
throw std::length_error("job size too big");
|
throw std::length_error("job size too big");
|
||||||
}
|
}
|
||||||
|
|
||||||
blob[job.size()] = 0x01;
|
const int inlen = static_cast<int>(job.size() + 136 - (job.size() % 136));
|
||||||
memset(blob + job.size() + 1, 0, Job::kMaxBlobSize - job.size() - 1);
|
|
||||||
|
|
||||||
enqueueWriteBuffer(m_input, CL_TRUE, 0, Job::kMaxBlobSize, blob);
|
blob[job.size()] = 0x01;
|
||||||
|
memset(blob + job.size() + 1, 0, inlen - job.size() - 1);
|
||||||
|
|
||||||
|
blob[inlen - 1] |= 0x80;
|
||||||
|
|
||||||
|
enqueueWriteBuffer(m_input, CL_TRUE, 0, inlen, blob);
|
||||||
|
|
||||||
|
m_cn0->setArg(1, sizeof(int), &inlen);
|
||||||
|
|
||||||
if (m_algorithm == Algorithm::CN_R && m_height != job.height()) {
|
if (m_algorithm == Algorithm::CN_R && m_height != job.height()) {
|
||||||
delete m_cn1;
|
delete m_cn1;
|
||||||
|
@ -152,7 +158,7 @@ void xmrig::OclCnRunner::build()
|
||||||
OclBaseRunner::build();
|
OclBaseRunner::build();
|
||||||
|
|
||||||
m_cn0 = new Cn0Kernel(m_program);
|
m_cn0 = new Cn0Kernel(m_program);
|
||||||
m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity);
|
m_cn0->setArgs(m_input, 0, m_scratchpads, m_states, m_intensity);
|
||||||
|
|
||||||
m_cn2 = new Cn2Kernel(m_program);
|
m_cn2 = new Cn2Kernel(m_program);
|
||||||
m_cn2->setArgs(m_scratchpads, m_states, m_branches, m_intensity);
|
m_cn2->setArgs(m_scratchpads, m_states, m_branches, m_intensity);
|
||||||
|
|
|
@ -36,9 +36,7 @@ static const rapidjson::Value kNullValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
xmrig::JsonChain::JsonChain()
|
xmrig::JsonChain::JsonChain() = default;
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool xmrig::JsonChain::add(rapidjson::Document &&doc)
|
bool xmrig::JsonChain::add(rapidjson::Document &&doc)
|
||||||
|
@ -66,8 +64,10 @@ bool xmrig::JsonChain::addFile(const char *fileName)
|
||||||
if (doc.HasParseError()) {
|
if (doc.HasParseError()) {
|
||||||
const size_t offset = doc.GetErrorOffset();
|
const size_t offset = doc.GetErrorOffset();
|
||||||
|
|
||||||
size_t line, pos;
|
size_t line;
|
||||||
|
size_t pos;
|
||||||
std::vector<std::string> s;
|
std::vector<std::string> s;
|
||||||
|
|
||||||
if (Json::convertOffset(fileName, offset, line, pos, s)) {
|
if (Json::convertOffset(fileName, offset, line, pos, s)) {
|
||||||
for (const auto& t : s) {
|
for (const auto& t : s) {
|
||||||
LOG_ERR("%s", t.c_str());
|
LOG_ERR("%s", t.c_str());
|
||||||
|
|
|
@ -73,6 +73,16 @@ const char *xmrig::Tags::randomx()
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef XMRIG_PROXY_PROJECT
|
||||||
|
const char *xmrig::Tags::proxy()
|
||||||
|
{
|
||||||
|
static const char *tag = MAGENTA_BG_BOLD(WHITE_BOLD_S " proxy ");
|
||||||
|
|
||||||
|
return tag;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef XMRIG_FEATURE_CUDA
|
#ifdef XMRIG_FEATURE_CUDA
|
||||||
const char *xmrig::Tags::nvidia()
|
const char *xmrig::Tags::nvidia()
|
||||||
{
|
{
|
||||||
|
|
|
@ -42,6 +42,10 @@ public:
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
# ifdef XMRIG_PROXY_PROJECT
|
||||||
|
static const char *proxy();
|
||||||
|
# endif
|
||||||
|
|
||||||
# ifdef XMRIG_FEATURE_CUDA
|
# ifdef XMRIG_FEATURE_CUDA
|
||||||
static const char *nvidia();
|
static const char *nvidia();
|
||||||
# endif
|
# endif
|
||||||
|
|
|
@ -45,7 +45,9 @@ class Job
|
||||||
public:
|
public:
|
||||||
// Max blob size is 84 (75 fixed + 9 variable), aligned to 96. https://github.com/xmrig/xmrig/issues/1 Thanks fireice-uk.
|
// Max blob size is 84 (75 fixed + 9 variable), aligned to 96. https://github.com/xmrig/xmrig/issues/1 Thanks fireice-uk.
|
||||||
// SECOR increase requirements for blob size: https://github.com/xmrig/xmrig/issues/913
|
// SECOR increase requirements for blob size: https://github.com/xmrig/xmrig/issues/913
|
||||||
static constexpr const size_t kMaxBlobSize = 128;
|
// Haven (XHV) offshore increases requirements by adding pricing_record struct (192 bytes) to block_header.
|
||||||
|
// Round it up to 408 (136*3) for a convenient keccak calculation in OpenCL
|
||||||
|
static constexpr const size_t kMaxBlobSize = 408;
|
||||||
static constexpr const size_t kMaxSeedSize = 32;
|
static constexpr const size_t kMaxSeedSize = 32;
|
||||||
|
|
||||||
Job() = default;
|
Job() = default;
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
"1gb-pages": false,
|
"1gb-pages": false,
|
||||||
"rdmsr": true,
|
"rdmsr": true,
|
||||||
"wrmsr": true,
|
"wrmsr": true,
|
||||||
|
"cache_qos": false,
|
||||||
"numa": true
|
"numa": true
|
||||||
},
|
},
|
||||||
"cpu": {
|
"cpu": {
|
||||||
|
|
|
@ -33,7 +33,6 @@
|
||||||
#include "crypto/cn/CryptoNight_monero.h"
|
#include "crypto/cn/CryptoNight_monero.h"
|
||||||
#include "crypto/cn/CryptoNight.h"
|
#include "crypto/cn/CryptoNight.h"
|
||||||
#include "crypto/cn/soft_aes.h"
|
#include "crypto/cn/soft_aes.h"
|
||||||
#include "crypto/common/portable/mm_malloc.h"
|
|
||||||
|
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
|
@ -68,34 +67,6 @@ static inline void do_skein_hash(const uint8_t *input, size_t len, uint8_t *outp
|
||||||
void (* const extra_hashes[4])(const uint8_t *, size_t, uint8_t *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
|
void (* const extra_hashes[4])(const uint8_t *, size_t, uint8_t *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
|
||||||
|
|
||||||
|
|
||||||
static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b)
|
|
||||||
{
|
|
||||||
return vcombine_u64(vcreate_u64(b), vcreate_u64(a));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#if __ARM_FEATURE_CRYPTO
|
|
||||||
static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
|
|
||||||
{
|
|
||||||
alignas(16) const __m128i zero = { 0 };
|
|
||||||
return veorq_u8(vaesmcq_u8(vaeseq_u8(v, zero)), rkey );
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
|
|
||||||
{
|
|
||||||
alignas(16) const __m128i zero = { 0 };
|
|
||||||
return zero;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* this one was not implemented yet so here it is */
|
|
||||||
static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i a)
|
|
||||||
{
|
|
||||||
return vgetq_lane_u64(a, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#if defined (__arm64__) || defined (__aarch64__)
|
#if defined (__arm64__) || defined (__aarch64__)
|
||||||
static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
|
static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
|
||||||
{
|
{
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -28,7 +28,7 @@
|
||||||
|
|
||||||
|
|
||||||
#if defined(XMRIG_ARM)
|
#if defined(XMRIG_ARM)
|
||||||
# include "crypto/cn/SSE2NEON.h"
|
# include "crypto/cn/sse2neon.h"
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
# include <x86intrin.h>
|
# include <x86intrin.h>
|
||||||
#else
|
#else
|
||||||
|
|
4151
src/crypto/cn/sse2neon.h
Normal file
4151
src/crypto/cn/sse2neon.h
Normal file
File diff suppressed because it is too large
Load diff
|
@ -28,6 +28,7 @@
|
||||||
#include "crypto/rx/Rx.h"
|
#include "crypto/rx/Rx.h"
|
||||||
#include "backend/common/Tags.h"
|
#include "backend/common/Tags.h"
|
||||||
#include "backend/cpu/CpuConfig.h"
|
#include "backend/cpu/CpuConfig.h"
|
||||||
|
#include "backend/cpu/CpuThreads.h"
|
||||||
#include "base/io/log/Log.h"
|
#include "base/io/log/Log.h"
|
||||||
#include "base/io/log/Tags.h"
|
#include "base/io/log/Tags.h"
|
||||||
#include "crypto/rx/RxConfig.h"
|
#include "crypto/rx/RxConfig.h"
|
||||||
|
@ -78,7 +79,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, const CpuConfig &cp
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!msrInitialized) {
|
if (!msrInitialized) {
|
||||||
msrInit(config);
|
msrInit(config, cpu.threads().get(job.algorithm()).data());
|
||||||
msrInitialized = true;
|
msrInitialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,7 +131,7 @@ void xmrig::Rx::init(IRxListener *listener)
|
||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_FEATURE_MSR
|
#ifndef XMRIG_FEATURE_MSR
|
||||||
void xmrig::Rx::msrInit(const RxConfig &)
|
void xmrig::Rx::msrInit(const RxConfig &, const std::vector<CpuThread> &)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
#include "crypto/common/HugePagesInfo.h"
|
#include "crypto/common/HugePagesInfo.h"
|
||||||
|
@ -41,6 +42,7 @@ namespace xmrig
|
||||||
|
|
||||||
class Algorithm;
|
class Algorithm;
|
||||||
class CpuConfig;
|
class CpuConfig;
|
||||||
|
class CpuThread;
|
||||||
class IRxListener;
|
class IRxListener;
|
||||||
class Job;
|
class Job;
|
||||||
class RxConfig;
|
class RxConfig;
|
||||||
|
@ -62,7 +64,7 @@ public:
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static void msrInit(const RxConfig &config);
|
static void msrInit(const RxConfig &config, const std::vector<CpuThread>& threads);
|
||||||
static void msrDestroy();
|
static void msrDestroy();
|
||||||
static void setupMainLoopExceptionFrame();
|
static void setupMainLoopExceptionFrame();
|
||||||
};
|
};
|
||||||
|
|
|
@ -51,6 +51,7 @@ static const char *kMode = "mode";
|
||||||
static const char *kOneGbPages = "1gb-pages";
|
static const char *kOneGbPages = "1gb-pages";
|
||||||
static const char *kRdmsr = "rdmsr";
|
static const char *kRdmsr = "rdmsr";
|
||||||
static const char *kWrmsr = "wrmsr";
|
static const char *kWrmsr = "wrmsr";
|
||||||
|
static const char *kCacheQoS = "cache_qos";
|
||||||
|
|
||||||
#ifdef XMRIG_FEATURE_HWLOC
|
#ifdef XMRIG_FEATURE_HWLOC
|
||||||
static const char *kNUMA = "numa";
|
static const char *kNUMA = "numa";
|
||||||
|
@ -89,6 +90,8 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
|
||||||
readMSR(Json::getValue(value, kWrmsr));
|
readMSR(Json::getValue(value, kWrmsr));
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
m_cacheQoS = Json::getBool(value, kCacheQoS, m_cacheQoS);
|
||||||
|
|
||||||
# ifdef XMRIG_OS_LINUX
|
# ifdef XMRIG_OS_LINUX
|
||||||
m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages);
|
m_oneGbPages = Json::getBool(value, kOneGbPages, m_oneGbPages);
|
||||||
# endif
|
# endif
|
||||||
|
@ -151,6 +154,8 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
|
||||||
obj.AddMember(StringRef(kWrmsr), false, allocator);
|
obj.AddMember(StringRef(kWrmsr), false, allocator);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
obj.AddMember(StringRef(kCacheQoS), m_cacheQoS, allocator);
|
||||||
|
|
||||||
# ifdef XMRIG_FEATURE_HWLOC
|
# ifdef XMRIG_FEATURE_HWLOC
|
||||||
if (!m_nodeset.empty()) {
|
if (!m_nodeset.empty()) {
|
||||||
Value numa(kArrayType);
|
Value numa(kArrayType);
|
||||||
|
|
|
@ -65,6 +65,7 @@ public:
|
||||||
inline bool isOneGbPages() const { return m_oneGbPages; }
|
inline bool isOneGbPages() const { return m_oneGbPages; }
|
||||||
inline bool rdmsr() const { return m_rdmsr; }
|
inline bool rdmsr() const { return m_rdmsr; }
|
||||||
inline bool wrmsr() const { return m_wrmsr; }
|
inline bool wrmsr() const { return m_wrmsr; }
|
||||||
|
inline bool cacheQoS() const { return m_cacheQoS; }
|
||||||
inline Mode mode() const { return m_mode; }
|
inline Mode mode() const { return m_mode; }
|
||||||
|
|
||||||
# ifdef XMRIG_FEATURE_MSR
|
# ifdef XMRIG_FEATURE_MSR
|
||||||
|
@ -83,6 +84,8 @@ private:
|
||||||
bool m_wrmsr = false;
|
bool m_wrmsr = false;
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
bool m_cacheQoS = false;
|
||||||
|
|
||||||
Mode readMode(const rapidjson::Value &value) const;
|
Mode readMode(const rapidjson::Value &value) const;
|
||||||
|
|
||||||
bool m_numa = true;
|
bool m_numa = true;
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
|
|
||||||
#include "crypto/rx/Rx.h"
|
#include "crypto/rx/Rx.h"
|
||||||
#include "backend/cpu/Cpu.h"
|
#include "backend/cpu/Cpu.h"
|
||||||
|
#include "backend/cpu/CpuThread.h"
|
||||||
#include "base/io/log/Log.h"
|
#include "base/io/log/Log.h"
|
||||||
#include "base/tools/Chrono.h"
|
#include "base/tools/Chrono.h"
|
||||||
#include "crypto/rx/RxConfig.h"
|
#include "crypto/rx/RxConfig.h"
|
||||||
|
@ -123,14 +124,15 @@ static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value, uint64_t ma
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask)
|
template<typename T>
|
||||||
|
static bool wrmsr_on_all_cpus(uint32_t reg, uint64_t value, uint64_t mask, T&& callback)
|
||||||
{
|
{
|
||||||
struct dirent **namelist;
|
struct dirent **namelist;
|
||||||
int dir_entries = scandir("/dev/cpu", &namelist, dir_filter, 0);
|
int dir_entries = scandir("/dev/cpu", &namelist, dir_filter, 0);
|
||||||
int errors = 0;
|
int errors = 0;
|
||||||
|
|
||||||
while (dir_entries--) {
|
while (dir_entries--) {
|
||||||
if (!wrmsr_on_cpu(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) {
|
if (!callback(reg, strtoul(namelist[dir_entries]->d_name, nullptr, 10), value, mask)) {
|
||||||
++errors;
|
++errors;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -159,7 +161,7 @@ static bool wrmsr_modprobe()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool wrmsr(const MsrItems &preset, bool save)
|
static bool wrmsr(const MsrItems& preset, const std::vector<CpuThread>& threads, bool cache_qos, bool save)
|
||||||
{
|
{
|
||||||
if (!wrmsr_modprobe()) {
|
if (!wrmsr_modprobe()) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -177,12 +179,66 @@ static bool wrmsr(const MsrItems &preset, bool save)
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &i : preset) {
|
for (const auto &i : preset) {
|
||||||
if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask())) {
|
if (!wrmsr_on_all_cpus(i.reg(), i.value(), i.mask(), [](uint32_t reg, uint32_t cpu, uint64_t value, uint64_t mask) { return wrmsr_on_cpu(reg, cpu, value, mask); })) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const uint32_t n = Cpu::info()->threads();
|
||||||
|
|
||||||
|
// Which CPU cores will have access to the full L3 cache
|
||||||
|
std::vector<bool> cacheEnabled(n, false);
|
||||||
|
bool cacheQoSDisabled = threads.empty();
|
||||||
|
|
||||||
|
for (const CpuThread& t : threads) {
|
||||||
|
// If some thread has no affinity or wrong affinity, disable cache QoS
|
||||||
|
if ((t.affinity() < 0) || (t.affinity() >= n)) {
|
||||||
|
cacheQoSDisabled = true;
|
||||||
|
if (cache_qos) {
|
||||||
|
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
cacheEnabled[t.affinity()] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cache_qos && !Cpu::info()->hasCatL3()) {
|
||||||
|
if (!threads.empty()) {
|
||||||
|
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag);
|
||||||
|
}
|
||||||
|
cache_qos = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool result = true;
|
||||||
|
|
||||||
|
if (cache_qos) {
|
||||||
|
result = wrmsr_on_all_cpus(0xC8F, 0, MsrItem::kNoMask, [&cacheEnabled, cacheQoSDisabled](uint32_t, uint32_t cpu, uint64_t, uint64_t) {
|
||||||
|
if (cacheQoSDisabled || (cpu >= cacheEnabled.size()) || cacheEnabled[cpu]) {
|
||||||
|
// Assign Class Of Service 0 to current CPU core (default, full L3 cache available)
|
||||||
|
if (!wrmsr_on_cpu(0xC8F, cpu, 0, MsrItem::kNoMask)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Disable L3 cache for Class Of Service 1
|
||||||
|
if (!wrmsr_on_cpu(0xC91, cpu, 0, MsrItem::kNoMask)) {
|
||||||
|
// Some CPUs don't let set it to all zeros
|
||||||
|
if (!wrmsr_on_cpu(0xC91, cpu, 1, MsrItem::kNoMask)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assign Class Of Service 1 to current CPU core
|
||||||
|
if (!wrmsr_on_cpu(0xC8F, cpu, 1ULL << 32, MsrItem::kNoMask)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -216,7 +272,7 @@ void Rx::setMainLoopBounds(const std::pair<const void*, const void*>& bounds)
|
||||||
} // namespace xmrig
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
void xmrig::Rx::msrInit(const RxConfig &config)
|
void xmrig::Rx::msrInit(const RxConfig &config, const std::vector<CpuThread>& threads)
|
||||||
{
|
{
|
||||||
const auto &preset = config.msrPreset();
|
const auto &preset = config.msrPreset();
|
||||||
if (preset.empty()) {
|
if (preset.empty()) {
|
||||||
|
@ -225,7 +281,7 @@ void xmrig::Rx::msrInit(const RxConfig &config)
|
||||||
|
|
||||||
const uint64_t ts = Chrono::steadyMSecs();
|
const uint64_t ts = Chrono::steadyMSecs();
|
||||||
|
|
||||||
if (wrmsr(preset, config.rdmsr())) {
|
if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) {
|
||||||
LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
|
LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -242,7 +298,7 @@ void xmrig::Rx::msrDestroy()
|
||||||
|
|
||||||
const uint64_t ts = Chrono::steadyMSecs();
|
const uint64_t ts = Chrono::steadyMSecs();
|
||||||
|
|
||||||
if (!wrmsr(savedState, false)) {
|
if (!wrmsr(savedState, std::vector<CpuThread>(), true, false)) {
|
||||||
LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
|
LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
|
|
||||||
#include "crypto/rx/Rx.h"
|
#include "crypto/rx/Rx.h"
|
||||||
#include "backend/cpu/Cpu.h"
|
#include "backend/cpu/Cpu.h"
|
||||||
|
#include "backend/cpu/CpuThread.h"
|
||||||
#include "base/io/log/Log.h"
|
#include "base/io/log/Log.h"
|
||||||
#include "base/kernel/Platform.h"
|
#include "base/kernel/Platform.h"
|
||||||
#include "base/tools/Chrono.h"
|
#include "base/tools/Chrono.h"
|
||||||
|
@ -256,7 +257,7 @@ static bool wrmsr(HANDLE driver, uint32_t reg, uint64_t value, uint64_t mask)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool wrmsr(const MsrItems &preset, bool save)
|
static bool wrmsr(const MsrItems &preset, const std::vector<CpuThread>& threads, bool cache_qos, bool save)
|
||||||
{
|
{
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
|
@ -282,14 +283,59 @@ static bool wrmsr(const MsrItems &preset, bool save)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::thread wrmsr_thread([driver, &preset, &success]() {
|
const uint32_t n = Cpu::info()->threads();
|
||||||
for (uint32_t i = 0, n = Cpu::info()->threads(); i < n; ++i) {
|
|
||||||
|
// Which CPU cores will have access to the full L3 cache
|
||||||
|
std::vector<bool> cacheEnabled(n, false);
|
||||||
|
bool cacheQoSDisabled = threads.empty();
|
||||||
|
|
||||||
|
for (const CpuThread& t : threads) {
|
||||||
|
// If some thread has no affinity or wrong affinity, disable cache QoS
|
||||||
|
if ((t.affinity() < 0) || (t.affinity() >= n)) {
|
||||||
|
cacheQoSDisabled = true;
|
||||||
|
if (cache_qos) {
|
||||||
|
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "Cache QoS can only be enabled when all mining threads have affinity set", tag);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
cacheEnabled[t.affinity()] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cache_qos && !Cpu::info()->hasCatL3()) {
|
||||||
|
if (!threads.empty()) {
|
||||||
|
LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "This CPU doesn't support cat_l3, cache QoS is unavailable", tag);
|
||||||
|
}
|
||||||
|
cache_qos = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::thread wrmsr_thread([n, driver, &preset, &cacheEnabled, cache_qos, cacheQoSDisabled, &success]() {
|
||||||
|
for (uint32_t i = 0; i < n; ++i) {
|
||||||
if (!Platform::setThreadAffinity(i)) {
|
if (!Platform::setThreadAffinity(i)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &i : preset) {
|
for (const auto &i : preset) {
|
||||||
success = wrmsr(driver, i.reg(), i.value(), i.mask());
|
success &= wrmsr(driver, i.reg(), i.value(), i.mask());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cache_qos) {
|
||||||
|
if (cacheQoSDisabled || cacheEnabled[i]) {
|
||||||
|
// Assign Class Of Service 0 to current CPU core (default, full L3 cache available)
|
||||||
|
success &= wrmsr(driver, 0xC8F, 0, MsrItem::kNoMask);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Disable L3 cache for Class Of Service 1
|
||||||
|
if (!wrmsr(driver, 0xC91, 0, MsrItem::kNoMask)) {
|
||||||
|
// Some CPUs don't let set it to all zeros
|
||||||
|
if (!wrmsr(driver, 0xC91, 1, MsrItem::kNoMask)) {
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assign Class Of Service 1 to current CPU core
|
||||||
|
success &= wrmsr(driver, 0xC8F, 1ULL << 32, MsrItem::kNoMask);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
@ -349,7 +395,7 @@ void Rx::setMainLoopBounds(const std::pair<const void*, const void*>& bounds)
|
||||||
} // namespace xmrig
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
void xmrig::Rx::msrInit(const RxConfig &config)
|
void xmrig::Rx::msrInit(const RxConfig &config, const std::vector<CpuThread>& threads)
|
||||||
{
|
{
|
||||||
const auto &preset = config.msrPreset();
|
const auto &preset = config.msrPreset();
|
||||||
if (preset.empty()) {
|
if (preset.empty()) {
|
||||||
|
@ -358,7 +404,7 @@ void xmrig::Rx::msrInit(const RxConfig &config)
|
||||||
|
|
||||||
const uint64_t ts = Chrono::steadyMSecs();
|
const uint64_t ts = Chrono::steadyMSecs();
|
||||||
|
|
||||||
if (wrmsr(preset, config.rdmsr())) {
|
if (wrmsr(preset, threads, config.cacheQoS(), config.rdmsr())) {
|
||||||
LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
|
LOG_NOTICE(CLEAR "%s" GREEN_BOLD_S "register values for \"%s\" preset has been set successfully" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, config.msrPresetName(), Chrono::steadyMSecs() - ts);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -375,7 +421,7 @@ void xmrig::Rx::msrDestroy()
|
||||||
|
|
||||||
const uint64_t ts = Chrono::steadyMSecs();
|
const uint64_t ts = Chrono::steadyMSecs();
|
||||||
|
|
||||||
if (!wrmsr(savedState, false)) {
|
if (!wrmsr(savedState, std::vector<CpuThread>(), true, false)) {
|
||||||
LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
|
LOG_ERR(CLEAR "%s" RED_BOLD_S "failed to restore initial state" BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,15 +28,15 @@
|
||||||
#define APP_ID "xmrig"
|
#define APP_ID "xmrig"
|
||||||
#define APP_NAME "XMRig"
|
#define APP_NAME "XMRig"
|
||||||
#define APP_DESC "XMRig miner"
|
#define APP_DESC "XMRig miner"
|
||||||
#define APP_VERSION "6.2.3"
|
#define APP_VERSION "6.3.0-dev"
|
||||||
#define APP_DOMAIN "xmrig.com"
|
#define APP_DOMAIN "xmrig.com"
|
||||||
#define APP_SITE "www.xmrig.com"
|
#define APP_SITE "www.xmrig.com"
|
||||||
#define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com"
|
#define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com"
|
||||||
#define APP_KIND "miner"
|
#define APP_KIND "miner"
|
||||||
|
|
||||||
#define APP_VER_MAJOR 6
|
#define APP_VER_MAJOR 6
|
||||||
#define APP_VER_MINOR 2
|
#define APP_VER_MINOR 3
|
||||||
#define APP_VER_PATCH 3
|
#define APP_VER_PATCH 0
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# if (_MSC_VER >= 1920)
|
# if (_MSC_VER >= 1920)
|
||||||
|
|
Loading…
Reference in a new issue