From a2e9b3456d000e212cf3a2dab6ffd402a90535bb Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 4 Apr 2023 00:34:54 +0700 Subject: [PATCH 1/8] v6.19.3-dev --- src/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.h b/src/version.h index e013e31a..68d96fde 100644 --- a/src/version.h +++ b/src/version.h @@ -22,7 +22,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "6.19.2" +#define APP_VERSION "6.19.3-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2023 xmrig.com" @@ -30,7 +30,7 @@ #define APP_VER_MAJOR 6 #define APP_VER_MINOR 19 -#define APP_VER_PATCH 2 +#define APP_VER_PATCH 3 #ifdef _MSC_VER # if (_MSC_VER >= 1930) From c4e136314815f6892136f6be604121143fed10f9 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 7 Apr 2023 23:35:05 +0700 Subject: [PATCH 2/8] #3245 Improved algorithm negotiation for donation rounds by sending extra information about current mining job. --- src/crypto/kawpow/KPHash.h | 18 +++++------ src/net/Network.cpp | 7 ++--- src/net/Network.h | 8 ++--- src/net/strategies/DonateStrategy.cpp | 45 ++++++++++++++++++--------- src/net/strategies/DonateStrategy.h | 21 +++++++------ 5 files changed, 58 insertions(+), 41 deletions(-) diff --git a/src/crypto/kawpow/KPHash.h b/src/crypto/kawpow/KPHash.h index 15bb1902..7ce2d75e 100644 --- a/src/crypto/kawpow/KPHash.h +++ b/src/crypto/kawpow/KPHash.h @@ -7,8 +7,8 @@ * Copyright 2017-2019 XMR-Stak , * Copyright 2018 Lee Clagett * Copyright 2018-2019 tevador - * Copyright 2018-2020 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2023 SChernykh + * Copyright 2016-2023 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ #define XMRIG_KP_HASH_H -#include +#include namespace xmrig @@ -43,16 +43,16 @@ class KPHash public: static constexpr uint32_t EPOCH_LENGTH = 7500; static constexpr uint32_t PERIOD_LENGTH = 3; - static constexpr int CNT_CACHE = 11; - static constexpr int CNT_MATH = 18; - static constexpr uint32_t REGS = 32; - static constexpr uint32_t LANES = 16; + static constexpr int CNT_CACHE = 11; + static constexpr int CNT_MATH = 18; + static constexpr uint32_t REGS = 32; + static constexpr uint32_t LANES = 16; static void calculate(const KPCache& light_cache, uint32_t block_height, const uint8_t (&header_hash)[32], uint64_t nonce, uint32_t (&output)[8], uint32_t (&mix_hash)[8]); }; -} /* namespace xmrig */ +} // namespace xmrig -#endif /* XMRIG_KP_HASH_H */ +#endif // XMRIG_KP_HASH_H diff --git a/src/net/Network.cpp b/src/net/Network.cpp index 5b743d5d..a3566242 100644 --- a/src/net/Network.cpp +++ b/src/net/Network.cpp @@ -1,7 +1,7 @@ /* XMRig * Copyright (c) 2019 Howard Chu - * Copyright (c) 2018-2021 SChernykh - * Copyright (c) 2016-2021 XMRig , + * Copyright (c) 2018-2023 SChernykh + * Copyright (c) 2016-2023 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -292,8 +292,7 @@ void xmrig::Network::setJob(IClient *client, const Job &job, bool donate) } if (!donate && m_donate) { - m_donate->setAlgo(job.algorithm()); - m_donate->setProxy(client->pool().proxy()); + static_cast(m_donate)->update(client, job); } m_controller->miner()->setJob(job, donate); diff --git a/src/net/Network.h b/src/net/Network.h index 907e6110..b936c0d5 100644 --- a/src/net/Network.h +++ b/src/net/Network.h @@ -1,7 +1,7 @@ /* XMRig * Copyright (c) 2019 Howard Chu - * Copyright (c) 2018-2021 SChernykh - * Copyright (c) 2016-2021 XMRig , + * Copyright (c) 2018-2023 SChernykh + * Copyright (c) 2016-2023 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -89,7 +89,7 @@ private: }; -} /* namespace xmrig */ +} // namespace xmrig -#endif /* XMRIG_NETWORK_H */ +#endif // XMRIG_NETWORK_H diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp index 50e98889..03447a01 100644 --- a/src/net/strategies/DonateStrategy.cpp +++ b/src/net/strategies/DonateStrategy.cpp @@ -1,6 +1,6 @@ /* XMRig - * Copyright (c) 2018-2022 SChernykh - * Copyright (c) 2016-2022 XMRig , + * Copyright (c) 2018-2023 SChernykh + * Copyright (c) 2016-2023 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -48,7 +48,7 @@ static const char *kDonateHost = "donate.v2.xmrig.com"; static const char *kDonateHostTls = "donate.ssl.xmrig.com"; #endif -} /* namespace xmrig */ +} // namespace xmrig xmrig::DonateStrategy::DonateStrategy(Controller *controller, IStrategyListener *listener) : @@ -98,6 +98,17 @@ xmrig::DonateStrategy::~DonateStrategy() } +void xmrig::DonateStrategy::update(IClient *client, const Job &job) +{ + setAlgo(job.algorithm()); + setProxy(client->pool().proxy()); + + m_diff = job.diff(); + m_height = job.height(); + m_seed = job.seed(); +} + + int64_t xmrig::DonateStrategy::submit(const JobResult &result) { return m_proxy ? m_proxy->submit(result) : m_strategy->submit(result); @@ -199,13 +210,13 @@ void xmrig::DonateStrategy::onLogin(IClient *, rapidjson::Document &doc, rapidjs params.AddMember("url", m_pools[0].url().toJSON(), allocator); # endif - setAlgorithms(doc, params); + setParams(doc, params); } void xmrig::DonateStrategy::onLogin(IStrategy *, IClient *, rapidjson::Document &doc, rapidjson::Value ¶ms) { - setAlgorithms(doc, params); + setParams(doc, params); } @@ -270,12 +281,20 @@ void xmrig::DonateStrategy::idle(double min, double max) } -void xmrig::DonateStrategy::setAlgorithms(rapidjson::Document &doc, rapidjson::Value ¶ms) +void xmrig::DonateStrategy::setJob(IClient *client, const Job &job, const rapidjson::Value ¶ms) +{ + if (isActive()) { + m_listener->onJob(this, client, job, params); + } +} + + +void xmrig::DonateStrategy::setParams(rapidjson::Document &doc, rapidjson::Value ¶ms) { using namespace rapidjson; auto &allocator = doc.GetAllocator(); + auto algorithms = m_controller->miner()->algorithms(); - Algorithms algorithms = m_controller->miner()->algorithms(); const size_t index = static_cast(std::distance(algorithms.begin(), std::find(algorithms.begin(), algorithms.end(), m_algorithm))); if (index > 0 && index < algorithms.size()) { std::swap(algorithms[0], algorithms[index]); @@ -287,14 +306,12 @@ void xmrig::DonateStrategy::setAlgorithms(rapidjson::Document &doc, rapidjson::V algo.PushBack(StringRef(a.name()), allocator); } - params.AddMember("algo", algo, allocator); -} + params.AddMember("algo", algo, allocator); + params.AddMember("diff", m_diff, allocator); + params.AddMember("height", m_height, allocator); - -void xmrig::DonateStrategy::setJob(IClient *client, const Job &job, const rapidjson::Value ¶ms) -{ - if (isActive()) { - m_listener->onJob(this, client, job, params); + if (!m_seed.empty()) { + params.AddMember("seed_hash", Cvt::toHex(m_seed, doc), allocator); } } diff --git a/src/net/strategies/DonateStrategy.h b/src/net/strategies/DonateStrategy.h index 56a0580e..80ec45ca 100644 --- a/src/net/strategies/DonateStrategy.h +++ b/src/net/strategies/DonateStrategy.h @@ -1,6 +1,6 @@ /* XMRig - * Copyright (c) 2018-2022 SChernykh - * Copyright (c) 2016-2022 XMRig , + * Copyright (c) 2018-2023 SChernykh + * Copyright (c) 2016-2023 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,15 +20,12 @@ #define XMRIG_DONATESTRATEGY_H -#include - - #include "base/kernel/interfaces/IClientListener.h" #include "base/kernel/interfaces/IStrategy.h" #include "base/kernel/interfaces/IStrategyListener.h" #include "base/kernel/interfaces/ITimerListener.h" #include "base/net/stratum/Pool.h" -#include "base/tools/Object.h" +#include "base/tools/Buffer.h" namespace xmrig { @@ -36,7 +33,6 @@ namespace xmrig { class Client; class Controller; -class IStrategyListener; class DonateStrategy : public IStrategy, public IStrategyListener, public ITimerListener, public IClientListener @@ -47,6 +43,8 @@ public: DonateStrategy(Controller *controller, IStrategyListener *listener); ~DonateStrategy() override; + void update(IClient *client, const Job &job); + protected: inline bool isActive() const override { return state() == STATE_ACTIVE; } inline IClient *client() const override { return m_proxy ? m_proxy : m_strategy->client(); } @@ -88,13 +86,14 @@ private: IClient *createProxy(); void idle(double min, double max); - void setAlgorithms(rapidjson::Document &doc, rapidjson::Value ¶ms); void setJob(IClient *client, const Job &job, const rapidjson::Value ¶ms); + void setParams(rapidjson::Document &doc, rapidjson::Value ¶ms); void setResult(IClient *client, const SubmitResult &result, const char *error); void setState(State state); Algorithm m_algorithm; bool m_tls = false; + Buffer m_seed; char m_userId[65] = { 0 }; const uint64_t m_donateTime; const uint64_t m_idleTime; @@ -105,12 +104,14 @@ private: State m_state = STATE_NEW; std::vector m_pools; Timer *m_timer = nullptr; + uint64_t m_diff = 0; + uint64_t m_height = 0; uint64_t m_now = 0; uint64_t m_timestamp = 0; }; -} /* namespace xmrig */ +} // namespace xmrig -#endif /* XMRIG_DONATESTRATEGY_H */ +#endif // XMRIG_DONATESTRATEGY_H From 7f7fc363e1b28e48340102b963e4ac7b51e0a7b4 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 18 Apr 2023 21:20:45 +0200 Subject: [PATCH 3/8] Tweaked auto-tuning for Intel CPUs Alder Lake and newer CPUs have exclusive L3 cache and benefit from more threads until L3+L2 is filled. --- src/backend/cpu/platform/HwlocCpuInfo.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp index ee2cfca0..d4db2039 100644 --- a/src/backend/cpu/platform/HwlocCpuInfo.cpp +++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp @@ -298,8 +298,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith cores.reserve(m_cores); findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); }); + const bool L3_exclusive = isCacheExclusive(cache); + # ifdef XMRIG_ALGO_GHOSTRIDER - if ((algorithm == Algorithm::GHOSTRIDER_RTM) && (PUs > cores.size()) && (PUs < cores.size() * 2)) { + if ((algorithm == Algorithm::GHOSTRIDER_RTM) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) { // Don't use E-cores on Alder Lake cores.erase(std::remove_if(cores.begin(), cores.end(), [](hwloc_obj_t c) { return hwloc_bitmap_weight(c->cpuset) == 1; }), cores.end()); @@ -311,7 +313,6 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith # endif size_t L3 = cache->attr->cache.size; - const bool L3_exclusive = isCacheExclusive(cache); size_t L2 = 0; int L2_associativity = 0; size_t extra = 0; @@ -349,6 +350,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith } # ifdef XMRIG_ALGO_RANDOMX + if ((algorithm.family() == Algorithm::RANDOM_X) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) { + // Use all L3+L2 on latest Intel CPUs with P-cores, E-cores and exclusive L3 cache + cacheHashes = (L3 + L2) / scratchpad; + } if (extra == 0 && algorithm.l2() > 0) { cacheHashes = std::min(std::max(L2 / algorithm.l2(), cores.size()), cacheHashes); } From 5dcbab7e3a784a60eaa7133478e829ba1ccff493 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Sun, 21 May 2023 00:39:32 +0200 Subject: [PATCH 4/8] RandomX: optimized program generation --- src/crypto/randomx/aes_hash.cpp | 62 +++++++++----- src/crypto/randomx/intrin_portable.h | 14 ++++ src/crypto/randomx/jit_compiler_a64.cpp | 4 - src/crypto/randomx/jit_compiler_x86.cpp | 106 +++++++++++++----------- 4 files changed, 113 insertions(+), 73 deletions(-) diff --git a/src/crypto/randomx/aes_hash.cpp b/src/crypto/randomx/aes_hash.cpp index ee5989e1..8401d2c8 100644 --- a/src/crypto/randomx/aes_hash.cpp +++ b/src/crypto/randomx/aes_hash.cpp @@ -34,6 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "base/tools/Chrono.h" #include "crypto/randomx/randomx.h" #include "crypto/randomx/soft_aes.h" +#include "crypto/randomx/instruction.hpp" +#include "crypto/randomx/common.hpp" #include "crypto/rx/Profiler.h" #define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d @@ -165,6 +167,17 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); +static const rx_vec_i128 inst_mask = []() { + constexpr randomx::Instruction inst{ 0xFF, randomx::RegistersCount - 1, randomx::RegistersCount - 1, 0xFF, 0xFFFFFFFFU }; + + union { + randomx::Instruction mask[2]; + rx_vec_i128 vec; + } result = { inst, inst }; + + return result.vec; +}(); + template void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { const uint8_t* outptr = (uint8_t*)buffer; @@ -187,32 +200,41 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { state2 = rx_load_vec_i128((rx_vec_i128*)state + 2); state3 = rx_load_vec_i128((rx_vec_i128*)state + 3); - while (outptr < outputEnd) { - state0 = aesdec(state0, key0); - state1 = aesenc(state1, key0); - state2 = aesdec(state2, key4); - state3 = aesenc(state3, key4); - - state0 = aesdec(state0, key1); - state1 = aesenc(state1, key1); - state2 = aesdec(state2, key5); - state3 = aesenc(state3, key5); - - state0 = aesdec(state0, key2); - state1 = aesenc(state1, key2); - state2 = aesdec(state2, key6); - state3 = aesenc(state3, key6); - - state0 = aesdec(state0, key3); - state1 = aesenc(state1, key3); - state2 = aesdec(state2, key7); - state3 = aesenc(state3, key7); +#define TRANSFORM do { \ + state0 = aesdec(state0, key0); \ + state1 = aesenc(state1, key0); \ + state2 = aesdec(state2, key4); \ + state3 = aesenc(state3, key4); \ + state0 = aesdec(state0, key1); \ + state1 = aesenc(state1, key1); \ + state2 = aesdec(state2, key5); \ + state3 = aesenc(state3, key5); \ + state0 = aesdec(state0, key2); \ + state1 = aesenc(state1, key2); \ + state2 = aesdec(state2, key6); \ + state3 = aesenc(state3, key6); \ + state0 = aesdec(state0, key3); \ + state1 = aesenc(state1, key3); \ + state2 = aesdec(state2, key7); \ + state3 = aesenc(state3, key7); \ +} while (0) + for (int i = 0; i < 2; ++i, outptr += 64) { + TRANSFORM; rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0); rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1); rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2); rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3); + } + const rx_vec_i128 mask = inst_mask; + + while (outptr < outputEnd) { + TRANSFORM; + rx_store_vec_i128((rx_vec_i128*)outptr + 0, rx_and_vec_i128(state0, mask)); + rx_store_vec_i128((rx_vec_i128*)outptr + 1, rx_and_vec_i128(state1, mask)); + rx_store_vec_i128((rx_vec_i128*)outptr + 2, rx_and_vec_i128(state2, mask)); + rx_store_vec_i128((rx_vec_i128*)outptr + 3, rx_and_vec_i128(state3, mask)); outptr += 64; } } diff --git a/src/crypto/randomx/intrin_portable.h b/src/crypto/randomx/intrin_portable.h index df98a543..820bf685 100644 --- a/src/crypto/randomx/intrin_portable.h +++ b/src/crypto/randomx/intrin_portable.h @@ -126,6 +126,7 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { #define rx_xor_vec_f128 _mm_xor_pd #define rx_and_vec_f128 _mm_and_pd +#define rx_and_vec_i128 _mm_and_si128 #define rx_or_vec_f128 _mm_or_pd #ifdef __AES__ @@ -278,6 +279,10 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { return (rx_vec_f128)vec_and(a,b); } +FORCE_INLINE rx_vec_i128 rx_and_vec_i128(rx_vec_i128 a, rx_vec_i128 b) { + return (rx_vec_i128)vec_and(a, b); +} + FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { return (rx_vec_f128)vec_or(a,b); } @@ -444,6 +449,8 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); } +#define rx_and_vec_i128 vandq_u8 + FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); } @@ -635,6 +642,13 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { return x; } +FORCE_INLINE rx_vec_i128 rx_and_vec_i128(rx_vec_i128 a, rx_vec_i128 b) { + rx_vec_i128 x; + x.u64[0] = a.u64[0] & b.u64[0]; + x.u64[1] = a.u64[1] & b.u64[1]; + return x; +} + FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { rx_vec_f128 x; x.i.u64[0] = a.i.u64[0] | b.i.u64[0]; diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index c4f0f002..530658db 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -144,8 +144,6 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con for (uint32_t i = 0; i < program.getSize(); ++i) { Instruction& instr = program(i); - instr.src %= RegistersCount; - instr.dst %= RegistersCount; (this->*engine[instr.opcode])(instr, codePos); } @@ -204,8 +202,6 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration for (uint32_t i = 0; i < program.getSize(); ++i) { Instruction& instr = program(i); - instr.src %= RegistersCount; - instr.dst %= RegistersCount; (this->*engine[instr.opcode])(instr, codePos); } diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 7d2603e5..7f9fb3b6 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -312,11 +312,19 @@ namespace randomx { freePagedMemory(allocatedCode, allocatedSize); } + template + static FORCE_INLINE void prefetch_data(const void* data) { + rx_prefetch_nta(data); + prefetch_data(reinterpret_cast(data) + 64); + } + + template<> FORCE_INLINE void prefetch_data<0>(const void*) {} + + template static FORCE_INLINE void prefetch_data(const T& data) { prefetch_data<(sizeof(T) + 63) / 64>(&data); } + void JitCompilerX86::prepare() { - for (size_t i = 0; i < sizeof(engine); i += 64) - rx_prefetch_nta((const char*)(&engine) + i); - for (size_t i = 0; i < sizeof(RandomX_CurrentConfig); i += 64) - rx_prefetch_nta((const char*)(&RandomX_CurrentConfig) + i); + prefetch_data(engine); + prefetch_data(RandomX_CurrentConfig); } void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) { @@ -748,7 +756,7 @@ namespace randomx { template void JitCompilerX86::genAddressReg(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos); FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) { - const uint32_t dst = static_cast(instr.dst % RegistersCount) << 16; + const uint32_t dst = static_cast(instr.dst) << 16; *(uint32_t*)(code + codePos) = 0x24808d41 + dst; codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3; @@ -768,8 +776,8 @@ namespace randomx { uint32_t pos = codePos; uint8_t* const p = code + pos; - const uint32_t dst = instr.dst % RegistersCount; - const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst; + const uint32_t dst = instr.dst; + const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst; uint32_t k = 0x048d4f + (dst << 19); if (dst == RegisterNeedsDisplacement) @@ -788,8 +796,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src % RegistersCount; - const uint32_t dst = instr.dst % RegistersCount; + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -809,8 +817,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src % RegistersCount; - const uint32_t dst = instr.dst % RegistersCount; + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; if (src != dst) { *(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16); @@ -830,8 +838,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src % RegistersCount; - const uint32_t dst = instr.dst % RegistersCount; + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -851,8 +859,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src % RegistersCount; - const uint32_t dst = instr.dst % RegistersCount; + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; if (src != dst) { emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos); @@ -871,8 +879,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src % RegistersCount; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -892,8 +900,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src % RegistersCount; - const uint32_t dst = instr.dst % RegistersCount; + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; *(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16); *(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16); @@ -908,8 +916,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src % RegistersCount; - const uint32_t dst = instr.dst % RegistersCount; + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; *(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16); *(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24); @@ -923,8 +931,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src % RegistersCount; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -947,8 +955,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src % RegistersCount; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -970,8 +978,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src % RegistersCount; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; *(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40); pos += 8; @@ -985,8 +993,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src % RegistersCount; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -1011,7 +1019,7 @@ namespace randomx { uint64_t divisor = instr.getImm32(); if (!isZeroOrPowerOf2(divisor)) { - const uint32_t dst = instr.dst % RegistersCount; + const uint32_t dst = instr.dst; const uint64_t reciprocal = randomx_reciprocal_fast(divisor); if (imul_rcp_storage_used < 16) { @@ -1040,7 +1048,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t dst = instr.dst % RegistersCount; + const uint32_t dst = instr.dst; *(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16); pos += 3; @@ -1052,8 +1060,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src % RegistersCount; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; if (src != dst) { *(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16); @@ -1073,8 +1081,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src % RegistersCount; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -1094,8 +1102,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src % RegistersCount; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; if (src != dst) { *(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40); @@ -1115,8 +1123,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src % RegistersCount; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; if (src != dst) { *(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40); @@ -1136,8 +1144,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src % RegistersCount; - const uint32_t dst = instr.dst % RegistersCount; + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; if (src != dst) { *(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16); @@ -1153,7 +1161,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t dst = instr.dst % RegistersCount; + const uint64_t dst = instr.dst; *(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24); pos += 5; @@ -1182,7 +1190,7 @@ namespace randomx { prevFPOperation = pos; - const uint32_t src = instr.src % RegistersCount; + const uint32_t src = instr.src; const uint32_t dst = instr.dst % RegisterCountFlt; genAddressReg(instr, src, p, pos); @@ -1214,7 +1222,7 @@ namespace randomx { prevFPOperation = pos; - const uint32_t src = instr.src % RegistersCount; + const uint32_t src = instr.src; const uint32_t dst = instr.dst % RegisterCountFlt; genAddressReg(instr, src, p, pos); @@ -1257,7 +1265,7 @@ namespace randomx { prevFPOperation = pos; - const uint32_t src = instr.src % RegistersCount; + const uint32_t src = instr.src; const uint64_t dst = instr.dst % RegisterCountFlt; genAddressReg(instr, src, p, pos); @@ -1307,7 +1315,7 @@ namespace randomx { uint32_t pos = codePos; prevCFROUND = pos; - const uint32_t src = instr.src % RegistersCount; + const uint32_t src = instr.src; *(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16); const int rotate = (static_cast(instr.getImm32() & 63) - 2) & 63; @@ -1343,7 +1351,7 @@ namespace randomx { uint32_t pos = codePos; prevCFROUND = pos; - const uint64_t src = instr.src % RegistersCount; + const uint64_t src = instr.src; const uint64_t rotate = (static_cast(instr.getImm32() & 63) - 2) & 63; *(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40); @@ -1367,7 +1375,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const int reg = instr.dst % RegistersCount; + const int reg = instr.dst; int32_t jmp_offset = registerUsage[reg]; // if it jumps over the previous FP instruction that uses rounding, treat it as if FP instruction happened now @@ -1426,7 +1434,7 @@ namespace randomx { uint32_t pos = codePos; genAddressRegDst(instr, p, pos); - emit32(0x0604894c + (static_cast(instr.src % RegistersCount) << 19), p, pos); + emit32(0x0604894c + (static_cast(instr.src) << 19), p, pos); codePos = pos; } From 1252a4710e638965783343a14772659790a6b0d8 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 23 May 2023 14:37:09 +0200 Subject: [PATCH 5/8] RandomX: fixed undefined behavior Using an inactive member of a `union` is an undefined behavior in C++ --- src/crypto/randomx/aes_hash.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/crypto/randomx/aes_hash.cpp b/src/crypto/randomx/aes_hash.cpp index 8401d2c8..38eb4d64 100644 --- a/src/crypto/randomx/aes_hash.cpp +++ b/src/crypto/randomx/aes_hash.cpp @@ -167,16 +167,8 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); -static const rx_vec_i128 inst_mask = []() { - constexpr randomx::Instruction inst{ 0xFF, randomx::RegistersCount - 1, randomx::RegistersCount - 1, 0xFF, 0xFFFFFFFFU }; - - union { - randomx::Instruction mask[2]; - rx_vec_i128 vec; - } result = { inst, inst }; - - return result.vec; -}(); +static constexpr randomx::Instruction inst{ 0xFF, 7, 7, 0xFF, 0xFFFFFFFFU }; +alignas(16) static const randomx::Instruction inst_mask[2] = { inst, inst }; template void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { @@ -227,7 +219,8 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3); } - const rx_vec_i128 mask = inst_mask; + static_assert(sizeof(inst_mask) == sizeof(rx_vec_i128), "Incorrect inst_mask size"); + const rx_vec_i128 mask = *reinterpret_cast(inst_mask); while (outptr < outputEnd) { TRANSFORM; From 826e23b4c4ba72ddd06c6476e1d9c1050402def9 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 26 May 2023 12:46:59 +0200 Subject: [PATCH 6/8] Fixed `jccErratum` list --- src/backend/cpu/platform/BasicCpuInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index 0680d1bf..3ddce3e7 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -296,7 +296,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : // Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf m_jccErratum = ((model == 0x4E) && (stepping == 0x3)) || - ((model == 0x55) && (stepping == 0x4)) || + ((model == 0x55) && ((stepping == 0x4) || (stepping == 0x7))) || ((model == 0x5E) && (stepping == 0x3)) || ((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) || ((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) || From af87369e4f15c256e247da54a72c0cfec45ddf68 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 2 Jun 2023 09:34:26 +0200 Subject: [PATCH 7/8] Updated example scripts - Hashvault is top 1 pool now, so changed it to a smaller pool - node.xmr.to doesn't exist anymore --- scripts/pool_mine_example.cmd | 2 +- scripts/solo_mine_example.cmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pool_mine_example.cmd b/scripts/pool_mine_example.cmd index 41a42340..38f93e2e 100644 --- a/scripts/pool_mine_example.cmd +++ b/scripts/pool_mine_example.cmd @@ -16,5 +16,5 @@ :: Smaller pools also often have smaller fees/payout limits. cd /d "%~dp0" -xmrig.exe -o pool.hashvault.pro:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x +xmrig.exe -o xmrpool.eu:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x pause diff --git a/scripts/solo_mine_example.cmd b/scripts/solo_mine_example.cmd index 4cebf567..ab912293 100644 --- a/scripts/solo_mine_example.cmd +++ b/scripts/solo_mine_example.cmd @@ -12,5 +12,5 @@ :: But you will only get a payout when you find a block which can take more than a year for a single low-end PC. cd /d "%~dp0" -xmrig.exe -o node.xmr.to:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon +xmrig.exe -o YOUR_NODE_IP:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon pause From e6bf4c00779a81c10e857505daefd8bc1fc2936b Mon Sep 17 00:00:00 2001 From: xmrig Date: Fri, 2 Jun 2023 22:12:18 +0700 Subject: [PATCH 8/8] Update CHANGELOG.md --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4910d5ef..4206cd54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# v6.19.3 +- [#3245](https://github.com/xmrig/xmrig/issues/3245) Improved algorithm negotiation for donation rounds by sending extra information about current mining job. +- [#3254](https://github.com/xmrig/xmrig/pull/3254) Tweaked auto-tuning for Intel CPUs. +- [#3271](https://github.com/xmrig/xmrig/pull/3271) RandomX: optimized program generation. +- [#3273](https://github.com/xmrig/xmrig/pull/3273) RandomX: fixed undefined behavior. +- [#3275](https://github.com/xmrig/xmrig/pull/3275) RandomX: fixed `jccErratum` list. +- [#3280](https://github.com/xmrig/xmrig/pull/3280) Updated example scripts. + # v6.19.2 - [#3230](https://github.com/xmrig/xmrig/pull/3230) Fixed parsing of `TX_EXTRA_MERGE_MINING_TAG`. - [#3232](https://github.com/xmrig/xmrig/pull/3232) Added new `X-Hash-Difficulty` HTTP header.