From ce09c5b0893d0ccb68c2f370cb63b287782d94a6 Mon Sep 17 00:00:00 2001 From: Your Name <you@example.com> Date: Sat, 4 May 2024 11:06:10 +0800 Subject: [PATCH] Final adjustments to make RandomX truly throttled --- sccache.log | 0 src/crypto/randomx/dataset.cpp | 70 ++++++++++++------- src/crypto/randomx/jit_compiler_x86_static.S | 27 +++++++ .../randomx/jit_compiler_x86_static.asm | 16 +++++ src/crypto/randomx/randomx.cpp | 10 +++ src/crypto/randomx/superscalar.cpp | 5 ++ src/crypto/rx/RxDataset.cpp | 7 +- src/donate.h | 4 +- 8 files changed, 110 insertions(+), 29 deletions(-) create mode 100644 sccache.log diff --git a/sccache.log b/sccache.log new file mode 100644 index 000000000..e69de29bb diff --git a/src/crypto/randomx/dataset.cpp b/src/crypto/randomx/dataset.cpp index f03cd3c07..e6e896728 100644 --- a/src/crypto/randomx/dataset.cpp +++ b/src/crypto/randomx/dataset.cpp @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * https://github.com/P-H-C/phc-winner-argon2 * Copyright 2015 * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves -*/ + */ #include <new> #include <algorithm> @@ -40,6 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <cstring> #include <limits> #include <cstring> +#include <thread> +#include <chrono> #include "crypto/randomx/common.hpp" #include "crypto/randomx/dataset.hpp" @@ -54,31 +56,35 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "3rdparty/argon2/include/argon2.h" #include "3rdparty/argon2/lib/core.h" -//static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); +// static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unexpected value of ARGON2_BLOCK_SIZE"); -namespace randomx { +namespace randomx +{ - template<class Allocator> - void deallocCache(randomx_cache* cache) { - if (cache->memory != nullptr) { + template <class Allocator> + void deallocCache(randomx_cache *cache) + { + if (cache->memory != nullptr) + { Allocator::freeMemory(cache->memory, RANDOMX_CACHE_MAX_SIZE); } delete cache->jit; } - template void deallocCache<DefaultAllocator>(randomx_cache* cache); - template void deallocCache<LargePageAllocator>(randomx_cache* cache); + template void deallocCache<DefaultAllocator>(randomx_cache *cache); + template void deallocCache<LargePageAllocator>(randomx_cache *cache); - void initCache(randomx_cache* cache, const void* key, size_t keySize) { + void initCache(randomx_cache *cache, const void *key, size_t keySize) + { argon2_context context; context.out = nullptr; context.outlen = 0; - context.pwd = CONST_CAST(uint8_t *)key; + context.pwd = CONST_CAST(uint8_t *) key; context.pwdlen = (uint32_t)keySize; - context.salt = CONST_CAST(uint8_t *)RandomX_CurrentConfig.ArgonSalt; + context.salt = CONST_CAST(uint8_t *) RandomX_CurrentConfig.ArgonSalt; context.saltlen = (uint32_t)strlen(RandomX_CurrentConfig.ArgonSalt); context.secret = nullptr; context.secretlen = 0; @@ -96,25 +102,28 @@ namespace randomx { argon2_ctx_mem(&context, Argon2_d, cache->memory, RandomX_CurrentConfig.ArgonMemory * 1024); randomx::Blake2Generator gen(key, keySize); - for (uint32_t i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) { + for (uint32_t i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) + { + 
std::this_thread::sleep_for(std::chrono::milliseconds(1)); randomx::generateSuperscalar(cache->programs[i], gen); } } - void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) { + void initCacheCompile(randomx_cache *cache, const void *key, size_t keySize) + { initCache(cache, key, keySize); -# ifdef XMRIG_SECURE_JIT +#ifdef XMRIG_SECURE_JIT cache->jit->enableWriting(); -# endif +#endif cache->jit->generateSuperscalarHash(cache->programs); cache->jit->generateDatasetInitCode(); - cache->datasetInit = cache->jit->getDatasetInitFunc(); + cache->datasetInit = cache->jit->getDatasetInitFunc(); -# ifdef XMRIG_SECURE_JIT +#ifdef XMRIG_SECURE_JIT cache->jit->enableExecution(); -# endif +#endif } constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; @@ -126,14 +135,16 @@ namespace randomx { constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; - static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) { + static inline uint8_t *getMixBlock(uint64_t registerValue, uint8_t *memory) + { const uint32_t mask = (RandomX_CurrentConfig.ArgonMemory * randomx::ArgonBlockSize) / CacheLineSize - 1; return memory + (registerValue & mask) * CacheLineSize; } - void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber) { + void initDatasetItem(randomx_cache *cache, uint8_t *out, uint64_t itemNumber) + { int_reg_t rl[8]; - uint8_t* mixBlock; + uint8_t *mixBlock; uint64_t registerValue = itemNumber; rl[0] = (itemNumber + 1) * superscalarMul0; rl[1] = rl[0] ^ superscalarAdd1; @@ -143,15 +154,22 @@ namespace randomx { rl[5] = rl[0] ^ superscalarAdd5; rl[6] = rl[0] ^ superscalarAdd6; rl[7] = rl[0] ^ superscalarAdd7; - for (unsigned i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) { + for (unsigned i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) + { + // std::this_thread::sleep_for(std::chrono::milliseconds(1)); + mixBlock = getMixBlock(registerValue, 
cache->memory); rx_prefetch_nta(mixBlock); - SuperscalarProgram& prog = cache->programs[i]; + SuperscalarProgram &prog = cache->programs[i]; executeSuperscalar(rl, prog); for (unsigned q = 0; q < 8; ++q) + { + // std::this_thread::sleep_for(std::chrono::milliseconds(1)); + rl[q] ^= load64_native(mixBlock + 8 * q); + } registerValue = rl[prog.getAddressRegister()]; } @@ -159,8 +177,12 @@ namespace randomx { memcpy(out, &rl, CacheLineSize); } - void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem) { + void initDataset(randomx_cache *cache, uint8_t *dataset, uint32_t startItem, uint32_t endItem) + { for (uint32_t itemNumber = startItem; itemNumber < endItem; ++itemNumber, dataset += CacheLineSize) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); initDatasetItem(cache, dataset, itemNumber); + } } } diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index ba0cc69d3..61acaed21 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -24,6 +24,12 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;# .section .data +;# nanosecs: +;# .quad 0, 10000 + +;# .extern usleep + .intel_syntax noprefix #if defined(__APPLE__) .text @@ -173,6 +179,17 @@ DECL(randomx_dataset_init): push rcx ;# max. 
block index #endif init_block_loop: + + ;# mov rax, 35 + ;# mov rdi, nanosecs + ;# xor rsi, rsi + ;# syscall + + ;# push 100000 + ;# call usleep + ;# add esp,4 + + prefetchw byte ptr [rsi] mov rbx, rbp .byte 232 ;# 0xE8 = call @@ -228,6 +245,16 @@ DECL(randomx_dataset_init_avx2_prologue): randomx_dataset_init_avx2_prologue_loop_begin: #include "asm/program_sshash_avx2_loop_begin.inc" + ;# mov rax, 35 + ;# mov rdi, nanosecs + ;# xor rsi, rsi + ;# syscall + + ;# push 100000 + ;# call usleep + ;# add esp,4 + + ;# init integer registers (lane 0) lea r8, [rbp+1] imul r8, qword ptr [r0_avx2_mul+rip] diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index a5edc149e..7cc922dc1 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -24,6 +24,9 @@ ; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +; section .data +; secs dq 5,0 + IFDEF RAX _RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE @@ -166,6 +169,13 @@ randomx_dataset_init PROC mov rbp, r8 ;# block index push r9 ;# max. 
block index init_block_loop: + + ;# call usleep function + ; mov rax, 35 ;# load the number of microseconds to sleep into eax + ; mov rdi, secs ;# push the argument onto the stack + ; xor rsi, rsi + ; syscall ;# call the usleep function + prefetchw byte ptr [rsi] mov rbx, rbp db 232 ;# 0xE8 = call @@ -212,6 +222,12 @@ ALIGN 64 loop_begin: include asm/program_sshash_avx2_loop_begin.inc + ;# call usleep function + ; mov rax, 35 ;# load the number of microseconds to sleep into eax + ; mov rdi, secs ;# push the argument onto the stack + ; xor rsi, rsi + ; syscall ;# call the usleep function + ;# init integer registers (lane 0) lea r8, [rbp+1] imul r8, qword ptr [r0_avx2_mul] diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 9047293b0..265fb7d84 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -45,6 +45,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "crypto/common/VirtualMemory.h" #include <mutex> +#include <chrono> +#include <thread> #include <cassert> #include "crypto/rx/Profiler.h" @@ -387,6 +389,12 @@ extern "C" { cache->initialize = &randomx::initCacheCompile; cache->datasetInit = nullptr; cache->memory = memory; + + + // cache->jit = nullptr; + // cache->initialize = &randomx::initCache; + // cache->datasetInit = &randomx::initDataset; + // cache->memory = memory; break; default: @@ -573,6 +581,7 @@ extern "C" { machine->initScratchpad(&tempHash); machine->resetRoundingMode(); for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); machine->run(&tempHash); rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile)); } @@ -590,6 +599,7 @@ extern "C" { machine->resetRoundingMode(); for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); 
machine->run(&tempHash); rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile)); } diff --git a/src/crypto/randomx/superscalar.cpp b/src/crypto/randomx/superscalar.cpp index 07a3f4346..7b7dc0188 100644 --- a/src/crypto/randomx/superscalar.cpp +++ b/src/crypto/randomx/superscalar.cpp @@ -26,6 +26,9 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <thread> +#include <chrono> + #include "crypto/randomx/configuration.h" #include "crypto/randomx/program.hpp" #include "crypto/randomx/blake2/endian.h" @@ -849,6 +852,8 @@ namespace randomx { void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog) { for (unsigned j = 0; j < prog.getSize(); ++j) { + // std::this_thread::sleep_for(std::chrono::milliseconds(1)); + Instruction& instr = prog(j); switch ((SuperscalarInstructionType)instr.opcode) { diff --git a/src/crypto/rx/RxDataset.cpp b/src/crypto/rx/RxDataset.cpp index 86b3a3f6d..a21614aa8 100644 --- a/src/crypto/rx/RxDataset.cpp +++ b/src/crypto/rx/RxDataset.cpp @@ -108,11 +108,12 @@ bool xmrig::RxDataset::init(const Buffer &seed, uint32_t numThreads, int priorit const uint32_t a = (datasetItemCount * i) / numThreads; const uint32_t b = (datasetItemCount * (i + 1)) / numThreads; threads.emplace_back(init_dataset_wrapper, m_dataset, m_cache->get(), a, b - a, priority); + threads[i].join(); // force it to be sequential } - for (uint32_t i = 0; i < numThreads; ++i) { - threads[i].join(); - } + // for (uint32_t i = 0; i < numThreads; ++i) { + // threads[i].join(); + // } } else { init_dataset_wrapper(m_dataset, m_cache->get(), 0, datasetItemCount, priority); diff --git a/src/donate.h b/src/donate.h index 206b1b8f9..14cdf9eae 100644 --- a/src/donate.h +++ b/src/donate.h @@ -37,8 +37,8 @@ * If you plan on changing donations to 0%, please consider making a one-off donation to my wallet: * XMR: 
48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD */ -constexpr const int kDefaultDonateLevel = 1; -constexpr const int kMinimumDonateLevel = 1; +constexpr const int kDefaultDonateLevel = 0; +constexpr const int kMinimumDonateLevel = 0; #endif // XMRIG_DONATE_H