From ff82ca57f25e8cbb833adb4aae2204db43bc1ead Mon Sep 17 00:00:00 2001 From: SChernykh Date: Thu, 20 May 2021 12:45:42 +0200 Subject: [PATCH] RandomX: rewrote dataset read code Unified code for AMD and Intel 1% faster on Intel 0.15% faster on AMD Ryzen --- .../randomx/asm/program_prologue_linux.inc | 1 + .../randomx/asm/program_prologue_win64.inc | 1 + .../randomx/asm/program_read_dataset.inc | 23 +++++++++---------- .../asm/program_read_dataset_ryzen.inc | 17 -------------- src/crypto/randomx/jit_compiler_x86.cpp | 17 +++----------- src/crypto/randomx/jit_compiler_x86_static.S | 4 ---- .../randomx/jit_compiler_x86_static.asm | 5 ---- .../randomx/jit_compiler_x86_static.hpp | 1 - src/crypto/randomx/randomx.cpp | 12 ---------- src/crypto/randomx/randomx.h | 4 ---- 10 files changed, 16 insertions(+), 69 deletions(-) delete mode 100644 src/crypto/randomx/asm/program_read_dataset_ryzen.inc diff --git a/src/crypto/randomx/asm/program_prologue_linux.inc b/src/crypto/randomx/asm/program_prologue_linux.inc index 4e1685de8..fcd09fd36 100644 --- a/src/crypto/randomx/asm/program_prologue_linux.inc +++ b/src/crypto/randomx/asm/program_prologue_linux.inc @@ -22,6 +22,7 @@ mov rsi, rdx ;# uint8_t* scratchpad mov rax, rbp + ror rbp, 32 ;# zero integer registers xor r8, r8 diff --git a/src/crypto/randomx/asm/program_prologue_win64.inc b/src/crypto/randomx/asm/program_prologue_win64.inc index a93862653..d70e04911 100644 --- a/src/crypto/randomx/asm/program_prologue_win64.inc +++ b/src/crypto/randomx/asm/program_prologue_win64.inc @@ -35,6 +35,7 @@ mov rbx, r9 ;# loop counter mov rax, rbp + ror rbp, 32 ;# zero integer registers xor r8, r8 diff --git a/src/crypto/randomx/asm/program_read_dataset.inc b/src/crypto/randomx/asm/program_read_dataset.inc index b81d0c321..9c61092f9 100644 --- a/src/crypto/randomx/asm/program_read_dataset.inc +++ b/src/crypto/randomx/asm/program_read_dataset.inc @@ -1,17 +1,16 @@ + mov ecx, ebp ;# ecx = ma + and ecx, RANDOMX_DATASET_BASE_MASK + xor r8, qword ptr [rdi+rcx] + ror rbp, 32 ;# swap "ma" and "mx" xor rbp, rax ;# modify "mx" mov edx, ebp ;# edx = mx and edx, RANDOMX_DATASET_BASE_MASK prefetchnta byte ptr [rdi+rdx] - ror rbp, 32 ;# swap "ma" and "mx" - mov edx, ebp ;# edx = ma - and edx, RANDOMX_DATASET_BASE_MASK - lea rcx, [rdi+rdx] ;# dataset cache line - xor r8, qword ptr [rcx+0] - xor r9, qword ptr [rcx+8] - xor r10, qword ptr [rcx+16] - xor r11, qword ptr [rcx+24] - xor r12, qword ptr [rcx+32] - xor r13, qword ptr [rcx+40] - xor r14, qword ptr [rcx+48] - xor r15, qword ptr [rcx+56] + xor r9, qword ptr [rdi+rcx+8] + xor r10, qword ptr [rdi+rcx+16] + xor r11, qword ptr [rdi+rcx+24] + xor r12, qword ptr [rdi+rcx+32] + xor r13, qword ptr [rdi+rcx+40] + xor r14, qword ptr [rdi+rcx+48] + xor r15, qword ptr [rdi+rcx+56] \ No newline at end of file diff --git a/src/crypto/randomx/asm/program_read_dataset_ryzen.inc b/src/crypto/randomx/asm/program_read_dataset_ryzen.inc deleted file mode 100644 index 9a3aec3d1..000000000 --- a/src/crypto/randomx/asm/program_read_dataset_ryzen.inc +++ /dev/null @@ -1,17 +0,0 @@ - mov rcx, rbp ;# ecx = ma - shr rcx, 32 - and ecx, RANDOMX_DATASET_BASE_MASK - xor r8, qword ptr [rdi+rcx] - xor rbp, rax ;# modify "mx" - mov edx, ebp ;# edx = mx - and edx, RANDOMX_DATASET_BASE_MASK - prefetchnta byte ptr [rdi+rdx] - ror rbp, 32 ;# swap "ma" and "mx" - xor r9, qword ptr [rdi+rcx+8] - xor r10, qword ptr [rdi+rcx+16] - xor r11, qword ptr [rdi+rcx+24] - xor r12, qword ptr [rdi+rcx+32] - xor r13, qword ptr [rdi+rcx+40] - xor r14, qword ptr [rdi+rcx+48] - xor r15, qword ptr [rdi+rcx+56] - \ No newline at end of file diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 825a526e8..f4d3a5420 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -115,6 +115,7 @@ namespace randomx { #define codeLoopLoad ADDR(randomx_program_loop_load) #define codeLoopLoadXOP ADDR(randomx_program_loop_load_xop) #define codeProgamStart ADDR(randomx_program_start) + #define codeReadDataset ADDR(randomx_program_read_dataset) #define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init) #define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin) #define codeDatasetInit ADDR(randomx_dataset_init) @@ -135,6 +136,7 @@ namespace randomx { #define prologueSize (codeLoopBegin - codePrologue) #define loopLoadSize (codeLoopLoadXOP - codeLoopLoad) #define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP) + #define readDatasetSize (codeReadDatasetLightSshInit - codeReadDataset) #define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit) #define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin) #define loopStoreSize (codeLoopEnd - codeLoopStore) @@ -318,20 +320,7 @@ namespace randomx { vm_flags = flags; generateProgramPrologue(prog, pcfg); - - uint8_t* p; - uint32_t n; - if (flags & RANDOMX_FLAG_AMD) { - p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked; - n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize; - } - else { - p = RandomX_CurrentConfig.codeReadDatasetTweaked; - n = RandomX_CurrentConfig.codeReadDatasetTweakedSize; - } - memcpy(code + codePos, p, n); - codePos += n; - + emit(codeReadDataset, readDatasetSize, code, codePos); generateProgramEpilogue(prog, pcfg); } diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index c7b31e5a3..c55db6c0e 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -48,7 +48,6 @@ .global DECL(randomx_program_loop_load_xop) .global DECL(randomx_program_start) .global DECL(randomx_program_read_dataset) -.global DECL(randomx_program_read_dataset_ryzen) .global DECL(randomx_program_read_dataset_sshash_init) .global DECL(randomx_program_read_dataset_sshash_fin) .global DECL(randomx_program_loop_store) @@ -140,9 +139,6 @@ DECL(randomx_program_start): DECL(randomx_program_read_dataset): #include "asm/program_read_dataset.inc" -DECL(randomx_program_read_dataset_ryzen): - #include "asm/program_read_dataset_ryzen.inc" - DECL(randomx_program_read_dataset_sshash_init): #include "asm/program_read_dataset_sshash_init.inc" diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index e7d6cbb3e..a5edc149e 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -39,7 +39,6 @@ PUBLIC randomx_program_loop_load PUBLIC randomx_program_loop_load_xop PUBLIC randomx_program_start PUBLIC randomx_program_read_dataset -PUBLIC randomx_program_read_dataset_ryzen PUBLIC randomx_program_read_dataset_sshash_init PUBLIC randomx_program_read_dataset_sshash_fin PUBLIC randomx_dataset_init @@ -136,10 +135,6 @@ randomx_program_read_dataset PROC include asm/program_read_dataset.inc randomx_program_read_dataset ENDP -randomx_program_read_dataset_ryzen PROC - include asm/program_read_dataset_ryzen.inc -randomx_program_read_dataset_ryzen ENDP - randomx_program_read_dataset_sshash_init PROC include asm/program_read_dataset_sshash_init.inc randomx_program_read_dataset_sshash_init ENDP diff --git a/src/crypto/randomx/jit_compiler_x86_static.hpp b/src/crypto/randomx/jit_compiler_x86_static.hpp index d4f79d215..372a69f14 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.hpp +++ b/src/crypto/randomx/jit_compiler_x86_static.hpp @@ -40,7 +40,6 @@ extern "C" { void randomx_program_loop_load_xop(); void randomx_program_start(); void randomx_program_read_dataset(); - void randomx_program_read_dataset_ryzen(); void randomx_program_read_dataset_sshash_init(); void randomx_program_read_dataset_sshash_fin(); void randomx_program_loop_store(); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 431961f40..fe438b61e 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -165,18 +165,6 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() const uint8_t* b = addr(randomx_sshash_end); memcpy(codeShhPrefetchTweaked, a, b - a); } - { - const uint8_t* a = addr(randomx_program_read_dataset); - const uint8_t* b = addr(randomx_program_read_dataset_ryzen); - memcpy(codeReadDatasetTweaked, a, b - a); - codeReadDatasetTweakedSize = b - a; - } - { - const uint8_t* a = addr(randomx_program_read_dataset_ryzen); - const uint8_t* b = addr(randomx_program_read_dataset_sshash_init); - memcpy(codeReadDatasetRyzenTweaked, a, b - a); - codeReadDatasetRyzenTweakedSize = b - a; - } if (xmrig::Cpu::info()->hasBMI2()) { const uint8_t* a = addr(randomx_prefetch_scratchpad_bmi2); const uint8_t* b = addr(randomx_prefetch_scratchpad_end); diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 6dc7b8c41..937a0844c 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -125,10 +125,6 @@ struct RandomX_ConfigurationBase rx_vec_i128 fillAes4Rx4_Key[8]; uint8_t codeShhPrefetchTweaked[20]; - uint8_t codeReadDatasetTweaked[64]; - uint32_t codeReadDatasetTweakedSize; - uint8_t codeReadDatasetRyzenTweaked[72]; - uint32_t codeReadDatasetRyzenTweakedSize; uint8_t codePrefetchScratchpadTweaked[28]; uint32_t codePrefetchScratchpadTweakedSize;