From d443dd86f1f12c1be5636ec6796efe6f420ee710 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 19 May 2021 17:52:16 +0200 Subject: [PATCH] RandomX: added BMI2 version for scratchpad prefetch Saves 1 instruction and 1 byte in the main loop. --- src/crypto/randomx/jit_compiler_x86.cpp | 5 +--- src/crypto/randomx/jit_compiler_x86_static.S | 8 ++++++ .../randomx/jit_compiler_x86_static.asm | 9 +++++++ .../randomx/jit_compiler_x86_static.hpp | 1 + src/crypto/randomx/randomx.cpp | 25 +++++++++++++------ src/crypto/randomx/randomx.h | 3 ++- 6 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 04db010a9..825a526e8 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -110,8 +110,6 @@ namespace randomx { #define ADDR(x) ((uint8_t*)&x) # endif - #define codePrefetchScratchpad ADDR(randomx_prefetch_scratchpad) - #define codePrefetchScratchpadEnd ADDR(randomx_prefetch_scratchpad_end) #define codePrologue ADDR(randomx_program_prologue) #define codeLoopBegin ADDR(randomx_program_loop_begin) #define codeLoopLoad ADDR(randomx_program_loop_load) @@ -134,7 +132,6 @@ namespace randomx { #define codeShhEnd ADDR(randomx_sshash_end) #define codeShhInit ADDR(randomx_sshash_init) - #define prefetchScratchpadSize (codePrefetchScratchpadEnd - codePrefetchScratchpad) #define prologueSize (codeLoopBegin - codePrologue) #define loopLoadSize (codeLoopLoadXOP - codeLoopLoad) #define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP) @@ -467,7 +464,7 @@ namespace randomx { void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) { *(uint64_t*)(code + codePos) = 0xc03349c08b49ull + (static_cast<uint64_t>(pcfg.readReg0) << 16) + (static_cast<uint64_t>(pcfg.readReg1) << 40); codePos += 6; - emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos); + 
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, RandomX_CurrentConfig.codePrefetchScratchpadTweakedSize, code, codePos); memcpy(code + codePos, codeLoopStore, loopStoreSize); codePos += loopStoreSize; diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index 954c5ba29..c7b31e5a3 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -38,6 +38,7 @@ #endif .global DECL(randomx_prefetch_scratchpad) +.global DECL(randomx_prefetch_scratchpad_bmi2) .global DECL(randomx_prefetch_scratchpad_end) .global DECL(randomx_program_prologue) .global DECL(randomx_program_prologue_first_load) @@ -80,6 +81,13 @@ DECL(randomx_prefetch_scratchpad): and edx, RANDOMX_SCRATCHPAD_MASK prefetcht0 [rsi+rdx] +DECL(randomx_prefetch_scratchpad_bmi2): + rorx rdx, rax, 32 + and eax, RANDOMX_SCRATCHPAD_MASK + prefetcht0 [rsi+rax] + and edx, RANDOMX_SCRATCHPAD_MASK + prefetcht0 [rsi+rdx] + DECL(randomx_prefetch_scratchpad_end): .balign 64 diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index f4c52d436..e7d6cbb3e 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -29,6 +29,7 @@ IFDEF RAX _RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE PUBLIC randomx_prefetch_scratchpad +PUBLIC randomx_prefetch_scratchpad_bmi2 PUBLIC randomx_prefetch_scratchpad_end PUBLIC randomx_program_prologue PUBLIC randomx_program_prologue_first_load @@ -70,6 +71,14 @@ randomx_prefetch_scratchpad PROC prefetcht0 [rsi+rdx] randomx_prefetch_scratchpad ENDP +randomx_prefetch_scratchpad_bmi2 PROC + rorx rdx, rax, 32 + and eax, RANDOMX_SCRATCHPAD_MASK + prefetcht0 [rsi+rax] + and edx, RANDOMX_SCRATCHPAD_MASK + prefetcht0 [rsi+rdx] +randomx_prefetch_scratchpad_bmi2 ENDP + randomx_prefetch_scratchpad_end PROC randomx_prefetch_scratchpad_end ENDP diff --git 
a/src/crypto/randomx/jit_compiler_x86_static.hpp b/src/crypto/randomx/jit_compiler_x86_static.hpp index 33fee9e2a..d4f79d215 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.hpp +++ b/src/crypto/randomx/jit_compiler_x86_static.hpp @@ -30,6 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern "C" { void randomx_prefetch_scratchpad(); + void randomx_prefetch_scratchpad_bmi2(); void randomx_prefetch_scratchpad_end(); void randomx_program_prologue(); void randomx_program_prologue_first_load(); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 00af0dc5d..431961f40 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -177,10 +177,17 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() memcpy(codeReadDatasetRyzenTweaked, a, b - a); codeReadDatasetRyzenTweakedSize = b - a; } - { - const uint8_t* a = addr(randomx_prefetch_scratchpad); + if (xmrig::Cpu::info()->hasBMI2()) { + const uint8_t* a = addr(randomx_prefetch_scratchpad_bmi2); const uint8_t* b = addr(randomx_prefetch_scratchpad_end); memcpy(codePrefetchScratchpadTweaked, a, b - a); + codePrefetchScratchpadTweakedSize = b - a; + } + else { + const uint8_t* a = addr(randomx_prefetch_scratchpad); + const uint8_t* b = addr(randomx_prefetch_scratchpad_bmi2); + memcpy(codePrefetchScratchpadTweaked, a, b - a); + codePrefetchScratchpadTweakedSize = b - a; } # endif } @@ -217,13 +224,15 @@ void RandomX_ConfigurationBase::Apply() //*(uint32_t*)(codeReadDatasetTweaked + 24) = DatasetBaseMask; //*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask; - *(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated; - *(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated; + const bool hasBMI2 = xmrig::Cpu::info()->hasBMI2(); + + *(uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 
7 : 4)) = ScratchpadL3Mask64_Calculated; + *(uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 17 : 18)) = ScratchpadL3Mask64_Calculated; // Apply scratchpad prefetch mode { - uint32_t* a = (uint32_t*)(codePrefetchScratchpadTweaked + 8); - uint32_t* b = (uint32_t*)(codePrefetchScratchpadTweaked + 22); + uint32_t* a = (uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 11 : 8)); + uint32_t* b = (uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 21 : 22)); switch (scratchpadPrefetchMode) { @@ -290,7 +299,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx INST_HANDLE(IMUL_M, IMUL_R); #if defined(_M_X64) || defined(__x86_64__) - if (xmrig::Cpu::info()->hasBMI2()) { + if (hasBMI2) { INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M); INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R); } @@ -332,7 +341,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx #endif #if defined(_M_X64) || defined(__x86_64__) - if (xmrig::Cpu::info()->hasBMI2()) { + if (hasBMI2) { INST_HANDLE2(CFROUND, CFROUND_BMI2, CBRANCH); } else diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index ca79f7782..6dc7b8c41 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -129,7 +129,8 @@ struct RandomX_ConfigurationBase uint32_t codeReadDatasetTweakedSize; uint8_t codeReadDatasetRyzenTweaked[72]; uint32_t codeReadDatasetRyzenTweakedSize; - uint8_t codePrefetchScratchpadTweaked[32]; + uint8_t codePrefetchScratchpadTweaked[28]; + uint32_t codePrefetchScratchpadTweakedSize; uint32_t AddressMask_Calculated[4]; uint32_t ScratchpadL3Mask_Calculated;