diff --git a/src/crypto/randomx/asm/program_imul_rcp_store.inc b/src/crypto/randomx/asm/program_imul_rcp_store.inc new file mode 100644 index 000000000..ce0b27db4 --- /dev/null +++ b/src/crypto/randomx/asm/program_imul_rcp_store.inc @@ -0,0 +1,17 @@ + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81 + add rsp, 128 diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index aec051eda..3442e1f4f 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -428,7 +428,10 @@ namespace randomx { xmrig::RxFix::setMainLoopBounds(mainLoopBounds); # endif - memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask)); + imul_rcp_storage = code + (ADDR(randomx_program_imul_rcp_store) - codePrologue) + 2; + imul_rcp_storage_used = 0; + + memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask)); codePos = codePosFirst; prevCFROUND = 0; @@ -1012,13 +1015,24 @@ namespace randomx { uint64_t divisor = instr.getImm32(); if (!isZeroOrPowerOf2(divisor)) { - *(uint32_t*)(p + pos) = 0xb848; - pos += 2; - - emit64(randomx_reciprocal_fast(divisor), p, pos); - const uint32_t dst = instr.dst % RegistersCount; - emit32(0xc0af0f4c + (dst << 27), p, pos); + + const uint64_t reciprocal = randomx_reciprocal_fast(divisor); + if (imul_rcp_storage_used < 16) { + *(uint64_t*)(imul_rcp_storage) = reciprocal; + 
*(uint64_t*)(p + pos) = 0x2444AF0F4Cull + (dst << 27) + (static_cast<uint64_t>(248 - imul_rcp_storage_used * 8) << 40); + ++imul_rcp_storage_used; + imul_rcp_storage += 11; + pos += 6; + } + else { + *(uint32_t*)(p + pos) = 0xb848; + pos += 2; + + emit64(reciprocal, p, pos); + + emit32(0xc0af0f4c + (dst << 27), p, pos); + } registerUsage[dst] = pos; } diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 0d2b4321b..abc8e74f1 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -104,6 +104,9 @@ namespace randomx { uint8_t* allocatedCode = nullptr; size_t allocatedSize = 0; + uint8_t* imul_rcp_storage = nullptr; + uint32_t imul_rcp_storage_used = 0; + void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&, ProgramConfiguration&); template diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index 2ead26e93..954c5ba29 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -41,6 +41,7 @@ .global DECL(randomx_prefetch_scratchpad_end) .global DECL(randomx_program_prologue) .global DECL(randomx_program_prologue_first_load) +.global DECL(randomx_program_imul_rcp_store) .global DECL(randomx_program_loop_begin) .global DECL(randomx_program_loop_load) .global DECL(randomx_program_loop_load_xop) @@ -106,11 +107,15 @@ DECL(randomx_program_prologue_first_load): nop nop nop - jmp DECL(randomx_program_loop_begin) + jmp DECL(randomx_program_imul_rcp_store) .balign 64 #include "asm/program_xmm_constants.inc" +DECL(randomx_program_imul_rcp_store): + #include "asm/program_imul_rcp_store.inc" + jmp DECL(randomx_program_loop_begin) + .balign 64 DECL(randomx_program_loop_begin): nop diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index 2c5d1bbe4..f4c52d436 100644 --- 
a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -32,6 +32,7 @@ PUBLIC randomx_prefetch_scratchpad PUBLIC randomx_prefetch_scratchpad_end PUBLIC randomx_program_prologue PUBLIC randomx_program_prologue_first_load +PUBLIC randomx_program_imul_rcp_store PUBLIC randomx_program_loop_begin PUBLIC randomx_program_loop_load PUBLIC randomx_program_loop_load_xop @@ -94,12 +95,17 @@ randomx_program_prologue_first_load PROC nop nop nop - jmp randomx_program_loop_begin + jmp randomx_program_imul_rcp_store randomx_program_prologue_first_load ENDP ALIGN 64 include asm/program_xmm_constants.inc +randomx_program_imul_rcp_store PROC + include asm/program_imul_rcp_store.inc + jmp randomx_program_loop_begin +randomx_program_imul_rcp_store ENDP + ALIGN 64 randomx_program_loop_begin PROC nop diff --git a/src/crypto/randomx/jit_compiler_x86_static.hpp b/src/crypto/randomx/jit_compiler_x86_static.hpp index 121db5bed..33fee9e2a 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.hpp +++ b/src/crypto/randomx/jit_compiler_x86_static.hpp @@ -33,6 +33,7 @@ extern "C" { void randomx_prefetch_scratchpad_end(); void randomx_program_prologue(); void randomx_program_prologue_first_load(); + void randomx_program_imul_rcp_store(); void randomx_program_loop_begin(); void randomx_program_loop_load(); void randomx_program_loop_load_xop();