From 9f1753cc4f54dfe22dad97a5b45a9b69027d19b7 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 22 Jan 2020 20:11:00 +0100 Subject: [PATCH] Optimized CFROUND --- .../randomx/asm/program_epilogue_store.inc | 2 +- src/crypto/randomx/asm/program_loop_load.inc | 4 ++-- src/crypto/randomx/asm/program_loop_store.inc | 4 ++-- src/crypto/randomx/jit_compiler_x86.cpp | 23 +++++++++++-------- .../randomx/jit_compiler_x86_static.asm | 8 +++++-- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/crypto/randomx/asm/program_epilogue_store.inc b/src/crypto/randomx/asm/program_epilogue_store.inc index 6fa1a1fcf..82067d191 100644 --- a/src/crypto/randomx/asm/program_epilogue_store.inc +++ b/src/crypto/randomx/asm/program_epilogue_store.inc @@ -1,5 +1,5 @@ ;# save VM register values - add rsp, 24 + add rsp, 40 pop rcx mov qword ptr [rcx+0], r8 mov qword ptr [rcx+8], r9 diff --git a/src/crypto/randomx/asm/program_loop_load.inc b/src/crypto/randomx/asm/program_loop_load.inc index f5b23a931..5d8a84918 100644 --- a/src/crypto/randomx/asm/program_loop_load.inc +++ b/src/crypto/randomx/asm/program_loop_load.inc @@ -1,5 +1,5 @@ lea rcx, [rsi+rax] - mov [rsp+8], rcx + mov [rsp+16], rcx xor r8, qword ptr [rcx+0] xor r9, qword ptr [rcx+8] xor r10, qword ptr [rcx+16] @@ -9,7 +9,7 @@ xor r14, qword ptr [rcx+48] xor r15, qword ptr [rcx+56] lea rcx, [rsi+rdx] - mov [rsp+16], rcx + mov [rsp+24], rcx cvtdq2pd xmm0, qword ptr [rcx+0] cvtdq2pd xmm1, qword ptr [rcx+8] cvtdq2pd xmm2, qword ptr [rcx+16] diff --git a/src/crypto/randomx/asm/program_loop_store.inc b/src/crypto/randomx/asm/program_loop_store.inc index f778f134f..f579bb0c5 100644 --- a/src/crypto/randomx/asm/program_loop_store.inc +++ b/src/crypto/randomx/asm/program_loop_store.inc @@ -1,4 +1,4 @@ - mov rcx, [rsp+16] + mov rcx, [rsp+24] mov qword ptr [rcx+0], r8 mov qword ptr [rcx+8], r9 mov qword ptr [rcx+16], r10 @@ -7,7 +7,7 @@ mov qword ptr [rcx+40], r13 mov qword ptr [rcx+48], r14 mov qword ptr [rcx+56], r15 - mov 
rcx, [rsp+8] + mov rcx, [rsp+16] xorpd xmm0, xmm4 xorpd xmm1, xmm5 xorpd xmm2, xmm6 diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index b31355263..34f98cb96 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -400,7 +400,7 @@ namespace randomx { *(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated; *(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated; if (hasAVX) { - uint32_t* p = (uint32_t*)(code + codePos + 32); + uint32_t* p = (uint32_t*)(code + codePos + 67); *p = (*p & 0xFF000000U) | 0x0077F8C5U; } @@ -1072,18 +1072,21 @@ namespace randomx { uint8_t* const p = code; int pos = codePos; - emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.src, p, pos); - int rotate = (13 - (instr.getImm32() & 63)) & 63; - if (rotate != 0) { - emit(ROL_RAX, p, pos); - emitByte(rotate, p, pos); - } + const uint32_t src = instr.src; + + *(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16); + const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63; + *(uint32_t*)(p + pos + 3) = 0x00C8C148 + (rotate << 24); + if (vm_flags & RANDOMX_FLAG_AMD) { - emit(AND_OR_MOV_LDMXCSR_RYZEN, p, pos); + *(uint64_t*)(p + pos + 7) = 0x742024443B0CE083ULL; + *(uint8_t*)(p + pos + 15) = 8; + *(uint64_t*)(p + pos + 16) = 0x202444890414AE0FULL; + pos += 24; } else { - emit(AND_OR_MOV_LDMXCSR, p, pos); + *(uint64_t*)(p + pos + 7) = 0x0414AE0F0CE083ULL; + pos += 14; } codePos = pos; diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index 8e7714002..e36e5aafa 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -82,8 +82,12 @@ randomx_program_prologue_first_load PROC and eax, RANDOMX_SCRATCHPAD_MASK ror rdx, 32 and edx, RANDOMX_SCRATCHPAD_MASK - sub rsp, 24 - stmxcsr dword ptr [rsp] + sub rsp, 40 + mov dword ptr 
[rsp], 9FC0h + mov dword ptr [rsp+4], 0BFC0h + mov dword ptr [rsp+8], 0DFC0h + mov dword ptr [rsp+12], 0FFC0h + mov dword ptr [rsp+32], -1 nop nop nop