From 93c07e1d34fb0a54c48f5ae0181bbd19327d54f2 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 16 Sep 2022 09:50:58 +0200 Subject: [PATCH] RandomX: optimized CFROUND elimination more --- src/crypto/randomx/jit_compiler_x86.cpp | 44 ++++++++++++------------- src/crypto/randomx/jit_compiler_x86.hpp | 3 +- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 68b9749af..93e8c18fe 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -424,7 +424,8 @@ namespace randomx { memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask)); codePos = codePosFirst; - prevCFROUND = 0; + prevCFROUND = -1; + prevFPOperation = -1; //mark all registers as used uint64_t* r = (uint64_t*)registerUsage; @@ -1160,7 +1161,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - prevCFROUND = 0; + prevFPOperation = pos; const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt; @@ -1175,7 +1176,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - prevCFROUND = 0; + prevFPOperation = pos; const uint32_t src = instr.src % RegistersCount; const uint32_t dst = instr.dst % RegisterCountFlt; @@ -1192,7 +1193,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - prevCFROUND = 0; + prevFPOperation = pos; const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt; @@ -1207,7 +1208,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - prevCFROUND = 0; + prevFPOperation = pos; const uint32_t src = instr.src % RegistersCount; const uint32_t dst = instr.dst % RegisterCountFlt; @@ -1235,7 +1236,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - prevCFROUND = 0; + prevFPOperation = pos; const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt; @@ -1250,7 +1251,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - prevCFROUND = 0; + prevFPOperation = pos; const uint32_t src = instr.src % RegistersCount; const uint64_t dst = instr.dst % RegisterCountFlt; @@ -1277,7 +1278,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - prevCFROUND = 0; + prevFPOperation = pos; const uint32_t dst = instr.dst % RegisterCountFlt; @@ -1288,18 +1289,18 @@ namespace randomx { void JitCompilerX86::h_CFROUND(const Instruction& instr) { uint8_t* const p = code; - uint32_t pos = prevCFROUND; + int32_t t = prevCFROUND; - if (pos) { + if (t > prevFPOperation) { if (vm_flags & RANDOMX_FLAG_AMD) { - memcpy(p + pos, NOP26, 26); + memcpy(p + t, NOP26, 26); } else { - memcpy(p + pos, NOP14, 14); + memcpy(p + t, NOP14, 14); } } - pos = codePos; + uint32_t pos = codePos; prevCFROUND = pos; const uint32_t src = instr.src % RegistersCount; @@ -1324,18 +1325,18 @@ namespace randomx { void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) { uint8_t* const p = code; - uint32_t pos = prevCFROUND; + int32_t t = prevCFROUND; - if (pos) { + if (t > prevFPOperation) { if (vm_flags & RANDOMX_FLAG_AMD) { - memcpy(p + pos, NOP25, 25); + memcpy(p + t, NOP25, 25); } else { - memcpy(p + pos, NOP13, 13); + memcpy(p + t, NOP13, 13); } } - pos = codePos; + uint32_t pos = codePos; prevCFROUND = pos; const uint64_t src = instr.src % RegistersCount; @@ -1365,10 +1366,9 @@ namespace randomx { const int reg = instr.dst % RegistersCount; int32_t jmp_offset = registerUsage[reg]; - // if it jumps over the previous CFROUND, it can't be safely eliminated - const uint32_t t = prevCFROUND; - if (t && (jmp_offset < t)) { - prevCFROUND = 0; + // if it jumps over the previous FP instruction that uses rounding, treat it as if FP instruction happened now + if (jmp_offset <= prevFPOperation) { + prevFPOperation = pos; } jmp_offset -= pos + 16; diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index abc8e74f1..152619226 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -89,7 +89,8 @@ namespace randomx { uint32_t codePos = 0; uint32_t codePosFirst = 0; uint32_t vm_flags = 0; - uint32_t prevCFROUND = 0; + int32_t prevCFROUND = -1; + int32_t prevFPOperation = -1; # ifdef XMRIG_FIX_RYZEN std::pair mainLoopBounds;