From 9768bf65d165c9715a6a2ac2a303773741d04e4d Mon Sep 17 00:00:00 2001 From: SChernykh <sergey.v.chernykh@gmail.com> Date: Tue, 22 Sep 2020 13:48:11 +0200 Subject: [PATCH] RandomX improved performance of GCC compiled binaries JIT compiler was slower compared to MSVC compiled binary. Up to +0.1% speedup on rx/wow in Linux. --- src/base/tools/Profiler.cpp | 1 + src/base/tools/Profiler.h | 1 + src/crypto/randomx/jit_compiler_x86.cpp | 16 +++++++++++----- src/crypto/randomx/jit_compiler_x86.hpp | 2 +- src/crypto/randomx/randomx.cpp | 7 ++++++- 5 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/base/tools/Profiler.cpp b/src/base/tools/Profiler.cpp index f6f066f37..ac2a6d2cb 100644 --- a/src/base/tools/Profiler.cpp +++ b/src/base/tools/Profiler.cpp @@ -20,6 +20,7 @@ #include "base/tools/Profiler.h" #include "base/io/log/Log.h" #include "base/io/log/Tags.h" +#include <cstring> #include <sstream> #include <thread> #include <chrono> diff --git a/src/base/tools/Profiler.h b/src/base/tools/Profiler.h index c74277151..ae3470f8f 100644 --- a/src/base/tools/Profiler.h +++ b/src/base/tools/Profiler.h @@ -37,6 +37,7 @@ #include <cstdint> +#include <cstddef> #include <type_traits> #if defined(_MSC_VER) diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 8edf5a720..437f1040d 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -168,6 +168,12 @@ namespace randomx { # endif } +# ifdef _MSC_VER + static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return _rotl(a, shift); } +# else + static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return (a << shift) | (a >> (-shift & 31)); } +# endif + static std::atomic<size_t> codeOffset; JitCompilerX86::JitCompilerX86() { @@ -310,10 +316,10 @@ namespace randomx { InstructionGeneratorX86 gen3 = engine[instr3.opcode]; InstructionGeneratorX86 gen4 = engine[instr4.opcode]; - (this->*gen1)(instr1); - 
(this->*gen2)(instr2); - (this->*gen3)(instr3); - (this->*gen4)(instr4); + (*gen1)(this, instr1); + (*gen2)(this, instr2); + (*gen3)(this, instr3); + (*gen4)(this, instr4); } *(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast<uint64_t>(pcfg.readReg2) << 16) + (static_cast<uint64_t>(pcfg.readReg3) << 40); @@ -1060,7 +1066,7 @@ namespace randomx { *(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16); const int shift = instr.getModCond(); const uint32_t or_mask = (1UL << RandomX_ConfigurationBase::JumpOffset) << shift; - const uint32_t and_mask = ~((1UL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift); + const uint32_t and_mask = rotl32(~static_cast<uint32_t>(1UL << (RandomX_ConfigurationBase::JumpOffset - 1)), shift); *(uint32_t*)(p + pos + 3) = (instr.getImm32() | or_mask) & and_mask; *(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16); *(uint32_t*)(p + pos + 10) = RandomX_ConfigurationBase::ConditionMask_Calculated << shift; diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 3a9163b5e..b8e6a9fe7 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -41,7 +41,7 @@ namespace randomx { class JitCompilerX86; class Instruction; - typedef void(JitCompilerX86::*InstructionGeneratorX86)(const Instruction&); + typedef void(*InstructionGeneratorX86)(JitCompilerX86*, const Instruction&); constexpr uint32_t CodeSize = 64 * 1024; diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 5cfaddca3..2804b1b78 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -267,7 +267,12 @@ void RandomX_ConfigurationBase::Apply() } } -#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x +typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx::Instruction&); + +#define JIT_HANDLE(x, prev) do { \ + const InstructionGeneratorX86_2 p = 
&randomx::JitCompilerX86::h_##x; \ + memcpy(randomx::JitCompilerX86::engine + k, &p, sizeof(randomx::JitCompilerX86::engine[k])); /* copy exactly one table slot: a pointer-to-member-function can be wider than the plain function pointer stored in engine[] (two words on the Itanium C++ ABI), so sizeof(p) would write past the slot */ \ + } while (0) #elif defined(XMRIG_ARMv8)