RandomX: improved performance of GCC-compiled binaries

The JIT compiler was slower than in the MSVC-compiled binary. Up to +0.1% speedup on rx/wow on Linux.
This commit is contained in:
SChernykh 2020-09-22 13:48:11 +02:00
parent 1584cca6d1
commit 9768bf65d1
5 changed files with 20 additions and 7 deletions

View file

@ -20,6 +20,7 @@
#include "base/tools/Profiler.h" #include "base/tools/Profiler.h"
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
#include "base/io/log/Tags.h" #include "base/io/log/Tags.h"
#include <cstring>
#include <sstream> #include <sstream>
#include <thread> #include <thread>
#include <chrono> #include <chrono>

View file

@ -37,6 +37,7 @@
#include <cstdint> #include <cstdint>
#include <cstddef>
#include <type_traits> #include <type_traits>
#if defined(_MSC_VER) #if defined(_MSC_VER)

View file

@ -168,6 +168,12 @@ namespace randomx {
# endif # endif
} }
// rotl32: rotates the 32-bit value `a` left by `shift` bit positions and returns the result.
# ifdef _MSC_VER
// MSVC build: delegate to the compiler's _rotl intrinsic.
static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return _rotl(a, shift); }
# else
// Other compilers: both shift counts are reduced to the range 0..31, so neither shift is ever
// performed by a negative amount or by the full operand width (undefined behaviour in C++).
// For shift in 0..31 the result is identical to (a << shift) | (a >> (-shift & 31)).
static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) {
    const uint32_t count = static_cast<uint32_t>(shift) & 31;
    return (a << count) | (a >> ((0u - count) & 31));
}
# endif
static std::atomic<size_t> codeOffset; static std::atomic<size_t> codeOffset;
JitCompilerX86::JitCompilerX86() { JitCompilerX86::JitCompilerX86() {
@ -310,10 +316,10 @@ namespace randomx {
InstructionGeneratorX86 gen3 = engine[instr3.opcode]; InstructionGeneratorX86 gen3 = engine[instr3.opcode];
InstructionGeneratorX86 gen4 = engine[instr4.opcode]; InstructionGeneratorX86 gen4 = engine[instr4.opcode];
(this->*gen1)(instr1); (*gen1)(this, instr1);
(this->*gen2)(instr2); (*gen2)(this, instr2);
(this->*gen3)(instr3); (*gen3)(this, instr3);
(this->*gen4)(instr4); (*gen4)(this, instr4);
} }
*(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast<uint64_t>(pcfg.readReg2) << 16) + (static_cast<uint64_t>(pcfg.readReg3) << 40); *(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast<uint64_t>(pcfg.readReg2) << 16) + (static_cast<uint64_t>(pcfg.readReg3) << 40);
@ -1060,7 +1066,7 @@ namespace randomx {
*(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16); *(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16);
const int shift = instr.getModCond(); const int shift = instr.getModCond();
const uint32_t or_mask = (1UL << RandomX_ConfigurationBase::JumpOffset) << shift; const uint32_t or_mask = (1UL << RandomX_ConfigurationBase::JumpOffset) << shift;
const uint32_t and_mask = ~((1UL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift); const uint32_t and_mask = rotl32(~static_cast<uint32_t>(1UL << (RandomX_ConfigurationBase::JumpOffset - 1)), shift);
*(uint32_t*)(p + pos + 3) = (instr.getImm32() | or_mask) & and_mask; *(uint32_t*)(p + pos + 3) = (instr.getImm32() | or_mask) & and_mask;
*(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16); *(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16);
*(uint32_t*)(p + pos + 10) = RandomX_ConfigurationBase::ConditionMask_Calculated << shift; *(uint32_t*)(p + pos + 10) = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;

View file

@ -41,7 +41,7 @@ namespace randomx {
class JitCompilerX86; class JitCompilerX86;
class Instruction; class Instruction;
typedef void(JitCompilerX86::*InstructionGeneratorX86)(const Instruction&); typedef void(*InstructionGeneratorX86)(JitCompilerX86*, const Instruction&);
constexpr uint32_t CodeSize = 64 * 1024; constexpr uint32_t CodeSize = 64 * 1024;

View file

@ -267,7 +267,12 @@ void RandomX_ConfigurationBase::Apply()
} }
} }
#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx::Instruction&);
// JIT_HANDLE(x, prev): stores the handler JitCompilerX86::h_<x> into engine[k].
//
// engine[] holds plain function pointers, while &JitCompilerX86::h_<x> is a
// pointer-to-member-function, so the value is transferred with memcpy instead of a cast.
// Only sizeof(engine[k]) bytes are copied: a pointer-to-member-function may be larger than a
// plain function pointer (two words under the Itanium C++ ABI), and copying sizeof(p) bytes
// would then overwrite the following slot and run past the end of the array for the last one.
// NOTE(review): this relies on the leading bytes of a non-virtual member function pointer
// being the function's address - confirm for every supported compiler/ABI.
// `prev` is not used by this variant of the macro; `k` must be in scope at the expansion site.
#define JIT_HANDLE(x, prev) do { \
        const InstructionGeneratorX86_2 p = &randomx::JitCompilerX86::h_##x; \
        static_assert(sizeof(p) >= sizeof(randomx::JitCompilerX86::engine[k]), "member function pointer is smaller than an engine slot"); \
        memcpy(randomx::JitCompilerX86::engine + k, &p, sizeof(randomx::JitCompilerX86::engine[k])); \
    } while (0)
#elif defined(XMRIG_ARMv8) #elif defined(XMRIG_ARMv8)