From 9768bf65d165c9715a6a2ac2a303773741d04e4d Mon Sep 17 00:00:00 2001
From: SChernykh <sergey.v.chernykh@gmail.com>
Date: Tue, 22 Sep 2020 13:48:11 +0200
Subject: [PATCH] RandomX improved performance of GCC compiled binaries

In GCC-compiled binaries the JIT compiler was slower than in MSVC-compiled ones. Up to +0.1% speedup on rx/wow on Linux.
---
 src/base/tools/Profiler.cpp             |  1 +
 src/base/tools/Profiler.h               |  1 +
 src/crypto/randomx/jit_compiler_x86.cpp | 16 +++++++++++-----
 src/crypto/randomx/jit_compiler_x86.hpp |  2 +-
 src/crypto/randomx/randomx.cpp          |  7 ++++++-
 5 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/src/base/tools/Profiler.cpp b/src/base/tools/Profiler.cpp
index f6f066f37..ac2a6d2cb 100644
--- a/src/base/tools/Profiler.cpp
+++ b/src/base/tools/Profiler.cpp
@@ -20,6 +20,7 @@
 #include "base/tools/Profiler.h"
 #include "base/io/log/Log.h"
 #include "base/io/log/Tags.h"
+#include <cstring>
 #include <sstream>
 #include <thread>
 #include <chrono>
diff --git a/src/base/tools/Profiler.h b/src/base/tools/Profiler.h
index c74277151..ae3470f8f 100644
--- a/src/base/tools/Profiler.h
+++ b/src/base/tools/Profiler.h
@@ -37,6 +37,7 @@
 
 
 #include <cstdint>
+#include <cstddef>
 #include <type_traits>
 
 #if defined(_MSC_VER)
diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp
index 8edf5a720..437f1040d 100644
--- a/src/crypto/randomx/jit_compiler_x86.cpp
+++ b/src/crypto/randomx/jit_compiler_x86.cpp
@@ -168,6 +168,12 @@ namespace randomx {
 #   endif
     }
 
+#	ifdef _MSC_VER // rotl32(a, shift): rotate the 32-bit value `a` left by `shift` bits
+	static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return _rotl(a, shift); } // MSVC build: defer to _rotl
+#	else
+	static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return (a << shift) | (a >> (-shift & 31)); } // `-shift & 31` is (32 - shift) mod 32, so shift == 0 shifts right by 0 rather than 32; expects 0 <= shift < 32
+#	endif
+
 	static std::atomic<size_t> codeOffset;
 
 	JitCompilerX86::JitCompilerX86() {
@@ -310,10 +316,10 @@ namespace randomx {
 			InstructionGeneratorX86 gen3 = engine[instr3.opcode];
 			InstructionGeneratorX86 gen4 = engine[instr4.opcode];
 
-			(this->*gen1)(instr1);
-			(this->*gen2)(instr2);
-			(this->*gen3)(instr3);
-			(this->*gen4)(instr4);
+			(*gen1)(this, instr1);
+			(*gen2)(this, instr2);
+			(*gen3)(this, instr3);
+			(*gen4)(this, instr4);
 		}
 
 		*(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast<uint64_t>(pcfg.readReg2) << 16) + (static_cast<uint64_t>(pcfg.readReg3) << 40);
@@ -1060,7 +1066,7 @@ namespace randomx {
 		*(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16);
 		const int shift = instr.getModCond();
 		const uint32_t or_mask = (1UL << RandomX_ConfigurationBase::JumpOffset) << shift;
-		const uint32_t and_mask = ~((1UL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift);
+		const uint32_t and_mask = rotl32(~static_cast<uint32_t>(1UL << (RandomX_ConfigurationBase::JumpOffset - 1)), shift);
 		*(uint32_t*)(p + pos + 3) = (instr.getImm32() | or_mask) & and_mask;
 		*(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16);
 		*(uint32_t*)(p + pos + 10) = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;
diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp
index 3a9163b5e..b8e6a9fe7 100644
--- a/src/crypto/randomx/jit_compiler_x86.hpp
+++ b/src/crypto/randomx/jit_compiler_x86.hpp
@@ -41,7 +41,7 @@ namespace randomx {
 	class JitCompilerX86;
 	class Instruction;
 
-	typedef void(JitCompilerX86::*InstructionGeneratorX86)(const Instruction&);
+	typedef void(*InstructionGeneratorX86)(JitCompilerX86*, const Instruction&);
 
 	constexpr uint32_t CodeSize = 64 * 1024;
 
diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp
index 5cfaddca3..2804b1b78 100644
--- a/src/crypto/randomx/randomx.cpp
+++ b/src/crypto/randomx/randomx.cpp
@@ -267,7 +267,12 @@ void RandomX_ConfigurationBase::Apply()
 		}
 	}
 
-#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x
+typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx::Instruction&);
+
+#define JIT_HANDLE(x, prev) do { /* store handler h_<x> into engine[k] as a plain function pointer */ \
+		const InstructionGeneratorX86_2 p = &randomx::JitCompilerX86::h_##x; \
+		memcpy(randomx::JitCompilerX86::engine + k, &p, sizeof(randomx::JitCompilerX86::engine[k])); /* copy exactly one slot: a pointer-to-member can be wider than the slot (two pointers on the Itanium C++ ABI), so sizeof(p) would spill into engine[k + 1] and past the end of the table */ \
+	} while (0)
 
 #elif defined(XMRIG_ARMv8)