RandomX: added huge-pages-jit config parameter

Disabled (false) by default. Enabling it gives about a 0.2% hashrate boost on a Ryzen 7 3700X with 16 threads, but on Ryzen the hashrate may vary from one launch to the next. Use with caution.
This commit is contained in:
SChernykh 2020-10-07 17:42:55 +02:00
parent a8466a139c
commit 44dcded866
23 changed files with 62 additions and 26 deletions

View file

@ -36,6 +36,7 @@ namespace xmrig {
static const char *kEnabled = "enabled"; static const char *kEnabled = "enabled";
static const char *kHugePages = "huge-pages"; static const char *kHugePages = "huge-pages";
static const char *kHugePagesJit = "huge-pages-jit";
static const char *kHwAes = "hw-aes"; static const char *kHwAes = "hw-aes";
static const char *kMaxThreadsHint = "max-threads-hint"; static const char *kMaxThreadsHint = "max-threads-hint";
static const char *kMemoryPool = "memory-pool"; static const char *kMemoryPool = "memory-pool";
@ -76,6 +77,7 @@ rapidjson::Value xmrig::CpuConfig::toJSON(rapidjson::Document &doc) const
obj.AddMember(StringRef(kEnabled), m_enabled, allocator); obj.AddMember(StringRef(kEnabled), m_enabled, allocator);
obj.AddMember(StringRef(kHugePages), m_hugePages, allocator); obj.AddMember(StringRef(kHugePages), m_hugePages, allocator);
obj.AddMember(StringRef(kHugePagesJit), m_hugePagesJit, allocator);
obj.AddMember(StringRef(kHwAes), m_aes == AES_AUTO ? Value(kNullType) : Value(m_aes == AES_HW), allocator); obj.AddMember(StringRef(kHwAes), m_aes == AES_AUTO ? Value(kNullType) : Value(m_aes == AES_HW), allocator);
obj.AddMember(StringRef(kPriority), priority() != -1 ? Value(priority()) : Value(kNullType), allocator); obj.AddMember(StringRef(kPriority), priority() != -1 ? Value(priority()) : Value(kNullType), allocator);
obj.AddMember(StringRef(kMemoryPool), m_memoryPool < 1 ? Value(m_memoryPool < 0) : Value(m_memoryPool), allocator); obj.AddMember(StringRef(kMemoryPool), m_memoryPool < 1 ? Value(m_memoryPool < 0) : Value(m_memoryPool), allocator);
@ -134,6 +136,7 @@ void xmrig::CpuConfig::read(const rapidjson::Value &value)
if (value.IsObject()) { if (value.IsObject()) {
m_enabled = Json::getBool(value, kEnabled, m_enabled); m_enabled = Json::getBool(value, kEnabled, m_enabled);
m_hugePages = Json::getBool(value, kHugePages, m_hugePages); m_hugePages = Json::getBool(value, kHugePages, m_hugePages);
m_hugePagesJit = Json::getBool(value, kHugePagesJit, m_hugePagesJit);
m_limit = Json::getUint(value, kMaxThreadsHint, m_limit); m_limit = Json::getUint(value, kMaxThreadsHint, m_limit);
m_yield = Json::getBool(value, kYield, m_yield); m_yield = Json::getBool(value, kYield, m_yield);

View file

@ -54,6 +54,7 @@ public:
inline bool isEnabled() const { return m_enabled; } inline bool isEnabled() const { return m_enabled; }
inline bool isHugePages() const { return m_hugePages; } inline bool isHugePages() const { return m_hugePages; }
inline bool isHugePagesJit() const { return m_hugePagesJit; }
inline bool isShouldSave() const { return m_shouldSave; } inline bool isShouldSave() const { return m_shouldSave; }
inline bool isYield() const { return m_yield; } inline bool isYield() const { return m_yield; }
inline const Assembly &assembly() const { return m_assembly; } inline const Assembly &assembly() const { return m_assembly; }
@ -76,6 +77,7 @@ private:
bool m_astrobwtAVX2 = false; bool m_astrobwtAVX2 = false;
bool m_enabled = true; bool m_enabled = true;
bool m_hugePages = true; bool m_hugePages = true;
bool m_hugePagesJit = false;
bool m_shouldSave = false; bool m_shouldSave = false;
bool m_yield = true; bool m_yield = true;
int m_astrobwtMaxSize = 550; int m_astrobwtMaxSize = 550;

View file

@ -27,6 +27,7 @@
"cpu": { "cpu": {
"enabled": true, "enabled": true,
"huge-pages": true, "huge-pages": true,
"huge-pages-jit": false,
"hw-aes": null, "hw-aes": null,
"priority": null, "priority": null,
"memory-pool": false, "memory-pool": false,

View file

@ -61,6 +61,7 @@ R"===(
"cpu": { "cpu": {
"enabled": true, "enabled": true,
"huge-pages": true, "huge-pages": true,
"huge-pages-jit": false,
"hw-aes": null, "hw-aes": null,
"priority": null, "priority": null,
"memory-pool": false, "memory-pool": false,

View file

@ -39,7 +39,7 @@ void xmrig::CnCtx::create(cryptonight_ctx **ctx, uint8_t *memory, size_t size, s
cryptonight_ctx *c = static_cast<cryptonight_ctx *>(_mm_malloc(sizeof(cryptonight_ctx), 4096)); cryptonight_ctx *c = static_cast<cryptonight_ctx *>(_mm_malloc(sizeof(cryptonight_ctx), 4096));
c->memory = memory + (i * size); c->memory = memory + (i * size);
c->generated_code = reinterpret_cast<cn_mainloop_fun_ms_abi>(VirtualMemory::allocateExecutableMemory(0x4000)); c->generated_code = reinterpret_cast<cn_mainloop_fun_ms_abi>(VirtualMemory::allocateExecutableMemory(0x4000, false));
c->generated_code_data.algo = Algorithm::INVALID; c->generated_code_data.algo = Algorithm::INVALID;
c->generated_code_data.height = std::numeric_limits<uint64_t>::max(); c->generated_code_data.height = std::numeric_limits<uint64_t>::max();

View file

@ -139,7 +139,7 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
static void patchAsmVariants() static void patchAsmVariants()
{ {
const int allocation_size = 81920; const int allocation_size = 81920;
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size)); auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000); cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000); cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000);

View file

@ -47,7 +47,11 @@ xmrig::MemoryPool::MemoryPool(size_t size, bool hugePages, uint32_t node)
return; return;
} }
m_memory = new VirtualMemory(size * pageSize, hugePages, false, false, node); constexpr size_t alignment = 1 << 24;
m_memory = new VirtualMemory(size * pageSize + alignment, hugePages, false, false, node);
m_alignOffset = (alignment - (((size_t)m_memory->scratchpad()) % alignment)) % alignment;
} }
@ -71,7 +75,7 @@ uint8_t *xmrig::MemoryPool::get(size_t size, uint32_t)
return nullptr; return nullptr;
} }
uint8_t *out = m_memory->scratchpad() + m_offset; uint8_t *out = m_memory->scratchpad() + m_alignOffset + m_offset;
m_offset += size; m_offset += size;
++m_refs; ++m_refs;

View file

@ -54,6 +54,7 @@ protected:
private: private:
size_t m_refs = 0; size_t m_refs = 0;
size_t m_offset = 0; size_t m_offset = 0;
size_t m_alignOffset = 0;
VirtualMemory *m_memory = nullptr; VirtualMemory *m_memory = nullptr;
}; };

View file

@ -61,7 +61,7 @@ public:
static bool isHugepagesAvailable(); static bool isHugepagesAvailable();
static bool isOneGbPagesAvailable(); static bool isOneGbPagesAvailable();
static uint32_t bindToNUMANode(int64_t affinity); static uint32_t bindToNUMANode(int64_t affinity);
static void *allocateExecutableMemory(size_t size); static void *allocateExecutableMemory(size_t size, bool hugePages);
static void *allocateLargePagesMemory(size_t size); static void *allocateLargePagesMemory(size_t size);
static void *allocateOneGbPagesMemory(size_t size); static void *allocateOneGbPagesMemory(size_t size);
static void destroy(); static void destroy();

View file

@ -63,7 +63,7 @@ bool xmrig::VirtualMemory::isOneGbPagesAvailable()
} }
void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size) void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages)
{ {
# if defined(__APPLE__) # if defined(__APPLE__)
void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
@ -77,7 +77,12 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size)
constexpr int flag_2mb = 0; constexpr int flag_2mb = 0;
# endif # endif
void *mem = mmap(0, align(size), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | flag_2mb, -1, 0); void *mem = nullptr;
if (hugePages) {
mem = mmap(0, align(size), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | flag_2mb, -1, 0);
}
if (!mem) { if (!mem) {
void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
} }

View file

@ -162,9 +162,13 @@ bool xmrig::VirtualMemory::isOneGbPagesAvailable()
} }
void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size) void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages)
{ {
void* result = VirtualAlloc(nullptr, align(size), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_EXECUTE_READWRITE); void* result = nullptr;
if (hugePages) {
result = VirtualAlloc(nullptr, align(size), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_EXECUTE_READWRITE);
}
if (!result) { if (!result) {
result = VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); result = VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);

View file

@ -43,7 +43,7 @@ struct randomx_dataset {
/* Global scope for C binding */ /* Global scope for C binding */
struct randomx_cache { struct randomx_cache {
uint8_t* memory = nullptr; uint8_t* memory = nullptr;
randomx::JitCompiler* jit; randomx::JitCompiler* jit = nullptr;
randomx::CacheInitializeFunc* initialize; randomx::CacheInitializeFunc* initialize;
randomx::DatasetInitFunc* datasetInit; randomx::DatasetInitFunc* datasetInit;
randomx::SuperscalarProgram programs[RANDOMX_CACHE_MAX_ACCESSES]; randomx::SuperscalarProgram programs[RANDOMX_CACHE_MAX_ACCESSES];

View file

@ -33,6 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/reciprocal.h" #include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/virtual_memory.hpp" #include "crypto/randomx/virtual_memory.hpp"
// No-op in this translation unit: the flag is accepted and discarded.
void randomx_set_huge_pages_jit(bool /*hugePages*/)
{
}
namespace ARMV8A { namespace ARMV8A {
constexpr uint32_t B = 0x14000000; constexpr uint32_t B = 0x14000000;
@ -89,7 +91,7 @@ static size_t CalcDatasetItemSize()
constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
JitCompilerA64::JitCompilerA64() JitCompilerA64::JitCompilerA64(bool)
: code((uint8_t*) allocExecutableMemory(CodeSize + CalcDatasetItemSize())) : code((uint8_t*) allocExecutableMemory(CodeSize + CalcDatasetItemSize()))
, literalPos(ImulRcpLiteralsEnd) , literalPos(ImulRcpLiteralsEnd)
, num32bitLiterals(0) , num32bitLiterals(0)

View file

@ -46,7 +46,7 @@ namespace randomx {
class JitCompilerA64 { class JitCompilerA64 {
public: public:
JitCompilerA64(); explicit JitCompilerA64(bool);
~JitCompilerA64(); ~JitCompilerA64();
void prepare() {} void prepare() {}

View file

@ -41,7 +41,7 @@ namespace randomx {
class JitCompilerFallback { class JitCompilerFallback {
public: public:
JitCompilerFallback() { explicit JitCompilerFallback(bool) {
throw std::runtime_error("JIT compilation is not supported on this platform"); throw std::runtime_error("JIT compilation is not supported on this platform");
} }
void prepare() {} void prepare() {}

View file

@ -49,6 +49,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# include <cpuid.h> # include <cpuid.h>
#endif #endif
// File-local "huge pages for JIT code" switch, off until explicitly enabled.
// JitCompilerX86's constructor asks for huge pages only when this flag and
// its own hugePagesEnable argument are both true.
static bool hugePagesJIT{ false };

// Records the requested setting in hugePagesJIT; nothing is allocated here.
void randomx_set_huge_pages_jit(bool hugePages) { hugePagesJIT = hugePages; }
namespace randomx { namespace randomx {
/* /*
@ -175,8 +182,9 @@ namespace randomx {
# endif # endif
static std::atomic<size_t> codeOffset; static std::atomic<size_t> codeOffset;
constexpr size_t codeOffsetIncrement = 59 * 64;
JitCompilerX86::JitCompilerX86() { JitCompilerX86::JitCompilerX86(bool hugePagesEnable) {
BranchesWithin32B = xmrig::Cpu::info()->jccErratum(); BranchesWithin32B = xmrig::Cpu::info()->jccErratum();
int32_t info[4]; int32_t info[4];
@ -186,9 +194,11 @@ namespace randomx {
cpuid(0x80000001, info); cpuid(0x80000001, info);
hasXOP = ((info[2] & (1 << 11)) != 0); hasXOP = ((info[2] & (1 << 11)) != 0);
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2); allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2, hugePagesJIT && hugePagesEnable);
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets // Shift code base address to improve caching - all threads will use different L2/L3 cache sets
code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize); code = allocatedCode + (codeOffset.fetch_add(codeOffsetIncrement) % CodeSize);
memcpy(code, codePrologue, prologueSize); memcpy(code, codePrologue, prologueSize);
if (hasXOP) { if (hasXOP) {
memcpy(code + prologueSize, codeLoopLoadXOP, loopLoadXOPSize); memcpy(code + prologueSize, codeLoopLoadXOP, loopLoadXOPSize);
@ -207,6 +217,7 @@ namespace randomx {
} }
JitCompilerX86::~JitCompilerX86() { JitCompilerX86::~JitCompilerX86() {
codeOffset.fetch_sub(codeOffsetIncrement);
freePagedMemory(allocatedCode, CodeSize); freePagedMemory(allocatedCode, CodeSize);
} }

View file

@ -47,7 +47,7 @@ namespace randomx {
class JitCompilerX86 { class JitCompilerX86 {
public: public:
JitCompilerX86(); explicit JitCompilerX86(bool hugePagesEnable);
~JitCompilerX86(); ~JitCompilerX86();
void prepare(); void prepare();
void generateProgram(Program&, ProgramConfiguration&, uint32_t); void generateProgram(Program&, ProgramConfiguration&, uint32_t);

View file

@ -381,7 +381,7 @@ extern "C" {
break; break;
case RANDOMX_FLAG_JIT: case RANDOMX_FLAG_JIT:
cache->jit = new randomx::JitCompiler(); cache->jit = new randomx::JitCompiler(false);
cache->initialize = &randomx::initCacheCompile; cache->initialize = &randomx::initCacheCompile;
cache->datasetInit = cache->jit->getDatasetInitFunc(); cache->datasetInit = cache->jit->getDatasetInitFunc();
cache->memory = memory; cache->memory = memory;

View file

@ -169,6 +169,7 @@ void randomx_apply_config(const T& config)
} }
void randomx_set_scratchpad_prefetch_mode(int mode); void randomx_set_scratchpad_prefetch_mode(int mode);
void randomx_set_huge_pages_jit(bool hugePages);
#if defined(__cplusplus) #if defined(__cplusplus)
extern "C" { extern "C" {

View file

@ -33,8 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/virtual_memory.hpp" #include "crypto/randomx/virtual_memory.hpp"
void* allocExecutableMemory(std::size_t bytes) { void* allocExecutableMemory(std::size_t bytes, bool hugePages) {
void *mem = xmrig::VirtualMemory::allocateExecutableMemory(bytes); void *mem = xmrig::VirtualMemory::allocateExecutableMemory(bytes, hugePages);
if (mem == nullptr) { if (mem == nullptr) {
throw std::runtime_error("Failed to allocate executable memory"); throw std::runtime_error("Failed to allocate executable memory");
} }

View file

@ -30,6 +30,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstddef> #include <cstddef>
void* allocExecutableMemory(std::size_t); void* allocExecutableMemory(std::size_t, bool);
void* allocLargePagesMemory(std::size_t); void* allocLargePagesMemory(std::size_t);
void freePagedMemory(void*, std::size_t); void freePagedMemory(void*, std::size_t);

View file

@ -58,7 +58,7 @@ namespace randomx {
protected: protected:
void execute(); void execute();
JitCompiler compiler; JitCompiler compiler{ true };
}; };
using CompiledVmDefault = CompiledVm<1>; using CompiledVmDefault = CompiledVm<1>;

View file

@ -100,6 +100,7 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
} }
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode()); randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
randomx_set_huge_pages_jit(cpu.isHugePagesJit());
if (isReady(seed)) { if (isReady(seed)) {
return true; return true;