From 6ae37a9519f488ee682d054f91abe7166d474cfb Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 7 Apr 2020 18:31:35 +0200 Subject: [PATCH 1/5] Pooled allocation of RandomX VMs +0.5% speedup on Zen2 when the whole L3 cache is used (16 threads on 3700X/3800X, 32 threads on 3950X). --- src/backend/cpu/CpuWorker.cpp | 2 +- src/crypto/randomx/configuration.h | 2 +- src/crypto/randomx/randomx.cpp | 67 ++++++++++++++++----- src/crypto/randomx/randomx.h | 2 +- src/crypto/randomx/vm_compiled.hpp | 12 +--- src/crypto/randomx/vm_compiled_light.hpp | 12 +--- src/crypto/randomx/vm_interpreted.hpp | 12 +--- src/crypto/randomx/vm_interpreted_light.hpp | 12 +--- src/crypto/rx/RxVm.cpp | 4 +- src/crypto/rx/RxVm.h | 2 +- src/net/JobResults.cpp | 2 +- 11 files changed, 68 insertions(+), 61 deletions(-) diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 968978382..7d2aa671f 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -118,7 +118,7 @@ void xmrig::CpuWorker::allocateRandomX_VM() } if (!m_vm) { - m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly); + m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_affinity); } } #endif diff --git a/src/crypto/randomx/configuration.h b/src/crypto/randomx/configuration.h index 678cb2f8b..e51b2a92a 100644 --- a/src/crypto/randomx/configuration.h +++ b/src/crypto/randomx/configuration.h @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define RANDOMX_DATASET_MAX_SIZE 2181038080 // Increase it if some configs use larger programs -#define RANDOMX_PROGRAM_MAX_SIZE 512 +#define RANDOMX_PROGRAM_MAX_SIZE 320 // Increase it if some configs use larger scratchpad #define RANDOMX_SCRATCHPAD_L3_MAX_SIZE 2097152 diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index a24c414f6..df9a3c99e 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -42,6 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #include "backend/cpu/Cpu.h" +#include "crypto/common/VirtualMemory.h" +#include #include @@ -311,6 +313,8 @@ RandomX_ConfigurationKeva RandomX_KevaConfig; alignas(64) RandomX_ConfigurationBase RandomX_CurrentConfig; +static std::mutex vm_pool_mutex; + extern "C" { randomx_cache *randomx_create_cache(randomx_flags flags, uint8_t *memory) { @@ -395,45 +399,76 @@ extern "C" { delete dataset; } - randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad) { + randomx_vm* randomx_create_vm(randomx_flags flags, randomx_cache* cache, randomx_dataset* dataset, uint8_t* scratchpad, int64_t affinity) { assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM)); assert(cache == nullptr || cache->isInitialized()); assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM)); - randomx_vm *vm = nullptr; + randomx_vm* vm = nullptr; + + std::lock_guard lock(vm_pool_mutex); + + static uint8_t* vm_pool[64] = {}; + static size_t vm_pool_offset[64] = {}; + + constexpr size_t VM_POOL_SIZE = 2 * 1024 * 1024; + + uint32_t node = xmrig::VirtualMemory::bindToNUMANode(affinity); + if (node > 64) { + node = 0; + } + + if (!vm_pool[node]) { + vm_pool[node] = (uint8_t*) xmrig::VirtualMemory::allocateLargePagesMemory(VM_POOL_SIZE); + if (!vm_pool[node]) { + vm_pool[node] = (uint8_t*) rx_aligned_alloc(VM_POOL_SIZE, 4096); + } + } + + + void* p = vm_pool[node] + vm_pool_offset[node]; + size_t 
vm_size = 0; try { switch (flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES)) { case RANDOMX_FLAG_DEFAULT: - vm = new randomx::InterpretedLightVmDefault(); + vm = new(p) randomx::InterpretedLightVmDefault(); + vm_size = sizeof(randomx::InterpretedLightVmDefault); break; case RANDOMX_FLAG_FULL_MEM: - vm = new randomx::InterpretedVmDefault(); + vm = new(p) randomx::InterpretedVmDefault(); + vm_size = sizeof(randomx::InterpretedVmDefault); break; case RANDOMX_FLAG_JIT: - vm = new randomx::CompiledLightVmDefault(); + vm = new(p) randomx::CompiledLightVmDefault(); + vm_size = sizeof(randomx::CompiledLightVmDefault); break; case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT: - vm = new randomx::CompiledVmDefault(); + vm = new(p) randomx::CompiledVmDefault(); + vm_size = sizeof(randomx::CompiledVmDefault); break; case RANDOMX_FLAG_HARD_AES: - vm = new randomx::InterpretedLightVmHardAes(); + vm = new(p) randomx::InterpretedLightVmHardAes(); + vm_size = sizeof(randomx::InterpretedLightVmHardAes); break; case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES: - vm = new randomx::InterpretedVmHardAes(); + vm = new(p) randomx::InterpretedVmHardAes(); + vm_size = sizeof(randomx::InterpretedVmHardAes); break; case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: - vm = new randomx::CompiledLightVmHardAes(); + vm = new(p) randomx::CompiledLightVmHardAes(); + vm_size = sizeof(randomx::CompiledLightVmHardAes); break; case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES: - vm = new randomx::CompiledVmHardAes(); + vm = new(p) randomx::CompiledVmHardAes(); + vm_size = sizeof(randomx::CompiledVmHardAes); break; default: @@ -452,10 +487,16 @@ extern "C" { vm->setFlags(flags); } catch (std::exception &ex) { - delete vm; vm = nullptr; } + if (vm) { + vm_pool_offset[node] += vm_size; + if (vm_pool_offset[node] + 4096 > VM_POOL_SIZE) { + vm_pool_offset[node] = 0; + } + } + return vm; } @@ -471,9 +512,7 @@ extern "C" { machine->setDataset(dataset); } - void 
randomx_destroy_vm(randomx_vm *machine) { - assert(machine != nullptr); - delete machine; + void randomx_destroy_vm(randomx_vm*) { } void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) { diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 90c9520ef..aebe8dfd4 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -308,7 +308,7 @@ RANDOMX_EXPORT void randomx_release_dataset(randomx_dataset *dataset); * (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set * (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set */ -RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad); +RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad, int64_t affinity); /** * Reinitializes a virtual machine with a new Cache. This function should be called anytime diff --git a/src/crypto/randomx/vm_compiled.hpp b/src/crypto/randomx/vm_compiled.hpp index 6fa824155..22c269068 100644 --- a/src/crypto/randomx/vm_compiled.hpp +++ b/src/crypto/randomx/vm_compiled.hpp @@ -41,16 +41,8 @@ namespace randomx { class CompiledVm : public VmBase { public: - void* operator new(size_t size) { - void* ptr = AlignedAllocator::allocMemory(size); - if (ptr == nullptr) - throw std::bad_alloc(); - return ptr; - } - - void operator delete(void* ptr) { - AlignedAllocator::freeMemory(ptr, sizeof(CompiledVm)); - } + void* operator new(size_t, void* ptr) { return ptr; } + void operator delete(void*) {} void setDataset(randomx_dataset* dataset) override; void run(void* seed) override; diff --git a/src/crypto/randomx/vm_compiled_light.hpp b/src/crypto/randomx/vm_compiled_light.hpp index 4d8638a82..6d11d60ad 100644 --- a/src/crypto/randomx/vm_compiled_light.hpp +++ b/src/crypto/randomx/vm_compiled_light.hpp @@ -37,16 +37,8 @@ namespace randomx { 
class CompiledLightVm : public CompiledVm { public: - void* operator new(size_t size) { - void* ptr = AlignedAllocator::allocMemory(size); - if (ptr == nullptr) - throw std::bad_alloc(); - return ptr; - } - - void operator delete(void* ptr) { - AlignedAllocator::freeMemory(ptr, sizeof(CompiledLightVm)); - } + void* operator new(size_t, void* ptr) { return ptr; } + void operator delete(void*) {} void setCache(randomx_cache* cache) override; void setDataset(randomx_dataset* dataset) override { } diff --git a/src/crypto/randomx/vm_interpreted.hpp b/src/crypto/randomx/vm_interpreted.hpp index b369ab110..d928de749 100644 --- a/src/crypto/randomx/vm_interpreted.hpp +++ b/src/crypto/randomx/vm_interpreted.hpp @@ -49,16 +49,8 @@ namespace randomx { using VmBase::datasetPtr; using VmBase::datasetOffset; - void* operator new(size_t size) { - void* ptr = AlignedAllocator::allocMemory(size); - if (ptr == nullptr) - throw std::bad_alloc(); - return ptr; - } - - void operator delete(void* ptr) { - AlignedAllocator::freeMemory(ptr, sizeof(InterpretedVm)); - } + void* operator new(size_t, void* ptr) { return ptr; } + void operator delete(void*) {} void run(void* seed) override; void setDataset(randomx_dataset* dataset) override; diff --git a/src/crypto/randomx/vm_interpreted_light.hpp b/src/crypto/randomx/vm_interpreted_light.hpp index c8abba2cf..bec7978b1 100644 --- a/src/crypto/randomx/vm_interpreted_light.hpp +++ b/src/crypto/randomx/vm_interpreted_light.hpp @@ -39,16 +39,8 @@ namespace randomx { using VmBase::mem; using VmBase::cachePtr; - void* operator new(size_t size) { - void* ptr = AlignedAllocator::allocMemory(size); - if (ptr == nullptr) - throw std::bad_alloc(); - return ptr; - } - - void operator delete(void* ptr) { - AlignedAllocator::freeMemory(ptr, sizeof(InterpretedLightVm)); - } + void* operator new(size_t, void* ptr) { return ptr; } + void operator delete(void*) {} void setDataset(randomx_dataset* dataset) override { } void setCache(randomx_cache* cache) 
override; diff --git a/src/crypto/rx/RxVm.cpp b/src/crypto/rx/RxVm.cpp index 654775373..5382af316 100644 --- a/src/crypto/rx/RxVm.cpp +++ b/src/crypto/rx/RxVm.cpp @@ -31,7 +31,7 @@ #include "crypto/rx/RxVm.h" -xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly) +xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, int64_t affinity) { if (!softAes) { m_flags |= RANDOMX_FLAG_HARD_AES; @@ -53,7 +53,7 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig:: m_flags |= RANDOMX_FLAG_AMD; } - m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad); + m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, affinity); } diff --git a/src/crypto/rx/RxVm.h b/src/crypto/rx/RxVm.h index 79c3b9d66..93603529e 100644 --- a/src/crypto/rx/RxVm.h +++ b/src/crypto/rx/RxVm.h @@ -50,7 +50,7 @@ class RxVm public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm); - RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly); + RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, int64_t affinity); ~RxVm(); inline randomx_vm *get() const { return m_vm; } diff --git a/src/net/JobResults.cpp b/src/net/JobResults.cpp index 49bff807f..306367171 100644 --- a/src/net/JobResults.cpp +++ b/src/net/JobResults.cpp @@ -116,7 +116,7 @@ static void getResults(JobBundle &bundle, std::vector &results, uint3 return; } - auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE); + auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE, -1); for (uint32_t nonce : bundle.nonces) { *bundle.job.nonce() = nonce; From 69cbfd682a231134d6643f5f5cc31786f2eae9db Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 7 Apr 2020 18:46:22 +0200 Subject: [PATCH 2/5] Use node number
instead of affinity --- src/backend/cpu/CpuWorker.cpp | 2 +- src/crypto/randomx/randomx.cpp | 7 +++---- src/crypto/randomx/randomx.h | 2 +- src/crypto/rx/RxVm.cpp | 4 ++-- src/crypto/rx/RxVm.h | 2 +- src/net/JobResults.cpp | 2 +- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 7d2aa671f..756c34eac 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -118,7 +118,7 @@ void xmrig::CpuWorker::allocateRandomX_VM() } if (!m_vm) { - m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_affinity); + m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_node); } } #endif diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index df9a3c99e..ae960df03 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -399,25 +399,24 @@ extern "C" { delete dataset; } - randomx_vm* randomx_create_vm(randomx_flags flags, randomx_cache* cache, randomx_dataset* dataset, uint8_t* scratchpad, int64_t affinity) { + randomx_vm* randomx_create_vm(randomx_flags flags, randomx_cache* cache, randomx_dataset* dataset, uint8_t* scratchpad, uint32_t node) { assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM)); assert(cache == nullptr || cache->isInitialized()); assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM)); randomx_vm* vm = nullptr; - std::lock_guard lock(vm_pool_mutex); - static uint8_t* vm_pool[64] = {}; static size_t vm_pool_offset[64] = {}; constexpr size_t VM_POOL_SIZE = 2 * 1024 * 1024; - uint32_t node = xmrig::VirtualMemory::bindToNUMANode(affinity); if (node > 64) { node = 0; } + std::lock_guard lock(vm_pool_mutex); + if (!vm_pool[node]) { vm_pool[node] = (uint8_t*) xmrig::VirtualMemory::allocateLargePagesMemory(VM_POOL_SIZE); if (!vm_pool[node]) { diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index aebe8dfd4..3ca65c8fd 100644 --- 
a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -308,7 +308,7 @@ RANDOMX_EXPORT void randomx_release_dataset(randomx_dataset *dataset); * (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set * (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set */ -RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad, int64_t affinity); +RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad, uint32_t node); /** * Reinitializes a virtual machine with a new Cache. This function should be called anytime diff --git a/src/crypto/rx/RxVm.cpp b/src/crypto/rx/RxVm.cpp index 5382af316..8a354616f 100644 --- a/src/crypto/rx/RxVm.cpp +++ b/src/crypto/rx/RxVm.cpp @@ -31,7 +31,7 @@ #include "crypto/rx/RxVm.h" -xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, int64_t affinity) +xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node) { if (!softAes) { m_flags |= RANDOMX_FLAG_HARD_AES; @@ -53,7 +53,7 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig:: m_flags |= RANDOMX_FLAG_AMD; } - m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, affinity); + m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? 
dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node); } diff --git a/src/crypto/rx/RxVm.h b/src/crypto/rx/RxVm.h index 93603529e..519a5f2ef 100644 --- a/src/crypto/rx/RxVm.h +++ b/src/crypto/rx/RxVm.h @@ -50,7 +50,7 @@ class RxVm public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm); - RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, int64_t affinity); + RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node); ~RxVm(); inline randomx_vm *get() const { return m_vm; } diff --git a/src/net/JobResults.cpp b/src/net/JobResults.cpp index 306367171..6103d40c1 100644 --- a/src/net/JobResults.cpp +++ b/src/net/JobResults.cpp @@ -116,7 +116,7 @@ static void getResults(JobBundle &bundle, std::vector &results, uint3 return; } - auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE, -1); + auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE, 0); for (uint32_t nonce : bundle.nonces) { *bundle.job.nonce() = nonce; From 4d0edde66db0e88034ddf6a3152a870543132552 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 7 Apr 2020 18:48:02 +0200 Subject: [PATCH 3/5] Fixed pool lock --- src/crypto/randomx/randomx.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index ae960df03..f5b4e682d 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -406,6 +406,8 @@ extern "C" { randomx_vm* vm = nullptr; + std::lock_guard lock(vm_pool_mutex); + static uint8_t* vm_pool[64] = {}; static size_t vm_pool_offset[64] = {}; @@ -415,8 +417,6 @@ extern "C" { node = 0; } - std::lock_guard lock(vm_pool_mutex); - if (!vm_pool[node]) { vm_pool[node] = (uint8_t*) xmrig::VirtualMemory::allocateLargePagesMemory(VM_POOL_SIZE); if (!vm_pool[node]) { From 39bd3ca1dae01f97b9a61bd83fd4a26a529fa1ac Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 7 Apr 2020 18:53:08 +0200 
Subject: [PATCH 4/5] Fix off-by-one error --- src/crypto/randomx/randomx.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index f5b4e682d..b1747cbc3 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -413,7 +413,7 @@ extern "C" { constexpr size_t VM_POOL_SIZE = 2 * 1024 * 1024; - if (node > 64) { + if (node >= 64) { node = 0; } From 92810ad7612b79a97383322621d12e6fff3c27fe Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 8 Apr 2020 08:31:53 +0200 Subject: [PATCH 5/5] Fixed VM destruction --- src/crypto/randomx/randomx.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index b1747cbc3..f0adf5414 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -511,7 +511,8 @@ extern "C" { machine->setDataset(dataset); } - void randomx_destroy_vm(randomx_vm*) { + void randomx_destroy_vm(randomx_vm* vm) { + vm->~randomx_vm(); } void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {