Pooled allocation of RandomX VMs

+0.5% speedup on Zen2 when the whole L3 cache is used (16 threads on 3700X/3800X, 32 threads on 3950X).
This commit is contained in:
SChernykh 2020-04-07 18:31:35 +02:00
parent 7f01c5c6f3
commit 6ae37a9519
11 changed files with 68 additions and 61 deletions

View file

@ -118,7 +118,7 @@ void xmrig::CpuWorker<N>::allocateRandomX_VM()
}
if (!m_vm) {
m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly);
m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_affinity);
}
}
#endif

View file

@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_DATASET_MAX_SIZE 2181038080
// Increase it if some configs use larger programs
#define RANDOMX_PROGRAM_MAX_SIZE 512
#define RANDOMX_PROGRAM_MAX_SIZE 320
// Increase it if some configs use larger scratchpad
#define RANDOMX_SCRATCHPAD_L3_MAX_SIZE 2097152

View file

@ -42,6 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#include "backend/cpu/Cpu.h"
#include "crypto/common/VirtualMemory.h"
#include <mutex>
#include <cassert>
@ -311,6 +313,8 @@ RandomX_ConfigurationKeva RandomX_KevaConfig;
alignas(64) RandomX_ConfigurationBase RandomX_CurrentConfig;
static std::mutex vm_pool_mutex;
extern "C" {
randomx_cache *randomx_create_cache(randomx_flags flags, uint8_t *memory) {
@ -395,45 +399,76 @@ extern "C" {
delete dataset;
}
randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad) {
randomx_vm* randomx_create_vm(randomx_flags flags, randomx_cache* cache, randomx_dataset* dataset, uint8_t* scratchpad, int64_t affinity) {
assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM));
assert(cache == nullptr || cache->isInitialized());
assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM));
randomx_vm *vm = nullptr;
randomx_vm* vm = nullptr;
// Serialize every access to the pools below: they are function-local statics
// shared by all callers of randomx_create_vm.
std::lock_guard<std::mutex> lock(vm_pool_mutex);

// One lazily allocated pool, plus a bump offset into it, per NUMA node index.
constexpr uint32_t VM_POOL_MAX_NODES = 64;
static uint8_t* vm_pool[VM_POOL_MAX_NODES] = {};
static size_t vm_pool_offset[VM_POOL_MAX_NODES] = {};
constexpr size_t VM_POOL_SIZE = 2 * 1024 * 1024;

uint32_t node = xmrig::VirtualMemory::bindToNUMANode(affinity);
// Valid indices are 0 .. VM_POOL_MAX_NODES - 1. The earlier `node > 64` test
// let node == 64 through and indexed one element past the end of both arrays.
if (node >= VM_POOL_MAX_NODES) {
    node = 0;
}

if (!vm_pool[node]) {
    // First try the large-pages allocator; if it yields null, fall back to a
    // 4096-byte aligned allocation of the same size.
    vm_pool[node] = (uint8_t*) xmrig::VirtualMemory::allocateLargePagesMemory(VM_POOL_SIZE);
    if (!vm_pool[node]) {
        vm_pool[node] = (uint8_t*) rx_aligned_alloc(VM_POOL_SIZE, 4096);
    }
    if (!vm_pool[node]) {
        // Neither allocator produced memory: report failure to the caller
        // instead of placement-constructing a VM at a null-based address.
        return nullptr;
    }
}

// Storage for the VM constructed below. The offset is only advanced further
// down in this function, after construction succeeded.
// NOTE(review): that later code resets the offset to 0 when the pool is nearly
// full, which presumably reuses storage of VMs that may still be alive - confirm.
void* p = vm_pool[node] + vm_pool_offset[node];
size_t vm_size = 0;
try {
switch (flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES)) {
case RANDOMX_FLAG_DEFAULT:
vm = new randomx::InterpretedLightVmDefault();
vm = new(p) randomx::InterpretedLightVmDefault();
vm_size = sizeof(randomx::InterpretedLightVmDefault);
break;
case RANDOMX_FLAG_FULL_MEM:
vm = new randomx::InterpretedVmDefault();
vm = new(p) randomx::InterpretedVmDefault();
vm_size = sizeof(randomx::InterpretedVmDefault);
break;
case RANDOMX_FLAG_JIT:
vm = new randomx::CompiledLightVmDefault();
vm = new(p) randomx::CompiledLightVmDefault();
vm_size = sizeof(randomx::CompiledLightVmDefault);
break;
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT:
vm = new randomx::CompiledVmDefault();
vm = new(p) randomx::CompiledVmDefault();
vm_size = sizeof(randomx::CompiledVmDefault);
break;
case RANDOMX_FLAG_HARD_AES:
vm = new randomx::InterpretedLightVmHardAes();
vm = new(p) randomx::InterpretedLightVmHardAes();
vm_size = sizeof(randomx::InterpretedLightVmHardAes);
break;
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES:
vm = new randomx::InterpretedVmHardAes();
vm = new(p) randomx::InterpretedVmHardAes();
vm_size = sizeof(randomx::InterpretedVmHardAes);
break;
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
vm = new randomx::CompiledLightVmHardAes();
vm = new(p) randomx::CompiledLightVmHardAes();
vm_size = sizeof(randomx::CompiledLightVmHardAes);
break;
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
vm = new randomx::CompiledVmHardAes();
vm = new(p) randomx::CompiledVmHardAes();
vm_size = sizeof(randomx::CompiledVmHardAes);
break;
default:
@ -452,10 +487,16 @@ extern "C" {
vm->setFlags(flags);
}
catch (std::exception &ex) {
delete vm;
vm = nullptr;
}
if (vm) {
vm_pool_offset[node] += vm_size;
if (vm_pool_offset[node] + 4096 > VM_POOL_SIZE) {
vm_pool_offset[node] = 0;
}
}
return vm;
}
@ -471,9 +512,7 @@ extern "C" {
machine->setDataset(dataset);
}
// Earlier definition (these three lines appear to be the ones this hunk removes):
// asserted a non-null argument and destroyed the VM with `delete`.
void randomx_destroy_vm(randomx_vm *machine) {
assert(machine != nullptr);
delete machine;
// Current definition: a no-op. The argument is unnamed and unused.
// NOTE(review): no destructor runs and no storage is released here. VMs are now
// placement-constructed inside the pool set up by randomx_create_vm, so the
// memory itself is presumably meant to stay with the pool - but anything a VM
// destructor would release is skipped as well. Confirm this is intended.
void randomx_destroy_vm(randomx_vm*) {
}
void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {

View file

@ -308,7 +308,7 @@ RANDOMX_EXPORT void randomx_release_dataset(randomx_dataset *dataset);
* (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set
* (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set
*/
RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad);
RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset, uint8_t *scratchpad, int64_t affinity);
/**
* Reinitializes a virtual machine with a new Cache. This function should be called anytime

View file

@ -41,16 +41,8 @@ namespace randomx {
class CompiledVm : public VmBase<softAes>
{
public:
// Earlier allocator (appears to be the version this hunk removes): obtains
// `size` bytes from AlignedAllocator<CacheLineSize> and throws std::bad_alloc
// when allocMemory returns nullptr.
void* operator new(size_t size) {
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
if (ptr == nullptr)
throw std::bad_alloc();
return ptr;
}
// Earlier deallocator: hands the storage back to AlignedAllocator, passing
// sizeof(CompiledVm) as the size.
void operator delete(void* ptr) {
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledVm));
}
// Placement form: allocates nothing and returns the caller-supplied pointer
// unchanged, so the object is constructed in storage the caller provides.
void* operator new(size_t, void* ptr) { return ptr; }
// Empty on purpose: `delete` on this class releases no memory.
// NOTE(review): presumably because the storage belongs to the VM pool in
// randomx_create_vm - confirm.
void operator delete(void*) {}
void setDataset(randomx_dataset* dataset) override;
void run(void* seed) override;

View file

@ -37,16 +37,8 @@ namespace randomx {
class CompiledLightVm : public CompiledVm<softAes>
{
public:
// Earlier allocator (appears to be the version this hunk removes): obtains
// `size` bytes from AlignedAllocator<CacheLineSize> and throws std::bad_alloc
// when allocMemory returns nullptr.
void* operator new(size_t size) {
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
if (ptr == nullptr)
throw std::bad_alloc();
return ptr;
}
// Earlier deallocator: hands the storage back to AlignedAllocator, passing
// sizeof(CompiledLightVm) as the size.
void operator delete(void* ptr) {
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledLightVm));
}
// Placement form: allocates nothing and returns the caller-supplied pointer
// unchanged, so the object is constructed in storage the caller provides.
void* operator new(size_t, void* ptr) { return ptr; }
// Empty on purpose: `delete` on this class releases no memory.
// NOTE(review): presumably because the storage belongs to the VM pool in
// randomx_create_vm - confirm.
void operator delete(void*) {}
void setCache(randomx_cache* cache) override;
void setDataset(randomx_dataset* dataset) override { }

View file

@ -49,16 +49,8 @@ namespace randomx {
using VmBase<softAes>::datasetPtr;
using VmBase<softAes>::datasetOffset;
// Earlier allocator (appears to be the version this hunk removes): obtains
// `size` bytes from AlignedAllocator<CacheLineSize> and throws std::bad_alloc
// when allocMemory returns nullptr.
void* operator new(size_t size) {
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
if (ptr == nullptr)
throw std::bad_alloc();
return ptr;
}
// Earlier deallocator: hands the storage back to AlignedAllocator, passing
// sizeof(InterpretedVm) as the size.
void operator delete(void* ptr) {
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(InterpretedVm));
}
// Placement form: allocates nothing and returns the caller-supplied pointer
// unchanged, so the object is constructed in storage the caller provides.
void* operator new(size_t, void* ptr) { return ptr; }
// Empty on purpose: `delete` on this class releases no memory.
// NOTE(review): presumably because the storage belongs to the VM pool in
// randomx_create_vm - confirm.
void operator delete(void*) {}
void run(void* seed) override;
void setDataset(randomx_dataset* dataset) override;

View file

@ -39,16 +39,8 @@ namespace randomx {
using VmBase<softAes>::mem;
using VmBase<softAes>::cachePtr;
// Earlier allocator (appears to be the version this hunk removes): obtains
// `size` bytes from AlignedAllocator<CacheLineSize> and throws std::bad_alloc
// when allocMemory returns nullptr.
void* operator new(size_t size) {
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
if (ptr == nullptr)
throw std::bad_alloc();
return ptr;
}
// Earlier deallocator: hands the storage back to AlignedAllocator, passing
// sizeof(InterpretedLightVm) as the size.
void operator delete(void* ptr) {
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(InterpretedLightVm));
}
// Placement form: allocates nothing and returns the caller-supplied pointer
// unchanged, so the object is constructed in storage the caller provides.
void* operator new(size_t, void* ptr) { return ptr; }
// Empty on purpose: `delete` on this class releases no memory.
// NOTE(review): presumably because the storage belongs to the VM pool in
// randomx_create_vm - confirm.
void operator delete(void*) {}
void setDataset(randomx_dataset* dataset) override { }
void setCache(randomx_cache* cache) override;

View file

@ -31,7 +31,7 @@
#include "crypto/rx/RxVm.h"
xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly)
xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, int64_t affinity)
{
if (!softAes) {
m_flags |= RANDOMX_FLAG_HARD_AES;
@ -53,7 +53,7 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::
m_flags |= RANDOMX_FLAG_AMD;
}
m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad);
m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, affinity);
}

View file

@ -50,7 +50,7 @@ class RxVm
public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm);
RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly);
RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, int64_t affinity);
~RxVm();
inline randomx_vm *get() const { return m_vm; }

View file

@ -116,7 +116,7 @@ static void getResults(JobBundle &bundle, std::vector<JobResult> &results, uint3
return;
}
auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE);
auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE, -1);
for (uint32_t nonce : bundle.nonces) {
*bundle.job.nonce() = nonce;