mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-18 10:01:06 +00:00
Optimized dataset read for Ryzen CPUs
Removed register dependency in dataset read, +0.8% speedup on average.
This commit is contained in:
parent
4dec063472
commit
d0df824599
17 changed files with 81 additions and 20 deletions
|
@ -96,7 +96,7 @@ void xmrig::CpuWorker<N>::allocateRandomX_VM()
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m_vm) {
|
if (!m_vm) {
|
||||||
m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES);
|
m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
19
src/crypto/randomx/asm/program_read_dataset_ryzen.inc
Normal file
19
src/crypto/randomx/asm/program_read_dataset_ryzen.inc
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
mov rcx, rbp ;# ecx = ma
|
||||||
|
shr rcx, 32
|
||||||
|
and ecx, RANDOMX_DATASET_BASE_MASK
|
||||||
|
xor rbp, rax ;# modify "mx"
|
||||||
|
mov rax, qword ptr [rdi+rcx]
|
||||||
|
mov edx, ebp ;# edx = mx
|
||||||
|
and edx, RANDOMX_DATASET_BASE_MASK
|
||||||
|
prefetchnta byte ptr [rdi+rdx]
|
||||||
|
ror rbp, 32 ;# swap "ma" and "mx"
|
||||||
|
add rcx, rdi ;# dataset cache line
|
||||||
|
xor r8, rax
|
||||||
|
xor r9, qword ptr [rcx+8]
|
||||||
|
xor r10, qword ptr [rcx+16]
|
||||||
|
xor r11, qword ptr [rcx+24]
|
||||||
|
xor r12, qword ptr [rcx+32]
|
||||||
|
xor r13, qword ptr [rcx+40]
|
||||||
|
xor r14, qword ptr [rcx+48]
|
||||||
|
xor r15, qword ptr [rcx+56]
|
||||||
|
|
|
@ -118,7 +118,7 @@ static void clear_code_cache(char* p1, char* p2)
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config)
|
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config, uint32_t)
|
||||||
{
|
{
|
||||||
uint32_t codePos = MainLoopBegin + 4;
|
uint32_t codePos = MainLoopBegin + 4;
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,7 @@ namespace randomx {
|
||||||
JitCompilerA64();
|
JitCompilerA64();
|
||||||
~JitCompilerA64();
|
~JitCompilerA64();
|
||||||
|
|
||||||
void generateProgram(Program&, ProgramConfiguration&);
|
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
|
||||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
||||||
|
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
|
|
|
@ -44,7 +44,7 @@ namespace randomx {
|
||||||
JitCompilerFallback() {
|
JitCompilerFallback() {
|
||||||
throw std::runtime_error("JIT compilation is not supported on this platform");
|
throw std::runtime_error("JIT compilation is not supported on this platform");
|
||||||
}
|
}
|
||||||
void generateProgram(Program&, ProgramConfiguration&) {
|
void generateProgram(Program&, ProgramConfiguration&, uint32_t) {
|
||||||
|
|
||||||
}
|
}
|
||||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
|
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
|
||||||
|
|
|
@ -89,7 +89,6 @@ namespace randomx {
|
||||||
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
|
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
|
||||||
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
|
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
|
||||||
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
||||||
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
|
|
||||||
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
|
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||||
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
|
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
|
||||||
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
|
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
|
||||||
|
@ -105,7 +104,6 @@ namespace randomx {
|
||||||
const int32_t prefetchScratchpadSize = codePrefetchScratchpadEnd - codePrefetchScratchpad;
|
const int32_t prefetchScratchpadSize = codePrefetchScratchpadEnd - codePrefetchScratchpad;
|
||||||
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
||||||
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
|
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
|
||||||
const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
|
|
||||||
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
|
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
|
||||||
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
|
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
|
||||||
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
|
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
|
||||||
|
@ -301,10 +299,22 @@ namespace randomx {
|
||||||
freePagedMemory(allocatedCode, CodeSize);
|
freePagedMemory(allocatedCode, CodeSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
|
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
||||||
generateProgramPrologue(prog, pcfg);
|
generateProgramPrologue(prog, pcfg);
|
||||||
memcpy(code + codePos, RandomX_CurrentConfig.codeReadDatasetTweaked, readDatasetSize);
|
|
||||||
codePos += readDatasetSize;
|
uint8_t* p;
|
||||||
|
uint32_t n;
|
||||||
|
if (flags & RANDOMX_FLAG_RYZEN) {
|
||||||
|
p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked;
|
||||||
|
n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
p = RandomX_CurrentConfig.codeReadDatasetTweaked;
|
||||||
|
n = RandomX_CurrentConfig.codeReadDatasetTweakedSize;
|
||||||
|
}
|
||||||
|
memcpy(code + codePos, p, n);
|
||||||
|
codePos += n;
|
||||||
|
|
||||||
generateProgramEpilogue(prog, pcfg);
|
generateProgramEpilogue(prog, pcfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,7 @@ namespace randomx {
|
||||||
public:
|
public:
|
||||||
JitCompilerX86();
|
JitCompilerX86();
|
||||||
~JitCompilerX86();
|
~JitCompilerX86();
|
||||||
void generateProgram(Program&, ProgramConfiguration&);
|
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
|
||||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &);
|
void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &);
|
||||||
|
|
|
@ -45,6 +45,7 @@
|
||||||
.global DECL(randomx_program_loop_load)
|
.global DECL(randomx_program_loop_load)
|
||||||
.global DECL(randomx_program_start)
|
.global DECL(randomx_program_start)
|
||||||
.global DECL(randomx_program_read_dataset)
|
.global DECL(randomx_program_read_dataset)
|
||||||
|
.global DECL(randomx_program_read_dataset_ryzen)
|
||||||
.global DECL(randomx_program_read_dataset_sshash_init)
|
.global DECL(randomx_program_read_dataset_sshash_init)
|
||||||
.global DECL(randomx_program_read_dataset_sshash_fin)
|
.global DECL(randomx_program_read_dataset_sshash_fin)
|
||||||
.global DECL(randomx_program_loop_store)
|
.global DECL(randomx_program_loop_store)
|
||||||
|
@ -110,6 +111,9 @@ DECL(randomx_program_start):
|
||||||
DECL(randomx_program_read_dataset):
|
DECL(randomx_program_read_dataset):
|
||||||
#include "asm/program_read_dataset.inc"
|
#include "asm/program_read_dataset.inc"
|
||||||
|
|
||||||
|
DECL(randomx_program_read_dataset_ryzen):
|
||||||
|
#include "asm/program_read_dataset_ryzen.inc"
|
||||||
|
|
||||||
DECL(randomx_program_read_dataset_sshash_init):
|
DECL(randomx_program_read_dataset_sshash_init):
|
||||||
#include "asm/program_read_dataset_sshash_init.inc"
|
#include "asm/program_read_dataset_sshash_init.inc"
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,7 @@ PUBLIC randomx_program_loop_begin
|
||||||
PUBLIC randomx_program_loop_load
|
PUBLIC randomx_program_loop_load
|
||||||
PUBLIC randomx_program_start
|
PUBLIC randomx_program_start
|
||||||
PUBLIC randomx_program_read_dataset
|
PUBLIC randomx_program_read_dataset
|
||||||
|
PUBLIC randomx_program_read_dataset_ryzen
|
||||||
PUBLIC randomx_program_read_dataset_sshash_init
|
PUBLIC randomx_program_read_dataset_sshash_init
|
||||||
PUBLIC randomx_program_read_dataset_sshash_fin
|
PUBLIC randomx_program_read_dataset_sshash_fin
|
||||||
PUBLIC randomx_dataset_init
|
PUBLIC randomx_dataset_init
|
||||||
|
@ -103,6 +104,10 @@ randomx_program_read_dataset PROC
|
||||||
include asm/program_read_dataset.inc
|
include asm/program_read_dataset.inc
|
||||||
randomx_program_read_dataset ENDP
|
randomx_program_read_dataset ENDP
|
||||||
|
|
||||||
|
randomx_program_read_dataset_ryzen PROC
|
||||||
|
include asm/program_read_dataset_ryzen.inc
|
||||||
|
randomx_program_read_dataset_ryzen ENDP
|
||||||
|
|
||||||
randomx_program_read_dataset_sshash_init PROC
|
randomx_program_read_dataset_sshash_init PROC
|
||||||
include asm/program_read_dataset_sshash_init.inc
|
include asm/program_read_dataset_sshash_init.inc
|
||||||
randomx_program_read_dataset_sshash_init ENDP
|
randomx_program_read_dataset_sshash_init ENDP
|
||||||
|
|
|
@ -37,6 +37,7 @@ extern "C" {
|
||||||
void randomx_program_loop_load();
|
void randomx_program_loop_load();
|
||||||
void randomx_program_start();
|
void randomx_program_start();
|
||||||
void randomx_program_read_dataset();
|
void randomx_program_read_dataset();
|
||||||
|
void randomx_program_read_dataset_ryzen();
|
||||||
void randomx_program_read_dataset_sshash_init();
|
void randomx_program_read_dataset_sshash_init();
|
||||||
void randomx_program_read_dataset_sshash_fin();
|
void randomx_program_read_dataset_sshash_fin();
|
||||||
void randomx_program_loop_store();
|
void randomx_program_loop_store();
|
||||||
|
|
|
@ -157,8 +157,15 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset;
|
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset;
|
||||||
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_ryzen;
|
||||||
memcpy(codeReadDatasetTweaked, a, b - a);
|
memcpy(codeReadDatasetTweaked, a, b - a);
|
||||||
|
codeReadDatasetTweakedSize = b - a;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_ryzen;
|
||||||
|
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||||
|
memcpy(codeReadDatasetRyzenTweaked, a, b - a);
|
||||||
|
codeReadDatasetRyzenTweakedSize = b - a;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||||
|
@ -191,10 +198,11 @@ void RandomX_ConfigurationBase::Apply()
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
|
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
|
||||||
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
|
// Not needed right now because all variants use default dataset base size
|
||||||
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
|
//const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
|
||||||
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
|
//*(uint32_t*)(codeReadDatasetTweaked + 9) = DatasetBaseMask;
|
||||||
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
//*(uint32_t*)(codeReadDatasetTweaked + 24) = DatasetBaseMask;
|
||||||
|
//*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
||||||
|
|
||||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
||||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
||||||
|
@ -435,6 +443,7 @@ extern "C" {
|
||||||
}
|
}
|
||||||
|
|
||||||
vm->setScratchpad(scratchpad);
|
vm->setScratchpad(scratchpad);
|
||||||
|
vm->setFlags(flags);
|
||||||
}
|
}
|
||||||
catch (std::exception &ex) {
|
catch (std::exception &ex) {
|
||||||
delete vm;
|
delete vm;
|
||||||
|
|
|
@ -49,6 +49,7 @@ enum randomx_flags {
|
||||||
RANDOMX_FLAG_FULL_MEM = 4,
|
RANDOMX_FLAG_FULL_MEM = 4,
|
||||||
RANDOMX_FLAG_JIT = 8,
|
RANDOMX_FLAG_JIT = 8,
|
||||||
RANDOMX_FLAG_1GB_PAGES = 16,
|
RANDOMX_FLAG_1GB_PAGES = 16,
|
||||||
|
RANDOMX_FLAG_RYZEN = 64,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -118,7 +119,10 @@ struct RandomX_ConfigurationBase
|
||||||
rx_vec_i128 fillAes4Rx4_Key[8];
|
rx_vec_i128 fillAes4Rx4_Key[8];
|
||||||
|
|
||||||
uint8_t codeShhPrefetchTweaked[20];
|
uint8_t codeShhPrefetchTweaked[20];
|
||||||
uint8_t codeReadDatasetTweaked[64];
|
uint8_t codeReadDatasetTweaked[72];
|
||||||
|
uint32_t codeReadDatasetTweakedSize;
|
||||||
|
uint8_t codeReadDatasetRyzenTweaked[72];
|
||||||
|
uint32_t codeReadDatasetRyzenTweakedSize;
|
||||||
uint8_t codeReadDatasetLightSshInitTweaked[68];
|
uint8_t codeReadDatasetLightSshInitTweaked[68];
|
||||||
uint8_t codePrefetchScratchpadTweaked[32];
|
uint8_t codePrefetchScratchpadTweaked[32];
|
||||||
|
|
||||||
|
|
|
@ -46,6 +46,9 @@ public:
|
||||||
virtual void run(void* seed) = 0;
|
virtual void run(void* seed) = 0;
|
||||||
void resetRoundingMode();
|
void resetRoundingMode();
|
||||||
|
|
||||||
|
void setFlags(uint32_t flags) { vm_flags = flags; }
|
||||||
|
uint32_t getFlags() const { return vm_flags; }
|
||||||
|
|
||||||
randomx::RegisterFile *getRegisterFile() {
|
randomx::RegisterFile *getRegisterFile() {
|
||||||
return &reg;
|
return &reg;
|
||||||
}
|
}
|
||||||
|
@ -71,6 +74,7 @@ protected:
|
||||||
randomx_dataset* datasetPtr;
|
randomx_dataset* datasetPtr;
|
||||||
};
|
};
|
||||||
uint64_t datasetOffset;
|
uint64_t datasetOffset;
|
||||||
|
uint32_t vm_flags;
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
|
@ -43,7 +43,7 @@ namespace randomx {
|
||||||
void CompiledVm<softAes>::run(void* seed) {
|
void CompiledVm<softAes>::run(void* seed) {
|
||||||
VmBase<softAes>::generateProgram(seed);
|
VmBase<softAes>::generateProgram(seed);
|
||||||
randomx_vm::initialize();
|
randomx_vm::initialize();
|
||||||
compiler.generateProgram(program, config);
|
compiler.generateProgram(program, config, getFlags());
|
||||||
mem.memory = datasetPtr->memory + datasetOffset;
|
mem.memory = datasetPtr->memory + datasetOffset;
|
||||||
execute();
|
execute();
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
#include "crypto/rx/RxVm.h"
|
#include "crypto/rx/RxVm.h"
|
||||||
|
|
||||||
|
|
||||||
xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes)
|
xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly)
|
||||||
{
|
{
|
||||||
if (!softAes) {
|
if (!softAes) {
|
||||||
m_flags |= RANDOMX_FLAG_HARD_AES;
|
m_flags |= RANDOMX_FLAG_HARD_AES;
|
||||||
|
@ -45,6 +45,10 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes)
|
||||||
m_flags |= RANDOMX_FLAG_JIT;
|
m_flags |= RANDOMX_FLAG_JIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (assembly == Assembly::RYZEN) {
|
||||||
|
m_flags |= RANDOMX_FLAG_RYZEN;
|
||||||
|
}
|
||||||
|
|
||||||
m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad);
|
m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
|
|
||||||
|
|
||||||
#include "base/tools/Object.h"
|
#include "base/tools/Object.h"
|
||||||
|
#include "backend/cpu/Cpu.h"
|
||||||
|
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
@ -49,7 +50,7 @@ class RxVm
|
||||||
public:
|
public:
|
||||||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm);
|
XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm);
|
||||||
|
|
||||||
RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes);
|
RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly);
|
||||||
~RxVm();
|
~RxVm();
|
||||||
|
|
||||||
inline randomx_vm *get() const { return m_vm; }
|
inline randomx_vm *get() const { return m_vm; }
|
||||||
|
|
|
@ -117,7 +117,7 @@ static void getResults(JobBundle &bundle, std::vector<JobResult> &results, uint3
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES);
|
auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE);
|
||||||
|
|
||||||
for (uint32_t nonce : bundle.nonces) {
|
for (uint32_t nonce : bundle.nonces) {
|
||||||
*bundle.job.nonce() = nonce;
|
*bundle.job.nonce() = nonce;
|
||||||
|
|
Loading…
Reference in a new issue