Merge pull request #2395 from SChernykh/dev

RandomX: rewrote dataset read code
This commit is contained in:
xmrig 2021-05-20 18:58:48 +07:00 committed by GitHub
commit 3bfa5ea038
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 16 additions and 69 deletions

View file

@ -22,6 +22,7 @@
 mov rsi, rdx ;# uint8_t* scratchpad
 mov rax, rbp
+ror rbp, 32
 ;# zero integer registers
 xor r8, r8

View file

@ -35,6 +35,7 @@
 mov rbx, r9 ;# loop counter
 mov rax, rbp
+ror rbp, 32
 ;# zero integer registers
 xor r8, r8

View file

@ -1,17 +1,16 @@
+mov ecx, ebp ;# ecx = ma
+and ecx, RANDOMX_DATASET_BASE_MASK
+xor r8, qword ptr [rdi+rcx]
+ror rbp, 32 ;# swap "ma" and "mx"
 xor rbp, rax ;# modify "mx"
 mov edx, ebp ;# edx = mx
 and edx, RANDOMX_DATASET_BASE_MASK
 prefetchnta byte ptr [rdi+rdx]
-ror rbp, 32 ;# swap "ma" and "mx"
-mov edx, ebp ;# edx = ma
-and edx, RANDOMX_DATASET_BASE_MASK
-lea rcx, [rdi+rdx] ;# dataset cache line
-xor r8, qword ptr [rcx+0]
-xor r9, qword ptr [rcx+8]
-xor r10, qword ptr [rcx+16]
-xor r11, qword ptr [rcx+24]
-xor r12, qword ptr [rcx+32]
-xor r13, qword ptr [rcx+40]
-xor r14, qword ptr [rcx+48]
-xor r15, qword ptr [rcx+56]
+xor r9, qword ptr [rdi+rcx+8]
+xor r10, qword ptr [rdi+rcx+16]
+xor r11, qword ptr [rdi+rcx+24]
+xor r12, qword ptr [rdi+rcx+32]
+xor r13, qword ptr [rdi+rcx+40]
+xor r14, qword ptr [rdi+rcx+48]
+xor r15, qword ptr [rdi+rcx+56]

View file

@ -1,17 +0,0 @@
mov rcx, rbp ;# ecx = ma
shr rcx, 32
and ecx, RANDOMX_DATASET_BASE_MASK
xor r8, qword ptr [rdi+rcx]
xor rbp, rax ;# modify "mx"
mov edx, ebp ;# edx = mx
and edx, RANDOMX_DATASET_BASE_MASK
prefetchnta byte ptr [rdi+rdx]
ror rbp, 32 ;# swap "ma" and "mx"
xor r9, qword ptr [rdi+rcx+8]
xor r10, qword ptr [rdi+rcx+16]
xor r11, qword ptr [rdi+rcx+24]
xor r12, qword ptr [rdi+rcx+32]
xor r13, qword ptr [rdi+rcx+40]
xor r14, qword ptr [rdi+rcx+48]
xor r15, qword ptr [rdi+rcx+56]

View file

@ -115,6 +115,7 @@ namespace randomx {
 #define codeLoopLoad ADDR(randomx_program_loop_load)
 #define codeLoopLoadXOP ADDR(randomx_program_loop_load_xop)
 #define codeProgamStart ADDR(randomx_program_start)
+#define codeReadDataset ADDR(randomx_program_read_dataset)
 #define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init)
 #define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin)
 #define codeDatasetInit ADDR(randomx_dataset_init)
@ -135,6 +136,7 @@ namespace randomx {
 #define prologueSize (codeLoopBegin - codePrologue)
 #define loopLoadSize (codeLoopLoadXOP - codeLoopLoad)
 #define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP)
+#define readDatasetSize (codeReadDatasetLightSshInit - codeReadDataset)
 #define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit)
 #define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin)
 #define loopStoreSize (codeLoopEnd - codeLoopStore)
@ -318,20 +320,7 @@ namespace randomx {
 vm_flags = flags;
 generateProgramPrologue(prog, pcfg);
+emit(codeReadDataset, readDatasetSize, code, codePos);
-uint8_t* p;
-uint32_t n;
-if (flags & RANDOMX_FLAG_AMD) {
-p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked;
-n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize;
-}
-else {
-p = RandomX_CurrentConfig.codeReadDatasetTweaked;
-n = RandomX_CurrentConfig.codeReadDatasetTweakedSize;
-}
-memcpy(code + codePos, p, n);
-codePos += n;
 generateProgramEpilogue(prog, pcfg);
 }

View file

@ -48,7 +48,6 @@
 .global DECL(randomx_program_loop_load_xop)
 .global DECL(randomx_program_start)
 .global DECL(randomx_program_read_dataset)
-.global DECL(randomx_program_read_dataset_ryzen)
 .global DECL(randomx_program_read_dataset_sshash_init)
 .global DECL(randomx_program_read_dataset_sshash_fin)
 .global DECL(randomx_program_loop_store)
@ -140,9 +139,6 @@ DECL(randomx_program_start):
 DECL(randomx_program_read_dataset):
 #include "asm/program_read_dataset.inc"
-DECL(randomx_program_read_dataset_ryzen):
-#include "asm/program_read_dataset_ryzen.inc"
 DECL(randomx_program_read_dataset_sshash_init):
 #include "asm/program_read_dataset_sshash_init.inc"

View file

@ -39,7 +39,6 @@ PUBLIC randomx_program_loop_load
 PUBLIC randomx_program_loop_load_xop
 PUBLIC randomx_program_start
 PUBLIC randomx_program_read_dataset
-PUBLIC randomx_program_read_dataset_ryzen
 PUBLIC randomx_program_read_dataset_sshash_init
 PUBLIC randomx_program_read_dataset_sshash_fin
 PUBLIC randomx_dataset_init
@ -136,10 +135,6 @@ randomx_program_read_dataset PROC
 include asm/program_read_dataset.inc
 randomx_program_read_dataset ENDP
-randomx_program_read_dataset_ryzen PROC
-include asm/program_read_dataset_ryzen.inc
-randomx_program_read_dataset_ryzen ENDP
 randomx_program_read_dataset_sshash_init PROC
 include asm/program_read_dataset_sshash_init.inc
 randomx_program_read_dataset_sshash_init ENDP

View file

@ -40,7 +40,6 @@ extern "C" {
 void randomx_program_loop_load_xop();
 void randomx_program_start();
 void randomx_program_read_dataset();
-void randomx_program_read_dataset_ryzen();
 void randomx_program_read_dataset_sshash_init();
 void randomx_program_read_dataset_sshash_fin();
 void randomx_program_loop_store();

View file

@ -165,18 +165,6 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
 const uint8_t* b = addr(randomx_sshash_end);
 memcpy(codeShhPrefetchTweaked, a, b - a);
 }
-{
-const uint8_t* a = addr(randomx_program_read_dataset);
-const uint8_t* b = addr(randomx_program_read_dataset_ryzen);
-memcpy(codeReadDatasetTweaked, a, b - a);
-codeReadDatasetTweakedSize = b - a;
-}
-{
-const uint8_t* a = addr(randomx_program_read_dataset_ryzen);
-const uint8_t* b = addr(randomx_program_read_dataset_sshash_init);
-memcpy(codeReadDatasetRyzenTweaked, a, b - a);
-codeReadDatasetRyzenTweakedSize = b - a;
-}
 if (xmrig::Cpu::info()->hasBMI2()) {
 const uint8_t* a = addr(randomx_prefetch_scratchpad_bmi2);
 const uint8_t* b = addr(randomx_prefetch_scratchpad_end);

View file

@ -125,10 +125,6 @@ struct RandomX_ConfigurationBase
 rx_vec_i128 fillAes4Rx4_Key[8];
 uint8_t codeShhPrefetchTweaked[20];
-uint8_t codeReadDatasetTweaked[64];
-uint32_t codeReadDatasetTweakedSize;
-uint8_t codeReadDatasetRyzenTweaked[72];
-uint32_t codeReadDatasetRyzenTweakedSize;
 uint8_t codePrefetchScratchpadTweaked[28];
 uint32_t codePrefetchScratchpadTweakedSize;