mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-23 03:59:41 +00:00
More optimizations for Ryzen
This commit is contained in:
parent
9bc13813ba
commit
763691fa4b
5 changed files with 20 additions and 11 deletions
|
@ -7,13 +7,12 @@
|
||||||
and edx, RANDOMX_DATASET_BASE_MASK
|
and edx, RANDOMX_DATASET_BASE_MASK
|
||||||
prefetchnta byte ptr [rdi+rdx]
|
prefetchnta byte ptr [rdi+rdx]
|
||||||
ror rbp, 32 ;# swap "ma" and "mx"
|
ror rbp, 32 ;# swap "ma" and "mx"
|
||||||
add rcx, rdi ;# dataset cache line
|
|
||||||
xor r8, rax
|
xor r8, rax
|
||||||
xor r9, qword ptr [rcx+8]
|
xor r9, qword ptr [rdi+rcx+8]
|
||||||
xor r10, qword ptr [rcx+16]
|
xor r10, qword ptr [rdi+rcx+16]
|
||||||
xor r11, qword ptr [rcx+24]
|
xor r11, qword ptr [rdi+rcx+24]
|
||||||
xor r12, qword ptr [rcx+32]
|
xor r12, qword ptr [rdi+rcx+32]
|
||||||
xor r13, qword ptr [rcx+40]
|
xor r13, qword ptr [rdi+rcx+40]
|
||||||
xor r14, qword ptr [rcx+48]
|
xor r14, qword ptr [rdi+rcx+48]
|
||||||
xor r15, qword ptr [rcx+56]
|
xor r15, qword ptr [rdi+rcx+56]
|
||||||
|
|
|
@ -169,6 +169,7 @@ namespace randomx {
|
||||||
static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e };
|
static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e };
|
||||||
static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
|
static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
|
||||||
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC };
|
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC };
|
||||||
|
static const uint8_t AND_OR_MOV_LDMXCSR_RYZEN[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x3B, 0x44, 0x24, 0xFC, 0x74, 0x09, 0x89, 0x44, 0x24, 0xFC, 0x0F, 0xAE, 0x54, 0x24, 0xFC };
|
||||||
static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
|
static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
|
||||||
static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
|
static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
|
||||||
static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
|
static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
|
||||||
|
@ -300,6 +301,8 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
||||||
|
vm_flags = flags;
|
||||||
|
|
||||||
generateProgramPrologue(prog, pcfg);
|
generateProgramPrologue(prog, pcfg);
|
||||||
|
|
||||||
uint8_t* p;
|
uint8_t* p;
|
||||||
|
@ -1010,7 +1013,12 @@ namespace randomx {
|
||||||
emit(ROL_RAX, p, pos);
|
emit(ROL_RAX, p, pos);
|
||||||
emitByte(rotate, p, pos);
|
emitByte(rotate, p, pos);
|
||||||
}
|
}
|
||||||
emit(AND_OR_MOV_LDMXCSR, p, pos);
|
if (vm_flags & RANDOMX_FLAG_RYZEN) {
|
||||||
|
emit(AND_OR_MOV_LDMXCSR_RYZEN, p, pos);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
emit(AND_OR_MOV_LDMXCSR, p, pos);
|
||||||
|
}
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,6 +70,7 @@ namespace randomx {
|
||||||
uint8_t* allocatedCode;
|
uint8_t* allocatedCode;
|
||||||
uint8_t* code;
|
uint8_t* code;
|
||||||
int32_t codePos;
|
int32_t codePos;
|
||||||
|
uint32_t vm_flags;
|
||||||
|
|
||||||
static bool BranchesWithin32B;
|
static bool BranchesWithin32B;
|
||||||
|
|
||||||
|
|
|
@ -81,6 +81,7 @@ randomx_program_prologue_first_load PROC
|
||||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||||
ror rdx, 32
|
ror rdx, 32
|
||||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
stmxcsr dword ptr [rsp-20]
|
||||||
jmp randomx_program_loop_begin
|
jmp randomx_program_loop_begin
|
||||||
randomx_program_prologue_first_load ENDP
|
randomx_program_prologue_first_load ENDP
|
||||||
|
|
||||||
|
|
|
@ -119,9 +119,9 @@ struct RandomX_ConfigurationBase
|
||||||
rx_vec_i128 fillAes4Rx4_Key[8];
|
rx_vec_i128 fillAes4Rx4_Key[8];
|
||||||
|
|
||||||
uint8_t codeShhPrefetchTweaked[20];
|
uint8_t codeShhPrefetchTweaked[20];
|
||||||
uint8_t codeReadDatasetTweaked[72];
|
uint8_t codeReadDatasetTweaked[256];
|
||||||
uint32_t codeReadDatasetTweakedSize;
|
uint32_t codeReadDatasetTweakedSize;
|
||||||
uint8_t codeReadDatasetRyzenTweaked[72];
|
uint8_t codeReadDatasetRyzenTweaked[256];
|
||||||
uint32_t codeReadDatasetRyzenTweakedSize;
|
uint32_t codeReadDatasetRyzenTweakedSize;
|
||||||
uint8_t codeReadDatasetLightSshInitTweaked[68];
|
uint8_t codeReadDatasetLightSshInitTweaked[68];
|
||||||
uint8_t codePrefetchScratchpadTweaked[32];
|
uint8_t codePrefetchScratchpadTweaked[32];
|
||||||
|
|
Loading…
Reference in a new issue