mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-25 12:06:04 +00:00
RandomX: optimized IMUL_RCP instruction
+0.4% on AMD Zen2, +0.3% on AMD Zen3, +0.1% on Intel SandyBridge, +0.3% on rx/wow on Intel SandyBridge
This commit is contained in:
parent
61d165a314
commit
3477f9fbc1
6 changed files with 55 additions and 9 deletions
17
src/crypto/randomx/asm/program_imul_rcp_store.inc
Normal file
17
src/crypto/randomx/asm/program_imul_rcp_store.inc
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||||
|
add rsp, 128
|
|
@ -428,7 +428,10 @@ namespace randomx {
|
||||||
xmrig::RxFix::setMainLoopBounds(mainLoopBounds);
|
xmrig::RxFix::setMainLoopBounds(mainLoopBounds);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
imul_rcp_storage = code + (ADDR(randomx_program_imul_rcp_store) - codePrologue) + 2;
|
||||||
|
imul_rcp_storage_used = 0;
|
||||||
|
|
||||||
|
memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||||
codePos = codePosFirst;
|
codePos = codePosFirst;
|
||||||
prevCFROUND = 0;
|
prevCFROUND = 0;
|
||||||
|
|
||||||
|
@ -1012,13 +1015,24 @@ namespace randomx {
|
||||||
|
|
||||||
uint64_t divisor = instr.getImm32();
|
uint64_t divisor = instr.getImm32();
|
||||||
if (!isZeroOrPowerOf2(divisor)) {
|
if (!isZeroOrPowerOf2(divisor)) {
|
||||||
*(uint32_t*)(p + pos) = 0xb848;
|
|
||||||
pos += 2;
|
|
||||||
|
|
||||||
emit64(randomx_reciprocal_fast(divisor), p, pos);
|
|
||||||
|
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
|
||||||
|
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
|
||||||
|
if (imul_rcp_storage_used < 16) {
|
||||||
|
*(uint64_t*)(imul_rcp_storage) = reciprocal;
|
||||||
|
*(uint64_t*)(p + pos) = 0x2444AF0F4Cull + (dst << 27) + (static_cast<uint64_t>(248 - imul_rcp_storage_used * 8) << 40);
|
||||||
|
++imul_rcp_storage_used;
|
||||||
|
imul_rcp_storage += 11;
|
||||||
|
pos += 6;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*(uint32_t*)(p + pos) = 0xb848;
|
||||||
|
pos += 2;
|
||||||
|
|
||||||
|
emit64(reciprocal, p, pos);
|
||||||
|
|
||||||
|
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
||||||
|
}
|
||||||
|
|
||||||
registerUsage[dst] = pos;
|
registerUsage[dst] = pos;
|
||||||
}
|
}
|
||||||
|
|
|
@ -104,6 +104,9 @@ namespace randomx {
|
||||||
uint8_t* allocatedCode = nullptr;
|
uint8_t* allocatedCode = nullptr;
|
||||||
size_t allocatedSize = 0;
|
size_t allocatedSize = 0;
|
||||||
|
|
||||||
|
uint8_t* imul_rcp_storage = nullptr;
|
||||||
|
uint32_t imul_rcp_storage_used = 0;
|
||||||
|
|
||||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||||
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||||
template<bool rax>
|
template<bool rax>
|
||||||
|
|
|
@ -41,6 +41,7 @@
|
||||||
.global DECL(randomx_prefetch_scratchpad_end)
|
.global DECL(randomx_prefetch_scratchpad_end)
|
||||||
.global DECL(randomx_program_prologue)
|
.global DECL(randomx_program_prologue)
|
||||||
.global DECL(randomx_program_prologue_first_load)
|
.global DECL(randomx_program_prologue_first_load)
|
||||||
|
.global DECL(randomx_program_imul_rcp_store)
|
||||||
.global DECL(randomx_program_loop_begin)
|
.global DECL(randomx_program_loop_begin)
|
||||||
.global DECL(randomx_program_loop_load)
|
.global DECL(randomx_program_loop_load)
|
||||||
.global DECL(randomx_program_loop_load_xop)
|
.global DECL(randomx_program_loop_load_xop)
|
||||||
|
@ -106,11 +107,15 @@ DECL(randomx_program_prologue_first_load):
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
jmp DECL(randomx_program_loop_begin)
|
jmp DECL(randomx_program_imul_rcp_store)
|
||||||
|
|
||||||
.balign 64
|
.balign 64
|
||||||
#include "asm/program_xmm_constants.inc"
|
#include "asm/program_xmm_constants.inc"
|
||||||
|
|
||||||
|
DECL(randomx_program_imul_rcp_store):
|
||||||
|
#include "asm/program_imul_rcp_store.inc"
|
||||||
|
jmp DECL(randomx_program_loop_begin)
|
||||||
|
|
||||||
.balign 64
|
.balign 64
|
||||||
DECL(randomx_program_loop_begin):
|
DECL(randomx_program_loop_begin):
|
||||||
nop
|
nop
|
||||||
|
|
|
@ -32,6 +32,7 @@ PUBLIC randomx_prefetch_scratchpad
|
||||||
PUBLIC randomx_prefetch_scratchpad_end
|
PUBLIC randomx_prefetch_scratchpad_end
|
||||||
PUBLIC randomx_program_prologue
|
PUBLIC randomx_program_prologue
|
||||||
PUBLIC randomx_program_prologue_first_load
|
PUBLIC randomx_program_prologue_first_load
|
||||||
|
PUBLIC randomx_program_imul_rcp_store
|
||||||
PUBLIC randomx_program_loop_begin
|
PUBLIC randomx_program_loop_begin
|
||||||
PUBLIC randomx_program_loop_load
|
PUBLIC randomx_program_loop_load
|
||||||
PUBLIC randomx_program_loop_load_xop
|
PUBLIC randomx_program_loop_load_xop
|
||||||
|
@ -94,12 +95,17 @@ randomx_program_prologue_first_load PROC
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
nop
|
nop
|
||||||
jmp randomx_program_loop_begin
|
jmp randomx_program_imul_rcp_store
|
||||||
randomx_program_prologue_first_load ENDP
|
randomx_program_prologue_first_load ENDP
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
include asm/program_xmm_constants.inc
|
include asm/program_xmm_constants.inc
|
||||||
|
|
||||||
|
randomx_program_imul_rcp_store PROC
|
||||||
|
include asm/program_imul_rcp_store.inc
|
||||||
|
jmp randomx_program_loop_begin
|
||||||
|
randomx_program_imul_rcp_store ENDP
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
randomx_program_loop_begin PROC
|
randomx_program_loop_begin PROC
|
||||||
nop
|
nop
|
||||||
|
|
|
@ -33,6 +33,7 @@ extern "C" {
|
||||||
void randomx_prefetch_scratchpad_end();
|
void randomx_prefetch_scratchpad_end();
|
||||||
void randomx_program_prologue();
|
void randomx_program_prologue();
|
||||||
void randomx_program_prologue_first_load();
|
void randomx_program_prologue_first_load();
|
||||||
|
void randomx_program_imul_rcp_store();
|
||||||
void randomx_program_loop_begin();
|
void randomx_program_loop_begin();
|
||||||
void randomx_program_loop_load();
|
void randomx_program_loop_load();
|
||||||
void randomx_program_loop_load_xop();
|
void randomx_program_loop_load_xop();
|
||||||
|
|
Loading…
Reference in a new issue