mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-10 21:04:37 +00:00
Merge pull request #2289 from SChernykh/dev
RandomX: optimized IMUL_RCP instruction
This commit is contained in:
commit
854b7618ef
6 changed files with 55 additions and 9 deletions
17
src/crypto/randomx/asm/program_imul_rcp_store.inc
Normal file
17
src/crypto/randomx/asm/program_imul_rcp_store.inc
Normal file
|
@ -0,0 +1,17 @@
|
|||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
add rsp, 128
|
|
@ -428,7 +428,10 @@ namespace randomx {
|
|||
xmrig::RxFix::setMainLoopBounds(mainLoopBounds);
|
||||
# endif
|
||||
|
||||
memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||
imul_rcp_storage = code + (ADDR(randomx_program_imul_rcp_store) - codePrologue) + 2;
|
||||
imul_rcp_storage_used = 0;
|
||||
|
||||
memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||
codePos = codePosFirst;
|
||||
prevCFROUND = 0;
|
||||
|
||||
|
@ -1012,13 +1015,24 @@ namespace randomx {
|
|||
|
||||
uint64_t divisor = instr.getImm32();
|
||||
if (!isZeroOrPowerOf2(divisor)) {
|
||||
*(uint32_t*)(p + pos) = 0xb848;
|
||||
pos += 2;
|
||||
|
||||
emit64(randomx_reciprocal_fast(divisor), p, pos);
|
||||
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
||||
|
||||
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
|
||||
if (imul_rcp_storage_used < 16) {
|
||||
*(uint64_t*)(imul_rcp_storage) = reciprocal;
|
||||
*(uint64_t*)(p + pos) = 0x2444AF0F4Cull + (dst << 27) + (static_cast<uint64_t>(248 - imul_rcp_storage_used * 8) << 40);
|
||||
++imul_rcp_storage_used;
|
||||
imul_rcp_storage += 11;
|
||||
pos += 6;
|
||||
}
|
||||
else {
|
||||
*(uint32_t*)(p + pos) = 0xb848;
|
||||
pos += 2;
|
||||
|
||||
emit64(reciprocal, p, pos);
|
||||
|
||||
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
||||
}
|
||||
|
||||
registerUsage[dst] = pos;
|
||||
}
|
||||
|
|
|
@ -104,6 +104,9 @@ namespace randomx {
|
|||
uint8_t* allocatedCode = nullptr;
|
||||
size_t allocatedSize = 0;
|
||||
|
||||
uint8_t* imul_rcp_storage = nullptr;
|
||||
uint32_t imul_rcp_storage_used = 0;
|
||||
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||
template<bool rax>
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
.global DECL(randomx_prefetch_scratchpad_end)
|
||||
.global DECL(randomx_program_prologue)
|
||||
.global DECL(randomx_program_prologue_first_load)
|
||||
.global DECL(randomx_program_imul_rcp_store)
|
||||
.global DECL(randomx_program_loop_begin)
|
||||
.global DECL(randomx_program_loop_load)
|
||||
.global DECL(randomx_program_loop_load_xop)
|
||||
|
@ -106,11 +107,15 @@ DECL(randomx_program_prologue_first_load):
|
|||
nop
|
||||
nop
|
||||
nop
|
||||
jmp DECL(randomx_program_loop_begin)
|
||||
jmp DECL(randomx_program_imul_rcp_store)
|
||||
|
||||
.balign 64
|
||||
#include "asm/program_xmm_constants.inc"
|
||||
|
||||
DECL(randomx_program_imul_rcp_store):
|
||||
#include "asm/program_imul_rcp_store.inc"
|
||||
jmp DECL(randomx_program_loop_begin)
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_loop_begin):
|
||||
nop
|
||||
|
|
|
@ -32,6 +32,7 @@ PUBLIC randomx_prefetch_scratchpad
|
|||
PUBLIC randomx_prefetch_scratchpad_end
|
||||
PUBLIC randomx_program_prologue
|
||||
PUBLIC randomx_program_prologue_first_load
|
||||
PUBLIC randomx_program_imul_rcp_store
|
||||
PUBLIC randomx_program_loop_begin
|
||||
PUBLIC randomx_program_loop_load
|
||||
PUBLIC randomx_program_loop_load_xop
|
||||
|
@ -94,12 +95,17 @@ randomx_program_prologue_first_load PROC
|
|||
nop
|
||||
nop
|
||||
nop
|
||||
jmp randomx_program_loop_begin
|
||||
jmp randomx_program_imul_rcp_store
|
||||
randomx_program_prologue_first_load ENDP
|
||||
|
||||
ALIGN 64
|
||||
include asm/program_xmm_constants.inc
|
||||
|
||||
randomx_program_imul_rcp_store PROC
|
||||
include asm/program_imul_rcp_store.inc
|
||||
jmp randomx_program_loop_begin
|
||||
randomx_program_imul_rcp_store ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_loop_begin PROC
|
||||
nop
|
||||
|
|
|
@ -33,6 +33,7 @@ extern "C" {
|
|||
void randomx_prefetch_scratchpad_end();
|
||||
void randomx_program_prologue();
|
||||
void randomx_program_prologue_first_load();
|
||||
void randomx_program_imul_rcp_store();
|
||||
void randomx_program_loop_begin();
|
||||
void randomx_program_loop_load();
|
||||
void randomx_program_loop_load_xop();
|
||||
|
|
Loading…
Reference in a new issue