Optimized CFROUND

This commit is contained in:
SChernykh 2020-01-22 20:11:00 +01:00
parent d342968211
commit 9f1753cc4f
5 changed files with 24 additions and 17 deletions

View file

@@ -1,5 +1,5 @@
 ;# save VM register values
-add rsp, 24
+add rsp, 40
 pop rcx
 mov qword ptr [rcx+0], r8
 mov qword ptr [rcx+8], r9

View file

@@ -1,5 +1,5 @@
 lea rcx, [rsi+rax]
-mov [rsp+8], rcx
+mov [rsp+16], rcx
 xor r8, qword ptr [rcx+0]
 xor r9, qword ptr [rcx+8]
 xor r10, qword ptr [rcx+16]
@@ -9,7 +9,7 @@
 xor r14, qword ptr [rcx+48]
 xor r15, qword ptr [rcx+56]
 lea rcx, [rsi+rdx]
-mov [rsp+16], rcx
+mov [rsp+24], rcx
 cvtdq2pd xmm0, qword ptr [rcx+0]
 cvtdq2pd xmm1, qword ptr [rcx+8]
 cvtdq2pd xmm2, qword ptr [rcx+16]

View file

@@ -1,4 +1,4 @@
-mov rcx, [rsp+16]
+mov rcx, [rsp+24]
 mov qword ptr [rcx+0], r8
 mov qword ptr [rcx+8], r9
 mov qword ptr [rcx+16], r10
@@ -7,7 +7,7 @@
 mov qword ptr [rcx+40], r13
 mov qword ptr [rcx+48], r14
 mov qword ptr [rcx+56], r15
-mov rcx, [rsp+8]
+mov rcx, [rsp+16]
 xorpd xmm0, xmm4
 xorpd xmm1, xmm5
 xorpd xmm2, xmm6

View file

@@ -400,7 +400,7 @@ namespace randomx {
 *(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
 *(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
 if (hasAVX) {
-uint32_t* p = (uint32_t*)(code + codePos + 32);
+uint32_t* p = (uint32_t*)(code + codePos + 67);
 *p = (*p & 0xFF000000U) | 0x0077F8C5U;
 }
@@ -1072,18 +1072,21 @@ namespace randomx {
 uint8_t* const p = code;
 int pos = codePos;
-emit(REX_MOV_RR64, p, pos);
-emitByte(0xc0 + instr.src, p, pos);
-int rotate = (13 - (instr.getImm32() & 63)) & 63;
-if (rotate != 0) {
-emit(ROL_RAX, p, pos);
-emitByte(rotate, p, pos);
-}
+const uint32_t src = instr.src;
+*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
+const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
+*(uint32_t*)(p + pos + 3) = 0x00C8C148 + (rotate << 24);
 if (vm_flags & RANDOMX_FLAG_AMD) {
-emit(AND_OR_MOV_LDMXCSR_RYZEN, p, pos);
+*(uint64_t*)(p + pos + 7) = 0x742024443B0CE083ULL;
+*(uint8_t*)(p + pos + 15) = 8;
+*(uint64_t*)(p + pos + 16) = 0x202444890414AE0FULL;
+pos += 24;
 }
 else {
-emit(AND_OR_MOV_LDMXCSR, p, pos);
+*(uint64_t*)(p + pos + 7) = 0x0414AE0F0CE083ULL;
+pos += 14;
 }
 codePos = pos;

View file

@@ -82,8 +82,12 @@ randomx_program_prologue_first_load PROC
 and eax, RANDOMX_SCRATCHPAD_MASK
 ror rdx, 32
 and edx, RANDOMX_SCRATCHPAD_MASK
-sub rsp, 24
-stmxcsr dword ptr [rsp]
+sub rsp, 40
+mov dword ptr [rsp], 9FC0h
+mov dword ptr [rsp+4], 0BFC0h
+mov dword ptr [rsp+8], 0DFC0h
+mov dword ptr [rsp+12], 0FFC0h
+mov dword ptr [rsp+32], -1
 nop
 nop
 nop