diff --git a/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc b/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc index 88204d996..7ebd871fd 100644 --- a/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc +++ b/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc @@ -1,4 +1,4 @@ - add rsp, 32 + add rsp, 40 pop r9 movdqu xmm0, xmmword ptr [rsp] diff --git a/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc b/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc index 46dd469d7..09c2deebd 100644 --- a/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc +++ b/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc @@ -34,5 +34,5 @@ add rbp, 5 add rsi, 320 - cmp rbp, qword ptr [rsp+32] + cmp rbp, qword ptr [rsp+40] db 15, 130, 0, 0, 0, 0 ;# jb rel32 diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 5f7a83a14..0e79d6a40 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -244,7 +244,9 @@ namespace randomx { switch (arch) { case xmrig::ICpuInfo::ARCH_ZEN: case xmrig::ICpuInfo::ARCH_ZEN_PLUS: + default: // AVX2 init is slower on Zen/Zen+ + // Also disable it for other unknown architectures initDatasetAVX2 = false; break; case xmrig::ICpuInfo::ARCH_ZEN2: diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index da5ee98ea..e2177147d 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -212,7 +212,7 @@ DECL(randomx_dataset_init_avx2_prologue): mov rbp, rdx ;# block index push rcx ;# max. block index #endif - sub rsp, 32 + sub rsp, 40 jmp randomx_dataset_init_avx2_prologue_loop_begin #include "asm/program_sshash_avx2_constants.inc" @@ -240,9 +240,8 @@ randomx_dataset_init_avx2_prologue_loop_begin: xor r15, r8 ;# init AVX registers (lanes 1-4) - vpxor ymm0, ymm0, ymm0 - movq xmm0, rbp - vpbroadcastq ymm0, xmm0 + mov qword ptr [rsp+32], rbp + vbroadcastsd ymm0, qword ptr [rsp+32] vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip] ;# ymm0 *= r0_avx2_mul diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index f8a2d527d..6e90cbf37 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -195,7 +195,7 @@ randomx_dataset_init_avx2_prologue PROC mov rsi, rdx ;# dataset mov rbp, r8 ;# block index push r9 ;# max. block index - sub rsp, 32 + sub rsp, 40 jmp loop_begin include asm/program_sshash_avx2_constants.inc @@ -223,9 +223,8 @@ loop_begin: xor r15, r8 ;# init AVX registers (lanes 1-4) - vpxor ymm0, ymm0, ymm0 - movq xmm0, rbp - vpbroadcastq ymm0, xmm0 + mov qword ptr [rsp+32], rbp + vbroadcastsd ymm0, qword ptr [rsp+32] vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments] ;# ymm0 *= r0_avx2_mul