Merge pull request #1987 from SChernykh/dev

Another dataset AVX2 init speedup (+3.8% faster on Zen3)
This commit is contained in:
xmrig 2020-12-20 01:57:12 +07:00 committed by GitHub
commit aa53ba073d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 10 additions and 10 deletions

View file

@ -1,4 +1,4 @@
add rsp, 32 add rsp, 40
pop r9 pop r9
movdqu xmm0, xmmword ptr [rsp] movdqu xmm0, xmmword ptr [rsp]

View file

@ -34,5 +34,5 @@
add rbp, 5 add rbp, 5
add rsi, 320 add rsi, 320
cmp rbp, qword ptr [rsp+32] cmp rbp, qword ptr [rsp+40]
db 15, 130, 0, 0, 0, 0 ;# jb rel32 db 15, 130, 0, 0, 0, 0 ;# jb rel32

View file

@ -244,7 +244,9 @@ namespace randomx {
switch (arch) { switch (arch) {
case xmrig::ICpuInfo::ARCH_ZEN: case xmrig::ICpuInfo::ARCH_ZEN:
case xmrig::ICpuInfo::ARCH_ZEN_PLUS: case xmrig::ICpuInfo::ARCH_ZEN_PLUS:
default:
// AVX2 init is slower on Zen/Zen+ // AVX2 init is slower on Zen/Zen+
// Also disable it for other unknown architectures
initDatasetAVX2 = false; initDatasetAVX2 = false;
break; break;
case xmrig::ICpuInfo::ARCH_ZEN2: case xmrig::ICpuInfo::ARCH_ZEN2:

View file

@ -212,7 +212,7 @@ DECL(randomx_dataset_init_avx2_prologue):
mov rbp, rdx ;# block index mov rbp, rdx ;# block index
push rcx ;# max. block index push rcx ;# max. block index
#endif #endif
sub rsp, 32 sub rsp, 40
jmp randomx_dataset_init_avx2_prologue_loop_begin jmp randomx_dataset_init_avx2_prologue_loop_begin
#include "asm/program_sshash_avx2_constants.inc" #include "asm/program_sshash_avx2_constants.inc"
@ -240,9 +240,8 @@ randomx_dataset_init_avx2_prologue_loop_begin:
xor r15, r8 xor r15, r8
;# init AVX registers (lanes 1-4) ;# init AVX registers (lanes 1-4)
vpxor ymm0, ymm0, ymm0 mov qword ptr [rsp+32], rbp
movq xmm0, rbp vbroadcastsd ymm0, qword ptr [rsp+32]
vpbroadcastq ymm0, xmm0
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip] vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip]
;# ymm0 *= r0_avx2_mul ;# ymm0 *= r0_avx2_mul

View file

@ -195,7 +195,7 @@ randomx_dataset_init_avx2_prologue PROC
mov rsi, rdx ;# dataset mov rsi, rdx ;# dataset
mov rbp, r8 ;# block index mov rbp, r8 ;# block index
push r9 ;# max. block index push r9 ;# max. block index
sub rsp, 32 sub rsp, 40
jmp loop_begin jmp loop_begin
include asm/program_sshash_avx2_constants.inc include asm/program_sshash_avx2_constants.inc
@ -223,9 +223,8 @@ loop_begin:
xor r15, r8 xor r15, r8
;# init AVX registers (lanes 1-4) ;# init AVX registers (lanes 1-4)
vpxor ymm0, ymm0, ymm0 mov qword ptr [rsp+32], rbp
movq xmm0, rbp vbroadcastsd ymm0, qword ptr [rsp+32]
vpbroadcastq ymm0, xmm0
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments] vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments]
;# ymm0 *= r0_avx2_mul ;# ymm0 *= r0_avx2_mul