mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-22 18:54:43 +00:00
Merge pull request #1987 from SChernykh/dev
Another dataset AVX2 init speedup (+3.8% faster on Zen3)
This commit is contained in:
commit aa53ba073d
5 changed files with 10 additions and 10 deletions
|
@@ -1,4 +1,4 @@
|
||||||
add rsp, 32
|
add rsp, 40
|
||||||
pop r9
|
pop r9
|
||||||
|
|
||||||
movdqu xmm0, xmmword ptr [rsp]
|
movdqu xmm0, xmmword ptr [rsp]
|
||||||
|
|
|
@@ -34,5 +34,5 @@
|
||||||
|
|
||||||
add rbp, 5
|
add rbp, 5
|
||||||
add rsi, 320
|
add rsi, 320
|
||||||
cmp rbp, qword ptr [rsp+32]
|
cmp rbp, qword ptr [rsp+40]
|
||||||
db 15, 130, 0, 0, 0, 0 ;# jb rel32
|
db 15, 130, 0, 0, 0, 0 ;# jb rel32
|
||||||
|
|
|
@@ -244,7 +244,9 @@ namespace randomx {
|
||||||
switch (arch) {
|
switch (arch) {
|
||||||
case xmrig::ICpuInfo::ARCH_ZEN:
|
case xmrig::ICpuInfo::ARCH_ZEN:
|
||||||
case xmrig::ICpuInfo::ARCH_ZEN_PLUS:
|
case xmrig::ICpuInfo::ARCH_ZEN_PLUS:
|
||||||
|
default:
|
||||||
// AVX2 init is slower on Zen/Zen+
|
// AVX2 init is slower on Zen/Zen+
|
||||||
|
// Also disable it for other unknown architectures
|
||||||
initDatasetAVX2 = false;
|
initDatasetAVX2 = false;
|
||||||
break;
|
break;
|
||||||
case xmrig::ICpuInfo::ARCH_ZEN2:
|
case xmrig::ICpuInfo::ARCH_ZEN2:
|
||||||
|
|
|
@@ -212,7 +212,7 @@ DECL(randomx_dataset_init_avx2_prologue):
|
||||||
mov rbp, rdx ;# block index
|
mov rbp, rdx ;# block index
|
||||||
push rcx ;# max. block index
|
push rcx ;# max. block index
|
||||||
#endif
|
#endif
|
||||||
sub rsp, 32
|
sub rsp, 40
|
||||||
|
|
||||||
jmp randomx_dataset_init_avx2_prologue_loop_begin
|
jmp randomx_dataset_init_avx2_prologue_loop_begin
|
||||||
#include "asm/program_sshash_avx2_constants.inc"
|
#include "asm/program_sshash_avx2_constants.inc"
|
||||||
|
@@ -240,9 +240,8 @@ randomx_dataset_init_avx2_prologue_loop_begin:
|
||||||
xor r15, r8
|
xor r15, r8
|
||||||
|
|
||||||
;# init AVX registers (lanes 1-4)
|
;# init AVX registers (lanes 1-4)
|
||||||
vpxor ymm0, ymm0, ymm0
|
mov qword ptr [rsp+32], rbp
|
||||||
movq xmm0, rbp
|
vbroadcastsd ymm0, qword ptr [rsp+32]
|
||||||
vpbroadcastq ymm0, xmm0
|
|
||||||
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip]
|
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip]
|
||||||
|
|
||||||
;# ymm0 *= r0_avx2_mul
|
;# ymm0 *= r0_avx2_mul
|
||||||
|
|
|
@@ -195,7 +195,7 @@ randomx_dataset_init_avx2_prologue PROC
|
||||||
mov rsi, rdx ;# dataset
|
mov rsi, rdx ;# dataset
|
||||||
mov rbp, r8 ;# block index
|
mov rbp, r8 ;# block index
|
||||||
push r9 ;# max. block index
|
push r9 ;# max. block index
|
||||||
sub rsp, 32
|
sub rsp, 40
|
||||||
|
|
||||||
jmp loop_begin
|
jmp loop_begin
|
||||||
include asm/program_sshash_avx2_constants.inc
|
include asm/program_sshash_avx2_constants.inc
|
||||||
|
@@ -223,9 +223,8 @@ loop_begin:
|
||||||
xor r15, r8
|
xor r15, r8
|
||||||
|
|
||||||
;# init AVX registers (lanes 1-4)
|
;# init AVX registers (lanes 1-4)
|
||||||
vpxor ymm0, ymm0, ymm0
|
mov qword ptr [rsp+32], rbp
|
||||||
movq xmm0, rbp
|
vbroadcastsd ymm0, qword ptr [rsp+32]
|
||||||
vpbroadcastq ymm0, xmm0
|
|
||||||
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments]
|
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments]
|
||||||
|
|
||||||
;# ymm0 *= r0_avx2_mul
|
;# ymm0 *= r0_avx2_mul
|
||||||
|
|
Loading…
Reference in a new issue