mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-18 18:11:05 +00:00
Optimized cn/r asm code
Average over 100 block heights: Coffee Lake +0.1%, Ryzen +0.4%, Sandy Bridge +1.5%
This commit is contained in:
parent
4ebfc135e0
commit
9f2d821970
4 changed files with 76 additions and 68 deletions
|
@ -70,29 +70,30 @@ FN_PREFIX(CryptonightR_template_mainloop):
|
||||||
|
|
||||||
aesenc xmm5, xmm4
|
aesenc xmm5, xmm4
|
||||||
|
|
||||||
mov r12d, r9d
|
mov r13d, r9d
|
||||||
mov eax, r9d
|
mov eax, r9d
|
||||||
xor r9d, 48
|
xor r9d, 48
|
||||||
xor r12d, 16
|
xor r13d, 16
|
||||||
xor eax, 32
|
xor eax, 32
|
||||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||||
movaps xmm3, xmm0
|
movaps xmm3, xmm0
|
||||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
movdqu xmm2, XMMWORD PTR [r13+r11]
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
pxor xmm0, xmm2
|
pxor xmm0, xmm2
|
||||||
pxor xmm5, xmm1
|
pxor xmm5, xmm1
|
||||||
pxor xmm5, xmm0
|
pxor xmm5, xmm0
|
||||||
paddq xmm3, xmm7
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
|
||||||
|
|
||||||
movq r12, xmm5
|
movq r12, xmm5
|
||||||
movd r10d, xmm5
|
movd r10d, xmm5
|
||||||
and r10d, 2097136
|
and r10d, 2097136
|
||||||
|
|
||||||
|
paddq xmm3, xmm7
|
||||||
|
paddq xmm2, xmm6
|
||||||
|
paddq xmm1, xmm4
|
||||||
|
movdqu XMMWORD PTR [r13+r11], xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||||
|
|
||||||
movdqa xmm0, xmm5
|
movdqa xmm0, xmm5
|
||||||
pxor xmm0, xmm6
|
pxor xmm0, xmm6
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
@ -102,14 +103,16 @@ FN_PREFIX(CryptonightR_template_mainloop):
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
or r13, rdx
|
or r13, rdx
|
||||||
|
|
||||||
xor r13, QWORD PTR [r10+r11]
|
|
||||||
mov r14, QWORD PTR [r10+r11+8]
|
|
||||||
|
|
||||||
movd eax, xmm6
|
movd eax, xmm6
|
||||||
movd edx, xmm7
|
movd edx, xmm7
|
||||||
pextrd r9d, xmm7, 2
|
pextrd r9d, xmm7, 2
|
||||||
|
|
||||||
|
xor r13, QWORD PTR [r10+r11]
|
||||||
|
mov r14, QWORD PTR [r10+r11+8]
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_template_part2):
|
FN_PREFIX(CryptonightR_template_part2):
|
||||||
|
lea rcx, [r10+r11]
|
||||||
|
|
||||||
mov eax, edi
|
mov eax, edi
|
||||||
mov edx, ebp
|
mov edx, ebp
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
|
@ -124,6 +127,8 @@ FN_PREFIX(CryptonightR_template_part2):
|
||||||
|
|
||||||
mov rax, r13
|
mov rax, r13
|
||||||
mul r12
|
mul r12
|
||||||
|
add r15, rax
|
||||||
|
add rsp, rdx
|
||||||
|
|
||||||
mov r9d, r10d
|
mov r9d, r10d
|
||||||
mov r12d, r10d
|
mov r12d, r10d
|
||||||
|
@ -145,13 +150,10 @@ FN_PREFIX(CryptonightR_template_part2):
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||||
|
|
||||||
movdqa xmm7, xmm6
|
movdqa xmm7, xmm6
|
||||||
add r15, rax
|
mov QWORD PTR [rcx], rsp
|
||||||
add rsp, rdx
|
|
||||||
xor r10, 48
|
|
||||||
mov QWORD PTR [r10+r11], rsp
|
|
||||||
xor rsp, r13
|
xor rsp, r13
|
||||||
mov r9d, esp
|
mov r9d, esp
|
||||||
mov QWORD PTR [r10+r11+8], r15
|
mov QWORD PTR [rcx+8], r15
|
||||||
and r9d, 2097136
|
and r9d, 2097136
|
||||||
xor r15, r14
|
xor r15, r14
|
||||||
movdqa xmm6, xmm5
|
movdqa xmm6, xmm5
|
||||||
|
|
|
@ -70,29 +70,30 @@ CryptonightR_template_mainloop:
|
||||||
|
|
||||||
aesenc xmm5, xmm4
|
aesenc xmm5, xmm4
|
||||||
|
|
||||||
mov r12d, r9d
|
mov r13d, r9d
|
||||||
mov eax, r9d
|
mov eax, r9d
|
||||||
xor r9d, 48
|
xor r9d, 48
|
||||||
xor r12d, 16
|
xor r13d, 16
|
||||||
xor eax, 32
|
xor eax, 32
|
||||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||||
movaps xmm3, xmm0
|
movaps xmm3, xmm0
|
||||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
movdqu xmm2, XMMWORD PTR [r13+r11]
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
pxor xmm0, xmm2
|
pxor xmm0, xmm2
|
||||||
pxor xmm5, xmm1
|
pxor xmm5, xmm1
|
||||||
pxor xmm5, xmm0
|
pxor xmm5, xmm0
|
||||||
paddq xmm3, xmm7
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
|
||||||
|
|
||||||
movq r12, xmm5
|
movq r12, xmm5
|
||||||
movd r10d, xmm5
|
movd r10d, xmm5
|
||||||
and r10d, 2097136
|
and r10d, 2097136
|
||||||
|
|
||||||
|
paddq xmm3, xmm7
|
||||||
|
paddq xmm2, xmm6
|
||||||
|
paddq xmm1, xmm4
|
||||||
|
movdqu XMMWORD PTR [r13+r11], xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||||
|
|
||||||
movdqa xmm0, xmm5
|
movdqa xmm0, xmm5
|
||||||
pxor xmm0, xmm6
|
pxor xmm0, xmm6
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
@ -102,14 +103,16 @@ CryptonightR_template_mainloop:
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
or r13, rdx
|
or r13, rdx
|
||||||
|
|
||||||
xor r13, QWORD PTR [r10+r11]
|
|
||||||
mov r14, QWORD PTR [r10+r11+8]
|
|
||||||
|
|
||||||
movd eax, xmm6
|
movd eax, xmm6
|
||||||
movd edx, xmm7
|
movd edx, xmm7
|
||||||
pextrd r9d, xmm7, 2
|
pextrd r9d, xmm7, 2
|
||||||
|
|
||||||
|
xor r13, QWORD PTR [r10+r11]
|
||||||
|
mov r14, QWORD PTR [r10+r11+8]
|
||||||
|
|
||||||
CryptonightR_template_part2:
|
CryptonightR_template_part2:
|
||||||
|
lea rcx, [r10+r11]
|
||||||
|
|
||||||
mov eax, edi
|
mov eax, edi
|
||||||
mov edx, ebp
|
mov edx, ebp
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
|
@ -124,6 +127,8 @@ CryptonightR_template_part2:
|
||||||
|
|
||||||
mov rax, r13
|
mov rax, r13
|
||||||
mul r12
|
mul r12
|
||||||
|
add r15, rax
|
||||||
|
add rsp, rdx
|
||||||
|
|
||||||
mov r9d, r10d
|
mov r9d, r10d
|
||||||
mov r12d, r10d
|
mov r12d, r10d
|
||||||
|
@ -145,13 +150,10 @@ CryptonightR_template_part2:
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||||
|
|
||||||
movdqa xmm7, xmm6
|
movdqa xmm7, xmm6
|
||||||
add r15, rax
|
mov QWORD PTR [rcx], rsp
|
||||||
add rsp, rdx
|
|
||||||
xor r10, 48
|
|
||||||
mov QWORD PTR [r10+r11], rsp
|
|
||||||
xor rsp, r13
|
xor rsp, r13
|
||||||
mov r9d, esp
|
mov r9d, esp
|
||||||
mov QWORD PTR [r10+r11+8], r15
|
mov QWORD PTR [rcx+8], r15
|
||||||
and r9d, 2097136
|
and r9d, 2097136
|
||||||
xor r15, r14
|
xor r15, r14
|
||||||
movdqa xmm6, xmm5
|
movdqa xmm6, xmm5
|
||||||
|
|
|
@ -70,29 +70,30 @@ FN_PREFIX(CryptonightR_template_mainloop):
|
||||||
|
|
||||||
aesenc xmm5, xmm4
|
aesenc xmm5, xmm4
|
||||||
|
|
||||||
mov r12d, r9d
|
mov r13d, r9d
|
||||||
mov eax, r9d
|
mov eax, r9d
|
||||||
xor r9d, 48
|
xor r9d, 48
|
||||||
xor r12d, 16
|
xor r13d, 16
|
||||||
xor eax, 32
|
xor eax, 32
|
||||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||||
movaps xmm3, xmm0
|
movaps xmm3, xmm0
|
||||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
movdqu xmm2, XMMWORD PTR [r13+r11]
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
pxor xmm0, xmm2
|
pxor xmm0, xmm2
|
||||||
pxor xmm5, xmm1
|
pxor xmm5, xmm1
|
||||||
pxor xmm5, xmm0
|
pxor xmm5, xmm0
|
||||||
paddq xmm3, xmm7
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
|
||||||
|
|
||||||
movd r12, xmm5
|
movd r12, xmm5
|
||||||
movd r10d, xmm5
|
movd r10d, xmm5
|
||||||
and r10d, 2097136
|
and r10d, 2097136
|
||||||
|
|
||||||
|
paddq xmm3, xmm7
|
||||||
|
paddq xmm2, xmm6
|
||||||
|
paddq xmm1, xmm4
|
||||||
|
movdqu XMMWORD PTR [r13+r11], xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||||
|
|
||||||
movdqa xmm0, xmm5
|
movdqa xmm0, xmm5
|
||||||
pxor xmm0, xmm6
|
pxor xmm0, xmm6
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
@ -102,14 +103,16 @@ FN_PREFIX(CryptonightR_template_mainloop):
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
or r13, rdx
|
or r13, rdx
|
||||||
|
|
||||||
xor r13, QWORD PTR [r10+r11]
|
|
||||||
mov r14, QWORD PTR [r10+r11+8]
|
|
||||||
|
|
||||||
movd eax, xmm6
|
movd eax, xmm6
|
||||||
movd edx, xmm7
|
movd edx, xmm7
|
||||||
pextrd r9d, xmm7, 2
|
pextrd r9d, xmm7, 2
|
||||||
|
|
||||||
|
xor r13, QWORD PTR [r10+r11]
|
||||||
|
mov r14, QWORD PTR [r10+r11+8]
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_template_part2):
|
FN_PREFIX(CryptonightR_template_part2):
|
||||||
|
lea rcx, [r10+r11]
|
||||||
|
|
||||||
mov eax, edi
|
mov eax, edi
|
||||||
mov edx, ebp
|
mov edx, ebp
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
|
@ -124,6 +127,8 @@ FN_PREFIX(CryptonightR_template_part2):
|
||||||
|
|
||||||
mov rax, r13
|
mov rax, r13
|
||||||
mul r12
|
mul r12
|
||||||
|
add r15, rax
|
||||||
|
add rsp, rdx
|
||||||
|
|
||||||
mov r9d, r10d
|
mov r9d, r10d
|
||||||
mov r12d, r10d
|
mov r12d, r10d
|
||||||
|
@ -145,13 +150,10 @@ FN_PREFIX(CryptonightR_template_part2):
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||||
|
|
||||||
movdqa xmm7, xmm6
|
movdqa xmm7, xmm6
|
||||||
add r15, rax
|
mov QWORD PTR [rcx], rsp
|
||||||
add rsp, rdx
|
|
||||||
xor r10, 48
|
|
||||||
mov QWORD PTR [r10+r11], rsp
|
|
||||||
xor rsp, r13
|
xor rsp, r13
|
||||||
mov r9d, esp
|
mov r9d, esp
|
||||||
mov QWORD PTR [r10+r11+8], r15
|
mov QWORD PTR [rcx+8], r15
|
||||||
and r9d, 2097136
|
and r9d, 2097136
|
||||||
xor r15, r14
|
xor r15, r14
|
||||||
movdqa xmm6, xmm5
|
movdqa xmm6, xmm5
|
||||||
|
|
|
@ -70,29 +70,30 @@ CryptonightR_template_mainloop:
|
||||||
|
|
||||||
aesenc xmm5, xmm4
|
aesenc xmm5, xmm4
|
||||||
|
|
||||||
mov r12d, r9d
|
mov r13d, r9d
|
||||||
mov eax, r9d
|
mov eax, r9d
|
||||||
xor r9d, 48
|
xor r9d, 48
|
||||||
xor r12d, 16
|
xor r13d, 16
|
||||||
xor eax, 32
|
xor eax, 32
|
||||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||||
movaps xmm3, xmm0
|
movaps xmm3, xmm0
|
||||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
movdqu xmm2, XMMWORD PTR [r13+r11]
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
pxor xmm0, xmm2
|
pxor xmm0, xmm2
|
||||||
pxor xmm5, xmm1
|
pxor xmm5, xmm1
|
||||||
pxor xmm5, xmm0
|
pxor xmm5, xmm0
|
||||||
paddq xmm3, xmm7
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
|
||||||
|
|
||||||
movd r12, xmm5
|
movd r12, xmm5
|
||||||
movd r10d, xmm5
|
movd r10d, xmm5
|
||||||
and r10d, 2097136
|
and r10d, 2097136
|
||||||
|
|
||||||
|
paddq xmm3, xmm7
|
||||||
|
paddq xmm2, xmm6
|
||||||
|
paddq xmm1, xmm4
|
||||||
|
movdqu XMMWORD PTR [r13+r11], xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||||
|
|
||||||
movdqa xmm0, xmm5
|
movdqa xmm0, xmm5
|
||||||
pxor xmm0, xmm6
|
pxor xmm0, xmm6
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
@ -102,14 +103,16 @@ CryptonightR_template_mainloop:
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
or r13, rdx
|
or r13, rdx
|
||||||
|
|
||||||
xor r13, QWORD PTR [r10+r11]
|
|
||||||
mov r14, QWORD PTR [r10+r11+8]
|
|
||||||
|
|
||||||
movd eax, xmm6
|
movd eax, xmm6
|
||||||
movd edx, xmm7
|
movd edx, xmm7
|
||||||
pextrd r9d, xmm7, 2
|
pextrd r9d, xmm7, 2
|
||||||
|
|
||||||
|
xor r13, QWORD PTR [r10+r11]
|
||||||
|
mov r14, QWORD PTR [r10+r11+8]
|
||||||
|
|
||||||
CryptonightR_template_part2:
|
CryptonightR_template_part2:
|
||||||
|
lea rcx, [r10+r11]
|
||||||
|
|
||||||
mov eax, edi
|
mov eax, edi
|
||||||
mov edx, ebp
|
mov edx, ebp
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
|
@ -124,6 +127,8 @@ CryptonightR_template_part2:
|
||||||
|
|
||||||
mov rax, r13
|
mov rax, r13
|
||||||
mul r12
|
mul r12
|
||||||
|
add r15, rax
|
||||||
|
add rsp, rdx
|
||||||
|
|
||||||
mov r9d, r10d
|
mov r9d, r10d
|
||||||
mov r12d, r10d
|
mov r12d, r10d
|
||||||
|
@ -145,13 +150,10 @@ CryptonightR_template_part2:
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||||
|
|
||||||
movdqa xmm7, xmm6
|
movdqa xmm7, xmm6
|
||||||
add r15, rax
|
mov QWORD PTR [rcx], rsp
|
||||||
add rsp, rdx
|
|
||||||
xor r10, 48
|
|
||||||
mov QWORD PTR [r10+r11], rsp
|
|
||||||
xor rsp, r13
|
xor rsp, r13
|
||||||
mov r9d, esp
|
mov r9d, esp
|
||||||
mov QWORD PTR [r10+r11+8], r15
|
mov QWORD PTR [rcx+8], r15
|
||||||
and r9d, 2097136
|
and r9d, 2097136
|
||||||
xor r15, r14
|
xor r15, r14
|
||||||
movdqa xmm6, xmm5
|
movdqa xmm6, xmm5
|
||||||
|
|
Loading…
Reference in a new issue