mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-18 18:11:05 +00:00
Added asm optimized code for AMD Bulldozer
This commit is contained in:
parent
afeaabdca4
commit
4b91978af6
10 changed files with 211 additions and 7 deletions
|
@ -43,7 +43,8 @@ static const char *coloredAsmNames[] = {
|
||||||
"\x1B[1;31mnone\x1B[0m",
|
"\x1B[1;31mnone\x1B[0m",
|
||||||
"auto",
|
"auto",
|
||||||
"\x1B[1;32mintel\x1B[0m",
|
"\x1B[1;32mintel\x1B[0m",
|
||||||
"\x1B[1;32mryzen\x1B[0m"
|
"\x1B[1;32mryzen\x1B[0m",
|
||||||
|
"\x1B[1;32mbulldozer\x1B[0m"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -99,6 +99,7 @@ enum Assembly {
|
||||||
ASM_AUTO,
|
ASM_AUTO,
|
||||||
ASM_INTEL,
|
ASM_INTEL,
|
||||||
ASM_RYZEN,
|
ASM_RYZEN,
|
||||||
|
ASM_BULLDOZER,
|
||||||
ASM_MAX
|
ASM_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -84,7 +84,7 @@ Options:\n\
|
||||||
"\
|
"\
|
||||||
--max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\
|
--max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\
|
||||||
--safe safe adjust threads and av settings for current CPU\n\
|
--safe safe adjust threads and av settings for current CPU\n\
|
||||||
--asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen.\n\
|
--asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen, bulldozer.\n\
|
||||||
--print-time=N print hashrate report every N seconds\n\
|
--print-time=N print hashrate report every N seconds\n\
|
||||||
--api-port=N port for the miner API\n\
|
--api-port=N port for the miner API\n\
|
||||||
--api-access-token=T access token for API\n\
|
--api-access-token=T access token for API\n\
|
||||||
|
|
|
@ -76,7 +76,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
|
||||||
m_aes = true;
|
m_aes = true;
|
||||||
|
|
||||||
if (data.vendor == VENDOR_AMD) {
|
if (data.vendor == VENDOR_AMD) {
|
||||||
m_assembly = ASM_RYZEN;
|
m_assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER;
|
||||||
}
|
}
|
||||||
else if (data.vendor == VENDOR_INTEL) {
|
else if (data.vendor == VENDOR_INTEL) {
|
||||||
m_assembly = ASM_INTEL;
|
m_assembly = ASM_INTEL;
|
||||||
|
|
|
@ -40,7 +40,8 @@ static const char *asmNames[] = {
|
||||||
"none",
|
"none",
|
||||||
"auto",
|
"auto",
|
||||||
"intel",
|
"intel",
|
||||||
"ryzen"
|
"ryzen",
|
||||||
|
"bulldozer"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -564,6 +564,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
#ifndef XMRIG_NO_ASM
|
#ifndef XMRIG_NO_ASM
|
||||||
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
|
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
|
||||||
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
|
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
|
||||||
|
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
|
||||||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
|
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
|
||||||
|
|
||||||
|
|
||||||
|
@ -578,9 +579,12 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||||
if (ASM == xmrig::ASM_INTEL) {
|
if (ASM == xmrig::ASM_INTEL) {
|
||||||
cnv2_mainloop_ivybridge_asm(ctx[0]);
|
cnv2_mainloop_ivybridge_asm(ctx[0]);
|
||||||
}
|
}
|
||||||
else {
|
else if (ASM == xmrig::ASM_RYZEN) {
|
||||||
cnv2_mainloop_ryzen_asm(ctx[0]);
|
cnv2_mainloop_ryzen_asm(ctx[0]);
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
cnv2_mainloop_bulldozer_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
|
||||||
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||||
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#endif
|
#endif
|
||||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||||
|
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
|
@ -27,6 +28,14 @@ FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
FN_PREFIX(cnv2_mainloop_bulldozer_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cnv2_main_loop_bulldozer.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||||
sub rsp, 48
|
sub rsp, 48
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||||
PUBLIC cnv2_mainloop_ryzen_asm
|
PUBLIC cnv2_mainloop_ryzen_asm
|
||||||
|
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||||
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
|
@ -15,6 +16,12 @@ cnv2_mainloop_ryzen_asm PROC
|
||||||
ret 0
|
ret 0
|
||||||
cnv2_mainloop_ryzen_asm ENDP
|
cnv2_mainloop_ryzen_asm ENDP
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
cnv2_mainloop_bulldozer_asm PROC
|
||||||
|
INCLUDE cnv2_main_loop_bulldozer.inc
|
||||||
|
ret 0
|
||||||
|
cnv2_mainloop_bulldozer_asm ENDP
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
cnv2_double_mainloop_sandybridge_asm PROC
|
cnv2_double_mainloop_sandybridge_asm PROC
|
||||||
INCLUDE cnv2_double_main_loop_sandybridge.inc
|
INCLUDE cnv2_double_main_loop_sandybridge.inc
|
||||||
|
|
180
src/crypto/asm/cnv2_main_loop_bulldozer.inc
Normal file
180
src/crypto/asm/cnv2_main_loop_bulldozer.inc
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
mov QWORD PTR [rsp+16], rbx
|
||||||
|
mov QWORD PTR [rsp+24], rbp
|
||||||
|
mov QWORD PTR [rsp+32], rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 64
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov ebp, 524288
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm3, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
mov rdi, QWORD PTR [r9+104]
|
||||||
|
and r10d, 2097136
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm6
|
||||||
|
movq xmm4, rax
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm8
|
||||||
|
xorps xmm8, xmm8
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm7, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
cnv2_main_loop_bulldozer:
|
||||||
|
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
|
movq xmm6, r8
|
||||||
|
pinsrq xmm6, r11, 1
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
lea r9, QWORD PTR [rdi+rdi]
|
||||||
|
shl rdi, 32
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
aesenc xmm5, xmm6
|
||||||
|
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movaps xmm1, xmm8
|
||||||
|
mov rsi, r15
|
||||||
|
xor rsi, rdi
|
||||||
|
|
||||||
|
mov edi, 1023
|
||||||
|
shl rdi, 52
|
||||||
|
|
||||||
|
movq r14, xmm5
|
||||||
|
pextrq rax, xmm5, 1
|
||||||
|
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
mov r10, r14
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rsi, QWORD PTR [r10+rbx]
|
||||||
|
lea r12, QWORD PTR [r10+rbx]
|
||||||
|
mov r13, QWORD PTR [r10+rbx+8]
|
||||||
|
|
||||||
|
add r9d, r14d
|
||||||
|
or r9d, -2147483647
|
||||||
|
xor edx, edx
|
||||||
|
div r9
|
||||||
|
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
lea r15, [rax+rdx]
|
||||||
|
lea rax, [r14+r15]
|
||||||
|
shr rax, 12
|
||||||
|
add rax, rdi
|
||||||
|
movq xmm0, rax
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
movq rdi, xmm1
|
||||||
|
test rdi, 524287
|
||||||
|
je sqrt_fixup_bulldozer
|
||||||
|
shr rdi, 19
|
||||||
|
|
||||||
|
sqrt_fixup_bulldozer_ret:
|
||||||
|
mov rax, rsi
|
||||||
|
mul r14
|
||||||
|
movq xmm1, rax
|
||||||
|
movq xmm0, rdx
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
mov r9d, r10d
|
||||||
|
mov ecx, r10d
|
||||||
|
xor r9d, 16
|
||||||
|
xor ecx, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
pxor xmm2, xmm0
|
||||||
|
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movdqa xmm4, xmm3
|
||||||
|
add r8, rdx
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r12], r8
|
||||||
|
xor r8, rsi
|
||||||
|
mov QWORD PTR [r12+8], r11
|
||||||
|
mov r10, r8
|
||||||
|
xor r11, r13
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm3, xmm5
|
||||||
|
dec ebp
|
||||||
|
jne cnv2_main_loop_bulldozer
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+64]
|
||||||
|
mov rbx, QWORD PTR [r11+56]
|
||||||
|
mov rbp, QWORD PTR [r11+64]
|
||||||
|
mov rsi, QWORD PTR [r11+72]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-48]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
jmp cnv2_main_loop_bulldozer_endp
|
||||||
|
|
||||||
|
sqrt_fixup_bulldozer:
|
||||||
|
movq r9, xmm5
|
||||||
|
add r9, r15
|
||||||
|
dec rdi
|
||||||
|
mov edx, -1022
|
||||||
|
shl rdx, 32
|
||||||
|
mov rax, rdi
|
||||||
|
shr rdi, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdi
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+rdx+1]
|
||||||
|
add rax, rdx
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdi, 0
|
||||||
|
jmp sqrt_fixup_bulldozer_ret
|
||||||
|
|
||||||
|
cnv2_main_loop_bulldozer_endp:
|
|
@ -64,7 +64,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
|
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
|
||||||
|
|
||||||
# ifndef XMRIG_NO_ASM
|
# ifndef XMRIG_NO_ASM
|
||||||
constexpr const size_t count = VARIANT_MAX * 10 * 3 + 3;
|
constexpr const size_t count = VARIANT_MAX * 10 * 3 + 4;
|
||||||
# else
|
# else
|
||||||
constexpr const size_t count = VARIANT_MAX * 10 * 3;
|
constexpr const size_t count = VARIANT_MAX * 10 * 3;
|
||||||
# endif
|
# endif
|
||||||
|
@ -249,6 +249,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
# ifndef XMRIG_NO_ASM
|
# ifndef XMRIG_NO_ASM
|
||||||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>,
|
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>,
|
||||||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_RYZEN>,
|
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_RYZEN>,
|
||||||
|
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_BULLDOZER>,
|
||||||
cryptonight_double_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>
|
cryptonight_double_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>
|
||||||
# endif
|
# endif
|
||||||
};
|
};
|
||||||
|
@ -447,7 +448,7 @@ size_t xmrig::CpuThread::fnIndex(Algo algorithm, AlgoVariant av, Variant variant
|
||||||
}
|
}
|
||||||
|
|
||||||
if (av == AV_DOUBLE) {
|
if (av == AV_DOUBLE) {
|
||||||
return offset + 2;
|
return offset + 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
|
|
Loading…
Reference in a new issue