mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-08 20:09:52 +00:00
Initial compile with ASM.
This commit is contained in:
parent
0c20d7a125
commit
dd27c42293
6 changed files with 427 additions and 1 deletions
|
@ -7,6 +7,7 @@ option(WITH_SUMO "CryptoNight-Heavy support" ON)
|
||||||
option(WITH_HTTPD "HTTP REST API" ON)
|
option(WITH_HTTPD "HTTP REST API" ON)
|
||||||
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
|
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
|
||||||
option(WITH_TLS "Enable OpenSSL support" ON)
|
option(WITH_TLS "Enable OpenSSL support" ON)
|
||||||
|
option(WITH_ASM "Enable ASM PoW implementations" ON)
|
||||||
option(BUILD_STATIC "Build static binary" OFF)
|
option(BUILD_STATIC "Build static binary" OFF)
|
||||||
|
|
||||||
include (CheckIncludeFile)
|
include (CheckIncludeFile)
|
||||||
|
@ -195,6 +196,7 @@ else()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
include(cmake/OpenSSL.cmake)
|
include(cmake/OpenSSL.cmake)
|
||||||
|
include(cmake/asm.cmake)
|
||||||
|
|
||||||
CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
|
CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
|
||||||
if (HAVE_SYSLOG_H)
|
if (HAVE_SYSLOG_H)
|
||||||
|
@ -254,4 +256,4 @@ if (WITH_DEBUG_LOG)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES})
|
add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES})
|
||||||
target_link_libraries(${PROJECT_NAME} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
|
# Link the optional ASM static library by target name. XMRIG_ASM_LIBRARY is set
# by cmake/asm.cmake and is an empty string when the ASM implementation is
# disabled, in which case it contributes no argument here. It must be expanded
# exactly once: a nested ${${...}} would look up a variable *named* after the
# library, which does not exist, and silently drop the library from the link.
target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
|
||||||
|
|
23
cmake/asm.cmake
Normal file
23
cmake/asm.cmake
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
# Optional hand-written assembly implementation of the CryptoNight v2 main loop.
#
# Inputs:
#   WITH_ASM   - user option; enables the ASM implementation.
#   XMRIG_ARM  - when true the ASM implementation is skipped.
#
# Results:
#   XMRIG_ASM_LIBRARY - name of the static library target ("xmrig-asm"), or an
#                       empty string when the ASM implementation is disabled.
#   XMRIG_NO_ASM      - compile definition added when it is disabled.
if (WITH_ASM AND NOT XMRIG_ARM)
    set(XMRIG_ASM_LIBRARY "xmrig-asm")

    if (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
        # MSVC toolchain: assemble the MASM flavour of the wrapper.
        enable_language(ASM_MASM)
        set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.asm")
        # State the source language explicitly; a bare property name without a
        # value has no effect on how the file is built.
        set_property(SOURCE "${XMRIG_ASM_FILE}" PROPERTY LANGUAGE ASM_MASM)
    else()
        # Other toolchains: assemble the GAS flavour (.S, run through the
        # C preprocessor by the compiler driver because of its extension).
        enable_language(ASM)
        set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.S")
        set_property(SOURCE "${XMRIG_ASM_FILE}" PROPERTY LANGUAGE ASM)
    endif()

    add_library(${XMRIG_ASM_LIBRARY} STATIC "${XMRIG_ASM_FILE}")

    # The library contains no C/C++ sources, so CMake cannot infer a linker
    # language on its own.
    set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
else()
    set(XMRIG_ASM_LIBRARY "")
    add_definitions(/DXMRIG_NO_ASM)
endif()
|
21
src/crypto/asm/cnv2_main_loop.S
Normal file
21
src/crypto/asm/cnv2_main_loop.S
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
/*
 * GAS (Intel syntax) entry points for the CryptoNight v2 main loop.
 *
 * The loop bodies live in the shared .inc files, which are also included by
 * the MASM build. Those bodies read their argument from rcx and store
 * callee-saved registers at [rsp+16]..[rsp+32] before they move rsp. Each
 * wrapper below therefore:
 *   1. reserves 48 bytes so those stores land in memory owned by this frame,
 *   2. copies rdi (first integer argument under the System V AMD64 ABI)
 *      into rcx,
 *   3. runs the shared body, then releases the 48 bytes and returns.
 *
 * ALIGN is mapped to .align so the "ALIGN 64" lines inside the shared bodies
 * assemble here as well.
 */
#define ALIGN .align
.intel_syntax noprefix
.section .text
.global cnv2_mainloop_ivybridge_asm
.global cnv2_mainloop_ryzen_asm

ALIGN 64
cnv2_mainloop_ivybridge_asm:
	sub rsp, 48
	mov rcx, rdi
#include "cnv2_main_loop_ivybridge.inc"
	add rsp, 48
	ret 0

ALIGN 64
cnv2_mainloop_ryzen_asm:
	sub rsp, 48
	mov rcx, rdi
#include "cnv2_main_loop_ryzen.inc"
	add rsp, 48
	ret 0

/*
 * Mark the object as not requiring an executable stack. Without this note
 * GNU ld assumes the opposite for hand-written assembly and flags the whole
 * binary accordingly.
 */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
|
18
src/crypto/asm/cnv2_main_loop.asm
Normal file
18
src/crypto/asm/cnv2_main_loop.asm
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
; MASM entry points for the CryptoNight v2 main loop.
;
; Both procedures are thin shells around the shared .inc bodies: the body is
; included verbatim and falls through to the "ret 0" that follows it. The
; bodies take their argument in rcx and save/restore the registers they use,
; so no extra prologue or epilogue is emitted here.
;
; The code is placed in its own page-aligned, executable segment so that the
; 64-byte ALIGN requests (here and inside the bodies) can be honoured.
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
PUBLIC cnv2_mainloop_ivybridge_asm
PUBLIC cnv2_mainloop_ryzen_asm

; Variant that includes the Ivy Bridge loop body.
ALIGN 64
cnv2_mainloop_ivybridge_asm PROC
	INCLUDE cnv2_main_loop_ivybridge.inc
	ret 0
cnv2_mainloop_ivybridge_asm ENDP

; Variant that includes the Ryzen loop body.
ALIGN 64
cnv2_mainloop_ryzen_asm PROC
	INCLUDE cnv2_main_loop_ryzen.inc
	ret 0
cnv2_mainloop_ryzen_asm ENDP

_TEXT_CNV2_MAINLOOP ENDS
END
|
183
src/crypto/asm/cnv2_main_loop_ivybridge.inc
Normal file
183
src/crypto/asm/cnv2_main_loop_ivybridge.inc
Normal file
|
@ -0,0 +1,183 @@
|
||||||
|
mov QWORD PTR [rsp+24], rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 80
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov esi, 524288
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
mov r13d, -2147483647
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm4, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movq xmm3, QWORD PTR [r9+104]
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm8
|
||||||
|
and r10d, 2097136
|
||||||
|
movq xmm5, rax
|
||||||
|
|
||||||
|
xor eax, eax
|
||||||
|
mov QWORD PTR [rsp+16], rax
|
||||||
|
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm8, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
$main_loop_ivybridge:
|
||||||
|
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
mov rdi, r15
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm7, r8
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
aesenc xmm6, xmm7
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movq rcx, xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
mov rax, rcx
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
shl rax, 32
|
||||||
|
xor rdi, rax
|
||||||
|
movq rbp, xmm6
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
mov r10, rbp
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rdi, QWORD PTR [r10+rbx]
|
||||||
|
lea r14, QWORD PTR [r10+rbx]
|
||||||
|
xor r10d, 32
|
||||||
|
mov r12, QWORD PTR [r14+8]
|
||||||
|
xor edx, edx
|
||||||
|
lea r9d, DWORD PTR [ecx+ecx]
|
||||||
|
add r9d, ebp
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
psrldq xmm0, 8
|
||||||
|
or r9d, r13d
|
||||||
|
movq rax, xmm0
|
||||||
|
div r9
|
||||||
|
xorps xmm3, xmm3
|
||||||
|
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
add rdx, rax
|
||||||
|
lea r9, QWORD PTR [rdx+rbp]
|
||||||
|
mov r15, rdx
|
||||||
|
mov rax, r9
|
||||||
|
shr rax, 12
|
||||||
|
movq xmm0, rax
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
sqrtsd xmm3, xmm0
|
||||||
|
psubq xmm3, XMMWORD PTR [rsp+16]
|
||||||
|
movq rdx, xmm3
|
||||||
|
test edx, 524287
|
||||||
|
je $sqrt_fixup_ivybridge
|
||||||
|
psrlq xmm3, 19
|
||||||
|
$sqrt_fixup_ivybridge_ret:
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov rax, rdi
|
||||||
|
mul rbp
|
||||||
|
movq xmm2, rdx
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
movq xmm0, rax
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
|
||||||
|
mov r9d, r10d
|
||||||
|
xor r9d, 48
|
||||||
|
xor r10d, 16
|
||||||
|
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqa xmm5, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+rbx], xmm0
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
add r8, rdx
|
||||||
|
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rdi
|
||||||
|
mov r10, r8
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r14+8], r11
|
||||||
|
and r10d, 2097136
|
||||||
|
xor r11, r12
|
||||||
|
dec rsi
|
||||||
|
jne $main_loop_ivybridge
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
mov rbx, QWORD PTR [rsp+160]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||||
|
add rsp, 80
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
jmp $cnv2_main_loop_ivybridge_endp
|
||||||
|
|
||||||
|
$sqrt_fixup_ivybridge:
|
||||||
|
dec rdx
|
||||||
|
mov r13d, -1022
|
||||||
|
shl r13, 32
|
||||||
|
mov rax, rdx
|
||||||
|
shr rdx, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdx
|
||||||
|
sub rcx, rax
|
||||||
|
add rax, r13
|
||||||
|
not r13
|
||||||
|
sub rcx, r13
|
||||||
|
mov r13d, -2147483647
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdx, 0
|
||||||
|
movq xmm3, rdx
|
||||||
|
jmp $sqrt_fixup_ivybridge_ret
|
||||||
|
|
||||||
|
$cnv2_main_loop_ivybridge_endp:
|
179
src/crypto/asm/cnv2_main_loop_ryzen.inc
Normal file
179
src/crypto/asm/cnv2_main_loop_ryzen.inc
Normal file
|
@ -0,0 +1,179 @@
|
||||||
|
mov QWORD PTR [rsp+16], rbx
|
||||||
|
mov QWORD PTR [rsp+24], rbp
|
||||||
|
mov QWORD PTR [rsp+32], rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 64
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov ebp, 524288
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm3, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
mov rdi, QWORD PTR [r9+104]
|
||||||
|
and r10d, 2097136
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm6
|
||||||
|
movq xmm4, rax
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm8
|
||||||
|
xorps xmm8, xmm8
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm7, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
$main_loop_ryzen:
|
||||||
|
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm6, r8
|
||||||
|
punpcklqdq xmm6, xmm0
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
lea r9, QWORD PTR [rdi+rdi]
|
||||||
|
shl rdi, 32
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
aesenc xmm5, xmm6
|
||||||
|
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movaps xmm1, xmm8
|
||||||
|
mov rsi, r15
|
||||||
|
xor rsi, rdi
|
||||||
|
movq r14, xmm5
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
mov r10, r14
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rsi, QWORD PTR [r10+rbx]
|
||||||
|
lea r12, QWORD PTR [r10+rbx]
|
||||||
|
mov r13, QWORD PTR [r10+rbx+8]
|
||||||
|
|
||||||
|
add r9d, r14d
|
||||||
|
or r9d, -2147483647
|
||||||
|
xor edx, edx
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movq rax, xmm0
|
||||||
|
|
||||||
|
div r9
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm1, rdx
|
||||||
|
punpckldq xmm0, xmm1
|
||||||
|
movq r15, xmm0
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqa xmm2, xmm0
|
||||||
|
psrlq xmm0, 12
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
movq rdi, xmm1
|
||||||
|
test rdi, 524287
|
||||||
|
je $sqrt_fixup_ryzen
|
||||||
|
shr rdi, 19
|
||||||
|
|
||||||
|
$sqrt_fixup_ryzen_ret:
|
||||||
|
mov rax, rsi
|
||||||
|
mul r14
|
||||||
|
movq xmm1, rax
|
||||||
|
movq xmm0, rdx
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
mov r9d, r10d
|
||||||
|
mov ecx, r10d
|
||||||
|
xor r9d, 16
|
||||||
|
xor ecx, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
pxor xmm2, xmm0
|
||||||
|
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movdqa xmm4, xmm3
|
||||||
|
add r8, rdx
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r12], r8
|
||||||
|
xor r8, rsi
|
||||||
|
mov QWORD PTR [r12+8], r11
|
||||||
|
mov r10, r8
|
||||||
|
xor r11, r13
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm3, xmm5
|
||||||
|
dec ebp
|
||||||
|
jne $main_loop_ryzen
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+64]
|
||||||
|
mov rbx, QWORD PTR [r11+56]
|
||||||
|
mov rbp, QWORD PTR [r11+64]
|
||||||
|
mov rsi, QWORD PTR [r11+72]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-48]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
jmp $cnv2_main_loop_ryzen_endp
|
||||||
|
|
||||||
|
$sqrt_fixup_ryzen:
|
||||||
|
movq r9, xmm2
|
||||||
|
dec rdi
|
||||||
|
mov edx, -1022
|
||||||
|
shl rdx, 32
|
||||||
|
mov rax, rdi
|
||||||
|
shr rdi, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdi
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+rdx+1]
|
||||||
|
add rax, rdx
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdi, 0
|
||||||
|
jmp $sqrt_fixup_ryzen_ret
|
||||||
|
|
||||||
|
$cnv2_main_loop_ryzen_endp:
|
Loading…
Reference in a new issue