diff --git a/src/crypto/asm/cnv2_main_loop.S b/src/crypto/asm/cnv2_main_loop.S index dc5a82f5..580a4588 100644 --- a/src/crypto/asm/cnv2_main_loop.S +++ b/src/crypto/asm/cnv2_main_loop.S @@ -1,19 +1,25 @@ #define ALIGN .align .intel_syntax noprefix +#ifdef __APPLE__ +# define FN_PREFIX(fn) _ ## fn +.text +#else +# define FN_PREFIX(fn) fn .section .text -.global cnv2_mainloop_ivybridge_asm -.global cnv2_mainloop_ryzen_asm +#endif +.global FN_PREFIX(cnv2_mainloop_ivybridge_asm) +.global FN_PREFIX(cnv2_mainloop_ryzen_asm) -ALIGN 64 -cnv2_mainloop_ivybridge_asm: +ALIGN 16 +FN_PREFIX(cnv2_mainloop_ivybridge_asm): sub rsp, 48 mov rcx, rdi #include "cnv2_main_loop_ivybridge.inc" add rsp, 48 ret 0 -ALIGN 64 -cnv2_mainloop_ryzen_asm: +ALIGN 16 +FN_PREFIX(cnv2_mainloop_ryzen_asm): sub rsp, 48 mov rcx, rdi #include "cnv2_main_loop_ryzen.inc" diff --git a/src/crypto/asm/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cnv2_main_loop_ivybridge.inc index 0985d1bd..a253a549 100644 --- a/src/crypto/asm/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/cnv2_main_loop_ivybridge.inc @@ -49,8 +49,8 @@ movq xmm0, rcx punpcklqdq xmm5, xmm0 - ALIGN 64 -$main_loop_ivybridge: + ALIGN 16 +main_loop_ivybridge: movdqu xmm6, XMMWORD PTR [r10+rbx] lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d @@ -108,9 +108,9 @@ $main_loop_ivybridge: psubq xmm3, XMMWORD PTR [rsp+16] movq rdx, xmm3 test edx, 524287 - je $sqrt_fixup_ivybridge + je sqrt_fixup_ivybridge psrlq xmm3, 19 -$sqrt_fixup_ivybridge_ret: +sqrt_fixup_ivybridge_ret: mov ecx, r10d mov rax, rdi @@ -144,7 +144,7 @@ $sqrt_fixup_ivybridge_ret: and r10d, 2097136 xor r11, r12 dec rsi - jne $main_loop_ivybridge + jne main_loop_ivybridge ldmxcsr DWORD PTR [rsp] mov rbx, QWORD PTR [rsp+160] @@ -159,9 +159,9 @@ $sqrt_fixup_ivybridge_ret: pop rdi pop rsi pop rbp - jmp $cnv2_main_loop_ivybridge_endp + jmp cnv2_main_loop_ivybridge_endp -$sqrt_fixup_ivybridge: +sqrt_fixup_ivybridge: dec rdx mov r13d, -1022 shl r13, 32 @@ -178,6 +178,6 @@ $sqrt_fixup_ivybridge: sub rcx, r9 adc rdx, 0 movq xmm3, rdx - jmp $sqrt_fixup_ivybridge_ret + jmp sqrt_fixup_ivybridge_ret -$cnv2_main_loop_ivybridge_endp: +cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cnv2_main_loop_ryzen.inc index 3294548e..d386aa2d 100644 --- a/src/crypto/asm/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/cnv2_main_loop_ryzen.inc @@ -45,8 +45,8 @@ movq xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 64 -$main_loop_ryzen: + ALIGN 16 +main_loop_ryzen: movdqa xmm5, XMMWORD PTR [r10+rbx] movq xmm0, r11 movq xmm6, r8 @@ -103,10 +103,10 @@ $main_loop_ryzen: sqrtsd xmm1, xmm0 movq rdi, xmm1 test rdi, 524287 - je $sqrt_fixup_ryzen + je sqrt_fixup_ryzen shr rdi, 19 -$sqrt_fixup_ryzen_ret: +sqrt_fixup_ryzen_ret: mov rax, rsi mul r14 movq xmm1, rax @@ -141,7 +141,7 @@ $sqrt_fixup_ryzen_ret: and r10d, 2097136 movdqa xmm3, xmm5 dec ebp - jne $main_loop_ryzen + jne main_loop_ryzen ldmxcsr DWORD PTR [rsp] movaps xmm6, XMMWORD PTR [rsp+48] @@ -157,9 +157,9 @@ $sqrt_fixup_ryzen_ret: pop r13 pop r12 pop rdi - jmp $cnv2_main_loop_ryzen_endp + jmp cnv2_main_loop_ryzen_endp -$sqrt_fixup_ryzen: +sqrt_fixup_ryzen: movq r9, xmm2 dec rdi mov edx, -1022 @@ -174,6 +174,6 @@ $sqrt_fixup_ryzen: imul rcx, rax sub rcx, r9 adc rdi, 0 - jmp $sqrt_fixup_ryzen_ret + jmp sqrt_fixup_ryzen_ret -$cnv2_main_loop_ryzen_endp: +cnv2_main_loop_ryzen_endp: