diff --git a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc index e8251bc7..aa5101a8 100644 --- a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc @@ -94,7 +94,7 @@ lea r9, QWORD PTR [rdx+r13] movdqu xmm15, XMMWORD PTR [r9] - ALIGN 16 + ALIGN(64) main_loop_double_sandybridge: movdqu xmm9, xmm15 mov eax, edx diff --git a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc index 478976c0..c764501d 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc @@ -45,7 +45,7 @@ movq xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 16 + ALIGN(64) cnv2_main_loop_bulldozer: movdqa xmm5, XMMWORD PTR [r10+rbx] movq xmm6, r8 diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc index 8c2c2d3b..06f1d28b 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc @@ -50,7 +50,7 @@ punpcklqdq xmm5, xmm0 movdqu xmm6, XMMWORD PTR [r10+rbx] - ALIGN 16 + ALIGN(64) main_loop_ivybridge: lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc index d386aa2d..5dbf5917 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc @@ -45,7 +45,7 @@ movq xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 16 + ALIGN(64) main_loop_ryzen: movdqa xmm5, XMMWORD PTR [r10+rbx] movq xmm0, r11 diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index e9ac64f5..1e5610d1 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -1,4 +1,8 @@ -#define ALIGN .align +#ifdef __APPLE__ +# define ALIGN(x) .align 6 +#else +# define ALIGN(x) .align 64 +#endif .intel_syntax noprefix #ifdef __APPLE__ # define FN_PREFIX(fn) _ ## fn @@ -12,7 +16,7 @@ .global FN_PREFIX(cnv2_mainloop_bulldozer_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_mainloop_ivybridge_asm): sub rsp, 48 mov rcx, rdi @@ -24,7 +28,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm): nop nop -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_mainloop_ryzen_asm): sub rsp, 48 mov rcx, rdi @@ -36,7 +40,7 @@ FN_PREFIX(cnv2_mainloop_ryzen_asm): nop nop -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_mainloop_bulldozer_asm): sub rsp, 48 mov rcx, rdi @@ -48,7 +52,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm): nop nop -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): sub rsp, 48 mov rcx, rdi diff --git a/src/crypto/asm/cn_main_loop.asm b/src/crypto/asm/cn_main_loop.asm index 9c8a6ea9..47b4df9e 100644 --- a/src/crypto/asm/cn_main_loop.asm +++ b/src/crypto/asm/cn_main_loop.asm @@ -4,7 +4,7 @@ PUBLIC cnv2_mainloop_ryzen_asm PUBLIC cnv2_mainloop_bulldozer_asm PUBLIC cnv2_double_mainloop_sandybridge_asm -ALIGN 64 +ALIGN(64) cnv2_mainloop_ivybridge_asm PROC INCLUDE cn2/cnv2_main_loop_ivybridge.inc ret 0 @@ -14,7 +14,7 @@ cnv2_mainloop_ivybridge_asm PROC nop cnv2_mainloop_ivybridge_asm ENDP -ALIGN 64 +ALIGN(64) cnv2_mainloop_ryzen_asm PROC INCLUDE cn2/cnv2_main_loop_ryzen.inc ret 0 @@ -24,7 +24,7 @@ cnv2_mainloop_ryzen_asm PROC nop cnv2_mainloop_ryzen_asm ENDP -ALIGN 64 +ALIGN(64) cnv2_mainloop_bulldozer_asm PROC INCLUDE cn2/cnv2_main_loop_bulldozer.inc ret 0 @@ -34,7 +34,7 @@ cnv2_mainloop_bulldozer_asm PROC nop cnv2_mainloop_bulldozer_asm ENDP -ALIGN 64 +ALIGN(64) cnv2_double_mainloop_sandybridge_asm PROC INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc ret 0 diff --git a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc index 44ea8923..05af9393 100644 --- a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc @@ -94,7 +94,7 @@ lea r9, QWORD PTR [rdx+r13] movdqu xmm15, XMMWORD PTR [r9] - ALIGN 16 + ALIGN(64) main_loop_double_sandybridge: movdqu xmm9, xmm15 mov eax, edx diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc index c19e9d69..03a36f48 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc @@ -45,7 +45,7 @@ movd xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 16 + ALIGN(64) cnv2_main_loop_bulldozer: movdqa xmm5, XMMWORD PTR [r10+rbx] movd xmm6, r8 diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc index c925ca24..77e28f80 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc @@ -50,7 +50,7 @@ punpcklqdq xmm5, xmm0 movdqu xmm6, XMMWORD PTR [r10+rbx] - ALIGN 16 + ALIGN(64) main_loop_ivybridge: lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc index d1cd26c4..7e5c127f 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc @@ -45,7 +45,7 @@ movd xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 16 + ALIGN(64) main_loop_ryzen: movdqa xmm5, XMMWORD PTR [r10+rbx] movd xmm0, r11 diff --git a/src/crypto/asm/win64/cn_main_loop.S b/src/crypto/asm/win64/cn_main_loop.S index ea5a63b8..90e43470 100644 --- a/src/crypto/asm/win64/cn_main_loop.S +++ b/src/crypto/asm/win64/cn_main_loop.S @@ -1,4 +1,4 @@ -#define ALIGN .align +#define ALIGN(x) .align 64 .intel_syntax noprefix .section .text .global cnv2_mainloop_ivybridge_asm @@ -6,7 +6,7 @@ .global cnv2_mainloop_bulldozer_asm .global cnv2_double_mainloop_sandybridge_asm -ALIGN 16 +ALIGN(64) cnv2_mainloop_ivybridge_asm: #include "../cn2/cnv2_main_loop_ivybridge.inc" ret 0 @@ -15,7 +15,7 @@ cnv2_mainloop_ivybridge_asm: nop nop -ALIGN 16 +ALIGN(64) cnv2_mainloop_ryzen_asm: #include "../cn2/cnv2_main_loop_ryzen.inc" ret 0 @@ -24,7 +24,7 @@ cnv2_mainloop_ryzen_asm: nop nop -ALIGN 16 +ALIGN(64) cnv2_mainloop_bulldozer_asm: #include "../cn2/cnv2_main_loop_bulldozer.inc" ret 0 @@ -33,7 +33,7 @@ cnv2_mainloop_bulldozer_asm: nop nop -ALIGN 16 +ALIGN(64) cnv2_double_mainloop_sandybridge_asm: #include "../cn2/cnv2_double_main_loop_sandybridge.inc" ret 0