From a98c475a3c0aba42e984fc163c5a9b1a4ce0a3ef Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 15 Jan 2019 18:18:04 +0700 Subject: [PATCH] Fixed wrong ASM code alignment on macOS, thanks @SChernykh. --- .../asm/cn2/cnv2_double_main_loop_sandybridge.inc | 2 +- src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc | 2 +- src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc | 2 +- src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc | 2 +- src/crypto/asm/cn_main_loop.S | 14 +++++++++----- src/crypto/asm/cn_main_loop.asm | 8 ++++---- .../cn2/cnv2_double_main_loop_sandybridge.inc | 2 +- .../asm/win64/cn2/cnv2_main_loop_bulldozer.inc | 2 +- .../asm/win64/cn2/cnv2_main_loop_ivybridge.inc | 2 +- src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc | 2 +- src/crypto/asm/win64/cn_main_loop.S | 10 +++++----- 11 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc index e8251bc7f..aa5101a83 100644 --- a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc @@ -94,7 +94,7 @@ lea r9, QWORD PTR [rdx+r13] movdqu xmm15, XMMWORD PTR [r9] - ALIGN 16 + ALIGN(64) main_loop_double_sandybridge: movdqu xmm9, xmm15 mov eax, edx diff --git a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc index 478976c03..c764501db 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc @@ -45,7 +45,7 @@ movq xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 16 + ALIGN(64) cnv2_main_loop_bulldozer: movdqa xmm5, XMMWORD PTR [r10+rbx] movq xmm6, r8 diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc index 8c2c2d3b0..06f1d28be 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc @@ -50,7 +50,7 @@ punpcklqdq xmm5, xmm0 movdqu xmm6, XMMWORD PTR [r10+rbx] - ALIGN 16 + ALIGN(64) main_loop_ivybridge: lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc index d386aa2df..5dbf5917f 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc @@ -45,7 +45,7 @@ movq xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 16 + ALIGN(64) main_loop_ryzen: movdqa xmm5, XMMWORD PTR [r10+rbx] movq xmm0, r11 diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index e9ac64f55..1e5610d15 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -1,4 +1,8 @@ -#define ALIGN .align +#ifdef __APPLE__ +# define ALIGN(x) .align 6 +#else +# define ALIGN(x) .align 64 +#endif .intel_syntax noprefix #ifdef __APPLE__ # define FN_PREFIX(fn) _ ## fn @@ -12,7 +16,7 @@ .global FN_PREFIX(cnv2_mainloop_bulldozer_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_mainloop_ivybridge_asm): sub rsp, 48 mov rcx, rdi @@ -24,7 +28,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm): nop nop -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_mainloop_ryzen_asm): sub rsp, 48 mov rcx, rdi @@ -36,7 +40,7 @@ FN_PREFIX(cnv2_mainloop_ryzen_asm): nop nop -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_mainloop_bulldozer_asm): sub rsp, 48 mov rcx, rdi @@ -48,7 +52,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm): nop nop -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): sub rsp, 48 mov rcx, rdi diff --git a/src/crypto/asm/cn_main_loop.asm b/src/crypto/asm/cn_main_loop.asm index 9c8a6ea9c..47b4df9e4 100644 --- a/src/crypto/asm/cn_main_loop.asm +++ b/src/crypto/asm/cn_main_loop.asm @@ -4,7 +4,7 @@ PUBLIC cnv2_mainloop_ryzen_asm PUBLIC cnv2_mainloop_bulldozer_asm PUBLIC cnv2_double_mainloop_sandybridge_asm -ALIGN 64 +ALIGN(64) cnv2_mainloop_ivybridge_asm PROC INCLUDE cn2/cnv2_main_loop_ivybridge.inc ret 0 @@ -14,7 +14,7 @@ cnv2_mainloop_ivybridge_asm PROC nop cnv2_mainloop_ivybridge_asm ENDP -ALIGN 64 +ALIGN(64) cnv2_mainloop_ryzen_asm PROC INCLUDE cn2/cnv2_main_loop_ryzen.inc ret 0 @@ -24,7 +24,7 @@ cnv2_mainloop_ryzen_asm PROC nop cnv2_mainloop_ryzen_asm ENDP -ALIGN 64 +ALIGN(64) cnv2_mainloop_bulldozer_asm PROC INCLUDE cn2/cnv2_main_loop_bulldozer.inc ret 0 @@ -34,7 +34,7 @@ cnv2_mainloop_bulldozer_asm PROC nop cnv2_mainloop_bulldozer_asm ENDP -ALIGN 64 +ALIGN(64) cnv2_double_mainloop_sandybridge_asm PROC INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc ret 0 diff --git a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc index 44ea89230..05af93934 100644 --- a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc @@ -94,7 +94,7 @@ lea r9, QWORD PTR [rdx+r13] movdqu xmm15, XMMWORD PTR [r9] - ALIGN 16 + ALIGN(64) main_loop_double_sandybridge: movdqu xmm9, xmm15 mov eax, edx diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc index c19e9d69b..03a36f48d 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc @@ -45,7 +45,7 @@ movd xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 16 + ALIGN(64) cnv2_main_loop_bulldozer: movdqa xmm5, XMMWORD PTR [r10+rbx] movd xmm6, r8 diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc index c925ca24c..77e28f801 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc @@ -50,7 +50,7 @@ punpcklqdq xmm5, xmm0 movdqu xmm6, XMMWORD PTR [r10+rbx] - ALIGN 16 + ALIGN(64) main_loop_ivybridge: lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc index d1cd26c42..7e5c127f8 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc @@ -45,7 +45,7 @@ movd xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 16 + ALIGN(64) main_loop_ryzen: movdqa xmm5, XMMWORD PTR [r10+rbx] movd xmm0, r11 diff --git a/src/crypto/asm/win64/cn_main_loop.S b/src/crypto/asm/win64/cn_main_loop.S index ea5a63b8e..90e43470c 100644 --- a/src/crypto/asm/win64/cn_main_loop.S +++ b/src/crypto/asm/win64/cn_main_loop.S @@ -1,4 +1,4 @@ -#define ALIGN .align +#define ALIGN(x) .align 64 .intel_syntax noprefix .section .text .global cnv2_mainloop_ivybridge_asm @@ -6,7 +6,7 @@ .global cnv2_mainloop_bulldozer_asm .global cnv2_double_mainloop_sandybridge_asm -ALIGN 16 +ALIGN(64) cnv2_mainloop_ivybridge_asm: #include "../cn2/cnv2_main_loop_ivybridge.inc" ret 0 @@ -15,7 +15,7 @@ cnv2_mainloop_ivybridge_asm: nop nop -ALIGN 16 +ALIGN(64) cnv2_mainloop_ryzen_asm: #include "../cn2/cnv2_main_loop_ryzen.inc" ret 0 @@ -24,7 +24,7 @@ cnv2_mainloop_ryzen_asm: nop nop -ALIGN 16 +ALIGN(64) cnv2_mainloop_bulldozer_asm: #include "../cn2/cnv2_main_loop_bulldozer.inc" ret 0 @@ -33,7 +33,7 @@ cnv2_mainloop_bulldozer_asm: nop nop -ALIGN 16 +ALIGN(64) cnv2_double_mainloop_sandybridge_asm: #include "../cn2/cnv2_double_main_loop_sandybridge.inc" ret 0