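Fixed wrong ASM code alignment on macOS, where the assembler's `.align` operand is a power-of-two exponent rather than a byte count (so 64-byte alignment is written `.align 6`), thanks @SChernykh.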

This commit is contained in:
XMRig 2019-01-15 18:18:04 +07:00
parent 59b147b6fb
commit a98c475a3c
11 changed files with 26 additions and 22 deletions

View file

@ -94,7 +94,7 @@
lea r9, QWORD PTR [rdx+r13] lea r9, QWORD PTR [rdx+r13]
movdqu xmm15, XMMWORD PTR [r9] movdqu xmm15, XMMWORD PTR [r9]
ALIGN 16 ALIGN(64)
main_loop_double_sandybridge: main_loop_double_sandybridge:
movdqu xmm9, xmm15 movdqu xmm9, xmm15
mov eax, edx mov eax, edx

View file

@ -45,7 +45,7 @@
movq xmm0, rcx movq xmm0, rcx
punpcklqdq xmm4, xmm0 punpcklqdq xmm4, xmm0
ALIGN 16 ALIGN(64)
cnv2_main_loop_bulldozer: cnv2_main_loop_bulldozer:
movdqa xmm5, XMMWORD PTR [r10+rbx] movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm6, r8 movq xmm6, r8

View file

@ -50,7 +50,7 @@
punpcklqdq xmm5, xmm0 punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx] movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN 16 ALIGN(64)
main_loop_ivybridge: main_loop_ivybridge:
lea rdx, QWORD PTR [r10+rbx] lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d mov ecx, r10d

View file

@ -45,7 +45,7 @@
movq xmm0, rcx movq xmm0, rcx
punpcklqdq xmm4, xmm0 punpcklqdq xmm4, xmm0
ALIGN 16 ALIGN(64)
main_loop_ryzen: main_loop_ryzen:
movdqa xmm5, XMMWORD PTR [r10+rbx] movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm0, r11 movq xmm0, r11

View file

@ -1,4 +1,8 @@
#define ALIGN .align #ifdef __APPLE__
# define ALIGN(x) .align 6
#else
# define ALIGN(x) .align 64
#endif
.intel_syntax noprefix .intel_syntax noprefix
#ifdef __APPLE__ #ifdef __APPLE__
# define FN_PREFIX(fn) _ ## fn # define FN_PREFIX(fn) _ ## fn
@ -12,7 +16,7 @@
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm) .global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
ALIGN 16 ALIGN(64)
FN_PREFIX(cnv2_mainloop_ivybridge_asm): FN_PREFIX(cnv2_mainloop_ivybridge_asm):
sub rsp, 48 sub rsp, 48
mov rcx, rdi mov rcx, rdi
@ -24,7 +28,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm):
nop nop
nop nop
ALIGN 16 ALIGN(64)
FN_PREFIX(cnv2_mainloop_ryzen_asm): FN_PREFIX(cnv2_mainloop_ryzen_asm):
sub rsp, 48 sub rsp, 48
mov rcx, rdi mov rcx, rdi
@ -36,7 +40,7 @@ FN_PREFIX(cnv2_mainloop_ryzen_asm):
nop nop
nop nop
ALIGN 16 ALIGN(64)
FN_PREFIX(cnv2_mainloop_bulldozer_asm): FN_PREFIX(cnv2_mainloop_bulldozer_asm):
sub rsp, 48 sub rsp, 48
mov rcx, rdi mov rcx, rdi
@ -48,7 +52,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm):
nop nop
nop nop
ALIGN 16 ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
sub rsp, 48 sub rsp, 48
mov rcx, rdi mov rcx, rdi

View file

@ -4,7 +4,7 @@ PUBLIC cnv2_mainloop_ryzen_asm
PUBLIC cnv2_mainloop_bulldozer_asm PUBLIC cnv2_mainloop_bulldozer_asm
PUBLIC cnv2_double_mainloop_sandybridge_asm PUBLIC cnv2_double_mainloop_sandybridge_asm
ALIGN 64 ALIGN(64)
cnv2_mainloop_ivybridge_asm PROC cnv2_mainloop_ivybridge_asm PROC
INCLUDE cn2/cnv2_main_loop_ivybridge.inc INCLUDE cn2/cnv2_main_loop_ivybridge.inc
ret 0 ret 0
@ -14,7 +14,7 @@ cnv2_mainloop_ivybridge_asm PROC
nop nop
cnv2_mainloop_ivybridge_asm ENDP cnv2_mainloop_ivybridge_asm ENDP
ALIGN 64 ALIGN(64)
cnv2_mainloop_ryzen_asm PROC cnv2_mainloop_ryzen_asm PROC
INCLUDE cn2/cnv2_main_loop_ryzen.inc INCLUDE cn2/cnv2_main_loop_ryzen.inc
ret 0 ret 0
@ -24,7 +24,7 @@ cnv2_mainloop_ryzen_asm PROC
nop nop
cnv2_mainloop_ryzen_asm ENDP cnv2_mainloop_ryzen_asm ENDP
ALIGN 64 ALIGN(64)
cnv2_mainloop_bulldozer_asm PROC cnv2_mainloop_bulldozer_asm PROC
INCLUDE cn2/cnv2_main_loop_bulldozer.inc INCLUDE cn2/cnv2_main_loop_bulldozer.inc
ret 0 ret 0
@ -34,7 +34,7 @@ cnv2_mainloop_bulldozer_asm PROC
nop nop
cnv2_mainloop_bulldozer_asm ENDP cnv2_mainloop_bulldozer_asm ENDP
ALIGN 64 ALIGN(64)
cnv2_double_mainloop_sandybridge_asm PROC cnv2_double_mainloop_sandybridge_asm PROC
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
ret 0 ret 0

View file

@ -94,7 +94,7 @@
lea r9, QWORD PTR [rdx+r13] lea r9, QWORD PTR [rdx+r13]
movdqu xmm15, XMMWORD PTR [r9] movdqu xmm15, XMMWORD PTR [r9]
ALIGN 16 ALIGN(64)
main_loop_double_sandybridge: main_loop_double_sandybridge:
movdqu xmm9, xmm15 movdqu xmm9, xmm15
mov eax, edx mov eax, edx

View file

@ -45,7 +45,7 @@
movd xmm0, rcx movd xmm0, rcx
punpcklqdq xmm4, xmm0 punpcklqdq xmm4, xmm0
ALIGN 16 ALIGN(64)
cnv2_main_loop_bulldozer: cnv2_main_loop_bulldozer:
movdqa xmm5, XMMWORD PTR [r10+rbx] movdqa xmm5, XMMWORD PTR [r10+rbx]
movd xmm6, r8 movd xmm6, r8

View file

@ -50,7 +50,7 @@
punpcklqdq xmm5, xmm0 punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx] movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN 16 ALIGN(64)
main_loop_ivybridge: main_loop_ivybridge:
lea rdx, QWORD PTR [r10+rbx] lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d mov ecx, r10d

View file

@ -45,7 +45,7 @@
movd xmm0, rcx movd xmm0, rcx
punpcklqdq xmm4, xmm0 punpcklqdq xmm4, xmm0
ALIGN 16 ALIGN(64)
main_loop_ryzen: main_loop_ryzen:
movdqa xmm5, XMMWORD PTR [r10+rbx] movdqa xmm5, XMMWORD PTR [r10+rbx]
movd xmm0, r11 movd xmm0, r11

View file

@ -1,4 +1,4 @@
#define ALIGN .align #define ALIGN(x) .align 64
.intel_syntax noprefix .intel_syntax noprefix
.section .text .section .text
.global cnv2_mainloop_ivybridge_asm .global cnv2_mainloop_ivybridge_asm
@ -6,7 +6,7 @@
.global cnv2_mainloop_bulldozer_asm .global cnv2_mainloop_bulldozer_asm
.global cnv2_double_mainloop_sandybridge_asm .global cnv2_double_mainloop_sandybridge_asm
ALIGN 16 ALIGN(64)
cnv2_mainloop_ivybridge_asm: cnv2_mainloop_ivybridge_asm:
#include "../cn2/cnv2_main_loop_ivybridge.inc" #include "../cn2/cnv2_main_loop_ivybridge.inc"
ret 0 ret 0
@ -15,7 +15,7 @@ cnv2_mainloop_ivybridge_asm:
nop nop
nop nop
ALIGN 16 ALIGN(64)
cnv2_mainloop_ryzen_asm: cnv2_mainloop_ryzen_asm:
#include "../cn2/cnv2_main_loop_ryzen.inc" #include "../cn2/cnv2_main_loop_ryzen.inc"
ret 0 ret 0
@ -24,7 +24,7 @@ cnv2_mainloop_ryzen_asm:
nop nop
nop nop
ALIGN 16 ALIGN(64)
cnv2_mainloop_bulldozer_asm: cnv2_mainloop_bulldozer_asm:
#include "../cn2/cnv2_main_loop_bulldozer.inc" #include "../cn2/cnv2_main_loop_bulldozer.inc"
ret 0 ret 0
@ -33,7 +33,7 @@ cnv2_mainloop_bulldozer_asm:
nop nop
nop nop
ALIGN 16 ALIGN(64)
cnv2_double_mainloop_sandybridge_asm: cnv2_double_mainloop_sandybridge_asm:
#include "../cn2/cnv2_double_main_loop_sandybridge.inc" #include "../cn2/cnv2_double_main_loop_sandybridge.inc"
ret 0 ret 0