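Fixed wrong ASM code alignment on macOS, where `.align` takes a power-of-two exponent rather than a byte count (so 64-byte alignment is `.align 6`, not `.align 64`), thanks @SChernykh.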

This commit is contained in:
XMRig 2019-01-15 18:18:04 +07:00
parent 59b147b6fb
commit a98c475a3c
11 changed files with 26 additions and 22 deletions

View file

@ -94,7 +94,7 @@
lea r9, QWORD PTR [rdx+r13]
movdqu xmm15, XMMWORD PTR [r9]
ALIGN 16
ALIGN(64)
main_loop_double_sandybridge:
movdqu xmm9, xmm15
mov eax, edx

View file

@ -45,7 +45,7 @@
movq xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN 16
ALIGN(64)
cnv2_main_loop_bulldozer:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm6, r8

View file

@ -50,7 +50,7 @@
punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN 16
ALIGN(64)
main_loop_ivybridge:
lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d

View file

@ -45,7 +45,7 @@
movq xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN 16
ALIGN(64)
main_loop_ryzen:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm0, r11

View file

@ -1,4 +1,8 @@
#define ALIGN .align
#ifdef __APPLE__
# define ALIGN(x) .align 6
#else
# define ALIGN(x) .align 64
#endif
.intel_syntax noprefix
#ifdef __APPLE__
# define FN_PREFIX(fn) _ ## fn
@ -12,7 +16,7 @@
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
ALIGN 16
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
sub rsp, 48
mov rcx, rdi
@ -24,7 +28,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm):
nop
nop
ALIGN 16
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ryzen_asm):
sub rsp, 48
mov rcx, rdi
@ -36,7 +40,7 @@ FN_PREFIX(cnv2_mainloop_ryzen_asm):
nop
nop
ALIGN 16
ALIGN(64)
FN_PREFIX(cnv2_mainloop_bulldozer_asm):
sub rsp, 48
mov rcx, rdi
@ -48,7 +52,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm):
nop
nop
ALIGN 16
ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
sub rsp, 48
mov rcx, rdi

View file

@ -4,7 +4,7 @@ PUBLIC cnv2_mainloop_ryzen_asm
PUBLIC cnv2_mainloop_bulldozer_asm
PUBLIC cnv2_double_mainloop_sandybridge_asm
ALIGN 64
ALIGN(64)
cnv2_mainloop_ivybridge_asm PROC
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
ret 0
@ -14,7 +14,7 @@ cnv2_mainloop_ivybridge_asm PROC
nop
cnv2_mainloop_ivybridge_asm ENDP
ALIGN 64
ALIGN(64)
cnv2_mainloop_ryzen_asm PROC
INCLUDE cn2/cnv2_main_loop_ryzen.inc
ret 0
@ -24,7 +24,7 @@ cnv2_mainloop_ryzen_asm PROC
nop
cnv2_mainloop_ryzen_asm ENDP
ALIGN 64
ALIGN(64)
cnv2_mainloop_bulldozer_asm PROC
INCLUDE cn2/cnv2_main_loop_bulldozer.inc
ret 0
@ -34,7 +34,7 @@ cnv2_mainloop_bulldozer_asm PROC
nop
cnv2_mainloop_bulldozer_asm ENDP
ALIGN 64
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm PROC
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
ret 0

View file

@ -94,7 +94,7 @@
lea r9, QWORD PTR [rdx+r13]
movdqu xmm15, XMMWORD PTR [r9]
ALIGN 16
ALIGN(64)
main_loop_double_sandybridge:
movdqu xmm9, xmm15
mov eax, edx

View file

@ -45,7 +45,7 @@
movd xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN 16
ALIGN(64)
cnv2_main_loop_bulldozer:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movd xmm6, r8

View file

@ -50,7 +50,7 @@
punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN 16
ALIGN(64)
main_loop_ivybridge:
lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d

View file

@ -45,7 +45,7 @@
movd xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN 16
ALIGN(64)
main_loop_ryzen:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movd xmm0, r11

View file

@ -1,4 +1,4 @@
#define ALIGN .align
#define ALIGN(x) .align 64
.intel_syntax noprefix
.section .text
.global cnv2_mainloop_ivybridge_asm
@ -6,7 +6,7 @@
.global cnv2_mainloop_bulldozer_asm
.global cnv2_double_mainloop_sandybridge_asm
ALIGN 16
ALIGN(64)
cnv2_mainloop_ivybridge_asm:
#include "../cn2/cnv2_main_loop_ivybridge.inc"
ret 0
@ -15,7 +15,7 @@ cnv2_mainloop_ivybridge_asm:
nop
nop
ALIGN 16
ALIGN(64)
cnv2_mainloop_ryzen_asm:
#include "../cn2/cnv2_main_loop_ryzen.inc"
ret 0
@ -24,7 +24,7 @@ cnv2_mainloop_ryzen_asm:
nop
nop
ALIGN 16
ALIGN(64)
cnv2_mainloop_bulldozer_asm:
#include "../cn2/cnv2_main_loop_bulldozer.inc"
ret 0
@ -33,7 +33,7 @@ cnv2_mainloop_bulldozer_asm:
nop
nop
ALIGN 16
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm:
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
ret 0