Merge pull request #2287 from SChernykh/dev

Fixed rounding mode after running cn/upx
This commit is contained in:
xmrig 2021-04-19 18:06:16 +07:00 committed by GitHub
commit 61d165a314
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 23 deletions

View file

@ -7,7 +7,7 @@
push r13 push r13
push r14 push r14
push r15 push r15
sub rsp, 216 sub rsp, 232
mov rdi, QWORD PTR [rcx+8] mov rdi, QWORD PTR [rcx+8]
@ -84,6 +84,8 @@
movq xmm10, rax movq xmm10, rax
mov rax, 4389456576511 mov rax, 4389456576511
mov QWORD PTR [rsp+16], rax mov QWORD PTR [rsp+16], rax
mov rax, -4389456576512
mov QWORD PTR [rsp+216], rax
punpcklqdq xmm10, xmm0 punpcklqdq xmm10, xmm0
ALIGN(64) ALIGN(64)
@ -170,14 +172,13 @@ upx2_main_loop:
movq xmm0, rax movq xmm0, rax
paddq xmm0, xmm11 paddq xmm0, xmm11
sqrtsd xmm1, xmm0 sqrtsd xmm1, xmm0
mov r13, -4389456576512
movq rdx, xmm1 movq rdx, xmm1
mov rax, rdx mov rax, rdx
shr rdx, 19 shr rdx, 19
shr rax, 20 shr rax, 20
mov rcx, rdx mov rcx, rdx
sub rcx, rax sub rcx, rax
add rax, r13 add rax, QWORD PTR [rsp+216]
sub rcx, QWORD PTR [rsp+16] sub rcx, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp] mov r13, QWORD PTR [rsp]
imul rcx, rax imul rcx, rax
@ -251,12 +252,10 @@ upx2_main_loop:
shr rdx, 19 shr rdx, 19
mov rcx, rdx mov rcx, rdx
sub rcx, rax sub rcx, rax
mov rbx, 4389456576511 sub rcx, QWORD PTR [rsp+16]
sub rcx, rbx
movdqa xmm9, xmm7 movdqa xmm9, xmm7
mov rbx, -4389456576512
movdqa xmm7, xmm6 movdqa xmm7, xmm6
add rax, rbx add rax, QWORD PTR [rsp+216]
imul rcx, rax imul rcx, rax
mov rax, r9 mov rax, r9
sub rcx, r8 sub rcx, r8
@ -264,10 +263,9 @@ upx2_main_loop:
adc rdx, 0 adc rdx, 0
xor rcx, 32 xor rcx, 32
and ecx, 131056 and ecx, 131056
movq xmm0, rdx mov QWORD PTR [rsp+32], rdx
movdqu xmm1, XMMWORD PTR [rcx+r13] movdqu xmm1, XMMWORD PTR [rcx+r13]
mul rdi mul rdi
movdqa XMMWORD PTR [rsp+32], xmm0
paddq xmm1, xmm5 paddq xmm1, xmm5
mov r8, rax mov r8, rax
xor r8, QWORD PTR [rcx+r13+8] xor r8, QWORD PTR [rcx+r13+8]
@ -298,10 +296,10 @@ upx2_main_loop:
sub QWORD PTR [rsp+8], 1 sub QWORD PTR [rsp+8], 1
jne upx2_main_loop jne upx2_main_loop
ldmxcsr DWORD PTR [rsp+28] ldmxcsr DWORD PTR [rsp+24]
movaps xmm13, XMMWORD PTR [rsp+80] movaps xmm13, XMMWORD PTR [rsp+80]
lea r11, QWORD PTR [rsp+216] lea r11, QWORD PTR [rsp+232]
movaps xmm6, XMMWORD PTR [r11-24] movaps xmm6, XMMWORD PTR [r11-24]
movaps xmm7, XMMWORD PTR [r11-40] movaps xmm7, XMMWORD PTR [r11-40]
movaps xmm8, XMMWORD PTR [r11-56] movaps xmm8, XMMWORD PTR [r11-56]

View file

@ -7,7 +7,7 @@
push r13 push r13
push r14 push r14
push r15 push r15
sub rsp, 216 sub rsp, 232
mov rdi, QWORD PTR [rcx+8] mov rdi, QWORD PTR [rcx+8]
@ -84,6 +84,8 @@
movd xmm10, rax movd xmm10, rax
mov rax, 4389456576511 mov rax, 4389456576511
mov QWORD PTR [rsp+16], rax mov QWORD PTR [rsp+16], rax
mov rax, -4389456576512
mov QWORD PTR [rsp+216], rax
punpcklqdq xmm10, xmm0 punpcklqdq xmm10, xmm0
ALIGN(64) ALIGN(64)
@ -170,14 +172,13 @@ upx2_main_loop:
movd xmm0, rax movd xmm0, rax
paddq xmm0, xmm11 paddq xmm0, xmm11
sqrtsd xmm1, xmm0 sqrtsd xmm1, xmm0
mov r13, -4389456576512
movd rdx, xmm1 movd rdx, xmm1
mov rax, rdx mov rax, rdx
shr rdx, 19 shr rdx, 19
shr rax, 20 shr rax, 20
mov rcx, rdx mov rcx, rdx
sub rcx, rax sub rcx, rax
add rax, r13 add rax, QWORD PTR [rsp+216]
sub rcx, QWORD PTR [rsp+16] sub rcx, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp] mov r13, QWORD PTR [rsp]
imul rcx, rax imul rcx, rax
@ -251,12 +252,10 @@ upx2_main_loop:
shr rdx, 19 shr rdx, 19
mov rcx, rdx mov rcx, rdx
sub rcx, rax sub rcx, rax
mov rbx, 4389456576511 sub rcx, QWORD PTR [rsp+16]
sub rcx, rbx
movdqa xmm9, xmm7 movdqa xmm9, xmm7
mov rbx, -4389456576512
movdqa xmm7, xmm6 movdqa xmm7, xmm6
add rax, rbx add rax, QWORD PTR [rsp+216]
imul rcx, rax imul rcx, rax
mov rax, r9 mov rax, r9
sub rcx, r8 sub rcx, r8
@ -264,10 +263,9 @@ upx2_main_loop:
adc rdx, 0 adc rdx, 0
xor rcx, 32 xor rcx, 32
and ecx, 131056 and ecx, 131056
movd xmm0, rdx mov QWORD PTR [rsp+32], rdx
movdqu xmm1, XMMWORD PTR [rcx+r13] movdqu xmm1, XMMWORD PTR [rcx+r13]
mul rdi mul rdi
movdqa XMMWORD PTR [rsp+32], xmm0
paddq xmm1, xmm5 paddq xmm1, xmm5
mov r8, rax mov r8, rax
xor r8, QWORD PTR [rcx+r13+8] xor r8, QWORD PTR [rcx+r13+8]
@ -298,10 +296,10 @@ upx2_main_loop:
sub QWORD PTR [rsp+8], 1 sub QWORD PTR [rsp+8], 1
jne upx2_main_loop jne upx2_main_loop
ldmxcsr DWORD PTR [rsp+28] ldmxcsr DWORD PTR [rsp+24]
movaps xmm13, XMMWORD PTR [rsp+80] movaps xmm13, XMMWORD PTR [rsp+80]
lea r11, QWORD PTR [rsp+216] lea r11, QWORD PTR [rsp+232]
movaps xmm6, XMMWORD PTR [r11-24] movaps xmm6, XMMWORD PTR [r11-24]
movaps xmm7, XMMWORD PTR [r11-40] movaps xmm7, XMMWORD PTR [r11-40]
movaps xmm8, XMMWORD PTR [r11-56] movaps xmm8, XMMWORD PTR [r11-56]

View file

@ -41,7 +41,7 @@ public:
size_t size = 0; size_t size = 0;
inline bool isFullyAllocated() const { return allocated == total; } inline bool isFullyAllocated() const { return allocated == total; }
inline double percent() const { return allocated == 0 ? 0.0 : static_cast<double>(allocated) / total * 100.0; } inline double percent() const { return total == 0 ? 0.0 : static_cast<double>(allocated) / total * 100.0; }
inline void reset() { allocated = 0; total = 0; size = 0; } inline void reset() { allocated = 0; total = 0; size = 0; }
inline HugePagesInfo &operator+=(const HugePagesInfo &other) inline HugePagesInfo &operator+=(const HugePagesInfo &other)