diff --git a/CHANGELOG.md b/CHANGELOG.md index c5849269..8dc3b169 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# v2.8.1 +- [#768](https://github.com/xmrig/xmrig/issues/768) Fixed build with Visual Studio 2015. +- [#769](https://github.com/xmrig/xmrig/issues/769) Fixed regression, some ANSI escape sequences was in log with disabled colors. +- [#777](https://github.com/xmrig/xmrig/issues/777) Better report about pool connection issues. +- Simplified checks for ASM auto detection, only AES support necessary. +- Added missing options to `--help` output. + # v2.8.0 - **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).** - Added global and per thread option `"asm"` and and command line equivalent. diff --git a/README.md b/README.md index 81b1baea..e2e04aa1 100644 --- a/README.md +++ b/README.md @@ -52,14 +52,21 @@ Use [config.xmrig.com](https://config.xmrig.com/xmrig) to generate, edit or shar ### Options ``` - -a, --algo=ALGO cryptonight (default) or cryptonight-lite + -a, --algo=ALGO specify the algorithm to use + cryptonight + cryptonight-lite + cryptonight-heavy -o, --url=URL URL of mining server -O, --userpass=U:P username:password pair for mining server -u, --user=USERNAME username for mining server -p, --pass=PASSWORD password for mining server + --rig-id=ID rig identifier for pool-side statistics (needs pool support) -t, --threads=N number of miner threads -v, --av=N algorithm variation, 0 auto select - -k, --keepalive send keepalived for prevent timeout (need pool support) + -k, --keepalive send keepalived packet for prevent timeout (needs pool support) + --nicehash enable nicehash.com support + --tls enable SSL/TLS support (needs pool support) + --tls-fingerprint=F pool TLS certificate fingerprint, if set enable strict certificate pinning -r, --retries=N number of times to retry before switch to backup server (default: 5) -R, --retry-pause=N time to pause between retries (default: 5) --cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1 @@ -75,16 +82,20 @@ Use [config.xmrig.com](https://config.xmrig.com/xmrig) to generate, edit or shar -S, --syslog use system log for output messages --max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75) --safe safe adjust threads and av settings for current CPU - --nicehash enable nicehash/xmrig-proxy support + --asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen. --print-time=N print hashrate report every N seconds --api-port=N port for the miner API --api-access-token=T access token for API --api-worker-id=ID custom worker-id for API + --api-id=ID custom instance ID for API + --api-ipv6 enable IPv6 support for API + --api-no-restricted enable full remote access (only if API token set) + --dry-run test configuration and exit -h, --help display this help and exit -V, --version output version information and exit ``` -Also you can use configuration via config file, default **config.json**. You can load multiple config files and combine it with command line options. +Also you can use configuration via config file, default name **config.json**. Some options available only via config file: [`autosave`](https://github.com/xmrig/xmrig/issues/767), [`hw-aes`](https://github.com/xmrig/xmrig/issues/563). `watch` option currently not implemented in miners only in proxy. ## Algorithm variations diff --git a/cmake/asm.cmake b/cmake/asm.cmake index cb50f0d9..358d5666 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -3,13 +3,19 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) if (CMAKE_C_COMPILER_ID MATCHES MSVC) enable_language(ASM_MASM) - set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.asm") + + if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141) + set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.asm") + else() + set(XMRIG_ASM_FILE "src/crypto/asm/win64/cnv2_main_loop.asm") + endif() + set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM) else() enable_language(ASM) if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU) - set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop_win.S") + set(XMRIG_ASM_FILE "src/crypto/asm/win64/cnv2_main_loop.S") else() set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.S") endif() diff --git a/src/common/cpu/BasicCpuInfo.cpp b/src/common/cpu/BasicCpuInfo.cpp index 66af53cc..cb1e6d1d 100644 --- a/src/common/cpu/BasicCpuInfo.cpp +++ b/src/common/cpu/BasicCpuInfo.cpp @@ -71,7 +71,7 @@ static inline void cpuid(int level, int output[4]) { static inline void cpu_brand_string(char* s) { - int cpu_info[4] = { 0 }; + int32_t cpu_info[4] = { 0 }; cpuid(VENDOR_ID, cpu_info); if (cpu_info[EAX_Reg] >= 4) { @@ -86,7 +86,7 @@ static inline void cpu_brand_string(char* s) { static inline bool has_aes_ni() { - int cpu_info[4] = { 0 }; + int32_t cpu_info[4] = { 0 }; cpuid(PROCESSOR_INFO, cpu_info); return (cpu_info[ECX_Reg] & bit_AES) != 0; @@ -94,11 +94,32 @@ static inline bool has_aes_ni() xmrig::BasicCpuInfo::BasicCpuInfo() : + m_assembly(ASM_NONE), m_aes(has_aes_ni()), m_brand(), m_threads(std::thread::hardware_concurrency()) { cpu_brand_string(m_brand); + +# ifndef XMRIG_NO_ASM + if (hasAES()) { + char vendor[13] = { 0 }; + int32_t data[4] = { 0 }; + + cpuid(0, data); + + memcpy(vendor + 0, &data[1], 4); + memcpy(vendor + 4, &data[3], 4); + memcpy(vendor + 8, &data[2], 4); + + if (memcmp(vendor, "GenuineIntel", 12) == 0) { + m_assembly = ASM_INTEL; + } + else if (memcmp(vendor, "AuthenticAMD", 12) == 0) { + m_assembly = ASM_RYZEN; + } + } +# endif } diff --git a/src/common/cpu/BasicCpuInfo.h b/src/common/cpu/BasicCpuInfo.h index f9d710d6..911674ea 100644 --- a/src/common/cpu/BasicCpuInfo.h +++ b/src/common/cpu/BasicCpuInfo.h @@ -7,7 +7,6 @@ * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , * - * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or @@ -40,7 +39,7 @@ public: protected: size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override; - inline Assembly assembly() const override { return ASM_NONE; } + inline Assembly assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool isSupported() const override { return true; } inline const char *brand() const override { return m_brand; } @@ -58,6 +57,7 @@ protected: # endif private: + Assembly m_assembly; bool m_aes; char m_brand[64]; int32_t m_threads; diff --git a/src/common/net/Client.cpp b/src/common/net/Client.cpp index 6a79749d..1d1d86c7 100644 --- a/src/common/net/Client.cpp +++ b/src/common/net/Client.cpp @@ -259,7 +259,11 @@ int64_t Client::submit(const JobResult &result) bool Client::close() { - if (m_state == UnconnectedState || m_state == ClosingState || !m_socket) { + if (m_state == ClosingState) { + return m_socket != nullptr; + } + + if (m_state == UnconnectedState || m_socket == nullptr) { return false; } @@ -426,7 +430,7 @@ bool Client::send(BIO *bio) bool Client::verifyAlgorithm(const xmrig::Algorithm &algorithm) const { # ifdef XMRIG_PROXY_PROJECT - if (m_pool.algorithm().variant() == xmrig::VARIANT_AUTO) { + if (m_pool.algorithm().variant() == xmrig::VARIANT_AUTO || m_id == -1) { return true; } # endif @@ -550,7 +554,6 @@ void Client::connect(sockaddr *addr) setState(ConnectingState); reinterpret_cast(addr)->sin_port = htons(m_pool.port()); - delete m_socket; uv_connect_t *req = new uv_connect_t; req->data = m_storage.ptr(m_key); @@ -831,13 +834,14 @@ void Client::reconnect() return; } - setState(ConnectingState); m_keepAlive = 0; if (m_failures == -1) { return m_listener->onClose(this, -1); } + setState(ConnectingState); + m_failures++; m_listener->onClose(this, (int) m_failures); @@ -927,7 +931,7 @@ void Client::onRead(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) } if (nread < 0) { - if (nread != UV_EOF && !client->isQuiet()) { + if (!client->isQuiet()) { LOG_ERR("[%s] read error: \"%s\"", client->m_pool.url(), uv_strerror((int) nread)); } diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h index a932b235..54546211 100644 --- a/src/core/ConfigLoader_platform.h +++ b/src/core/ConfigLoader_platform.h @@ -61,7 +61,10 @@ Options:\n\ --rig-id=ID rig identifier for pool-side statistics (needs pool support)\n\ -t, --threads=N number of miner threads\n\ -v, --av=N algorithm variation, 0 auto select\n\ - -k, --keepalive send keepalived for prevent timeout (need pool support)\n\ + -k, --keepalive send keepalived packet for prevent timeout (needs pool support)\n\ + --nicehash enable nicehash.com support\n\ + --tls enable SSL/TLS support (needs pool support)\n\ + --tls-fingerprint=F pool TLS certificate fingerprint, if set enable strict certificate pinning\n\ -r, --retries=N number of times to retry before switch to backup server (default: 5)\n\ -R, --retry-pause=N time to pause between retries (default: 5)\n\ --cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\ @@ -81,7 +84,7 @@ Options:\n\ "\ --max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\ --safe safe adjust threads and av settings for current CPU\n\ - --nicehash enable nicehash/xmrig-proxy support\n\ + --asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen.\n\ --print-time=N print hashrate report every N seconds\n\ --api-port=N port for the miner API\n\ --api-access-token=T access token for API\n\ @@ -89,6 +92,7 @@ Options:\n\ --api-id=ID custom instance ID for API\n\ --api-ipv6 enable IPv6 support for API\n\ --api-no-restricted enable full remote access (only if API token set)\n\ + --dry-run test configuration and exit\n\ -h, --help display this help and exit\n\ -V, --version output version information and exit\n\ "; diff --git a/src/core/cpu/AdvancedCpuInfo.cpp b/src/core/cpu/AdvancedCpuInfo.cpp index 1f86a420..c1a9f8cd 100644 --- a/src/core/cpu/AdvancedCpuInfo.cpp +++ b/src/core/cpu/AdvancedCpuInfo.cpp @@ -75,10 +75,10 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : if (data.flags[CPU_FEATURE_AES]) { m_aes = true; - if (data.vendor == VENDOR_AMD && data.ext_family >= 23) { + if (data.vendor == VENDOR_AMD) { m_assembly = ASM_RYZEN; } - else if (data.vendor == VENDOR_INTEL && data.ext_model >= 42) { + else if (data.vendor == VENDOR_INTEL) { m_assembly = ASM_INTEL; } } diff --git a/src/crypto/asm/win64/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cnv2_double_main_loop_sandybridge.inc new file mode 100644 index 00000000..44ea8923 --- /dev/null +++ b/src/crypto/asm/win64/cnv2_double_main_loop_sandybridge.inc @@ -0,0 +1,410 @@ + mov rax, rsp + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 184 + + stmxcsr DWORD PTR [rsp+272] + mov DWORD PTR [rsp+276], 24448 + ldmxcsr DWORD PTR [rsp+276] + + mov r13, QWORD PTR [rcx+224] + mov r9, rdx + mov r10, QWORD PTR [rcx+32] + mov r8, rcx + xor r10, QWORD PTR [rcx] + mov r14d, 524288 + mov r11, QWORD PTR [rcx+40] + xor r11, QWORD PTR [rcx+8] + mov rsi, QWORD PTR [rdx+224] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov rdi, QWORD PTR [r9+32] + xor rdi, QWORD PTR [r9] + mov rbp, QWORD PTR [r9+40] + xor rbp, QWORD PTR [r9+8] + movd xmm0, rdx + movaps XMMWORD PTR [rax-88], xmm6 + movaps XMMWORD PTR [rax-104], xmm7 + movaps XMMWORD PTR [rax-120], xmm8 + movaps XMMWORD PTR [rsp+112], xmm9 + movaps XMMWORD PTR [rsp+96], xmm10 + movaps XMMWORD PTR [rsp+80], xmm11 + movaps XMMWORD PTR [rsp+64], xmm12 + movaps XMMWORD PTR [rsp+48], xmm13 + movaps XMMWORD PTR [rsp+32], xmm14 + movaps XMMWORD PTR [rsp+16], xmm15 + mov rdx, r10 + movd xmm4, QWORD PTR [r8+96] + and edx, 2097136 + mov rax, QWORD PTR [rcx+48] + xorps xmm13, xmm13 + xor rax, QWORD PTR [rcx+16] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r8+72] + movd xmm5, QWORD PTR [r8+104] + movd xmm7, rax + + mov eax, 1 + shl rax, 52 + movd xmm14, rax + punpcklqdq xmm14, xmm14 + + mov eax, 1023 + shl rax, 52 + movd xmm12, rax + punpcklqdq xmm12, xmm12 + + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + punpcklqdq xmm7, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [r9+56] + xor rcx, QWORD PTR [r9+24] + movd xmm3, rax + mov rax, QWORD PTR [r9+48] + xor rax, QWORD PTR [r9+16] + punpcklqdq xmm3, xmm0 + movd xmm0, rcx + mov QWORD PTR [rsp], r13 + mov rcx, QWORD PTR [r9+88] + xor rcx, QWORD PTR [r9+72] + movd xmm6, rax + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + punpcklqdq xmm6, xmm0 + movd xmm0, rcx + mov QWORD PTR [rsp+256], r10 + mov rcx, rdi + mov QWORD PTR [rsp+264], r11 + movd xmm8, rax + and ecx, 2097136 + punpcklqdq xmm8, xmm0 + movd xmm0, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movd xmm0, QWORD PTR [r9+104] + lea r8, QWORD PTR [rcx+rsi] + movdqu xmm11, XMMWORD PTR [r8] + punpcklqdq xmm5, xmm0 + lea r9, QWORD PTR [rdx+r13] + movdqu xmm15, XMMWORD PTR [r9] + + ALIGN 16 +main_loop_double_sandybridge: + movdqu xmm9, xmm15 + mov eax, edx + mov ebx, edx + xor eax, 16 + xor ebx, 32 + xor edx, 48 + + movd xmm0, r11 + movd xmm2, r10 + punpcklqdq xmm2, xmm0 + aesenc xmm9, xmm2 + + movdqu xmm0, XMMWORD PTR [rax+r13] + movdqu xmm1, XMMWORD PTR [rbx+r13] + paddq xmm0, xmm7 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [rbx+r13], xmm0 + movdqu xmm0, XMMWORD PTR [rdx+r13] + movdqu XMMWORD PTR [rdx+r13], xmm1 + paddq xmm0, xmm3 + movdqu XMMWORD PTR [rax+r13], xmm0 + + movd r11, xmm9 + mov edx, r11d + and edx, 2097136 + movdqa xmm0, xmm9 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [r9], xmm0 + + lea rbx, QWORD PTR [rdx+r13] + mov r10, QWORD PTR [rdx+r13] + + movdqu xmm10, xmm11 + movd xmm0, rbp + movd xmm11, rdi + punpcklqdq xmm11, xmm0 + aesenc xmm10, xmm11 + + mov eax, ecx + mov r12d, ecx + xor eax, 16 + xor r12d, 32 + xor ecx, 48 + + movdqu xmm0, XMMWORD PTR [rax+rsi] + paddq xmm0, xmm6 + movdqu xmm1, XMMWORD PTR [r12+rsi] + movdqu XMMWORD PTR [r12+rsi], xmm0 + paddq xmm1, xmm11 + movdqu xmm0, XMMWORD PTR [rcx+rsi] + movdqu XMMWORD PTR [rcx+rsi], xmm1 + paddq xmm0, xmm8 + movdqu XMMWORD PTR [rax+rsi], xmm0 + + movd rcx, xmm10 + and ecx, 2097136 + + movdqa xmm0, xmm10 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [r8], xmm0 + mov r12, QWORD PTR [rcx+rsi] + + mov r9, QWORD PTR [rbx+8] + + xor edx, 16 + mov r8d, edx + mov r15d, edx + + movd rdx, xmm5 + shl rdx, 32 + movd rax, xmm4 + xor rdx, rax + xor r10, rdx + mov rax, r10 + mul r11 + mov r11d, r8d + xor r11d, 48 + movd xmm0, rdx + xor rdx, [r11+r13] + movd xmm1, rax + xor rax, [r11+r13+8] + punpcklqdq xmm0, xmm1 + + pxor xmm0, XMMWORD PTR [r8+r13] + xor r8d, 32 + movdqu xmm1, XMMWORD PTR [r11+r13] + paddq xmm0, xmm7 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [r11+r13], xmm0 + movdqu xmm0, XMMWORD PTR [r8+r13] + movdqu XMMWORD PTR [r8+r13], xmm1 + paddq xmm0, xmm3 + movdqu XMMWORD PTR [r15+r13], xmm0 + + mov r11, QWORD PTR [rsp+256] + add r11, rdx + mov rdx, QWORD PTR [rsp+264] + add rdx, rax + mov QWORD PTR [rbx], r11 + xor r11, r10 + mov QWORD PTR [rbx+8], rdx + xor rdx, r9 + mov QWORD PTR [rsp+256], r11 + and r11d, 2097136 + mov QWORD PTR [rsp+264], rdx + mov QWORD PTR [rsp+8], r11 + lea r15, QWORD PTR [r11+r13] + movdqu xmm15, XMMWORD PTR [r11+r13] + lea r13, QWORD PTR [rsi+rcx] + movdqa xmm0, xmm5 + psrldq xmm0, 8 + movaps xmm2, xmm13 + movd r10, xmm0 + psllq xmm5, 1 + shl r10, 32 + movdqa xmm0, xmm9 + psrldq xmm0, 8 + movdqa xmm1, xmm10 + movd r11, xmm0 + psrldq xmm1, 8 + movd r8, xmm1 + psrldq xmm4, 8 + movaps xmm0, xmm13 + movd rax, xmm4 + xor r10, rax + movaps xmm1, xmm13 + xor r10, r12 + lea rax, QWORD PTR [r11+1] + shr rax, 1 + movdqa xmm3, xmm9 + punpcklqdq xmm3, xmm10 + paddq xmm5, xmm3 + movd rdx, xmm5 + psrldq xmm5, 8 + cvtsi2sd xmm2, rax + or edx, -2147483647 + lea rax, QWORD PTR [r8+1] + shr rax, 1 + movd r9, xmm5 + cvtsi2sd xmm0, rax + or r9d, -2147483647 + cvtsi2sd xmm1, rdx + unpcklpd xmm2, xmm0 + movaps xmm0, xmm13 + cvtsi2sd xmm0, r9 + unpcklpd xmm1, xmm0 + divpd xmm2, xmm1 + paddq xmm2, xmm14 + cvttsd2si rax, xmm2 + psrldq xmm2, 8 + mov rbx, rax + imul rax, rdx + sub r11, rax + js div_fix_1_sandybridge +div_fix_1_ret_sandybridge: + + cvttsd2si rdx, xmm2 + mov rax, rdx + imul rax, r9 + movd xmm2, r11d + movd xmm4, ebx + sub r8, rax + js div_fix_2_sandybridge +div_fix_2_ret_sandybridge: + + movd xmm1, r8d + movd xmm0, edx + punpckldq xmm2, xmm1 + punpckldq xmm4, xmm0 + punpckldq xmm4, xmm2 + paddq xmm3, xmm4 + movdqa xmm0, xmm3 + psrlq xmm0, 12 + paddq xmm0, xmm12 + sqrtpd xmm1, xmm0 + movd r9, xmm1 + movdqa xmm5, xmm1 + psrlq xmm5, 19 + test r9, 524287 + je sqrt_fix_1_sandybridge +sqrt_fix_1_ret_sandybridge: + + movd r9, xmm10 + psrldq xmm1, 8 + movd r8, xmm1 + test r8, 524287 + je sqrt_fix_2_sandybridge +sqrt_fix_2_ret_sandybridge: + + mov r12d, ecx + mov r8d, ecx + xor r12d, 16 + xor r8d, 32 + xor ecx, 48 + mov rax, r10 + mul r9 + movd xmm0, rax + movd xmm3, rdx + punpcklqdq xmm3, xmm0 + + movdqu xmm0, XMMWORD PTR [r12+rsi] + pxor xmm0, xmm3 + movdqu xmm1, XMMWORD PTR [r8+rsi] + xor rdx, [r8+rsi] + xor rax, [r8+rsi+8] + movdqu xmm3, XMMWORD PTR [rcx+rsi] + paddq xmm0, xmm6 + paddq xmm1, xmm11 + paddq xmm3, xmm8 + movdqu XMMWORD PTR [r8+rsi], xmm0 + movdqu XMMWORD PTR [rcx+rsi], xmm1 + movdqu XMMWORD PTR [r12+rsi], xmm3 + + add rdi, rdx + mov QWORD PTR [r13], rdi + xor rdi, r10 + mov ecx, edi + and ecx, 2097136 + lea r8, QWORD PTR [rcx+rsi] + + mov rdx, QWORD PTR [r13+8] + add rbp, rax + mov QWORD PTR [r13+8], rbp + movdqu xmm11, XMMWORD PTR [rcx+rsi] + xor rbp, rdx + mov r13, QWORD PTR [rsp] + movdqa xmm3, xmm7 + mov rdx, QWORD PTR [rsp+8] + movdqa xmm8, xmm6 + mov r10, QWORD PTR [rsp+256] + movdqa xmm7, xmm9 + mov r11, QWORD PTR [rsp+264] + movdqa xmm6, xmm10 + mov r9, r15 + dec r14d + jne main_loop_double_sandybridge + + ldmxcsr DWORD PTR [rsp+272] + movaps xmm13, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+184] + movaps xmm6, XMMWORD PTR [r11-24] + movaps xmm7, XMMWORD PTR [r11-40] + movaps xmm8, XMMWORD PTR [r11-56] + movaps xmm9, XMMWORD PTR [r11-72] + movaps xmm10, XMMWORD PTR [r11-88] + movaps xmm11, XMMWORD PTR [r11-104] + movaps xmm12, XMMWORD PTR [r11-120] + movaps xmm14, XMMWORD PTR [rsp+32] + movaps xmm15, XMMWORD PTR [rsp+16] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + jmp cnv2_double_mainloop_asm_sandybridge_endp + +div_fix_1_sandybridge: + dec rbx + add r11, rdx + jmp div_fix_1_ret_sandybridge + +div_fix_2_sandybridge: + dec rdx + add r8, r9 + jmp div_fix_2_ret_sandybridge + +sqrt_fix_1_sandybridge: + movd r8, xmm3 + movdqa xmm0, xmm5 + psrldq xmm0, 8 + dec r9 + mov r11d, -1022 + shl r11, 32 + mov rax, r9 + shr r9, 19 + shr rax, 20 + mov rdx, r9 + sub rdx, rax + lea rdx, [rdx+r11+1] + add rax, r11 + imul rdx, rax + sub rdx, r8 + adc r9, 0 + movd xmm5, r9 + punpcklqdq xmm5, xmm0 + jmp sqrt_fix_1_ret_sandybridge + +sqrt_fix_2_sandybridge: + psrldq xmm3, 8 + movd r11, xmm3 + dec r8 + mov ebx, -1022 + shl rbx, 32 + mov rax, r8 + shr r8, 19 + shr rax, 20 + mov rdx, r8 + sub rdx, rax + lea rdx, [rdx+rbx+1] + add rax, rbx + imul rdx, rax + sub rdx, r11 + adc r8, 0 + movd xmm0, r8 + punpcklqdq xmm5, xmm0 + jmp sqrt_fix_2_ret_sandybridge + +cnv2_double_mainloop_asm_sandybridge_endp: diff --git a/src/crypto/asm/cnv2_main_loop_win.S b/src/crypto/asm/win64/cnv2_main_loop.S similarity index 69% rename from src/crypto/asm/cnv2_main_loop_win.S rename to src/crypto/asm/win64/cnv2_main_loop.S index f06e4fa4..78eb1185 100644 --- a/src/crypto/asm/cnv2_main_loop_win.S +++ b/src/crypto/asm/win64/cnv2_main_loop.S @@ -7,15 +7,15 @@ ALIGN 16 cnv2_mainloop_ivybridge_asm: - #include "cnv2_main_loop_ivybridge.inc" + #include "../cnv2_main_loop_ivybridge.inc" ret 0 ALIGN 16 cnv2_mainloop_ryzen_asm: - #include "cnv2_main_loop_ryzen.inc" + #include "../cnv2_main_loop_ryzen.inc" ret 0 ALIGN 16 cnv2_double_mainloop_sandybridge_asm: - #include "cnv2_double_main_loop_sandybridge.inc" + #include "../cnv2_double_main_loop_sandybridge.inc" ret 0 diff --git a/src/crypto/asm/win64/cnv2_main_loop.asm b/src/crypto/asm/win64/cnv2_main_loop.asm new file mode 100644 index 00000000..d9522267 --- /dev/null +++ b/src/crypto/asm/win64/cnv2_main_loop.asm @@ -0,0 +1,25 @@ +_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE +PUBLIC cnv2_mainloop_ivybridge_asm +PUBLIC cnv2_mainloop_ryzen_asm +PUBLIC cnv2_double_mainloop_sandybridge_asm + +ALIGN 64 +cnv2_mainloop_ivybridge_asm PROC + INCLUDE cnv2_main_loop_ivybridge.inc + ret 0 +cnv2_mainloop_ivybridge_asm ENDP + +ALIGN 64 +cnv2_mainloop_ryzen_asm PROC + INCLUDE cnv2_main_loop_ryzen.inc + ret 0 +cnv2_mainloop_ryzen_asm ENDP + +ALIGN 64 +cnv2_double_mainloop_sandybridge_asm PROC + INCLUDE cnv2_double_main_loop_sandybridge.inc + ret 0 +cnv2_double_mainloop_sandybridge_asm ENDP + +_TEXT_CNV2_MAINLOOP ENDS +END diff --git a/src/crypto/asm/win64/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cnv2_main_loop_ivybridge.inc new file mode 100644 index 00000000..c925ca24 --- /dev/null +++ b/src/crypto/asm/win64/cnv2_main_loop_ivybridge.inc @@ -0,0 +1,186 @@ + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 80 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov esi, 524288 + mov r8, QWORD PTR [rcx+32] + mov r13d, -2147483647 + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movd xmm4, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movd xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + movd xmm3, QWORD PTR [r9+104] + movaps XMMWORD PTR [rsp+64], xmm6 + movaps XMMWORD PTR [rsp+48], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + and r10d, 2097136 + movd xmm5, rax + + xor eax, eax + mov QWORD PTR [rsp+16], rax + + mov ax, 1023 + shl rax, 52 + movd xmm8, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movd xmm0, rcx + punpcklqdq xmm5, xmm0 + movdqu xmm6, XMMWORD PTR [r10+rbx] + + ALIGN 16 +main_loop_ivybridge: + lea rdx, QWORD PTR [r10+rbx] + mov ecx, r10d + mov eax, r10d + mov rdi, r15 + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + movd xmm0, r11 + movd xmm7, r8 + punpcklqdq xmm7, xmm0 + aesenc xmm6, xmm7 + movd rbp, xmm6 + mov r9, rbp + and r9d, 2097136 + movdqu xmm2, XMMWORD PTR [rcx+rbx] + movdqu xmm1, XMMWORD PTR [rax+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm1, xmm7 + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [rcx+rbx], xmm0 + movdqu XMMWORD PTR [rax+rbx], xmm2 + movdqu XMMWORD PTR [r10+rbx], xmm1 + mov r10, r9 + xor r10d, 32 + movd rcx, xmm3 + mov rax, rcx + shl rax, 32 + xor rdi, rax + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [rdx], xmm0 + xor rdi, QWORD PTR [r9+rbx] + lea r14, QWORD PTR [r9+rbx] + mov r12, QWORD PTR [r14+8] + xor edx, edx + lea r9d, DWORD PTR [ecx+ecx] + add r9d, ebp + movdqa xmm0, xmm6 + psrldq xmm0, 8 + or r9d, r13d + movd rax, xmm0 + div r9 + xorps xmm3, xmm3 + mov eax, eax + shl rdx, 32 + add rdx, rax + lea r9, QWORD PTR [rdx+rbp] + mov r15, rdx + mov rax, r9 + shr rax, 12 + movd xmm0, rax + paddq xmm0, xmm8 + sqrtsd xmm3, xmm0 + psubq xmm3, XMMWORD PTR [rsp+16] + movd rdx, xmm3 + test edx, 524287 + je sqrt_fixup_ivybridge + psrlq xmm3, 19 +sqrt_fixup_ivybridge_ret: + + mov ecx, r10d + mov rax, rdi + mul rbp + movd xmm2, rdx + xor rdx, [rcx+rbx] + add r8, rdx + mov QWORD PTR [r14], r8 + xor r8, rdi + mov edi, r8d + and edi, 2097136 + movd xmm0, rax + xor rax, [rcx+rbx+8] + add r11, rax + mov QWORD PTR [r14+8], r11 + punpcklqdq xmm2, xmm0 + + mov r9d, r10d + xor r9d, 48 + xor r10d, 16 + pxor xmm2, XMMWORD PTR [r9+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm0, xmm5 + movdqu xmm1, XMMWORD PTR [rcx+rbx] + paddq xmm2, xmm4 + paddq xmm1, xmm7 + movdqa xmm5, xmm4 + movdqu XMMWORD PTR [r9+rbx], xmm0 + movdqa xmm4, xmm6 + movdqu XMMWORD PTR [rcx+rbx], xmm2 + movdqu XMMWORD PTR [r10+rbx], xmm1 + movdqu xmm6, [rdi+rbx] + mov r10d, edi + xor r11, r12 + dec rsi + jne main_loop_ivybridge + + ldmxcsr DWORD PTR [rsp] + mov rbx, QWORD PTR [rsp+160] + movaps xmm6, XMMWORD PTR [rsp+64] + movaps xmm7, XMMWORD PTR [rsp+48] + movaps xmm8, XMMWORD PTR [rsp+32] + add rsp, 80 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + jmp cnv2_main_loop_ivybridge_endp + +sqrt_fixup_ivybridge: + dec rdx + mov r13d, -1022 + shl r13, 32 + mov rax, rdx + shr rdx, 19 + shr rax, 20 + mov rcx, rdx + sub rcx, rax + add rax, r13 + not r13 + sub rcx, r13 + mov r13d, -2147483647 + imul rcx, rax + sub rcx, r9 + adc rdx, 0 + movd xmm3, rdx + jmp sqrt_fixup_ivybridge_ret + +cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/win64/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cnv2_main_loop_ryzen.inc new file mode 100644 index 00000000..d1cd26c4 --- /dev/null +++ b/src/crypto/asm/win64/cnv2_main_loop_ryzen.inc @@ -0,0 +1,179 @@ + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 64 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov ebp, 524288 + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movd xmm3, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movd xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + mov rdi, QWORD PTR [r9+104] + and r10d, 2097136 + movaps XMMWORD PTR [rsp+48], xmm6 + movd xmm4, rax + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + xorps xmm8, xmm8 + mov ax, 1023 + shl rax, 52 + movd xmm7, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm3, xmm0 + movd xmm0, rcx + punpcklqdq xmm4, xmm0 + + ALIGN 16 +main_loop_ryzen: + movdqa xmm5, XMMWORD PTR [r10+rbx] + movd xmm0, r11 + movd xmm6, r8 + punpcklqdq xmm6, xmm0 + lea rdx, QWORD PTR [r10+rbx] + lea r9, QWORD PTR [rdi+rdi] + shl rdi, 32 + + mov ecx, r10d + mov eax, r10d + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + aesenc xmm5, xmm6 + movdqa xmm2, XMMWORD PTR [rcx+rbx] + movdqa xmm1, XMMWORD PTR [rax+rbx] + movdqa xmm0, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + paddq xmm0, xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm0 + movdqa XMMWORD PTR [rax+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movaps xmm1, xmm8 + mov rsi, r15 + xor rsi, rdi + movd r14, xmm5 + movdqa xmm0, xmm5 + pxor xmm0, xmm3 + mov r10, r14 + and r10d, 2097136 + movdqa XMMWORD PTR [rdx], xmm0 + xor rsi, QWORD PTR [r10+rbx] + lea r12, QWORD PTR [r10+rbx] + mov r13, QWORD PTR [r10+rbx+8] + + add r9d, r14d + or r9d, -2147483647 + xor edx, edx + movdqa xmm0, xmm5 + psrldq xmm0, 8 + movd rax, xmm0 + + div r9 + movd xmm0, rax + movd xmm1, rdx + punpckldq xmm0, xmm1 + movd r15, xmm0 + paddq xmm0, xmm5 + movdqa xmm2, xmm0 + psrlq xmm0, 12 + paddq xmm0, xmm7 + sqrtsd xmm1, xmm0 + movd rdi, xmm1 + test rdi, 524287 + je sqrt_fixup_ryzen + shr rdi, 19 + +sqrt_fixup_ryzen_ret: + mov rax, rsi + mul r14 + movd xmm1, rax + movd xmm0, rdx + punpcklqdq xmm0, xmm1 + + mov r9d, r10d + mov ecx, r10d + xor r9d, 16 + xor ecx, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [rcx+rbx] + xor rdx, [rcx+rbx] + xor rax, [rcx+rbx+8] + movdqa xmm2, XMMWORD PTR [r9+rbx] + pxor xmm2, xmm0 + paddq xmm4, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + movdqa XMMWORD PTR [r9+rbx], xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movdqa xmm4, xmm3 + add r8, rdx + add r11, rax + mov QWORD PTR [r12], r8 + xor r8, rsi + mov QWORD PTR [r12+8], r11 + mov r10, r8 + xor r11, r13 + and r10d, 2097136 + movdqa xmm3, xmm5 + dec ebp + jne main_loop_ryzen + + ldmxcsr DWORD PTR [rsp] + movaps xmm6, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+64] + mov rbx, QWORD PTR [r11+56] + mov rbp, QWORD PTR [r11+64] + mov rsi, QWORD PTR [r11+72] + movaps xmm8, XMMWORD PTR [r11-48] + movaps xmm7, XMMWORD PTR [rsp+32] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + jmp cnv2_main_loop_ryzen_endp + +sqrt_fixup_ryzen: + movd r9, xmm2 + dec rdi + mov edx, -1022 + shl rdx, 32 + mov rax, rdi + shr rdi, 19 + shr rax, 20 + mov rcx, rdi + sub rcx, rax + lea rcx, [rcx+rdx+1] + add rax, rdx + imul rcx, rax + sub rcx, r9 + adc rdi, 0 + jmp sqrt_fixup_ryzen_ret + +cnv2_main_loop_ryzen_endp: diff --git a/src/net/Network.cpp b/src/net/Network.cpp index 703e0ccf..828203a1 100644 --- a/src/net/Network.cpp +++ b/src/net/Network.cpp @@ -108,7 +108,7 @@ void Network::onActive(IStrategy *strategy, Client *client) const char *fingerprint = client->tlsFingerprint(); if (fingerprint != nullptr) { - LOG_INFO("\x1B[1;30mfingerprint (SHA-256): \"%s\"", fingerprint); + LOG_INFO("%sfingerprint (SHA-256): \"%s\"", isColors() ? "\x1B[1;30m" : "", fingerprint); } } diff --git a/src/version.h b/src/version.h index 6e5bf153..3c65f25b 100644 --- a/src/version.h +++ b/src/version.h @@ -27,7 +27,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.8.0-rc" +#define APP_VERSION "2.8.1-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2018 xmrig.com" @@ -35,7 +35,7 @@ #define APP_VER_MAJOR 2 #define APP_VER_MINOR 8 -#define APP_VER_PATCH 0 +#define APP_VER_PATCH 1 #ifdef _MSC_VER # if (_MSC_VER >= 1910)