From b7adb34c3798f42dd6b55a02ff1acb46f82ca5e8 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 21 Apr 2021 13:19:06 +0200 Subject: [PATCH 1/3] Fixed Zen3 asm for cn/upx2 - Invalid rounding mode was used which caused rejected shares sometimes - Also optimized CN implode/explode functions a bit. --- src/crypto/cn/CryptoNight_x86.h | 29 +++++++++++-------- .../asm/cn2/cnv2_upx_double_mainloop_zen3.inc | 2 +- .../cn2/cnv2_upx_double_mainloop_zen3.inc | 2 +- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/crypto/cn/CryptoNight_x86.h b/src/crypto/cn/CryptoNight_x86.h index 25eeb908a..cc88342be 100644 --- a/src/crypto/cn/CryptoNight_x86.h +++ b/src/crypto/cn/CryptoNight_x86.h @@ -377,12 +377,15 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output) _mm_store_si128(output + 1, xin1); _mm_store_si128(output + 2, xin2); _mm_store_si128(output + 3, xin3); - output += (64 << interleave) / sizeof(__m128i); - _mm_store_si128(output + 0, xin4); - _mm_store_si128(output + 1, xin5); - _mm_store_si128(output + 2, xin6); - _mm_store_si128(output + 3, xin7); - output += (64 << interleave) / sizeof(__m128i); + + constexpr int output_increment = (64 << interleave) / sizeof(__m128i); + + _mm_store_si128(output + output_increment + 0, xin4); + _mm_store_si128(output + output_increment + 1, xin5); + _mm_store_si128(output + output_increment + 2, xin6); + _mm_store_si128(output + output_increment + 3, xin7); + + output += output_increment * 2; } } @@ -414,13 +417,15 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output) xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1); xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2); xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3); - input += (64 << interleave) / sizeof(__m128i); - xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7); - input += (64 << interleave) / sizeof(__m128i); + constexpr int input_increment = (64 << interleave) / sizeof(__m128i); + + xout4 = _mm_xor_si128(_mm_load_si128(input + input_increment + 0), xout4); + xout5 = _mm_xor_si128(_mm_load_si128(input + input_increment + 1), xout5); + xout6 = _mm_xor_si128(_mm_load_si128(input + input_increment + 2), xout6); + xout7 = _mm_xor_si128(_mm_load_si128(input + input_increment + 3), xout7); + + input += input_increment * 2; i += 8; if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) { diff --git a/src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc b/src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc index 4f6b70a04..14222dac0 100644 --- a/src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc +++ b/src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc @@ -34,7 +34,7 @@ movdqa XMMWORD PTR [rsp+32], xmm0 stmxcsr DWORD PTR [rsp+24] - mov DWORD PTR [rsp+28], 24448 + mov DWORD PTR [rsp+28], 16256 ldmxcsr DWORD PTR [rsp+28] mov rcx, QWORD PTR [rbx+56] diff --git a/src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc b/src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc index 854fbf111..00fabd6d4 100644 --- a/src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc +++ b/src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc @@ -34,7 +34,7 @@ movdqa XMMWORD PTR [rsp+32], xmm0 stmxcsr DWORD PTR [rsp+24] - mov DWORD PTR [rsp+28], 24448 + mov DWORD PTR [rsp+28], 16256 ldmxcsr DWORD PTR [rsp+28] mov rcx, QWORD PTR [rbx+56] From c66c593123686fcf0feb16890125751cb53830c8 Mon Sep 17 00:00:00 2001 From: XMRig Date: Wed, 21 Apr 2021 19:51:03 +0700 Subject: [PATCH 2/3] v6.12.1-dev --- src/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.h b/src/version.h index 36619ab29..8497db53a 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "6.12.0" +#define APP_VERSION "6.12.1-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2021 xmrig.com" @@ -36,7 +36,7 @@ #define APP_VER_MAJOR 6 #define APP_VER_MINOR 12 -#define APP_VER_PATCH 0 +#define APP_VER_PATCH 1 #ifdef _MSC_VER # if (_MSC_VER >= 1920) From 7fd6be7d83d9513a5559dd7a1dab7f32f993eab0 Mon Sep 17 00:00:00 2001 From: xmrig Date: Fri, 23 Apr 2021 18:54:42 +0700 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1020dded..08b6deb76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# v6.12.1 +- [#2296](https://github.com/xmrig/xmrig/pull/2296) Fixed Zen3 assembly code for `cn/upx2` algorithm. + # v6.12.0 - [#2276](https://github.com/xmrig/xmrig/pull/2276) Added support for Uplexa (`cn/upx2` algorithm). - [#2261](https://github.com/xmrig/xmrig/pull/2261) Show total hashrate if compiled without OpenCL.