diff --git a/CHANGELOG.md b/CHANGELOG.md index bcaf27c7a..95575c3df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,11 @@ # v2.6.0-beta2 +- Improved performance for `cryptonight v7`, especially in double hash mode. - [#499](https://github.com/xmrig/xmrig/issues/499) IPv6 disabled for internal HTTP API by default, was cause issues on some systems. - Added short aliases for algorithm names: `cn`, `cn-lite` and `cn-heavy`. - Fixed regressions (v2.6.0-beta1 affected) - [#494](https://github.com/xmrig/xmrig/issues/494) Command line option `--donate-level` was broken. - - [#502](https://github.com/xmrig/xmrig/issues/502) Build without libmicrohttpd was broken. + - [#502](https://github.com/xmrig/xmrig/issues/502) Build without libmicrohttpd was broken. + - Fixed nonce calculation for `--av 4` (software AES, double hash), which caused a reduction of effective hashrate and rejected shares on nicehash. # v2.6.0-beta1 - [#476](https://github.com/xmrig/xmrig/issues/476) **Added Cryptonight-Heavy support for Sumokoin ASIC resistance fork.** diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 414a1f7fb..417404a6d 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -386,6 +386,22 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output) } +static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) +{ + mem_out[0] = EXTRACT64(tmp); + + tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp))); + uint64_t vh = EXTRACT64(tmp); + + uint8_t x = vh >> 24; + static const uint16_t table = 0x7531; + const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; +} + + template inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx *__restrict__ ctx) { @@ -400,7 +416,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, 
size_t si keccak(input, (int) size, ctx->state0, 200); - VARIANT1_INIT(0); + VARIANT1_INIT(0) cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory); @@ -423,8 +439,13 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); } - _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); - VARIANT1_1(&l0[idx0 & MASK]); + + if (VARIANT > 0) { + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + } else { + _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); + } + idx0 = EXTRACT64(cx); bx0 = cx; @@ -513,10 +534,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); } - _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); - _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); - VARIANT1_1(&l0[idx0 & MASK]); - VARIANT1_1(&l1[idx1 & MASK]); + if (VARIANT > 0) { + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); + cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); + } else { + _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); + _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); + } idx0 = EXTRACT64(cx0); idx1 = EXTRACT64(cx1);