Improved performance for cryptonight v7.

This commit is contained in:
XMRig 2018-04-09 21:56:15 +07:00
parent 5b664f3681
commit ccbb78d4e9
2 changed files with 34 additions and 8 deletions

View file

@ -1,9 +1,11 @@
# v2.6.0-beta2
- Improved performance for `cryptonight v7` especially in double hash mode.
- [#499](https://github.com/xmrig/xmrig/issues/499) IPv6 disabled for internal HTTP API by default, was cause issues on some systems.
- Added short aliases for algorithm names: `cn`, `cn-lite` and `cn-heavy`.
- Fixed regressions (v2.6.0-beta1 affected)
- [#494](https://github.com/xmrig/xmrig/issues/494) Command line option `--donate-level` was broken.
- [#502](https://github.com/xmrig/xmrig/issues/502) Build without libmicrohttpd was broken.
- Fixed nonce calculation for `--av 4` (software AES, double hash) was cause reduction of effective hashrate and rejected shares on nicehash.
# v2.6.0-beta1
- [#476](https://github.com/xmrig/xmrig/issues/476) **Added Cryptonight-Heavy support for Sumokoin ASIC resistance fork.**

View file

@ -386,6 +386,22 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
}
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
mem_out[0] = EXTRACT64(tmp);
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
uint64_t vh = EXTRACT64(tmp);
uint8_t x = vh >> 24;
static const uint16_t table = 0x7531;
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
vh ^= ((table >> index) & 0x3) << 28;
mem_out[1] = vh;
}
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
{
@ -400,7 +416,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
keccak(input, (int) size, ctx->state0, 200);
VARIANT1_INIT(0);
VARIANT1_INIT(0)
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
@ -423,8 +439,13 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
}
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
VARIANT1_1(&l0[idx0 & MASK]);
if (VARIANT > 0) {
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
} else {
_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
}
idx0 = EXTRACT64(cx);
bx0 = cx;
@ -513,10 +534,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
}
if (VARIANT > 0) {
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
} else {
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
VARIANT1_1(&l0[idx0 & MASK]);
VARIANT1_1(&l1[idx1 & MASK]);
}
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);