diff --git a/CHANGELOG.md b/CHANGELOG.md index 4114206c2..043d0f7f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # v0.6.0 - Added automatic cryptonight self test. -- Added support for software AES `--av=4`. Will be automatically selected if cpu not support AES-NI. +- New software AES algorithm variation `--av=4`. It is selected automatically if the CPU does not support AES-NI. - Added 32 bit builds. - Documented [algorithm variations](https://github.com/xmrig/xmrig#algorithm-variations). diff --git a/README.md b/README.md index bd151c57e..a861fed78 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ Based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of * [Donations](#Donations) ## Features -* High performance, faster than others (290+ H/s on i7 6700). +* High performance (290+ H/s on i7 6700). * Official Windows support. -* Small Windows executable, only 350 KB without dependencies. +* Small Windows executable, only 430 KB without dependencies. * Support for backup (failover) mining server. * keepalived support. * Command line options compatible with cpuminer. 
diff --git a/algo/cryptonight/cryptonight_av1_aesni.c b/algo/cryptonight/cryptonight_av1_aesni.c index d0252996e..4b1a766c1 100644 --- a/algo/cryptonight/cryptonight_av1_aesni.c +++ b/algo/cryptonight/cryptonight_av1_aesni.c @@ -30,16 +30,6 @@ #include "crypto/c_keccak.h" -#ifndef __BMI2__ -static inline uint64_t _mulx_u64(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -#endif - - void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) { keccak((const uint8_t *) input, size, ctx->state, 200); @@ -67,7 +57,7 @@ void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restri uint64_t hi, lo, cl, ch; cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0]; ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1]; - lo = _mulx_u64(idx0, cl, &hi); + lo = _umul128(idx0, cl, &hi); al0 += hi; ah0 += lo; diff --git a/algo/cryptonight/cryptonight_av2_aesni_stak.c b/algo/cryptonight/cryptonight_av2_aesni_stak.c index 152a92c47..df96c6510 100644 --- a/algo/cryptonight/cryptonight_av2_aesni_stak.c +++ b/algo/cryptonight/cryptonight_av2_aesni_stak.c @@ -30,16 +30,6 @@ #include "crypto/c_keccak.h" -#ifndef __BMI2__ -static inline uint64_t _mulx_u64(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -#endif - - void cryptonight_av2_aesni_stak(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) { keccak((const uint8_t *) input, size, ctx->state, 200); @@ -64,12 +54,10 @@ void cryptonight_av2_aesni_stak(const void *restrict input, size_t size, void *r idx0 = _mm_cvtsi128_si64(cx); bx0 = cx; - _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); - uint64_t hi, lo, cl, ch; cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0]; ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1]; 
- lo = _mulx_u64(idx0, cl, &hi); + lo = _umul128(idx0, cl, &hi); al0 += hi; ah0 += lo; @@ -80,8 +68,6 @@ void cryptonight_av2_aesni_stak(const void *restrict input, size_t size, void *r ah0 ^= ch; al0 ^= cl; idx0 = al0; - - _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); } cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state); diff --git a/algo/cryptonight/cryptonight_p.h b/algo/cryptonight/cryptonight_p.h index 2f533deaa..b7f187658 100644 --- a/algo/cryptonight/cryptonight_p.h +++ b/algo/cryptonight/cryptonight_p.h @@ -212,4 +212,12 @@ inline void cn_implode_scratchpad(const __m128i* input, __m128i* output) } +inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) +{ + unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; + *hi = r >> 64; + return (uint64_t) r; +} + + #endif /* __CRYPTONIGHT_P_H__ */