diff --git a/src/Cpu.cpp b/src/Cpu.cpp
index b122e1567..a619781e0 100644
--- a/src/Cpu.cpp
+++ b/src/Cpu.cpp
@@ -100,13 +100,13 @@ void Cpu::initCommon()
         m_l2_cache = data.l2_cache * (m_totalCores / 2) * m_sockets;
         m_l2_exclusive = true;
     }
-    // Workaround for Intel Core Solo, Core Duo, Core 2 Duo, Core 2 Quad and their Xeon homologue
-    // These processors have L2 cache shared by 2 cores.
-    else if (data.vendor == VENDOR_INTEL && data.family == 0x06 && (data.model == 0x0E || data.model == 0x0F || data.model == 0x07)) {
-        int l2_count_per_socket = m_totalCores > 1 ? m_totalCores / 2 : 1;
-        m_l2_cache = data.l2_cache > 0 ? data.l2_cache * l2_count_per_socket * m_sockets : 0;
-    }
-    else{
+    // Workaround for Intel Core Solo, Core Duo, Core 2 Duo, Core 2 Quad and their Xeon homologue
+    // These processors have L2 cache shared by 2 cores.
+    else if (data.vendor == VENDOR_INTEL && data.family == 0x06 && (data.model == 0x0E || data.model == 0x0F || data.model == 0x07)) {
+        int l2_count_per_socket = m_totalCores > 1 ? m_totalCores / 2 : 1;
+        m_l2_cache = data.l2_cache > 0 ? data.l2_cache * l2_count_per_socket * m_sockets : 0;
+    }
+    else{
         m_l2_cache = data.l2_cache > 0 ? data.l2_cache * m_totalCores * m_sockets : 0;
     }
 
diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h
index 786d28f1b..927aab723 100644
--- a/src/crypto/CryptoNight_x86.h
+++ b/src/crypto/CryptoNight_x86.h
@@ -324,18 +324,18 @@ inline void cryptonight_hash(const void *__restrict__ input, size_t size, void *
     uint64_t idx0 = h0[0] ^ h0[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx;
+        __m128i cx;
 
-        if (SOFT_AES) {
-            cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
-        }
-        else {
-            cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
-            cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
-        }
-        _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
-        idx0 = EXTRACT64(cx);
-        bx0 = cx;
+        if (SOFT_AES) {
+            cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+        }
+        else {
+            cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
+            cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
+        }
+        _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+        idx0 = EXTRACT64(cx);
+        bx0 = cx;
 
         uint64_t hi, lo, cl, ch;
         cl = ((uint64_t*) &l0[idx0 & MASK])[0];
@@ -385,19 +385,19 @@ inline void cryptonight_double_hash(const void *__restrict__ input, size_t size,
     uint64_t idx0 = h0[0] ^ h0[4];
     uint64_t idx1 = h1[0] ^ h1[4];
 
-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0, cx1;
+    for (size_t i = 0; i < ITERATIONS; i++) {
+        __m128i cx0, cx1;
 
-        if (SOFT_AES) {
-            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
-        }
-        else {
-            cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
-            cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
-            cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
-        }
+        if (SOFT_AES) {
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+        }
+        else {
+            cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
+            cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+            cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+        }
 
         _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
         _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
diff --git a/src/crypto/soft_aes.h b/src/crypto/soft_aes.h
index b7698ac4f..0703f98de 100644
--- a/src/crypto/soft_aes.h
+++ b/src/crypto/soft_aes.h
@@ -91,32 +91,32 @@ alignas(16) const uint8_t saes_sbox[256] = saes_data(saes_h0);
 
 static inline __m128i soft_aesenc(const uint32_t* in, __m128i key)
 {
-    const uint32_t x0 = in[0];
-    const uint32_t x1 = in[1];
-    const uint32_t x2 = in[2];
-    const uint32_t x3 = in[3];
+    const uint32_t x0 = in[0];
+    const uint32_t x1 = in[1];
+    const uint32_t x2 = in[2];
+    const uint32_t x3 = in[3];
 
-    __m128i out = _mm_set_epi32(
-        (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
-        (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
-        (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
-        (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
+    __m128i out = _mm_set_epi32(
+        (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
+        (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
+        (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
+        (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
 
-    return _mm_xor_si128(out, key);
+    return _mm_xor_si128(out, key);
 }
 
 static inline uint32_t sub_word(uint32_t key)
 {
-    return (saes_sbox[key >> 24 ] << 24) |
-        (saes_sbox[(key >> 16) & 0xff] << 16 ) |
-        (saes_sbox[(key >> 8) & 0xff] << 8 ) |
-        saes_sbox[key & 0xff];
+    return (saes_sbox[key >> 24 ] << 24) |
+        (saes_sbox[(key >> 16) & 0xff] << 16 ) |
+        (saes_sbox[(key >> 8) & 0xff] << 8 ) |
+        saes_sbox[key & 0xff];
 }
 
 #if defined(__clang__) || defined(XMRIG_ARM)
 static inline uint32_t _rotr(uint32_t value, uint32_t amount)
 {
-    return (value >> amount) | (value << ((32 - amount) & 31));
+    return (value >> amount) | (value << ((32 - amount) & 31));
 }
 #endif
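
The Cpu.cpp hunk above only re-indents the Core 2 workaround, but the arithmetic it preserves is easy to check: on those Intel parts each L2 slice serves two cores, so the total is the per-slice size times half the core count times the socket count. Below is a minimal standalone sketch of that calculation, not part of the patch; the function name totalL2ForCore2, the kilobyte unit, and the Core 2 Quad sample figures are illustrative assumptions.

#include <cstdio>

// Total L2 for Intel family 0x06, models 0x0E/0x0F/0x07, where one L2 slice
// is shared by a pair of cores (mirrors the workaround branch in Cpu::initCommon()).
static int totalL2ForCore2(int l2PerSliceKB, int totalCores, int sockets)
{
    const int slicesPerSocket = totalCores > 1 ? totalCores / 2 : 1;
    return l2PerSliceKB > 0 ? l2PerSliceKB * slicesPerSocket * sockets : 0;
}

int main()
{
    // Hypothetical single-socket quad-core reporting 4096 KB per slice:
    // 4096 * (4 / 2) * 1 = 8192 KB in total.
    std::printf("%d KB\n", totalL2ForCore2(4096, 4, 1));
    return 0;
}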