From baa3384d124d21190bd7cc7bcf1ca1724ff39da6 Mon Sep 17 00:00:00 2001
From: SChernykh
Date: Sun, 7 Jun 2020 16:16:09 +0200
Subject: [PATCH 1/2] Fixed GCC 10.1 issues

- Fixed uninitialized `state->x` warning
- Fixed broken code with `-O3` or `-Ofast`
---
 src/crypto/cn/c_jh.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/crypto/cn/c_jh.c b/src/crypto/cn/c_jh.c
index 728f3bbee..9e4e7efdb 100644
--- a/src/crypto/cn/c_jh.c
+++ b/src/crypto/cn/c_jh.c
@@ -213,16 +213,17 @@ static void E8(hashState *state)
 /*The compression function F8 */
 static void F8(hashState *state)
 {
-    uint64 i;
+    uint64_t* x = (uint64_t*)state->x;
+    const uint64_t* buf = (uint64*)state->buffer;
 
     /*xor the 512-bit message with the fist half of the 1024-bit hash state*/
-    for (i = 0; i < 8; i++) state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i];
+    for (int i = 0; i < 8; ++i) x[i] ^= buf[i];
 
     /*the bijective function E8 */
     E8(state);
 
     /*xor the 512-bit message with the second half of the 1024-bit hash state*/
-    for (i = 0; i < 8; i++) state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i];
+    for (int i = 0; i < 8; ++i) x[i + 8] ^= buf[i];
 }
 
 /*before hashing a message, initialize the hash state as H0 */
@@ -240,6 +241,7 @@ static HashReturn Init(hashState *state, int hashbitlen)
     case 224: memcpy(state->x,JH224_H0,128); break;
     case 256: memcpy(state->x,JH256_H0,128); break;
     case 384: memcpy(state->x,JH384_H0,128); break;
+    default:
     case 512: memcpy(state->x,JH512_H0,128); break;
     }
 

From dc0aee1432ed99fec674c715c668243d48fc9de6 Mon Sep 17 00:00:00 2001
From: SChernykh
Date: Wed, 10 Jun 2020 21:49:43 +0200
Subject: [PATCH 2/2] KawPow: fixed crash on old CPUs

- Use `popcnt` instruction only when it's supported
---
 src/backend/cpu/interfaces/ICpuInfo.h | 1 +
 src/backend/cpu/platform/BasicCpuInfo.cpp | 2 ++
 src/crypto/kawpow/KPHash.cpp | 27 ++++++++++++++++++++---
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h
index badb44c0d..bc73e75aa 100644
--- a/src/backend/cpu/interfaces/ICpuInfo.h
+++ b/src/backend/cpu/interfaces/ICpuInfo.h
@@ -61,6 +61,7 @@ public:
         FLAG_SSE2,
         FLAG_SSSE3,
         FLAG_XOP,
+        FLAG_POPCNT,
         FLAG_MAX
     };
 
diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp
index 3c48b10f1..71bfa3b1c 100644
--- a/src/backend/cpu/platform/BasicCpuInfo.cpp
+++ b/src/backend/cpu/platform/BasicCpuInfo.cpp
@@ -142,6 +142,7 @@ static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO,
 static inline bool has_sse2() { return has_feature(PROCESSOR_INFO, EDX_Reg, 1 << 26); }
 static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); }
 static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); }
+static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); }
 
 
 } // namespace xmrig
@@ -176,6 +177,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
     m_flags.set(FLAG_SSE2, has_sse2());
     m_flags.set(FLAG_SSSE3, has_ssse3());
     m_flags.set(FLAG_XOP, has_xop());
+    m_flags.set(FLAG_POPCNT, has_popcnt());
 
 # ifdef XMRIG_FEATURE_ASM
     if (hasAES()) {
diff --git a/src/crypto/kawpow/KPHash.cpp b/src/crypto/kawpow/KPHash.cpp
index 243d20afa..fe5873a6d 100644
--- a/src/crypto/kawpow/KPHash.cpp
+++ b/src/crypto/kawpow/KPHash.cpp
@@ -25,6 +25,7 @@
  */
 
 
+#include "backend/cpu/Cpu.h"
 #include "crypto/kawpow/KPHash.h"
 #include "crypto/kawpow/KPCache.h"
 #include "3rdparty/libethash/ethash.h"
@@ -156,7 +157,22 @@ static inline uint32_t popcount(uint32_t a)
 }
 
 
-static inline uint32_t random_math(uint32_t a, uint32_t b, uint32_t selector)
+// Taken from https://en.wikipedia.org/wiki/Hamming_weight
+static inline uint32_t popcount_soft(uint64_t x)
+{
+    constexpr uint64_t m1 = 0x5555555555555555ull;
+    constexpr uint64_t m2 = 0x3333333333333333ull;
+    constexpr uint64_t m4 = 0x0f0f0f0f0f0f0f0full;
+    constexpr uint64_t h01 = 0x0101010101010101ull;
+
+    x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
+    x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
+    x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
+    return (x * h01) >> 56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
+}
+
+
+static inline uint32_t random_math(uint32_t a, uint32_t b, uint32_t selector, bool has_popcnt)
 {
     switch (selector % 11)
     {
@@ -181,7 +197,10 @@ static inline uint32_t random_math(uint32_t a, uint32_t b, uint32_t selector)
     case 9:
         return clz(a) + clz(b);
     case 10:
-        return popcount(a) + popcount(b);
+        if (has_popcnt)
+            return popcount(a) + popcount(b);
+        else
+            return popcount_soft(a) + popcount_soft(b);
     default:
 #ifdef _MSC_VER
         __assume(false);
@@ -260,6 +279,8 @@ void KPHash::calculate(const KPCache& light_cache, uint32_t block_height, const
     uint32_t jsr0 = jsr;
     uint32_t jcong0 = jcong;
 
+    const bool has_popcnt = Cpu::info()->has(ICpuInfo::FLAG_POPCNT);
+
     for (uint32_t r = 0; r < ETHASH_ACCESSES; ++r) {
         uint32_t item_index = (mix[r % LANES][0] % num_items) * 4;
 
@@ -302,7 +323,7 @@ void KPHash::calculate(const KPCache& light_cache, uint32_t block_height, const
 
             for (size_t l = 0; l < LANES; ++l)
             {
-                const uint32_t data = random_math(mix[l][src1], mix[l][src2], sel1);
+                const uint32_t data = random_math(mix[l][src1], mix[l][src2], sel1, has_popcnt);
                 random_merge(mix[l][dst], data, sel2);
             }
         }