From dc0aee1432ed99fec674c715c668243d48fc9de6 Mon Sep 17 00:00:00 2001
From: SChernykh <sergey.v.chernykh@gmail.com>
Date: Wed, 10 Jun 2020 21:49:43 +0200
Subject: [PATCH] KawPow: fixed crash on old CPUs

- Use `popcnt` instruction only when it's supported
---
 src/backend/cpu/interfaces/ICpuInfo.h     |  1 +
 src/backend/cpu/platform/BasicCpuInfo.cpp |  2 ++
 src/crypto/kawpow/KPHash.cpp              | 27 ++++++++++++++++++++---
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h
index badb44c0d..bc73e75aa 100644
--- a/src/backend/cpu/interfaces/ICpuInfo.h
+++ b/src/backend/cpu/interfaces/ICpuInfo.h
@@ -61,6 +61,7 @@ public:
         FLAG_SSE2,
         FLAG_SSSE3,
         FLAG_XOP,
+        FLAG_POPCNT,
         FLAG_MAX
     };
 
diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp
index 3c48b10f1..71bfa3b1c 100644
--- a/src/backend/cpu/platform/BasicCpuInfo.cpp
+++ b/src/backend/cpu/platform/BasicCpuInfo.cpp
@@ -142,6 +142,7 @@ static inline bool has_pdpe1gb()    { return has_feature(PROCESSOR_EXT_INFO,
 static inline bool has_sse2()       { return has_feature(PROCESSOR_INFO,        EDX_Reg, 1 << 26); }
 static inline bool has_ssse3()      { return has_feature(PROCESSOR_INFO,        ECX_Reg, 1 << 9); }
 static inline bool has_xop()        { return has_feature(0x80000001,            ECX_Reg, 1 << 11); }
+static inline bool has_popcnt()     { return has_feature(PROCESSOR_INFO,        ECX_Reg, 1 << 23); } // POPCNT feature flag: bit 23 of ECX_Reg for the PROCESSOR_INFO query
 
 
 } // namespace xmrig
@@ -176,6 +177,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
     m_flags.set(FLAG_SSE2,    has_sse2());
     m_flags.set(FLAG_SSSE3,   has_ssse3());
     m_flags.set(FLAG_XOP,     has_xop());
+    m_flags.set(FLAG_POPCNT,  has_popcnt());
 
 #   ifdef XMRIG_FEATURE_ASM
     if (hasAES()) {
diff --git a/src/crypto/kawpow/KPHash.cpp b/src/crypto/kawpow/KPHash.cpp
index 243d20afa..fe5873a6d 100644
--- a/src/crypto/kawpow/KPHash.cpp
+++ b/src/crypto/kawpow/KPHash.cpp
@@ -25,6 +25,7 @@
  */
 
 
+#include "backend/cpu/Cpu.h"
 #include "crypto/kawpow/KPHash.h"
 #include "crypto/kawpow/KPCache.h"
 #include "3rdparty/libethash/ethash.h"
@@ -156,7 +157,22 @@ static inline uint32_t popcount(uint32_t a)
 }
 
 
-static inline uint32_t random_math(uint32_t a, uint32_t b, uint32_t selector)
+// Software population count: returns the number of set bits in x. Used as the fallback when the CPU has no `popcnt` instruction. Algorithm taken from https://en.wikipedia.org/wiki/Hamming_weight
+static inline uint32_t popcount_soft(uint64_t x)
+{
+    constexpr uint64_t m1 = 0x5555555555555555ull; // binary 0101...: low bit of every 2-bit pair
+    constexpr uint64_t m2 = 0x3333333333333333ull; // binary 00110011...: low 2 bits of every 4-bit group
+    constexpr uint64_t m4 = 0x0f0f0f0f0f0f0f0full; // binary 00001111...: low 4 bits of every byte
+    constexpr uint64_t h01 = 0x0101010101010101ull; // a 1 in every byte: multiplying by it sums all bytes into the top byte
+
+    x -= (x >> 1) & m1;             // put count of each 2 bits into those 2 bits
+    x = (x & m2) + ((x >> 2) & m2); // put count of each 4 bits into those 4 bits
+    x = (x + (x >> 4)) & m4;        // put count of each 8 bits into those 8 bits
+    return (x * h01) >> 56;         // top 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ... = sum of all byte counts (at most 64, so the narrowing to uint32_t loses nothing)
+}
+
+
+static inline uint32_t random_math(uint32_t a, uint32_t b, uint32_t selector, bool has_popcnt)
 {
     switch (selector % 11)
     {
@@ -181,7 +197,10 @@ static inline uint32_t random_math(uint32_t a, uint32_t b, uint32_t selector)
     case 9:
         return clz(a) + clz(b);
     case 10:
-        return popcount(a) + popcount(b);
+        if (has_popcnt)
+            return popcount(a) + popcount(b);
+        else
+            return popcount_soft(a) + popcount_soft(b);
     default:
 #ifdef _MSC_VER
         __assume(false);
@@ -260,6 +279,8 @@ void KPHash::calculate(const KPCache& light_cache, uint32_t block_height, const
     uint32_t jsr0 = jsr;
     uint32_t jcong0 = jcong;
 
+    const bool has_popcnt = Cpu::info()->has(ICpuInfo::FLAG_POPCNT);
+
     for (uint32_t r = 0; r < ETHASH_ACCESSES; ++r) {
         uint32_t item_index = (mix[r % LANES][0] % num_items) * 4;
 
@@ -302,7 +323,7 @@ void KPHash::calculate(const KPCache& light_cache, uint32_t block_height, const
 
                 for (size_t l = 0; l < LANES; ++l)
                 {
-                    const uint32_t data = random_math(mix[l][src1], mix[l][src2], sel1);
+                    const uint32_t data = random_math(mix[l][src1], mix[l][src2], sel1, has_popcnt);
                     random_merge(mix[l][dst], data, sel2);
                 }
             }