Merge pull request #1717 from SChernykh/evo

Conceal (CCX) support
This commit is contained in:
xmrig 2020-06-07 15:09:19 +07:00 committed by GitHub
commit 0e7bf5913b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 2107 additions and 1913 deletions

View file

@ -146,6 +146,7 @@ bool xmrig::CpuWorker<N>::selfTest()
verify2(Algorithm::CN_R, test_output_r) && verify2(Algorithm::CN_R, test_output_r) &&
verify(Algorithm::CN_RWZ, test_output_rwz) && verify(Algorithm::CN_RWZ, test_output_rwz) &&
verify(Algorithm::CN_ZLS, test_output_zls) && verify(Algorithm::CN_ZLS, test_output_zls) &&
verify(Algorithm::CN_CCX, test_output_ccx) &&
verify(Algorithm::CN_DOUBLE, test_output_double); verify(Algorithm::CN_DOUBLE, test_output_double);
return rc; return rc;

View file

@ -16,16 +16,17 @@
#define ALGO_CN_HEAVY_XHV 15 #define ALGO_CN_HEAVY_XHV 15
#define ALGO_CN_PICO_0 16 #define ALGO_CN_PICO_0 16
#define ALGO_CN_PICO_TLO 17 #define ALGO_CN_PICO_TLO 17
#define ALGO_RX_0 18 #define ALGO_CN_CCX 18
#define ALGO_RX_WOW 19 #define ALGO_RX_0 19
#define ALGO_RX_LOKI 20 #define ALGO_RX_WOW 20
#define ALGO_RX_ARQMA 21 #define ALGO_RX_LOKI 21
#define ALGO_RX_SFX 22 #define ALGO_RX_ARQMA 22
#define ALGO_RX_KEVA 23 #define ALGO_RX_SFX 23
#define ALGO_AR2_CHUKWA 24 #define ALGO_RX_KEVA 24
#define ALGO_AR2_WRKZ 25 #define ALGO_AR2_CHUKWA 25
#define ALGO_ASTROBWT_DERO 26 #define ALGO_AR2_WRKZ 26
#define ALGO_KAWPOW_RVN 27 #define ALGO_ASTROBWT_DERO 27
#define ALGO_KAWPOW_RVN 28
#define FAMILY_UNKNOWN 0 #define FAMILY_UNKNOWN 0
#define FAMILY_CN 1 #define FAMILY_CN 1

View file

@ -253,11 +253,34 @@ __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ul
{ {
uint idx0 = a[0]; uint idx0 = a[0];
# if (ALGO == ALGO_CN_CCX)
float4 conc_var = (float4)(0.0f);
const uint4 conc_t = (uint4)(0x807FFFFFU);
const uint4 conc_u = (uint4)(0x40000000U);
const uint4 conc_v = (uint4)(0x4DFFFFFFU);
# endif
#pragma unroll CN_UNROLL #pragma unroll CN_UNROLL
for (int i = 0; i < ITERATIONS; ++i) { for (int i = 0; i < ITERATIONS; ++i) {
ulong c[2]; ulong c[2];
((uint4 *)c)[0] = Scratchpad[IDX((idx0 & MASK) >> 4)]; ((uint4 *)c)[0] = Scratchpad[IDX((idx0 & MASK) >> 4)];
# if (ALGO == ALGO_CN_CCX)
{
float4 r = convert_float4_rte(((int4 *)c)[0]) + conc_var;
r = r * r * r;
r = as_float4((as_uint4(r) & conc_t) | conc_u);
float4 c_old = conc_var;
conc_var += r;
c_old = as_float4((as_uint4(c_old) & conc_t) | conc_u);
((int4 *)c)[0] ^= convert_int4_rtz(c_old * as_float4(conc_v));
}
# endif
((uint4 *)c)[0] = AES_Round_Two_Tables(AES0, AES1, ((uint4 *)c)[0], ((uint4 *)a)[0]); ((uint4 *)c)[0] = AES_Round_Two_Tables(AES0, AES1, ((uint4 *)c)[0], ((uint4 *)a)[0]);
Scratchpad[IDX((idx0 & MASK) >> 4)] = b_x ^ ((uint4 *)c)[0]; Scratchpad[IDX((idx0 & MASK) >> 4)] = b_x ^ ((uint4 *)c)[0];

File diff suppressed because it is too large Load diff

View file

@ -127,6 +127,7 @@ static AlgoName const algorithm_names[] = {
{ "kawpow", nullptr, Algorithm::KAWPOW_RVN }, { "kawpow", nullptr, Algorithm::KAWPOW_RVN },
{ "kawpow/rvn", nullptr, Algorithm::KAWPOW_RVN }, { "kawpow/rvn", nullptr, Algorithm::KAWPOW_RVN },
# endif # endif
{ "cryptonight/ccx", "cn/ccx", Algorithm::CN_CCX },
}; };
@ -292,6 +293,7 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
case CN_RWZ: case CN_RWZ:
case CN_ZLS: case CN_ZLS:
case CN_DOUBLE: case CN_DOUBLE:
case CN_CCX:
return CN; return CN;
# ifdef XMRIG_ALGO_CN_LITE # ifdef XMRIG_ALGO_CN_LITE

View file

@ -63,6 +63,7 @@ public:
CN_HEAVY_XHV, // "cn-heavy/xhv" CryptoNight-Heavy (modified, Haven Protocol only). CN_HEAVY_XHV, // "cn-heavy/xhv" CryptoNight-Heavy (modified, Haven Protocol only).
CN_PICO_0, // "cn-pico" CryptoNight-Pico CN_PICO_0, // "cn-pico" CryptoNight-Pico
CN_PICO_TLO, // "cn-pico/tlo" CryptoNight-Pico (TLO) CN_PICO_TLO, // "cn-pico/tlo" CryptoNight-Pico (TLO)
CN_CCX, // "cn/ccx" Conceal (CCX)
RX_0, // "rx/0" RandomX (reference configuration). RX_0, // "rx/0" RandomX (reference configuration).
RX_WOW, // "rx/wow" RandomWOW (Wownero). RX_WOW, // "rx/wow" RandomWOW (Wownero).
RX_LOKI, // "rx/loki" RandomXL (Loki). RX_LOKI, // "rx/loki" RandomXL (Loki).

View file

@ -47,15 +47,16 @@ struct CoinName
static CoinName const coin_names[] = { static CoinName const coin_names[] = {
{ "monero", Coin::MONERO }, { "monero", Coin::MONERO },
{ "xmr", Coin::MONERO }, { "xmr", Coin::MONERO },
{ "arqma", Coin::ARQMA }, { "arqma", Coin::ARQMA },
{ "arq", Coin::ARQMA }, { "arq", Coin::ARQMA },
{ "dero", Coin::DERO }, { "dero", Coin::DERO },
{ "keva", Coin::KEVA }, { "keva", Coin::KEVA },
{ "ravencoin", Coin::RAVEN }, { "ravencoin", Coin::RAVEN },
{ "raven", Coin::RAVEN }, { "raven", Coin::RAVEN },
{ "rvn", Coin::RAVEN } { "rvn", Coin::RAVEN },
{ "conceal", Coin::CONCEAL }
}; };
@ -81,6 +82,9 @@ xmrig::Algorithm::Id xmrig::Coin::algorithm(uint8_t blobVersion) const
case RAVEN: case RAVEN:
return Algorithm::KAWPOW_RVN; return Algorithm::KAWPOW_RVN;
case CONCEAL:
return Algorithm::CN_CCX;
case INVALID: case INVALID:
break; break;
} }

View file

@ -43,7 +43,8 @@ public:
ARQMA, ARQMA,
DERO, DERO,
KEVA, KEVA,
RAVEN RAVEN,
CONCEAL
}; };

View file

@ -79,6 +79,7 @@ public:
case Algorithm::CN_HEAVY_TUBE: case Algorithm::CN_HEAVY_TUBE:
case Algorithm::CN_HEAVY_XHV: case Algorithm::CN_HEAVY_XHV:
# endif # endif
case Algorithm::CN_CCX:
return CN_ITER / 2; return CN_ITER / 2;
case Algorithm::CN_RWZ: case Algorithm::CN_RWZ:
@ -125,6 +126,7 @@ public:
case Algorithm::CN_HEAVY_0: case Algorithm::CN_HEAVY_0:
case Algorithm::CN_HEAVY_XHV: case Algorithm::CN_HEAVY_XHV:
# endif # endif
case Algorithm::CN_CCX:
return Algorithm::CN_0; return Algorithm::CN_0;
case Algorithm::CN_1: case Algorithm::CN_1:
@ -168,6 +170,7 @@ template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_XAO>::base() cons
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_LITE_0>::base() const { return Algorithm::CN_0; } template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_LITE_0>::base() const { return Algorithm::CN_0; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_0>::base() const { return Algorithm::CN_0; } template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_0>::base() const { return Algorithm::CN_0; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_XHV>::base() const { return Algorithm::CN_0; } template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_XHV>::base() const { return Algorithm::CN_0; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_CCX>::base() const { return Algorithm::CN_0; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_1>::base() const { return Algorithm::CN_1; } template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_1>::base() const { return Algorithm::CN_1; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_FAST>::base() const { return Algorithm::CN_1; } template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_FAST>::base() const { return Algorithm::CN_1; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_RTO>::base() const { return Algorithm::CN_1; } template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_RTO>::base() const { return Algorithm::CN_1; }
@ -188,6 +191,7 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_RWZ>::iterations() con
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() const { return 0x60000; } template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() const { return 0x60000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const { return CN_ITER / 8; } template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_TLO>::iterations() const { return CN_ITER / 8; } template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_TLO>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_CCX>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_LITE_0>::memory() const { return CN_MEMORY / 2; } template<> constexpr inline size_t CnAlgo<Algorithm::CN_LITE_0>::memory() const { return CN_MEMORY / 2; }

View file

@ -270,6 +270,8 @@ xmrig::CnHash::CnHash()
ADD_FN_ASM(Algorithm::CN_PICO_TLO); ADD_FN_ASM(Algorithm::CN_PICO_TLO);
# endif # endif
ADD_FN(Algorithm::CN_CCX);
# ifdef XMRIG_ALGO_ARGON2 # ifdef XMRIG_ALGO_ARGON2
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>; m_map[Algorithm::AR2_CHUKWA][AV_SINGLE][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE_SOFT][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>; m_map[Algorithm::AR2_CHUKWA][AV_SINGLE_SOFT][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;

View file

@ -442,6 +442,24 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
} }
static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
{
__m128 r = _mm_add_ps(_mm_cvtepi32_ps(cx), conc_var);
r = _mm_mul_ps(r, _mm_mul_ps(r, r));
r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r);
r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r);
__m128 c_old = conc_var;
conc_var = _mm_add_ps(conc_var, r);
c_old = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), c_old);
c_old = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), c_old);
__m128 nc = _mm_mul_ps(c_old, _mm_set1_ps(536870880.0f));
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
}
template<Algorithm::Id ALGO, bool SOFT_AES> template<Algorithm::Id ALGO, bool SOFT_AES>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{ {
@ -475,12 +493,20 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); __m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
__m128 conc_var;
if (ALGO == Algorithm::CN_CCX) {
conc_var = _mm_setzero_ps();
}
uint64_t idx0 = al0; uint64_t idx0 = al0;
for (size_t i = 0; i < props.iterations(); i++) { for (size_t i = 0; i < props.iterations(); i++) {
__m128i cx; __m128i cx;
if (IS_CN_HEAVY_TUBE || !SOFT_AES) { if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK])); cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
if (ALGO == Algorithm::CN_CCX) {
cryptonight_conceal_tweak(cx, conc_var);
}
} }
const __m128i ax0 = _mm_set_epi64x(ah0, al0); const __m128i ax0 = _mm_set_epi64x(ah0, al0);
@ -488,7 +514,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
cx = aes_round_tweak_div(cx, ax0); cx = aes_round_tweak_div(cx, ax0);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0); if (ALGO == Algorithm::CN_CCX) {
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
cryptonight_conceal_tweak(cx, conc_var);
cx = soft_aesenc((uint32_t*)&cx, ax0);
}
else {
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
}
} }
else { else {
cx = _mm_aesenc_si128(cx, ax0); cx = _mm_aesenc_si128(cx, ax0);
@ -622,6 +655,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]); __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
__m128 conc_var0, conc_var1;
if (ALGO == Algorithm::CN_CCX) {
conc_var0 = _mm_setzero_ps();
conc_var1 = _mm_setzero_ps();
}
uint64_t idx0 = al0; uint64_t idx0 = al0;
uint64_t idx1 = al1; uint64_t idx1 = al1;
@ -630,6 +669,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (IS_CN_HEAVY_TUBE || !SOFT_AES) { if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]); cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
if (ALGO == Algorithm::CN_CCX) {
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
}
} }
const __m128i ax0 = _mm_set_epi64x(ah0, al0); const __m128i ax0 = _mm_set_epi64x(ah0, al0);
@ -639,8 +682,18 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cx1 = aes_round_tweak_div(cx1, ax1); cx1 = aes_round_tweak_div(cx1, ax1);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0); if (ALGO == Algorithm::CN_CCX) {
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1); cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
cx0 = soft_aesenc((uint32_t*)&cx0, ax0);
cx1 = soft_aesenc((uint32_t*)&cx1, ax1);
}
else {
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1);
}
} }
else { else {
cx0 = _mm_aesenc_si128(cx0, ax0); cx0 = _mm_aesenc_si128(cx0, ax0);

View file

@ -67,8 +67,10 @@
#ifdef _MSC_VER #ifdef _MSC_VER
# define VARIANT2_SET_ROUNDING_MODE() if (BASE == Algorithm::CN_2) { _control87(RC_DOWN, MCW_RC); } # define VARIANT2_SET_ROUNDING_MODE() if (BASE == Algorithm::CN_2) { _control87(RC_DOWN, MCW_RC); }
# define RESTORE_ROUNDING_MODE() _control87(RC_NEAR, MCW_RC);
#else #else
# define VARIANT2_SET_ROUNDING_MODE() if (BASE == Algorithm::CN_2) { fesetround(FE_DOWNWARD); } # define VARIANT2_SET_ROUNDING_MODE() if (BASE == Algorithm::CN_2) { fesetround(FE_DOWNWARD); }
# define RESTORE_ROUNDING_MODE() fesetround(FE_TONEAREST);
#endif #endif
# define VARIANT2_INTEGER_MATH(part, cl, cx) \ # define VARIANT2_INTEGER_MATH(part, cl, cx) \

View file

@ -231,6 +231,20 @@ const static uint8_t test_output_zls[160] = {
0x00, 0x08, 0x64, 0xF0, 0xA6, 0xC8, 0x94, 0x45, 0x08, 0xED, 0x03, 0x95, 0x52, 0xE9, 0xBC, 0x5F 0x00, 0x08, 0x64, 0xF0, 0xA6, 0xC8, 0x94, 0x45, 0x08, 0xED, 0x03, 0x95, 0x52, 0xE9, 0xBC, 0x5F
}; };
// "cn/ccx"
const static uint8_t test_output_ccx[160] = {
0xB3, 0xA1, 0x67, 0x86, 0xD2, 0xC9, 0x85, 0xEC, 0xAD, 0xC4, 0x5F, 0x91, 0x05, 0x27, 0xC7, 0xA1,
0x96, 0xF0, 0xE1, 0xE9, 0x7C, 0x87, 0x09, 0x38, 0x1D, 0x7D, 0x41, 0x93, 0x35, 0xF8, 0x16, 0x72,
0xC3, 0xBD, 0x8D, 0xE8, 0xD5, 0xAE, 0xB8, 0x59, 0x0A, 0x6C, 0xCB, 0x7B, 0x41, 0x30, 0xF7, 0x04,
0xA5, 0x7C, 0xF9, 0xCA, 0x20, 0x49, 0x9C, 0xFD, 0xE8, 0x43, 0xCF, 0x66, 0x78, 0xEA, 0x76, 0xDD,
0x91, 0x0C, 0xDE, 0x29, 0x2A, 0xE0, 0xA8, 0xCA, 0xBC, 0xAA, 0x53, 0x4C, 0x93, 0x3E, 0x7B, 0x2C,
0xF1, 0xF9, 0xE1, 0x98, 0xB2, 0x92, 0x1E, 0x19, 0x93, 0x2A, 0x74, 0x9D, 0xDB, 0x10, 0x0F, 0x16,
0xD5, 0x3D, 0xE4, 0xC4, 0x23, 0xD9, 0x2E, 0xFD, 0x79, 0x8D, 0x1E, 0x48, 0x4E, 0x46, 0x08, 0x6C,
0xFF, 0x8A, 0x49, 0xFA, 0x1E, 0xB0, 0xB6, 0x9A, 0x47, 0x1C, 0xC6, 0x30, 0x36, 0x5D, 0xFD, 0x76,
0x10, 0x07, 0x44, 0xE6, 0xC8, 0x20, 0x2A, 0x84, 0x9D, 0x70, 0x22, 0x00, 0x8B, 0x9B, 0xBD, 0x8D,
0x27, 0x49, 0xA6, 0x06, 0xDC, 0xF0, 0xA1, 0x4B, 0x50, 0xA0, 0x12, 0xCD, 0x77, 0x01, 0x4C, 0x28
};
// "cn/double" // "cn/double"
const static uint8_t test_output_double[160] = { const static uint8_t test_output_double[160] = {
0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9, 0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,

View file

@ -541,6 +541,23 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
} }
static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
{
__m128 r = _mm_add_ps(_mm_cvtepi32_ps(cx), conc_var);
r = _mm_mul_ps(r, _mm_mul_ps(r, r));
r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r);
r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r);
__m128 c_old = conc_var;
conc_var = _mm_add_ps(conc_var, r);
c_old = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), c_old);
c_old = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), c_old);
__m128 nc = _mm_mul_ps(c_old, _mm_set1_ps(536870880.0f));
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
}
template<Algorithm::Id ALGO, bool SOFT_AES> template<Algorithm::Id ALGO, bool SOFT_AES>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{ {
@ -594,10 +611,19 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx0 = _mm_set_epi64x(static_cast<int64_t>(h0[3] ^ h0[7]), static_cast<int64_t>(h0[2] ^ h0[6])); __m128i bx0 = _mm_set_epi64x(static_cast<int64_t>(h0[3] ^ h0[7]), static_cast<int64_t>(h0[2] ^ h0[6]));
__m128i bx1 = _mm_set_epi64x(static_cast<int64_t>(h0[9] ^ h0[11]), static_cast<int64_t>(h0[8] ^ h0[10])); __m128i bx1 = _mm_set_epi64x(static_cast<int64_t>(h0[9] ^ h0[11]), static_cast<int64_t>(h0[8] ^ h0[10]));
__m128 conc_var;
if (ALGO == Algorithm::CN_CCX) {
conc_var = _mm_setzero_ps();
RESTORE_ROUNDING_MODE();
}
for (size_t i = 0; i < props.iterations(); i++) { for (size_t i = 0; i < props.iterations(); i++) {
__m128i cx; __m128i cx;
if (IS_CN_HEAVY_TUBE || !SOFT_AES) { if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK])); cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
if (ALGO == Algorithm::CN_CCX) {
cryptonight_conceal_tweak(cx, conc_var);
}
} }
const __m128i ax0 = _mm_set_epi64x(static_cast<int64_t>(ah0), static_cast<int64_t>(al0)); const __m128i ax0 = _mm_set_epi64x(static_cast<int64_t>(ah0), static_cast<int64_t>(al0));
@ -605,7 +631,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
cx = aes_round_tweak_div(cx, ax0); cx = aes_round_tweak_div(cx, ax0);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
cx = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table)); if (ALGO == Algorithm::CN_CCX) {
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
cryptonight_conceal_tweak(cx, conc_var);
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
}
else {
cx = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
}
} }
else { else {
cx = _mm_aesenc_si128(cx, ax0); cx = _mm_aesenc_si128(cx, ax0);
@ -971,6 +1004,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]); __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
__m128 conc_var0, conc_var1;
if (ALGO == Algorithm::CN_CCX) {
conc_var0 = _mm_setzero_ps();
conc_var1 = _mm_setzero_ps();
RESTORE_ROUNDING_MODE();
}
uint64_t idx0 = al0; uint64_t idx0 = al0;
uint64_t idx1 = al1; uint64_t idx1 = al1;
@ -979,6 +1019,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (IS_CN_HEAVY_TUBE || !SOFT_AES) { if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
cx0 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK])); cx0 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
cx1 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l1[idx1 & MASK])); cx1 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l1[idx1 & MASK]));
if (ALGO == Algorithm::CN_CCX) {
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
}
} }
const __m128i ax0 = _mm_set_epi64x(ah0, al0); const __m128i ax0 = _mm_set_epi64x(ah0, al0);
@ -988,8 +1032,18 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cx1 = aes_round_tweak_div(cx1, ax1); cx1 = aes_round_tweak_div(cx1, ax1);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
cx0 = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table)); if (ALGO == Algorithm::CN_CCX) {
cx1 = soft_aesenc(&l1[idx1 & MASK], ax1, reinterpret_cast<const uint32_t*>(saes_table)); cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
cx0 = soft_aesenc(&cx0, ax0, reinterpret_cast<const uint32_t*>(saes_table));
cx1 = soft_aesenc(&cx1, ax1, reinterpret_cast<const uint32_t*>(saes_table));
}
else {
cx0 = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
cx1 = soft_aesenc(&l1[idx1 & MASK], ax1, reinterpret_cast<const uint32_t*>(saes_table));
}
} }
else { else {
cx0 = _mm_aesenc_si128(cx0, ax0); cx0 = _mm_aesenc_si128(cx0, ax0);
@ -1144,9 +1198,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
} }
#define CN_STEP1(a, b0, b1, c, l, ptr, idx) \ #define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \ ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
c = _mm_load_si128(ptr); c = _mm_load_si128(ptr); \
if (ALGO == Algorithm::CN_CCX) { \
cryptonight_conceal_tweak(c, conc_var); \
}
#define CN_STEP2(a, b0, b1, c, l, ptr, idx) \ #define CN_STEP2(a, b0, b1, c, l, ptr, idx) \
@ -1246,6 +1304,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]); \ __m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]); \
__m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \ __m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \
__m128i cx##n = _mm_setzero_si128(); \ __m128i cx##n = _mm_setzero_si128(); \
__m128 conc_var##n; \
if (ALGO == Algorithm::CN_CCX) { \
conc_var##n = _mm_setzero_ps(); \
} \
VARIANT4_RANDOM_MATH_INIT(n); VARIANT4_RANDOM_MATH_INIT(n);
@ -1285,6 +1347,9 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
CONST_INIT(ctx[1], 1); CONST_INIT(ctx[1], 1);
CONST_INIT(ctx[2], 2); CONST_INIT(ctx[2], 2);
VARIANT2_SET_ROUNDING_MODE(); VARIANT2_SET_ROUNDING_MODE();
if (ALGO == Algorithm::CN_CCX) {
RESTORE_ROUNDING_MODE();
}
uint64_t idx0, idx1, idx2; uint64_t idx0, idx1, idx2;
idx0 = _mm_cvtsi128_si64(ax0); idx0 = _mm_cvtsi128_si64(ax0);
@ -1295,9 +1360,9 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
uint64_t hi, lo; uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2; __m128i *ptr0, *ptr1, *ptr2;
CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0); CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0, conc_var0);
CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1); CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1, conc_var1);
CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2); CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2, conc_var2);
CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0); CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1); CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
@ -1359,6 +1424,9 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
CONST_INIT(ctx[2], 2); CONST_INIT(ctx[2], 2);
CONST_INIT(ctx[3], 3); CONST_INIT(ctx[3], 3);
VARIANT2_SET_ROUNDING_MODE(); VARIANT2_SET_ROUNDING_MODE();
if (ALGO == Algorithm::CN_CCX) {
RESTORE_ROUNDING_MODE();
}
uint64_t idx0, idx1, idx2, idx3; uint64_t idx0, idx1, idx2, idx3;
idx0 = _mm_cvtsi128_si64(ax0); idx0 = _mm_cvtsi128_si64(ax0);
@ -1370,10 +1438,10 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
uint64_t hi, lo; uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2, *ptr3; __m128i *ptr0, *ptr1, *ptr2, *ptr3;
CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0); CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0, conc_var0);
CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1); CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1, conc_var1);
CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2); CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2, conc_var2);
CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3); CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3, conc_var3);
CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0); CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1); CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
@ -1441,6 +1509,9 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
CONST_INIT(ctx[3], 3); CONST_INIT(ctx[3], 3);
CONST_INIT(ctx[4], 4); CONST_INIT(ctx[4], 4);
VARIANT2_SET_ROUNDING_MODE(); VARIANT2_SET_ROUNDING_MODE();
if (ALGO == Algorithm::CN_CCX) {
RESTORE_ROUNDING_MODE();
}
uint64_t idx0, idx1, idx2, idx3, idx4; uint64_t idx0, idx1, idx2, idx3, idx4;
idx0 = _mm_cvtsi128_si64(ax0); idx0 = _mm_cvtsi128_si64(ax0);
@ -1453,11 +1524,11 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
uint64_t hi, lo; uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4; __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0); CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0, conc_var0);
CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1); CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1, conc_var1);
CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2); CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2, conc_var2);
CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3); CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3, conc_var3);
CN_STEP1(ax4, bx40, bx41, cx4, l4, ptr4, idx4); CN_STEP1(ax4, bx40, bx41, cx4, l4, ptr4, idx4, conc_var4);
CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0); CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1); CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);