mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-03 17:40:13 +00:00
Conceal (CCX) support
This commit is contained in:
parent
5ea0de2410
commit
7f00cb59d2
14 changed files with 2107 additions and 1913 deletions
|
@ -146,6 +146,7 @@ bool xmrig::CpuWorker<N>::selfTest()
|
|||
verify2(Algorithm::CN_R, test_output_r) &&
|
||||
verify(Algorithm::CN_RWZ, test_output_rwz) &&
|
||||
verify(Algorithm::CN_ZLS, test_output_zls) &&
|
||||
verify(Algorithm::CN_CCX, test_output_ccx) &&
|
||||
verify(Algorithm::CN_DOUBLE, test_output_double);
|
||||
|
||||
return rc;
|
||||
|
|
|
@ -16,16 +16,17 @@
|
|||
#define ALGO_CN_HEAVY_XHV 15
|
||||
#define ALGO_CN_PICO_0 16
|
||||
#define ALGO_CN_PICO_TLO 17
|
||||
#define ALGO_RX_0 18
|
||||
#define ALGO_RX_WOW 19
|
||||
#define ALGO_RX_LOKI 20
|
||||
#define ALGO_RX_ARQMA 21
|
||||
#define ALGO_RX_SFX 22
|
||||
#define ALGO_RX_KEVA 23
|
||||
#define ALGO_AR2_CHUKWA 24
|
||||
#define ALGO_AR2_WRKZ 25
|
||||
#define ALGO_ASTROBWT_DERO 26
|
||||
#define ALGO_KAWPOW_RVN 27
|
||||
#define ALGO_CN_CCX 18
|
||||
#define ALGO_RX_0 19
|
||||
#define ALGO_RX_WOW 20
|
||||
#define ALGO_RX_LOKI 21
|
||||
#define ALGO_RX_ARQMA 22
|
||||
#define ALGO_RX_SFX 23
|
||||
#define ALGO_RX_KEVA 24
|
||||
#define ALGO_AR2_CHUKWA 25
|
||||
#define ALGO_AR2_WRKZ 26
|
||||
#define ALGO_ASTROBWT_DERO 27
|
||||
#define ALGO_KAWPOW_RVN 28
|
||||
|
||||
#define FAMILY_UNKNOWN 0
|
||||
#define FAMILY_CN 1
|
||||
|
|
|
@ -253,11 +253,34 @@ __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
|||
{
|
||||
uint idx0 = a[0];
|
||||
|
||||
# if (ALGO == ALGO_CN_CCX)
|
||||
float4 conc_var = (float4)(0.0f);
|
||||
const uint4 conc_t = (uint4)(0x807FFFFFU);
|
||||
const uint4 conc_u = (uint4)(0x40000000U);
|
||||
const uint4 conc_v = (uint4)(0x4DFFFFFFU);
|
||||
# endif
|
||||
|
||||
#pragma unroll CN_UNROLL
|
||||
for (int i = 0; i < ITERATIONS; ++i) {
|
||||
ulong c[2];
|
||||
|
||||
((uint4 *)c)[0] = Scratchpad[IDX((idx0 & MASK) >> 4)];
|
||||
|
||||
# if (ALGO == ALGO_CN_CCX)
|
||||
{
|
||||
float4 r = convert_float4_rte(((int4 *)c)[0]) + conc_var;
|
||||
r = r * r * r;
|
||||
r = as_float4((as_uint4(r) & conc_t) | conc_u);
|
||||
|
||||
float4 c_old = conc_var;
|
||||
conc_var += r;
|
||||
|
||||
c_old = as_float4((as_uint4(c_old) & conc_t) | conc_u);
|
||||
|
||||
((int4 *)c)[0] ^= convert_int4_rtz(c_old * as_float4(conc_v));
|
||||
}
|
||||
# endif
|
||||
|
||||
((uint4 *)c)[0] = AES_Round_Two_Tables(AES0, AES1, ((uint4 *)c)[0], ((uint4 *)a)[0]);
|
||||
|
||||
Scratchpad[IDX((idx0 & MASK) >> 4)] = b_x ^ ((uint4 *)c)[0];
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -127,6 +127,7 @@ static AlgoName const algorithm_names[] = {
|
|||
{ "kawpow", nullptr, Algorithm::KAWPOW_RVN },
|
||||
{ "kawpow/rvn", nullptr, Algorithm::KAWPOW_RVN },
|
||||
# endif
|
||||
{ "cryptonight/ccx", "cn/ccx", Algorithm::CN_CCX },
|
||||
};
|
||||
|
||||
|
||||
|
@ -292,6 +293,7 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
|
|||
case CN_RWZ:
|
||||
case CN_ZLS:
|
||||
case CN_DOUBLE:
|
||||
case CN_CCX:
|
||||
return CN;
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_LITE
|
||||
|
|
|
@ -63,6 +63,7 @@ public:
|
|||
CN_HEAVY_XHV, // "cn-heavy/xhv" CryptoNight-Heavy (modified, Haven Protocol only).
|
||||
CN_PICO_0, // "cn-pico" CryptoNight-Pico
|
||||
CN_PICO_TLO, // "cn-pico/tlo" CryptoNight-Pico (TLO)
|
||||
CN_CCX, // "cn/ccx" Conceal (CCX)
|
||||
RX_0, // "rx/0" RandomX (reference configuration).
|
||||
RX_WOW, // "rx/wow" RandomWOW (Wownero).
|
||||
RX_LOKI, // "rx/loki" RandomXL (Loki).
|
||||
|
|
|
@ -47,15 +47,16 @@ struct CoinName
|
|||
|
||||
|
||||
static CoinName const coin_names[] = {
|
||||
{ "monero", Coin::MONERO },
|
||||
{ "xmr", Coin::MONERO },
|
||||
{ "arqma", Coin::ARQMA },
|
||||
{ "arq", Coin::ARQMA },
|
||||
{ "dero", Coin::DERO },
|
||||
{ "keva", Coin::KEVA },
|
||||
{ "ravencoin", Coin::RAVEN },
|
||||
{ "raven", Coin::RAVEN },
|
||||
{ "rvn", Coin::RAVEN }
|
||||
{ "monero", Coin::MONERO },
|
||||
{ "xmr", Coin::MONERO },
|
||||
{ "arqma", Coin::ARQMA },
|
||||
{ "arq", Coin::ARQMA },
|
||||
{ "dero", Coin::DERO },
|
||||
{ "keva", Coin::KEVA },
|
||||
{ "ravencoin", Coin::RAVEN },
|
||||
{ "raven", Coin::RAVEN },
|
||||
{ "rvn", Coin::RAVEN },
|
||||
{ "conceal", Coin::CONCEAL }
|
||||
};
|
||||
|
||||
|
||||
|
@ -81,6 +82,9 @@ xmrig::Algorithm::Id xmrig::Coin::algorithm(uint8_t blobVersion) const
|
|||
case RAVEN:
|
||||
return Algorithm::KAWPOW_RVN;
|
||||
|
||||
case CONCEAL:
|
||||
return Algorithm::CN_CCX;
|
||||
|
||||
case INVALID:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -43,7 +43,8 @@ public:
|
|||
ARQMA,
|
||||
DERO,
|
||||
KEVA,
|
||||
RAVEN
|
||||
RAVEN,
|
||||
CONCEAL
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -79,6 +79,7 @@ public:
|
|||
case Algorithm::CN_HEAVY_TUBE:
|
||||
case Algorithm::CN_HEAVY_XHV:
|
||||
# endif
|
||||
case Algorithm::CN_CCX:
|
||||
return CN_ITER / 2;
|
||||
|
||||
case Algorithm::CN_RWZ:
|
||||
|
@ -125,6 +126,7 @@ public:
|
|||
case Algorithm::CN_HEAVY_0:
|
||||
case Algorithm::CN_HEAVY_XHV:
|
||||
# endif
|
||||
case Algorithm::CN_CCX:
|
||||
return Algorithm::CN_0;
|
||||
|
||||
case Algorithm::CN_1:
|
||||
|
@ -168,6 +170,7 @@ template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_XAO>::base() cons
|
|||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_LITE_0>::base() const { return Algorithm::CN_0; }
|
||||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_0>::base() const { return Algorithm::CN_0; }
|
||||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_XHV>::base() const { return Algorithm::CN_0; }
|
||||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_CCX>::base() const { return Algorithm::CN_0; }
|
||||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_1>::base() const { return Algorithm::CN_1; }
|
||||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_FAST>::base() const { return Algorithm::CN_1; }
|
||||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_RTO>::base() const { return Algorithm::CN_1; }
|
||||
|
@ -188,6 +191,7 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_RWZ>::iterations() con
|
|||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() const { return 0x60000; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const { return CN_ITER / 8; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_TLO>::iterations() const { return CN_ITER / 8; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_CCX>::iterations() const { return CN_ITER / 2; }
|
||||
|
||||
|
||||
template<> constexpr inline size_t CnAlgo<Algorithm::CN_LITE_0>::memory() const { return CN_MEMORY / 2; }
|
||||
|
|
|
@ -270,6 +270,8 @@ xmrig::CnHash::CnHash()
|
|||
ADD_FN_ASM(Algorithm::CN_PICO_TLO);
|
||||
# endif
|
||||
|
||||
ADD_FN(Algorithm::CN_CCX);
|
||||
|
||||
# ifdef XMRIG_ALGO_ARGON2
|
||||
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;
|
||||
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE_SOFT][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;
|
||||
|
|
|
@ -442,6 +442,24 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
|
|||
}
|
||||
|
||||
|
||||
static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
|
||||
{
|
||||
__m128 r = _mm_add_ps(_mm_cvtepi32_ps(cx), conc_var);
|
||||
r = _mm_mul_ps(r, _mm_mul_ps(r, r));
|
||||
r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r);
|
||||
r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r);
|
||||
|
||||
__m128 c_old = conc_var;
|
||||
conc_var = _mm_add_ps(conc_var, r);
|
||||
|
||||
c_old = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), c_old);
|
||||
c_old = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), c_old);
|
||||
|
||||
__m128 nc = _mm_mul_ps(c_old, _mm_set1_ps(536870880.0f));
|
||||
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
|
@ -475,12 +493,20 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
|
||||
__m128 conc_var;
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
conc_var = _mm_setzero_ps();
|
||||
}
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
|
||||
for (size_t i = 0; i < props.iterations(); i++) {
|
||||
__m128i cx;
|
||||
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
}
|
||||
}
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
@ -488,7 +514,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
cx = aes_round_tweak_div(cx, ax0);
|
||||
}
|
||||
else if (SOFT_AES) {
|
||||
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
cx = soft_aesenc((uint32_t*)&cx, ax0);
|
||||
}
|
||||
else {
|
||||
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
|
||||
}
|
||||
}
|
||||
else {
|
||||
cx = _mm_aesenc_si128(cx, ax0);
|
||||
|
@ -622,6 +655,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
__m128 conc_var0, conc_var1;
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
conc_var0 = _mm_setzero_ps();
|
||||
conc_var1 = _mm_setzero_ps();
|
||||
}
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
|
@ -630,6 +669,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||
cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
|
||||
cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cryptonight_conceal_tweak(cx0, conc_var0);
|
||||
cryptonight_conceal_tweak(cx1, conc_var1);
|
||||
}
|
||||
}
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
@ -639,8 +682,18 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
cx1 = aes_round_tweak_div(cx1, ax1);
|
||||
}
|
||||
else if (SOFT_AES) {
|
||||
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
|
||||
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1);
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
|
||||
cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
|
||||
cryptonight_conceal_tweak(cx0, conc_var0);
|
||||
cryptonight_conceal_tweak(cx1, conc_var1);
|
||||
cx0 = soft_aesenc((uint32_t*)&cx0, ax0);
|
||||
cx1 = soft_aesenc((uint32_t*)&cx1, ax1);
|
||||
}
|
||||
else {
|
||||
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
|
||||
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1);
|
||||
}
|
||||
}
|
||||
else {
|
||||
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||
|
|
|
@ -67,8 +67,10 @@
|
|||
|
||||
#ifdef _MSC_VER
|
||||
# define VARIANT2_SET_ROUNDING_MODE() if (BASE == Algorithm::CN_2) { _control87(RC_DOWN, MCW_RC); }
|
||||
# define RESTORE_ROUNDING_MODE() _control87(RC_NEAR, MCW_RC);
|
||||
#else
|
||||
# define VARIANT2_SET_ROUNDING_MODE() if (BASE == Algorithm::CN_2) { fesetround(FE_DOWNWARD); }
|
||||
# define RESTORE_ROUNDING_MODE() fesetround(FE_TONEAREST);
|
||||
#endif
|
||||
|
||||
# define VARIANT2_INTEGER_MATH(part, cl, cx) \
|
||||
|
|
|
@ -231,6 +231,20 @@ const static uint8_t test_output_zls[160] = {
|
|||
0x00, 0x08, 0x64, 0xF0, 0xA6, 0xC8, 0x94, 0x45, 0x08, 0xED, 0x03, 0x95, 0x52, 0xE9, 0xBC, 0x5F
|
||||
};
|
||||
|
||||
// "cn/ccx"
|
||||
const static uint8_t test_output_ccx[160] = {
|
||||
0xB3, 0xA1, 0x67, 0x86, 0xD2, 0xC9, 0x85, 0xEC, 0xAD, 0xC4, 0x5F, 0x91, 0x05, 0x27, 0xC7, 0xA1,
|
||||
0x96, 0xF0, 0xE1, 0xE9, 0x7C, 0x87, 0x09, 0x38, 0x1D, 0x7D, 0x41, 0x93, 0x35, 0xF8, 0x16, 0x72,
|
||||
0xC3, 0xBD, 0x8D, 0xE8, 0xD5, 0xAE, 0xB8, 0x59, 0x0A, 0x6C, 0xCB, 0x7B, 0x41, 0x30, 0xF7, 0x04,
|
||||
0xA5, 0x7C, 0xF9, 0xCA, 0x20, 0x49, 0x9C, 0xFD, 0xE8, 0x43, 0xCF, 0x66, 0x78, 0xEA, 0x76, 0xDD,
|
||||
0x91, 0x0C, 0xDE, 0x29, 0x2A, 0xE0, 0xA8, 0xCA, 0xBC, 0xAA, 0x53, 0x4C, 0x93, 0x3E, 0x7B, 0x2C,
|
||||
0xF1, 0xF9, 0xE1, 0x98, 0xB2, 0x92, 0x1E, 0x19, 0x93, 0x2A, 0x74, 0x9D, 0xDB, 0x10, 0x0F, 0x16,
|
||||
0xD5, 0x3D, 0xE4, 0xC4, 0x23, 0xD9, 0x2E, 0xFD, 0x79, 0x8D, 0x1E, 0x48, 0x4E, 0x46, 0x08, 0x6C,
|
||||
0xFF, 0x8A, 0x49, 0xFA, 0x1E, 0xB0, 0xB6, 0x9A, 0x47, 0x1C, 0xC6, 0x30, 0x36, 0x5D, 0xFD, 0x76,
|
||||
0x10, 0x07, 0x44, 0xE6, 0xC8, 0x20, 0x2A, 0x84, 0x9D, 0x70, 0x22, 0x00, 0x8B, 0x9B, 0xBD, 0x8D,
|
||||
0x27, 0x49, 0xA6, 0x06, 0xDC, 0xF0, 0xA1, 0x4B, 0x50, 0xA0, 0x12, 0xCD, 0x77, 0x01, 0x4C, 0x28
|
||||
};
|
||||
|
||||
// "cn/double"
|
||||
const static uint8_t test_output_double[160] = {
|
||||
0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,
|
||||
|
|
|
@ -541,6 +541,23 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
|
|||
}
|
||||
|
||||
|
||||
static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
|
||||
{
|
||||
__m128 r = _mm_add_ps(_mm_cvtepi32_ps(cx), conc_var);
|
||||
r = _mm_mul_ps(r, _mm_mul_ps(r, r));
|
||||
r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r);
|
||||
r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r);
|
||||
|
||||
__m128 c_old = conc_var;
|
||||
conc_var = _mm_add_ps(conc_var, r);
|
||||
|
||||
c_old = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), c_old);
|
||||
c_old = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), c_old);
|
||||
|
||||
__m128 nc = _mm_mul_ps(c_old, _mm_set1_ps(536870880.0f));
|
||||
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
|
||||
}
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
|
@ -594,10 +611,19 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
__m128i bx0 = _mm_set_epi64x(static_cast<int64_t>(h0[3] ^ h0[7]), static_cast<int64_t>(h0[2] ^ h0[6]));
|
||||
__m128i bx1 = _mm_set_epi64x(static_cast<int64_t>(h0[9] ^ h0[11]), static_cast<int64_t>(h0[8] ^ h0[10]));
|
||||
|
||||
__m128 conc_var;
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
conc_var = _mm_setzero_ps();
|
||||
RESTORE_ROUNDING_MODE();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < props.iterations(); i++) {
|
||||
__m128i cx;
|
||||
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
}
|
||||
}
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(static_cast<int64_t>(ah0), static_cast<int64_t>(al0));
|
||||
|
@ -605,7 +631,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
cx = aes_round_tweak_div(cx, ax0);
|
||||
}
|
||||
else if (SOFT_AES) {
|
||||
cx = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
else {
|
||||
cx = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
}
|
||||
else {
|
||||
cx = _mm_aesenc_si128(cx, ax0);
|
||||
|
@ -971,6 +1004,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
__m128 conc_var0, conc_var1;
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
conc_var0 = _mm_setzero_ps();
|
||||
conc_var1 = _mm_setzero_ps();
|
||||
RESTORE_ROUNDING_MODE();
|
||||
}
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
|
@ -979,6 +1019,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||
cx0 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
|
||||
cx1 = _mm_load_si128(reinterpret_cast<const __m128i *>(&l1[idx1 & MASK]));
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cryptonight_conceal_tweak(cx0, conc_var0);
|
||||
cryptonight_conceal_tweak(cx1, conc_var1);
|
||||
}
|
||||
}
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
@ -988,8 +1032,18 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
cx1 = aes_round_tweak_div(cx1, ax1);
|
||||
}
|
||||
else if (SOFT_AES) {
|
||||
cx0 = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx1 = soft_aesenc(&l1[idx1 & MASK], ax1, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
|
||||
cryptonight_conceal_tweak(cx0, conc_var0);
|
||||
cryptonight_conceal_tweak(cx1, conc_var1);
|
||||
cx0 = soft_aesenc(&cx0, ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx1 = soft_aesenc(&cx1, ax1, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
else {
|
||||
cx0 = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx1 = soft_aesenc(&l1[idx1 & MASK], ax1, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
}
|
||||
else {
|
||||
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||
|
@ -1144,9 +1198,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
|
||||
#define CN_STEP1(a, b0, b1, c, l, ptr, idx) \
|
||||
#define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
|
||||
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
|
||||
c = _mm_load_si128(ptr);
|
||||
c = _mm_load_si128(ptr); \
|
||||
if (ALGO == Algorithm::CN_CCX) { \
|
||||
cryptonight_conceal_tweak(c, conc_var); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define CN_STEP2(a, b0, b1, c, l, ptr, idx) \
|
||||
|
@ -1246,6 +1304,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
__m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]); \
|
||||
__m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \
|
||||
__m128i cx##n = _mm_setzero_si128(); \
|
||||
__m128 conc_var##n; \
|
||||
if (ALGO == Algorithm::CN_CCX) { \
|
||||
conc_var##n = _mm_setzero_ps(); \
|
||||
} \
|
||||
VARIANT4_RANDOM_MATH_INIT(n);
|
||||
|
||||
|
||||
|
@ -1285,6 +1347,9 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|||
CONST_INIT(ctx[1], 1);
|
||||
CONST_INIT(ctx[2], 2);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
RESTORE_ROUNDING_MODE();
|
||||
}
|
||||
|
||||
uint64_t idx0, idx1, idx2;
|
||||
idx0 = _mm_cvtsi128_si64(ax0);
|
||||
|
@ -1295,9 +1360,9 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|||
uint64_t hi, lo;
|
||||
__m128i *ptr0, *ptr1, *ptr2;
|
||||
|
||||
CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
|
||||
CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
|
||||
CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
|
||||
CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0, conc_var0);
|
||||
CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1, conc_var1);
|
||||
CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2, conc_var2);
|
||||
|
||||
CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
|
||||
CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
|
||||
|
@ -1359,6 +1424,9 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
CONST_INIT(ctx[2], 2);
|
||||
CONST_INIT(ctx[3], 3);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
RESTORE_ROUNDING_MODE();
|
||||
}
|
||||
|
||||
uint64_t idx0, idx1, idx2, idx3;
|
||||
idx0 = _mm_cvtsi128_si64(ax0);
|
||||
|
@ -1370,10 +1438,10 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
uint64_t hi, lo;
|
||||
__m128i *ptr0, *ptr1, *ptr2, *ptr3;
|
||||
|
||||
CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
|
||||
CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
|
||||
CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
|
||||
CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
|
||||
CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0, conc_var0);
|
||||
CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1, conc_var1);
|
||||
CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2, conc_var2);
|
||||
CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3, conc_var3);
|
||||
|
||||
CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
|
||||
CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
|
||||
|
@ -1441,6 +1509,9 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|||
CONST_INIT(ctx[3], 3);
|
||||
CONST_INIT(ctx[4], 4);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
RESTORE_ROUNDING_MODE();
|
||||
}
|
||||
|
||||
uint64_t idx0, idx1, idx2, idx3, idx4;
|
||||
idx0 = _mm_cvtsi128_si64(ax0);
|
||||
|
@ -1453,11 +1524,11 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|||
uint64_t hi, lo;
|
||||
__m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
|
||||
|
||||
CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
|
||||
CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
|
||||
CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
|
||||
CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
|
||||
CN_STEP1(ax4, bx40, bx41, cx4, l4, ptr4, idx4);
|
||||
CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0, conc_var0);
|
||||
CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1, conc_var1);
|
||||
CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2, conc_var2);
|
||||
CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3, conc_var3);
|
||||
CN_STEP1(ax4, bx40, bx41, cx4, l4, ptr4, idx4, conc_var4);
|
||||
|
||||
CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
|
||||
CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
|
||||
|
|
Loading…
Reference in a new issue