Cryptonight variant 2 support

Reference code: https://github.com/monero-project/monero/pull/4218
SChernykh, 2018-08-28 23:42:15 +02:00
commit 36a562e1f1 (parent f7b029eb05)
11 changed files with 485 additions and 390 deletions


@@ -60,6 +60,7 @@ static AlgoData const algorithms[] = {
     { "cryptonight/msr", "cn/msr", xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR },
     { "cryptonight/xao", "cn/xao", xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO },
     { "cryptonight/rto", "cn/rto", xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO },
+    { "cryptonight/2",   "cn/2",   xmrig::CRYPTONIGHT, xmrig::VARIANT_2   },

 #   ifndef XMRIG_NO_AEON
     { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO },

@@ -81,6 +82,8 @@ static AlgoData const algorithms[] = {
 static AlgoData const xmrStakAlgorithms[] = {
     { "cryptonight-monerov7", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_1 },
     { "cryptonight_v7",       nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_1 },
+    { "cryptonight-monerov8", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_2 },
+    { "cryptonight_v8",       nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_2 },
     { "cryptonight_v7_stellite", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL },
     { "cryptonight_lite",     nullptr, xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_0 },
     { "cryptonight-aeonv7",   nullptr, xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_1 },

@@ -103,7 +106,8 @@ static const char *variants[] = {
     "msr",
     "xhv",
     "xao",
-    "rto"
+    "rto",
+    "2",
 };


@@ -178,7 +178,12 @@ xmrig::Variant Job::variant() const
     }

     if (m_algorithm.variant() == xmrig::VARIANT_AUTO) {
-        return m_algorithm.algo() == xmrig::CRYPTONIGHT_HEAVY ? xmrig::VARIANT_0 : xmrig::VARIANT_1;
+        if (m_algorithm.algo() == xmrig::CRYPTONIGHT_HEAVY) {
+            return xmrig::VARIANT_0;
+        } else if (m_algorithm.algo() == xmrig::CRYPTONIGHT_LITE) {
+            return xmrig::VARIANT_1;
+        }
+        return (m_blob[0] >= 8) ? xmrig::VARIANT_2 : xmrig::VARIANT_1;
     }

     return m_algorithm.variant();
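With this change, a pool that leaves the algorithm on VARIANT_AUTO gets variant 2 automatically once the job's block major version (the first blob byte) reaches 8, while CryptoNight-Heavy and CryptoNight-Lite keep their old defaults. A standalone sketch of that selection rule; the enum names mirror xmrig's, but the helper itself is illustrative and not part of the Job API:

#include <cstdint>

enum Variant { VARIANT_0, VARIANT_1, VARIANT_2 };
enum Algo    { CRYPTONIGHT, CRYPTONIGHT_LITE, CRYPTONIGHT_HEAVY };

// Heavy coins stay on variant 0, lite coins on variant 1, and plain
// CryptoNight follows the block major version carried in blob[0]:
// version 8 and later means the variant 2 fork.
static Variant pick_auto_variant(Algo algo, const uint8_t *blob)
{
    if (algo == CRYPTONIGHT_HEAVY) {
        return VARIANT_0;
    }
    if (algo == CRYPTONIGHT_LITE) {
        return VARIANT_1;
    }
    return (blob[0] >= 8) ? VARIANT_2 : VARIANT_1;
}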


@@ -211,6 +211,7 @@ rapidjson::Value Pool::toJSON(rapidjson::Document &doc) const
     case xmrig::VARIANT_AUTO:
     case xmrig::VARIANT_0:
     case xmrig::VARIANT_1:
+    case xmrig::VARIANT_2:
         obj.AddMember("variant", m_algorithm.variant(), allocator);
         break;

@@ -377,6 +378,7 @@ void Pool::rebuild()
     m_algorithms.push_back(m_algorithm);

 #   ifndef XMRIG_PROXY_PROJECT
+    addVariant(xmrig::VARIANT_2);
    addVariant(xmrig::VARIANT_1);
    addVariant(xmrig::VARIANT_0);
    addVariant(xmrig::VARIANT_XTL);


@@ -67,6 +67,7 @@ enum Variant {
     VARIANT_XHV = 5,  // Modified CryptoNight-Heavy (Haven Protocol only)
     VARIANT_XAO = 6,  // Modified CryptoNight variant 0 (Alloy only)
     VARIANT_RTO = 7,  // Modified CryptoNight variant 1 (Arto only)
+    VARIANT_2   = 8,  // CryptoNight variant 2
     VARIANT_MAX
 };


@@ -404,19 +404,27 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
 }

-template<int SHIFT>
-static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+template<int VARIANT>
+static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i cx)
 {
-    mem_out[0] = EXTRACT64(tmp);
-    uint64_t vh = vgetq_lane_u64(tmp, 1);
+    uint64_t* mem_out = (uint64_t*)&l[idx];

-    uint8_t x = vh >> 24;
-    static const uint16_t table = 0x7531;
-    const uint8_t index = (((x >> SHIFT) & 6) | (x & 1)) << 1;
-    vh ^= ((table >> index) & 0x3) << 28;
+    if (VARIANT == xmrig::VARIANT_2) {
+        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1);
+        _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
+    } else {
+        __m128i tmp = _mm_xor_si128(bx0, cx);
+        mem_out[0] = EXTRACT64(tmp);

-    mem_out[1] = vh;
+        uint64_t vh = vgetq_lane_u64(tmp, 1);
+
+        uint8_t x = vh >> 24;
+        static const uint16_t table = 0x7531;
+        const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1;
+        vh ^= ((table >> index) & 0x3) << 28;
+
+        mem_out[1] = vh;
+    }
 }
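cryptonight_monero_tweak() now receives the scratchpad base, the index and both history registers because variant 2 replaces the variant 1 high-byte tweak with a shuffle of the rest of the 64-byte cache line: the three sibling 16-byte chunks at offsets ^0x10, ^0x20 and ^0x30 are reloaded and rewritten as sums with the a, b0 and b1 registers (64-bit lane-wise adds). A scalar model of that data movement, with the NEON/SSE vectors replaced by uint64_t pairs; this is a sketch for reading the macro, not the code the miner runs:

#include <cstdint>
#include <cstring>

// Scalar model of VARIANT2_SHUFFLE: each chunk is 16 bytes, treated as two
// independent 64-bit lanes (the vector code uses paddq / vaddq_u64).
static void variant2_shuffle(uint8_t *scratchpad, uint64_t offset,
                             const uint64_t a[2], const uint64_t b0[2], const uint64_t b1[2])
{
    uint64_t c1[2], c2[2], c3[2];
    memcpy(c1, scratchpad + (offset ^ 0x10), 16);
    memcpy(c2, scratchpad + (offset ^ 0x20), 16);
    memcpy(c3, scratchpad + (offset ^ 0x30), 16);

    // The three sibling chunks of the 64-byte line are rotated and mixed with
    // the running registers: ^0x10 <- chunk3 + b1, ^0x20 <- chunk1 + b0, ^0x30 <- chunk2 + a.
    const uint64_t n1[2] = { c3[0] + b1[0], c3[1] + b1[1] };
    const uint64_t n2[2] = { c1[0] + b0[0], c1[1] + b0[1] };
    const uint64_t n3[2] = { c2[0] + a[0],  c2[1] + a[1]  };
    memcpy(scratchpad + (offset ^ 0x10), n1, 16);
    memcpy(scratchpad + (offset ^ 0x20), n2, 16);
    memcpy(scratchpad + (offset ^ 0x30), n3, 16);
}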
@ -426,27 +434,29 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>(); constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>(); constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>(); constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>(); constexpr bool IS_V1 = xmrig::cn_uses_variant1<VARIANT>();
if (IS_MONERO && size < 43) { if (IS_V1 && size < 43) {
memset(output, 0, 32); memset(output, 0, 32);
return; return;
} }
xmrig::keccak(input, size, ctx[0]->state); xmrig::keccak(input, size, ctx[0]->state);
VARIANT1_INIT(0);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory; const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state); uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
VARIANT1_INIT(0);
VARIANT2_INIT(0);
uint64_t al0 = h0[0] ^ h0[4]; uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5]; uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
uint64_t idx0 = h0[0] ^ h0[4]; uint64_t idx0 = al0;
for (size_t i = 0; i < ITERATIONS; i++) { for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx; __m128i cx;
@ -454,44 +464,47 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
} }
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
if (VARIANT == xmrig::VARIANT_TUBE) { if (VARIANT == xmrig::VARIANT_TUBE) {
cx = aes_round_tweak_div(cx, _mm_set_epi64x(ah0, al0)); cx = aes_round_tweak_div(cx, ax0);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
} }
else { else {
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); cx = _mm_aesenc_si128(cx, ax0);
} }
if (IS_MONERO) { if (IS_V1 || VARIANT == xmrig::VARIANT_2) {
cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); cryptonight_monero_tweak<VARIANT>(l0, idx0 & MASK, ax0, bx0, bx1, cx);
} else { } else {
_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
} }
idx0 = EXTRACT64(cx); idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch; uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0]; cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1]; ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi); if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1);
}
else {
lo = __umul128(idx0, cl, &hi);
}
al0 += hi; al0 += hi;
ah0 += lo; ah0 += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = al0; ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
if (IS_MONERO) { if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) { ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; } else if (IS_V1) {
} ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
else { } else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
}
}
else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0; ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
} }
@ -514,6 +527,8 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
idx0 = d ^ q; idx0 = d ^ q;
} }
} }
bx1 = bx0;
bx0 = cx;
} }
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
@ -529,9 +544,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>(); constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>(); constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>(); constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>(); constexpr bool IS_V1 = xmrig::cn_uses_variant1<VARIANT>();
if (IS_MONERO && size < 43) { if (IS_V1 && size < 43) {
memset(output, 0, 64); memset(output, 0, 64);
return; return;
} }
@ -539,14 +554,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
xmrig::keccak(input, size, ctx[0]->state); xmrig::keccak(input, size, ctx[0]->state);
xmrig::keccak(input + size, size, ctx[1]->state); xmrig::keccak(input + size, size, ctx[1]->state);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
const uint8_t* l0 = ctx[0]->memory; const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory; const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state); uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state); uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
VARIANT2_INIT(0);
VARIANT2_INIT(1);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@ -555,11 +572,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
uint64_t ah0 = h0[1] ^ h0[5]; uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5]; uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
uint64_t idx0 = h0[0] ^ h0[4]; uint64_t idx0 = al0;
uint64_t idx1 = h1[0] ^ h1[4]; uint64_t idx1 = al1;
for (size_t i = 0; i < ITERATIONS; i++) { for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx0, cx1; __m128i cx0, cx1;
@ -568,52 +587,53 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]); cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
} }
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
if (VARIANT == xmrig::VARIANT_TUBE) { if (VARIANT == xmrig::VARIANT_TUBE) {
cx0 = aes_round_tweak_div(cx0, _mm_set_epi64x(ah0, al0)); cx0 = aes_round_tweak_div(cx0, ax0);
cx1 = aes_round_tweak_div(cx1, _mm_set_epi64x(ah1, al1)); cx1 = aes_round_tweak_div(cx1, ax1);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1)); cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1);
} }
else { else {
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); cx0 = _mm_aesenc_si128(cx0, ax0);
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); cx1 = _mm_aesenc_si128(cx1, ax1);
} }
if (IS_MONERO) { if (IS_V1 || (VARIANT == xmrig::VARIANT_2)) {
cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); cryptonight_monero_tweak<VARIANT>(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); cryptonight_monero_tweak<VARIANT>(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
} else { } else {
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
}; }
idx0 = EXTRACT64(cx0); idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1); idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch; uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0]; cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1]; ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi); if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01);
} else {
lo = __umul128(idx0, cl, &hi);
}
al0 += hi; al0 += hi;
ah0 += lo; ah0 += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = al0; ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
if (IS_MONERO) { if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) { ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; } else if (IS_V1) {
} ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
else { } else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
}
}
else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0; ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
} }
@ -639,22 +659,24 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
cl = ((uint64_t*) &l1[idx1 & MASK])[0]; cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1]; ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi); if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = __umul128(idx1, cl, &hi);
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11);
} else {
lo = __umul128(idx1, cl, &hi);
}
al1 += hi; al1 += hi;
ah1 += lo; ah1 += lo;
((uint64_t*)&l1[idx1 & MASK])[0] = al1; ((uint64_t*)&l1[idx1 & MASK])[0] = al1;
if (IS_MONERO) { if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) { ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1; } else if (IS_V1) {
} ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
else { } else {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
}
}
else {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1; ((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
} }
@ -677,6 +699,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
idx1 = d ^ q; idx1 = d ^ q;
} }
} }
bx01 = bx00;
bx00 = cx0;
bx11 = bx10;
bx10 = cx1;
} }
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0); cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);


@@ -107,6 +107,7 @@ inline uint32_t cn_select_mask(Algo algorithm)
 template<Algo ALGO, Variant variant> inline constexpr uint32_t cn_select_iter()   { return 0; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_0>()     { return CRYPTONIGHT_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_1>()     { return CRYPTONIGHT_ITER; }
+template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_2>()     { return CRYPTONIGHT_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XTL>()   { return CRYPTONIGHT_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_MSR>()   { return CRYPTONIGHT_MSR_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XAO>()   { return CRYPTONIGHT_XAO_ITER; }

@@ -150,29 +151,16 @@ inline uint32_t cn_select_iter(Algo algorithm, Variant variant)
 }

-template<Variant variant> inline constexpr bool cn_is_monero()       { return false; }
-template<> inline constexpr bool cn_is_monero<VARIANT_0>()           { return false; }
-template<> inline constexpr bool cn_is_monero<VARIANT_1>()           { return true; }
-template<> inline constexpr bool cn_is_monero<VARIANT_TUBE>()        { return true; }
-template<> inline constexpr bool cn_is_monero<VARIANT_XTL>()         { return true; }
-template<> inline constexpr bool cn_is_monero<VARIANT_MSR>()         { return true; }
-template<> inline constexpr bool cn_is_monero<VARIANT_XHV>()         { return false; }
-template<> inline constexpr bool cn_is_monero<VARIANT_XAO>()         { return false; }
-template<> inline constexpr bool cn_is_monero<VARIANT_RTO>()         { return true; }
-
-inline bool cn_is_monero(Variant variant)
-{
-    switch (variant) {
-    case VARIANT_0:
-    case VARIANT_XHV:
-    case VARIANT_RTO:
-        return false;
-
-    default:
-        return true;
-    }
-}
+template<Variant variant> inline constexpr bool cn_uses_variant1()   { return false; }
+template<> inline constexpr bool cn_uses_variant1<VARIANT_0>()       { return false; }
+template<> inline constexpr bool cn_uses_variant1<VARIANT_1>()       { return true; }
+template<> inline constexpr bool cn_uses_variant1<VARIANT_TUBE>()    { return true; }
+template<> inline constexpr bool cn_uses_variant1<VARIANT_XTL>()     { return true; }
+template<> inline constexpr bool cn_uses_variant1<VARIANT_MSR>()     { return true; }
+template<> inline constexpr bool cn_uses_variant1<VARIANT_XHV>()     { return false; }
+template<> inline constexpr bool cn_uses_variant1<VARIANT_XAO>()     { return false; }
+template<> inline constexpr bool cn_uses_variant1<VARIANT_RTO>()     { return true; }
+template<> inline constexpr bool cn_uses_variant1<VARIANT_2>()       { return false; }

 } /* namespace xmrig */
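The old cn_is_monero name became misleading with variant 2: variant 2 is the Monero algorithm, but it does not apply the variant 1 tweak or its 43-byte minimum input, so the trait is renamed to cn_uses_variant1 and the runtime overload is dropped. A few compile-time checks, assuming the header above is included, showing how the renamed trait reads (illustrative only):

// Variant 2 replaces the variant 1 tweak, so the "size < 43" early-out and the
// tweak1_2 XOR no longer apply to it.
static_assert(xmrig::cn_uses_variant1<xmrig::VARIANT_1>(),   "variant 1 uses the v1 tweak");
static_assert(xmrig::cn_uses_variant1<xmrig::VARIANT_RTO>(), "RTO uses the v1 tweak");
static_assert(!xmrig::cn_uses_variant1<xmrig::VARIANT_0>(),  "variant 0 predates the tweak");
static_assert(!xmrig::cn_uses_variant1<xmrig::VARIANT_2>(),  "variant 2 does not use the v1 tweak");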


@@ -25,26 +25,27 @@
 #ifndef __CRYPTONIGHT_MONERO_H__
 #define __CRYPTONIGHT_MONERO_H__

+#include <math.h>

 // VARIANT ALTERATIONS
 #ifndef XMRIG_ARM
 # define VARIANT1_INIT(part) \
     uint64_t tweak1_2_##part = 0; \
-    if (IS_MONERO) { \
+    if (IS_V1) { \
         tweak1_2_##part = (*reinterpret_cast<const uint64_t*>(input + 35 + part * size) ^ \
                            *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24)); \
     }
 #else
 # define VARIANT1_INIT(part) \
     uint64_t tweak1_2_##part = 0; \
-    if (IS_MONERO) { \
+    if (IS_V1) { \
         memcpy(&tweak1_2_##part, input + 35 + part * size, sizeof tweak1_2_##part); \
         tweak1_2_##part ^= *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24); \
     }
 #endif

 #define VARIANT1_1(p) \
-    if (IS_MONERO) { \
+    if (IS_V1) { \
         const uint8_t tmp = reinterpret_cast<const uint8_t*>(p)[11]; \
         static const uint32_t table = 0x75310; \
         const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \

@@ -52,9 +53,72 @@
     }

 #define VARIANT1_2(p, part) \
-    if (IS_MONERO) { \
+    if (IS_V1) { \
         (p) ^= tweak1_2_##part; \
     }

+#ifndef XMRIG_ARM
+# define VARIANT2_INIT(part) \
+    __m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \
+    __m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]);
+
+#ifdef _MSC_VER
+# define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { _control87(RC_DOWN, MCW_RC); }
+#else
+# define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { std::fesetround(FE_DOWNWARD); }
+#endif
+
+# define VARIANT2_INTEGER_MATH(part, cl, cx) \
+    do { \
+        const uint64_t sqrt_result = static_cast<uint64_t>(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \
+        const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
+        cl ^= static_cast<uint64_t>(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \
+        const uint32_t d = static_cast<uint32_t>(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \
+        const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
+        const uint64_t division_result = static_cast<uint32_t>(cx_1 / d) + ((cx_1 % d) << 32); \
+        division_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(division_result)); \
+        sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
+    } while (0)
+
+# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \
+    do { \
+        const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
+        const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
+        const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
+        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
+        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
+        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
+    } while (0)
+#else
+# define VARIANT2_INIT(part) \
+    uint64_t division_result_##part = h##part[12]; \
+    uint64_t sqrt_result_##part = h##part[13];
+
+# define VARIANT2_INTEGER_MATH(part, cl, cx) \
+    do { \
+        const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
+        cl ^= division_result_##part ^ (sqrt_result_##part << 32); \
+        const uint32_t d = static_cast<uint32_t>(cx_0 + (sqrt_result_##part << 1)) | 0x80000001UL; \
+        const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
+        division_result_##part = static_cast<uint32_t>(cx_1 / d) + ((cx_1 % d) << 32); \
+        const uint64_t sqrt_input = cx_0 + division_result_##part; \
+        sqrt_result_##part = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \
+        const uint64_t s = sqrt_result_##part >> 1; \
+        const uint64_t b = sqrt_result_##part & 1; \
+        const uint64_t r2 = (uint64_t)(s) * (s + b) + (sqrt_result_##part << 32); \
+        sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
+    } while (0)
+
+# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \
+    do { \
+        const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))); \
+        const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
+        const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
+        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
+        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
+        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
+    } while (0)
+#endif
+
 #endif /* __CRYPTONIGHT_MONERO_H__ */
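Putting the x86 and ARM versions of VARIANT2_INTEGER_MATH side by side, the per-iteration update is: fold last iteration's division and square-root results into cl before the 64x64 multiply, then derive a fresh 64/32-bit division and an integer square root from the just-loaded block (cx) for the next iteration, with the two carried values seeded from state words h[12] and h[13]. A plain-integer sketch of one such step, using an exact 128-bit square root instead of the SSE/libm paths (so no rounding-mode handling is needed); the names are illustrative and this is a reference sketch, not the shipped code. It assumes a compiler with unsigned __int128 such as GCC or Clang:

#include <cstdint>

// Exact form of the variant 2 square root: the largest r such that
// (2^32 + r/2)^2 <= 2^64 + n, found by binary search in 128-bit arithmetic.
static uint64_t int_sqrt_v2_ref(uint64_t n)
{
    const unsigned __int128 target = ((unsigned __int128)1 << 66) + ((unsigned __int128)n << 2);
    uint64_t lo = 0, hi = UINT32_MAX; // the result always fits in 32 bits
    while (lo < hi) {
        const uint64_t mid = lo + (hi - lo + 1) / 2;
        const unsigned __int128 side = (unsigned __int128)((1ULL << 33) + mid) * ((1ULL << 33) + mid);
        if (side <= target) { lo = mid; } else { hi = mid - 1; }
    }
    return lo;
}

// One VARIANT2_INTEGER_MATH step. cx_lo/cx_hi are the two 64-bit halves of the
// AES result cx; division_result and sqrt_result are carried over from the
// previous iteration (seeded from state words h[12] and h[13]).
static void variant2_integer_math_ref(uint64_t &cl, uint64_t cx_lo, uint64_t cx_hi,
                                      uint64_t &division_result, uint64_t &sqrt_result)
{
    // 1. Mix last iteration's results into cl before the 64x64 multiply.
    cl ^= division_result ^ (sqrt_result << 32);

    // 2. 64/32-bit division of the high half of cx by a divisor derived from
    //    the low half; quotient and remainder are packed into one 64-bit word.
    const uint32_t d = static_cast<uint32_t>(cx_lo + (sqrt_result << 1)) | 0x80000001UL;
    division_result = static_cast<uint32_t>(cx_hi / d) + ((cx_hi % d) << 32);

    // 3. Integer square root of (low half + division result) for the next round.
    sqrt_result = int_sqrt_v2_ref(cx_lo + division_result);
}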


@@ -69,7 +69,7 @@ const static uint8_t test_output_v0[160] = {
 };


-// Monero v7
+// Cryptonight variant 1 (Monero v7)
 const static uint8_t test_output_v1[160] = {
     0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
     0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,

@@ -84,6 +84,21 @@ const static uint8_t test_output_v1[160] = {
 };


+// Cryptonight variant 2 (Monero v8)
+const static uint8_t test_output_v2[160] = {
+    0x6E, 0xEE, 0x53, 0xA3, 0xDA, 0xD1, 0x8C, 0x05, 0xB8, 0xCB, 0x32, 0x17, 0xAA, 0xEA, 0xEA, 0xB4,
+    0x16, 0x11, 0x01, 0xA9, 0x08, 0x76, 0x37, 0x36, 0x6F, 0xDC, 0xCA, 0xC6, 0x92, 0x0D, 0xEA, 0x09,
+    0x91, 0x03, 0x2F, 0x5B, 0x27, 0x4D, 0x94, 0x1D, 0x60, 0x50, 0xDC, 0x1F, 0x35, 0x57, 0xEC, 0x20,
+    0xA6, 0xAC, 0x10, 0xDB, 0xCF, 0x36, 0x23, 0x8F, 0x96, 0xC7, 0x72, 0x8B, 0xF9, 0xE7, 0x30, 0xEB,
+    0x50, 0x58, 0x4B, 0xFE, 0xAD, 0xC5, 0x13, 0x79, 0x50, 0x98, 0x1C, 0x67, 0xB2, 0xEB, 0xDA, 0x64,
+    0xD4, 0xAA, 0xC4, 0xE8, 0xE5, 0xC9, 0xE7, 0x6B, 0x84, 0xC2, 0xD2, 0xE9, 0x1F, 0xA1, 0x0F, 0xDF,
+    0x45, 0x06, 0x80, 0x25, 0x32, 0x6B, 0xC4, 0x66, 0x2A, 0x69, 0x9F, 0x1E, 0x1F, 0x4C, 0xBE, 0x89,
+    0xFE, 0x61, 0xBB, 0x04, 0x79, 0xB5, 0x3B, 0x45, 0x58, 0xD9, 0x9C, 0x18, 0x7C, 0x48, 0x1B, 0x44,
+    0x92, 0xC4, 0x4C, 0xD0, 0x8F, 0x16, 0x44, 0x79, 0x71, 0x48, 0x63, 0x0B, 0x51, 0xB6, 0x33, 0x8B,
+    0x6B, 0x3F, 0xCC, 0x0A, 0x3A, 0x14, 0x3B, 0x49, 0x68, 0x46, 0xB9, 0x46, 0xC6, 0xA3, 0x03, 0x41
+};
+
 // Stellite (XTL)
 const static uint8_t test_output_xtl[160] = {
     0x8F, 0xE5, 0xF0, 0x5F, 0x02, 0x2A, 0x61, 0x7D, 0xE5, 0x3F, 0x79, 0x36, 0x4B, 0x25, 0xCB, 0xC3,


@@ -408,20 +408,46 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
 }

-template<int SHIFT>
-static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+static inline __m128i int_sqrt_v2(const uint64_t n0)
 {
-    mem_out[0] = EXTRACT64(tmp);
+    __m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(n0 >> 12), _mm_set_epi64x(0, 1023ULL << 52)));
+    x = _mm_sqrt_sd(_mm_setzero_pd(), x);
+    uint64_t r = static_cast<uint64_t>(_mm_cvtsi128_si64(_mm_castpd_si128(x)));

-    tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
-    uint64_t vh = EXTRACT64(tmp);
+    const uint64_t s = r >> 20;
+    r >>= 19;

-    uint8_t x = vh >> 24;
-    static const uint16_t table = 0x7531;
-    const uint8_t index = (((x >> SHIFT) & 6) | (x & 1)) << 1;
-    vh ^= ((table >> index) & 0x3) << 28;
+    uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1);
+#if defined _MSC_VER || (__GNUC__ >= 7)
+    _addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r);
+#else
+    if (x2 < n0) ++r;
+#endif

-    mem_out[1] = vh;
+    return _mm_cvtsi64_si128(r);
+}
+
+
+template<int VARIANT>
+static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i cx)
+{
+    if (VARIANT == xmrig::VARIANT_2) {
+        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1);
+        _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
+    } else {
+        __m128i tmp = _mm_xor_si128(bx0, cx);
+        mem_out[0] = EXTRACT64(tmp);
+
+        tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
+        uint64_t vh = EXTRACT64(tmp);
+
+        uint8_t x = static_cast<uint8_t>(vh >> 24);
+        static const uint16_t table = 0x7531;
+        const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1;
+        vh ^= ((table >> index) & 0x3) << 28;
+
+        mem_out[1] = vh;
+    }
 }
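int_sqrt_v2() packs the top bits of n0 into the mantissa of a double in [1, 2), takes one hardware square root, rebuilds the integer result from the raw bit pattern and then applies a carry-based fixup. The hash functions below call VARIANT2_SET_ROUNDING_MODE() before their main loops so that _mm_sqrt_sd rounds downward; that keeps the floating-point estimate on the low side, which is what the increment-only fixup assumes. The rounding mode is per thread and the miner simply switches it and leaves it; callers embedding this code who want the previous mode back could use a small guard like the following sketch (not part of the patch):

#include <cfenv>

// Usage: create a ScopedRoundDown on the stack around the hashing call.
struct ScopedRoundDown
{
    int saved;
    ScopedRoundDown() : saved(std::fegetround()) { std::fesetround(FE_DOWNWARD); }
    ~ScopedRoundDown() { std::fesetround(saved); }
};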
@ -431,92 +457,104 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>(); constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>(); constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>(); constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>(); constexpr bool IS_V1 = xmrig::cn_uses_variant1<VARIANT>();
if (IS_MONERO && size < 43) { if (IS_V1 && size < 43) {
memset(output, 0, 32); memset(output, 0, 32);
return; return;
} }
xmrig::keccak(input, size, ctx[0]->state); xmrig::keccak(input, size, ctx[0]->state);
VARIANT1_INIT(0)
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx[0]->memory; const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state); uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
VARIANT1_INIT(0);
VARIANT2_INIT(0);
VARIANT2_SET_ROUNDING_MODE();
uint64_t al0 = h0[0] ^ h0[4]; uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5]; uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
uint64_t idx0 = al0; uint64_t idx0 = al0;
uint64_t* ptr0 = (uint64_t*) &l0[idx0 & MASK];
for (size_t i = 0; i < ITERATIONS; i++) { for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx; __m128i cx;
if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); cx = _mm_load_si128((__m128i *) ptr0);
} }
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
if (VARIANT == xmrig::VARIANT_TUBE) { if (VARIANT == xmrig::VARIANT_TUBE) {
cx = aes_round_tweak_div(cx, _mm_set_epi64x(ah0, al0)); cx = aes_round_tweak_div(cx, ax0);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); cx = soft_aesenc((uint32_t*) ptr0, ax0);
} }
else { else {
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); cx = _mm_aesenc_si128(cx, ax0);
} }
if (IS_MONERO) { if (IS_V1 || VARIANT == xmrig::VARIANT_2) {
cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); cryptonight_monero_tweak<VARIANT>(ptr0, l0, idx0 & MASK, ax0, bx0, bx1, cx);
} else { } else {
_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); _mm_store_si128((__m128i *) ptr0, _mm_xor_si128(bx0, cx));
} }
idx0 = EXTRACT64(cx); idx0 = EXTRACT64(cx);
bx0 = cx; ptr0 = (uint64_t*) &l0[idx0 & MASK];
uint64_t hi, lo, cl, ch; uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0]; cl = ptr0[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1]; ch = ptr0[1];
lo = __umul128(idx0, cl, &hi); if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1);
}
else {
lo = __umul128(idx0, cl, &hi);
}
al0 += hi; al0 += hi;
ah0 += lo; ah0 += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = al0; ptr0[0] = al0;
if (IS_MONERO) { if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) { ptr0[1] = ah0 ^ tweak1_2_0 ^ al0;
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; } else if (IS_V1) {
} ptr0[1] = ah0 ^ tweak1_2_0;
else { } else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; ptr0[1] = ah0;
}
}
else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
} }
al0 ^= cl; al0 ^= cl;
ah0 ^= ch; ah0 ^= ch;
idx0 = al0; idx0 = al0;
ptr0 = (uint64_t*) &l0[idx0 & MASK];
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; int64_t n = ((int64_t*)ptr0)[0];
int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; int32_t d = ((int32_t*)ptr0)[2];
int64_t q = n / (d | 0x5); int64_t q = n / (d | 0x5);
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; ((int64_t*) ptr0)[0] = n ^ q;
if (VARIANT == xmrig::VARIANT_XHV) { if (VARIANT == xmrig::VARIANT_XHV) {
d = ~d; d = ~d;
} }
idx0 = d ^ q; idx0 = d ^ q;
ptr0 = (uint64_t*)&l0[idx0 & MASK];
} }
bx1 = bx0;
bx0 = cx;
} }
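Compared with variant 1, the single-hash loop now keeps two blocks of history: bx0 is the block produced by the previous iteration and bx1 the one before that (seeded from state words h[8..11]), and both feed the two VARIANT2_SHUFFLE calls; the update therefore moves from a plain bx0 = cx right after the AES step to a rotation at the bottom of the loop. A reduced sketch of the per-iteration order, with the AES round, multiply and scratchpad traffic left as comments and the __m128i registers modelled as 64-bit pairs:

#include <cstdint>

// Stand-in for the __m128i registers of the real loop.
struct Block { uint64_t lo, hi; };

// Shape of one variant 2 iteration; only the bookkeeping added by this patch
// is spelled out, everything else is summarized in comments.
static void variant2_iteration_shape(Block &bx0, Block &bx1, const Block &cx)
{
    // 1. cx = AES round of the scratchpad block at idx, keyed with ax.
    // 2. cryptonight_monero_tweak: VARIANT2_SHUFFLE mixes the three sibling
    //    chunks of the cache line with ax, bx0, bx1, then bx0 ^ cx is stored
    //    (this replaces the variant 1 high-byte tweak).
    // 3. At the new index: VARIANT2_INTEGER_MATH rewrites cl with last
    //    iteration's division/sqrt results, the 64x64 multiply runs, and a
    //    second VARIANT2_SHUFFLE fires before a is written back.
    // 4. History rotates so the next iteration still sees both previous blocks:
    bx1 = bx0;
    bx0 = cx;
}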
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
@ -532,9 +570,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>(); constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>(); constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>(); constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>(); constexpr bool IS_V1 = xmrig::cn_uses_variant1<VARIANT>();
if (IS_MONERO && size < 43) { if (IS_V1 && size < 43) {
memset(output, 0, 64); memset(output, 0, 64);
return; return;
} }
@ -542,14 +580,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
xmrig::keccak(input, size, ctx[0]->state); xmrig::keccak(input, size, ctx[0]->state);
xmrig::keccak(input + size, size, ctx[1]->state); xmrig::keccak(input + size, size, ctx[1]->state);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
const uint8_t* l0 = ctx[0]->memory; const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory; const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state); uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state); uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
VARIANT2_INIT(0);
VARIANT2_INIT(1);
VARIANT2_SET_ROUNDING_MODE();
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@ -558,124 +599,142 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
uint64_t ah0 = h0[1] ^ h0[5]; uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5]; uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
uint64_t idx0 = al0; uint64_t idx0 = al0;
uint64_t idx1 = al1; uint64_t idx1 = al1;
uint64_t* ptr0 = (uint64_t*)&l0[idx0 & MASK];
uint64_t* ptr1 = (uint64_t*)&l1[idx1 & MASK];
for (size_t i = 0; i < ITERATIONS; i++) { for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx0, cx1; __m128i cx0, cx1;
if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); cx0 = _mm_load_si128((__m128i *) ptr0);
cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]); cx1 = _mm_load_si128((__m128i *) ptr1);
} }
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
if (VARIANT == xmrig::VARIANT_TUBE) { if (VARIANT == xmrig::VARIANT_TUBE) {
cx0 = aes_round_tweak_div(cx0, _mm_set_epi64x(ah0, al0)); cx0 = aes_round_tweak_div(cx0, ax0);
cx1 = aes_round_tweak_div(cx1, _mm_set_epi64x(ah1, al1)); cx1 = aes_round_tweak_div(cx1, ax1);
} }
else if (SOFT_AES) { else if (SOFT_AES) {
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); cx0 = soft_aesenc((uint32_t*)ptr0, ax0);
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1)); cx1 = soft_aesenc((uint32_t*)ptr1, ax1);
} }
else { else {
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); cx0 = _mm_aesenc_si128(cx0, ax0);
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); cx1 = _mm_aesenc_si128(cx1, ax1);
} }
if (IS_MONERO) { if (IS_V1 || (VARIANT == xmrig::VARIANT_2)) {
cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); cryptonight_monero_tweak<VARIANT>(ptr0, l0, idx0 & MASK, ax0, bx00, bx01, cx0);
cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); cryptonight_monero_tweak<VARIANT>(ptr1, l1, idx1 & MASK, ax1, bx10, bx11, cx1);
} else { } else {
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0)); _mm_store_si128((__m128i *) ptr0, _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1)); _mm_store_si128((__m128i *) ptr1, _mm_xor_si128(bx10, cx1));
} }
idx0 = EXTRACT64(cx0); idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1); idx1 = EXTRACT64(cx1);
ptr0 = (uint64_t*)&l0[idx0 & MASK];
bx0 = cx0; ptr1 = (uint64_t*)&l1[idx1 & MASK];
bx1 = cx1;
uint64_t hi, lo, cl, ch; uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0]; cl = ((uint64_t*)ptr0)[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1]; ch = ((uint64_t*)ptr0)[1];
lo = __umul128(idx0, cl, &hi); if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01);
} else {
lo = __umul128(idx0, cl, &hi);
}
al0 += hi; al0 += hi;
ah0 += lo; ah0 += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = al0; ((uint64_t*)ptr0)[0] = al0;
if (IS_MONERO) { if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) { ((uint64_t*)ptr0)[1] = ah0 ^ tweak1_2_0 ^ al0;
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; } else if (IS_V1) {
} ((uint64_t*)ptr0)[1] = ah0 ^ tweak1_2_0;
else { } else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; ((uint64_t*)ptr0)[1] = ah0;
}
}
else {
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
} }
al0 ^= cl; al0 ^= cl;
ah0 ^= ch; ah0 ^= ch;
idx0 = al0; idx0 = al0;
ptr0 = (uint64_t*)&l0[idx0 & MASK];
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; int64_t n = ((int64_t*)ptr0)[0];
int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; int32_t d = ((int32_t*)ptr0)[2];
int64_t q = n / (d | 0x5); int64_t q = n / (d | 0x5);
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; ((int64_t*)ptr0)[0] = n ^ q;
if (VARIANT == xmrig::VARIANT_XHV) { if (VARIANT == xmrig::VARIANT_XHV) {
d = ~d; d = ~d;
} }
idx0 = d ^ q; idx0 = d ^ q;
ptr0 = (uint64_t*)&l0[idx0 & MASK];
} }
cl = ((uint64_t*) &l1[idx1 & MASK])[0]; cl = ptr1[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1]; ch = ptr1[1];
lo = __umul128(idx1, cl, &hi); if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = __umul128(idx1, cl, &hi);
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11);
} else {
lo = __umul128(idx1, cl, &hi);
}
al1 += hi; al1 += hi;
ah1 += lo; ah1 += lo;
((uint64_t*)&l1[idx1 & MASK])[0] = al1; ptr1[0] = al1;
if (IS_MONERO) { if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) { ptr1[1] = ah1 ^ tweak1_2_1 ^ al1;
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1; } else if (IS_V1) {
} ptr1[1] = ah1 ^ tweak1_2_1;
else { } else {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1; ptr1[1] = ah1;
}
}
else {
((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
} }
al1 ^= cl; al1 ^= cl;
ah1 ^= ch; ah1 ^= ch;
idx1 = al1; idx1 = al1;
ptr1 = (uint64_t*)&l1[idx1 & MASK];
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
int64_t n = ((int64_t*)&l1[idx1 & MASK])[0]; int64_t n = ((int64_t*)ptr1)[0];
int32_t d = ((int32_t*)&l1[idx1 & MASK])[2]; int32_t d = ((int32_t*)ptr1)[2];
int64_t q = n / (d | 0x5); int64_t q = n / (d | 0x5);
((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; ((int64_t*)ptr1)[0] = n ^ q;
if (VARIANT == xmrig::VARIANT_XHV) { if (VARIANT == xmrig::VARIANT_XHV) {
d = ~d; d = ~d;
} }
idx1 = d ^ q; idx1 = d ^ q;
ptr1 = (uint64_t*)&l1[idx1 & MASK];
} }
bx01 = bx00;
bx00 = cx0;
bx11 = bx10;
bx10 = cx1;
} }
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0); cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
@ -689,12 +748,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
} }
#define CN_STEP1(a, b, c, l, ptr, idx) \ #define CN_STEP1(a, b0, b1, c, l, ptr, idx) \
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \ ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
c = _mm_load_si128(ptr); c = _mm_load_si128(ptr);
#define CN_STEP2(a, b, c, l, ptr, idx) \ #define CN_STEP2(a, b0, b1, c, l, ptr, idx) \
if (VARIANT == xmrig::VARIANT_TUBE) { \ if (VARIANT == xmrig::VARIANT_TUBE) { \
c = aes_round_tweak_div(c, a); \ c = aes_round_tweak_div(c, a); \
} \ } \
@ -704,26 +763,31 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
c = _mm_aesenc_si128(c, a); \ c = _mm_aesenc_si128(c, a); \
} \ } \
\ \
b = _mm_xor_si128(b, c); \ if (IS_V1 || (VARIANT == xmrig::VARIANT_2)) { \
\ cryptonight_monero_tweak<VARIANT>((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \
if (IS_MONERO) { \
cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>(reinterpret_cast<uint64_t*>(ptr), b); \
} else { \ } else { \
_mm_store_si128(ptr, b); \ _mm_store_si128(ptr, _mm_xor_si128(b0, c)); \
} }
#define CN_STEP3(a, b, c, l, ptr, idx) \ #define CN_STEP3(part, a, b0, b1, c, l, ptr, idx) \
idx = EXTRACT64(c); \ idx = EXTRACT64(c); \
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \ ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
b = _mm_load_si128(ptr); uint64_t cl##part = ((uint64_t*)ptr)[0]; \
uint64_t ch##part = ((uint64_t*)ptr)[1];
#define CN_STEP4(a, b, c, l, mc, ptr, idx) \ #define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \
lo = __umul128(idx, EXTRACT64(b), &hi); \ if (VARIANT == xmrig::VARIANT_2) { \
VARIANT2_INTEGER_MATH(part, cl##part, c); \
lo = __umul128(idx, cl##part, &hi); \
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1); \
} else { \
lo = __umul128(idx, cl##part, &hi); \
} \
a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
\ \
if (IS_MONERO) { \ if (IS_V1) { \
_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \ _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
\ \
if (VARIANT == xmrig::VARIANT_TUBE || \ if (VARIANT == xmrig::VARIANT_TUBE || \
@ -734,7 +798,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
_mm_store_si128(ptr, a); \ _mm_store_si128(ptr, a); \
} \ } \
\ \
a = _mm_xor_si128(a, b); \ a = _mm_xor_si128(a, _mm_set_epi64x(ch##part, cl##part)); \
idx = EXTRACT64(a); \ idx = EXTRACT64(a); \
\ \
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { \ if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { \
@ -747,15 +811,27 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
} \ } \
\ \
idx = d ^ q; \ idx = d ^ q; \
} } \
b1 = b0; \
b0 = c;
#define CONST_INIT(ctx, n) \ #define CONST_INIT(ctx, n) \
__m128i mc##n; \ __m128i mc##n; \
if (IS_MONERO) { \ __m128i division_result_xmm_##n; \
__m128i sqrt_result_xmm_##n; \
if (IS_V1) { \
mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \ mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \
*(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \ *(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \
} } \
if (VARIANT == xmrig::VARIANT_2) { \
division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]); \
sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]); \
} \
__m128i ax##n = _mm_set_epi64x(h##n[1] ^ h##n[5], h##n[0] ^ h##n[4]); \
__m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]); \
__m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \
__m128i cx##n = _mm_setzero_si128();
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT> template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
@ -764,9 +840,9 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>(); constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>(); constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>(); constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>(); constexpr bool IS_V1 = xmrig::cn_uses_variant1<VARIANT>();
if (IS_MONERO && size < 43) { if (IS_V1 && size < 43) {
memset(output, 0, 32 * 3); memset(output, 0, 32 * 3);
return; return;
} }
@ -776,10 +852,6 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory)); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
} }
CONST_INIT(ctx[0], 0);
CONST_INIT(ctx[1], 1);
CONST_INIT(ctx[2], 2);
uint8_t* l0 = ctx[0]->memory; uint8_t* l0 = ctx[0]->memory;
uint8_t* l1 = ctx[1]->memory; uint8_t* l1 = ctx[1]->memory;
uint8_t* l2 = ctx[2]->memory; uint8_t* l2 = ctx[2]->memory;
@ -787,58 +859,35 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state); uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state); uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]); CONST_INIT(ctx[0], 0);
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); CONST_INIT(ctx[1], 1);
__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]); CONST_INIT(ctx[2], 2);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); VARIANT2_SET_ROUNDING_MODE();
__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i cx0 = _mm_set_epi64x(0, 0);
__m128i cx1 = _mm_set_epi64x(0, 0);
__m128i cx2 = _mm_set_epi64x(0, 0);
uint64_t idx0, idx1, idx2; uint64_t idx0, idx1, idx2;
idx0 = EXTRACT64(ax0); idx0 = EXTRACT64(ax0);
idx1 = EXTRACT64(ax1); idx1 = EXTRACT64(ax1);
idx2 = EXTRACT64(ax2); idx2 = EXTRACT64(ax2);
for (size_t i = 0; i < ITERATIONS / 2; i++) { for (size_t i = 0; i < ITERATIONS; i++) {
uint64_t hi, lo; uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2; __m128i *ptr0, *ptr1, *ptr2;
// EVEN ROUND CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2); CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1);
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2); CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2);
CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0); CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1); CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2); CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2);
// ODD ROUND
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
} }
for (size_t i = 0; i < 3; i++) { for (size_t i = 0; i < 3; i++) {
@ -855,9 +904,9 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>(); constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>(); constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>(); constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>(); constexpr bool IS_V1 = xmrig::cn_uses_variant1<VARIANT>();
if (IS_MONERO && size < 43) { if (IS_V1 && size < 43) {
memset(output, 0, 32 * 4); memset(output, 0, 32 * 4);
return; return;
} }
@ -867,11 +916,6 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory)); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
} }
CONST_INIT(ctx[0], 0);
CONST_INIT(ctx[1], 1);
CONST_INIT(ctx[2], 2);
CONST_INIT(ctx[3], 3);
uint8_t* l0 = ctx[0]->memory; uint8_t* l0 = ctx[0]->memory;
uint8_t* l1 = ctx[1]->memory; uint8_t* l1 = ctx[1]->memory;
uint8_t* l2 = ctx[2]->memory; uint8_t* l2 = ctx[2]->memory;
@ -881,18 +925,11 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state); uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state); uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]); CONST_INIT(ctx[0], 0);
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); CONST_INIT(ctx[1], 1);
__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]); CONST_INIT(ctx[2], 2);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); CONST_INIT(ctx[3], 3);
__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]); VARIANT2_SET_ROUNDING_MODE();
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
__m128i cx0 = _mm_set_epi64x(0, 0);
__m128i cx1 = _mm_set_epi64x(0, 0);
__m128i cx2 = _mm_set_epi64x(0, 0);
__m128i cx3 = _mm_set_epi64x(0, 0);
uint64_t idx0, idx1, idx2, idx3; uint64_t idx0, idx1, idx2, idx3;
idx0 = EXTRACT64(ax0); idx0 = EXTRACT64(ax0);
@ -900,52 +937,30 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
idx2 = EXTRACT64(ax2); idx2 = EXTRACT64(ax2);
idx3 = EXTRACT64(ax3); idx3 = EXTRACT64(ax3);
for (size_t i = 0; i < ITERATIONS / 2; i++) for (size_t i = 0; i < ITERATIONS; i++)
{ {
uint64_t hi, lo; uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2, *ptr3; __m128i *ptr0, *ptr1, *ptr2, *ptr3;
// EVEN ROUND CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2); CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2); CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3); CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1);
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2); CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2);
CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3); CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3);
CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0); CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1); CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2); CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2);
CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3); CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
// ODD ROUND
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
} }
for (size_t i = 0; i < 4; i++) { for (size_t i = 0; i < 4; i++) {
@ -962,9 +977,9 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>(); constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>(); constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>(); constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>(); constexpr bool IS_V1 = xmrig::cn_uses_variant1<VARIANT>();
if (IS_MONERO && size < 43) { if (IS_V1 && size < 43) {
memset(output, 0, 32 * 5); memset(output, 0, 32 * 5);
return; return;
} }
@ -974,12 +989,6 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory)); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
} }
CONST_INIT(ctx[0], 0);
CONST_INIT(ctx[1], 1);
CONST_INIT(ctx[2], 2);
CONST_INIT(ctx[3], 3);
CONST_INIT(ctx[4], 4);
uint8_t* l0 = ctx[0]->memory; uint8_t* l0 = ctx[0]->memory;
uint8_t* l1 = ctx[1]->memory; uint8_t* l1 = ctx[1]->memory;
uint8_t* l2 = ctx[2]->memory; uint8_t* l2 = ctx[2]->memory;
@ -991,21 +1000,12 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state); uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
uint64_t* h4 = reinterpret_cast<uint64_t*>(ctx[4]->state); uint64_t* h4 = reinterpret_cast<uint64_t*>(ctx[4]->state);
__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]); CONST_INIT(ctx[0], 0);
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); CONST_INIT(ctx[1], 1);
__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]); CONST_INIT(ctx[2], 2);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); CONST_INIT(ctx[3], 3);
__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]); CONST_INIT(ctx[4], 4);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]); VARIANT2_SET_ROUNDING_MODE();
__m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
__m128i ax4 = _mm_set_epi64x(h4[1] ^ h4[5], h4[0] ^ h4[4]);
__m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
__m128i cx0 = _mm_set_epi64x(0, 0);
__m128i cx1 = _mm_set_epi64x(0, 0);
__m128i cx2 = _mm_set_epi64x(0, 0);
__m128i cx3 = _mm_set_epi64x(0, 0);
__m128i cx4 = _mm_set_epi64x(0, 0);
uint64_t idx0, idx1, idx2, idx3, idx4; uint64_t idx0, idx1, idx2, idx3, idx4;
idx0 = EXTRACT64(ax0); idx0 = EXTRACT64(ax0);
@@ -1014,60 +1014,34 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
idx3 = EXTRACT64(ax3); idx3 = EXTRACT64(ax3);
idx4 = EXTRACT64(ax4); idx4 = EXTRACT64(ax4);
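Two setup changes are visible above: the per-lane CONST_INIT calls move below the state-pointer setup (where they can also seed variant-2 per-lane state), and VARIANT2_SET_ROUNDING_MODE() runs once before the main loop. Variant 2 derives part of its integer math from an SSE square root, so the result depends on the FP rounding mode; pinning it keeps every build bit-identical. A sketch of what that macro is assumed to do:

    #include <xmmintrin.h>

    // Assumption: VARIANT2_SET_ROUNDING_MODE() pins the SSE rounding mode so the
    // sqrt used by variant 2's integer-math step is deterministic. Sketch only.
    static inline void variant2_set_rounding_mode_sketch()
    {
        _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
    }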
for (size_t i = 0; i < ITERATIONS / 2; i++) for (size_t i = 0; i < ITERATIONS; i++)
{ {
uint64_t hi, lo; uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4; __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
// EVEN ROUND CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2); CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3); CN_STEP1(ax4, bx40, bx41, cx4, l4, ptr4, idx4);
CN_STEP1(ax4, bx4, cx4, l4, ptr4, idx4);
CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2); CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3); CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
CN_STEP2(ax4, bx4, cx4, l4, ptr4, idx4); CN_STEP2(ax4, bx40, bx41, cx4, l4, ptr4, idx4);
CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0);
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1);
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2); CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2);
CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3); CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3);
CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4); CN_STEP3(4, ax4, bx40, bx41, cx4, l4, ptr4, idx4);
CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0); CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1); CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2); CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2);
CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3); CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4); CN_STEP4(4, ax4, bx40, bx41, cx4, l4, mc4, ptr4, idx4);
// ODD ROUND
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP1(ax4, cx4, bx4, l4, ptr4, idx4);
CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP2(ax4, cx4, bx4, l4, ptr4, idx4);
CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4);
CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
} }
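CN_STEP3 and CN_STEP4 now take a leading lane index in addition to the split b registers; presumably the macros token-paste that index to reach per-lane variant-2 state declared by CONST_INIT (for example division/sqrt accumulators) without adding yet more explicit arguments. A small illustration of the pattern — the macro and variable names are invented for this sketch, not the real CN_STEP definitions:

    #include <emmintrin.h>

    #define LANE_STATE(n)      __m128i mc##n = _mm_setzero_si128()
    #define LANE_STEP(n, out)  (out) = _mm_add_epi64((out), mc##n)

    static void lanes_example()
    {
        LANE_STATE(0);
        LANE_STATE(1);
        __m128i c0 = _mm_setzero_si128();
        __m128i c1 = _mm_setzero_si128();
        LANE_STEP(0, c0);   // expands to mc0
        LANE_STEP(1, c1);   // expands to mc1
        (void)c0; (void)c1;
    }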
for (size_t i = 0; i < 5; i++) { for (size_t i = 0; i < 5; i++) {

View file

@@ -135,6 +135,17 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_RTO>, cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_RTO>,
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_RTO>, cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_RTO>,
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_2>,
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_2>,
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_2>,
cryptonight_double_hash<CRYPTONIGHT, true, VARIANT_2>,
cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_2>,
cryptonight_quad_hash<CRYPTONIGHT, false, VARIANT_2>,
cryptonight_penta_hash<CRYPTONIGHT, false, VARIANT_2>,
cryptonight_triple_hash<CRYPTONIGHT, true, VARIANT_2>,
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_2>,
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_2>,
# ifndef XMRIG_NO_AEON # ifndef XMRIG_NO_AEON
cryptonight_single_hash<CRYPTONIGHT_LITE, false, VARIANT_0>, cryptonight_single_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
cryptonight_double_hash<CRYPTONIGHT_LITE, false, VARIANT_0>, cryptonight_double_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
@@ -164,6 +175,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
# else # else
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
@@ -173,6 +185,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
# endif # endif
# ifndef XMRIG_NO_SUMO # ifndef XMRIG_NO_SUMO
@@ -216,6 +229,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
# else # else
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
@@ -225,6 +239,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
# endif # endif
}; };
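The dispatch table above is a flat array: every variant contributes a fixed group of ten entries (single/double with hard and soft AES, then triple/quad/penta with hard and soft AES), so unsupported algorithm/variant combinations still need ten nullptr placeholders to keep the index arithmetic valid — that is what the new rows of nullptrs in the lite and heavy sections are for. A hedged sketch of the kind of lookup this layout supports; the constants and index formula are assumptions, not the actual CpuThread::fn code:

    #include <cstddef>

    // Sketch of a flat table lookup; the values and formula are illustrative.
    typedef void (*cn_hash_fun)(void);

    constexpr size_t AV_COUNT      = 10;  // ten algo-variant slots per variant row
    constexpr size_t VARIANT_COUNT = 10;  // illustrative number of variant rows

    inline cn_hash_fun select_fn(const cn_hash_fun *table,
                                 size_t algo, size_t variant, size_t av)
    {
        // av is 1-based in this sketch; nullptr means "combination not supported"
        return table[(algo * VARIANT_COUNT + variant) * AV_COUNT + (av - 1)];
    }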

View file

@@ -55,6 +55,7 @@ bool MultiWorker<N>::selfTest()
if (m_thread->algorithm() == CRYPTONIGHT) { if (m_thread->algorithm() == CRYPTONIGHT) {
return verify(VARIANT_0, test_output_v0) && return verify(VARIANT_0, test_output_v0) &&
verify(VARIANT_1, test_output_v1) && verify(VARIANT_1, test_output_v1) &&
verify(VARIANT_2, test_output_v2) &&
verify(VARIANT_XTL, test_output_xtl) && verify(VARIANT_XTL, test_output_xtl) &&
verify(VARIANT_MSR, test_output_msr) && verify(VARIANT_MSR, test_output_msr) &&
verify(VARIANT_XAO, test_output_xao) && verify(VARIANT_XAO, test_output_xao) &&