|
|
|
@ -306,7 +306,21 @@ inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3
|
|
|
|
|
namespace xmrig {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
|
|
|
|
template<int interleave>
|
|
|
|
|
static inline constexpr uint64_t interleaved_index(uint64_t k)
|
|
|
|
|
{
|
|
|
|
|
return ((k & ~63ULL) << interleave) | (k & 63);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<>
|
|
|
|
|
inline constexpr uint64_t interleaved_index<0>(uint64_t k)
|
|
|
|
|
{
|
|
|
|
|
return k;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
|
|
|
|
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
|
|
|
|
{
|
|
|
|
|
constexpr CnAlgo<ALGO> props;
|
|
|
|
@ -343,6 +357,11 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
|
|
|
|
if (interleave > 0) {
|
|
|
|
|
_mm_prefetch((const char*)(output), _MM_HINT_T0);
|
|
|
|
|
_mm_prefetch((const char*)(output + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
|
|
|
|
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
|
|
|
|
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
|
|
|
@ -354,19 +373,21 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
|
|
|
|
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
|
|
|
|
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
|
|
|
|
|
|
|
|
|
_mm_store_si128(output + i + 0, xin0);
|
|
|
|
|
_mm_store_si128(output + i + 1, xin1);
|
|
|
|
|
_mm_store_si128(output + i + 2, xin2);
|
|
|
|
|
_mm_store_si128(output + i + 3, xin3);
|
|
|
|
|
_mm_store_si128(output + i + 4, xin4);
|
|
|
|
|
_mm_store_si128(output + i + 5, xin5);
|
|
|
|
|
_mm_store_si128(output + i + 6, xin6);
|
|
|
|
|
_mm_store_si128(output + i + 7, xin7);
|
|
|
|
|
_mm_store_si128(output + 0, xin0);
|
|
|
|
|
_mm_store_si128(output + 1, xin1);
|
|
|
|
|
_mm_store_si128(output + 2, xin2);
|
|
|
|
|
_mm_store_si128(output + 3, xin3);
|
|
|
|
|
output += (64 << interleave) / sizeof(__m128i);
|
|
|
|
|
_mm_store_si128(output + 0, xin4);
|
|
|
|
|
_mm_store_si128(output + 1, xin5);
|
|
|
|
|
_mm_store_si128(output + 2, xin6);
|
|
|
|
|
_mm_store_si128(output + 3, xin7);
|
|
|
|
|
output += (64 << interleave) / sizeof(__m128i);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
|
|
|
|
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
|
|
|
|
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
|
|
|
|
{
|
|
|
|
|
constexpr CnAlgo<ALGO> props;
|
|
|
|
@ -387,15 +408,25 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
|
|
|
|
xout6 = _mm_load_si128(output + 10);
|
|
|
|
|
xout7 = _mm_load_si128(output + 11);
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
|
|
|
|
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
|
|
|
|
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
|
|
|
|
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
|
|
|
|
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
|
|
|
|
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
|
|
|
|
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
|
|
|
|
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
|
|
|
|
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
|
|
|
|
const __m128i* input_begin = input;
|
|
|
|
|
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
|
|
|
|
|
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
|
|
|
|
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
|
|
|
|
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
|
|
|
|
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
|
|
|
|
input += (64 << interleave) / sizeof(__m128i);
|
|
|
|
|
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
|
|
|
|
|
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
|
|
|
|
|
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
|
|
|
|
|
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
|
|
|
|
|
input += (64 << interleave) / sizeof(__m128i);
|
|
|
|
|
|
|
|
|
|
i += 8;
|
|
|
|
|
|
|
|
|
|
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
|
|
|
|
|
_mm_prefetch((const char*)(input), _MM_HINT_T0);
|
|
|
|
|
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
|
|
|
|
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
|
|
|
@ -414,15 +445,25 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (IS_HEAVY) {
|
|
|
|
|
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
|
|
|
|
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
|
|
|
|
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
|
|
|
|
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
|
|
|
|
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
|
|
|
|
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
|
|
|
|
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
|
|
|
|
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
|
|
|
|
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
|
|
|
|
input = input_begin;
|
|
|
|
|
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
|
|
|
|
|
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
|
|
|
|
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
|
|
|
|
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
|
|
|
|
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
|
|
|
|
input += (64 << interleave) / sizeof(__m128i);
|
|
|
|
|
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
|
|
|
|
|
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
|
|
|
|
|
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
|
|
|
|
|
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
|
|
|
|
|
input += (64 << interleave) / sizeof(__m128i);
|
|
|
|
|
|
|
|
|
|
i += 8;
|
|
|
|
|
|
|
|
|
|
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
|
|
|
|
|
_mm_prefetch((const char*)(input), _MM_HINT_T0);
|
|
|
|
|
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
|
|
|
|
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
|
|
|
@ -558,7 +599,7 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
|
|
|
|
|
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
|
|
|
|
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
|
|
|
|
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
|
|
|
|
{
|
|
|
|
|
constexpr CnAlgo<ALGO> props;
|
|
|
|
@ -577,7 +618,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
keccak(input, size, ctx[0]->state);
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
|
|
|
|
|
|
|
|
|
|
uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
|
|
|
|
uint8_t *l0 = ctx[0]->memory;
|
|
|
|
@ -620,7 +661,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
for (size_t i = 0; i < props.iterations(); i++) {
|
|
|
|
|
__m128i cx;
|
|
|
|
|
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
|
|
|
|
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
|
|
|
|
|
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
|
|
|
|
|
if (ALGO == Algorithm::CN_CCX) {
|
|
|
|
|
cryptonight_conceal_tweak(cx, conc_var);
|
|
|
|
|
}
|
|
|
|
@ -632,12 +673,12 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
}
|
|
|
|
|
else if (SOFT_AES) {
|
|
|
|
|
if (ALGO == Algorithm::CN_CCX) {
|
|
|
|
|
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
|
|
|
|
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
|
|
|
|
|
cryptonight_conceal_tweak(cx, conc_var);
|
|
|
|
|
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
cx = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
|
|
|
|
cx = soft_aesenc(&l0[interleaved_index<interleave>(idx0 & MASK)], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
@ -645,16 +686,16 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
|
|
|
|
|
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
|
|
|
|
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
|
|
|
|
} else {
|
|
|
|
|
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[idx0 & MASK]), _mm_xor_si128(bx0, cx));
|
|
|
|
|
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]), _mm_xor_si128(bx0, cx));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
idx0 = static_cast<uint64_t>(_mm_cvtsi128_si64(cx));
|
|
|
|
|
|
|
|
|
|
uint64_t hi, lo, cl, ch;
|
|
|
|
|
cl = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[0];
|
|
|
|
|
ch = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[1];
|
|
|
|
|
cl = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[0];
|
|
|
|
|
ch = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[1];
|
|
|
|
|
|
|
|
|
|
if (BASE == Algorithm::CN_2) {
|
|
|
|
|
if (props.isR()) {
|
|
|
|
@ -681,14 +722,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
al0 += hi;
|
|
|
|
|
ah0 += lo;
|
|
|
|
|
|
|
|
|
|
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[0] = al0;
|
|
|
|
|
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = al0;
|
|
|
|
|
|
|
|
|
|
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
|
|
|
|
|
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
|
|
|
|
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
|
|
|
|
} else if (BASE == Algorithm::CN_1) {
|
|
|
|
|
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
|
|
|
|
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0;
|
|
|
|
|
} else {
|
|
|
|
|
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0;
|
|
|
|
|
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
al0 ^= cl;
|
|
|
|
@ -697,11 +738,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
|
|
|
|
|
# ifdef XMRIG_ALGO_CN_HEAVY
|
|
|
|
|
if (props.isHeavy()) {
|
|
|
|
|
int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
|
|
|
|
|
int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
|
|
|
|
|
int64_t n = ((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0];
|
|
|
|
|
int32_t d = ((int32_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[2];
|
|
|
|
|
int64_t q = n / (d | 0x5);
|
|
|
|
|
|
|
|
|
|
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
|
|
|
|
|
((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = n ^ q;
|
|
|
|
|
|
|
|
|
|
if (ALGO == Algorithm::CN_HEAVY_XHV) {
|
|
|
|
|
d = ~d;
|
|
|
|
@ -722,7 +763,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
}
|
|
|
|
|
# endif
|
|
|
|
|
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
|
|
|
|
keccakf(h0, 24);
|
|
|
|
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
|
|
|
|
}
|
|
|
|
@ -810,7 +851,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
keccak(input, size, ctx[0]->state);
|
|
|
|
|
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
|
|
|
|
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
|
|
|
|
|
|
|
|
|
if (ALGO == Algorithm::CN_2) {
|
|
|
|
|
if (ASM == Assembly::INTEL) {
|
|
|
|
@ -887,7 +928,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|
|
|
|
ctx[0]->generated_code(ctx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
|
|
|
|
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
|
|
|
|
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
|
|
|
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
|
|
|
|
}
|
|
|
|
@ -909,8 +950,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|
|
|
|
keccak(input, size, ctx[0]->state);
|
|
|
|
|
keccak(input + size, size, ctx[1]->state);
|
|
|
|
|
|
|
|
|
|
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
|
|
|
|
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
|
|
|
|
|
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
|
|
|
|
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
|
|
|
|
|
|
|
|
|
|
if (ALGO == Algorithm::CN_2) {
|
|
|
|
|
cnv2_double_mainloop_sandybridge_asm(ctx);
|
|
|
|
@ -939,8 +980,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|
|
|
|
ctx[0]->generated_code(ctx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
|
|
|
|
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
|
|
|
|
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
|
|
|
|
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
|
|
|
|
|
|
|
|
|
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
|
|
|
|
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
|
|
|
@ -991,8 +1032,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
VARIANT4_RANDOM_MATH_INIT(0);
|
|
|
|
|
VARIANT4_RANDOM_MATH_INIT(1);
|
|
|
|
|
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
|
|
|
|
|
|
|
|
|
|
uint64_t al0 = h0[0] ^ h0[4];
|
|
|
|
|
uint64_t al1 = h1[0] ^ h1[4];
|
|
|
|
@ -1187,8 +1228,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
bx10 = cx1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
|
|
|
|
|
|
|
|
|
|
keccakf(h0, 24);
|
|
|
|
|
keccakf(h1, 24);
|
|
|
|
@ -1333,7 +1374,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < 3; i++) {
|
|
|
|
|
keccak(input + size * i, size, ctx[i]->state);
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t* l0 = ctx[0]->memory;
|
|
|
|
@ -1378,7 +1419,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < 3; i++) {
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
|
|
|
|
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
|
|
|
|
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
|
|
|
|
}
|
|
|
|
@ -1407,7 +1448,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < 4; i++) {
|
|
|
|
|
keccak(input + size * i, size, ctx[i]->state);
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t* l0 = ctx[0]->memory;
|
|
|
|
@ -1460,7 +1501,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < 4; i++) {
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
|
|
|
|
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
|
|
|
|
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
|
|
|
|
}
|
|
|
|
@ -1489,7 +1530,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < 5; i++) {
|
|
|
|
|
keccak(input + size * i, size, ctx[i]->state);
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
|
|
|
|
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t* l0 = ctx[0]->memory;
|
|
|
|
@ -1550,7 +1591,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < 5; i++) {
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
|
|
|
|
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
|
|
|
|
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
|
|
|
|
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
|
|
|
|
}
|
|
|
|
|