Fixed ARM mining code.

XMRig 2019-06-15 00:28:16 +07:00
parent 088587fa72
commit b73c204e73
3 changed files with 182 additions and 139 deletions
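The core of the change set is the move away from the old free-function templates `xmrig::cn_select_mask<ALGO>()`, `cn_select_iter<ALGO, VARIANT>()`, `cn_select_memory<ALGO>()` and `cn_base_variant<VARIANT>()`: every hash function now takes a single `Algorithm::Id` template parameter and reads its properties from a `constexpr CnAlgo<ALGO> props` object. A minimal sketch of that pattern follows; all names and constants in it are illustrative assumptions, not a copy of the real table in crypto/cn/CnAlgo.h.

    // Sketch only: a compile-time property class in the style of CnAlgo<ALGO>.
    #include <cstddef>

    namespace sketch {

    struct Algorithm { enum Id { CN_1, CN_2, CN_R, CN_RWZ, CN_HALF }; };

    template<Algorithm::Id ALGO>
    class CnAlgo
    {
    public:
        constexpr CnAlgo() {}   // user-provided ctor so "constexpr CnAlgo props;" is valid

        // 2 MB scratchpad and the usual CN mask/iteration values, assumed here.
        constexpr size_t memory() const     { return 2u * 1024u * 1024u; }
        constexpr size_t mask() const       { return memory() - 16; }
        constexpr size_t iterations() const { return ALGO == Algorithm::CN_HALF ? 0x40000 : 0x80000; }
        constexpr bool isHeavy() const      { return false; }
        constexpr bool isR() const          { return ALGO == Algorithm::CN_R; }
        constexpr Algorithm::Id base() const
        {
            return ALGO == Algorithm::CN_1 ? Algorithm::CN_1 : Algorithm::CN_2;
        }
    };

    // Every property folds to a compile-time constant, so it can drive loop
    // bounds exactly as props.memory() does in the diff below.
    template<Algorithm::Id ALGO>
    void fill()
    {
        constexpr CnAlgo<ALGO> props;
        for (size_t i = 0; i < props.memory() / 16; i += 8) { /* ... */ }
    }

    } // namespace sketch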

View file

@@ -29,11 +29,11 @@
 #include "common/crypto/keccak.h"
-#include "crypto/common/portable/mm_malloc.h"
-#include "crypto/cn/CryptoNight_constants.h"
+#include "crypto/cn/CnAlgo.h"
 #include "crypto/cn/CryptoNight_monero.h"
 #include "crypto/cn/CryptoNight.h"
 #include "crypto/cn/soft_aes.h"
+#include "crypto/common/portable/mm_malloc.h"
 
 extern "C"
@@ -226,9 +226,14 @@ inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3
 }
 
-template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
+namespace xmrig {
+
+
+template<Algorithm::Id ALGO, bool SOFT_AES>
 static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
 {
+    constexpr CnAlgo<ALGO> props;
+
     __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
     __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
@@ -243,7 +248,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
     xin6 = _mm_load_si128(input + 10);
     xin7 = _mm_load_si128(input + 11);
 
-    if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
+    if (props.isHeavy()) {
         for (size_t i = 0; i < 16; i++) {
             aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
             aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
@@ -260,7 +265,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
         }
     }
 
-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
+    for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
         aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
         aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
         aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
@@ -284,37 +289,17 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
     }
 }
 
-#ifdef XMRIG_ALGO_CN_GPU
-template<xmrig::Algo ALGO, size_t MEM>
-void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
-{
-    constexpr size_t hash_size = 200; // 25x8 bytes
-    alignas(16) uint64_t hash[25];
-
-    for (uint64_t i = 0; i < MEM / 512; i++)
-    {
-        memcpy(hash, input, hash_size);
-        hash[0] ^= i;
-
-        xmrig::keccakf(hash, 24);
-        memcpy(output, hash, 160);
-        output += 160;
-
-        xmrig::keccakf(hash, 24);
-        memcpy(output, hash, 176);
-        output += 176;
-
-        xmrig::keccakf(hash, 24);
-        memcpy(output, hash, 176);
-        output += 176;
-    }
-}
-#endif
-
-template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
+template<Algorithm::Id ALGO, bool SOFT_AES>
 static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
 {
+    constexpr CnAlgo<ALGO> props;
+
+#   ifdef XMRIG_ALGO_CN_GPU
+    constexpr bool IS_HEAVY = props.isHeavy() || ALGO == Algorithm::CN_GPU;
+#   else
+    constexpr bool IS_HEAVY = props.isHeavy();
+#   endif
+
     __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
     __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
@@ -329,8 +314,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
     xout6 = _mm_load_si128(output + 10);
     xout7 = _mm_load_si128(output + 11);
 
-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
-    {
+    for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
         xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
         xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
         xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
@@ -351,13 +335,13 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
         aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
         aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
 
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
+        if (IS_HEAVY) {
             mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
         }
     }
 
-    if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-        for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
+    if (IS_HEAVY) {
+        for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
             xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
             xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
             xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
@@ -408,6 +392,9 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
     }
 }
 
+} /* namespace xmrig */
+
+
 static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
 {
     alignas(16) uint32_t k[4];
@@ -430,13 +417,18 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
 }
 
-template<xmrig::Variant VARIANT, xmrig::Variant BASE>
+namespace xmrig {
+
+
+template<Algorithm::Id ALGO>
 static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx)
 {
+    constexpr CnAlgo<ALGO> props;
+
     uint64_t* mem_out = (uint64_t*)&l[idx];
 
-    if (BASE == xmrig::VARIANT_2) {
-        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
+    if (props.base() == Algorithm::CN_2) {
+        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
         _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
     } else {
         __m128i tmp = _mm_xor_si128(bx0, cx);
@@ -446,7 +438,7 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
         uint8_t x = vh >> 24;
 
         static const uint16_t table = 0x7531;
-        const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1;
+        const uint8_t index = (((x >> (3)) & 6) | (x & 1)) << 1;
         vh ^= ((table >> index) & 0x3) << 28;
 
         mem_out[1] = vh;
@@ -454,24 +446,28 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
 }
 
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
+template<Algorithm::Id ALGO, bool SOFT_AES>
 inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
 {
-    constexpr size_t MASK         = xmrig::cn_select_mask<ALGO>();
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-    constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
+    constexpr CnAlgo<ALGO> props;
+    constexpr size_t MASK        = props.mask();
+    constexpr Algorithm::Id BASE = props.base();
 
-    if (BASE == xmrig::VARIANT_1 && size < 43) {
+#   ifdef XMRIG_ALGO_CN_HEAVY
+    constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
+#   else
+    constexpr bool IS_CN_HEAVY_TUBE = false;
+#   endif
+
+    if (BASE == Algorithm::CN_1 && size < 43) {
         memset(output, 0, 32);
         return;
    }
 
-    xmrig::keccak(input, size, ctx[0]->state);
-
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
+    keccak(input, size, ctx[0]->state);
+    cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
 
-    const uint8_t* l0 = ctx[0]->memory;
+    uint8_t* l0 = ctx[0]->memory;
     uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
 
     VARIANT1_INIT(0);
@@ -485,14 +481,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
     uint64_t idx0 = al0;
 
-    for (size_t i = 0; i < ITERATIONS; i++) {
+    for (size_t i = 0; i < props.iterations(); i++) {
         __m128i cx;
-        if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
-            cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
+        if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
+            cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
         }
 
         const __m128i ax0 = _mm_set_epi64x(ah0, al0);
-        if (VARIANT == xmrig::VARIANT_TUBE) {
+        if (IS_CN_HEAVY_TUBE) {
             cx = aes_round_tweak_div(cx, ax0);
         }
         else if (SOFT_AES) {
@@ -502,8 +498,8 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
             cx = _mm_aesenc_si128(cx, ax0);
         }
 
-        if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) {
-            cryptonight_monero_tweak<VARIANT, BASE>(l0, idx0 & MASK, ax0, bx0, bx1, cx);
+        if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
+            cryptonight_monero_tweak<ALGO>(l0, idx0 & MASK, ax0, bx0, bx1, cx);
         } else {
             _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
         }
@@ -514,10 +510,10 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
         cl = ((uint64_t*) &l0[idx0 & MASK])[0];
         ch = ((uint64_t*) &l0[idx0 & MASK])[1];
 
-        if (BASE == xmrig::VARIANT_2) {
-            if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
+        if (BASE == Algorithm::CN_2) {
+            if (props.isR()) {
                 VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
-                if (VARIANT == xmrig::VARIANT_4) {
+                if (ALGO == Algorithm::CN_R) {
                     al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
                     ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
                 }
@@ -528,11 +524,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
         lo = __umul128(idx0, cl, &hi);
 
-        if (BASE == xmrig::VARIANT_2) {
-            if (VARIANT == xmrig::VARIANT_4) {
+        if (BASE == Algorithm::CN_2) {
+            if (ALGO == Algorithm::CN_R) {
                 VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
             } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
+                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
             }
         }
@@ -541,9 +537,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
         ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
 
-        if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
+        if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-        } else if (BASE == xmrig::VARIANT_1) {
+        } else if (BASE == Algorithm::CN_1) {
            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
        } else {
            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
@@ -553,7 +549,8 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
        ah0 ^= ch;
        idx0 = al0;
 
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
+#       ifdef XMRIG_ALGO_CN_HEAVY
+        if (props.isHeavy()) {
            const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l0[idx0 & MASK]));
            const int64_t n   = vgetq_lane_s64(x, 0);
            const int32_t d   = vgetq_lane_s32(x, 2);
@@ -561,77 +558,113 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
            ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
 
-            if (VARIANT == xmrig::VARIANT_XHV) {
+            if (ALGO == Algorithm::CN_HEAVY_XHV) {
                idx0 = (~d) ^ q;
            }
            else {
                idx0 = d ^ q;
            }
        }
+#       endif
 
-        if (BASE == xmrig::VARIANT_2) {
+        if (BASE == Algorithm::CN_2) {
            bx1 = bx0;
        }
 
        bx0 = cx;
    }
 
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
-
-    xmrig::keccakf(h0, 24);
+    cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
+    keccakf(h0, 24);
+
    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
 }
 
+} /* namespace xmrig */
+
 
 #ifdef XMRIG_ALGO_CN_GPU
 template<size_t ITER, uint32_t MASK>
 void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad);
 
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
+namespace xmrig {
+
+
+template<size_t MEM>
+void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
+{
+    constexpr size_t hash_size = 200; // 25x8 bytes
+    alignas(16) uint64_t hash[25];
+
+    for (uint64_t i = 0; i < MEM / 512; i++) {
+        memcpy(hash, input, hash_size);
+        hash[0] ^= i;
+
+        xmrig::keccakf(hash, 24);
+        memcpy(output, hash, 160);
+        output += 160;
+
+        xmrig::keccakf(hash, 24);
+        memcpy(output, hash, 176);
+        output += 176;
+
+        xmrig::keccakf(hash, 24);
+        memcpy(output, hash, 176);
+        output += 176;
+    }
+}
+
+
+template<xmrig::Algorithm::Id ALGO, bool SOFT_AES>
 inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
 {
-    constexpr size_t MASK       = xmrig::CRYPTONIGHT_GPU_MASK;
-    constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM        = xmrig::cn_select_memory<ALGO>();
-
-    static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant");
-
-    xmrig::keccak(input, size, ctx[0]->state);
-    cn_explode_scratchpad_gpu<ALGO, MEM>(ctx[0]->state, ctx[0]->memory);
+    constexpr CnAlgo<ALGO> props;
+
+    keccak(input, size, ctx[0]->state);
+    cn_explode_scratchpad_gpu<props.memory()>(ctx[0]->state, ctx[0]->memory);
 
    fesetround(FE_TONEAREST);
 
-    cn_gpu_inner_arm<ITERATIONS, MASK>(ctx[0]->state, ctx[0]->memory);
+    cn_gpu_inner_arm<props.iterations(), props.mask()>(ctx[0]->state, ctx[0]->memory);
 
-    cn_implode_scratchpad<xmrig::CRYPTONIGHT_HEAVY, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
-
-    xmrig::keccakf((uint64_t*) ctx[0]->state, 24);
+    cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
+    keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
+
    memcpy(output, ctx[0]->state, 32);
 }
+
+} /* namespace xmrig */
 #endif
 
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
+namespace xmrig {
+
+
+template<Algorithm::Id ALGO, bool SOFT_AES>
 inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
 {
-    constexpr size_t MASK         = xmrig::cn_select_mask<ALGO>();
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-    constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
+    constexpr CnAlgo<ALGO> props;
+    constexpr size_t MASK        = props.mask();
+    constexpr Algorithm::Id BASE = props.base();
 
-    if (BASE == xmrig::VARIANT_1 && size < 43) {
+#   ifdef XMRIG_ALGO_CN_HEAVY
+    constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
+#   else
+    constexpr bool IS_CN_HEAVY_TUBE = false;
+#   endif
+
+    if (BASE == Algorithm::CN_1 && size < 43) {
        memset(output, 0, 64);
        return;
    }
 
-    xmrig::keccak(input, size, ctx[0]->state);
-    xmrig::keccak(input + size, size, ctx[1]->state);
+    keccak(input, size, ctx[0]->state);
+    keccak(input + size, size, ctx[1]->state);
 
-    const uint8_t* l0 = ctx[0]->memory;
-    const uint8_t* l1 = ctx[1]->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
-    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
+    uint8_t *l0 = ctx[0]->memory;
+    uint8_t *l1 = ctx[1]->memory;
+    uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
+    uint64_t *h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
 
    VARIANT1_INIT(0);
    VARIANT1_INIT(1);
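A note on the relocated cn_explode_scratchpad_gpu above: it now takes only the byte count as a template parameter, because each loop pass squeezes keccakf three times and emits 160 + 176 + 176 = 512 bytes, so the pass count is simply MEM / 512. A quick compile-time check of that invariant (illustrative fragment, not part of the commit):

    // Illustrative only: one pass of the loop fills exactly one 512-byte block,
    // and the power-of-two CN scratchpad sizes divide evenly into such blocks.
    static_assert(160 + 176 + 176 == 512, "three keccakf squeezes per 512-byte block");
    static_assert((2u * 1024u * 1024u) % 512 == 0, "a 2 MB scratchpad takes 4096 passes");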
@@ -640,8 +673,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
    VARIANT4_RANDOM_MATH_INIT(0);
    VARIANT4_RANDOM_MATH_INIT(1);
 
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
+    cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
+    cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
 
    uint64_t al0 = h0[0] ^ h0[4];
    uint64_t al1 = h1[0] ^ h1[4];
@@ -656,16 +689,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
    uint64_t idx0 = al0;
    uint64_t idx1 = al1;
 
-    for (size_t i = 0; i < ITERATIONS; i++) {
+    for (size_t i = 0; i < props.iterations(); i++) {
        __m128i cx0, cx1;
-        if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
+        if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
            cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
            cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
        }
 
        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
        const __m128i ax1 = _mm_set_epi64x(ah1, al1);
-        if (VARIANT == xmrig::VARIANT_TUBE) {
+        if (IS_CN_HEAVY_TUBE) {
            cx0 = aes_round_tweak_div(cx0, ax0);
            cx1 = aes_round_tweak_div(cx1, ax1);
        }
@@ -678,9 +711,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
            cx1 = _mm_aesenc_si128(cx1, ax1);
        }
 
-        if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) {
-            cryptonight_monero_tweak<VARIANT, BASE>(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
-            cryptonight_monero_tweak<VARIANT, BASE>(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
+        if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
+            cryptonight_monero_tweak<ALGO>(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
+            cryptonight_monero_tweak<ALGO>(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
        } else {
            _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
            _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
@@ -693,10 +726,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        cl = ((uint64_t*) &l0[idx0 & MASK])[0];
        ch = ((uint64_t*) &l0[idx0 & MASK])[1];
 
-        if (BASE == xmrig::VARIANT_2) {
-            if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
+        if (BASE == Algorithm::CN_2) {
+            if (props.isR()) {
                VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
-                if (VARIANT == xmrig::VARIANT_4) {
+                if (ALGO == Algorithm::CN_R) {
                    al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
                    ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
                }
@@ -707,11 +740,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        lo = __umul128(idx0, cl, &hi);
 
-        if (BASE == xmrig::VARIANT_2) {
-            if (VARIANT == xmrig::VARIANT_4) {
+        if (BASE == Algorithm::CN_2) {
+            if (ALGO == Algorithm::CN_R) {
                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
            } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
+                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
            }
        }
@@ -720,9 +753,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
 
-        if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
+        if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-        } else if (BASE == xmrig::VARIANT_1) {
+        } else if (BASE == Algorithm::CN_1) {
            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
        } else {
            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
@@ -732,7 +765,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        ah0 ^= ch;
        idx0 = al0;
 
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
+#       ifdef XMRIG_ALGO_CN_HEAVY
+        if (props.isHeavy()) {
            const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l0[idx0 & MASK]));
            const int64_t n   = vgetq_lane_s64(x, 0);
            const int32_t d   = vgetq_lane_s32(x, 2);
@@ -740,21 +774,22 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
            ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
 
-            if (VARIANT == xmrig::VARIANT_XHV) {
+            if (ALGO == Algorithm::CN_HEAVY_XHV) {
                idx0 = (~d) ^ q;
            }
            else {
                idx0 = d ^ q;
            }
        }
+#       endif
 
        cl = ((uint64_t*) &l1[idx1 & MASK])[0];
        ch = ((uint64_t*) &l1[idx1 & MASK])[1];
 
-        if (BASE == xmrig::VARIANT_2) {
-            if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
+        if (BASE == Algorithm::CN_2) {
+            if (props.isR()) {
                VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
-                if (VARIANT == xmrig::VARIANT_4) {
+                if (ALGO == Algorithm::CN_R) {
                    al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
                    ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
                }
@@ -765,11 +800,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        lo = __umul128(idx1, cl, &hi);
 
-        if (BASE == xmrig::VARIANT_2) {
-            if (VARIANT == xmrig::VARIANT_4) {
+        if (BASE == Algorithm::CN_2) {
+            if (ALGO == Algorithm::CN_R) {
                VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
            } else {
-                VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
+                VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
            }
        }
@@ -778,9 +813,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        ((uint64_t*)&l1[idx1 & MASK])[0] = al1;
 
-        if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
+        if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
-        } else if (BASE == xmrig::VARIANT_1) {
+        } else if (BASE == Algorithm::CN_1) {
            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
        } else {
            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
@@ -790,7 +825,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        ah1 ^= ch;
        idx1 = al1;
 
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
+#       ifdef XMRIG_ALGO_CN_HEAVY
+        if (props.isHeavy()) {
            const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l1[idx1 & MASK]));
            const int64_t n   = vgetq_lane_s64(x, 0);
            const int32_t d   = vgetq_lane_s32(x, 2);
@@ -798,47 +834,54 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
            ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
 
-            if (VARIANT == xmrig::VARIANT_XHV) {
+            if (ALGO == Algorithm::CN_HEAVY_XHV) {
                idx1 = (~d) ^ q;
            }
            else {
                idx1 = d ^ q;
            }
        }
+#       endif
 
-        if (BASE == xmrig::VARIANT_2) {
+        if (BASE == Algorithm::CN_2) {
            bx01 = bx00;
            bx11 = bx10;
        }
 
        bx00 = cx0;
        bx10 = cx1;
    }
 
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
-
-    xmrig::keccakf(h0, 24);
-    xmrig::keccakf(h1, 24);
+    cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
+    cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
+
+    keccakf(h0, 24);
+    keccakf(h1, 24);
 
    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
    extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
 }
 
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
+template<Algorithm::Id ALGO, bool SOFT_AES>
 inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
 {
 }
 
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
+template<Algorithm::Id ALGO, bool SOFT_AES>
 inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
 {
 }
 
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
+template<Algorithm::Id ALGO, bool SOFT_AES>
 inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
 {
 }
 
-#endif /* __CRYPTONIGHT_ARM_H__ */
+} /* namespace xmrig */
+
+
+#endif /* XMRIG_CRYPTONIGHT_ARM_H */
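Worth noting in the hunks above is the two-level gating idiom: a constexpr flag (IS_CN_HEAVY_TUBE, IS_HEAVY) lets the compiler fold dead branches away in every build, while the surrounding #ifdef XMRIG_ALGO_CN_HEAVY removes code that names identifiers which do not exist at all in heavy-disabled builds. A reduced sketch of the idiom (feature macro name reused from the diff; the rest is illustrative):

    // Sketch only: constexpr flag for dead-branch folding, #ifdef for code
    // that references build-dependent identifiers.
    #ifdef XMRIG_ALGO_CN_HEAVY
    constexpr bool IS_CN_HEAVY_TUBE = true;   // the real code compares ALGO here
    #else
    constexpr bool IS_CN_HEAVY_TUBE = false;
    #endif

    inline int sample(int x)
    {
        if (IS_CN_HEAVY_TUBE) {   // constant-folded either way; no runtime cost
            return x * 3;
        }
        return x;
    }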

View file

@@ -141,7 +141,7 @@
    vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
    vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
    vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
-    if (ALGO == Algorithm::CN_4) { \
+    if (ALGO == Algorithm::CN_R) { \
        _c = veorq_u64(veorq_u64(_c, chunk3), veorq_u64(chunk1, chunk2)); \
    } \
    } while (0)

View file

@@ -26,7 +26,7 @@
 #include <arm_neon.h>
 
-#include "crypto/cn/CryptoNight_constants.h"
+#include "crypto/cn/CnAlgo.h"
 
 inline void vandq_f32(float32x4_t &v, uint32_t v2)
@@ -237,4 +237,4 @@ void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad)
    }
 }
 
-template void cn_gpu_inner_arm<xmrig::CRYPTONIGHT_GPU_ITER, xmrig::CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad);
+template void cn_gpu_inner_arm<xmrig::CnAlgo<xmrig::Algorithm::CN_GPU>().iterations(), xmrig::CnAlgo<xmrig::Algorithm::CN_GPU>().mask()>(const uint8_t* spad, uint8_t* lpad);
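The new explicit instantiation deserves a note: `xmrig::CnAlgo<xmrig::Algorithm::CN_GPU>().iterations()` constructs a temporary of a literal type and calls a constexpr member function inside a template-argument list, which is a valid constant expression. A reduced sketch of why this compiles; the names and the 0xC000 value below are stand-ins, not the upstream definitions:

    // Sketch only: a constexpr member call on a temporary of a literal type
    // is a constant expression, so it can appear as a non-type template argument.
    #include <cstddef>

    struct Props {
        constexpr size_t iterations() const { return 0xC000; }
    };

    template<size_t ITER>
    struct Kernel {
        static constexpr size_t iter = ITER;
    };

    // Same shape as the instantiation in the diff:
    using GpuKernel = Kernel<Props().iterations()>;
    static_assert(GpuKernel::iter == 0xC000, "constant-folded template argument");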