mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-23 20:19:23 +00:00
Fixed ARM mining code.
This commit is contained in:
parent
088587fa72
commit
b73c204e73
3 changed files with 182 additions and 139 deletions
|
@ -29,11 +29,11 @@
|
||||||
|
|
||||||
|
|
||||||
#include "common/crypto/keccak.h"
|
#include "common/crypto/keccak.h"
|
||||||
#include "crypto/common/portable/mm_malloc.h"
|
#include "crypto/cn/CnAlgo.h"
|
||||||
#include "crypto/cn/CryptoNight_constants.h"
|
|
||||||
#include "crypto/cn/CryptoNight_monero.h"
|
#include "crypto/cn/CryptoNight_monero.h"
|
||||||
#include "crypto/cn/CryptoNight.h"
|
#include "crypto/cn/CryptoNight.h"
|
||||||
#include "crypto/cn/soft_aes.h"
|
#include "crypto/cn/soft_aes.h"
|
||||||
|
#include "crypto/common/portable/mm_malloc.h"
|
||||||
|
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
|
@ -226,9 +226,14 @@ inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||||
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
{
|
{
|
||||||
|
constexpr CnAlgo<ALGO> props;
|
||||||
|
|
||||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||||
|
|
||||||
|
@ -243,7 +248,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
xin6 = _mm_load_si128(input + 10);
|
xin6 = _mm_load_si128(input + 10);
|
||||||
xin7 = _mm_load_si128(input + 11);
|
xin7 = _mm_load_si128(input + 11);
|
||||||
|
|
||||||
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
if (props.isHeavy()) {
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||||
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||||
|
@ -260,7 +265,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
|
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||||
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||||
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||||
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||||
|
@ -284,37 +289,17 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef XMRIG_ALGO_CN_GPU
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||||
template<xmrig::Algo ALGO, size_t MEM>
|
|
||||||
void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
|
|
||||||
{
|
|
||||||
constexpr size_t hash_size = 200; // 25x8 bytes
|
|
||||||
alignas(16) uint64_t hash[25];
|
|
||||||
|
|
||||||
for (uint64_t i = 0; i < MEM / 512; i++)
|
|
||||||
{
|
|
||||||
memcpy(hash, input, hash_size);
|
|
||||||
hash[0] ^= i;
|
|
||||||
|
|
||||||
xmrig::keccakf(hash, 24);
|
|
||||||
memcpy(output, hash, 160);
|
|
||||||
output += 160;
|
|
||||||
|
|
||||||
xmrig::keccakf(hash, 24);
|
|
||||||
memcpy(output, hash, 176);
|
|
||||||
output += 176;
|
|
||||||
|
|
||||||
xmrig::keccakf(hash, 24);
|
|
||||||
memcpy(output, hash, 176);
|
|
||||||
output += 176;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
|
|
||||||
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
{
|
{
|
||||||
|
constexpr CnAlgo<ALGO> props;
|
||||||
|
|
||||||
|
# ifdef XMRIG_ALGO_CN_GPU
|
||||||
|
constexpr bool IS_HEAVY = props.isHeavy() || ALGO == Algorithm::CN_GPU;
|
||||||
|
# else
|
||||||
|
constexpr bool IS_HEAVY = props.isHeavy();
|
||||||
|
# endif
|
||||||
|
|
||||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||||
|
|
||||||
|
@ -329,8 +314,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
xout6 = _mm_load_si128(output + 10);
|
xout6 = _mm_load_si128(output + 10);
|
||||||
xout7 = _mm_load_si128(output + 11);
|
xout7 = _mm_load_si128(output + 11);
|
||||||
|
|
||||||
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
|
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||||
{
|
|
||||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||||
|
@ -351,13 +335,13 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||||
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||||
|
|
||||||
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
if (IS_HEAVY) {
|
||||||
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
if (IS_HEAVY) {
|
||||||
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
|
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||||
|
@ -408,6 +392,9 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} /* namespace xmrig */
|
||||||
|
|
||||||
|
|
||||||
static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
|
static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
|
||||||
{
|
{
|
||||||
alignas(16) uint32_t k[4];
|
alignas(16) uint32_t k[4];
|
||||||
|
@ -430,13 +417,18 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Variant VARIANT, xmrig::Variant BASE>
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
|
template<Algorithm::Id ALGO>
|
||||||
static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx)
|
static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx)
|
||||||
{
|
{
|
||||||
|
constexpr CnAlgo<ALGO> props;
|
||||||
|
|
||||||
uint64_t* mem_out = (uint64_t*)&l[idx];
|
uint64_t* mem_out = (uint64_t*)&l[idx];
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (props.base() == Algorithm::CN_2) {
|
||||||
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||||
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
|
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
|
||||||
} else {
|
} else {
|
||||||
__m128i tmp = _mm_xor_si128(bx0, cx);
|
__m128i tmp = _mm_xor_si128(bx0, cx);
|
||||||
|
@ -446,7 +438,7 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
|
||||||
|
|
||||||
uint8_t x = vh >> 24;
|
uint8_t x = vh >> 24;
|
||||||
static const uint16_t table = 0x7531;
|
static const uint16_t table = 0x7531;
|
||||||
const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1;
|
const uint8_t index = (((x >> (3)) & 6) | (x & 1)) << 1;
|
||||||
vh ^= ((table >> index) & 0x3) << 28;
|
vh ^= ((table >> index) & 0x3) << 28;
|
||||||
|
|
||||||
mem_out[1] = vh;
|
mem_out[1] = vh;
|
||||||
|
@ -454,24 +446,28 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||||
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||||
{
|
{
|
||||||
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
|
constexpr CnAlgo<ALGO> props;
|
||||||
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
|
constexpr size_t MASK = props.mask();
|
||||||
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
|
constexpr Algorithm::Id BASE = props.base();
|
||||||
constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
|
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_1 && size < 43) {
|
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||||
|
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
|
||||||
|
# else
|
||||||
|
constexpr bool IS_CN_HEAVY_TUBE = false;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||||
memset(output, 0, 32);
|
memset(output, 0, 32);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
xmrig::keccak(input, size, ctx[0]->state);
|
keccak(input, size, ctx[0]->state);
|
||||||
|
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
|
||||||
|
|
||||||
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
uint8_t* l0 = ctx[0]->memory;
|
||||||
|
|
||||||
const uint8_t* l0 = ctx[0]->memory;
|
|
||||||
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||||
|
|
||||||
VARIANT1_INIT(0);
|
VARIANT1_INIT(0);
|
||||||
|
@ -485,14 +481,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
uint64_t idx0 = al0;
|
uint64_t idx0 = al0;
|
||||||
|
|
||||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
for (size_t i = 0; i < props.iterations(); i++) {
|
||||||
__m128i cx;
|
__m128i cx;
|
||||||
if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
|
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
|
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
|
||||||
}
|
}
|
||||||
|
|
||||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||||
if (VARIANT == xmrig::VARIANT_TUBE) {
|
if (IS_CN_HEAVY_TUBE) {
|
||||||
cx = aes_round_tweak_div(cx, ax0);
|
cx = aes_round_tweak_div(cx, ax0);
|
||||||
}
|
}
|
||||||
else if (SOFT_AES) {
|
else if (SOFT_AES) {
|
||||||
|
@ -502,8 +498,8 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
cx = _mm_aesenc_si128(cx, ax0);
|
cx = _mm_aesenc_si128(cx, ax0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) {
|
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
|
||||||
cryptonight_monero_tweak<VARIANT, BASE>(l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
cryptonight_monero_tweak<ALGO>(l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
||||||
} else {
|
} else {
|
||||||
_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
|
_mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
|
||||||
}
|
}
|
||||||
|
@ -514,10 +510,10 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
|
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
|
||||||
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
|
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == Algorithm::CN_2) {
|
||||||
if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
|
if (props.isR()) {
|
||||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
|
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (ALGO == Algorithm::CN_R) {
|
||||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||||
}
|
}
|
||||||
|
@ -528,11 +524,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
lo = __umul128(idx0, cl, &hi);
|
lo = __umul128(idx0, cl, &hi);
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == Algorithm::CN_2) {
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (ALGO == Algorithm::CN_R) {
|
||||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
|
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
|
||||||
} else {
|
} else {
|
||||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -541,9 +537,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
|
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
|
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
|
||||||
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
||||||
} else if (BASE == xmrig::VARIANT_1) {
|
} else if (BASE == Algorithm::CN_1) {
|
||||||
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
||||||
} else {
|
} else {
|
||||||
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
|
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
|
||||||
|
@ -553,7 +549,8 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
ah0 ^= ch;
|
ah0 ^= ch;
|
||||||
idx0 = al0;
|
idx0 = al0;
|
||||||
|
|
||||||
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||||
|
if (props.isHeavy()) {
|
||||||
const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l0[idx0 & MASK]));
|
const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l0[idx0 & MASK]));
|
||||||
const int64_t n = vgetq_lane_s64(x, 0);
|
const int64_t n = vgetq_lane_s64(x, 0);
|
||||||
const int32_t d = vgetq_lane_s32(x, 2);
|
const int32_t d = vgetq_lane_s32(x, 2);
|
||||||
|
@ -561,77 +558,113 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
|
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
|
||||||
|
|
||||||
if (VARIANT == xmrig::VARIANT_XHV) {
|
if (ALGO == Algorithm::CN_HEAVY_XHV) {
|
||||||
idx0 = (~d) ^ q;
|
idx0 = (~d) ^ q;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
idx0 = d ^ q;
|
idx0 = d ^ q;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == Algorithm::CN_2) {
|
||||||
bx1 = bx0;
|
bx1 = bx0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bx0 = cx;
|
bx0 = cx;
|
||||||
}
|
}
|
||||||
|
|
||||||
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
||||||
|
keccakf(h0, 24);
|
||||||
xmrig::keccakf(h0, 24);
|
|
||||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} /* namespace xmrig */
|
||||||
|
|
||||||
|
|
||||||
#ifdef XMRIG_ALGO_CN_GPU
|
#ifdef XMRIG_ALGO_CN_GPU
|
||||||
template<size_t ITER, uint32_t MASK>
|
template<size_t ITER, uint32_t MASK>
|
||||||
void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad);
|
void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad);
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
|
template<size_t MEM>
|
||||||
|
void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
|
||||||
|
{
|
||||||
|
constexpr size_t hash_size = 200; // 25x8 bytes
|
||||||
|
alignas(16) uint64_t hash[25];
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < MEM / 512; i++) {
|
||||||
|
memcpy(hash, input, hash_size);
|
||||||
|
hash[0] ^= i;
|
||||||
|
|
||||||
|
xmrig::keccakf(hash, 24);
|
||||||
|
memcpy(output, hash, 160);
|
||||||
|
output += 160;
|
||||||
|
|
||||||
|
xmrig::keccakf(hash, 24);
|
||||||
|
memcpy(output, hash, 176);
|
||||||
|
output += 176;
|
||||||
|
|
||||||
|
xmrig::keccakf(hash, 24);
|
||||||
|
memcpy(output, hash, 176);
|
||||||
|
output += 176;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<xmrig::Algorithm::Id ALGO, bool SOFT_AES>
|
||||||
inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||||
{
|
{
|
||||||
constexpr size_t MASK = xmrig::CRYPTONIGHT_GPU_MASK;
|
constexpr CnAlgo<ALGO> props;
|
||||||
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
|
|
||||||
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
|
|
||||||
|
|
||||||
static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant");
|
keccak(input, size, ctx[0]->state);
|
||||||
|
cn_explode_scratchpad_gpu<props.memory()>(ctx[0]->state, ctx[0]->memory);
|
||||||
xmrig::keccak(input, size, ctx[0]->state);
|
|
||||||
cn_explode_scratchpad_gpu<ALGO, MEM>(ctx[0]->state, ctx[0]->memory);
|
|
||||||
|
|
||||||
fesetround(FE_TONEAREST);
|
fesetround(FE_TONEAREST);
|
||||||
|
|
||||||
cn_gpu_inner_arm<ITERATIONS, MASK>(ctx[0]->state, ctx[0]->memory);
|
cn_gpu_inner_arm<props.iterations(), props.mask()>(ctx[0]->state, ctx[0]->memory);
|
||||||
|
|
||||||
cn_implode_scratchpad<xmrig::CRYPTONIGHT_HEAVY, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
||||||
|
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||||
xmrig::keccakf((uint64_t*) ctx[0]->state, 24);
|
|
||||||
memcpy(output, ctx[0]->state, 32);
|
memcpy(output, ctx[0]->state, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} /* namespace xmrig */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||||
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||||
{
|
{
|
||||||
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
|
constexpr CnAlgo<ALGO> props;
|
||||||
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
|
constexpr size_t MASK = props.mask();
|
||||||
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
|
constexpr Algorithm::Id BASE = props.base();
|
||||||
constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
|
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_1 && size < 43) {
|
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||||
|
constexpr bool IS_CN_HEAVY_TUBE = ALGO == Algorithm::CN_HEAVY_TUBE;
|
||||||
|
# else
|
||||||
|
constexpr bool IS_CN_HEAVY_TUBE = false;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||||
memset(output, 0, 64);
|
memset(output, 0, 64);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
xmrig::keccak(input, size, ctx[0]->state);
|
keccak(input, size, ctx[0]->state);
|
||||||
xmrig::keccak(input + size, size, ctx[1]->state);
|
keccak(input + size, size, ctx[1]->state);
|
||||||
|
|
||||||
const uint8_t* l0 = ctx[0]->memory;
|
uint8_t *l0 = ctx[0]->memory;
|
||||||
const uint8_t* l1 = ctx[1]->memory;
|
uint8_t *l1 = ctx[1]->memory;
|
||||||
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||||
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
|
uint64_t *h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
|
||||||
|
|
||||||
VARIANT1_INIT(0);
|
VARIANT1_INIT(0);
|
||||||
VARIANT1_INIT(1);
|
VARIANT1_INIT(1);
|
||||||
|
@ -640,8 +673,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
VARIANT4_RANDOM_MATH_INIT(0);
|
VARIANT4_RANDOM_MATH_INIT(0);
|
||||||
VARIANT4_RANDOM_MATH_INIT(1);
|
VARIANT4_RANDOM_MATH_INIT(1);
|
||||||
|
|
||||||
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
|
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
|
||||||
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
|
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
|
||||||
|
|
||||||
uint64_t al0 = h0[0] ^ h0[4];
|
uint64_t al0 = h0[0] ^ h0[4];
|
||||||
uint64_t al1 = h1[0] ^ h1[4];
|
uint64_t al1 = h1[0] ^ h1[4];
|
||||||
|
@ -656,16 +689,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
uint64_t idx0 = al0;
|
uint64_t idx0 = al0;
|
||||||
uint64_t idx1 = al1;
|
uint64_t idx1 = al1;
|
||||||
|
|
||||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
for (size_t i = 0; i < props.iterations(); i++) {
|
||||||
__m128i cx0, cx1;
|
__m128i cx0, cx1;
|
||||||
if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
|
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||||
cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
|
cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
|
||||||
cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
|
cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
|
||||||
}
|
}
|
||||||
|
|
||||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||||
if (VARIANT == xmrig::VARIANT_TUBE) {
|
if (IS_CN_HEAVY_TUBE) {
|
||||||
cx0 = aes_round_tweak_div(cx0, ax0);
|
cx0 = aes_round_tweak_div(cx0, ax0);
|
||||||
cx1 = aes_round_tweak_div(cx1, ax1);
|
cx1 = aes_round_tweak_div(cx1, ax1);
|
||||||
}
|
}
|
||||||
|
@ -678,9 +711,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) {
|
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
|
||||||
cryptonight_monero_tweak<VARIANT, BASE>(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
|
cryptonight_monero_tweak<ALGO>(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
|
||||||
cryptonight_monero_tweak<VARIANT, BASE>(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
|
cryptonight_monero_tweak<ALGO>(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
|
||||||
} else {
|
} else {
|
||||||
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
|
||||||
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
|
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
|
||||||
|
@ -693,10 +726,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
|
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
|
||||||
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
|
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == Algorithm::CN_2) {
|
||||||
if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
|
if (props.isR()) {
|
||||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
|
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (ALGO == Algorithm::CN_R) {
|
||||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||||
}
|
}
|
||||||
|
@ -707,11 +740,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
lo = __umul128(idx0, cl, &hi);
|
lo = __umul128(idx0, cl, &hi);
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == Algorithm::CN_2) {
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (ALGO == Algorithm::CN_R) {
|
||||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
|
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
|
||||||
} else {
|
} else {
|
||||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -720,9 +753,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
|
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
|
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
|
||||||
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
||||||
} else if (BASE == xmrig::VARIANT_1) {
|
} else if (BASE == Algorithm::CN_1) {
|
||||||
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
||||||
} else {
|
} else {
|
||||||
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
|
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
|
||||||
|
@ -732,7 +765,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
ah0 ^= ch;
|
ah0 ^= ch;
|
||||||
idx0 = al0;
|
idx0 = al0;
|
||||||
|
|
||||||
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||||
|
if (props.isHeavy()) {
|
||||||
const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l0[idx0 & MASK]));
|
const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l0[idx0 & MASK]));
|
||||||
const int64_t n = vgetq_lane_s64(x, 0);
|
const int64_t n = vgetq_lane_s64(x, 0);
|
||||||
const int32_t d = vgetq_lane_s32(x, 2);
|
const int32_t d = vgetq_lane_s32(x, 2);
|
||||||
|
@ -740,21 +774,22 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
|
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
|
||||||
|
|
||||||
if (VARIANT == xmrig::VARIANT_XHV) {
|
if (ALGO == Algorithm::CN_HEAVY_XHV) {
|
||||||
idx0 = (~d) ^ q;
|
idx0 = (~d) ^ q;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
idx0 = d ^ q;
|
idx0 = d ^ q;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
|
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
|
||||||
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
|
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == Algorithm::CN_2) {
|
||||||
if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
|
if (props.isR()) {
|
||||||
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
|
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (ALGO == Algorithm::CN_R) {
|
||||||
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
||||||
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
||||||
}
|
}
|
||||||
|
@ -765,11 +800,11 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
lo = __umul128(idx1, cl, &hi);
|
lo = __umul128(idx1, cl, &hi);
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == Algorithm::CN_2) {
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (ALGO == Algorithm::CN_R) {
|
||||||
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
|
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
|
||||||
} else {
|
} else {
|
||||||
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -778,9 +813,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
((uint64_t*)&l1[idx1 & MASK])[0] = al1;
|
((uint64_t*)&l1[idx1 & MASK])[0] = al1;
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
|
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
|
||||||
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
|
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
|
||||||
} else if (BASE == xmrig::VARIANT_1) {
|
} else if (BASE == Algorithm::CN_1) {
|
||||||
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
|
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
|
||||||
} else {
|
} else {
|
||||||
((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
|
((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
|
||||||
|
@ -790,7 +825,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
ah1 ^= ch;
|
ah1 ^= ch;
|
||||||
idx1 = al1;
|
idx1 = al1;
|
||||||
|
|
||||||
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||||
|
if (props.isHeavy()) {
|
||||||
const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l1[idx1 & MASK]));
|
const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l1[idx1 & MASK]));
|
||||||
const int64_t n = vgetq_lane_s64(x, 0);
|
const int64_t n = vgetq_lane_s64(x, 0);
|
||||||
const int32_t d = vgetq_lane_s32(x, 2);
|
const int32_t d = vgetq_lane_s32(x, 2);
|
||||||
|
@ -798,47 +834,54 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
|
((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
|
||||||
|
|
||||||
if (VARIANT == xmrig::VARIANT_XHV) {
|
if (ALGO == Algorithm::CN_HEAVY_XHV) {
|
||||||
idx1 = (~d) ^ q;
|
idx1 = (~d) ^ q;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
idx1 = d ^ q;
|
idx1 = d ^ q;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
# endif
|
||||||
|
|
||||||
|
if (BASE == Algorithm::CN_2) {
|
||||||
bx01 = bx00;
|
bx01 = bx00;
|
||||||
bx11 = bx10;
|
bx11 = bx10;
|
||||||
}
|
}
|
||||||
|
|
||||||
bx00 = cx0;
|
bx00 = cx0;
|
||||||
bx10 = cx1;
|
bx10 = cx1;
|
||||||
}
|
}
|
||||||
|
|
||||||
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
|
||||||
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
|
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
|
||||||
|
|
||||||
xmrig::keccakf(h0, 24);
|
keccakf(h0, 24);
|
||||||
xmrig::keccakf(h1, 24);
|
keccakf(h1, 24);
|
||||||
|
|
||||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||||
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||||
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||||
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __CRYPTONIGHT_ARM_H__ */
|
|
||||||
|
} /* namespace xmrig */
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* XMRIG_CRYPTONIGHT_ARM_H */
|
||||||
|
|
|
@ -141,7 +141,7 @@
|
||||||
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
|
||||||
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
|
||||||
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
|
||||||
if (ALGO == Algorithm::CN_4) { \
|
if (ALGO == Algorithm::CN_R) { \
|
||||||
_c = veorq_u64(veorq_u64(_c, chunk3), veorq_u64(chunk1, chunk2)); \
|
_c = veorq_u64(veorq_u64(_c, chunk3), veorq_u64(chunk1, chunk2)); \
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
|
||||||
#include "crypto/cn/CryptoNight_constants.h"
|
#include "crypto/cn/CnAlgo.h"
|
||||||
|
|
||||||
|
|
||||||
inline void vandq_f32(float32x4_t &v, uint32_t v2)
|
inline void vandq_f32(float32x4_t &v, uint32_t v2)
|
||||||
|
@ -237,4 +237,4 @@ void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void cn_gpu_inner_arm<xmrig::CRYPTONIGHT_GPU_ITER, xmrig::CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad);
|
template void cn_gpu_inner_arm<xmrig::CnAlgo<xmrig::Algorithm::CN_GPU>().iterations(), xmrig::CnAlgo<xmrig::Algorithm::CN_GPU>().mask()>(const uint8_t* spad, uint8_t* lpad);
|
||||||
|
|
Loading…
Reference in a new issue