From 93d072ff6ebbc7d39d3c81bb6cef3c9756ac4291 Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 4 Oct 2018 15:52:12 +0300 Subject: [PATCH] Massive refactoring, preparing for cn/2. --- CMakeLists.txt | 16 +- .../cryptonight-lite/cryptonight_lite_aesni.h | 24 +- ...ite_av1_aesni.c => cryptonight_lite_av1.c} | 72 +++++- ..._aesni_double.c => cryptonight_lite_av2.c} | 108 ++++++++- algo/cryptonight-lite/cryptonight_lite_av3.c | 134 +++++++++++ .../cryptonight_lite_av3_softaes.c | 78 ------ ...oftaes_double.c => cryptonight_lite_av4.c} | 108 ++++++++- .../cryptonight_lite_softaes.h | 19 ++ algo/cryptonight/cryptonight.c | 224 +++++++++++------- algo/cryptonight/cryptonight.h | 28 ++- algo/cryptonight/cryptonight_aesni.h | 24 +- ...ptonight_av1_aesni.c => cryptonight_av1.c} | 80 ++++++- ...t_av2_aesni_double.c => cryptonight_av2.c} | 125 ++++++++-- algo/cryptonight/cryptonight_av3.c | 139 +++++++++++ algo/cryptonight/cryptonight_av3_softaes.c | 84 ------- ...av4_softaes_double.c => cryptonight_av4.c} | 124 ++++++++-- algo/cryptonight/cryptonight_monero.h | 114 +++++++-- algo/cryptonight/cryptonight_softaes.h | 25 +- algo/cryptonight/cryptonight_test.h | 45 +++- memory.c | 43 +--- options.c | 35 ++- options.h | 37 ++- persistent_memory.h | 12 +- stratum.h | 7 +- utils/summary.c | 4 +- xmrig.c | 8 +- 26 files changed, 1259 insertions(+), 458 deletions(-) rename algo/cryptonight-lite/{cryptonight_lite_av1_aesni.c => cryptonight_lite_av1.c} (53%) rename algo/cryptonight-lite/{cryptonight_lite_av2_aesni_double.c => cryptonight_lite_av2.c} (50%) create mode 100644 algo/cryptonight-lite/cryptonight_lite_av3.c delete mode 100644 algo/cryptonight-lite/cryptonight_lite_av3_softaes.c rename algo/cryptonight-lite/{cryptonight_lite_av4_softaes_double.c => cryptonight_lite_av4.c} (51%) rename algo/cryptonight/{cryptonight_av1_aesni.c => cryptonight_av1.c} (52%) rename algo/cryptonight/{cryptonight_av2_aesni_double.c => cryptonight_av2.c} (51%) create mode 100644 algo/cryptonight/cryptonight_av3.c delete mode 100644 algo/cryptonight/cryptonight_av3_softaes.c rename algo/cryptonight/{cryptonight_av4_softaes_double.c => cryptonight_av4.c} (51%) diff --git a/CMakeLists.txt b/CMakeLists.txt index e0a23b29b..5fa22d7f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,10 +43,10 @@ set(HEADERS_UTILS set(SOURCES xmrig.c algo/cryptonight/cryptonight.c - algo/cryptonight/cryptonight_av1_aesni.c - algo/cryptonight/cryptonight_av2_aesni_double.c - algo/cryptonight/cryptonight_av3_softaes.c - algo/cryptonight/cryptonight_av4_softaes_double.c + algo/cryptonight/cryptonight_av1.c + algo/cryptonight/cryptonight_av2.c + algo/cryptonight/cryptonight_av3.c + algo/cryptonight/cryptonight_av4.c util.c options.c stratum.c @@ -127,10 +127,10 @@ endif() if (WITH_AEON) set(SOURCES_AEON - algo/cryptonight-lite/cryptonight_lite_av1_aesni.c - algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c - algo/cryptonight-lite/cryptonight_lite_av3_softaes.c - algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c + algo/cryptonight-lite/cryptonight_lite_av1.c + algo/cryptonight-lite/cryptonight_lite_av2.c + algo/cryptonight-lite/cryptonight_lite_av3.c + algo/cryptonight-lite/cryptonight_lite_av4.c algo/cryptonight-lite/cryptonight_lite_aesni.h algo/cryptonight-lite/cryptonight_lite_softaes.h ) diff --git a/algo/cryptonight-lite/cryptonight_lite_aesni.h b/algo/cryptonight-lite/cryptonight_lite_aesni.h index bb528cfb4..0ac6a1355 100644 --- a/algo/cryptonight-lite/cryptonight_lite_aesni.h +++ b/algo/cryptonight-lite/cryptonight_lite_aesni.h @@ -22,10 +22,12 @@ * along with this program. If not, see . */ -#ifndef __CRYPTONIGHT_LITE_AESNI_H__ -#define __CRYPTONIGHT_LITE_AESNI_H__ +#ifndef XMRIG_CRYPTONIGHT_LITE_AESNI_H +#define XMRIG_CRYPTONIGHT_LITE_AESNI_H + #include +#include #define aes_genkey_sub(imm8) \ @@ -253,4 +255,20 @@ static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint #endif -#endif /* __CRYPTONIGHT_LITE_AESNI_H__ */ +static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) +{ + mem_out[0] = EXTRACT64(tmp); + + tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp))); + uint64_t vh = EXTRACT64(tmp); + + uint8_t x = vh >> 24; + static const uint16_t table = 0x7531; + const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; +} + + +#endif /* XMRIG_CRYPTONIGHT_LITE_AESNI_H */ diff --git a/algo/cryptonight-lite/cryptonight_lite_av1_aesni.c b/algo/cryptonight-lite/cryptonight_lite_av1.c similarity index 53% rename from algo/cryptonight-lite/cryptonight_lite_av1_aesni.c rename to algo/cryptonight-lite/cryptonight_lite_av1.c index fb678746e..307c256d1 100644 --- a/algo/cryptonight-lite/cryptonight_lite_av1_aesni.c +++ b/algo/cryptonight-lite/cryptonight_lite_av1.c @@ -27,18 +27,19 @@ #include #include "algo/cryptonight/cryptonight.h" -#include "cryptonight_lite_aesni.h" +#include "algo/cryptonight/cryptonight_monero.h" #include "crypto/c_keccak.h" +#include "cryptonight_lite_aesni.h" -void cryptonight_lite_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version) +void cryptonight_lite_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) { - keccak((const uint8_t *) input, size, ctx->state0, 200); + keccak(input, size, ctx[0]->state, 200); - cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory); + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - const uint8_t* l0 = ctx->memory; - uint64_t* h0 = (uint64_t*) ctx->state0; + const uint8_t* l0 = ctx[0]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; uint64_t al0 = h0[0] ^ h0[4]; uint64_t ah0 = h0[1] ^ h0[5]; @@ -71,8 +72,63 @@ void cryptonight_lite_av1_aesni(const void *restrict input, size_t size, void *r idx0 = al0; } - cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0); + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); keccakf(h0, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} + + +void cryptonight_lite_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + if (size < 43) { + memset(output, 0, 32); + return; + } + + keccak(input, size, ctx[0]->state, 200); + + VARIANT1_INIT(0); + + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); + + const uint8_t* l0 = ctx[0]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + + for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { + __m128i cx; + cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]); + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); + + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx)); + + idx0 = EXTRACT64(cx); + bx0 = cx; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0; + ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + } + + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); + + keccakf(h0, 24); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); } diff --git a/algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c b/algo/cryptonight-lite/cryptonight_lite_av2.c similarity index 50% rename from algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c rename to algo/cryptonight-lite/cryptonight_lite_av2.c index 727e804b4..31b85d8dd 100644 --- a/algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c +++ b/algo/cryptonight-lite/cryptonight_lite_av2.c @@ -27,19 +27,20 @@ #include #include "algo/cryptonight/cryptonight.h" +#include "algo/cryptonight/cryptonight_monero.h" #include "cryptonight_lite_aesni.h" #include "crypto/c_keccak.h" -void cryptonight_lite_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version) +void cryptonight_lite_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) { - keccak((const uint8_t *) input, size, ctx->state0, 200); - keccak((const uint8_t *) input + size, size, ctx->state1, 200); + keccak(input, size, ctx[0]->state, 200); + keccak(input + size, size, ctx[1]->state, 200); - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEMORY_LITE; - uint64_t* h0 = (uint64_t*) ctx->state0; - uint64_t* h1 = (uint64_t*) ctx->state1; + const uint8_t* l0 = ctx[0]->memory; + const uint8_t* l1 = ctx[1]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + uint64_t* h1 = (uint64_t*) ctx[1]->state; cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); @@ -107,6 +108,95 @@ void cryptonight_lite_av2_aesni_double(const void *restrict input, size_t size, keccakf(h0, 24); keccakf(h1, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); - extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); + extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32); +} + + +void cryptonight_lite_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + if (size < 43) { + memset(output, 0, 64); + return; + } + + keccak(input, size, ctx[0]->state, 200); + keccak(input + size, size, ctx[1]->state, 200); + + VARIANT1_INIT(0); + VARIANT1_INIT(1); + + const uint8_t* l0 = ctx[0]->memory; + const uint8_t* l1 = ctx[1]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + uint64_t* h1 = (uint64_t*) ctx[1]->state; + + cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); + cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t al1 = h1[0] ^ h1[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + uint64_t ah1 = h1[1] ^ h1[5]; + + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + uint64_t idx1 = h1[0] ^ h1[4]; + + for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { + __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]); + __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]); + + cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); + cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); + + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0)); + cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1)); + + idx0 = EXTRACT64(cx0); + idx1 = EXTRACT64(cx1); + + bx0 = cx0; + bx1 = cx1; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0; + ((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + + cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0]; + ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1]; + lo = _umul128(idx1, cl, &hi); + + al1 += hi; + ah1 += lo; + + ((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1; + ((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1; + + ah1 ^= ch; + al1 ^= cl; + idx1 = al1; + } + + cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); + cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); + + keccakf(h0, 24); + keccakf(h1, 24); + + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); + extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32); } diff --git a/algo/cryptonight-lite/cryptonight_lite_av3.c b/algo/cryptonight-lite/cryptonight_lite_av3.c new file mode 100644 index 000000000..b0d5d3684 --- /dev/null +++ b/algo/cryptonight-lite/cryptonight_lite_av3.c @@ -0,0 +1,134 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017 fireice-uk + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2016-2018 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "algo/cryptonight/cryptonight.h" +#include "algo/cryptonight/cryptonight_monero.h" +#include "cryptonight_lite_softaes.h" +#include "crypto/c_keccak.h" + + +void cryptonight_lite_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + keccak(input, size, ctx[0]->state, 200); + + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); + + const uint8_t* l0 = ctx[0]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + + for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { + __m128i cx; + cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]); + cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); + + _mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx)); + idx0 = EXTRACT64(cx); + bx0 = cx; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0; + ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + } + + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); + + keccakf(h0, 24); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} + + +void cryptonight_lite_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + if (size < 43) { + memset(output, 0, 32); + return; + } + + keccak(input, size, ctx[0]->state, 200); + + VARIANT1_INIT(0); + + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); + + const uint8_t* l0 = ctx[0]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + + for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { + __m128i cx; + cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]); + cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); + + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx)); + + idx0 = EXTRACT64(cx); + bx0 = cx; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0; + ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + } + + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); + + keccakf(h0, 24); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} diff --git a/algo/cryptonight-lite/cryptonight_lite_av3_softaes.c b/algo/cryptonight-lite/cryptonight_lite_av3_softaes.c deleted file mode 100644 index a5a36fbbc..000000000 --- a/algo/cryptonight-lite/cryptonight_lite_av3_softaes.c +++ /dev/null @@ -1,78 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017 fireice-uk - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2016-2018 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "algo/cryptonight/cryptonight.h" -#include "cryptonight_lite_softaes.h" -#include "crypto/c_keccak.h" - - -void cryptonight_lite_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - - cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory); - - const uint8_t* l0 = ctx->memory; - uint64_t* h0 = (uint64_t*) ctx->state0; - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - - for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { - __m128i cx; - cx = _mm_load_si128((__m128i *)&l0[idx0 & 0xFFFF0]); - cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); - - _mm_store_si128((__m128i *)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx)); - idx0 = EXTRACT64(cx); - bx0 = cx; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*)&l0[idx0 & 0xFFFF0])[0]; - ch = ((uint64_t*)&l0[idx0 & 0xFFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0; - ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - } - - cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0); - - keccakf(h0, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); -} diff --git a/algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c b/algo/cryptonight-lite/cryptonight_lite_av4.c similarity index 51% rename from algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c rename to algo/cryptonight-lite/cryptonight_lite_av4.c index cdf8ff5d3..4a386642e 100644 --- a/algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c +++ b/algo/cryptonight-lite/cryptonight_lite_av4.c @@ -27,19 +27,20 @@ #include #include "algo/cryptonight/cryptonight.h" +#include "algo/cryptonight/cryptonight_monero.h" #include "cryptonight_lite_softaes.h" #include "crypto/c_keccak.h" -void cryptonight_lite_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version) +void cryptonight_lite_av4_v0(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx) { - keccak((const uint8_t *) input, size, ctx->state0, 200); - keccak((const uint8_t *) input + size, size, ctx->state1, 200); + keccak(input, size, ctx[0]->state, 200); + keccak(input + size, size, ctx[1]->state, 200); - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEMORY_LITE; - uint64_t* h0 = (uint64_t*) ctx->state0; - uint64_t* h1 = (uint64_t*) ctx->state1; + const uint8_t* l0 = ctx[0]->memory; + const uint8_t* l1 = ctx[1]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + uint64_t* h1 = (uint64_t*) ctx[1]->state; cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); @@ -107,6 +108,95 @@ void cryptonight_lite_av4_softaes_double(const void *restrict input, size_t size keccakf(h0, 24); keccakf(h1, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); - extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); + extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); +} + + +void cryptonight_lite_av4_v1(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx) +{ + if (size < 43) { + memset(output, 0, 64); + return; + } + + keccak(input, size, ctx[0]->state, 200); + keccak(input + size, size, ctx[1]->state, 200); + + VARIANT1_INIT(0); + VARIANT1_INIT(1); + + const uint8_t* l0 = ctx[0]->memory; + const uint8_t* l1 = ctx[1]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + uint64_t* h1 = (uint64_t*) ctx[1]->state; + + cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); + cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t al1 = h1[0] ^ h1[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + uint64_t ah1 = h1[1] ^ h1[5]; + + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + uint64_t idx1 = h1[0] ^ h1[4]; + + for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { + __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]); + __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]); + + cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0)); + cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1)); + + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0)); + cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1)); + + idx0 = EXTRACT64(cx0); + idx1 = EXTRACT64(cx1); + + bx0 = cx0; + bx1 = cx1; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0; + ((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + + cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0]; + ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1]; + lo = _umul128(idx1, cl, &hi); + + al1 += hi; + ah1 += lo; + + ((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1; + ((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1; + + ah1 ^= ch; + al1 ^= cl; + idx1 = al1; + } + + cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); + cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); + + keccakf(h0, 24); + keccakf(h1, 24); + + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); + extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32); } diff --git a/algo/cryptonight-lite/cryptonight_lite_softaes.h b/algo/cryptonight-lite/cryptonight_lite_softaes.h index bab3dcafe..1e06a0f27 100644 --- a/algo/cryptonight-lite/cryptonight_lite_softaes.h +++ b/algo/cryptonight-lite/cryptonight_lite_softaes.h @@ -25,7 +25,10 @@ #ifndef __CRYPTONIGHT_LITE_SOFTAES_H__ #define __CRYPTONIGHT_LITE_SOFTAES_H__ + #include +#include + extern __m128i soft_aesenc(__m128i in, __m128i key); extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon); @@ -234,4 +237,20 @@ static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint #endif +static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) +{ + mem_out[0] = EXTRACT64(tmp); + + tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp))); + uint64_t vh = EXTRACT64(tmp); + + uint8_t x = vh >> 24; + static const uint16_t table = 0x7531; + const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; +} + + #endif /* __CRYPTONIGHT_LITE_SOFTAES_H__ */ diff --git a/algo/cryptonight/cryptonight.c b/algo/cryptonight/cryptonight.c index fb981df21..cd91f9a8e 100644 --- a/algo/cryptonight/cryptonight.c +++ b/algo/cryptonight/cryptonight.c @@ -23,10 +23,12 @@ */ +#include #include #include #include + #ifndef BUILD_TEST # include "xmrig.h" #endif @@ -39,113 +41,136 @@ #include "cryptonight_test.h" #include "options.h" +#include "utils/applog.h" + + +void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av2_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av3_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_av4_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); -void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version); -void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version); -void cryptonight_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version); -void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version); #ifndef XMRIG_NO_AEON -void cryptonight_lite_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t); -void cryptonight_lite_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t); -void cryptonight_lite_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t); -void cryptonight_lite_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t); +void cryptonight_lite_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_lite_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_lite_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_lite_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_lite_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_lite_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_lite_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); #endif void (*cryptonight_hash_ctx)(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version) = NULL; -static bool self_test() { - if (cryptonight_hash_ctx == NULL) { +static inline bool verify(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue) +{ + cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant); + if (func == NULL) { return false; } - char output[64]; + func(test_input, 76, output, ctx); - struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16); - ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 2, 16); - - cryptonight_hash_ctx(test_input, 76, output, ctx, 0); - -# ifndef XMRIG_NO_AEON - bool rc = memcmp(output, opt_algo == ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, (opt_double_hash ? 64 : 32)) == 0; -# else - bool rc = memcmp(output, test_output0, opt_double_hash ? 64 : 32)) == 0; -# endif - - if (rc && opt_algo == ALGO_CRYPTONIGHT) { - cryptonight_hash_ctx(test_input, 76, output, ctx, 7); - - rc = memcmp(output, test_output2, (opt_double_hash ? 64 : 32)) == 0; - } - - _mm_free(ctx->memory); - _mm_free(ctx); - - return rc; + return memcmp(output, referenceValue, opt_double_hash ? 64 : 32) == 0; } -#ifndef XMRIG_NO_AEON -bool cryptonight_lite_init(int variant) { - switch (variant) { - case AEON_AV1_AESNI: - cryptonight_hash_ctx = cryptonight_lite_av1_aesni; - break; +static bool self_test() { + struct cryptonight_ctx *ctx[2]; + uint8_t output[64]; - case AEON_AV2_AESNI_DOUBLE: - opt_double_hash = true; - cryptonight_hash_ctx = cryptonight_lite_av2_aesni_double; - break; + const size_t count = opt_double_hash ? 2 : 1; + const size_t size = opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE; + bool result = false; - case AEON_AV3_SOFT_AES: - cryptonight_hash_ctx = cryptonight_lite_av3_softaes; - break; - - case AEON_AV4_SOFT_AES_DOUBLE: - opt_double_hash = true; - cryptonight_hash_ctx = cryptonight_lite_av4_softaes_double; - break; - - default: - break; + for (int i = 0; i < count; ++i) { + ctx[i] = _mm_malloc(sizeof(struct cryptonight_ctx), 16); + ctx[i]->memory = _mm_malloc(size, 16); } - return self_test(); + if (opt_algo == ALGO_CRYPTONIGHT) { + result = verify(VARIANT_0, output, ctx, test_output_v0) && + verify(VARIANT_1, output, ctx, test_output_v1) && + verify(VARIANT_0, output, ctx, test_output_v0); + } + else { + result = verify(VARIANT_0, output, ctx, test_output_v0_lite) && + verify(VARIANT_1, output, ctx, test_output_v1_lite); + } + + + for (int i = 0; i < count; ++i) { + _mm_free(ctx[i]->memory); + _mm_free(ctx[i]); + } + + return result; } -#endif -bool cryptonight_init(int variant) +cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant) { -# ifndef XMRIG_NO_AEON - if (opt_algo == ALGO_CRYPTONIGHT_LITE) { - return cryptonight_lite_init(variant); - } + assert(av > AV_AUTO && av < AV_MAX); + assert(variant > VARIANT_AUTO && variant < VARIANT_MAX); + + static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = { + cryptonight_av1_v0, + cryptonight_av2_v0, + cryptonight_av3_v0, + cryptonight_av4_v0, + cryptonight_av1_v1, + cryptonight_av2_v1, + cryptonight_av3_v1, + cryptonight_av4_v1, + cryptonight_av1_v2, + cryptonight_av2_v2, + cryptonight_av3_v2, + cryptonight_av4_v2, + +# ifndef XMRIG_NO_AEON + cryptonight_lite_av1_v0, + cryptonight_lite_av2_v0, + cryptonight_lite_av3_v0, + cryptonight_lite_av4_v0, + cryptonight_lite_av1_v1, + cryptonight_lite_av2_v1, + cryptonight_lite_av3_v1, + cryptonight_lite_av4_v1, + NULL, + NULL, + NULL, + NULL +# endif + }; + + const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1; + +# ifndef NDEBUG + cn_hash_fun func = func_table[index]; + + assert(index < sizeof(func_table) / sizeof(func_table[0])); + assert(func != NULL); + + return func; +# else + return func_table[index]; # endif +} - switch (variant) { - case XMR_AV1_AESNI: - cryptonight_hash_ctx = cryptonight_av1_aesni; - break; - case XMR_AV2_AESNI_DOUBLE: - opt_double_hash = true; - cryptonight_hash_ctx = cryptonight_av2_aesni_double; - break; - - case XMR_AV3_SOFT_AES: - cryptonight_hash_ctx = cryptonight_av3_softaes; - break; - - case XMR_AV4_SOFT_AES_DOUBLE: - opt_double_hash = true; - cryptonight_hash_ctx = cryptonight_av4_softaes_double; - break; - - default: - break; - } +bool cryptonight_init(int av) +{ + opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT; return self_test(); } @@ -174,12 +199,32 @@ static inline void do_skein_hash(const void* input, size_t len, char* output) { void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; +static inline enum Variant cryptonight_variant(uint8_t version) +{ + if (opt_variant != VARIANT_AUTO) { + return opt_variant; + } + + if (opt_algo == ALGO_CRYPTONIGHT_LITE) { + return VARIANT_1; + } + + if (version >= 8) { + return VARIANT_2; + } + + return version == 7 ? VARIANT_1 : VARIANT_0; +} + + #ifndef BUILD_TEST -int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) { - uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39); +int scanhash_cryptonight(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) { + uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39); + enum Variant variant = cryptonight_variant(blob[0]); do { - cryptonight_hash_ctx(blob, blob_size, hash, ctx, ((uint8_t*) blob)[0]); + cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx); + (*hashes_done)++; if (unlikely(hash[7] < target)) { @@ -193,13 +238,14 @@ int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, si } -int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) { - int rc = 0; - uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39); - uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size); +int scanhash_cryptonight_double(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) { + int rc = 0; + uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39); + uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size); + enum Variant variant = cryptonight_variant(blob[0]); do { - cryptonight_hash_ctx(blob, blob_size, hash, ctx, ((uint8_t*) blob)[0]); + cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx); (*hashes_done) += 2; if (unlikely(hash[7] < target)) { diff --git a/algo/cryptonight/cryptonight.h b/algo/cryptonight/cryptonight.h index f8002afe6..74646ef5b 100644 --- a/algo/cryptonight/cryptonight.h +++ b/algo/cryptonight/cryptonight.h @@ -22,27 +22,37 @@ * along with this program. If not, see . */ -#ifndef __CRYPTONIGHT_H__ -#define __CRYPTONIGHT_H__ +#ifndef XMRIG_CRYPTONIGHT_H +#define XMRIG_CRYPTONIGHT_H + #include #include #include + +#include "options.h" + + #define MEMORY 2097152 /* 2 MiB */ #define MEMORY_LITE 1048576 /* 1 MiB */ + struct cryptonight_ctx { - uint8_t state0[200] __attribute__((aligned(16))); - uint8_t state1[200] __attribute__((aligned(16))); - uint8_t* memory __attribute__((aligned(16))); + uint8_t state[224] __attribute__((aligned(16))); + uint8_t* memory __attribute__((aligned(16))); }; +typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); + + extern void (* const extra_hashes[4])(const void *, size_t, char *); -bool cryptonight_init(int variant); -int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx); -int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx); +cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant); -#endif /* __CRYPTONIGHT_H__ */ +bool cryptonight_init(int av); +int scanhash_cryptonight(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx); +int scanhash_cryptonight_double(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx); + +#endif /* XMRIG_CRYPTONIGHT_H */ diff --git a/algo/cryptonight/cryptonight_aesni.h b/algo/cryptonight/cryptonight_aesni.h index e4d6d42f1..b60428979 100644 --- a/algo/cryptonight/cryptonight_aesni.h +++ b/algo/cryptonight/cryptonight_aesni.h @@ -22,10 +22,12 @@ * along with this program. If not, see . */ -#ifndef __CRYPTONIGHT_AESNI_H__ -#define __CRYPTONIGHT_AESNI_H__ +#ifndef XMRIG_CRYPTONIGHT_AESNI_H +#define XMRIG_CRYPTONIGHT_AESNI_H + #include +#include #define aes_genkey_sub(imm8) \ @@ -253,4 +255,20 @@ static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint #endif -#endif /* __CRYPTONIGHT_AESNI_H__ */ +static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) +{ + mem_out[0] = EXTRACT64(tmp); + + tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp))); + uint64_t vh = EXTRACT64(tmp); + + uint8_t x = vh >> 24; + static const uint16_t table = 0x7531; + const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; +} + + +#endif /* XMRIG_CRYPTONIGHT_AESNI_H */ diff --git a/algo/cryptonight/cryptonight_av1_aesni.c b/algo/cryptonight/cryptonight_av1.c similarity index 52% rename from algo/cryptonight/cryptonight_av1_aesni.c rename to algo/cryptonight/cryptonight_av1.c index b2c45c700..4028dd5d0 100644 --- a/algo/cryptonight/cryptonight_av1_aesni.c +++ b/algo/cryptonight/cryptonight_av1.c @@ -32,16 +32,14 @@ #include "cryptonight_monero.h" -void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version) +void cryptonight_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) { - keccak((const uint8_t *) input, size, ctx->state0, 200); + keccak(input, size, ctx[0]->state, 200); - VARIANT1_INIT(0); + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory); - - const uint8_t* l0 = ctx->memory; - uint64_t* h0 = (uint64_t*) ctx->state0; + const uint8_t* l0 = ctx[0]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; uint64_t al0 = h0[0] ^ h0[4]; uint64_t ah0 = h0[1] ^ h0[5]; @@ -55,7 +53,6 @@ void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restri cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); _mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); - VARIANT1_1(&l0[idx0 & 0x1FFFF0]); idx0 = EXTRACT64(cx); bx0 = cx; @@ -67,18 +64,77 @@ void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restri al0 += hi; ah0 += lo; - VARIANT1_2(ah0, 0); ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0; - VARIANT1_2(ah0, 0); ah0 ^= ch; al0 ^= cl; idx0 = al0; } - cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0); + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); keccakf(h0, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} + + +void cryptonight_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + if (size < 43) { + memset(output, 0, 32); + return; + } + + keccak(input, size, ctx[0]->state, 200); + + VARIANT1_INIT(0); + + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); + + const uint8_t* l0 = ctx[0]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + + for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { + __m128i cx; + cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]); + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); + + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); + + idx0 = EXTRACT64(cx); + bx0 = cx; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + } + + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); + + keccakf(h0, 24); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} + + +void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + } diff --git a/algo/cryptonight/cryptonight_av2_aesni_double.c b/algo/cryptonight/cryptonight_av2.c similarity index 51% rename from algo/cryptonight/cryptonight_av2_aesni_double.c rename to algo/cryptonight/cryptonight_av2.c index 345207f73..7e5f4109f 100644 --- a/algo/cryptonight/cryptonight_av2_aesni_double.c +++ b/algo/cryptonight/cryptonight_av2.c @@ -32,18 +32,15 @@ #include "cryptonight_monero.h" -void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version) +void cryptonight_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) { - keccak((const uint8_t *) input, size, ctx->state0, 200); - keccak((const uint8_t *) input + size, size, ctx->state1, 200); + keccak(input, size, ctx[0]->state, 200); + keccak(input + size, size, ctx[1]->state, 200); - VARIANT1_INIT(0); - VARIANT1_INIT(1); - - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEMORY; - uint64_t* h0 = (uint64_t*) ctx->state0; - uint64_t* h1 = (uint64_t*) ctx->state1; + const uint8_t* l0 = ctx[0]->memory; + const uint8_t* l1 = ctx[1]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + uint64_t* h1 = (uint64_t*) ctx[1]->state; cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); @@ -69,8 +66,94 @@ void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void _mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0)); _mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1)); - VARIANT1_1(&l0[idx0 & 0x1FFFF0]); - VARIANT1_1(&l1[idx1 & 0x1FFFF0]); + idx0 = EXTRACT64(cx0); + idx1 = EXTRACT64(cx1); + + bx0 = cx0; + bx1 = cx1; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0; + ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + + cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0]; + ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1]; + lo = _umul128(idx1, cl, &hi); + + al1 += hi; + ah1 += lo; + + ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1; + ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1; + + ah1 ^= ch; + al1 ^= cl; + idx1 = al1; + } + + cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); + cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); + + keccakf(h0, 24); + keccakf(h1, 24); + + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); + extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); +} + + +void cryptonight_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + if (size < 43) { + memset(output, 0, 64); + return; + } + + keccak(input, size, ctx[0]->state, 200); + keccak(input + size, size, ctx[1]->state, 200); + + VARIANT1_INIT(0); + VARIANT1_INIT(1); + + const uint8_t* l0 = ctx[0]->memory; + const uint8_t* l1 = ctx[1]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + uint64_t* h1 = (uint64_t*) ctx[1]->state; + + cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); + cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t al1 = h1[0] ^ h1[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + uint64_t ah1 = h1[1] ^ h1[5]; + + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + uint64_t idx1 = h1[0] ^ h1[4]; + + for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { + __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]); + __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]); + + cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); + cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); + + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0)); + cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1)); idx0 = EXTRACT64(cx0); idx1 = EXTRACT64(cx1); @@ -86,10 +169,8 @@ void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void al0 += hi; ah0 += lo; - VARIANT1_2(ah0, 0); ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0; - ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0; - VARIANT1_2(ah0, 0); + ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0; ah0 ^= ch; al0 ^= cl; @@ -102,10 +183,8 @@ void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void al1 += hi; ah1 += lo; - VARIANT1_2(ah1, 1); ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1; - ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1; - VARIANT1_2(ah1, 1); + ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1; ah1 ^= ch; al1 ^= cl; @@ -118,6 +197,12 @@ void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void keccakf(h0, 24); keccakf(h1, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); - extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); + extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); +} + + +void cryptonight_av2_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + } diff --git a/algo/cryptonight/cryptonight_av3.c b/algo/cryptonight/cryptonight_av3.c new file mode 100644 index 000000000..a70197ce4 --- /dev/null +++ b/algo/cryptonight/cryptonight_av3.c @@ -0,0 +1,139 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017 fireice-uk + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018 Lee Clagett + * Copyright 2016-2018 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "crypto/c_keccak.h" +#include "cryptonight.h" +#include "cryptonight_monero.h" +#include "cryptonight_softaes.h" + + +void cryptonight_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + keccak(input, size, ctx[0]->state, 200); + + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); + + const uint8_t* l0 = ctx[0]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + + for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { + __m128i cx; + cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]); + cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); + + _mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); + idx0 = EXTRACT64(cx); + bx0 = cx; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + } + + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); + + keccakf(h0, 24); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} + + +void cryptonight_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + if (size < 43) { + memset(output, 0, 32); + return; + } + + keccak(input, size, ctx[0]->state, 200); + + VARIANT1_INIT(0); + + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); + + const uint8_t* l0 = ctx[0]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + + for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { + __m128i cx; + cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]); + cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); + + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); + + idx0 = EXTRACT64(cx); + bx0 = cx; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + } + + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); + + keccakf(h0, 24); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} + + +void cryptonight_av3_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ +} diff --git a/algo/cryptonight/cryptonight_av3_softaes.c b/algo/cryptonight/cryptonight_av3_softaes.c deleted file mode 100644 index 1d9f654a9..000000000 --- a/algo/cryptonight/cryptonight_av3_softaes.c +++ /dev/null @@ -1,84 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017 fireice-uk - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2016-2018 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "crypto/c_keccak.h" -#include "cryptonight.h" -#include "cryptonight_monero.h" -#include "cryptonight_softaes.h" - - -void cryptonight_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - - VARIANT1_INIT(0); - - cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory); - - const uint8_t* l0 = ctx->memory; - uint64_t* h0 = (uint64_t*) ctx->state0; - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - - for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { - __m128i cx; - cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]); - cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); - - _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); - VARIANT1_1(&l0[idx0 & 0x1FFFF0]); - idx0 = EXTRACT64(cx); - bx0 = cx; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0]; - ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - VARIANT1_2(ah0, 0); - ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; - ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0; - VARIANT1_2(ah0, 0); - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - } - - cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0); - - keccakf(h0, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); -} diff --git a/algo/cryptonight/cryptonight_av4_softaes_double.c b/algo/cryptonight/cryptonight_av4.c similarity index 51% rename from algo/cryptonight/cryptonight_av4_softaes_double.c rename to algo/cryptonight/cryptonight_av4.c index 4085429d1..bb4840952 100644 --- a/algo/cryptonight/cryptonight_av4_softaes_double.c +++ b/algo/cryptonight/cryptonight_av4.c @@ -32,18 +32,15 @@ #include "cryptonight_softaes.h" -void cryptonight_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version) +void cryptonight_av4_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) { - keccak((const uint8_t *) input, size, ctx->state0, 200); - keccak((const uint8_t *) input + size, size, ctx->state1, 200); + keccak(input, size, ctx[0]->state, 200); + keccak(input + size, size, ctx[1]->state, 200); - VARIANT1_INIT(0); - VARIANT1_INIT(1); - - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEMORY; - uint64_t* h0 = (uint64_t*) ctx->state0; - uint64_t* h1 = (uint64_t*) ctx->state1; + const uint8_t* l0 = ctx[0]->memory; + const uint8_t* l1 = ctx[1]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + uint64_t* h1 = (uint64_t*) ctx[1]->state; cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); @@ -69,8 +66,94 @@ void cryptonight_av4_softaes_double(const void *restrict input, size_t size, voi _mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0)); _mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1)); - VARIANT1_1(&l0[idx0 & 0x1FFFF0]); - VARIANT1_1(&l1[idx1 & 0x1FFFF0]); + idx0 = EXTRACT64(cx0); + idx1 = EXTRACT64(cx1); + + bx0 = cx0; + bx1 = cx1; + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0]; + ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1]; + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + + ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0; + ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0; + + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + + cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0]; + ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1]; + lo = _umul128(idx1, cl, &hi); + + al1 += hi; + ah1 += lo; + + ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1; + ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1; + + ah1 ^= ch; + al1 ^= cl; + idx1 = al1; + } + + cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); + cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); + + keccakf(h0, 24); + keccakf(h1, 24); + + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); + extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); +} + + +void cryptonight_av4_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + if (size < 43) { + memset(output, 0, 64); + return; + } + + keccak(input, size, ctx[0]->state, 200); + keccak(input + size, size, ctx[1]->state, 200); + + VARIANT1_INIT(0); + VARIANT1_INIT(1); + + const uint8_t* l0 = ctx[0]->memory; + const uint8_t* l1 = ctx[1]->memory; + uint64_t* h0 = (uint64_t*) ctx[0]->state; + uint64_t* h1 = (uint64_t*) ctx[1]->state; + + cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); + cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t al1 = h1[0] ^ h1[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + uint64_t ah1 = h1[1] ^ h1[5]; + + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + uint64_t idx1 = h1[0] ^ h1[4]; + + for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { + __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]); + __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]); + + cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0)); + cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1)); + + cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0)); + cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1)); idx0 = EXTRACT64(cx0); idx1 = EXTRACT64(cx1); @@ -86,10 +169,8 @@ void cryptonight_av4_softaes_double(const void *restrict input, size_t size, voi al0 += hi; ah0 += lo; - VARIANT1_2(ah0, 0); ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0; - ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0; - VARIANT1_2(ah0, 0); + ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0; ah0 ^= ch; al0 ^= cl; @@ -102,10 +183,8 @@ void cryptonight_av4_softaes_double(const void *restrict input, size_t size, voi al1 += hi; ah1 += lo; - VARIANT1_2(ah1, 1); ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1; - ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1; - VARIANT1_2(ah1, 1); + ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1; ah1 ^= ch; al1 ^= cl; @@ -118,6 +197,11 @@ void cryptonight_av4_softaes_double(const void *restrict input, size_t size, voi keccakf(h0, 24); keccakf(h1, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); - extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); + extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); +} + + +void cryptonight_av4_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx *restrict ctx) +{ } diff --git a/algo/cryptonight/cryptonight_monero.h b/algo/cryptonight/cryptonight_monero.h index 2a4e7ee10..44ac27b03 100644 --- a/algo/cryptonight/cryptonight_monero.h +++ b/algo/cryptonight/cryptonight_monero.h @@ -6,6 +6,7 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2018 Lee Clagett + * Copyright 2018 SChernykh * Copyright 2016-2018 XMRig , * * This program is free software: you can redistribute it and/or modify @@ -22,30 +23,103 @@ * along with this program. If not, see . */ -#ifndef __CRYPTONIGHT_MONERO_H__ -#define __CRYPTONIGHT_MONERO_H__ +#ifndef XMRIG_CRYPTONIGHT_MONERO_H +#define XMRIG_CRYPTONIGHT_MONERO_H + + +#include +#include -// VARIANT ALTERATIONS #define VARIANT1_INIT(part) \ - uint64_t tweak1_2_##part = 0; \ - if (version > 6) { \ - tweak1_2_##part = (*(const uint64_t*)(((const uint8_t*) input) + 35 + part * size) ^ \ - *((const uint64_t*)(ctx->state##part) + 24)); \ - } + uint64_t tweak1_2_##part = (*(const uint64_t*)(input + 35 + part * size) ^ \ + *((const uint64_t*)(ctx[part]->state) + 24)); \ -#define VARIANT1_1(p) \ - if (version > 6) { \ - const uint8_t tmp = ((const uint8_t*)(p))[11]; \ - static const uint32_t table = 0x75310; \ - const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \ - ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \ - } +#ifndef XMRIG_ARM +# define VARIANT2_INIT(part) \ + __m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \ + __m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]); -#define VARIANT1_2(p, part) \ - if (version > 6) { \ - (p) ^= tweak1_2_##part; \ - } +#ifdef _MSC_VER +# define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { _control87(RC_DOWN, MCW_RC); } +#else +# define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { fesetround(FE_DOWNWARD); } +#endif +# define VARIANT2_INTEGER_MATH(part, cl, cx) \ + do { \ + const uint64_t sqrt_result = static_cast(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \ + const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \ + cl ^= static_cast(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \ + const uint32_t d = static_cast(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \ + const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \ + const uint64_t division_result = static_cast(cx_1 / d) + ((cx_1 % d) << 32); \ + division_result_xmm_##part = _mm_cvtsi64_si128(static_cast(division_result)); \ + sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \ + } while (0) -#endif /* __CRYPTONIGHT_MONERO_H__ */ +# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \ + do { \ + const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \ + const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \ + const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \ + } while (0) + +# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \ + do { \ + const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \ + const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \ + hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \ + lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \ + const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \ + } while (0) + +#else +# define VARIANT2_INIT(part) \ + uint64_t division_result_##part = h##part[12]; \ + uint64_t sqrt_result_##part = h##part[13]; + +# define VARIANT2_INTEGER_MATH(part, cl, cx) \ + do { \ + const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \ + cl ^= division_result_##part ^ (sqrt_result_##part << 32); \ + const uint32_t d = static_cast(cx_0 + (sqrt_result_##part << 1)) | 0x80000001UL; \ + const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \ + division_result_##part = static_cast(cx_1 / d) + ((cx_1 % d) << 32); \ + const uint64_t sqrt_input = cx_0 + division_result_##part; \ + sqrt_result_##part = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \ + const uint64_t s = sqrt_result_##part >> 1; \ + const uint64_t b = sqrt_result_##part & 1; \ + const uint64_t r2 = (uint64_t)(s) * (s + b) + (sqrt_result_##part << 32); \ + sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \ + } while (0) + +# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \ + do { \ + const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))); \ + const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \ + const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \ + } while (0) + +# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \ + do { \ + const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \ + const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \ + hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \ + lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \ + const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \ + } while (0) +#endif +#endif /* XMRIG_CRYPTONIGHT_MONERO_H */ diff --git a/algo/cryptonight/cryptonight_softaes.h b/algo/cryptonight/cryptonight_softaes.h index f12ab8c67..4e25b768e 100644 --- a/algo/cryptonight/cryptonight_softaes.h +++ b/algo/cryptonight/cryptonight_softaes.h @@ -22,10 +22,13 @@ * along with this program. If not, see . */ -#ifndef __CRYPTONIGHT_SOFTAES_H__ -#define __CRYPTONIGHT_SOFTAES_H__ +#ifndef XMRIG_CRYPTONIGHT_SOFTAES_H +#define XMRIG_CRYPTONIGHT_SOFTAES_H + #include +#include + extern __m128i soft_aesenc(__m128i in, __m128i key); extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon); @@ -234,4 +237,20 @@ inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *p #endif -#endif /* __CRYPTONIGHT_SOFTAES_H__ */ +static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) +{ + mem_out[0] = EXTRACT64(tmp); + + tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp))); + uint64_t vh = EXTRACT64(tmp); + + uint8_t x = vh >> 24; + static const uint16_t table = 0x7531; + const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; + vh ^= ((table >> index) & 0x3) << 28; + + mem_out[1] = vh; +} + + +#endif /* XMRIG_CRYPTONIGHT_SOFTAES_H */ diff --git a/algo/cryptonight/cryptonight_test.h b/algo/cryptonight/cryptonight_test.h index c5ef5037d..04efe911a 100644 --- a/algo/cryptonight/cryptonight_test.h +++ b/algo/cryptonight/cryptonight_test.h @@ -41,31 +41,50 @@ const static uint8_t test_input[152] = { }; -const static uint8_t test_output0[64] = { +const static uint8_t test_output_v0[64] = { 0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7, 0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00, 0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66, - 0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F, + 0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F }; +// Cryptonight variant 1 (Monero v7) +const static uint8_t test_output_v1[64] = { + 0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9, + 0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9, + 0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D, + 0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22 +}; + + +// Cryptonight variant 2 (Monero v8) +const static uint8_t test_output_v2[64] = { + 0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14, + 0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21, + 0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89, + 0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78 +}; + + + #ifndef XMRIG_NO_AEON -const static uint8_t test_output1[64] = { +const static uint8_t test_output_v0_lite[64] = { 0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E, 0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88, 0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE, - 0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD, + 0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD +}; + + +// AEON v7 +const static uint8_t test_output_v1_lite[64] = { + 0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22, + 0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41, + 0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45, + 0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F }; #endif -// Monero v7 -const static uint8_t test_output2[64] = { - 0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9, - 0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9, - 0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D, - 0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22, -}; - - #endif /* XMRIG_CRYPTONIGHT_TEST_H */ diff --git a/memory.c b/memory.c index 112f11153..b8a9eb65b 100644 --- a/memory.c +++ b/memory.c @@ -24,32 +24,12 @@ #include #include "persistent_memory.h" -#include "algo/cryptonight/cryptonight.h" #include "options.h" + static size_t offset = 0; -#ifndef XMRIG_NO_AEON -static void * create_persistent_ctx_lite(int thr_id) { - struct cryptonight_ctx *ctx = NULL; - - if (!opt_double_hash) { - const size_t offset = MEMORY * (thr_id + 1); - - ctx = (struct cryptonight_ctx *) &persistent_memory[offset + MEMORY_LITE]; - ctx->memory = (uint8_t*) &persistent_memory[offset]; - return ctx; - } - - ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)]; - ctx->memory = (uint8_t*) &persistent_memory[MEMORY * (thr_id + 1)]; - - return ctx; -} -#endif - - void * persistent_calloc(size_t num, size_t size) { void *mem = &persistent_memory[offset]; offset += (num * size); @@ -60,17 +40,14 @@ void * persistent_calloc(size_t num, size_t size) { } -void * create_persistent_ctx(int thr_id) { -# ifndef XMRIG_NO_AEON - if (opt_algo == ALGO_CRYPTONIGHT_LITE) { - return create_persistent_ctx_lite(thr_id); +void create_cryptonight_ctx(struct cryptonight_ctx **ctx, int thr_id) +{ + const int ratio = (opt_double_hash && opt_algo == ALGO_CRYPTONIGHT) ? 2 : 1; + ctx[0] = persistent_calloc(1, sizeof(struct cryptonight_ctx)); + ctx[0]->memory = &persistent_memory[MEMORY * (thr_id * ratio + 1)]; + + if (opt_double_hash) { + ctx[1] = persistent_calloc(1, sizeof(struct cryptonight_ctx)); + ctx[1]->memory = ctx[0]->memory + (opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE); } -# endif - - struct cryptonight_ctx *ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)]; - - const int ratio = opt_double_hash ? 2 : 1; - ctx->memory = (uint8_t*) &persistent_memory[MEMORY * (thr_id * ratio + 1)]; - - return ctx; } diff --git a/options.c b/options.c index 5dabefbdf..a4cec7960 100644 --- a/options.c +++ b/options.c @@ -38,7 +38,6 @@ int64_t opt_affinity = -1L; int opt_n_threads = 0; -int opt_algo_variant = 0; int opt_retries = 5; int opt_retry_pause = 5; int opt_donate_level = DONATE_LEVEL; @@ -55,13 +54,16 @@ char *opt_userpass = NULL; char *opt_user = NULL; char *opt_pass = NULL; -enum mining_algo opt_algo = ALGO_CRYPTONIGHT; +enum Algo opt_algo = ALGO_CRYPTONIGHT; +enum Variant opt_variant = VARIANT_AUTO; +enum AlgoVariant opt_av = AV_AUTO; static char const usage[] = "\ Usage: " APP_ID " [OPTIONS]\n\ Options:\n\ -a, --algo=ALGO cryptonight (default) or cryptonight-lite\n\ + --variant=N cryptonight variant: 0-2\n\ -o, --url=URL URL of mining server\n\ -b, --backup-url=URL URL of backup mining server\n\ -O, --userpass=U:P username:password pair for mining server\n\ @@ -110,18 +112,27 @@ static struct option const options[] = { { "user", 1, NULL, 'u' }, { "userpass", 1, NULL, 'O' }, { "version", 0, NULL, 'V' }, - { 0, 0, 0, 0 } + { "variant", 1, NULL, 1021 }, + { NULL, 0, NULL, 0 } }; static const char *algo_names[] = { - [ALGO_CRYPTONIGHT] = "cryptonight", + "cryptonight", # ifndef XMRIG_NO_AEON - [ALGO_CRYPTONIGHT_LITE] = "cryptonight-lite" + "cryptonight-lite" # endif }; +static const char *variant_names[] = { + "auto" + "0", + "1", + "2", +}; + + #ifndef XMRIG_NO_AEON static int get_cryptonight_lite_variant(int variant) { if (variant <= AEON_AV0_AUTO || variant >= AEON_AV_MAX) { @@ -144,11 +155,11 @@ static int get_algo_variant(int algo, int variant) { } # endif - if (variant <= XMR_AV0_AUTO || variant >= XMR_AV_MAX) { - return (cpu_info.flags & CPU_FLAG_AES) ? XMR_AV1_AESNI : XMR_AV3_SOFT_AES; + if (variant <= AV_AUTO || variant >= AV_MAX) { + return (cpu_info.flags & CPU_FLAG_AES) ? AV_SINGLE : AV_SINGLE_SOFT; } - if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= XMR_AV2_AESNI_DOUBLE) { + if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AV_DOUBLE) { return variant + 2; } @@ -300,11 +311,11 @@ static void parse_arg(int key, char *arg) { case 'v': /* --av */ v = atoi(arg); - if (v < 0 || v > 1000) { + if (v <= AV_AUTO || v >= AV_MAX) { show_usage_and_exit(1); } - opt_algo_variant = v; + opt_av = v; break; case 1020: /* --cpu-affinity */ @@ -451,9 +462,9 @@ void parse_cmdline(int argc, char *argv[]) { sprintf(opt_userpass, "%s:%s", opt_user, opt_pass); } - opt_algo_variant = get_algo_variant(opt_algo, opt_algo_variant); + opt_av = get_algo_variant(opt_algo, opt_av); - if (!cryptonight_init(opt_algo_variant)) { + if (!cryptonight_init(opt_av)) { applog(LOG_ERR, "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations."); proper_exit(1); } diff --git a/options.h b/options.h index a14aaeeb4..0dffb1cc8 100644 --- a/options.h +++ b/options.h @@ -21,8 +21,8 @@ * along with this program. If not, see . */ -#ifndef __OPTIONS_H__ -#define __OPTIONS_H__ +#ifndef XMRIG_OPTIONS_H +#define XMRIG_OPTIONS_H #include #include @@ -32,19 +32,28 @@ #endif -enum mining_algo { +enum Algo { ALGO_CRYPTONIGHT, /* CryptoNight (Monero) */ ALGO_CRYPTONIGHT_LITE, /* CryptoNight-Lite (AEON) */ }; -enum xmr_algo_variant { - XMR_AV0_AUTO, - XMR_AV1_AESNI, - XMR_AV2_AESNI_DOUBLE, - XMR_AV3_SOFT_AES, - XMR_AV4_SOFT_AES_DOUBLE, - XMR_AV_MAX +enum Variant { + VARIANT_AUTO = -1, + VARIANT_0 = 0, + VARIANT_1 = 1, + VARIANT_2 = 2, + VARIANT_MAX +}; + + +enum AlgoVariant { + AV_AUTO, // --av=0 Automatic mode. + AV_SINGLE, // --av=1 Single hash mode + AV_DOUBLE, // --av=2 Double hash mode + AV_SINGLE_SOFT, // --av=3 Single hash mode (Software AES) + AV_DOUBLE_SOFT, // --av=4 Double hash mode (Software AES) + AV_MAX }; @@ -72,13 +81,15 @@ extern char *opt_userpass; extern char *opt_user; extern char *opt_pass; extern int opt_n_threads; -extern int opt_algo_variant; extern int opt_retry_pause; extern int opt_retries; extern int opt_donate_level; extern int opt_max_cpu_usage; extern int64_t opt_affinity; -extern enum mining_algo opt_algo; + +extern enum Algo opt_algo; +extern enum Variant opt_variant; +extern enum AlgoVariant opt_av; void parse_cmdline(int argc, char *argv[]); void show_usage_and_exit(int status); @@ -88,4 +99,4 @@ const char* get_current_algo_name(void); extern void proper_exit(int reason); -#endif /* __OPTIONS_H__ */ +#endif /* XMRIG_OPTIONS_H */ diff --git a/persistent_memory.h b/persistent_memory.h index 5a6d6ca74..171a86acb 100644 --- a/persistent_memory.h +++ b/persistent_memory.h @@ -21,12 +21,16 @@ * along with this program. If not, see . */ -#ifndef __PERSISTENT_MEMORY_H__ -#define __PERSISTENT_MEMORY_H__ +#ifndef XMRIG_PERSISTENT_MEMORY_H +#define XMRIG_PERSISTENT_MEMORY_H + #include +#include "algo/cryptonight/cryptonight.h" + + enum memory_flags { MEMORY_HUGEPAGES_AVAILABLE = 1, MEMORY_HUGEPAGES_ENABLED = 2, @@ -44,7 +48,7 @@ extern int persistent_memory_flags; const char * persistent_memory_allocate(); void persistent_memory_free(); void * persistent_calloc(size_t num, size_t size); -void * create_persistent_ctx(int thr_id); +void create_cryptonight_ctx(struct cryptonight_ctx **ctx, int thr_id); -#endif /* __PERSISTENT_MEMORY_H__ */ +#endif /* XMRIG_PERSISTENT_MEMORY_H */ diff --git a/stratum.h b/stratum.h index 483695674..00fd426fd 100644 --- a/stratum.h +++ b/stratum.h @@ -21,8 +21,9 @@ * along with this program. If not, see . */ -#ifndef __STRATUM_H__ -#define __STRATUM_H__ +#ifndef XMRIG_STRATUM_H +#define XMRIG_STRATUM_H + #include #include @@ -75,4 +76,4 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s); bool stratum_handle_response(char *buf); bool stratum_keepalived(struct stratum_ctx *sctx); -#endif /* __STRATUM_H__ */ +#endif /* XMRIG_STRATUM_H */ diff --git a/utils/summary.c b/utils/summary.c index 65912bb0b..85cf0e415 100644 --- a/utils/summary.c +++ b/utils/summary.c @@ -77,10 +77,10 @@ static void print_threads() { } if (opt_colors) { - applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, %s, donate=%d%%%s", opt_n_threads, opt_algo_variant, get_current_algo_name(), opt_donate_level, extra); + applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, %s, donate=%d%%%s", opt_n_threads, opt_av, get_current_algo_name(), opt_donate_level, extra); } else { - applog_notime(LOG_INFO, " * THREADS: %d, av=%d, %s, donate=%d%%%s", opt_n_threads, opt_algo_variant, get_current_algo_name(), opt_donate_level, extra); + applog_notime(LOG_INFO, " * THREADS: %d, av=%d, %s, donate=%d%%%s", opt_n_threads, opt_av, get_current_algo_name(), opt_donate_level, extra); } } diff --git a/xmrig.c b/xmrig.c index 7b14933b1..d79808db0 100644 --- a/xmrig.c +++ b/xmrig.c @@ -260,7 +260,8 @@ static void *miner_thread(void *userdata) { uint32_t max_nonce; uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20; - struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) create_persistent_ctx(thr_id); + struct cryptonight_ctx *persistentctx[1]; + create_cryptonight_ctx(persistentctx, thr_id); if (cpu_info.total_logical_cpus > 1 && opt_affinity != -1L) { affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity); @@ -306,7 +307,7 @@ static void *miner_thread(void *userdata) { gettimeofday(&tv_start, NULL); /* scan nonces for a proof-of-work hash */ - const int rc = scanhash_cryptonight(thr_id, hash, work.blob, work.blob_size, work.target, max_nonce, &hashes_done, persistentctx); + const int rc = scanhash_cryptonight(thr_id, hash, (const uint8_t *) work.blob, work.blob_size, work.target, max_nonce, &hashes_done, persistentctx); stats_add_hashes(thr_id, &tv_start, hashes_done); if (!rc) { @@ -335,7 +336,8 @@ static void *miner_thread_double(void *userdata) { uint32_t max_nonce; uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20; - struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) create_persistent_ctx(thr_id); + struct cryptonight_ctx *persistentctx[2]; + create_cryptonight_ctx(persistentctx, thr_id); if (cpu_info.total_logical_cpus > 1 && opt_affinity != -1L) { affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity);