diff --git a/CMakeLists.txt b/CMakeLists.txt
index e0a23b29b..5fa22d7f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,10 +43,10 @@ set(HEADERS_UTILS
set(SOURCES
xmrig.c
algo/cryptonight/cryptonight.c
- algo/cryptonight/cryptonight_av1_aesni.c
- algo/cryptonight/cryptonight_av2_aesni_double.c
- algo/cryptonight/cryptonight_av3_softaes.c
- algo/cryptonight/cryptonight_av4_softaes_double.c
+ algo/cryptonight/cryptonight_av1.c
+ algo/cryptonight/cryptonight_av2.c
+ algo/cryptonight/cryptonight_av3.c
+ algo/cryptonight/cryptonight_av4.c
util.c
options.c
stratum.c
@@ -127,10 +127,10 @@ endif()
if (WITH_AEON)
set(SOURCES_AEON
- algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
- algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
- algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
- algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
+ algo/cryptonight-lite/cryptonight_lite_av1.c
+ algo/cryptonight-lite/cryptonight_lite_av2.c
+ algo/cryptonight-lite/cryptonight_lite_av3.c
+ algo/cryptonight-lite/cryptonight_lite_av4.c
algo/cryptonight-lite/cryptonight_lite_aesni.h
algo/cryptonight-lite/cryptonight_lite_softaes.h
)
diff --git a/algo/cryptonight-lite/cryptonight_lite_aesni.h b/algo/cryptonight-lite/cryptonight_lite_aesni.h
index bb528cfb4..0ac6a1355 100644
--- a/algo/cryptonight-lite/cryptonight_lite_aesni.h
+++ b/algo/cryptonight-lite/cryptonight_lite_aesni.h
@@ -22,10 +22,12 @@
* along with this program. If not, see .
*/
-#ifndef __CRYPTONIGHT_LITE_AESNI_H__
-#define __CRYPTONIGHT_LITE_AESNI_H__
+#ifndef XMRIG_CRYPTONIGHT_LITE_AESNI_H
+#define XMRIG_CRYPTONIGHT_LITE_AESNI_H
+
#include
+#include
#define aes_genkey_sub(imm8) \
@@ -253,4 +255,20 @@ static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint
#endif
-#endif /* __CRYPTONIGHT_LITE_AESNI_H__ */
+static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+{
+ mem_out[0] = EXTRACT64(tmp);
+
+ tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
+ uint64_t vh = EXTRACT64(tmp);
+
+ uint8_t x = vh >> 24;
+ static const uint16_t table = 0x7531;
+ const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
+ vh ^= ((table >> index) & 0x3) << 28;
+
+ mem_out[1] = vh;
+}
+
+
+#endif /* XMRIG_CRYPTONIGHT_LITE_AESNI_H */
diff --git a/algo/cryptonight-lite/cryptonight_lite_av1_aesni.c b/algo/cryptonight-lite/cryptonight_lite_av1.c
similarity index 53%
rename from algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
rename to algo/cryptonight-lite/cryptonight_lite_av1.c
index fb678746e..307c256d1 100644
--- a/algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
+++ b/algo/cryptonight-lite/cryptonight_lite_av1.c
@@ -27,18 +27,19 @@
#include
#include "algo/cryptonight/cryptonight.h"
-#include "cryptonight_lite_aesni.h"
+#include "algo/cryptonight/cryptonight_monero.h"
#include "crypto/c_keccak.h"
+#include "cryptonight_lite_aesni.h"
-void cryptonight_lite_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version)
+void cryptonight_lite_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
- keccak((const uint8_t *) input, size, ctx->state0, 200);
+ keccak(input, size, ctx[0]->state, 200);
- cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
+ cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
- const uint8_t* l0 = ctx->memory;
- uint64_t* h0 = (uint64_t*) ctx->state0;
+ const uint8_t* l0 = ctx[0]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
@@ -71,8 +72,63 @@ void cryptonight_lite_av1_aesni(const void *restrict input, size_t size, void *r
idx0 = al0;
}
- cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
+ cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
- extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+}
+
+
+void cryptonight_lite_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ if (size < 43) {
+ memset(output, 0, 32);
+ return;
+ }
+
+ keccak(input, size, ctx[0]->state, 200);
+
+ VARIANT1_INIT(0);
+
+ cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
+ __m128i cx;
+ cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
+ cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
+
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
+
+ idx0 = EXTRACT64(cx);
+ bx0 = cx;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
+ ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+ }
+
+ cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
+
+ keccakf(h0, 24);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
diff --git a/algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c b/algo/cryptonight-lite/cryptonight_lite_av2.c
similarity index 50%
rename from algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
rename to algo/cryptonight-lite/cryptonight_lite_av2.c
index 727e804b4..31b85d8dd 100644
--- a/algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
+++ b/algo/cryptonight-lite/cryptonight_lite_av2.c
@@ -27,19 +27,20 @@
#include
#include "algo/cryptonight/cryptonight.h"
+#include "algo/cryptonight/cryptonight_monero.h"
#include "cryptonight_lite_aesni.h"
#include "crypto/c_keccak.h"
-void cryptonight_lite_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version)
+void cryptonight_lite_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
- keccak((const uint8_t *) input, size, ctx->state0, 200);
- keccak((const uint8_t *) input + size, size, ctx->state1, 200);
+ keccak(input, size, ctx[0]->state, 200);
+ keccak(input + size, size, ctx[1]->state, 200);
- const uint8_t* l0 = ctx->memory;
- const uint8_t* l1 = ctx->memory + MEMORY_LITE;
- uint64_t* h0 = (uint64_t*) ctx->state0;
- uint64_t* h1 = (uint64_t*) ctx->state1;
+ const uint8_t* l0 = ctx[0]->memory;
+ const uint8_t* l1 = ctx[1]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+ uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
@@ -107,6 +108,95 @@ void cryptonight_lite_av2_aesni_double(const void *restrict input, size_t size,
keccakf(h0, 24);
keccakf(h1, 24);
- extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
- extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+ extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
+}
+
+
+void cryptonight_lite_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ if (size < 43) {
+ memset(output, 0, 64);
+ return;
+ }
+
+ keccak(input, size, ctx[0]->state, 200);
+ keccak(input + size, size, ctx[1]->state, 200);
+
+ VARIANT1_INIT(0);
+ VARIANT1_INIT(1);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ const uint8_t* l1 = ctx[1]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+ uint64_t* h1 = (uint64_t*) ctx[1]->state;
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t al1 = h1[0] ^ h1[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ uint64_t ah1 = h1[1] ^ h1[5];
+
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+ uint64_t idx1 = h1[0] ^ h1[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
+ __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
+ __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
+
+ cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+ cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
+ cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+
+ bx0 = cx0;
+ bx1 = cx1;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
+ ((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
+ ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
+ lo = _umul128(idx1, cl, &hi);
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
+ ((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+ extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
}
diff --git a/algo/cryptonight-lite/cryptonight_lite_av3.c b/algo/cryptonight-lite/cryptonight_lite_av3.c
new file mode 100644
index 000000000..b0d5d3684
--- /dev/null
+++ b/algo/cryptonight-lite/cryptonight_lite_av3.c
@@ -0,0 +1,134 @@
+/* XMRig
+ * Copyright 2010 Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014 Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2017 fireice-uk
+ * Copyright 2017-2018 XMR-Stak ,
+ * Copyright 2018 Lee Clagett
+ * Copyright 2016-2018 XMRig ,
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include
+#include
+
+#include "algo/cryptonight/cryptonight.h"
+#include "algo/cryptonight/cryptonight_monero.h"
+#include "cryptonight_lite_softaes.h"
+#include "crypto/c_keccak.h"
+
+
+void cryptonight_lite_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ keccak(input, size, ctx[0]->state, 200);
+
+ cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
+ __m128i cx;
+ cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
+ cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
+
+ _mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
+ idx0 = EXTRACT64(cx);
+ bx0 = cx;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
+ ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+ }
+
+ cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
+
+ keccakf(h0, 24);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+}
+
+
+void cryptonight_lite_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ if (size < 43) {
+ memset(output, 0, 32);
+ return;
+ }
+
+ keccak(input, size, ctx[0]->state, 200);
+
+ VARIANT1_INIT(0);
+
+ cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
+ __m128i cx;
+ cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
+ cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
+
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
+
+ idx0 = EXTRACT64(cx);
+ bx0 = cx;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
+ ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+ }
+
+ cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
+
+ keccakf(h0, 24);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+}
diff --git a/algo/cryptonight-lite/cryptonight_lite_av3_softaes.c b/algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
deleted file mode 100644
index a5a36fbbc..000000000
--- a/algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/* XMRig
- * Copyright 2010 Jeff Garzik
- * Copyright 2012-2014 pooler
- * Copyright 2014 Lucas Jones
- * Copyright 2014-2016 Wolf9466
- * Copyright 2016 Jay D Dee
- * Copyright 2017 fireice-uk
- * Copyright 2017-2018 XMR-Stak ,
- * Copyright 2018 Lee Clagett
- * Copyright 2016-2018 XMRig ,
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- */
-
-#include
-#include
-
-#include "algo/cryptonight/cryptonight.h"
-#include "cryptonight_lite_softaes.h"
-#include "crypto/c_keccak.h"
-
-
-void cryptonight_lite_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version)
-{
- keccak((const uint8_t *) input, size, ctx->state0, 200);
-
- cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
-
- const uint8_t* l0 = ctx->memory;
- uint64_t* h0 = (uint64_t*) ctx->state0;
-
- uint64_t al0 = h0[0] ^ h0[4];
- uint64_t ah0 = h0[1] ^ h0[5];
- __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-
- uint64_t idx0 = h0[0] ^ h0[4];
-
- for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
- __m128i cx;
- cx = _mm_load_si128((__m128i *)&l0[idx0 & 0xFFFF0]);
- cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
-
- _mm_store_si128((__m128i *)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
- idx0 = EXTRACT64(cx);
- bx0 = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*)&l0[idx0 & 0xFFFF0])[0];
- ch = ((uint64_t*)&l0[idx0 & 0xFFFF0])[1];
- lo = _umul128(idx0, cl, &hi);
-
- al0 += hi;
- ah0 += lo;
-
- ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
- ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
-
- ah0 ^= ch;
- al0 ^= cl;
- idx0 = al0;
- }
-
- cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
-
- keccakf(h0, 24);
- extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
-}
diff --git a/algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c b/algo/cryptonight-lite/cryptonight_lite_av4.c
similarity index 51%
rename from algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
rename to algo/cryptonight-lite/cryptonight_lite_av4.c
index cdf8ff5d3..4a386642e 100644
--- a/algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
+++ b/algo/cryptonight-lite/cryptonight_lite_av4.c
@@ -27,19 +27,20 @@
#include
#include "algo/cryptonight/cryptonight.h"
+#include "algo/cryptonight/cryptonight_monero.h"
#include "cryptonight_lite_softaes.h"
#include "crypto/c_keccak.h"
-void cryptonight_lite_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version)
+void cryptonight_lite_av4_v0(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx)
{
- keccak((const uint8_t *) input, size, ctx->state0, 200);
- keccak((const uint8_t *) input + size, size, ctx->state1, 200);
+ keccak(input, size, ctx[0]->state, 200);
+ keccak(input + size, size, ctx[1]->state, 200);
- const uint8_t* l0 = ctx->memory;
- const uint8_t* l1 = ctx->memory + MEMORY_LITE;
- uint64_t* h0 = (uint64_t*) ctx->state0;
- uint64_t* h1 = (uint64_t*) ctx->state1;
+ const uint8_t* l0 = ctx[0]->memory;
+ const uint8_t* l1 = ctx[1]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+ uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
@@ -107,6 +108,95 @@ void cryptonight_lite_av4_softaes_double(const void *restrict input, size_t size
keccakf(h0, 24);
keccakf(h1, 24);
- extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
- extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+ extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
+}
+
+
+void cryptonight_lite_av4_v1(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ if (size < 43) {
+ memset(output, 0, 64);
+ return;
+ }
+
+ keccak(input, size, ctx[0]->state, 200);
+ keccak(input + size, size, ctx[1]->state, 200);
+
+ VARIANT1_INIT(0);
+ VARIANT1_INIT(1);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ const uint8_t* l1 = ctx[1]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+ uint64_t* h1 = (uint64_t*) ctx[1]->state;
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t al1 = h1[0] ^ h1[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ uint64_t ah1 = h1[1] ^ h1[5];
+
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+ uint64_t idx1 = h1[0] ^ h1[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
+ __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
+ __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
+
+ cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
+ cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
+
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
+ cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+
+ bx0 = cx0;
+ bx1 = cx1;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
+ ((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
+ ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
+ lo = _umul128(idx1, cl, &hi);
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
+ ((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+ extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
}
diff --git a/algo/cryptonight-lite/cryptonight_lite_softaes.h b/algo/cryptonight-lite/cryptonight_lite_softaes.h
index bab3dcafe..1e06a0f27 100644
--- a/algo/cryptonight-lite/cryptonight_lite_softaes.h
+++ b/algo/cryptonight-lite/cryptonight_lite_softaes.h
@@ -25,7 +25,10 @@
#ifndef __CRYPTONIGHT_LITE_SOFTAES_H__
#define __CRYPTONIGHT_LITE_SOFTAES_H__
+
#include
+#include
+
extern __m128i soft_aesenc(__m128i in, __m128i key);
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
@@ -234,4 +237,20 @@ static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint
#endif
+static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+{
+ mem_out[0] = EXTRACT64(tmp);
+
+ tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
+ uint64_t vh = EXTRACT64(tmp);
+
+ uint8_t x = vh >> 24;
+ static const uint16_t table = 0x7531;
+ const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
+ vh ^= ((table >> index) & 0x3) << 28;
+
+ mem_out[1] = vh;
+}
+
+
#endif /* __CRYPTONIGHT_LITE_SOFTAES_H__ */
diff --git a/algo/cryptonight/cryptonight.c b/algo/cryptonight/cryptonight.c
index fb981df21..cd91f9a8e 100644
--- a/algo/cryptonight/cryptonight.c
+++ b/algo/cryptonight/cryptonight.c
@@ -23,10 +23,12 @@
*/
+#include
#include
#include
#include
+
#ifndef BUILD_TEST
# include "xmrig.h"
#endif
@@ -39,113 +41,136 @@
#include "cryptonight_test.h"
#include "options.h"
+#include "utils/applog.h"
+
+
+void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av2_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av3_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_av4_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
-void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version);
-void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version);
-void cryptonight_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version);
-void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version);
#ifndef XMRIG_NO_AEON
-void cryptonight_lite_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t);
-void cryptonight_lite_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t);
-void cryptonight_lite_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t);
-void cryptonight_lite_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t);
+void cryptonight_lite_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_lite_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_lite_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_lite_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_lite_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_lite_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_lite_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
#endif
void (*cryptonight_hash_ctx)(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx, uint8_t version) = NULL;
-static bool self_test() {
- if (cryptonight_hash_ctx == NULL) {
+static inline bool verify(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
+{
+ cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
+ if (func == NULL) {
return false;
}
- char output[64];
+ func(test_input, 76, output, ctx);
- struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
- ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 2, 16);
-
- cryptonight_hash_ctx(test_input, 76, output, ctx, 0);
-
-# ifndef XMRIG_NO_AEON
- bool rc = memcmp(output, opt_algo == ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, (opt_double_hash ? 64 : 32)) == 0;
-# else
- bool rc = memcmp(output, test_output0, opt_double_hash ? 64 : 32)) == 0;
-# endif
-
- if (rc && opt_algo == ALGO_CRYPTONIGHT) {
- cryptonight_hash_ctx(test_input, 76, output, ctx, 7);
-
- rc = memcmp(output, test_output2, (opt_double_hash ? 64 : 32)) == 0;
- }
-
- _mm_free(ctx->memory);
- _mm_free(ctx);
-
- return rc;
+ return memcmp(output, referenceValue, opt_double_hash ? 64 : 32) == 0;
}
-#ifndef XMRIG_NO_AEON
-bool cryptonight_lite_init(int variant) {
- switch (variant) {
- case AEON_AV1_AESNI:
- cryptonight_hash_ctx = cryptonight_lite_av1_aesni;
- break;
+static bool self_test() {
+ struct cryptonight_ctx *ctx[2];
+ uint8_t output[64];
- case AEON_AV2_AESNI_DOUBLE:
- opt_double_hash = true;
- cryptonight_hash_ctx = cryptonight_lite_av2_aesni_double;
- break;
+ const size_t count = opt_double_hash ? 2 : 1;
+ const size_t size = opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE;
+ bool result = false;
- case AEON_AV3_SOFT_AES:
- cryptonight_hash_ctx = cryptonight_lite_av3_softaes;
- break;
-
- case AEON_AV4_SOFT_AES_DOUBLE:
- opt_double_hash = true;
- cryptonight_hash_ctx = cryptonight_lite_av4_softaes_double;
- break;
-
- default:
- break;
+ for (int i = 0; i < count; ++i) {
+ ctx[i] = _mm_malloc(sizeof(struct cryptonight_ctx), 16);
+ ctx[i]->memory = _mm_malloc(size, 16);
}
- return self_test();
+ if (opt_algo == ALGO_CRYPTONIGHT) {
+ result = verify(VARIANT_0, output, ctx, test_output_v0) &&
+ verify(VARIANT_1, output, ctx, test_output_v1) &&
+ verify(VARIANT_0, output, ctx, test_output_v0);
+ }
+ else {
+ result = verify(VARIANT_0, output, ctx, test_output_v0_lite) &&
+ verify(VARIANT_1, output, ctx, test_output_v1_lite);
+ }
+
+
+ for (int i = 0; i < count; ++i) {
+ _mm_free(ctx[i]->memory);
+ _mm_free(ctx[i]);
+ }
+
+ return result;
}
-#endif
-bool cryptonight_init(int variant)
+cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
{
-# ifndef XMRIG_NO_AEON
- if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
- return cryptonight_lite_init(variant);
- }
+ assert(av > AV_AUTO && av < AV_MAX);
+ assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
+
+ static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
+ cryptonight_av1_v0,
+ cryptonight_av2_v0,
+ cryptonight_av3_v0,
+ cryptonight_av4_v0,
+ cryptonight_av1_v1,
+ cryptonight_av2_v1,
+ cryptonight_av3_v1,
+ cryptonight_av4_v1,
+ cryptonight_av1_v2,
+ cryptonight_av2_v2,
+ cryptonight_av3_v2,
+ cryptonight_av4_v2,
+
+# ifndef XMRIG_NO_AEON
+ cryptonight_lite_av1_v0,
+ cryptonight_lite_av2_v0,
+ cryptonight_lite_av3_v0,
+ cryptonight_lite_av4_v0,
+ cryptonight_lite_av1_v1,
+ cryptonight_lite_av2_v1,
+ cryptonight_lite_av3_v1,
+ cryptonight_lite_av4_v1,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+# endif
+ };
+
+ const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
+
+# ifndef NDEBUG
+ cn_hash_fun func = func_table[index];
+
+ assert(index < sizeof(func_table) / sizeof(func_table[0]));
+ assert(func != NULL);
+
+ return func;
+# else
+ return func_table[index];
# endif
+}
- switch (variant) {
- case XMR_AV1_AESNI:
- cryptonight_hash_ctx = cryptonight_av1_aesni;
- break;
- case XMR_AV2_AESNI_DOUBLE:
- opt_double_hash = true;
- cryptonight_hash_ctx = cryptonight_av2_aesni_double;
- break;
-
- case XMR_AV3_SOFT_AES:
- cryptonight_hash_ctx = cryptonight_av3_softaes;
- break;
-
- case XMR_AV4_SOFT_AES_DOUBLE:
- opt_double_hash = true;
- cryptonight_hash_ctx = cryptonight_av4_softaes_double;
- break;
-
- default:
- break;
- }
+bool cryptonight_init(int av)
+{
+ opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT;
return self_test();
}
@@ -174,12 +199,32 @@ static inline void do_skein_hash(const void* input, size_t len, char* output) {
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
+static inline enum Variant cryptonight_variant(uint8_t version)
+{
+ if (opt_variant != VARIANT_AUTO) {
+ return opt_variant;
+ }
+
+ if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
+ return VARIANT_1;
+ }
+
+ if (version >= 8) {
+ return VARIANT_2;
+ }
+
+ return version == 7 ? VARIANT_1 : VARIANT_0;
+}
+
+
#ifndef BUILD_TEST
-int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) {
- uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
+int scanhash_cryptonight(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
+ uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
+ enum Variant variant = cryptonight_variant(blob[0]);
do {
- cryptonight_hash_ctx(blob, blob_size, hash, ctx, ((uint8_t*) blob)[0]);
+ cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx);
+
(*hashes_done)++;
if (unlikely(hash[7] < target)) {
@@ -193,13 +238,14 @@ int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, si
}
-int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) {
- int rc = 0;
- uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
- uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
+int scanhash_cryptonight_double(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
+ int rc = 0;
+ uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
+ uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
+ enum Variant variant = cryptonight_variant(blob[0]);
do {
- cryptonight_hash_ctx(blob, blob_size, hash, ctx, ((uint8_t*) blob)[0]);
+ cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx);
(*hashes_done) += 2;
if (unlikely(hash[7] < target)) {
diff --git a/algo/cryptonight/cryptonight.h b/algo/cryptonight/cryptonight.h
index f8002afe6..74646ef5b 100644
--- a/algo/cryptonight/cryptonight.h
+++ b/algo/cryptonight/cryptonight.h
@@ -22,27 +22,37 @@
* along with this program. If not, see .
*/
-#ifndef __CRYPTONIGHT_H__
-#define __CRYPTONIGHT_H__
+#ifndef XMRIG_CRYPTONIGHT_H
+#define XMRIG_CRYPTONIGHT_H
+
#include
#include
#include
+
+#include "options.h"
+
+
#define MEMORY 2097152 /* 2 MiB */
#define MEMORY_LITE 1048576 /* 1 MiB */
+
struct cryptonight_ctx {
- uint8_t state0[200] __attribute__((aligned(16)));
- uint8_t state1[200] __attribute__((aligned(16)));
- uint8_t* memory __attribute__((aligned(16)));
+ uint8_t state[224] __attribute__((aligned(16)));
+ uint8_t* memory __attribute__((aligned(16)));
};
+typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+
+
extern void (* const extra_hashes[4])(const void *, size_t, char *);
-bool cryptonight_init(int variant);
-int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx);
-int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx);
+cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant);
-#endif /* __CRYPTONIGHT_H__ */
+bool cryptonight_init(int av);
+int scanhash_cryptonight(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx);
+int scanhash_cryptonight_double(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx);
+
+#endif /* XMRIG_CRYPTONIGHT_H */
diff --git a/algo/cryptonight/cryptonight_aesni.h b/algo/cryptonight/cryptonight_aesni.h
index e4d6d42f1..b60428979 100644
--- a/algo/cryptonight/cryptonight_aesni.h
+++ b/algo/cryptonight/cryptonight_aesni.h
@@ -22,10 +22,12 @@
* along with this program. If not, see .
*/
-#ifndef __CRYPTONIGHT_AESNI_H__
-#define __CRYPTONIGHT_AESNI_H__
+#ifndef XMRIG_CRYPTONIGHT_AESNI_H
+#define XMRIG_CRYPTONIGHT_AESNI_H
+
#include
+#include
#define aes_genkey_sub(imm8) \
@@ -253,4 +255,20 @@ static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint
#endif
-#endif /* __CRYPTONIGHT_AESNI_H__ */
+static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+{
+ mem_out[0] = EXTRACT64(tmp);
+
+ tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
+ uint64_t vh = EXTRACT64(tmp);
+
+ uint8_t x = vh >> 24;
+ static const uint16_t table = 0x7531;
+ const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
+ vh ^= ((table >> index) & 0x3) << 28;
+
+ mem_out[1] = vh;
+}
+
+
+#endif /* XMRIG_CRYPTONIGHT_AESNI_H */
diff --git a/algo/cryptonight/cryptonight_av1_aesni.c b/algo/cryptonight/cryptonight_av1.c
similarity index 52%
rename from algo/cryptonight/cryptonight_av1_aesni.c
rename to algo/cryptonight/cryptonight_av1.c
index b2c45c700..4028dd5d0 100644
--- a/algo/cryptonight/cryptonight_av1_aesni.c
+++ b/algo/cryptonight/cryptonight_av1.c
@@ -32,16 +32,14 @@
#include "cryptonight_monero.h"
-void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version)
+void cryptonight_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
- keccak((const uint8_t *) input, size, ctx->state0, 200);
+ keccak(input, size, ctx[0]->state, 200);
- VARIANT1_INIT(0);
+ cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
- cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
-
- const uint8_t* l0 = ctx->memory;
- uint64_t* h0 = (uint64_t*) ctx->state0;
+ const uint8_t* l0 = ctx[0]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
@@ -55,7 +53,6 @@ void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restri
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
- VARIANT1_1(&l0[idx0 & 0x1FFFF0]);
idx0 = EXTRACT64(cx);
bx0 = cx;
@@ -67,18 +64,77 @@ void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restri
al0 += hi;
ah0 += lo;
- VARIANT1_2(ah0, 0);
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
- VARIANT1_2(ah0, 0);
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
- cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
+ cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
- extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+}
+
+
+void cryptonight_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ if (size < 43) {
+ memset(output, 0, 32);
+ return;
+ }
+
+ keccak(input, size, ctx[0]->state, 200);
+
+ VARIANT1_INIT(0);
+
+ cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
+ __m128i cx;
+ cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
+ cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
+
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
+
+ idx0 = EXTRACT64(cx);
+ bx0 = cx;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
+ ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+ }
+
+ cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
+
+ keccakf(h0, 24);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+}
+
+
+void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+
}
diff --git a/algo/cryptonight/cryptonight_av2_aesni_double.c b/algo/cryptonight/cryptonight_av2.c
similarity index 51%
rename from algo/cryptonight/cryptonight_av2_aesni_double.c
rename to algo/cryptonight/cryptonight_av2.c
index 345207f73..7e5f4109f 100644
--- a/algo/cryptonight/cryptonight_av2_aesni_double.c
+++ b/algo/cryptonight/cryptonight_av2.c
@@ -32,18 +32,15 @@
#include "cryptonight_monero.h"
-void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version)
+void cryptonight_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
- keccak((const uint8_t *) input, size, ctx->state0, 200);
- keccak((const uint8_t *) input + size, size, ctx->state1, 200);
+ keccak(input, size, ctx[0]->state, 200);
+ keccak(input + size, size, ctx[1]->state, 200);
- VARIANT1_INIT(0);
- VARIANT1_INIT(1);
-
- const uint8_t* l0 = ctx->memory;
- const uint8_t* l1 = ctx->memory + MEMORY;
- uint64_t* h0 = (uint64_t*) ctx->state0;
- uint64_t* h1 = (uint64_t*) ctx->state1;
+ const uint8_t* l0 = ctx[0]->memory;
+ const uint8_t* l1 = ctx[1]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+ uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
@@ -69,8 +66,94 @@ void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
- VARIANT1_1(&l0[idx0 & 0x1FFFF0]);
- VARIANT1_1(&l1[idx1 & 0x1FFFF0]);
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+
+ bx0 = cx0;
+ bx1 = cx1;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
+ ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
+ ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
+ lo = _umul128(idx1, cl, &hi);
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
+ ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+ extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
+}
+
+
+void cryptonight_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ if (size < 43) {
+ memset(output, 0, 64);
+ return;
+ }
+
+ keccak(input, size, ctx[0]->state, 200);
+ keccak(input + size, size, ctx[1]->state, 200);
+
+ VARIANT1_INIT(0);
+ VARIANT1_INIT(1);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ const uint8_t* l1 = ctx[1]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+ uint64_t* h1 = (uint64_t*) ctx[1]->state;
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t al1 = h1[0] ^ h1[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ uint64_t ah1 = h1[1] ^ h1[5];
+
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+ uint64_t idx1 = h1[0] ^ h1[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
+ __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
+ __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
+
+ cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+ cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
+ cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
@@ -86,10 +169,8 @@ void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void
al0 += hi;
ah0 += lo;
- VARIANT1_2(ah0, 0);
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
- ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
- VARIANT1_2(ah0, 0);
+ ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
@@ -102,10 +183,8 @@ void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void
al1 += hi;
ah1 += lo;
- VARIANT1_2(ah1, 1);
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
- ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
- VARIANT1_2(ah1, 1);
+ ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1;
ah1 ^= ch;
al1 ^= cl;
@@ -118,6 +197,12 @@ void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void
keccakf(h0, 24);
keccakf(h1, 24);
- extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
- extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+ extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
+}
+
+
+void cryptonight_av2_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+
}
diff --git a/algo/cryptonight/cryptonight_av3.c b/algo/cryptonight/cryptonight_av3.c
new file mode 100644
index 000000000..a70197ce4
--- /dev/null
+++ b/algo/cryptonight/cryptonight_av3.c
@@ -0,0 +1,139 @@
+/* XMRig
+ * Copyright 2010 Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014 Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2017 fireice-uk
+ * Copyright 2017-2018 XMR-Stak ,
+ * Copyright 2018 Lee Clagett
+ * Copyright 2016-2018 XMRig ,
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include
+#include
+
+#include "crypto/c_keccak.h"
+#include "cryptonight.h"
+#include "cryptonight_monero.h"
+#include "cryptonight_softaes.h"
+
+
+void cryptonight_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ keccak(input, size, ctx[0]->state, 200);
+
+ cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
+ __m128i cx;
+ cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
+ cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
+
+ _mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
+ idx0 = EXTRACT64(cx);
+ bx0 = cx;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
+ ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+ }
+
+ cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
+
+ keccakf(h0, 24);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+}
+
+
+void cryptonight_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ if (size < 43) {
+ memset(output, 0, 32);
+ return;
+ }
+
+ keccak(input, size, ctx[0]->state, 200);
+
+ VARIANT1_INIT(0);
+
+ cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
+ __m128i cx;
+ cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
+ cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
+
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
+
+ idx0 = EXTRACT64(cx);
+ bx0 = cx;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
+ ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+ }
+
+ cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
+
+ keccakf(h0, 24);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+}
+
+
+void cryptonight_av3_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+}
diff --git a/algo/cryptonight/cryptonight_av3_softaes.c b/algo/cryptonight/cryptonight_av3_softaes.c
deleted file mode 100644
index 1d9f654a9..000000000
--- a/algo/cryptonight/cryptonight_av3_softaes.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* XMRig
- * Copyright 2010 Jeff Garzik
- * Copyright 2012-2014 pooler
- * Copyright 2014 Lucas Jones
- * Copyright 2014-2016 Wolf9466
- * Copyright 2016 Jay D Dee
- * Copyright 2017 fireice-uk
- * Copyright 2017-2018 XMR-Stak ,
- * Copyright 2018 Lee Clagett
- * Copyright 2016-2018 XMRig ,
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- */
-
-#include
-#include
-
-#include "crypto/c_keccak.h"
-#include "cryptonight.h"
-#include "cryptonight_monero.h"
-#include "cryptonight_softaes.h"
-
-
-void cryptonight_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version)
-{
- keccak((const uint8_t *) input, size, ctx->state0, 200);
-
- VARIANT1_INIT(0);
-
- cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
-
- const uint8_t* l0 = ctx->memory;
- uint64_t* h0 = (uint64_t*) ctx->state0;
-
- uint64_t al0 = h0[0] ^ h0[4];
- uint64_t ah0 = h0[1] ^ h0[5];
- __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-
- uint64_t idx0 = h0[0] ^ h0[4];
-
- for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
- __m128i cx;
- cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
- cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
-
- _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
- VARIANT1_1(&l0[idx0 & 0x1FFFF0]);
- idx0 = EXTRACT64(cx);
- bx0 = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
- ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
- lo = _umul128(idx0, cl, &hi);
-
- al0 += hi;
- ah0 += lo;
-
- VARIANT1_2(ah0, 0);
- ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
- ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
- VARIANT1_2(ah0, 0);
-
- ah0 ^= ch;
- al0 ^= cl;
- idx0 = al0;
- }
-
- cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
-
- keccakf(h0, 24);
- extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
-}
diff --git a/algo/cryptonight/cryptonight_av4_softaes_double.c b/algo/cryptonight/cryptonight_av4.c
similarity index 51%
rename from algo/cryptonight/cryptonight_av4_softaes_double.c
rename to algo/cryptonight/cryptonight_av4.c
index 4085429d1..bb4840952 100644
--- a/algo/cryptonight/cryptonight_av4_softaes_double.c
+++ b/algo/cryptonight/cryptonight_av4.c
@@ -32,18 +32,15 @@
#include "cryptonight_softaes.h"
-void cryptonight_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx, uint8_t version)
+void cryptonight_av4_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
- keccak((const uint8_t *) input, size, ctx->state0, 200);
- keccak((const uint8_t *) input + size, size, ctx->state1, 200);
+ keccak(input, size, ctx[0]->state, 200);
+ keccak(input + size, size, ctx[1]->state, 200);
- VARIANT1_INIT(0);
- VARIANT1_INIT(1);
-
- const uint8_t* l0 = ctx->memory;
- const uint8_t* l1 = ctx->memory + MEMORY;
- uint64_t* h0 = (uint64_t*) ctx->state0;
- uint64_t* h1 = (uint64_t*) ctx->state1;
+ const uint8_t* l0 = ctx[0]->memory;
+ const uint8_t* l1 = ctx[1]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+ uint64_t* h1 = (uint64_t*) ctx[1]->state;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
@@ -69,8 +66,94 @@ void cryptonight_av4_softaes_double(const void *restrict input, size_t size, voi
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
- VARIANT1_1(&l0[idx0 & 0x1FFFF0]);
- VARIANT1_1(&l1[idx1 & 0x1FFFF0]);
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+
+ bx0 = cx0;
+ bx1 = cx1;
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
+ ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
+ lo = _umul128(idx0, cl, &hi);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
+ ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
+ ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
+ lo = _umul128(idx1, cl, &hi);
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
+ ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+ extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
+}
+
+
+void cryptonight_av4_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+ if (size < 43) {
+ memset(output, 0, 64);
+ return;
+ }
+
+ keccak(input, size, ctx[0]->state, 200);
+ keccak(input + size, size, ctx[1]->state, 200);
+
+ VARIANT1_INIT(0);
+ VARIANT1_INIT(1);
+
+ const uint8_t* l0 = ctx[0]->memory;
+ const uint8_t* l1 = ctx[1]->memory;
+ uint64_t* h0 = (uint64_t*) ctx[0]->state;
+ uint64_t* h1 = (uint64_t*) ctx[1]->state;
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+
+ uint64_t al0 = h0[0] ^ h0[4];
+ uint64_t al1 = h1[0] ^ h1[4];
+ uint64_t ah0 = h0[1] ^ h0[5];
+ uint64_t ah1 = h1[1] ^ h1[5];
+
+ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+
+ uint64_t idx0 = h0[0] ^ h0[4];
+ uint64_t idx1 = h1[0] ^ h1[4];
+
+ for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
+ __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
+ __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
+
+ cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
+ cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
+
+ cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
+ cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
@@ -86,10 +169,8 @@ void cryptonight_av4_softaes_double(const void *restrict input, size_t size, voi
al0 += hi;
ah0 += lo;
- VARIANT1_2(ah0, 0);
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
- ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
- VARIANT1_2(ah0, 0);
+ ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
ah0 ^= ch;
al0 ^= cl;
@@ -102,10 +183,8 @@ void cryptonight_av4_softaes_double(const void *restrict input, size_t size, voi
al1 += hi;
ah1 += lo;
- VARIANT1_2(ah1, 1);
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
- ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
- VARIANT1_2(ah1, 1);
+ ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1;
ah1 ^= ch;
al1 ^= cl;
@@ -118,6 +197,11 @@ void cryptonight_av4_softaes_double(const void *restrict input, size_t size, voi
keccakf(h0, 24);
keccakf(h1, 24);
- extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
- extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+ extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
+}
+
+
+void cryptonight_av4_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx *restrict ctx)
+{
}
diff --git a/algo/cryptonight/cryptonight_monero.h b/algo/cryptonight/cryptonight_monero.h
index 2a4e7ee10..44ac27b03 100644
--- a/algo/cryptonight/cryptonight_monero.h
+++ b/algo/cryptonight/cryptonight_monero.h
@@ -6,6 +6,7 @@
* Copyright 2016 Jay D Dee
* Copyright 2017-2018 XMR-Stak ,
* Copyright 2018 Lee Clagett
+ * Copyright 2018 SChernykh
* Copyright 2016-2018 XMRig ,
*
* This program is free software: you can redistribute it and/or modify
@@ -22,30 +23,103 @@
* along with this program. If not, see .
*/
-#ifndef __CRYPTONIGHT_MONERO_H__
-#define __CRYPTONIGHT_MONERO_H__
+#ifndef XMRIG_CRYPTONIGHT_MONERO_H
+#define XMRIG_CRYPTONIGHT_MONERO_H
+
+
+#include
+#include
-// VARIANT ALTERATIONS
#define VARIANT1_INIT(part) \
- uint64_t tweak1_2_##part = 0; \
- if (version > 6) { \
- tweak1_2_##part = (*(const uint64_t*)(((const uint8_t*) input) + 35 + part * size) ^ \
- *((const uint64_t*)(ctx->state##part) + 24)); \
- }
+ uint64_t tweak1_2_##part = (*(const uint64_t*)(input + 35 + part * size) ^ \
+ *((const uint64_t*)(ctx[part]->state) + 24)); \
-#define VARIANT1_1(p) \
- if (version > 6) { \
- const uint8_t tmp = ((const uint8_t*)(p))[11]; \
- static const uint32_t table = 0x75310; \
- const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
- ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
- }
+#ifndef XMRIG_ARM
+# define VARIANT2_INIT(part) \
+ __m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \
+ __m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]);
-#define VARIANT1_2(p, part) \
- if (version > 6) { \
- (p) ^= tweak1_2_##part; \
- }
+#ifdef _MSC_VER
+# define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { _control87(RC_DOWN, MCW_RC); }
+#else
+# define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { fesetround(FE_DOWNWARD); }
+#endif
+# define VARIANT2_INTEGER_MATH(part, cl, cx) \
+ do { \
+ const uint64_t sqrt_result = static_cast(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \
+ const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
+ cl ^= static_cast(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \
+ const uint32_t d = static_cast(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \
+ const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
+ const uint64_t division_result = static_cast(cx_1 / d) + ((cx_1 % d) << 32); \
+ division_result_xmm_##part = _mm_cvtsi64_si128(static_cast(division_result)); \
+ sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
+ } while (0)
-#endif /* __CRYPTONIGHT_MONERO_H__ */
+# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \
+ do { \
+ const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
+ const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
+ const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
+ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
+ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
+ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
+ } while (0)
+
+# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
+ do { \
+ const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
+ const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
+ hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
+ const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
+ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
+ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
+ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
+ } while (0)
+
+#else
+# define VARIANT2_INIT(part) \
+ uint64_t division_result_##part = h##part[12]; \
+ uint64_t sqrt_result_##part = h##part[13];
+
+# define VARIANT2_INTEGER_MATH(part, cl, cx) \
+ do { \
+ const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
+ cl ^= division_result_##part ^ (sqrt_result_##part << 32); \
+ const uint32_t d = static_cast(cx_0 + (sqrt_result_##part << 1)) | 0x80000001UL; \
+ const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
+ division_result_##part = static_cast(cx_1 / d) + ((cx_1 % d) << 32); \
+ const uint64_t sqrt_input = cx_0 + division_result_##part; \
+ sqrt_result_##part = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \
+ const uint64_t s = sqrt_result_##part >> 1; \
+ const uint64_t b = sqrt_result_##part & 1; \
+ const uint64_t r2 = (uint64_t)(s) * (s + b) + (sqrt_result_##part << 32); \
+ sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
+ } while (0)
+
+# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \
+ do { \
+ const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))); \
+ const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
+ const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
+ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
+ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
+ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
+ } while (0)
+
+# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
+ do { \
+ const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \
+ const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
+ hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
+ const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
+ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
+ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
+ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
+ } while (0)
+#endif
+#endif /* XMRIG_CRYPTONIGHT_MONERO_H */
diff --git a/algo/cryptonight/cryptonight_softaes.h b/algo/cryptonight/cryptonight_softaes.h
index f12ab8c67..4e25b768e 100644
--- a/algo/cryptonight/cryptonight_softaes.h
+++ b/algo/cryptonight/cryptonight_softaes.h
@@ -22,10 +22,13 @@
* along with this program. If not, see .
*/
-#ifndef __CRYPTONIGHT_SOFTAES_H__
-#define __CRYPTONIGHT_SOFTAES_H__
+#ifndef XMRIG_CRYPTONIGHT_SOFTAES_H
+#define XMRIG_CRYPTONIGHT_SOFTAES_H
+
#include
+#include
+
extern __m128i soft_aesenc(__m128i in, __m128i key);
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
@@ -234,4 +237,20 @@ inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *p
#endif
-#endif /* __CRYPTONIGHT_SOFTAES_H__ */
+static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+{
+ mem_out[0] = EXTRACT64(tmp);
+
+ tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
+ uint64_t vh = EXTRACT64(tmp);
+
+ uint8_t x = vh >> 24;
+ static const uint16_t table = 0x7531;
+ const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
+ vh ^= ((table >> index) & 0x3) << 28;
+
+ mem_out[1] = vh;
+}
+
+
+#endif /* XMRIG_CRYPTONIGHT_SOFTAES_H */
diff --git a/algo/cryptonight/cryptonight_test.h b/algo/cryptonight/cryptonight_test.h
index c5ef5037d..04efe911a 100644
--- a/algo/cryptonight/cryptonight_test.h
+++ b/algo/cryptonight/cryptonight_test.h
@@ -41,31 +41,50 @@ const static uint8_t test_input[152] = {
};
-const static uint8_t test_output0[64] = {
+const static uint8_t test_output_v0[64] = {
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
- 0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
+ 0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F
};
+// Cryptonight variant 1 (Monero v7)
+const static uint8_t test_output_v1[64] = {
+ 0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
+ 0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
+ 0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
+ 0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22
+};
+
+
+// Cryptonight variant 2 (Monero v8)
+const static uint8_t test_output_v2[64] = {
+ 0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
+ 0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
+ 0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
+ 0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78
+};
+
+
+
#ifndef XMRIG_NO_AEON
-const static uint8_t test_output1[64] = {
+const static uint8_t test_output_v0_lite[64] = {
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
- 0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
+ 0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD
+};
+
+
+// AEON v7
+const static uint8_t test_output_v1_lite[64] = {
+ 0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
+ 0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
+ 0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
+ 0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F
};
#endif
-// Monero v7
-const static uint8_t test_output2[64] = {
- 0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
- 0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
- 0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
- 0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
-};
-
-
#endif /* XMRIG_CRYPTONIGHT_TEST_H */
diff --git a/memory.c b/memory.c
index 112f11153..b8a9eb65b 100644
--- a/memory.c
+++ b/memory.c
@@ -24,32 +24,12 @@
#include
#include "persistent_memory.h"
-#include "algo/cryptonight/cryptonight.h"
#include "options.h"
+
static size_t offset = 0;
-#ifndef XMRIG_NO_AEON
-static void * create_persistent_ctx_lite(int thr_id) {
- struct cryptonight_ctx *ctx = NULL;
-
- if (!opt_double_hash) {
- const size_t offset = MEMORY * (thr_id + 1);
-
- ctx = (struct cryptonight_ctx *) &persistent_memory[offset + MEMORY_LITE];
- ctx->memory = (uint8_t*) &persistent_memory[offset];
- return ctx;
- }
-
- ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
- ctx->memory = (uint8_t*) &persistent_memory[MEMORY * (thr_id + 1)];
-
- return ctx;
-}
-#endif
-
-
void * persistent_calloc(size_t num, size_t size) {
void *mem = &persistent_memory[offset];
offset += (num * size);
@@ -60,17 +40,14 @@ void * persistent_calloc(size_t num, size_t size) {
}
-void * create_persistent_ctx(int thr_id) {
-# ifndef XMRIG_NO_AEON
- if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
- return create_persistent_ctx_lite(thr_id);
+void create_cryptonight_ctx(struct cryptonight_ctx **ctx, int thr_id)
+{
+ const int ratio = (opt_double_hash && opt_algo == ALGO_CRYPTONIGHT) ? 2 : 1;
+ ctx[0] = persistent_calloc(1, sizeof(struct cryptonight_ctx));
+ ctx[0]->memory = &persistent_memory[MEMORY * (thr_id * ratio + 1)];
+
+ if (opt_double_hash) {
+ ctx[1] = persistent_calloc(1, sizeof(struct cryptonight_ctx));
+ ctx[1]->memory = ctx[0]->memory + (opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE);
}
-# endif
-
- struct cryptonight_ctx *ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
-
- const int ratio = opt_double_hash ? 2 : 1;
- ctx->memory = (uint8_t*) &persistent_memory[MEMORY * (thr_id * ratio + 1)];
-
- return ctx;
}
diff --git a/options.c b/options.c
index 5dabefbdf..a4cec7960 100644
--- a/options.c
+++ b/options.c
@@ -38,7 +38,6 @@
int64_t opt_affinity = -1L;
int opt_n_threads = 0;
-int opt_algo_variant = 0;
int opt_retries = 5;
int opt_retry_pause = 5;
int opt_donate_level = DONATE_LEVEL;
@@ -55,13 +54,16 @@ char *opt_userpass = NULL;
char *opt_user = NULL;
char *opt_pass = NULL;
-enum mining_algo opt_algo = ALGO_CRYPTONIGHT;
+enum Algo opt_algo = ALGO_CRYPTONIGHT;
+enum Variant opt_variant = VARIANT_AUTO;
+enum AlgoVariant opt_av = AV_AUTO;
static char const usage[] = "\
Usage: " APP_ID " [OPTIONS]\n\
Options:\n\
-a, --algo=ALGO cryptonight (default) or cryptonight-lite\n\
+ --variant=N cryptonight variant: 0-2\n\
-o, --url=URL URL of mining server\n\
-b, --backup-url=URL URL of backup mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\
@@ -110,18 +112,27 @@ static struct option const options[] = {
{ "user", 1, NULL, 'u' },
{ "userpass", 1, NULL, 'O' },
{ "version", 0, NULL, 'V' },
- { 0, 0, 0, 0 }
+ { "variant", 1, NULL, 1021 },
+ { NULL, 0, NULL, 0 }
};
static const char *algo_names[] = {
- [ALGO_CRYPTONIGHT] = "cryptonight",
+ "cryptonight",
# ifndef XMRIG_NO_AEON
- [ALGO_CRYPTONIGHT_LITE] = "cryptonight-lite"
+ "cryptonight-lite"
# endif
};
+static const char *variant_names[] = {
+ "auto"
+ "0",
+ "1",
+ "2",
+};
+
+
#ifndef XMRIG_NO_AEON
static int get_cryptonight_lite_variant(int variant) {
if (variant <= AEON_AV0_AUTO || variant >= AEON_AV_MAX) {
@@ -144,11 +155,11 @@ static int get_algo_variant(int algo, int variant) {
}
# endif
- if (variant <= XMR_AV0_AUTO || variant >= XMR_AV_MAX) {
- return (cpu_info.flags & CPU_FLAG_AES) ? XMR_AV1_AESNI : XMR_AV3_SOFT_AES;
+ if (variant <= AV_AUTO || variant >= AV_MAX) {
+ return (cpu_info.flags & CPU_FLAG_AES) ? AV_SINGLE : AV_SINGLE_SOFT;
}
- if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= XMR_AV2_AESNI_DOUBLE) {
+ if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AV_DOUBLE) {
return variant + 2;
}
@@ -300,11 +311,11 @@ static void parse_arg(int key, char *arg) {
case 'v': /* --av */
v = atoi(arg);
- if (v < 0 || v > 1000) {
+ if (v <= AV_AUTO || v >= AV_MAX) {
show_usage_and_exit(1);
}
- opt_algo_variant = v;
+ opt_av = v;
break;
case 1020: /* --cpu-affinity */
@@ -451,9 +462,9 @@ void parse_cmdline(int argc, char *argv[]) {
sprintf(opt_userpass, "%s:%s", opt_user, opt_pass);
}
- opt_algo_variant = get_algo_variant(opt_algo, opt_algo_variant);
+ opt_av = get_algo_variant(opt_algo, opt_av);
- if (!cryptonight_init(opt_algo_variant)) {
+ if (!cryptonight_init(opt_av)) {
applog(LOG_ERR, "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations.");
proper_exit(1);
}
diff --git a/options.h b/options.h
index a14aaeeb4..0dffb1cc8 100644
--- a/options.h
+++ b/options.h
@@ -21,8 +21,8 @@
* along with this program. If not, see .
*/
-#ifndef __OPTIONS_H__
-#define __OPTIONS_H__
+#ifndef XMRIG_OPTIONS_H
+#define XMRIG_OPTIONS_H
#include
#include
@@ -32,19 +32,28 @@
#endif
-enum mining_algo {
+enum Algo {
ALGO_CRYPTONIGHT, /* CryptoNight (Monero) */
ALGO_CRYPTONIGHT_LITE, /* CryptoNight-Lite (AEON) */
};
-enum xmr_algo_variant {
- XMR_AV0_AUTO,
- XMR_AV1_AESNI,
- XMR_AV2_AESNI_DOUBLE,
- XMR_AV3_SOFT_AES,
- XMR_AV4_SOFT_AES_DOUBLE,
- XMR_AV_MAX
+enum Variant {
+ VARIANT_AUTO = -1,
+ VARIANT_0 = 0,
+ VARIANT_1 = 1,
+ VARIANT_2 = 2,
+ VARIANT_MAX
+};
+
+
+enum AlgoVariant {
+ AV_AUTO, // --av=0 Automatic mode.
+ AV_SINGLE, // --av=1 Single hash mode
+ AV_DOUBLE, // --av=2 Double hash mode
+ AV_SINGLE_SOFT, // --av=3 Single hash mode (Software AES)
+ AV_DOUBLE_SOFT, // --av=4 Double hash mode (Software AES)
+ AV_MAX
};
@@ -72,13 +81,15 @@ extern char *opt_userpass;
extern char *opt_user;
extern char *opt_pass;
extern int opt_n_threads;
-extern int opt_algo_variant;
extern int opt_retry_pause;
extern int opt_retries;
extern int opt_donate_level;
extern int opt_max_cpu_usage;
extern int64_t opt_affinity;
-extern enum mining_algo opt_algo;
+
+extern enum Algo opt_algo;
+extern enum Variant opt_variant;
+extern enum AlgoVariant opt_av;
void parse_cmdline(int argc, char *argv[]);
void show_usage_and_exit(int status);
@@ -88,4 +99,4 @@ const char* get_current_algo_name(void);
extern void proper_exit(int reason);
-#endif /* __OPTIONS_H__ */
+#endif /* XMRIG_OPTIONS_H */
diff --git a/persistent_memory.h b/persistent_memory.h
index 5a6d6ca74..171a86acb 100644
--- a/persistent_memory.h
+++ b/persistent_memory.h
@@ -21,12 +21,16 @@
* along with this program. If not, see .
*/
-#ifndef __PERSISTENT_MEMORY_H__
-#define __PERSISTENT_MEMORY_H__
+#ifndef XMRIG_PERSISTENT_MEMORY_H
+#define XMRIG_PERSISTENT_MEMORY_H
+
#include
+#include "algo/cryptonight/cryptonight.h"
+
+
enum memory_flags {
MEMORY_HUGEPAGES_AVAILABLE = 1,
MEMORY_HUGEPAGES_ENABLED = 2,
@@ -44,7 +48,7 @@ extern int persistent_memory_flags;
const char * persistent_memory_allocate();
void persistent_memory_free();
void * persistent_calloc(size_t num, size_t size);
-void * create_persistent_ctx(int thr_id);
+void create_cryptonight_ctx(struct cryptonight_ctx **ctx, int thr_id);
-#endif /* __PERSISTENT_MEMORY_H__ */
+#endif /* XMRIG_PERSISTENT_MEMORY_H */
diff --git a/stratum.h b/stratum.h
index 483695674..00fd426fd 100644
--- a/stratum.h
+++ b/stratum.h
@@ -21,8 +21,9 @@
* along with this program. If not, see .
*/
-#ifndef __STRATUM_H__
-#define __STRATUM_H__
+#ifndef XMRIG_STRATUM_H
+#define XMRIG_STRATUM_H
+
#include
#include
@@ -75,4 +76,4 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
bool stratum_handle_response(char *buf);
bool stratum_keepalived(struct stratum_ctx *sctx);
-#endif /* __STRATUM_H__ */
+#endif /* XMRIG_STRATUM_H */
diff --git a/utils/summary.c b/utils/summary.c
index 65912bb0b..85cf0e415 100644
--- a/utils/summary.c
+++ b/utils/summary.c
@@ -77,10 +77,10 @@ static void print_threads() {
}
if (opt_colors) {
- applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, %s, donate=%d%%%s", opt_n_threads, opt_algo_variant, get_current_algo_name(), opt_donate_level, extra);
+ applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, %s, donate=%d%%%s", opt_n_threads, opt_av, get_current_algo_name(), opt_donate_level, extra);
}
else {
- applog_notime(LOG_INFO, " * THREADS: %d, av=%d, %s, donate=%d%%%s", opt_n_threads, opt_algo_variant, get_current_algo_name(), opt_donate_level, extra);
+ applog_notime(LOG_INFO, " * THREADS: %d, av=%d, %s, donate=%d%%%s", opt_n_threads, opt_av, get_current_algo_name(), opt_donate_level, extra);
}
}
diff --git a/xmrig.c b/xmrig.c
index 7b14933b1..d79808db0 100644
--- a/xmrig.c
+++ b/xmrig.c
@@ -260,7 +260,8 @@ static void *miner_thread(void *userdata) {
uint32_t max_nonce;
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
- struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) create_persistent_ctx(thr_id);
+ struct cryptonight_ctx *persistentctx[1];
+ create_cryptonight_ctx(persistentctx, thr_id);
if (cpu_info.total_logical_cpus > 1 && opt_affinity != -1L) {
affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity);
@@ -306,7 +307,7 @@ static void *miner_thread(void *userdata) {
gettimeofday(&tv_start, NULL);
/* scan nonces for a proof-of-work hash */
- const int rc = scanhash_cryptonight(thr_id, hash, work.blob, work.blob_size, work.target, max_nonce, &hashes_done, persistentctx);
+ const int rc = scanhash_cryptonight(thr_id, hash, (const uint8_t *) work.blob, work.blob_size, work.target, max_nonce, &hashes_done, persistentctx);
stats_add_hashes(thr_id, &tv_start, hashes_done);
if (!rc) {
@@ -335,7 +336,8 @@ static void *miner_thread_double(void *userdata) {
uint32_t max_nonce;
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
- struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) create_persistent_ctx(thr_id);
+ struct cryptonight_ctx *persistentctx[2];
+ create_cryptonight_ctx(persistentctx, thr_id);
if (cpu_info.total_logical_cpus > 1 && opt_affinity != -1L) {
affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity);