From 04c5d6d00ad4ebbebebc014417261b41342f20d3 Mon Sep 17 00:00:00 2001 From: XMRig <support@xmrig.com> Date: Fri, 9 Jun 2017 02:47:46 +0300 Subject: [PATCH] Print threads summary. --- .../cryptonight-lite/cryptonight_lite_aesni.h | 256 ------------------ .../cryptonight_lite_av1_aesni.c | 77 ------ .../cryptonight_lite_av2_aesni_double.c | 111 -------- .../cryptonight_lite_av3_softaes.c | 77 ------ .../cryptonight_lite_av4_softaes_double.c | 111 -------- .../cryptonight_lite_softaes.h | 237 ---------------- algo/cryptonight/cryptonight.c | 244 ----------------- algo/cryptonight/cryptonight.h | 47 ---- algo/cryptonight/cryptonight_aesni.h | 256 ------------------ algo/cryptonight/cryptonight_av1_aesni.c | 77 ------ .../cryptonight_av2_aesni_double.c | 111 -------- algo/cryptonight/cryptonight_av3_softaes.c | 77 ------ .../cryptonight_av4_softaes_double.c | 111 -------- algo/cryptonight/cryptonight_softaes.h | 237 ---------------- crypto/hash.c | 24 -- src/Options.cpp | 33 +-- src/Options.h | 3 + src/Summary.cpp | 34 ++- 18 files changed, 51 insertions(+), 2072 deletions(-) delete mode 100644 algo/cryptonight-lite/cryptonight_lite_aesni.h delete mode 100644 algo/cryptonight-lite/cryptonight_lite_av1_aesni.c delete mode 100644 algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c delete mode 100644 algo/cryptonight-lite/cryptonight_lite_av3_softaes.c delete mode 100644 algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c delete mode 100644 algo/cryptonight-lite/cryptonight_lite_softaes.h delete mode 100644 algo/cryptonight/cryptonight.c delete mode 100644 algo/cryptonight/cryptonight.h delete mode 100644 algo/cryptonight/cryptonight_aesni.h delete mode 100644 algo/cryptonight/cryptonight_av1_aesni.c delete mode 100644 algo/cryptonight/cryptonight_av2_aesni_double.c delete mode 100644 algo/cryptonight/cryptonight_av3_softaes.c delete mode 100644 algo/cryptonight/cryptonight_av4_softaes_double.c delete mode 100644 algo/cryptonight/cryptonight_softaes.h delete 
mode 100644 crypto/hash.c diff --git a/algo/cryptonight-lite/cryptonight_lite_aesni.h b/algo/cryptonight-lite/cryptonight_lite_aesni.h deleted file mode 100644 index bb528cfb4..000000000 --- a/algo/cryptonight-lite/cryptonight_lite_aesni.h +++ /dev/null @@ -1,256 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef __CRYPTONIGHT_LITE_AESNI_H__ -#define __CRYPTONIGHT_LITE_AESNI_H__ - -#include <x86intrin.h> - - -#define aes_genkey_sub(imm8) \ - __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8)); \ - xout1 = _mm_shuffle_epi32(xout1, 0xFF); \ - *xout0 = sl_xor(*xout0); \ - *xout0 = _mm_xor_si128(*xout0, xout1); \ - xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);\ - xout1 = _mm_shuffle_epi32(xout1, 0xAA); \ - *xout2 = sl_xor(*xout2); \ - *xout2 = _mm_xor_si128(*xout2, xout1); \ - - -// This will shift and xor tmp1 into itself as 4 32-bit vals such as -// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) -static inline __m128i sl_xor(__m128i tmp1) -{ - __m128i tmp4; - tmp4 = _mm_slli_si128(tmp1, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - return tmp1; -} - - -static inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2) -{ - aes_genkey_sub(0x1) -} - - -static inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2) -{ - aes_genkey_sub(0x2) -} - - -static inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2) -{ - aes_genkey_sub(0x4) -} - - -static inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2) -{ - aes_genkey_sub(0x8) -} - - -static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - *x0 = _mm_aesenc_si128(*x0, key); - *x1 = _mm_aesenc_si128(*x1, key); - *x2 = _mm_aesenc_si128(*x2, key); - *x3 = _mm_aesenc_si128(*x3, key); - *x4 = _mm_aesenc_si128(*x4, key); - *x5 = _mm_aesenc_si128(*x5, key); - *x6 = _mm_aesenc_si128(*x6, key); - *x7 = _mm_aesenc_si128(*x7, key); -} - - -static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) -{ - __m128i xout0 = 
_mm_load_si128(memory); - __m128i xout2 = _mm_load_si128(memory + 1); - *k0 = xout0; - *k1 = xout2; - - aes_genkey_sub1(&xout0, &xout2); - *k2 = xout0; - *k3 = xout2; - - aes_genkey_sub2(&xout0, &xout2); - *k4 = xout0; - *k5 = xout2; - - aes_genkey_sub4(&xout0, &xout2); - *k6 = xout0; - *k7 = xout2; - - aes_genkey_sub8(&xout0, &xout2); - *k8 = xout0; - *k9 = xout2; -} - - -static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output) -{ - // This is more than we have registers, compiler will assign 2 keys on the stack - __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xin0 = _mm_load_si128(input + 4); - xin1 = _mm_load_si128(input + 5); - xin2 = _mm_load_si128(input + 6); - xin3 = _mm_load_si128(input + 7); - xin4 = _mm_load_si128(input + 8); - xin5 = _mm_load_si128(input + 9); - xin6 = _mm_load_si128(input + 10); - xin7 = _mm_load_si128(input + 11); - - for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - _mm_store_si128(output + i + 0, xin0); - _mm_store_si128(output + i + 1, xin1); - _mm_store_si128(output + i + 2, xin2); - _mm_store_si128(output + i + 3, xin3); 
- _mm_store_si128(output + i + 4, xin4); - _mm_store_si128(output + i + 5, xin5); - _mm_store_si128(output + i + 6, xin6); - _mm_store_si128(output + i + 7, xin7); - } -} - - -static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output) -{ - // This is more than we have registers, compiler will assign 2 keys on the stack - __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xout0 = _mm_load_si128(output + 4); - xout1 = _mm_load_si128(output + 5); - xout2 = _mm_load_si128(output + 6); - xout3 = _mm_load_si128(output + 7); - xout4 = _mm_load_si128(output + 8); - xout5 = _mm_load_si128(output + 9); - xout6 = _mm_load_si128(output + 10); - xout7 = _mm_load_si128(output + 11); - - for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8) - { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, 
&xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - } - - _mm_store_si128(output + 4, xout0); - _mm_store_si128(output + 5, xout1); - _mm_store_si128(output + 6, xout2); - _mm_store_si128(output + 7, xout3); - _mm_store_si128(output + 8, xout4); - _mm_store_si128(output + 9, xout5); - _mm_store_si128(output + 10, xout6); - _mm_store_si128(output + 11, xout7); -} - - -#if defined(__x86_64__) -# define EXTRACT64(X) _mm_cvtsi128_si64(X) - -static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -#elif defined(__i386__) -# define HI32(X) \ - _mm_srli_si128((X), 4) - - -# define EXTRACT64(X) \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32)) - -static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) { - // multiplier = ab = a * 2^32 + b - // multiplicand = cd = c * 2^32 + d - // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d - uint64_t a = multiplier >> 32; - uint64_t b = multiplier & 0xFFFFFFFF; - uint64_t c = multiplicand >> 32; - uint64_t d = multiplicand & 0xFFFFFFFF; - - //uint64_t ac = a * c; - uint64_t ad = a * d; - //uint64_t bc = b * c; - uint64_t bd = b * d; - - uint64_t adbc = ad + (b * c); - uint64_t adbc_carry = adbc < ad ? 1 : 0; - - // multiplier * multiplicand = product_hi * 2^64 + product_lo - uint64_t product_lo = bd + (adbc << 32); - uint64_t product_lo_carry = product_lo < bd ? 
1 : 0; - *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; - - return product_lo; -} -#endif - - -#endif /* __CRYPTONIGHT_LITE_AESNI_H__ */ diff --git a/algo/cryptonight-lite/cryptonight_lite_av1_aesni.c b/algo/cryptonight-lite/cryptonight_lite_av1_aesni.c deleted file mode 100644 index 80110fb2a..000000000 --- a/algo/cryptonight-lite/cryptonight_lite_av1_aesni.c +++ /dev/null @@ -1,77 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <x86intrin.h> -#include <string.h> - -#include "algo/cryptonight/cryptonight.h" -#include "cryptonight_lite_aesni.h" -#include "crypto/c_keccak.h" - - -void cryptonight_lite_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - - cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory); - - const uint8_t* l0 = ctx->memory; - uint64_t* h0 = (uint64_t*) ctx->state0; - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - - for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { - __m128i cx; - cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]); - cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); - - _mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx)); - idx0 = EXTRACT64(cx); - bx0 = cx; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0]; - ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0; - ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - } - - cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0); - - keccakf(h0, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); -} diff --git a/algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c b/algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c deleted file mode 100644 index 055435c6a..000000000 --- a/algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c +++ /dev/null @@ -1,111 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * 
Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <x86intrin.h> -#include <string.h> - -#include "algo/cryptonight/cryptonight.h" -#include "cryptonight_lite_aesni.h" -#include "crypto/c_keccak.h" - - -void cryptonight_lite_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - keccak((const uint8_t *) input + size, size, ctx->state1, 200); - - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEMORY_LITE; - uint64_t* h0 = (uint64_t*) ctx->state0; - uint64_t* h1 = (uint64_t*) ctx->state1; - - cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t al1 = h1[0] ^ h1[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - uint64_t ah1 = h1[1] ^ h1[5]; - - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - uint64_t idx1 = h1[0] ^ h1[4]; - - for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { - __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]); - 
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]); - - cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); - cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); - - _mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0)); - _mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1)); - - idx0 = EXTRACT64(cx0); - idx1 = EXTRACT64(cx1); - - bx0 = cx0; - bx1 = cx1; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0]; - ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0; - ((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - - cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0]; - ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1]; - lo = _umul128(idx1, cl, &hi); - - al1 += hi; - ah1 += lo; - - ((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1; - ((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1; - - ah1 ^= ch; - al1 ^= cl; - idx1 = al1; - } - - cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); - - keccakf(h0, 24); - keccakf(h1, 24); - - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); - extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32); -} diff --git a/algo/cryptonight-lite/cryptonight_lite_av3_softaes.c b/algo/cryptonight-lite/cryptonight_lite_av3_softaes.c deleted file mode 100644 index 3dec6e335..000000000 --- a/algo/cryptonight-lite/cryptonight_lite_av3_softaes.c +++ /dev/null @@ -1,77 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * 
This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <x86intrin.h> -#include <string.h> - -#include "algo/cryptonight/cryptonight.h" -#include "cryptonight_lite_softaes.h" -#include "crypto/c_keccak.h" - - -void cryptonight_lite_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - - cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory); - - const uint8_t* l0 = ctx->memory; - uint64_t* h0 = (uint64_t*) ctx->state0; - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - - for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { - __m128i cx; - cx = _mm_load_si128((__m128i *)&l0[idx0 & 0xFFFF0]); - cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); - - _mm_store_si128((__m128i *)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx)); - idx0 = EXTRACT64(cx); - bx0 = cx; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*)&l0[idx0 & 0xFFFF0])[0]; - ch = ((uint64_t*)&l0[idx0 & 0xFFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0; - ((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - } - - cn_implode_scratchpad((__m128i*) ctx->memory, 
(__m128i*) ctx->state0); - - keccakf(h0, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); -} diff --git a/algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c b/algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c deleted file mode 100644 index 873b8cac0..000000000 --- a/algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c +++ /dev/null @@ -1,111 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <x86intrin.h> -#include <string.h> - -#include "algo/cryptonight/cryptonight.h" -#include "cryptonight_lite_softaes.h" -#include "crypto/c_keccak.h" - - -void cryptonight_lite_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - keccak((const uint8_t *) input + size, size, ctx->state1, 200); - - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEMORY_LITE; - uint64_t* h0 = (uint64_t*) ctx->state0; - uint64_t* h1 = (uint64_t*) ctx->state1; - - cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t al1 = h1[0] ^ h1[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - uint64_t ah1 = h1[1] ^ h1[5]; - - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - uint64_t idx1 = h1[0] ^ h1[4]; - - for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) { - __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]); - __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]); - - cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0)); - cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1)); - - _mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0)); - _mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1)); - - idx0 = EXTRACT64(cx0); - idx1 = EXTRACT64(cx1); - - bx0 = cx0; - bx1 = cx1; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0]; - ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0; - ((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - - cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0]; - ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1]; - lo = 
_umul128(idx1, cl, &hi); - - al1 += hi; - ah1 += lo; - - ((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1; - ((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1; - - ah1 ^= ch; - al1 ^= cl; - idx1 = al1; - } - - cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); - - keccakf(h0, 24); - keccakf(h1, 24); - - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); - extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32); -} diff --git a/algo/cryptonight-lite/cryptonight_lite_softaes.h b/algo/cryptonight-lite/cryptonight_lite_softaes.h deleted file mode 100644 index bab3dcafe..000000000 --- a/algo/cryptonight-lite/cryptonight_lite_softaes.h +++ /dev/null @@ -1,237 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef __CRYPTONIGHT_LITE_SOFTAES_H__ -#define __CRYPTONIGHT_LITE_SOFTAES_H__ - -#include <x86intrin.h> - -extern __m128i soft_aesenc(__m128i in, __m128i key); -extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon); - - -// This will shift and xor tmp1 into itself as 4 32-bit vals such as -// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) -static inline __m128i sl_xor(__m128i tmp1) -{ - __m128i tmp4; - tmp4 = _mm_slli_si128(tmp1, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - return tmp1; -} - - -static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon) -{ - __m128i xout1 = soft_aeskeygenassist(*xout2, rcon); - xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem - *xout0 = sl_xor(*xout0); - *xout0 = _mm_xor_si128(*xout0, xout1); - xout1 = soft_aeskeygenassist(*xout0, 0x00); - xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem - *xout2 = sl_xor(*xout2); - *xout2 = _mm_xor_si128(*xout2, xout1); -} - - -static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - *x0 = soft_aesenc(*x0, key); - *x1 = soft_aesenc(*x1, key); - *x2 = soft_aesenc(*x2, key); - *x3 = soft_aesenc(*x3, key); - *x4 = soft_aesenc(*x4, key); - *x5 = soft_aesenc(*x5, key); - *x6 = soft_aesenc(*x6, key); - *x7 = soft_aesenc(*x7, key); -} - - -static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) -{ - __m128i xout0 = _mm_load_si128(memory); - __m128i xout2 = _mm_load_si128(memory + 1); - *k0 = xout0; - *k1 = xout2; - - aes_genkey_sub(&xout0, &xout2, 0x1); - *k2 = xout0; - *k3 = xout2; - - aes_genkey_sub(&xout0, &xout2, 0x2); - *k4 = 
xout0; - *k5 = xout2; - - aes_genkey_sub(&xout0, &xout2, 0x4); - *k6 = xout0; - *k7 = xout2; - - aes_genkey_sub(&xout0, &xout2, 0x8); - *k8 = xout0; - *k9 = xout2; -} - - -static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output) -{ - // This is more than we have registers, compiler will assign 2 keys on the stack - __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xin0 = _mm_load_si128(input + 4); - xin1 = _mm_load_si128(input + 5); - xin2 = _mm_load_si128(input + 6); - xin3 = _mm_load_si128(input + 7); - xin4 = _mm_load_si128(input + 8); - xin5 = _mm_load_si128(input + 9); - xin6 = _mm_load_si128(input + 10); - xin7 = _mm_load_si128(input + 11); - - for (size_t i = 0; i < MEMORY_LITE / sizeof(__m128i); i += 8) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - _mm_store_si128(output + i + 0, xin0); - _mm_store_si128(output + i + 1, xin1); - _mm_store_si128(output + i + 2, xin2); - _mm_store_si128(output + i + 3, xin3); - _mm_store_si128(output + i + 4, xin4); - _mm_store_si128(output + i + 5, xin5); - _mm_store_si128(output + i + 6, xin6); - _mm_store_si128(output + i + 7, xin7); - } -} - - -static inline void cn_implode_scratchpad(const 
__m128i* input, __m128i* output) -{ - // This is more than we have registers, compiler will assign 2 keys on the stack - __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xout0 = _mm_load_si128(output + 4); - xout1 = _mm_load_si128(output + 5); - xout2 = _mm_load_si128(output + 6); - xout3 = _mm_load_si128(output + 7); - xout4 = _mm_load_si128(output + 8); - xout5 = _mm_load_si128(output + 9); - xout6 = _mm_load_si128(output + 10); - xout7 = _mm_load_si128(output + 11); - - for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8) - { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, 
&xout4, &xout5, &xout6, &xout7); - } - - _mm_store_si128(output + 4, xout0); - _mm_store_si128(output + 5, xout1); - _mm_store_si128(output + 6, xout2); - _mm_store_si128(output + 7, xout3); - _mm_store_si128(output + 8, xout4); - _mm_store_si128(output + 9, xout5); - _mm_store_si128(output + 10, xout6); - _mm_store_si128(output + 11, xout7); -} - - -#if defined(__x86_64__) -# define EXTRACT64(X) _mm_cvtsi128_si64(X) - -static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -#elif defined(__i386__) -# define HI32(X) \ - _mm_srli_si128((X), 4) - - -# define EXTRACT64(X) \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32)) - -static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) { - // multiplier = ab = a * 2^32 + b - // multiplicand = cd = c * 2^32 + d - // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d - uint64_t a = multiplier >> 32; - uint64_t b = multiplier & 0xFFFFFFFF; - uint64_t c = multiplicand >> 32; - uint64_t d = multiplicand & 0xFFFFFFFF; - - //uint64_t ac = a * c; - uint64_t ad = a * d; - //uint64_t bc = b * c; - uint64_t bd = b * d; - - uint64_t adbc = ad + (b * c); - uint64_t adbc_carry = adbc < ad ? 1 : 0; - - // multiplier * multiplicand = product_hi * 2^64 + product_lo - uint64_t product_lo = bd + (adbc << 32); - uint64_t product_lo_carry = product_lo < bd ? 
1 : 0; - *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; - - return product_lo; -} -#endif - - -#endif /* __CRYPTONIGHT_LITE_SOFTAES_H__ */ diff --git a/algo/cryptonight/cryptonight.c b/algo/cryptonight/cryptonight.c deleted file mode 100644 index 730187280..000000000 --- a/algo/cryptonight/cryptonight.c +++ /dev/null @@ -1,244 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - - -#include <stdlib.h> -#include <string.h> -#include <mm_malloc.h> - -#ifndef BUILD_TEST -# include "xmrig.h" -#endif - -#include "crypto/c_groestl.h" -#include "crypto/c_blake256.h" -#include "crypto/c_jh.h" -#include "crypto/c_skein.h" -#include "cryptonight.h" -#include "options.h" - - -const static char test_input[152] = { - 0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19, - 0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9, - 0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F, - 0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46, - 0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02, - 0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00, - 0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B, - 0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62, - 0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92, - 0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01 -}; - - -const static char test_output0[64] = { - 0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66, - 0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F, - 0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7, - 0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00 -}; - - -void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx); -void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx); -void cryptonight_av3_softaes(const void* input, size_t size, void* 
output, struct cryptonight_ctx* ctx); -void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx); - -#ifndef XMRIG_NO_AEON -const static char test_output1[64] = { - 0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE, - 0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD, - 0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E, - 0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88, -}; - -void cryptonight_lite_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx); -void cryptonight_lite_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx); -void cryptonight_lite_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx); -void cryptonight_lite_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx); -#endif - -void (*cryptonight_hash_ctx)(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) = NULL; - - -static bool self_test() { - if (cryptonight_hash_ctx == NULL) { - return false; - } - - char output[64]; - - struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16); - ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 2, 16); - - cryptonight_hash_ctx(test_input, 76, output, ctx); - - _mm_free(ctx->memory); - _mm_free(ctx); - -# ifndef XMRIG_NO_AEON - if (opt_algo == ALGO_CRYPTONIGHT_LITE) { - return memcmp(output, test_output1, (opt_double_hash ? 64 : 32)) == 0; - } -# endif - - return memcmp(output, test_output0, (opt_double_hash ? 
64 : 32)) == 0; -} - - -#ifndef XMRIG_NO_AEON -bool cryptonight_lite_init(int variant) { - switch (variant) { - case AEON_AV1_AESNI: - cryptonight_hash_ctx = cryptonight_lite_av1_aesni; - break; - - case AEON_AV2_AESNI_DOUBLE: - opt_double_hash = true; - cryptonight_hash_ctx = cryptonight_lite_av2_aesni_double; - break; - - case AEON_AV3_SOFT_AES: - cryptonight_hash_ctx = cryptonight_lite_av3_softaes; - break; - - case AEON_AV4_SOFT_AES_DOUBLE: - opt_double_hash = true; - cryptonight_hash_ctx = cryptonight_lite_av4_softaes_double; - break; - - default: - break; - } - - return self_test(); -} -#endif - - -bool cryptonight_init(int variant) -{ -# ifndef XMRIG_NO_AEON - if (opt_algo == ALGO_CRYPTONIGHT_LITE) { - return cryptonight_lite_init(variant); - } -# endif - - switch (variant) { - case XMR_AV1_AESNI: - cryptonight_hash_ctx = cryptonight_av1_aesni; - break; - - case XMR_AV2_AESNI_DOUBLE: - opt_double_hash = true; - cryptonight_hash_ctx = cryptonight_av2_aesni_double; - break; - - case XMR_AV3_SOFT_AES: - cryptonight_hash_ctx = cryptonight_av3_softaes; - break; - - case XMR_AV4_SOFT_AES_DOUBLE: - opt_double_hash = true; - cryptonight_hash_ctx = cryptonight_av4_softaes_double; - break; - - default: - break; - } - - return self_test(); -} - - -static inline void do_blake_hash(const void* input, size_t len, char* output) { - blake256_hash((uint8_t*)output, input, len); -} - - -static inline void do_groestl_hash(const void* input, size_t len, char* output) { - groestl(input, len * 8, (uint8_t*)output); -} - - -static inline void do_jh_hash(const void* input, size_t len, char* output) { - jh_hash(32 * 8, input, 8 * len, (uint8_t*)output); -} - - -static inline void do_skein_hash(const void* input, size_t len, char* output) { - skein_hash(8 * 32, input, 8 * len, (uint8_t*)output); -} - - -void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; - - -#ifndef BUILD_TEST -int scanhash_cryptonight(int 
thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) { - uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39); - - do { - cryptonight_hash_ctx(blob, blob_size, hash, ctx); - (*hashes_done)++; - - if (unlikely(hash[7] < target)) { - return 1; - } - - (*nonceptr)++; - } while (likely(((*nonceptr) < max_nonce && !work_restart[thr_id].restart))); - - return 0; -} - - -int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) { - int rc = 0; - uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39); - uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size); - - do { - cryptonight_hash_ctx(blob, blob_size, hash, ctx); - (*hashes_done) += 2; - - if (unlikely(hash[7] < target)) { - return rc |= 1; - } - - if (unlikely(hash[15] < target)) { - return rc |= 2; - } - - if (rc) { - break; - } - - (*nonceptr0)++; - (*nonceptr1)++; - } while (likely(((*nonceptr0) < max_nonce && !work_restart[thr_id].restart))); - - return rc; -} -#endif diff --git a/algo/cryptonight/cryptonight.h b/algo/cryptonight/cryptonight.h deleted file mode 100644 index 0b0170735..000000000 --- a/algo/cryptonight/cryptonight.h +++ /dev/null @@ -1,47 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or 
- * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef __CRYPTONIGHT_H__ -#define __CRYPTONIGHT_H__ - -#include <stddef.h> -#include <stdint.h> -#include <stdbool.h> - -#define MEMORY 2097152 /* 2 MiB */ -#define MEMORY_LITE 1048576 /* 1 MiB */ - -struct cryptonight_ctx { - uint8_t state0[200] __attribute__((aligned(16))); - uint8_t state1[200] __attribute__((aligned(16))); - uint8_t* memory __attribute__((aligned(16))); -}; - - -extern void (* const extra_hashes[4])(const void *, size_t, char *); - -bool cryptonight_init(int variant); -int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx); -int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx); - -#endif /* __CRYPTONIGHT_H__ */ diff --git a/algo/cryptonight/cryptonight_aesni.h b/algo/cryptonight/cryptonight_aesni.h deleted file mode 100644 index e4d6d42f1..000000000 --- a/algo/cryptonight/cryptonight_aesni.h +++ /dev/null @@ -1,256 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 
2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef __CRYPTONIGHT_AESNI_H__ -#define __CRYPTONIGHT_AESNI_H__ - -#include <x86intrin.h> - - -#define aes_genkey_sub(imm8) \ - __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8)); \ - xout1 = _mm_shuffle_epi32(xout1, 0xFF); \ - *xout0 = sl_xor(*xout0); \ - *xout0 = _mm_xor_si128(*xout0, xout1); \ - xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);\ - xout1 = _mm_shuffle_epi32(xout1, 0xAA); \ - *xout2 = sl_xor(*xout2); \ - *xout2 = _mm_xor_si128(*xout2, xout1); \ - - -// This will shift and xor tmp1 into itself as 4 32-bit vals such as -// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) -static inline __m128i sl_xor(__m128i tmp1) -{ - __m128i tmp4; - tmp4 = _mm_slli_si128(tmp1, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - return tmp1; -} - - -static inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2) -{ - aes_genkey_sub(0x1) -} - - -static inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2) -{ - aes_genkey_sub(0x2) -} - - -static inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2) -{ - aes_genkey_sub(0x4) -} - - -static inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2) -{ - 
aes_genkey_sub(0x8) -} - - -static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - *x0 = _mm_aesenc_si128(*x0, key); - *x1 = _mm_aesenc_si128(*x1, key); - *x2 = _mm_aesenc_si128(*x2, key); - *x3 = _mm_aesenc_si128(*x3, key); - *x4 = _mm_aesenc_si128(*x4, key); - *x5 = _mm_aesenc_si128(*x5, key); - *x6 = _mm_aesenc_si128(*x6, key); - *x7 = _mm_aesenc_si128(*x7, key); -} - - -static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) -{ - __m128i xout0 = _mm_load_si128(memory); - __m128i xout2 = _mm_load_si128(memory + 1); - *k0 = xout0; - *k1 = xout2; - - aes_genkey_sub1(&xout0, &xout2); - *k2 = xout0; - *k3 = xout2; - - aes_genkey_sub2(&xout0, &xout2); - *k4 = xout0; - *k5 = xout2; - - aes_genkey_sub4(&xout0, &xout2); - *k6 = xout0; - *k7 = xout2; - - aes_genkey_sub8(&xout0, &xout2); - *k8 = xout0; - *k9 = xout2; -} - - -static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output) -{ - // This is more than we have registers, compiler will assign 2 keys on the stack - __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xin0 = _mm_load_si128(input + 4); - xin1 = _mm_load_si128(input + 5); - xin2 = _mm_load_si128(input + 6); - xin3 = _mm_load_si128(input + 7); - xin4 = _mm_load_si128(input + 8); - xin5 = _mm_load_si128(input + 9); - xin6 = _mm_load_si128(input + 10); - xin7 = _mm_load_si128(input + 11); - - for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - 
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - _mm_store_si128(output + i + 0, xin0); - _mm_store_si128(output + i + 1, xin1); - _mm_store_si128(output + i + 2, xin2); - _mm_store_si128(output + i + 3, xin3); - _mm_store_si128(output + i + 4, xin4); - _mm_store_si128(output + i + 5, xin5); - _mm_store_si128(output + i + 6, xin6); - _mm_store_si128(output + i + 7, xin7); - } -} - - -static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output) -{ - // This is more than we have registers, compiler will assign 2 keys on the stack - __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xout0 = _mm_load_si128(output + 4); - xout1 = _mm_load_si128(output + 5); - xout2 = _mm_load_si128(output + 6); - xout3 = _mm_load_si128(output + 7); - xout4 = _mm_load_si128(output + 8); - xout5 = _mm_load_si128(output + 9); - xout6 = _mm_load_si128(output + 10); - xout7 = _mm_load_si128(output + 11); - - for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8) - { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = 
_mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - } - - _mm_store_si128(output + 4, xout0); - _mm_store_si128(output + 5, xout1); - _mm_store_si128(output + 6, xout2); - _mm_store_si128(output + 7, xout3); - _mm_store_si128(output + 8, xout4); - _mm_store_si128(output + 9, xout5); - _mm_store_si128(output + 10, xout6); - _mm_store_si128(output + 11, xout7); -} - - -#if defined(__x86_64__) -# define EXTRACT64(X) _mm_cvtsi128_si64(X) - -static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -#elif defined(__i386__) -# define HI32(X) \ - _mm_srli_si128((X), 4) - - -# define EXTRACT64(X) \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32)) - -static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) { - // multiplier = ab = a * 2^32 + b - // multiplicand = cd = c * 2^32 + d - // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d - uint64_t a = multiplier >> 32; - uint64_t b = 
multiplier & 0xFFFFFFFF; - uint64_t c = multiplicand >> 32; - uint64_t d = multiplicand & 0xFFFFFFFF; - - //uint64_t ac = a * c; - uint64_t ad = a * d; - //uint64_t bc = b * c; - uint64_t bd = b * d; - - uint64_t adbc = ad + (b * c); - uint64_t adbc_carry = adbc < ad ? 1 : 0; - - // multiplier * multiplicand = product_hi * 2^64 + product_lo - uint64_t product_lo = bd + (adbc << 32); - uint64_t product_lo_carry = product_lo < bd ? 1 : 0; - *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; - - return product_lo; -} -#endif - - -#endif /* __CRYPTONIGHT_AESNI_H__ */ diff --git a/algo/cryptonight/cryptonight_av1_aesni.c b/algo/cryptonight/cryptonight_av1_aesni.c deleted file mode 100644 index 3f30544e6..000000000 --- a/algo/cryptonight/cryptonight_av1_aesni.c +++ /dev/null @@ -1,77 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <x86intrin.h> -#include <string.h> - -#include "cryptonight.h" -#include "cryptonight_aesni.h" -#include "crypto/c_keccak.h" - - -void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - - cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory); - - const uint8_t* l0 = ctx->memory; - uint64_t* h0 = (uint64_t*) ctx->state0; - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - - for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { - __m128i cx; - cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]); - cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); - - _mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); - idx0 = EXTRACT64(cx); - bx0 = cx; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0]; - ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; - ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - } - - cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0); - - keccakf(h0, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); -} diff --git a/algo/cryptonight/cryptonight_av2_aesni_double.c b/algo/cryptonight/cryptonight_av2_aesni_double.c deleted file mode 100644 index 779b9bc3f..000000000 --- a/algo/cryptonight/cryptonight_av2_aesni_double.c +++ /dev/null @@ -1,111 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * 
Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <x86intrin.h> -#include <string.h> - -#include "cryptonight.h" -#include "cryptonight_aesni.h" -#include "crypto/c_keccak.h" - - -void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - keccak((const uint8_t *) input + size, size, ctx->state1, 200); - - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEMORY; - uint64_t* h0 = (uint64_t*) ctx->state0; - uint64_t* h1 = (uint64_t*) ctx->state1; - - cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t al1 = h1[0] ^ h1[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - uint64_t ah1 = h1[1] ^ h1[5]; - - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - uint64_t idx1 = h1[0] ^ h1[4]; - - for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { - __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]); - __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]); - - cx0 = 
_mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); - cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); - - _mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0)); - _mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1)); - - idx0 = EXTRACT64(cx0); - idx1 = EXTRACT64(cx1); - - bx0 = cx0; - bx1 = cx1; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0]; - ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0; - ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - - cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0]; - ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1]; - lo = _umul128(idx1, cl, &hi); - - al1 += hi; - ah1 += lo; - - ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1; - ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1; - - ah1 ^= ch; - al1 ^= cl; - idx1 = al1; - } - - cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); - - keccakf(h0, 24); - keccakf(h1, 24); - - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); - extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32); -} diff --git a/algo/cryptonight/cryptonight_av3_softaes.c b/algo/cryptonight/cryptonight_av3_softaes.c deleted file mode 100644 index 22be894d8..000000000 --- a/algo/cryptonight/cryptonight_av3_softaes.c +++ /dev/null @@ -1,77 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms 
of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <x86intrin.h> -#include <string.h> - -#include "cryptonight.h" -#include "cryptonight_softaes.h" -#include "crypto/c_keccak.h" - - -void cryptonight_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - - cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory); - - const uint8_t* l0 = ctx->memory; - uint64_t* h0 = (uint64_t*) ctx->state0; - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - - for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { - __m128i cx; - cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]); - cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); - - _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); - idx0 = EXTRACT64(cx); - bx0 = cx; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0]; - ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; - ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - } - - cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0); - - keccakf(h0, 24); - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); -} 
diff --git a/algo/cryptonight/cryptonight_av4_softaes_double.c b/algo/cryptonight/cryptonight_av4_softaes_double.c deleted file mode 100644 index afd4bebe1..000000000 --- a/algo/cryptonight/cryptonight_av4_softaes_double.c +++ /dev/null @@ -1,111 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <x86intrin.h> -#include <string.h> - -#include "cryptonight.h" -#include "cryptonight_softaes.h" -#include "crypto/c_keccak.h" - - -void cryptonight_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx) -{ - keccak((const uint8_t *) input, size, ctx->state0, 200); - keccak((const uint8_t *) input + size, size, ctx->state1, 200); - - const uint8_t* l0 = ctx->memory; - const uint8_t* l1 = ctx->memory + MEMORY; - uint64_t* h0 = (uint64_t*) ctx->state0; - uint64_t* h1 = (uint64_t*) ctx->state1; - - cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t al1 = h1[0] ^ h1[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - uint64_t ah1 = h1[1] ^ h1[5]; - - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - - uint64_t idx0 = h0[0] ^ h0[4]; - uint64_t idx1 = h1[0] ^ h1[4]; - - for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { - __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]); - __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]); - - cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0)); - cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1)); - - _mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0)); - _mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1)); - - idx0 = EXTRACT64(cx0); - idx1 = EXTRACT64(cx1); - - bx0 = cx0; - bx1 = cx1; - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0]; - ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1]; - lo = _umul128(idx0, cl, &hi); - - al0 += hi; - ah0 += lo; - - ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0; - ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0; - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - - cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0]; - ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1]; - lo = _umul128(idx1, cl, 
&hi); - - al1 += hi; - ah1 += lo; - - ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1; - ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1; - - ah1 ^= ch; - al1 ^= cl; - idx1 = al1; - } - - cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); - - keccakf(h0, 24); - keccakf(h1, 24); - - extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output); - extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32); -} diff --git a/algo/cryptonight/cryptonight_softaes.h b/algo/cryptonight/cryptonight_softaes.h deleted file mode 100644 index f12ab8c67..000000000 --- a/algo/cryptonight/cryptonight_softaes.h +++ /dev/null @@ -1,237 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> - * Copyright 2012-2014 pooler <pooler@litecoinpool.org> - * Copyright 2014 Lucas Jones <https://github.com/lucasjones> - * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> - * Copyright 2016 Jay D Dee <jayddee246@gmail.com> - * Copyright 2017 fireice-uk <https://github.com/fireice-uk> - * Copyright 2016-2017 XMRig <support@xmrig.com> - * - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef __CRYPTONIGHT_SOFTAES_H__ -#define __CRYPTONIGHT_SOFTAES_H__ - -#include <x86intrin.h> - -extern __m128i soft_aesenc(__m128i in, __m128i key); -extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon); - - -// This will shift and xor tmp1 into itself as 4 32-bit vals such as -// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) -static inline __m128i sl_xor(__m128i tmp1) -{ - __m128i tmp4; - tmp4 = _mm_slli_si128(tmp1, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - return tmp1; -} - - -static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon) -{ - __m128i xout1 = soft_aeskeygenassist(*xout2, rcon); - xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem - *xout0 = sl_xor(*xout0); - *xout0 = _mm_xor_si128(*xout0, xout1); - xout1 = soft_aeskeygenassist(*xout0, 0x00); - xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem - *xout2 = sl_xor(*xout2); - *xout2 = _mm_xor_si128(*xout2, xout1); -} - - -static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - *x0 = soft_aesenc(*x0, key); - *x1 = soft_aesenc(*x1, key); - *x2 = soft_aesenc(*x2, key); - *x3 = soft_aesenc(*x3, key); - *x4 = soft_aesenc(*x4, key); - *x5 = soft_aesenc(*x5, key); - *x6 = soft_aesenc(*x6, key); - *x7 = soft_aesenc(*x7, key); -} - - -static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) -{ - __m128i xout0 = _mm_load_si128(memory); - __m128i xout2 = _mm_load_si128(memory + 1); - *k0 = xout0; - *k1 = xout2; - - aes_genkey_sub(&xout0, &xout2, 0x1); - *k2 = xout0; - *k3 = xout2; - - aes_genkey_sub(&xout0, &xout2, 0x2); - *k4 = xout0; - *k5 
= xout2; - - aes_genkey_sub(&xout0, &xout2, 0x4); - *k6 = xout0; - *k7 = xout2; - - aes_genkey_sub(&xout0, &xout2, 0x8); - *k8 = xout0; - *k9 = xout2; -} - - -static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output) -{ - // This is more than we have registers, compiler will assign 2 keys on the stack - __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xin0 = _mm_load_si128(input + 4); - xin1 = _mm_load_si128(input + 5); - xin2 = _mm_load_si128(input + 6); - xin3 = _mm_load_si128(input + 7); - xin4 = _mm_load_si128(input + 8); - xin5 = _mm_load_si128(input + 9); - xin6 = _mm_load_si128(input + 10); - xin7 = _mm_load_si128(input + 11); - - for (size_t i = 0; i < MEMORY / sizeof(__m128i); i += 8) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - _mm_store_si128(output + i + 0, xin0); - _mm_store_si128(output + i + 1, xin1); - _mm_store_si128(output + i + 2, xin2); - _mm_store_si128(output + i + 3, xin3); - _mm_store_si128(output + i + 4, xin4); - _mm_store_si128(output + i + 5, xin5); - _mm_store_si128(output + i + 6, xin6); - _mm_store_si128(output + i + 7, xin7); - } -} - - -static inline void cn_implode_scratchpad(const __m128i* input, 
__m128i* output) -{ - // This is more than we have registers, compiler will assign 2 keys on the stack - __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xout0 = _mm_load_si128(output + 4); - xout1 = _mm_load_si128(output + 5); - xout2 = _mm_load_si128(output + 6); - xout3 = _mm_load_si128(output + 7); - xout4 = _mm_load_si128(output + 8); - xout5 = _mm_load_si128(output + 9); - xout6 = _mm_load_si128(output + 10); - xout7 = _mm_load_si128(output + 11); - - for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8) - { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, 
&xout7); - } - - _mm_store_si128(output + 4, xout0); - _mm_store_si128(output + 5, xout1); - _mm_store_si128(output + 6, xout2); - _mm_store_si128(output + 7, xout3); - _mm_store_si128(output + 8, xout4); - _mm_store_si128(output + 9, xout5); - _mm_store_si128(output + 10, xout6); - _mm_store_si128(output + 11, xout7); -} - - -#if defined(__x86_64__) -# define EXTRACT64(X) _mm_cvtsi128_si64(X) - -static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -#elif defined(__i386__) -# define HI32(X) \ - _mm_srli_si128((X), 4) - - -# define EXTRACT64(X) \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32)) - -inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) { - // multiplier = ab = a * 2^32 + b - // multiplicand = cd = c * 2^32 + d - // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d - uint64_t a = multiplier >> 32; - uint64_t b = multiplier & 0xFFFFFFFF; - uint64_t c = multiplicand >> 32; - uint64_t d = multiplicand & 0xFFFFFFFF; - - //uint64_t ac = a * c; - uint64_t ad = a * d; - //uint64_t bc = b * c; - uint64_t bd = b * d; - - uint64_t adbc = ad + (b * c); - uint64_t adbc_carry = adbc < ad ? 1 : 0; - - // multiplier * multiplicand = product_hi * 2^64 + product_lo - uint64_t product_lo = bd + (adbc << 32); - uint64_t product_lo_carry = product_lo < bd ? 
1 : 0; - *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; - - return product_lo; -} -#endif - - -#endif /* __CRYPTONIGHT_SOFTAES_H__ */ diff --git a/crypto/hash.c b/crypto/hash.c deleted file mode 100644 index f3a16f0c1..000000000 --- a/crypto/hash.c +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (c) 2012-2013 The Cryptonote developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include <stddef.h> -#include <stdint.h> -#include <string.h> - -#include "hash-ops.h" -#include "c_keccak.h" - -void hash_permutation(union hash_state *state) { - keccakf((uint64_t*)state, 24); -} - -void hash_process(union hash_state *state, const uint8_t *buf, size_t count) { - keccak1600(buf, count, (uint8_t*)state); -} - -void cn_fast_hash(const void *data, size_t length, char *hash) { - union hash_state state; - hash_process(&state, data, length); - memcpy(hash, &state, HASH_SIZE); -} diff --git a/src/Options.cpp b/src/Options.cpp index 11f01d00d..3cd3ac5f3 100644 --- a/src/Options.cpp +++ b/src/Options.cpp @@ -28,10 +28,11 @@ #include "Console.h" -#include "Options.h" -#include "version.h" +#include "Cpu.h" #include "donate.h" #include "net/Url.h" +#include "Options.h" +#include "version.h" #ifndef ARRAY_SIZE @@ -198,9 +199,9 @@ Options::~Options() bool Options::parseArg(int key, char *arg) { -// char *p; + char *p; int v; -// uint64_t ul; + uint64_t ul; Url *url; switch (key) { @@ -243,7 +244,7 @@ bool Options::parseArg(int key, char *arg) break; case 'r': /* --retries */ - v = atoi(arg); + v = strtol(arg, nullptr, 10); if (v < 1 || v > 1000) { showUsage(1); return false; @@ -253,7 +254,7 @@ bool Options::parseArg(int key, char *arg) break; case 'R': /* --retry-pause */ - v = atoi(arg); + v = strtol(arg, nullptr, 10); if (v < 1 || v > 3600) { showUsage(1); return false; @@ -263,7 +264,7 @@ bool Options::parseArg(int key, char *arg) break; case 't': /* 
--threads */ - v = atoi(arg); + v = strtol(arg, nullptr, 10); if (v < 1 || v > 1024) { showUsage(1); return false; @@ -273,7 +274,7 @@ bool Options::parseArg(int key, char *arg) break; case 1004: /* --max-cpu-usage */ - v = atoi(arg); + v = strtol(arg, nullptr, 10); if (v < 1 || v > 100) { showUsage(1); return false; @@ -304,7 +305,7 @@ bool Options::parseArg(int key, char *arg) break; case 'v': /* --av */ - v = atoi(arg); + v = strtol(arg, nullptr, 10); if (v < 0 || v > 1000) { showUsage(1); return false; @@ -314,13 +315,13 @@ bool Options::parseArg(int key, char *arg) break; case 1020: /* --cpu-affinity */ -// p = strstr(arg, "0x"); -// ul = p ? strtoul(p, NULL, 16) : atol(arg); -// if (ul > (1UL << cpu_info.total_logical_cpus) -1) { -// ul = -1; -// } + p = strstr(arg, "0x"); + ul = p ? strtoul(p, NULL, 16) : atol(arg); + if (ul > (1UL << Cpu::threads()) -1) { + ul = -1; + } -// opt_affinity = ul; + m_affinity = ul; break; case 1002: /* --no-color */ @@ -328,7 +329,7 @@ bool Options::parseArg(int key, char *arg) break; case 1003: /* --donate-level */ - v = atoi(arg); + v = strtol(arg, nullptr, 10); if (v < 1 || v > 99) { showUsage(1); return false; diff --git a/src/Options.h b/src/Options.h index cbad99fa9..e9f4a7437 100644 --- a/src/Options.h +++ b/src/Options.h @@ -55,6 +55,7 @@ public: inline bool doubleHash() const { return m_doubleHash; } inline bool isReady() const { return m_ready; } inline bool keepAlive() const { return m_keepAlive; } + inline bool nicehash() const { return m_nicehash; } inline const char *pass() const { return m_pass; } inline const char *user() const { return m_user; } inline const Url *backupUrl() const { return m_backupUrl; } @@ -64,6 +65,8 @@ public: inline int donateLevel() const { return m_donateLevel; } inline int retries() const { return m_retries; } inline int retryPause() const { return m_retryPause; } + inline int threads() const { return m_threads; } + inline int64_t affinity() const { return m_affinity; } const char 
*algoName() const; diff --git a/src/Summary.cpp b/src/Summary.cpp index 38f1b1c55..b7e97a0fd 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -34,7 +34,7 @@ static void print_versions() { - char *buf = static_cast<char*>(malloc(16)); + char *buf = static_cast<char*>(alloca(16)); # ifdef __GNUC__ snprintf(buf, 16, " gcc/%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); @@ -48,8 +48,6 @@ static void print_versions() } else { Console::i()->text(" * VERSIONS: XMRig/%s libuv/%s%s", APP_VERSION, uv_version_string(), buf); } - - free(buf); } @@ -74,10 +72,40 @@ static void print_cpu() } +static void print_threads() +{ /* Prints the THREADS summary line: thread count, algorithm name, algorithm variant, donate level, optional nicehash flag and optional affinity mask. */ + char buf[32]; /* holds the optional ", affinity=0x..." suffix; left as an empty string when affinity() is -1 */ + if (Options::i()->affinity() != -1L) { + snprintf(buf, sizeof(buf), ", affinity=0x%llX", static_cast<unsigned long long>(Options::i()->affinity())); /* %llX consumes an unsigned long long; int64_t is plain long on LP64 targets, so convert explicitly */ + } + else { + buf[0] = '\0'; + } + /* The colored and plain branches print the same fields; only the ANSI escape sequences differ. */ + if (Options::i()->colors()) { + Console::i()->text("\x1B[01;32m * \x1B[01;37mTHREADS: \x1B[01;36m%d\x1B[01;37m, %s, av=%d, donate=%d%%%s%s", + Options::i()->threads(), + Options::i()->algoName(), + Options::i()->algoVariant(), + Options::i()->donateLevel(), + Options::i()->nicehash() ? ", nicehash" : "", buf); + } + else { + Console::i()->text(" * THREADS: %d, %s, av=%d, donate=%d%%%s%s", + Options::i()->threads(), + Options::i()->algoName(), + Options::i()->algoVariant(), + Options::i()->donateLevel(), + Options::i()->nicehash() ? ", nicehash" : "", buf); + } +} + + void Summary::print() { print_versions(); print_cpu(); + print_threads(); }