Much better software AES implementation (--av 4).

This commit is contained in:
XMRig 2017-04-19 10:03:40 +03:00
parent 1013aa5004
commit 21c243ed8f
12 changed files with 359 additions and 1921 deletions

View file

@ -21,9 +21,6 @@ set(HEADERS_CRYPTO
crypto/c_blake256.h crypto/c_blake256.h
crypto/c_jh.h crypto/c_jh.h
crypto/c_skein.h crypto/c_skein.h
crypto/oaes_lib.h
crypto/oaes_config.h
crypto/aesb.h
) )
set(HEADERS_COMPAT set(HEADERS_COMPAT
@ -53,8 +50,7 @@ set(SOURCES_CRYPTO
crypto/c_blake256.c crypto/c_blake256.c
crypto/c_jh.c crypto/c_jh.c
crypto/c_skein.c crypto/c_skein.c
crypto/oaes_lib.c crypto/soft_aes.c
crypto/aesb.c
) )
set(SOURCES_UTILS set(SOURCES_UTILS
@ -106,7 +102,7 @@ if (CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CRYPTONIGHT64 set(CRYPTONIGHT64
algo/cryptonight/cryptonight_av1_aesni.c algo/cryptonight/cryptonight_av1_aesni.c
algo/cryptonight/cryptonight_av2_aesni_wolf.c algo/cryptonight/cryptonight_av2_aesni_wolf.c
algo/cryptonight/cryptonight_av4_legacy.c algo/cryptonight/cryptonight_av4_softaes.c
algo/cryptonight/cryptonight_av5_aesni_stak.c algo/cryptonight/cryptonight_av5_aesni_stak.c
algo/cryptonight/cryptonight_av6_aesni_experimental.c algo/cryptonight/cryptonight_av6_aesni_experimental.c
) )
@ -116,7 +112,7 @@ if (CMAKE_SIZEOF_VOID_P EQUAL 8)
else() else()
set(CRYPTONIGHT32 set(CRYPTONIGHT32
algo/cryptonight/cryptonight_av1_aesni32.c algo/cryptonight/cryptonight_av1_aesni32.c
algo/cryptonight/cryptonight_av4_legacy.c algo/cryptonight/cryptonight_av4_softaes.c
) )
add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${CRYPTONIGHT32}) add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${CRYPTONIGHT32})

View file

@ -108,9 +108,12 @@ static inline void ExpandAESKey256(char *keybuf)
keys[14] = tmp1; keys[14] = tmp1;
} }
void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx) void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
{ {
keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200); uint64_t* state = ctx->state.hs.w;
keccak((const uint8_t *)input, 76, (uint8_t *) state, 200);
uint8_t ExpandedKey[256]; uint8_t ExpandedKey[256];
size_t i, j; size_t i, j;
@ -146,38 +149,32 @@ void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict inpu
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]); _mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
} }
for (i = 0; i < 2; i++) uint64_t a[2] __attribute((aligned(16))) = { state[0] ^ state[4], state[1] ^ state[5] };
{ uint64_t c __attribute((aligned(16)));
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
}
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
uint64_t c[2] __attribute((aligned(16)));
uint64_t d[2] __attribute((aligned(16))); uint64_t d[2] __attribute((aligned(16)));
uint64_t hi; uint64_t hi;
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) { __m128i a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a)); __m128i b_x = _mm_set_epi64x(state[3] ^ state[7], state[2] ^ state[6]);
_mm_store_si128((__m128i *) c, c_x);
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0]; for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x)); __m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) a));
c = _mm_cvtsi128_si64(c_x);
uint64_t *restrict d_ptr = (uint64_t *) &memory[c & 0x1FFFF0];
_mm_store_si128((__m128i *) &memory[a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
b_x = c_x; b_x = c_x;
d[0] = d_ptr[0]; d[0] = d_ptr[0];
d[1] = d_ptr[1]; d[1] = d_ptr[1];
d_ptr[1] = ctx->a[1] += _mulx_u64(c[0], d[0], &hi); d_ptr[1] = a[1] += _mulx_u64(c, d[0], &hi);
d_ptr[0] = ctx->a[0] += hi; d_ptr[0] = a[0] += hi;
ctx->a[0] ^= d[0]; a[0] ^= d[0];
ctx->a[1] ^= d[1]; a[1] ^= d[1];
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
} }
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
@ -209,6 +206,6 @@ void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict inpu
} }
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
keccakf((uint64_t *) &ctx->state.hs, 24); keccakf((uint64_t *) state, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output); extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
} }

View file

@ -1,151 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "compat.h"
#include "crypto/c_keccak.h"
#include "crypto/aesb.h"
#include "crypto/oaes_lib.h"
static inline uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
static inline void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
hi += ((uint64_t*) c)[0];
((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
((uint64_t*) dst)[0] = hi;
((uint64_t*) dst)[1] = lo;
}
static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
}
static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
}
void cryptonight_av4_legacy(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx) {
oaes_ctx *aes_ctx = (oaes_ctx*) oaes_alloc();
size_t i, j;
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], aes_ctx->key->exp_data);
memcpy((void *) &memory[i], ctx->text, INIT_SIZE_BYTE);
}
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], (uint8_t*) ctx->a);
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], (uint8_t*) ctx->b);
for (i = 0; likely(i < ITER / 4); ++i) {
/* Dependency chain: address -> read value ------+
* written value <-+ hard function (AES or MUL) <+
* next address <-+
*/
/* Iteration 1 */
j = ctx->a[0] & 0x1FFFF0;
aesb_single_round((const uint8_t*) &memory[j], (uint8_t *) ctx->c, (const uint8_t *) ctx->a);
xor_blocks_dst((const uint8_t*) ctx->c, (const uint8_t*) ctx->b, (uint8_t*) &memory[j]);
/* Iteration 2 */
mul_sum_xor_dst((const uint8_t*) ctx->c, (uint8_t*) ctx->a, (uint8_t*) &memory[ctx->c[0] & 0x1FFFF0]);
/* Iteration 3 */
j = ctx->a[0] & 0x1FFFF0;
aesb_single_round(&memory[j], (uint8_t *) ctx->b, (uint8_t *) ctx->a);
xor_blocks_dst((const uint8_t*) ctx->b, (const uint8_t*) ctx->c, (uint8_t*) &memory[j]);
/* Iteration 4 */
mul_sum_xor_dst((const uint8_t*) ctx->b, (uint8_t*) ctx->a, (uint8_t*) &memory[ctx->b[0] & 0x1FFFF0]);
}
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
xor_blocks(&ctx->text[0 * AES_BLOCK_SIZE], &memory[i + 0 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[0 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[1 * AES_BLOCK_SIZE], &memory[i + 1 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[1 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[2 * AES_BLOCK_SIZE], &memory[i + 2 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[2 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[3 * AES_BLOCK_SIZE], &memory[i + 3 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[3 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[4 * AES_BLOCK_SIZE], &memory[i + 4 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[4 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[5 * AES_BLOCK_SIZE], &memory[i + 5 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[5 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[6 * AES_BLOCK_SIZE], &memory[i + 6 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[6 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
xor_blocks(&ctx->text[7 * AES_BLOCK_SIZE], &memory[i + 7 * AES_BLOCK_SIZE]);
aesb_pseudo_round_mut(&ctx->text[7 * AES_BLOCK_SIZE], aes_ctx->key->exp_data);
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
keccakf((uint64_t *) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
oaes_free((OAES_CTX **) &aes_ctx);
}

View file

@ -0,0 +1,248 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "crypto/c_keccak.h"
__m128i soft_aesenc(__m128i in, __m128i key);
__m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
#ifdef __GNUC__
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128)a * (unsigned __int128)b;
*hi = r >> 64;
return (uint64_t)r;
}
#endif
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
{
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
*xout0 = sl_xor(*xout0);
*xout0 = _mm_xor_si128(*xout0, xout1);
xout1 = soft_aeskeygenassist(*xout0, 0x00);
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
*xout2 = sl_xor(*xout2);
*xout2 = _mm_xor_si128(*xout2, xout1);
}
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
*k0 = xout0;
*k1 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x1);
*k2 = xout0;
*k3 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x2);
*k4 = xout0;
*k5 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x4);
*k6 = xout0;
*k7 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x8);
*k8 = xout0;
*k9 = xout2;
}
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
*x0 = soft_aesenc(*x0, key);
*x1 = soft_aesenc(*x1, key);
*x2 = soft_aesenc(*x2, key);
*x3 = soft_aesenc(*x3, key);
*x4 = soft_aesenc(*x4, key);
*x5 = soft_aesenc(*x5, key);
*x6 = soft_aesenc(*x6, key);
*x7 = soft_aesenc(*x7, key);
}
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; i < MEMORY / sizeof(__m128i); i += 8) {
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_prefetch((const char*)output + i + 0, _MM_HINT_T2);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
_mm_prefetch((const char*)output + i + 4, _MM_HINT_T2);
}
}
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; i < MEMORY / sizeof(__m128i); i += 8)
{
_mm_prefetch((const char*)input + i + 0, _MM_HINT_NTA);
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
_mm_prefetch((const char*)input + i + 4, _MM_HINT_NTA);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
void cryptonight_av4_softaes(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
uint64_t* state = ctx->state.hs.w;
keccak((const uint8_t *) input, 76, (uint8_t *) state, 200);
cn_explode_scratchpad((__m128i*) state, (__m128i*) memory);
uint64_t a[2] __attribute((aligned(16))) = { state[0] ^ state[4], state[1] ^ state[5] };
uint64_t c __attribute((aligned(16)));
uint64_t d[2] __attribute((aligned(16)));
__m128i a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
__m128i b_x = _mm_set_epi64x(state[3] ^ state[7], state[2] ^ state[6]);
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i c_x = soft_aesenc(a_x, _mm_load_si128((__m128i *) a));
c = _mm_cvtsi128_si64(c_x);
uint64_t *restrict d_ptr = (uint64_t *) &memory[c & 0x1FFFF0];
_mm_store_si128((__m128i *) &memory[a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
b_x = c_x;
d[0] = d_ptr[0];
d[1] = d_ptr[1];
{
unsigned __int128 res = (unsigned __int128) c * d[0];
d_ptr[0] = a[0] += res >> 64;
d_ptr[1] = a[1] += (uint64_t) res;
}
a[0] ^= d[0];
a[1] ^= d[1];
a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
}
cn_implode_scratchpad((__m128i*) memory, (__m128i*) state);
keccakf(state, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
}

View file

@ -46,7 +46,7 @@
void cryptonight_av1_aesni32(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx); void cryptonight_av1_aesni32(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
#endif #endif
void cryptonight_av4_legacy(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx); void cryptonight_av4_softaes(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void (*cryptonight_hash_ctx)(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx) = NULL; void (*cryptonight_hash_ctx)(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx) = NULL;
@ -81,7 +81,7 @@ void cryptonight_init(int variant)
#endif #endif
case XMR_VARIANT_LEGACY: case XMR_VARIANT_LEGACY:
cryptonight_hash_ctx = cryptonight_av4_legacy; cryptonight_hash_ctx = cryptonight_av4_softaes;
break; break;
default: default:

View file

@ -1,10 +0,0 @@
#ifndef __AESB_H__
#define __AESB_H__
void aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
void aesb_pseudo_round_mut(uint8_t *val, const uint8_t *expandedKey);
#define fast_aesb_single_round aesb_single_round
#define fast_aesb_pseudo_round_mut aesb_pseudo_round_mut
#endif /* __AESB_H__ */

View file

@ -1,50 +0,0 @@
/*
* ---------------------------------------------------------------------------
* OpenAES License
* ---------------------------------------------------------------------------
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------
*/
#ifndef _OAES_CONFIG_H
#define _OAES_CONFIG_H
#ifdef __cplusplus
extern "C" {
#endif
//#ifndef OAES_HAVE_ISAAC
//#define OAES_HAVE_ISAAC 1
//#endif // OAES_HAVE_ISAAC
//#ifndef OAES_DEBUG
//#define OAES_DEBUG 0
//#endif // OAES_DEBUG
#ifdef __cplusplus
}
#endif
#endif // _OAES_CONFIG_H

File diff suppressed because it is too large Load diff

View file

@ -1,214 +0,0 @@
/*
* ---------------------------------------------------------------------------
* OpenAES License
* ---------------------------------------------------------------------------
* Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------
*/
#ifndef _OAES_LIB_H
#define _OAES_LIB_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _WIN32
# ifdef OAES_SHARED
# ifdef oaes_lib_EXPORTS
# define OAES_API __declspec(dllexport)
# else
# define OAES_API __declspec(dllimport)
# endif
# else
# define OAES_API
# endif
#else
# define OAES_API
#endif // WIN32
#define OAES_VERSION "0.8.1"
#define OAES_BLOCK_SIZE 16
typedef void OAES_CTX;
typedef enum
{
OAES_RET_FIRST = 0,
OAES_RET_SUCCESS = 0,
OAES_RET_UNKNOWN,
OAES_RET_ARG1,
OAES_RET_ARG2,
OAES_RET_ARG3,
OAES_RET_ARG4,
OAES_RET_ARG5,
OAES_RET_NOKEY,
OAES_RET_MEM,
OAES_RET_BUF,
OAES_RET_HEADER,
OAES_RET_COUNT
} OAES_RET;
/*
* oaes_set_option() takes one of these values for its [option] parameter
* some options accept either an optional or a required [value] parameter
*/
// no option
#define OAES_OPTION_NONE 0
// enable ECB mode, disable CBC mode
#define OAES_OPTION_ECB 1
// enable CBC mode, disable ECB mode
// value is optional, may pass uint8_t iv[OAES_BLOCK_SIZE] to specify
// the value of the initialization vector, iv
#define OAES_OPTION_CBC 2
#ifdef OAES_DEBUG
typedef int ( * oaes_step_cb ) (
const uint8_t state[OAES_BLOCK_SIZE],
const char * step_name,
int step_count,
void * user_data );
// enable state stepping mode
// value is required, must pass oaes_step_cb to receive the state at each step
#define OAES_OPTION_STEP_ON 4
// disable state stepping mode
#define OAES_OPTION_STEP_OFF 8
#endif // OAES_DEBUG
typedef uint16_t OAES_OPTION;
typedef struct _oaes_key
{
size_t data_len;
uint8_t *data;
size_t exp_data_len;
uint8_t *exp_data;
size_t num_keys;
size_t key_base;
} oaes_key;
typedef struct _oaes_ctx
{
#ifdef OAES_HAVE_ISAAC
randctx * rctx;
#endif // OAES_HAVE_ISAAC
#ifdef OAES_DEBUG
oaes_step_cb step_cb;
#endif // OAES_DEBUG
oaes_key * key;
OAES_OPTION options;
uint8_t iv[OAES_BLOCK_SIZE];
} oaes_ctx;
/*
* // usage:
*
* OAES_CTX * ctx = oaes_alloc();
* .
* .
* .
* {
* oaes_gen_key_xxx( ctx );
* {
* oaes_key_export( ctx, _buf, &_buf_len );
* // or
* oaes_key_export_data( ctx, _buf, &_buf_len );\
* }
* }
* // or
* {
* oaes_key_import( ctx, _buf, _buf_len );
* // or
* oaes_key_import_data( ctx, _buf, _buf_len );
* }
* .
* .
* .
* oaes_encrypt( ctx, m, m_len, c, &c_len );
* .
* .
* .
* oaes_decrypt( ctx, c, c_len, m, &m_len );
* .
* .
* .
* oaes_free( &ctx );
*/
OAES_API OAES_CTX * oaes_alloc(void);
OAES_API OAES_RET oaes_free( OAES_CTX ** ctx );
OAES_API OAES_RET oaes_set_option( OAES_CTX * ctx,
OAES_OPTION option, const void * value );
OAES_API OAES_RET oaes_key_gen_128( OAES_CTX * ctx );
OAES_API OAES_RET oaes_key_gen_192( OAES_CTX * ctx );
OAES_API OAES_RET oaes_key_gen_256( OAES_CTX * ctx );
// export key with header information
// set data == NULL to get the required data_len
OAES_API OAES_RET oaes_key_export( OAES_CTX * ctx,
uint8_t * data, size_t * data_len );
// directly export the data from key
// set data == NULL to get the required data_len
OAES_API OAES_RET oaes_key_export_data( OAES_CTX * ctx,
uint8_t * data, size_t * data_len );
// import key with header information
OAES_API OAES_RET oaes_key_import( OAES_CTX * ctx,
const uint8_t * data, size_t data_len );
// directly import data into key
OAES_API OAES_RET oaes_key_import_data( OAES_CTX * ctx,
const uint8_t * data, size_t data_len );
// set c == NULL to get the required c_len
OAES_API OAES_RET oaes_encrypt( OAES_CTX * ctx,
const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len );
// set m == NULL to get the required m_len
OAES_API OAES_RET oaes_decrypt( OAES_CTX * ctx,
const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len );
// set buf == NULL to get the required buf_len
OAES_API OAES_RET oaes_sprintf(
char * buf, size_t * buf_len, const uint8_t * data, size_t data_len );
OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c );
OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c );
#ifdef __cplusplus
}
#endif
#endif // _OAES_LIB_H

View file

@ -1,31 +1,37 @@
/* /*
--------------------------------------------------------------------------- * This program is free software: you can redistribute it and/or modify
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved. * it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
The redistribution and use of this software (with or without changes) * any later version.
is allowed without the payment of fees or royalties provided that: *
* This program is distributed in the hope that it will be useful,
source code distributions include the above copyright notice, this * but WITHOUT ANY WARRANTY; without even the implied warranty of
list of conditions and the following disclaimer; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
binary distributions include the above copyright notice, this list *
of conditions and the following disclaimer in their documentation. * You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
This software is provided 'as is' with no explicit or implied warranties *
in respect of its operation, including, but not limited to, correctness * Additional permission under GNU GPL version 3 section 7
and fitness for purpose. *
--------------------------------------------------------------------------- * If you modify this Program, or any covered work, by linking or combining
Issue Date: 20/12/2007 * it with OpenSSL (or a modified version of that library), containing parts
* covered by the terms of OpenSSL License and SSLeay License, the licensors
* of this Program grant you additional permission to convey the resulting work.
*
*/ */
#include <stdint.h> /*
* The orginal author of this AES implementation is Karl Malbrain.
*/
#include "aesb.h" #ifdef __GNUC__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif // __GNUC__
#if defined(__cplusplus) #include <inttypes.h>
extern "C"
{
#endif
#define TABLE_ALIGN 32 #define TABLE_ALIGN 32
#define WPOLY 0x011b #define WPOLY 0x011b
@ -146,25 +152,61 @@ y[3] = (k)[3] ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(
d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3); d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
inline void aesb_single_round(const uint8_t *restrict in, uint8_t *out, const uint8_t *restrict expandedKey) { __m128i soft_aesenc(__m128i in, __m128i key)
round(((uint32_t*) out), ((uint32_t*) in), ((uint32_t*) expandedKey)); {
uint32_t x0, x1, x2, x3;
x0 = _mm_cvtsi128_si32(in);
x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
__m128i out = _mm_set_epi32(
(t_fn[0][x3 & 0xff] ^ t_fn[1][(x0 >> 8) & 0xff] ^ t_fn[2][(x1 >> 16) & 0xff] ^ t_fn[3][x2 >> 24]),
(t_fn[0][x2 & 0xff] ^ t_fn[1][(x3 >> 8) & 0xff] ^ t_fn[2][(x0 >> 16) & 0xff] ^ t_fn[3][x1 >> 24]),
(t_fn[0][x1 & 0xff] ^ t_fn[1][(x2 >> 8) & 0xff] ^ t_fn[2][(x3 >> 16) & 0xff] ^ t_fn[3][x0 >> 24]),
(t_fn[0][x0 & 0xff] ^ t_fn[1][(x1 >> 8) & 0xff] ^ t_fn[2][(x2 >> 16) & 0xff] ^ t_fn[3][x3 >> 24]));
return _mm_xor_si128(out, key);
} }
inline void aesb_pseudo_round_mut(uint8_t *restrict val, const uint8_t *restrict expandedKey) { uint8_t Sbox[256] = { // forward s-box
uint32_t b1[4]; 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey)); 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 1 * N_COLS); 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 2 * N_COLS); 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 3 * N_COLS); 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 4 * N_COLS); 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 5 * N_COLS); 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 6 * N_COLS); 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 7 * N_COLS); 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
round(b1, ((uint32_t*) val), ((const uint32_t *) expandedKey) + 8 * N_COLS); 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
round(((uint32_t*) val), b1, ((const uint32_t *) expandedKey) + 9 * N_COLS); 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
static inline void sub_word(uint8_t* key)
{
key[0] = Sbox[key[0]];
key[1] = Sbox[key[1]];
key[2] = Sbox[key[2]];
key[3] = Sbox[key[3]];
} }
#ifdef __clang__
#if defined(__cplusplus) uint32_t _rotr(uint32_t value, uint32_t amount)
{
return (value >> amount) | (value << ((32 - amount) & 31));
} }
#endif #endif
__m128i soft_aeskeygenassist(__m128i key, uint8_t rcon)
{
uint32_t X1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55));
uint32_t X3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF));
sub_word((uint8_t*)&X1);
sub_word((uint8_t*)&X3);
return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3,_rotr(X1, 8) ^ rcon, X1);
}

View file

@ -1,16 +1,13 @@
set(SOURCES set(SOURCES
../../algo/cryptonight/cryptonight.h ../../algo/cryptonight/cryptonight.h
../../algo/cryptonight/cryptonight_common.c ../../algo/cryptonight/cryptonight_common.c
../../algo/cryptonight/cryptonight_av4_legacy.c ../../algo/cryptonight/cryptonight_av4_softaes.c
../../crypto/c_keccak.c ../../crypto/c_keccak.c
../../crypto/c_blake256.c ../../crypto/c_blake256.c
../../crypto/c_groestl.c ../../crypto/c_groestl.c
../../crypto/c_jh.c ../../crypto/c_jh.c
../../crypto/c_skein.c ../../crypto/c_skein.c
../../crypto/oaes_config.h ../../crypto/soft_aes.c
../../crypto/oaes_lib.h
../../crypto/oaes_lib.c
../../crypto/aesb.c
) )
if (CMAKE_SIZEOF_VOID_P EQUAL 8) if (CMAKE_SIZEOF_VOID_P EQUAL 8)

View file

@ -7,7 +7,7 @@
void cryptonight_av1_aesni(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx); void cryptonight_av1_aesni(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av2_aesni_wolf(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx); void cryptonight_av2_aesni_wolf(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av3_aesni_bmi2(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx); void cryptonight_av3_aesni_bmi2(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av4_legacy(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx); void cryptonight_av4_softaes(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av5_aesni_stak(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx); void cryptonight_av5_aesni_stak(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
void cryptonight_av6_aesni_experimental(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx); void cryptonight_av6_aesni_experimental(void* output, const void* input, const char *memory, struct cryptonight_ctx* ctx);
@ -117,7 +117,7 @@ void test_cryptonight_av4_should_CalcHash(void)
uint8_t *memory = (uint8_t *) malloc(MEMORY); uint8_t *memory = (uint8_t *) malloc(MEMORY);
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx)); struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptonight_av4_legacy(&hash, data, memory, ctx); cryptonight_av4_softaes(&hash, data, memory, ctx);
free(memory); free(memory);
free(ctx); free(ctx);