mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-05 10:29:38 +00:00
Update av1/av6
This commit is contained in:
parent
44875b0a94
commit
1013aa5004
3 changed files with 46 additions and 55 deletions
|
@ -79,7 +79,6 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
|
||||||
set(CMAKE_BUILD_TYPE Release)
|
set(CMAKE_BUILD_TYPE Release)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -mbmi2")
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wno-pointer-to-int-cast")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wno-pointer-to-int-cast")
|
||||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
||||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -gdwarf-2")
|
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -gdwarf-2")
|
||||||
|
|
|
@ -110,7 +110,9 @@ static inline void ExpandAESKey256(char *keybuf)
|
||||||
|
|
||||||
void cryptonight_av1_aesni(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
|
void cryptonight_av1_aesni(void *restrict output, const void *restrict input, const char *restrict memory, struct cryptonight_ctx *restrict ctx)
|
||||||
{
|
{
|
||||||
keccak((const uint8_t *)input, 76, (uint8_t *) &ctx->state.hs, 200);
|
uint64_t* state = ctx->state.hs.w;
|
||||||
|
|
||||||
|
keccak((const uint8_t *)input, 76, (uint8_t *) state, 200);
|
||||||
uint8_t ExpandedKey[256];
|
uint8_t ExpandedKey[256];
|
||||||
size_t i, j;
|
size_t i, j;
|
||||||
|
|
||||||
|
@ -146,40 +148,35 @@ void cryptonight_av1_aesni(void *restrict output, const void *restrict input, co
|
||||||
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
|
_mm_store_si128(&(longoutput[(i >> 4) + 7]), xmminput[7]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < 2; i++)
|
uint64_t a[2] __attribute((aligned(16))) = { state[0] ^ state[4], state[1] ^ state[5] };
|
||||||
{
|
uint64_t c __attribute((aligned(16)));
|
||||||
ctx->a[i] = ((uint64_t *)ctx->state.k)[i] ^ ((uint64_t *)ctx->state.k)[i+4];
|
|
||||||
ctx->b[i] = ((uint64_t *)ctx->state.k)[i+2] ^ ((uint64_t *)ctx->state.k)[i+6];
|
|
||||||
}
|
|
||||||
|
|
||||||
__m128i a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
|
||||||
__m128i b_x = _mm_load_si128((__m128i *) ctx->b);
|
|
||||||
|
|
||||||
uint64_t c[2] __attribute((aligned(16)));
|
|
||||||
uint64_t d[2] __attribute((aligned(16)));
|
uint64_t d[2] __attribute((aligned(16)));
|
||||||
|
|
||||||
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
__m128i a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
|
||||||
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) ctx->a));
|
__m128i b_x = _mm_set_epi64x(state[3] ^ state[7], state[2] ^ state[6]);
|
||||||
_mm_store_si128((__m128i *) c, c_x);
|
|
||||||
|
|
||||||
uint64_t *restrict d_ptr = (uint64_t *) &memory[c[0] & 0x1FFFF0];
|
for (i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||||
_mm_store_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
|
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) a));
|
||||||
|
c = _mm_cvtsi128_si64(c_x);
|
||||||
|
|
||||||
|
uint64_t *restrict d_ptr = (uint64_t *) &memory[c & 0x1FFFF0];
|
||||||
|
_mm_store_si128((__m128i *) &memory[a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
|
||||||
b_x = c_x;
|
b_x = c_x;
|
||||||
|
|
||||||
d[0] = d_ptr[0];
|
d[0] = d_ptr[0];
|
||||||
d[1] = d_ptr[1];
|
d[1] = d_ptr[1];
|
||||||
|
|
||||||
{
|
{
|
||||||
unsigned __int128 res = (unsigned __int128) c[0] * d[0];
|
unsigned __int128 res = (unsigned __int128) c * d[0];
|
||||||
|
|
||||||
d_ptr[0] = ctx->a[0] += res >> 64;
|
d_ptr[0] = a[0] += res >> 64;
|
||||||
d_ptr[1] = ctx->a[1] += (uint64_t) res;
|
d_ptr[1] = a[1] += (uint64_t) res;
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->a[0] ^= d[0];
|
a[0] ^= d[0];
|
||||||
ctx->a[1] ^= d[1];
|
a[1] ^= d[1];
|
||||||
|
|
||||||
a_x = _mm_load_si128((__m128i *) &memory[ctx->a[0] & 0x1FFFF0]);
|
a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
|
||||||
|
@ -211,6 +208,6 @@ void cryptonight_av1_aesni(void *restrict output, const void *restrict input, co
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
|
||||||
keccakf((uint64_t *) &ctx->state.hs, 24);
|
keccakf((uint64_t *) state, 24);
|
||||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
|
@ -223,50 +224,44 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
|
||||||
|
|
||||||
void cryptonight_av6_aesni_experimental(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx)
|
void cryptonight_av6_aesni_experimental(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx)
|
||||||
{
|
{
|
||||||
keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200);
|
uint64_t* state = ctx->state.hs.w;
|
||||||
|
|
||||||
cn_explode_scratchpad((__m128i*) &ctx->state.hs, (__m128i*) memory);
|
keccak((const uint8_t *) input, 76, (uint8_t *) state, 200);
|
||||||
|
cn_explode_scratchpad((__m128i*) state, (__m128i*) memory);
|
||||||
|
|
||||||
const uint8_t* l0 = memory;
|
uint64_t a[2] __attribute((aligned(16))) = { state[0] ^ state[4], state[1] ^ state[5] };
|
||||||
uint64_t* h0 = (uint64_t*) &ctx->state.hs;
|
uint64_t c __attribute((aligned(16)));
|
||||||
|
uint64_t d[2] __attribute((aligned(16)));
|
||||||
|
|
||||||
uint64_t al0 = h0[0] ^ h0[4];
|
__m128i a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
|
||||||
uint64_t ah0 = h0[1] ^ h0[5];
|
__m128i b_x = _mm_set_epi64x(state[3] ^ state[7], state[2] ^ state[6]);
|
||||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
|
||||||
|
|
||||||
uint64_t idx0 = h0[0] ^ h0[4];
|
|
||||||
|
|
||||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||||
__m128i cx;
|
__m128i c_x = _mm_aesenc_si128(a_x, _mm_load_si128((__m128i *) a));
|
||||||
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
|
c = _mm_cvtsi128_si64(c_x);
|
||||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
|
||||||
|
|
||||||
_mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
uint64_t *restrict d_ptr = (uint64_t *) &memory[c & 0x1FFFF0];
|
||||||
idx0 = _mm_cvtsi128_si64(cx);
|
_mm_store_si128((__m128i *) &memory[a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
|
||||||
bx0 = cx;
|
b_x = c_x;
|
||||||
|
|
||||||
_mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
|
d[0] = d_ptr[0];
|
||||||
|
d[1] = d_ptr[1];
|
||||||
|
|
||||||
uint64_t hi, lo, cl, ch;
|
{
|
||||||
cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
|
unsigned __int128 res = (unsigned __int128) c * d[0];
|
||||||
ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
|
|
||||||
lo = _umul128(idx0, cl, &hi);
|
|
||||||
|
|
||||||
al0 += hi;
|
d_ptr[0] = a[0] += res >> 64;
|
||||||
ah0 += lo;
|
d_ptr[1] = a[1] += (uint64_t) res;
|
||||||
|
|
||||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
|
||||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
|
||||||
|
|
||||||
ah0 ^= ch;
|
|
||||||
al0 ^= cl;
|
|
||||||
idx0 = al0;
|
|
||||||
|
|
||||||
_mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cn_implode_scratchpad((__m128i*) memory, (__m128i*) &ctx->state.hs);
|
a[0] ^= d[0];
|
||||||
|
a[1] ^= d[1];
|
||||||
|
|
||||||
keccakf((uint64_t*) &ctx->state.hs, 24);
|
a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
cn_implode_scratchpad((__m128i*) memory, (__m128i*) state);
|
||||||
|
|
||||||
|
keccakf(state, 24);
|
||||||
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue