Simplify cryptonight_ctx.

This commit is contained in:
XMRig 2017-04-21 11:14:27 +03:00
parent 1474d3fe53
commit f29d05bdde
7 changed files with 58 additions and 76 deletions

View file

@ -27,34 +27,12 @@
#include <stddef.h>
#include <stdint.h>
#define MEMORY 2097152 /* 2 MiB */
#define ITER (1 << 20)
#define AES_BLOCK_SIZE 16
#define AES_KEY_SIZE 32 /*16*/
#define INIT_SIZE_BLK 8
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128
/* 200-byte hash state that can be addressed either as raw bytes or as
 * 25 64-bit words (25 * 8 == 200); both members alias the same storage. */
union hash_state {
/* Byte-wise view of the state. */
uint8_t b[200];
/* 64-bit word view of the same 200 bytes. */
uint64_t w[25];
};
/* Overlays the 200-byte hash_state with a structured view of its leading
 * bytes: a 64-byte `k` field followed by INIT_SIZE_BYTE bytes of `init`. */
union cn_slow_hash_state {
/* Full state, as bytes or 64-bit words. */
union hash_state hs;
/* Anonymous struct: its members are accessed directly on the union. */
struct {
/* First 64 bytes of the state. */
uint8_t k[64];
/* The INIT_SIZE_BYTE bytes that follow `k`. */
uint8_t init[INIT_SIZE_BYTE];
};
};
#define MEMORY 2097152 /* 2 MiB */
/* Per-hash working context handed to the cryptonight_av* hash functions.
 *
 * NOTE(review): this listing appears to come from a rendered diff in which
 * removed and added lines are interleaved, which is why two members named
 * `state` are visible. Presumably the union `state`, `text`, `a`, `b` and `c`
 * are the removed members, and `uint8_t state[200]` plus `memory` are what
 * remains -- confirm against the real header before relying on this layout. */
struct cryptonight_ctx {
union cn_slow_hash_state state;
/* Note the `__attribute(` spelling here versus `__attribute__(` below. */
uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(16)));
uint64_t a[2] __attribute__((aligned(16)));
uint64_t b[2] __attribute__((aligned(16)));
uint64_t c[2] __attribute__((aligned(16)));
/* 200-byte state buffer; the hash functions pass it to keccak()/keccakf()
 * and also access it through uint64_t* and __m128i* casts, hence the
 * 16-byte alignment. */
uint8_t state[200] __attribute__((aligned(16)));
/* Byte pointer; the alignment attribute applies to the pointer member
 * itself, not to whatever it points at. */
uint8_t* memory __attribute__((aligned(16)));
};

View file

@ -224,12 +224,12 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
void cryptonight_av1_aesni(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200);
keccak((const uint8_t *) input, 76, ctx->state, 200);
cn_explode_scratchpad((__m128i*) &ctx->state.hs, (__m128i*) memory);
cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory);
const uint8_t* l0 = memory;
uint64_t* h0 = (uint64_t*) &ctx->state.hs;
uint64_t* h0 = (uint64_t*) ctx->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
@ -262,8 +262,8 @@ void cryptonight_av1_aesni(void *restrict output, const void *restrict input, ch
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) memory, (__m128i*) &ctx->state.hs);
cn_implode_scratchpad((__m128i*) memory, (__m128i*) ctx->state);
keccakf((uint64_t*) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
keccakf(h0, 24);
extra_hashes[ctx->state[0] & 3](ctx->state, 200, output);
}

View file

@ -224,12 +224,12 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
void cryptonight_av2_aesni_stak(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200);
keccak((const uint8_t *) input, 76, ctx->state, 200);
cn_explode_scratchpad((__m128i*) &ctx->state.hs, (__m128i*) memory);
cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory);
const uint8_t* l0 = memory;
uint64_t* h0 = (uint64_t*) &ctx->state.hs;
uint64_t* h0 = (uint64_t*) ctx->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
@ -266,8 +266,8 @@ void cryptonight_av2_aesni_stak(void *restrict output, const void *restrict inpu
_mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0);
}
cn_implode_scratchpad((__m128i*) memory, (__m128i*) &ctx->state.hs);
cn_implode_scratchpad((__m128i*) memory, (__m128i*) ctx->state);
keccakf((uint64_t*) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
keccakf(h0, 24);
extra_hashes[ctx->state[0] & 3](ctx->state, 200, output);
}

View file

@ -224,12 +224,12 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200);
keccak((const uint8_t *) input, 76, ctx->state, 200);
cn_explode_scratchpad((__m128i*) &ctx->state, (__m128i*) memory);
cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory);
const uint8_t* l0 = memory;
uint64_t* h0 = (uint64_t*) &ctx->state;
uint64_t* h0 = (uint64_t*) ctx->state;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
@ -262,8 +262,8 @@ void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict inpu
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) memory, (__m128i*) &ctx->state.hs);
cn_implode_scratchpad((__m128i*) memory, (__m128i*) ctx->state);
keccakf((uint64_t*) &ctx->state.hs, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
keccakf(h0, 24);
extra_hashes[ctx->state[0] & 3](ctx->state, 200, output);
}

View file

@ -205,44 +205,46 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
/*
 * CryptoNight hash variant that uses soft_aesenc() for the AES round.
 *
 * output: passed through to the selected extra_hashes[] function.
 * input:  message buffer; exactly 76 bytes of it are given to keccak().
 * memory: scratchpad; every access is masked with 0x1FFFF0, i.e. a
 *         16-byte-aligned offset below 2 MiB.
 * ctx:    context whose `state` member holds the 200-byte hash state.
 *
 * NOTE(review): this hunk shows two versions of the body interleaved -- one
 * working on `state`/`a`/`c`/`d`/`a_x`/`b_x`, one working on
 * `h0`/`al0`/`ah0`/`bx0`/`idx0`/`cx` -- so keccak(), cn_explode_scratchpad(),
 * cn_implode_scratchpad(), keccakf() and extra_hashes[] each appear twice.
 * Confirm against the real file which set of lines is live.
 */
void cryptonight_av4_softaes(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
uint64_t* state = ctx->state.hs.w;
/* Run keccak() over the 76 input bytes with the 200-byte state as output buffer. */
keccak((const uint8_t *) input, 76, ctx->state, 200);
keccak((const uint8_t *) input, 76, (uint8_t *) state, 200);
/* Fill the scratchpad from the state. */
cn_explode_scratchpad((__m128i*) state, (__m128i*) memory);
cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory);
uint64_t a[2] __attribute((aligned(16))) = { state[0] ^ state[4], state[1] ^ state[5] };
uint64_t c __attribute((aligned(16)));
uint64_t d[2] __attribute((aligned(16)));
const uint8_t* l0 = memory;
/* View the byte state as 64-bit words. */
uint64_t* h0 = (uint64_t*) ctx->state;
__m128i a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
__m128i b_x = _mm_set_epi64x(state[3] ^ state[7], state[2] ^ state[6]);
/* Loop-carried values are seeded by XOR-ing pairs of state words. */
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
/* First scratchpad index; same value as al0. */
uint64_t idx0 = h0[0] ^ h0[4];
/* 0x80000 (524288) iterations; __builtin_expect marks the loop condition as likely true. */
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i c_x = soft_aesenc(a_x, _mm_load_si128((__m128i *) a));
c = _mm_cvtsi128_si64(c_x);
__m128i cx;
/* Load the 16-byte scratchpad line at idx0 and run it through soft_aesenc()
 * with (ah0:al0) as the second operand. */
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
uint64_t *restrict d_ptr = (uint64_t *) &memory[c & 0x1FFFF0];
_mm_store_si128((__m128i *) &memory[a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x));
b_x = c_x;
/* Write bx0 ^ cx back to the same line, then move on: the low 64 bits of cx
 * become the next index and cx becomes the next bx0. */
_mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = _mm_cvtsi128_si64(cx);
bx0 = cx;
d[0] = d_ptr[0];
d[1] = d_ptr[1];
uint64_t hi, lo, cl, ch;
/* Read the two 64-bit halves of the line at the new index. */
cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
/* presumably a 64x64 -> 128-bit multiply returning the low half and storing
 * the high half in `hi` -- _umul128 is defined elsewhere; confirm. */
lo = _umul128(idx0, cl, &hi);
{
/* 128-bit product of c and d[0]; its high 64 bits are added to a[0] and its
 * low 64 bits to a[1] below. */
unsigned __int128 res = (unsigned __int128) c * d[0];
al0 += hi;
ah0 += lo;
d_ptr[0] = a[0] += res >> 64;
d_ptr[1] = a[1] += (uint64_t) res;
}
/* Store the updated (al0, ah0) pair into the line that was just read. */
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
a[0] ^= d[0];
a[1] ^= d[1];
a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]);
/* XOR in the previously read line contents; al0 is the next index. */
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
/* Fold the scratchpad back into the state. */
cn_implode_scratchpad((__m128i*) memory, (__m128i*) state);
cn_implode_scratchpad((__m128i*) memory, (__m128i*) ctx->state);
keccakf(state, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
keccakf(h0, 24);
/* The low two bits of the first state byte give an index 0..3 into extra_hashes[]. */
extra_hashes[ctx->state[0] & 3](ctx->state, 200, output);
}

View file

@ -215,10 +215,10 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
void cryptonight_av5_aesni_experimental(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx)
{
uint64_t* state = ctx->state.hs.w;
keccak((const uint8_t *) input, 76, ctx->state, 200);
cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory);
keccak((const uint8_t *) input, 76, (uint8_t *) state, 200);
cn_explode_scratchpad((__m128i*) state, (__m128i*) memory);
uint64_t* state = (uint64_t*) ctx->state;
uint64_t a[2] __attribute((aligned(16))) = { state[0] ^ state[4], state[1] ^ state[5] };
uint64_t c __attribute((aligned(16)));
@ -254,5 +254,5 @@ void cryptonight_av5_aesni_experimental(void *restrict output, const void *restr
cn_implode_scratchpad((__m128i*) memory, (__m128i*) state);
keccakf(state, 24);
extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
extra_hashes[ctx->state[0] & 3](ctx->state, 200, output);
}

View file

@ -282,7 +282,9 @@ static void *miner_thread(void *userdata) {
uint32_t max_nonce;
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) &persistent_memory[TWO_MB_PAGE - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
applog(LOG_BLUE, "%d", sizeof(struct cryptonight_ctx));
struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
if (cpu_info.count > 1 && opt_n_threads > 1 && opt_affinity != -1L) {
affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity);
@ -335,7 +337,7 @@ static void *miner_thread(void *userdata) {
gettimeofday(&tv_start, NULL );
/* scan nonces for a proof-of-work hash */
rc = scanhash_cryptonight(thr_id, hash, work.data, work.target, max_nonce, &hashes_done, &persistent_memory[TWO_MB_PAGE * (thr_id + 1)], persistentctx);
rc = scanhash_cryptonight(thr_id, hash, work.data, work.target, max_nonce, &hashes_done, &persistent_memory[MEMORY * (thr_id + 1)], persistentctx);
stats_add_hashes(thr_id, &tv_start, hashes_done);
memcpy(work.hash, hash, 32);