mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-25 12:06:04 +00:00
Optimized quad hash for Ryzens
This commit is contained in:
parent
e67eb47796
commit
5c951ddb8a
1 changed file with 22 additions and 12 deletions
|
@ -1375,8 +1375,8 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
||||||
uint64_t idx2 = al2;
|
uint64_t idx2 = al2;
|
||||||
uint64_t idx3 = al3;
|
uint64_t idx3 = al3;
|
||||||
|
|
||||||
for (size_t i = 0; i < props.iterations(); i++) {
|
|
||||||
__m128i cx0, cx1, cx2, cx3;
|
__m128i cx0, cx1, cx2, cx3;
|
||||||
|
|
||||||
if (!SOFT_AES) {
|
if (!SOFT_AES) {
|
||||||
cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||||
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
|
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
|
||||||
|
@ -1384,6 +1384,7 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
||||||
cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
|
cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < props.iterations(); i++) {
|
||||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||||
const __m128i ax2 = _mm_set_epi64x(ah2, al2);
|
const __m128i ax2 = _mm_set_epi64x(ah2, al2);
|
||||||
|
@ -1400,6 +1401,12 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
||||||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||||
cx2 = _mm_aesenc_si128(cx2, ax2);
|
cx2 = _mm_aesenc_si128(cx2, ax2);
|
||||||
cx3 = _mm_aesenc_si128(cx3, ax3);
|
cx3 = _mm_aesenc_si128(cx3, ax3);
|
||||||
|
if (MASK > 131072) {
|
||||||
|
_mm_prefetch((const char*)(&l0[_mm_cvtsi128_si32(cx0) & MASK]), _MM_HINT_T0);
|
||||||
|
_mm_prefetch((const char*)(&l1[_mm_cvtsi128_si32(cx1) & MASK]), _MM_HINT_T0);
|
||||||
|
_mm_prefetch((const char*)(&l2[_mm_cvtsi128_si32(cx2) & MASK]), _MM_HINT_T0);
|
||||||
|
_mm_prefetch((const char*)(&l3[_mm_cvtsi128_si32(cx3) & MASK]), _MM_HINT_T0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cryptonight_monero_tweak_gr((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, cx0);
|
cryptonight_monero_tweak_gr((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, cx0);
|
||||||
|
@ -1424,6 +1431,8 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
||||||
al0 ^= cl;
|
al0 ^= cl;
|
||||||
ah0 ^= ch;
|
ah0 ^= ch;
|
||||||
idx0 = al0;
|
idx0 = al0;
|
||||||
|
bx00 = cx0;
|
||||||
|
if (!SOFT_AES) cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||||
|
|
||||||
cl = ((uint64_t*)&l1[idx1 & MASK])[0];
|
cl = ((uint64_t*)&l1[idx1 & MASK])[0];
|
||||||
ch = ((uint64_t*)&l1[idx1 & MASK])[1];
|
ch = ((uint64_t*)&l1[idx1 & MASK])[1];
|
||||||
|
@ -1435,6 +1444,8 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
||||||
al1 ^= cl;
|
al1 ^= cl;
|
||||||
ah1 ^= ch;
|
ah1 ^= ch;
|
||||||
idx1 = al1;
|
idx1 = al1;
|
||||||
|
bx10 = cx1;
|
||||||
|
if (!SOFT_AES) cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
|
||||||
|
|
||||||
cl = ((uint64_t*)&l2[idx2 & MASK])[0];
|
cl = ((uint64_t*)&l2[idx2 & MASK])[0];
|
||||||
ch = ((uint64_t*)&l2[idx2 & MASK])[1];
|
ch = ((uint64_t*)&l2[idx2 & MASK])[1];
|
||||||
|
@ -1446,6 +1457,8 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
||||||
al2 ^= cl;
|
al2 ^= cl;
|
||||||
ah2 ^= ch;
|
ah2 ^= ch;
|
||||||
idx2 = al2;
|
idx2 = al2;
|
||||||
|
bx20 = cx2;
|
||||||
|
if (!SOFT_AES) cx2 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l2[idx2 & MASK]));
|
||||||
|
|
||||||
cl = ((uint64_t*)&l3[idx3 & MASK])[0];
|
cl = ((uint64_t*)&l3[idx3 & MASK])[0];
|
||||||
ch = ((uint64_t*)&l3[idx3 & MASK])[1];
|
ch = ((uint64_t*)&l3[idx3 & MASK])[1];
|
||||||
|
@ -1457,11 +1470,8 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
||||||
al3 ^= cl;
|
al3 ^= cl;
|
||||||
ah3 ^= ch;
|
ah3 ^= ch;
|
||||||
idx3 = al3;
|
idx3 = al3;
|
||||||
|
|
||||||
bx00 = cx0;
|
|
||||||
bx10 = cx1;
|
|
||||||
bx20 = cx2;
|
|
||||||
bx30 = cx3;
|
bx30 = cx3;
|
||||||
|
if (!SOFT_AES) cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
|
||||||
}
|
}
|
||||||
|
|
||||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||||
|
|
Loading…
Reference in a new issue