mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-17 08:17:40 +00:00
commit
ae6c536e98
3 changed files with 19 additions and 14 deletions
|
@ -377,12 +377,15 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
_mm_store_si128(output + 1, xin1);
|
_mm_store_si128(output + 1, xin1);
|
||||||
_mm_store_si128(output + 2, xin2);
|
_mm_store_si128(output + 2, xin2);
|
||||||
_mm_store_si128(output + 3, xin3);
|
_mm_store_si128(output + 3, xin3);
|
||||||
output += (64 << interleave) / sizeof(__m128i);
|
|
||||||
_mm_store_si128(output + 0, xin4);
|
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
|
||||||
_mm_store_si128(output + 1, xin5);
|
|
||||||
_mm_store_si128(output + 2, xin6);
|
_mm_store_si128(output + output_increment + 0, xin4);
|
||||||
_mm_store_si128(output + 3, xin7);
|
_mm_store_si128(output + output_increment + 1, xin5);
|
||||||
output += (64 << interleave) / sizeof(__m128i);
|
_mm_store_si128(output + output_increment + 2, xin6);
|
||||||
|
_mm_store_si128(output + output_increment + 3, xin7);
|
||||||
|
|
||||||
|
output += output_increment * 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -414,13 +417,15 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||||
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
||||||
input += (64 << interleave) / sizeof(__m128i);
|
|
||||||
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
|
|
||||||
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
|
|
||||||
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
|
|
||||||
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
|
|
||||||
input += (64 << interleave) / sizeof(__m128i);
|
|
||||||
|
|
||||||
|
constexpr int input_increment = (64 << interleave) / sizeof(__m128i);
|
||||||
|
|
||||||
|
xout4 = _mm_xor_si128(_mm_load_si128(input + input_increment + 0), xout4);
|
||||||
|
xout5 = _mm_xor_si128(_mm_load_si128(input + input_increment + 1), xout5);
|
||||||
|
xout6 = _mm_xor_si128(_mm_load_si128(input + input_increment + 2), xout6);
|
||||||
|
xout7 = _mm_xor_si128(_mm_load_si128(input + input_increment + 3), xout7);
|
||||||
|
|
||||||
|
input += input_increment * 2;
|
||||||
i += 8;
|
i += 8;
|
||||||
|
|
||||||
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
|
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
movdqa XMMWORD PTR [rsp+32], xmm0
|
movdqa XMMWORD PTR [rsp+32], xmm0
|
||||||
|
|
||||||
stmxcsr DWORD PTR [rsp+24]
|
stmxcsr DWORD PTR [rsp+24]
|
||||||
mov DWORD PTR [rsp+28], 24448
|
mov DWORD PTR [rsp+28], 16256
|
||||||
ldmxcsr DWORD PTR [rsp+28]
|
ldmxcsr DWORD PTR [rsp+28]
|
||||||
|
|
||||||
mov rcx, QWORD PTR [rbx+56]
|
mov rcx, QWORD PTR [rbx+56]
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
movdqa XMMWORD PTR [rsp+32], xmm0
|
movdqa XMMWORD PTR [rsp+32], xmm0
|
||||||
|
|
||||||
stmxcsr DWORD PTR [rsp+24]
|
stmxcsr DWORD PTR [rsp+24]
|
||||||
mov DWORD PTR [rsp+28], 24448
|
mov DWORD PTR [rsp+28], 16256
|
||||||
ldmxcsr DWORD PTR [rsp+28]
|
ldmxcsr DWORD PTR [rsp+28]
|
||||||
|
|
||||||
mov rcx, QWORD PTR [rbx+56]
|
mov rcx, QWORD PTR [rbx+56]
|
||||||
|
|
Loading…
Reference in a new issue