mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-23 12:09:22 +00:00
Optimized soft AES implementations
cn-pico: +6.7% cn/half: +6.2% cn/2: +4.3% cn-heavy: +9.1% cn/wow, cn/r: 2.4-2.6 times faster
This commit is contained in:
parent
a5dcd6dd1f
commit
488cec09dd
18 changed files with 2380 additions and 1090 deletions
|
@ -49,6 +49,10 @@ struct cryptonight_r_data {
|
|||
struct cryptonight_ctx {
|
||||
alignas(16) uint8_t state[224];
|
||||
alignas(16) uint8_t *memory;
|
||||
|
||||
uint8_t unused[40];
|
||||
const uint32_t* saes_table;
|
||||
|
||||
cn_mainloop_fun_ms_abi generated_code;
|
||||
cn_mainloop_double_fun_ms_abi generated_code_double;
|
||||
cryptonight_r_data generated_code_data;
|
||||
|
|
|
@ -158,6 +158,16 @@
|
|||
#define SWAP64LE(x) x
|
||||
#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
|
||||
|
||||
#ifndef NOINLINE
|
||||
#ifdef __GNUC__
|
||||
#define NOINLINE __attribute__ ((noinline))
|
||||
#elif _MSC_VER
|
||||
#define NOINLINE __declspec(noinline)
|
||||
#else
|
||||
#define NOINLINE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "common/xmrig.h"
|
||||
#include "variant4_random_math.h"
|
||||
|
||||
|
|
|
@ -192,31 +192,102 @@ static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, _
|
|||
}
|
||||
|
||||
|
||||
template<bool SOFT_AES>
|
||||
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
static FORCEINLINE void soft_aesenc(void* __restrict ptr, const void* __restrict key, const uint32_t* __restrict t)
|
||||
{
|
||||
if (SOFT_AES) {
|
||||
*x0 = soft_aesenc((uint32_t*)x0, key);
|
||||
*x1 = soft_aesenc((uint32_t*)x1, key);
|
||||
*x2 = soft_aesenc((uint32_t*)x2, key);
|
||||
*x3 = soft_aesenc((uint32_t*)x3, key);
|
||||
*x4 = soft_aesenc((uint32_t*)x4, key);
|
||||
*x5 = soft_aesenc((uint32_t*)x5, key);
|
||||
*x6 = soft_aesenc((uint32_t*)x6, key);
|
||||
*x7 = soft_aesenc((uint32_t*)x7, key);
|
||||
}
|
||||
else {
|
||||
*x0 = _mm_aesenc_si128(*x0, key);
|
||||
*x1 = _mm_aesenc_si128(*x1, key);
|
||||
*x2 = _mm_aesenc_si128(*x2, key);
|
||||
*x3 = _mm_aesenc_si128(*x3, key);
|
||||
*x4 = _mm_aesenc_si128(*x4, key);
|
||||
*x5 = _mm_aesenc_si128(*x5, key);
|
||||
*x6 = _mm_aesenc_si128(*x6, key);
|
||||
*x7 = _mm_aesenc_si128(*x7, key);
|
||||
}
|
||||
uint32_t x0 = ((const uint32_t*)(ptr))[0];
|
||||
uint32_t x1 = ((const uint32_t*)(ptr))[1];
|
||||
uint32_t x2 = ((const uint32_t*)(ptr))[2];
|
||||
uint32_t x3 = ((const uint32_t*)(ptr))[3];
|
||||
|
||||
uint32_t y0 = t[x0 & 0xff]; x0 >>= 8;
|
||||
uint32_t y1 = t[x1 & 0xff]; x1 >>= 8;
|
||||
uint32_t y2 = t[x2 & 0xff]; x2 >>= 8;
|
||||
uint32_t y3 = t[x3 & 0xff]; x3 >>= 8;
|
||||
t += 256;
|
||||
|
||||
y0 ^= t[x1 & 0xff]; x1 >>= 8;
|
||||
y1 ^= t[x2 & 0xff]; x2 >>= 8;
|
||||
y2 ^= t[x3 & 0xff]; x3 >>= 8;
|
||||
y3 ^= t[x0 & 0xff]; x0 >>= 8;
|
||||
t += 256;
|
||||
|
||||
y0 ^= t[x2 & 0xff]; x2 >>= 8;
|
||||
y1 ^= t[x3 & 0xff]; x3 >>= 8;
|
||||
y2 ^= t[x0 & 0xff]; x0 >>= 8;
|
||||
y3 ^= t[x1 & 0xff]; x1 >>= 8;
|
||||
t += 256;
|
||||
|
||||
y0 ^= t[x3];
|
||||
y1 ^= t[x0];
|
||||
y2 ^= t[x1];
|
||||
y3 ^= t[x2];
|
||||
|
||||
((uint32_t*)ptr)[0] = y0 ^ ((uint32_t*)key)[0];
|
||||
((uint32_t*)ptr)[1] = y1 ^ ((uint32_t*)key)[1];
|
||||
((uint32_t*)ptr)[2] = y2 ^ ((uint32_t*)key)[2];
|
||||
((uint32_t*)ptr)[3] = y3 ^ ((uint32_t*)key)[3];
|
||||
}
|
||||
|
||||
static FORCEINLINE __m128i soft_aesenc(const void* __restrict ptr, const __m128i key, const uint32_t* __restrict t)
|
||||
{
|
||||
uint32_t x0 = ((const uint32_t*)(ptr))[0];
|
||||
uint32_t x1 = ((const uint32_t*)(ptr))[1];
|
||||
uint32_t x2 = ((const uint32_t*)(ptr))[2];
|
||||
uint32_t x3 = ((const uint32_t*)(ptr))[3];
|
||||
|
||||
uint32_t y0 = t[x0 & 0xff]; x0 >>= 8;
|
||||
uint32_t y1 = t[x1 & 0xff]; x1 >>= 8;
|
||||
uint32_t y2 = t[x2 & 0xff]; x2 >>= 8;
|
||||
uint32_t y3 = t[x3 & 0xff]; x3 >>= 8;
|
||||
t += 256;
|
||||
|
||||
y0 ^= t[x1 & 0xff]; x1 >>= 8;
|
||||
y1 ^= t[x2 & 0xff]; x2 >>= 8;
|
||||
y2 ^= t[x3 & 0xff]; x3 >>= 8;
|
||||
y3 ^= t[x0 & 0xff]; x0 >>= 8;
|
||||
t += 256;
|
||||
|
||||
y0 ^= t[x2 & 0xff]; x2 >>= 8;
|
||||
y1 ^= t[x3 & 0xff]; x3 >>= 8;
|
||||
y2 ^= t[x0 & 0xff]; x0 >>= 8;
|
||||
y3 ^= t[x1 & 0xff]; x1 >>= 8;
|
||||
|
||||
y0 ^= t[x3 + 256];
|
||||
y1 ^= t[x0 + 256];
|
||||
y2 ^= t[x1 + 256];
|
||||
y3 ^= t[x2 + 256];
|
||||
|
||||
return _mm_xor_si128(_mm_set_epi32(y3, y2, y1, y0), key);
|
||||
}
|
||||
|
||||
template<bool SOFT_AES>
|
||||
void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7);
|
||||
|
||||
template<>
|
||||
static NOINLINE void aes_round<true>(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = soft_aesenc((uint32_t*)x0, key, (const uint32_t*)saes_table);
|
||||
*x1 = soft_aesenc((uint32_t*)x1, key, (const uint32_t*)saes_table);
|
||||
*x2 = soft_aesenc((uint32_t*)x2, key, (const uint32_t*)saes_table);
|
||||
*x3 = soft_aesenc((uint32_t*)x3, key, (const uint32_t*)saes_table);
|
||||
*x4 = soft_aesenc((uint32_t*)x4, key, (const uint32_t*)saes_table);
|
||||
*x5 = soft_aesenc((uint32_t*)x5, key, (const uint32_t*)saes_table);
|
||||
*x6 = soft_aesenc((uint32_t*)x6, key, (const uint32_t*)saes_table);
|
||||
*x7 = soft_aesenc((uint32_t*)x7, key, (const uint32_t*)saes_table);
|
||||
}
|
||||
|
||||
template<>
|
||||
static FORCEINLINE void aes_round<false>(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
|
||||
{
|
||||
*x0 = _mm_aesenc_si128(*x0, key);
|
||||
*x1 = _mm_aesenc_si128(*x1, key);
|
||||
*x2 = _mm_aesenc_si128(*x2, key);
|
||||
*x3 = _mm_aesenc_si128(*x3, key);
|
||||
*x4 = _mm_aesenc_si128(*x4, key);
|
||||
*x5 = _mm_aesenc_si128(*x5, key);
|
||||
*x6 = _mm_aesenc_si128(*x6, key);
|
||||
*x7 = _mm_aesenc_si128(*x7, key);
|
||||
}
|
||||
|
||||
inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7)
|
||||
{
|
||||
|
@ -478,6 +549,8 @@ static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l,
|
|||
}
|
||||
}
|
||||
|
||||
void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
|
||||
void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
|
||||
|
||||
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
||||
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
|
@ -498,9 +571,31 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
|
||||
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
if (SOFT_AES && xmrig::cn_is_cryptonight_r<VARIANT>())
|
||||
{
|
||||
if (!ctx[0]->generated_code_data.match(VARIANT, height)) {
|
||||
V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init<VARIANT>(code, height);
|
||||
|
||||
if (VARIANT == xmrig::VARIANT_WOW)
|
||||
wow_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), xmrig::ASM_NONE);
|
||||
else if (VARIANT == xmrig::VARIANT_4)
|
||||
v4_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), xmrig::ASM_NONE);
|
||||
|
||||
ctx[0]->generated_code_data.variant = VARIANT;
|
||||
ctx[0]->generated_code_data.height = height;
|
||||
}
|
||||
|
||||
ctx[0]->saes_table = (const uint32_t*)saes_table;
|
||||
ctx[0]->generated_code(ctx[0]);
|
||||
} else {
|
||||
#endif
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
|
@ -524,7 +619,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
cx = aes_round_tweak_div(cx, ax0);
|
||||
}
|
||||
else if (SOFT_AES) {
|
||||
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
|
||||
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table);
|
||||
}
|
||||
else {
|
||||
cx = _mm_aesenc_si128(cx, ax0);
|
||||
|
@ -602,6 +697,10 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
bx0 = cx;
|
||||
}
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
}
|
||||
#endif
|
||||
|
||||
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
xmrig::keccakf(h0, 24);
|
||||
|
@ -857,8 +956,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
cx1 = aes_round_tweak_div(cx1, ax1);
|
||||
}
|
||||
else if (SOFT_AES) {
|
||||
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
|
||||
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1);
|
||||
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table);
|
||||
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1, (const uint32_t*)saes_table);
|
||||
}
|
||||
else {
|
||||
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||
|
@ -1019,7 +1118,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
c = aes_round_tweak_div(c, a); \
|
||||
} \
|
||||
else if (SOFT_AES) { \
|
||||
c = soft_aesenc(c, a); \
|
||||
c = soft_aesenc(&c, a, (const uint32_t*)saes_table); \
|
||||
} else { \
|
||||
c = _mm_aesenc_si128(c, a); \
|
||||
} \
|
||||
|
|
|
@ -159,4 +159,32 @@ void v4_compile_code_double(const V4_Instruction* code, int code_size, void* mac
|
|||
Mem::flushInstructionCache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
|
||||
{
|
||||
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3);
|
||||
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightWOW_soft_aes_template_part1)) - (p - p0));
|
||||
add_code(p, CryptonightWOW_soft_aes_template_part3, CryptonightWOW_soft_aes_template_end);
|
||||
|
||||
Mem::flushInstructionCache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
|
||||
{
|
||||
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
|
||||
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
|
||||
add_code(p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
|
||||
|
||||
Mem::flushInstructionCache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
279
src/crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
279
src/crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
|
@ -0,0 +1,279 @@
|
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
pxor xmm6, xmm1
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
movaps xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movd r9d, xmm0
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov edi, edi
|
||||
shl rbp, 32
|
||||
or rbp, rdi
|
||||
xor r8, rbp
|
||||
|
||||
mov ebx, ebx
|
||||
shl rsi, 32
|
||||
or rsi, rbx
|
||||
xor QWORD PTR [rsp+320], rsi
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm1
|
||||
paddq xmm1, xmm7
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part3):
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_end):
|
279
src/crypto/asm/CryptonightR_soft_aes_template_win.inc
Normal file
279
src/crypto/asm/CryptonightR_soft_aes_template_win.inc
Normal file
|
@ -0,0 +1,279 @@
|
|||
PUBLIC CryptonightR_soft_aes_template_part1
|
||||
PUBLIC CryptonightR_soft_aes_template_mainloop
|
||||
PUBLIC CryptonightR_soft_aes_template_part2
|
||||
PUBLIC CryptonightR_soft_aes_template_part3
|
||||
PUBLIC CryptonightR_soft_aes_template_end
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_soft_aes_template_part1:
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_soft_aes_template_mainloop:
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
pxor xmm6, xmm1
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
movaps xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movd r9d, xmm0
|
||||
|
||||
CryptonightR_soft_aes_template_part2:
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov edi, edi
|
||||
shl rbp, 32
|
||||
or rbp, rdi
|
||||
xor r8, rbp
|
||||
|
||||
mov ebx, ebx
|
||||
shl rsi, 32
|
||||
or rsi, rbx
|
||||
xor QWORD PTR [rsp+320], rsi
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm1
|
||||
paddq xmm1, xmm7
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne CryptonightR_soft_aes_template_mainloop
|
||||
|
||||
CryptonightR_soft_aes_template_part3:
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
CryptonightR_soft_aes_template_end:
|
|
@ -531,6 +531,8 @@ PUBLIC FN_PREFIX(CryptonightR_instruction_mov256)
|
|||
|
||||
#include "CryptonightWOW_template.inc"
|
||||
#include "CryptonightR_template.inc"
|
||||
#include "CryptonightWOW_soft_aes_template.inc"
|
||||
#include "CryptonightR_soft_aes_template.inc"
|
||||
|
||||
FN_PREFIX(CryptonightR_instruction0):
|
||||
imul rbx, rbx
|
||||
|
|
|
@ -518,6 +518,8 @@ PUBLIC CryptonightR_instruction_mov256
|
|||
|
||||
INCLUDE CryptonightWOW_template_win.inc
|
||||
INCLUDE CryptonightR_template_win.inc
|
||||
INCLUDE CryptonightWOW_soft_aes_template_win.inc
|
||||
INCLUDE CryptonightR_soft_aes_template_win.inc
|
||||
|
||||
CryptonightR_instruction0:
|
||||
imul rbx, rbx
|
||||
|
|
|
@ -26,6 +26,30 @@ extern "C"
|
|||
void CryptonightR_template_double_part4();
|
||||
void CryptonightR_template_double_end();
|
||||
|
||||
void CryptonightWOW_soft_aes_template_part1();
|
||||
void CryptonightWOW_soft_aes_template_mainloop();
|
||||
void CryptonightWOW_soft_aes_template_part2();
|
||||
void CryptonightWOW_soft_aes_template_part3();
|
||||
void CryptonightWOW_soft_aes_template_end();
|
||||
void CryptonightWOW_soft_aes_template_double_part1();
|
||||
void CryptonightWOW_soft_aes_template_double_mainloop();
|
||||
void CryptonightWOW_soft_aes_template_double_part2();
|
||||
void CryptonightWOW_soft_aes_template_double_part3();
|
||||
void CryptonightWOW_soft_aes_template_double_part4();
|
||||
void CryptonightWOW_soft_aes_template_double_end();
|
||||
|
||||
void CryptonightR_soft_aes_template_part1();
|
||||
void CryptonightR_soft_aes_template_mainloop();
|
||||
void CryptonightR_soft_aes_template_part2();
|
||||
void CryptonightR_soft_aes_template_part3();
|
||||
void CryptonightR_soft_aes_template_end();
|
||||
void CryptonightR_soft_aes_template_double_part1();
|
||||
void CryptonightR_soft_aes_template_double_mainloop();
|
||||
void CryptonightR_soft_aes_template_double_part2();
|
||||
void CryptonightR_soft_aes_template_double_part3();
|
||||
void CryptonightR_soft_aes_template_double_part4();
|
||||
void CryptonightR_soft_aes_template_double_end();
|
||||
|
||||
void CryptonightR_instruction0();
|
||||
void CryptonightR_instruction1();
|
||||
void CryptonightR_instruction2();
|
||||
|
|
266
src/crypto/asm/CryptonightWOW_soft_aes_template.inc
Normal file
266
src/crypto/asm/CryptonightWOW_soft_aes_template.inc
Normal file
|
@ -0,0 +1,266 @@
|
|||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
paddq xmm1, xmm7
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
xor rax, QWORD PTR [r11+rcx+8]
|
||||
xor rdx, QWORD PTR [rcx+r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm2, xmm3
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part3):
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_end):
|
266
src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc
Normal file
266
src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc
Normal file
|
@ -0,0 +1,266 @@
|
|||
PUBLIC CryptonightWOW_soft_aes_template_part1
|
||||
PUBLIC CryptonightWOW_soft_aes_template_mainloop
|
||||
PUBLIC CryptonightWOW_soft_aes_template_part2
|
||||
PUBLIC CryptonightWOW_soft_aes_template_part3
|
||||
PUBLIC CryptonightWOW_soft_aes_template_end
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_soft_aes_template_part1:
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_soft_aes_template_mainloop:
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
|
||||
CryptonightWOW_soft_aes_template_part2:
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
paddq xmm1, xmm7
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
xor rax, QWORD PTR [r11+rcx+8]
|
||||
xor rdx, QWORD PTR [rcx+r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm2, xmm3
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne CryptonightWOW_soft_aes_template_mainloop
|
||||
|
||||
CryptonightWOW_soft_aes_template_part3:
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
CryptonightWOW_soft_aes_template_end:
|
279
src/crypto/asm/win64/CryptonightR_soft_aes_template.inc
Normal file
279
src/crypto/asm/win64/CryptonightR_soft_aes_template.inc
Normal file
|
@ -0,0 +1,279 @@
|
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movd xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movd xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movd xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movd xmm10, QWORD PTR [r10+96]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movd xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movd xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movd xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movd r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
pxor xmm6, xmm1
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movd rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movd rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
movaps xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movd r9d, xmm0
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov edi, edi
|
||||
shl rbp, 32
|
||||
or rbp, rdi
|
||||
xor r8, rbp
|
||||
|
||||
mov ebx, ebx
|
||||
shl rsi, 32
|
||||
or rsi, rbx
|
||||
xor QWORD PTR [rsp+320], rsi
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm1
|
||||
paddq xmm1, xmm7
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part3):
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_end):
|
279
src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc
Normal file
279
src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc
Normal file
|
@ -0,0 +1,279 @@
|
|||
PUBLIC CryptonightR_soft_aes_template_part1
|
||||
PUBLIC CryptonightR_soft_aes_template_mainloop
|
||||
PUBLIC CryptonightR_soft_aes_template_part2
|
||||
PUBLIC CryptonightR_soft_aes_template_part3
|
||||
PUBLIC CryptonightR_soft_aes_template_end
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_soft_aes_template_part1:
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movd xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movd xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movd xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movd xmm10, QWORD PTR [r10+96]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movd xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_soft_aes_template_mainloop:
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movd xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movd xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movd r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
pxor xmm6, xmm1
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movd rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movd rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
movaps xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movd r9d, xmm0
|
||||
|
||||
CryptonightR_soft_aes_template_part2:
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov edi, edi
|
||||
shl rbp, 32
|
||||
or rbp, rdi
|
||||
xor r8, rbp
|
||||
|
||||
mov ebx, ebx
|
||||
shl rsi, 32
|
||||
or rsi, rbx
|
||||
xor QWORD PTR [rsp+320], rsi
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm1
|
||||
paddq xmm1, xmm7
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne CryptonightR_soft_aes_template_mainloop
|
||||
|
||||
CryptonightR_soft_aes_template_part3:
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
CryptonightR_soft_aes_template_end:
|
|
@ -531,6 +531,8 @@ PUBLIC FN_PREFIX(CryptonightR_instruction_mov256)
|
|||
|
||||
#include "CryptonightWOW_template.inc"
|
||||
#include "CryptonightR_template.inc"
|
||||
#include "CryptonightWOW_soft_aes_template.inc"
|
||||
#include "CryptonightR_soft_aes_template.inc"
|
||||
|
||||
FN_PREFIX(CryptonightR_instruction0):
|
||||
imul rbx, rbx
|
||||
|
|
|
@ -518,6 +518,8 @@ PUBLIC CryptonightR_instruction_mov256
|
|||
|
||||
INCLUDE CryptonightWOW_template_win.inc
|
||||
INCLUDE CryptonightR_template_win.inc
|
||||
INCLUDE CryptonightWOW_soft_aes_template_win.inc
|
||||
INCLUDE CryptonightR_soft_aes_template_win.inc
|
||||
|
||||
CryptonightR_instruction0:
|
||||
imul rbx, rbx
|
||||
|
|
File diff suppressed because it is too large
Load diff
266
src/crypto/asm/win64/CryptonightWOW_soft_aes_template.inc
Normal file
266
src/crypto/asm/win64/CryptonightWOW_soft_aes_template.inc
Normal file
|
@ -0,0 +1,266 @@
|
|||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movd xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movd xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movd xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movd xmm10, QWORD PTR [r10+96]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movd xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movd xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movd xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movd r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movd rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movd rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
paddq xmm1, xmm7
|
||||
movd xmm0, rax
|
||||
movd xmm3, rdx
|
||||
xor rax, QWORD PTR [r11+rcx+8]
|
||||
xor rdx, QWORD PTR [rcx+r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm2, xmm3
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part3):
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_end):
|
266
src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc
Normal file
266
src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc
Normal file
|
@ -0,0 +1,266 @@
|
|||
PUBLIC CryptonightWOW_soft_aes_template_part1
|
||||
PUBLIC CryptonightWOW_soft_aes_template_mainloop
|
||||
PUBLIC CryptonightWOW_soft_aes_template_part2
|
||||
PUBLIC CryptonightWOW_soft_aes_template_part3
|
||||
PUBLIC CryptonightWOW_soft_aes_template_end
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_soft_aes_template_part1:
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movd xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movd xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movd xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movd xmm10, QWORD PTR [r10+96]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movd xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_soft_aes_template_mainloop:
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movd xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movd xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movd r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movd rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movd rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
|
||||
CryptonightWOW_soft_aes_template_part2:
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
paddq xmm1, xmm7
|
||||
movd xmm0, rax
|
||||
movd xmm3, rdx
|
||||
xor rax, QWORD PTR [r11+rcx+8]
|
||||
xor rdx, QWORD PTR [rcx+r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm2, xmm3
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne CryptonightWOW_soft_aes_template_mainloop
|
||||
|
||||
CryptonightWOW_soft_aes_template_part3:
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
CryptonightWOW_soft_aes_template_end:
|
Loading…
Reference in a new issue