Initial ASM wrapper.

This commit is contained in:
XMRig 2018-09-24 09:51:21 +03:00
parent f163aad38c
commit ba65a34a01
7 changed files with 58 additions and 29 deletions

View file

@ -256,4 +256,4 @@ if (WITH_DEBUG_LOG)
endif()
add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES})
target_link_libraries(${PROJECT_NAME} ${${XMRIG_ASM_LIBRARY}} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})

View file

@ -17,7 +17,6 @@ if (WITH_ASM AND NOT XMRIG_ARM)
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
else()
# set(XMRIG_ASM_SOURCES "")
set(XMRIG_ASM_LIBRARY "")
add_definitions(/DXMRIG_NO_ASM)
endif()

View file

@ -22,8 +22,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_H__
#define __CRYPTONIGHT_H__
#ifndef XMRIG_CRYPTONIGHT_H
#define XMRIG_CRYPTONIGHT_H
#include <stddef.h>
@ -31,9 +31,9 @@
/* Context object handed (by pointer) to the cryptonight hash routines.
 *
 * NOTE(review): this listing carries two declarations of each member -- a
 * 200-byte and a 224-byte `state`, and two spellings of the `memory` pointer.
 * They look like the before/after lines of one change; confirm which pair is
 * the current one before relying on the buffer size. */
struct cryptonight_ctx {
/* Hash state buffer, 16-byte aligned. */
alignas(16) uint8_t state[200];
/* Pointer to the working memory used together with `state`; presumably the scratchpad -- TODO confirm. */
alignas(16) uint8_t* memory;
alignas(16) uint8_t state[224];
alignas(16) uint8_t *memory;
};
#endif /* __CRYPTONIGHT_H__ */
#endif /* XMRIG_CRYPTONIGHT_H */

View file

@ -561,6 +561,31 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
xmrig::keccak(input, size, ctx[0]->state);
cn_explode_scratchpad<ALGO, MEM, false>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
if (ASM == xmrig::ASM_INTEL) {
cnv2_mainloop_ivybridge_asm(ctx[0]);
}
else {
cnv2_mainloop_ryzen_asm(ctx[0]);
}
cn_implode_scratchpad<ALGO, MEM, false>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{

View file

@ -48,10 +48,10 @@
punpcklqdq xmm4, xmm0
movq xmm0, rcx
punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN 16
main_loop_ivybridge:
movdqu xmm6, XMMWORD PTR [r10+rbx]
lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d
mov eax, r10d
@ -63,28 +63,29 @@ main_loop_ivybridge:
movq xmm7, r8
punpcklqdq xmm7, xmm0
aesenc xmm6, xmm7
movq rbp, xmm6
mov r9, rbp
and r9d, 2097136
movdqu xmm2, XMMWORD PTR [rcx+rbx]
movdqu xmm1, XMMWORD PTR [rax+rbx]
movdqu xmm0, XMMWORD PTR [r10+rbx]
paddq xmm1, xmm7
movdqu xmm2, XMMWORD PTR [rcx+rbx]
paddq xmm0, xmm5
paddq xmm2, xmm4
movdqu XMMWORD PTR [rcx+rbx], xmm0
movq rcx, xmm3
movdqu XMMWORD PTR [rax+rbx], xmm2
mov rax, rcx
movdqu XMMWORD PTR [r10+rbx], xmm1
mov r10, r9
xor r10d, 32
movq rcx, xmm3
mov rax, rcx
shl rax, 32
xor rdi, rax
movq rbp, xmm6
movdqa xmm0, xmm6
pxor xmm0, xmm4
mov r10, rbp
and r10d, 2097136
movdqu XMMWORD PTR [rdx], xmm0
xor rdi, QWORD PTR [r10+rbx]
lea r14, QWORD PTR [r10+rbx]
xor r10d, 32
xor rdi, QWORD PTR [r9+rbx]
lea r14, QWORD PTR [r9+rbx]
mov r12, QWORD PTR [r14+8]
xor edx, edx
lea r9d, DWORD PTR [ecx+ecx]
@ -117,8 +118,15 @@ sqrt_fixup_ivybridge_ret:
mul rbp
movq xmm2, rdx
xor rdx, [rcx+rbx]
add r8, rdx
mov QWORD PTR [r14], r8
xor r8, rdi
mov edi, r8d
and edi, 2097136
movq xmm0, rax
xor rax, [rcx+rbx+8]
add r11, rax
mov QWORD PTR [r14+8], r11
punpcklqdq xmm2, xmm0
mov r9d, r10d
@ -135,13 +143,8 @@ sqrt_fixup_ivybridge_ret:
movdqa xmm4, xmm6
movdqu XMMWORD PTR [rcx+rbx], xmm2
movdqu XMMWORD PTR [r10+rbx], xmm1
add r8, rdx
mov QWORD PTR [r14], r8
xor r8, rdi
mov r10, r8
add r11, rax
mov QWORD PTR [r14+8], r11
and r10d, 2097136
movdqu xmm6, [rdi+rbx]
mov r10d, edi
xor r11, r12
dec rsi
jne main_loop_ivybridge

View file

@ -65,7 +65,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
{
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
static const cn_hash_fun func_table[VARIANT_MAX * 10 * 3] = {
constexpr const size_t count = VARIANT_MAX * 10 * 3;
static const cn_hash_fun func_table[count + 2] = {
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_0>,
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_0>,
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_0>,
@ -242,6 +243,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
# endif
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>,
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_RYZEN>
};
const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1;

View file

@ -21,8 +21,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __WORKER_H__
#define __WORKER_H__
#ifndef XMRIG_WORKER_H
#define XMRIG_WORKER_H
#include <atomic>
@ -33,7 +33,6 @@
#include "Mem.h"
struct cryptonight_ctx;
class Handle;
@ -67,4 +66,4 @@ protected:
};
#endif /* __WORKER_H__ */
#endif /* XMRIG_WORKER_H */