Use new style method to call ASM functions for cn/2 & added bulldozer ASM code.

This commit is contained in:
XMRig 2019-03-04 13:31:25 +07:00
parent 7574bfab60
commit ef2e8bed6e
16 changed files with 325 additions and 150 deletions

View file

@ -44,6 +44,9 @@
#include "options.h" #include "options.h"
static cn_hash_fun asm_func_map[AV_MAX][VARIANT_MAX][ASM_MAX] = {};
void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
@ -78,6 +81,7 @@ void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output,
#ifndef XMRIG_NO_ASM #ifndef XMRIG_NO_ASM
void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_single_hash_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
#endif #endif
@ -171,34 +175,20 @@ static bool self_test() {
} }
size_t fn_index(enum Algo algorithm, enum AlgoVariant av, enum Variant variant, enum Assembly assembly) #ifndef XMRIG_NO_ASM
cn_hash_fun cryptonight_hash_asm_fn(enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
{ {
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
# ifndef XMRIG_NO_ASM
if (assembly == ASM_AUTO) { if (assembly == ASM_AUTO) {
assembly = cpu_info.assembly; assembly = (enum Assembly) cpu_info.assembly;
} }
if (assembly == ASM_NONE) { if (assembly == ASM_NONE) {
return index; return NULL;
} }
const size_t offset = VARIANT_MAX * 4 * 2; return asm_func_map[av][variant][assembly];
if (algorithm == ALGO_CRYPTONIGHT && variant == VARIANT_2) {
if (av == AV_SINGLE) {
return offset + assembly - 2;
}
if (av == AV_DOUBLE) {
return offset + 2;
}
}
# endif
return index;
} }
#endif
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant) cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
@ -207,10 +197,15 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX); assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
# ifndef XMRIG_NO_ASM # ifndef XMRIG_NO_ASM
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2 + 3] = { if (algorithm == ALGO_CRYPTONIGHT) {
# else cn_hash_fun fun = cryptonight_hash_asm_fn(av, variant, opt_assembly);
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = { if (fun) {
return fun;
}
}
# endif # endif
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
cryptonight_av1_v0, cryptonight_av1_v0,
cryptonight_av2_v0, cryptonight_av2_v0,
cryptonight_av3_v0, cryptonight_av3_v0,
@ -263,16 +258,11 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
NULL, NULL,
NULL, NULL,
NULL, NULL,
# endif
# ifndef XMRIG_NO_ASM
cryptonight_single_hash_asm_intel,
cryptonight_single_hash_asm_ryzen,
cryptonight_double_hash_asm
# endif # endif
}; };
# ifndef NDEBUG # ifndef NDEBUG
const size_t index = fn_index(algorithm, av, variant, opt_assembly); const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
cn_hash_fun func = func_table[index]; cn_hash_fun func = func_table[index];
@ -281,7 +271,7 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
return func; return func;
# else # else
return func_table[fn_index(algorithm, av, variant, opt_assembly)]; return func_table[VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1];
# endif # endif
} }
@ -290,6 +280,16 @@ bool cryptonight_init(int av)
{ {
opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT; opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT;
# ifndef XMRIG_NO_ASM
asm_func_map[AV_SINGLE][VARIANT_2][ASM_INTEL] = cryptonight_single_hash_asm_intel;
asm_func_map[AV_SINGLE][VARIANT_2][ASM_RYZEN] = cryptonight_single_hash_asm_intel;
asm_func_map[AV_SINGLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_single_hash_asm_bulldozer;
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_INTEL] = cryptonight_double_hash_asm;
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_RYZEN] = cryptonight_double_hash_asm;
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_double_hash_asm;
# endif
return self_test(); return self_test();
} }

View file

@ -196,6 +196,7 @@ void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *res
#ifndef XMRIG_NO_ASM #ifndef XMRIG_NO_ASM
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx); extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx); extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
extern void cnv2_mainloop_bulldozer_asm(struct cryptonight_ctx *ctx);
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1); extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
@ -225,6 +226,19 @@ void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t siz
} }
void cryptonight_single_hash_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cnv2_mainloop_bulldozer_asm(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{ {
keccak(input, size, ctx[0]->state, 200); keccak(input, size, ctx[0]->state, 200);

View file

@ -97,54 +97,5 @@ void cryptonight_r_av1(const uint8_t *restrict input, size_t size, uint8_t *rest
#ifndef XMRIG_NO_ASM #ifndef XMRIG_NO_ASM
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
void cryptonight_single_hash_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cnv2_mainloop_ivybridge_asm(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cnv2_mainloop_ryzen_asm(ctx[0]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
{
keccak(input, size, ctx[0]->state, 200);
keccak(input + size, size, ctx[1]->state, 200);
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
keccakf((uint64_t*) ctx[0]->state, 24);
keccakf((uint64_t*) ctx[1]->state, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
#endif #endif

View file

@ -1,28 +1,16 @@
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
set(XMRIG_ASM_LIBRARY "xmrig-asm") set(XMRIG_ASM_LIBRARY "xmrig-asm")
if (CMAKE_C_COMPILER_ID MATCHES MSVC) enable_language(ASM)
enable_language(ASM_MASM)
if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141) if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.asm") set(XMRIG_ASM_FILE "crypto/asm/win64/cn_main_loop.S")
else()
set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.asm")
endif()
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM)
else() else()
enable_language(ASM) set(XMRIG_ASM_FILE "crypto/asm/cn_main_loop.S")
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.S")
else()
set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.S")
endif()
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
endif() endif()
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE}) add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE})
set(XMRIG_ASM_SOURCES "") set(XMRIG_ASM_SOURCES "")
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)

23
cpu.c
View file

@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones> * Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com> * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com> * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* * Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -64,20 +65,20 @@ void cpu_init_common() {
if (data.flags[CPU_FEATURE_AES]) { if (data.flags[CPU_FEATURE_AES]) {
cpu_info.flags |= CPU_FLAG_AES; cpu_info.flags |= CPU_FLAG_AES;
# ifndef XMRIG_NO_ASM
if (data.vendor == VENDOR_AMD) {
cpu_info.assembly = ASM_RYZEN;
}
else if (data.vendor == VENDOR_INTEL) {
cpu_info.assembly = ASM_INTEL;
}
# endif
} }
if (data.flags[CPU_FEATURE_BMI2]) { if (data.flags[CPU_FEATURE_BMI2]) {
cpu_info.flags |= CPU_FLAG_BMI2; cpu_info.flags |= CPU_FLAG_BMI2;
} }
# ifndef XMRIG_NO_ASM
if (data.vendor == VENDOR_AMD) {
cpu_info.assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER;
}
else if (data.vendor == VENDOR_INTEL) {
cpu_info.assembly = ASM_INTEL;
}
# endif
} }
#endif #endif

5
cpu.h
View file

@ -4,8 +4,9 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones> * Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com> * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com> * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* * Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by

View file

@ -94,7 +94,7 @@
lea r9, QWORD PTR [rdx+r13] lea r9, QWORD PTR [rdx+r13]
movdqu xmm15, XMMWORD PTR [r9] movdqu xmm15, XMMWORD PTR [r9]
ALIGN 16 ALIGN(64)
main_loop_double_sandybridge: main_loop_double_sandybridge:
movdqu xmm9, xmm15 movdqu xmm9, xmm15
mov eax, edx mov eax, edx

View file

@ -0,0 +1,180 @@
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp+24], rbp
mov QWORD PTR [rsp+32], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 64
stmxcsr DWORD PTR [rsp]
mov DWORD PTR [rsp+4], 24448
ldmxcsr DWORD PTR [rsp+4]
mov rax, QWORD PTR [rcx+48]
mov r9, rcx
xor rax, QWORD PTR [rcx+16]
mov ebp, 524288
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+40]
mov r10, r8
mov rdx, QWORD PTR [rcx+56]
movq xmm3, rax
xor rdx, QWORD PTR [rcx+24]
xor r11, QWORD PTR [rcx+8]
mov rbx, QWORD PTR [rcx+224]
mov rax, QWORD PTR [r9+80]
xor rax, QWORD PTR [r9+64]
movq xmm0, rdx
mov rcx, QWORD PTR [rcx+88]
xor rcx, QWORD PTR [r9+72]
mov rdi, QWORD PTR [r9+104]
and r10d, 2097136
movaps XMMWORD PTR [rsp+48], xmm6
movq xmm4, rax
movaps XMMWORD PTR [rsp+32], xmm7
movaps XMMWORD PTR [rsp+16], xmm8
xorps xmm8, xmm8
mov ax, 1023
shl rax, 52
movq xmm7, rax
mov r15, QWORD PTR [r9+96]
punpcklqdq xmm3, xmm0
movq xmm0, rcx
punpcklqdq xmm4, xmm0
ALIGN(64)
cnv2_main_loop_bulldozer:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm6, r8
pinsrq xmm6, r11, 1
lea rdx, QWORD PTR [r10+rbx]
lea r9, QWORD PTR [rdi+rdi]
shl rdi, 32
mov ecx, r10d
mov eax, r10d
xor ecx, 16
xor eax, 32
xor r10d, 48
aesenc xmm5, xmm6
movdqa xmm2, XMMWORD PTR [rcx+rbx]
movdqa xmm1, XMMWORD PTR [rax+rbx]
movdqa xmm0, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
paddq xmm0, xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm0
movdqa XMMWORD PTR [rax+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movaps xmm1, xmm8
mov rsi, r15
xor rsi, rdi
mov edi, 1023
shl rdi, 52
movq r14, xmm5
pextrq rax, xmm5, 1
movdqa xmm0, xmm5
pxor xmm0, xmm3
mov r10, r14
and r10d, 2097136
movdqa XMMWORD PTR [rdx], xmm0
xor rsi, QWORD PTR [r10+rbx]
lea r12, QWORD PTR [r10+rbx]
mov r13, QWORD PTR [r10+rbx+8]
add r9d, r14d
or r9d, -2147483647
xor edx, edx
div r9
mov eax, eax
shl rdx, 32
lea r15, [rax+rdx]
lea rax, [r14+r15]
shr rax, 12
add rax, rdi
movq xmm0, rax
sqrtsd xmm1, xmm0
movq rdi, xmm1
test rdi, 524287
je sqrt_fixup_bulldozer
shr rdi, 19
sqrt_fixup_bulldozer_ret:
mov rax, rsi
mul r14
movq xmm1, rax
movq xmm0, rdx
punpcklqdq xmm0, xmm1
mov r9d, r10d
mov ecx, r10d
xor r9d, 16
xor ecx, 32
xor r10d, 48
movdqa xmm1, XMMWORD PTR [rcx+rbx]
xor rdx, [rcx+rbx]
xor rax, [rcx+rbx+8]
movdqa xmm2, XMMWORD PTR [r9+rbx]
pxor xmm2, xmm0
paddq xmm4, XMMWORD PTR [r10+rbx]
paddq xmm2, xmm3
paddq xmm1, xmm6
movdqa XMMWORD PTR [r9+rbx], xmm4
movdqa XMMWORD PTR [rcx+rbx], xmm2
movdqa XMMWORD PTR [r10+rbx], xmm1
movdqa xmm4, xmm3
add r8, rdx
add r11, rax
mov QWORD PTR [r12], r8
xor r8, rsi
mov QWORD PTR [r12+8], r11
mov r10, r8
xor r11, r13
and r10d, 2097136
movdqa xmm3, xmm5
dec ebp
jne cnv2_main_loop_bulldozer
ldmxcsr DWORD PTR [rsp]
movaps xmm6, XMMWORD PTR [rsp+48]
lea r11, QWORD PTR [rsp+64]
mov rbx, QWORD PTR [r11+56]
mov rbp, QWORD PTR [r11+64]
mov rsi, QWORD PTR [r11+72]
movaps xmm8, XMMWORD PTR [r11-48]
movaps xmm7, XMMWORD PTR [rsp+32]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
jmp cnv2_main_loop_bulldozer_endp
sqrt_fixup_bulldozer:
movq r9, xmm5
add r9, r15
dec rdi
mov edx, -1022
shl rdx, 32
mov rax, rdi
shr rdi, 19
shr rax, 20
mov rcx, rdi
sub rcx, rax
lea rcx, [rcx+rdx+1]
add rax, rdx
imul rcx, rax
sub rcx, r9
adc rdi, 0
jmp sqrt_fixup_bulldozer_ret
cnv2_main_loop_bulldozer_endp:

View file

@ -50,7 +50,7 @@
punpcklqdq xmm5, xmm0 punpcklqdq xmm5, xmm0
movdqu xmm6, XMMWORD PTR [r10+rbx] movdqu xmm6, XMMWORD PTR [r10+rbx]
ALIGN 16 ALIGN(64)
main_loop_ivybridge: main_loop_ivybridge:
lea rdx, QWORD PTR [r10+rbx] lea rdx, QWORD PTR [r10+rbx]
mov ecx, r10d mov ecx, r10d

View file

@ -45,7 +45,7 @@
movq xmm0, rcx movq xmm0, rcx
punpcklqdq xmm4, xmm0 punpcklqdq xmm4, xmm0
ALIGN 16 ALIGN(64)
main_loop_ryzen: main_loop_ryzen:
movdqa xmm5, XMMWORD PTR [r10+rbx] movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm0, r11 movq xmm0, r11

View file

@ -1,4 +1,8 @@
#define ALIGN .align #ifdef __APPLE__
# define ALIGN(x) .align 6
#else
# define ALIGN(x) .align 64
#endif
.intel_syntax noprefix .intel_syntax noprefix
#ifdef __APPLE__ #ifdef __APPLE__
# define FN_PREFIX(fn) _ ## fn # define FN_PREFIX(fn) _ ## fn
@ -9,29 +13,42 @@
#endif #endif
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm) .global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
.global FN_PREFIX(cnv2_mainloop_ryzen_asm) .global FN_PREFIX(cnv2_mainloop_ryzen_asm)
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
ALIGN 16 ALIGN(64)
FN_PREFIX(cnv2_mainloop_ivybridge_asm): FN_PREFIX(cnv2_mainloop_ivybridge_asm):
sub rsp, 48 sub rsp, 48
mov rcx, rdi mov rcx, rdi
#include "cnv2_main_loop_ivybridge.inc" #include "cn2/cnv2_main_loop_ivybridge.inc"
add rsp, 48 add rsp, 48
ret 0 ret 0
mov eax, 3735929054
ALIGN 16 ALIGN(64)
FN_PREFIX(cnv2_mainloop_ryzen_asm): FN_PREFIX(cnv2_mainloop_ryzen_asm):
sub rsp, 48 sub rsp, 48
mov rcx, rdi mov rcx, rdi
#include "cnv2_main_loop_ryzen.inc" #include "cn2/cnv2_main_loop_ryzen.inc"
add rsp, 48 add rsp, 48
ret 0 ret 0
mov eax, 3735929054
ALIGN 16 ALIGN(64)
FN_PREFIX(cnv2_mainloop_bulldozer_asm):
sub rsp, 48
mov rcx, rdi
#include "cn2/cnv2_main_loop_bulldozer.inc"
add rsp, 48
ret 0
mov eax, 3735929054
ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
sub rsp, 48 sub rsp, 48
mov rcx, rdi mov rcx, rdi
mov rdx, rsi mov rdx, rsi
#include "cnv2_double_main_loop_sandybridge.inc" #include "cn2/cnv2_double_main_loop_sandybridge.inc"
add rsp, 48 add rsp, 48
ret 0 ret 0
mov eax, 3735929054

View file

@ -1,24 +1,35 @@
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE _TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
PUBLIC cnv2_mainloop_ivybridge_asm PUBLIC cnv2_mainloop_ivybridge_asm
PUBLIC cnv2_mainloop_ryzen_asm PUBLIC cnv2_mainloop_ryzen_asm
PUBLIC cnv2_mainloop_bulldozer_asm
PUBLIC cnv2_double_mainloop_sandybridge_asm PUBLIC cnv2_double_mainloop_sandybridge_asm
ALIGN 64 ALIGN(64)
cnv2_mainloop_ivybridge_asm PROC cnv2_mainloop_ivybridge_asm PROC
INCLUDE cnv2_main_loop_ivybridge.inc INCLUDE cn2/cnv2_main_loop_ivybridge.inc
ret 0 ret 0
mov eax, 3735929054
cnv2_mainloop_ivybridge_asm ENDP cnv2_mainloop_ivybridge_asm ENDP
ALIGN 64 ALIGN(64)
cnv2_mainloop_ryzen_asm PROC cnv2_mainloop_ryzen_asm PROC
INCLUDE cnv2_main_loop_ryzen.inc INCLUDE cn2/cnv2_main_loop_ryzen.inc
ret 0 ret 0
mov eax, 3735929054
cnv2_mainloop_ryzen_asm ENDP cnv2_mainloop_ryzen_asm ENDP
ALIGN 64 ALIGN(64)
cnv2_double_mainloop_sandybridge_asm PROC cnv2_mainloop_bulldozer_asm PROC
INCLUDE cnv2_double_main_loop_sandybridge.inc INCLUDE cn2/cnv2_main_loop_bulldozer.inc
ret 0 ret 0
mov eax, 3735929054
cnv2_mainloop_bulldozer_asm ENDP
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm PROC
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
ret 0
mov eax, 3735929054
cnv2_double_mainloop_sandybridge_asm ENDP cnv2_double_mainloop_sandybridge_asm ENDP
_TEXT_CNV2_MAINLOOP ENDS _TEXT_CNV2_MAINLOOP ENDS

View file

@ -0,0 +1,31 @@
#define ALIGN(x) .align 64
.intel_syntax noprefix
.section .text
.global cnv2_mainloop_ivybridge_asm
.global cnv2_mainloop_ryzen_asm
.global cnv2_mainloop_bulldozer_asm
.global cnv2_double_mainloop_sandybridge_asm
ALIGN(64)
cnv2_mainloop_ivybridge_asm:
#include "../cn2/cnv2_main_loop_ivybridge.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
cnv2_mainloop_ryzen_asm:
#include "../cn2/cnv2_main_loop_ryzen.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
cnv2_mainloop_bulldozer_asm:
#include "../cn2/cnv2_main_loop_bulldozer.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm:
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
ret 0
mov eax, 3735929054

View file

@ -1,21 +0,0 @@
#define ALIGN .align
.intel_syntax noprefix
.section .text
.global cnv2_mainloop_ivybridge_asm
.global cnv2_mainloop_ryzen_asm
.global cnv2_double_mainloop_sandybridge_asm
ALIGN 16
cnv2_mainloop_ivybridge_asm:
#include "../cnv2_main_loop_ivybridge.inc"
ret 0
ALIGN 16
cnv2_mainloop_ryzen_asm:
#include "../cnv2_main_loop_ryzen.inc"
ret 0
ALIGN 16
cnv2_double_mainloop_sandybridge_asm:
#include "../cnv2_double_main_loop_sandybridge.inc"
ret 0

View file

@ -127,7 +127,7 @@ static struct option const options[] = {
{ "cpu-affinity", 1, NULL, 1020 }, { "cpu-affinity", 1, NULL, 1020 },
{ "donate-level", 1, NULL, 1003 }, { "donate-level", 1, NULL, 1003 },
{ "help", 0, NULL, 'h' }, { "help", 0, NULL, 'h' },
{ "keepalive", 0, NULL ,'k' }, { "keepalive", 0, NULL, 'k' },
{ "max-cpu-usage", 1, NULL, 1004 }, { "max-cpu-usage", 1, NULL, 1004 },
{ "nicehash", 0, NULL, 1006 }, { "nicehash", 0, NULL, 1006 },
{ "no-color", 0, NULL, 1002 }, { "no-color", 0, NULL, 1002 },
@ -167,7 +167,8 @@ static const char *asm_names[] = {
"none", "none",
"auto", "auto",
"intel", "intel",
"ryzen" "ryzen",
"bulldozer"
}; };

View file

@ -65,6 +65,7 @@ enum Assembly {
ASM_AUTO, ASM_AUTO,
ASM_INTEL, ASM_INTEL,
ASM_RYZEN, ASM_RYZEN,
ASM_BULLDOZER,
ASM_MAX ASM_MAX
}; };