mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-03 17:40:13 +00:00
Use new style method to call ASM functions for cn/2 & added bulldozer ASM code.
This commit is contained in:
parent
7574bfab60
commit
ef2e8bed6e
16 changed files with 325 additions and 150 deletions
|
@ -44,6 +44,9 @@
|
||||||
#include "options.h"
|
#include "options.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Dispatch table for hand-written assembly kernels, indexed [algo variant (av)][variant][assembly].
 * Static storage duration zero-initialises it, so every slot is NULL until cryptonight_init() fills it;
 * no explicit initialiser is used because empty braces `{}` are not valid C before C23. */
static cn_hash_fun asm_func_map[AV_MAX][VARIANT_MAX][ASM_MAX];
|
||||||
|
|
||||||
|
|
||||||
void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
@ -78,6 +81,7 @@ void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output,
|
||||||
#ifndef XMRIG_NO_ASM
|
#ifndef XMRIG_NO_ASM
|
||||||
void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
void cryptonight_single_hash_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -171,34 +175,20 @@ static bool self_test() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t fn_index(enum Algo algorithm, enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
|
#ifndef XMRIG_NO_ASM
|
||||||
|
cn_hash_fun cryptonight_hash_asm_fn(enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
|
||||||
{
|
{
|
||||||
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
|
|
||||||
|
|
||||||
# ifndef XMRIG_NO_ASM
|
|
||||||
if (assembly == ASM_AUTO) {
|
if (assembly == ASM_AUTO) {
|
||||||
assembly = cpu_info.assembly;
|
assembly = (enum Assembly) cpu_info.assembly;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (assembly == ASM_NONE) {
|
if (assembly == ASM_NONE) {
|
||||||
return index;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t offset = VARIANT_MAX * 4 * 2;
|
return asm_func_map[av][variant][assembly];
|
||||||
|
|
||||||
if (algorithm == ALGO_CRYPTONIGHT && variant == VARIANT_2) {
|
|
||||||
if (av == AV_SINGLE) {
|
|
||||||
return offset + assembly - 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (av == AV_DOUBLE) {
|
|
||||||
return offset + 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# endif
|
|
||||||
|
|
||||||
return index;
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
|
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
|
||||||
|
@ -207,10 +197,15 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
|
||||||
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
|
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
|
||||||
|
|
||||||
# ifndef XMRIG_NO_ASM
|
# ifndef XMRIG_NO_ASM
|
||||||
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2 + 3] = {
|
if (algorithm == ALGO_CRYPTONIGHT) {
|
||||||
# else
|
cn_hash_fun fun = cryptonight_hash_asm_fn(av, variant, opt_assembly);
|
||||||
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
|
if (fun) {
|
||||||
|
return fun;
|
||||||
|
}
|
||||||
|
}
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
|
||||||
cryptonight_av1_v0,
|
cryptonight_av1_v0,
|
||||||
cryptonight_av2_v0,
|
cryptonight_av2_v0,
|
||||||
cryptonight_av3_v0,
|
cryptonight_av3_v0,
|
||||||
|
@ -263,16 +258,11 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
# endif
|
|
||||||
# ifndef XMRIG_NO_ASM
|
|
||||||
cryptonight_single_hash_asm_intel,
|
|
||||||
cryptonight_single_hash_asm_ryzen,
|
|
||||||
cryptonight_double_hash_asm
|
|
||||||
# endif
|
# endif
|
||||||
};
|
};
|
||||||
|
|
||||||
# ifndef NDEBUG
|
# ifndef NDEBUG
|
||||||
const size_t index = fn_index(algorithm, av, variant, opt_assembly);
|
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
|
||||||
|
|
||||||
cn_hash_fun func = func_table[index];
|
cn_hash_fun func = func_table[index];
|
||||||
|
|
||||||
|
@ -281,7 +271,7 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
|
||||||
|
|
||||||
return func;
|
return func;
|
||||||
# else
|
# else
|
||||||
return func_table[fn_index(algorithm, av, variant, opt_assembly)];
|
return func_table[VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1];
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -290,6 +280,16 @@ bool cryptonight_init(int av)
|
||||||
{
|
{
|
||||||
opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT;
|
opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT;
|
||||||
|
|
||||||
|
# ifndef XMRIG_NO_ASM
|
||||||
|
asm_func_map[AV_SINGLE][VARIANT_2][ASM_INTEL] = cryptonight_single_hash_asm_intel;
|
||||||
|
asm_func_map[AV_SINGLE][VARIANT_2][ASM_RYZEN] = cryptonight_single_hash_asm_intel;
|
||||||
|
asm_func_map[AV_SINGLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_single_hash_asm_bulldozer;
|
||||||
|
|
||||||
|
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_INTEL] = cryptonight_double_hash_asm;
|
||||||
|
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_RYZEN] = cryptonight_double_hash_asm;
|
||||||
|
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_double_hash_asm;
|
||||||
|
# endif
|
||||||
|
|
||||||
return self_test();
|
return self_test();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -196,6 +196,7 @@ void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *res
|
||||||
#ifndef XMRIG_NO_ASM
|
#ifndef XMRIG_NO_ASM
|
||||||
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
|
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
|
||||||
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
|
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
|
||||||
|
extern void cnv2_mainloop_bulldozer_asm(struct cryptonight_ctx *ctx);
|
||||||
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
|
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
|
||||||
|
|
||||||
|
|
||||||
|
@ -225,6 +226,19 @@ void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t siz
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Single-hash entry point that runs the main loop through cnv2_mainloop_bulldozer_asm.
 *
 * Only ctx[0] is used. The steps, in order:
 *   1. keccak() over `size` bytes of `input` into ctx[0]->state (last argument 200).
 *   2. cn_explode_scratchpad() from ctx[0]->state into ctx[0]->memory.
 *   3. cnv2_mainloop_bulldozer_asm() on ctx[0].
 *   4. cn_implode_scratchpad() from ctx[0]->memory back into ctx[0]->state.
 *   5. keccakf() on the state with argument 24 (presumably the round count - confirm).
 *   6. One of the extra_hashes[] entries, picked by the low two bits of
 *      ctx[0]->state[0], is called with the state and `output`.
 */
void cryptonight_single_hash_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||||
|
|
||||||
|
cnv2_mainloop_bulldozer_asm(ctx[0]);
|
||||||
|
|
||||||
|
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||||
|
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||||
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
{
|
{
|
||||||
keccak(input, size, ctx[0]->state, 200);
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
|
|
@ -97,54 +97,5 @@ void cryptonight_r_av1(const uint8_t *restrict input, size_t size, uint8_t *rest
|
||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_NO_ASM
|
#ifndef XMRIG_NO_ASM
|
||||||
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
|
|
||||||
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
|
|
||||||
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
|
|
||||||
|
|
||||||
|
|
||||||
void cryptonight_single_hash_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
|
||||||
{
|
|
||||||
keccak(input, size, ctx[0]->state, 200);
|
|
||||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
|
||||||
|
|
||||||
cnv2_mainloop_ivybridge_asm(ctx[0]);
|
|
||||||
|
|
||||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
|
||||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
|
||||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
|
||||||
{
|
|
||||||
keccak(input, size, ctx[0]->state, 200);
|
|
||||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
|
||||||
|
|
||||||
cnv2_mainloop_ryzen_asm(ctx[0]);
|
|
||||||
|
|
||||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
|
||||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
|
||||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
|
||||||
{
|
|
||||||
keccak(input, size, ctx[0]->state, 200);
|
|
||||||
keccak(input + size, size, ctx[1]->state, 200);
|
|
||||||
|
|
||||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
|
||||||
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
|
|
||||||
|
|
||||||
cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
|
||||||
|
|
||||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
|
||||||
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
|
|
||||||
|
|
||||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
|
||||||
keccakf((uint64_t*) ctx[1]->state, 24);
|
|
||||||
|
|
||||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
|
||||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,28 +1,16 @@
|
||||||
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
set(XMRIG_ASM_LIBRARY "xmrig-asm")
|
set(XMRIG_ASM_LIBRARY "xmrig-asm")
|
||||||
|
|
||||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
enable_language(ASM)
|
||||||
enable_language(ASM_MASM)
|
|
||||||
|
|
||||||
if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141)
|
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||||
set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.asm")
|
set(XMRIG_ASM_FILE "crypto/asm/win64/cn_main_loop.S")
|
||||||
else()
|
|
||||||
set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.asm")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM)
|
|
||||||
else()
|
else()
|
||||||
enable_language(ASM)
|
set(XMRIG_ASM_FILE "crypto/asm/cn_main_loop.S")
|
||||||
|
|
||||||
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
|
|
||||||
set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.S")
|
|
||||||
else()
|
|
||||||
set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.S")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
|
||||||
|
|
||||||
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE})
|
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE})
|
||||||
set(XMRIG_ASM_SOURCES "")
|
set(XMRIG_ASM_SOURCES "")
|
||||||
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
||||||
|
|
23
cpu.c
23
cpu.c
|
@ -4,8 +4,9 @@
|
||||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
*
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
@ -64,20 +65,20 @@ void cpu_init_common() {
|
||||||
|
|
||||||
if (data.flags[CPU_FEATURE_AES]) {
|
if (data.flags[CPU_FEATURE_AES]) {
|
||||||
cpu_info.flags |= CPU_FLAG_AES;
|
cpu_info.flags |= CPU_FLAG_AES;
|
||||||
|
|
||||||
# ifndef XMRIG_NO_ASM
|
|
||||||
if (data.vendor == VENDOR_AMD) {
|
|
||||||
cpu_info.assembly = ASM_RYZEN;
|
|
||||||
}
|
|
||||||
else if (data.vendor == VENDOR_INTEL) {
|
|
||||||
cpu_info.assembly = ASM_INTEL;
|
|
||||||
}
|
|
||||||
# endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data.flags[CPU_FEATURE_BMI2]) {
|
if (data.flags[CPU_FEATURE_BMI2]) {
|
||||||
cpu_info.flags |= CPU_FLAG_BMI2;
|
cpu_info.flags |= CPU_FLAG_BMI2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ifndef XMRIG_NO_ASM
|
||||||
|
if (data.vendor == VENDOR_AMD) {
|
||||||
|
cpu_info.assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER;
|
||||||
|
}
|
||||||
|
else if (data.vendor == VENDOR_INTEL) {
|
||||||
|
cpu_info.assembly = ASM_INTEL;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
5
cpu.h
5
cpu.h
|
@ -4,8 +4,9 @@
|
||||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
*
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|
|
@ -94,7 +94,7 @@
|
||||||
lea r9, QWORD PTR [rdx+r13]
|
lea r9, QWORD PTR [rdx+r13]
|
||||||
movdqu xmm15, XMMWORD PTR [r9]
|
movdqu xmm15, XMMWORD PTR [r9]
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
main_loop_double_sandybridge:
|
main_loop_double_sandybridge:
|
||||||
movdqu xmm9, xmm15
|
movdqu xmm9, xmm15
|
||||||
mov eax, edx
|
mov eax, edx
|
180
crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
Normal file
180
crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
mov QWORD PTR [rsp+16], rbx
|
||||||
|
mov QWORD PTR [rsp+24], rbp
|
||||||
|
mov QWORD PTR [rsp+32], rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 64
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov ebp, 524288
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm3, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
mov rdi, QWORD PTR [r9+104]
|
||||||
|
and r10d, 2097136
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm6
|
||||||
|
movq xmm4, rax
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm8
|
||||||
|
xorps xmm8, xmm8
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm7, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_main_loop_bulldozer:
|
||||||
|
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
|
movq xmm6, r8
|
||||||
|
pinsrq xmm6, r11, 1
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
lea r9, QWORD PTR [rdi+rdi]
|
||||||
|
shl rdi, 32
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
aesenc xmm5, xmm6
|
||||||
|
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movaps xmm1, xmm8
|
||||||
|
mov rsi, r15
|
||||||
|
xor rsi, rdi
|
||||||
|
|
||||||
|
mov edi, 1023
|
||||||
|
shl rdi, 52
|
||||||
|
|
||||||
|
movq r14, xmm5
|
||||||
|
pextrq rax, xmm5, 1
|
||||||
|
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
mov r10, r14
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rsi, QWORD PTR [r10+rbx]
|
||||||
|
lea r12, QWORD PTR [r10+rbx]
|
||||||
|
mov r13, QWORD PTR [r10+rbx+8]
|
||||||
|
|
||||||
|
add r9d, r14d
|
||||||
|
or r9d, -2147483647
|
||||||
|
xor edx, edx
|
||||||
|
div r9
|
||||||
|
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
lea r15, [rax+rdx]
|
||||||
|
lea rax, [r14+r15]
|
||||||
|
shr rax, 12
|
||||||
|
add rax, rdi
|
||||||
|
movq xmm0, rax
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
movq rdi, xmm1
|
||||||
|
test rdi, 524287
|
||||||
|
je sqrt_fixup_bulldozer
|
||||||
|
shr rdi, 19
|
||||||
|
|
||||||
|
sqrt_fixup_bulldozer_ret:
|
||||||
|
mov rax, rsi
|
||||||
|
mul r14
|
||||||
|
movq xmm1, rax
|
||||||
|
movq xmm0, rdx
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
mov r9d, r10d
|
||||||
|
mov ecx, r10d
|
||||||
|
xor r9d, 16
|
||||||
|
xor ecx, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
pxor xmm2, xmm0
|
||||||
|
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movdqa xmm4, xmm3
|
||||||
|
add r8, rdx
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r12], r8
|
||||||
|
xor r8, rsi
|
||||||
|
mov QWORD PTR [r12+8], r11
|
||||||
|
mov r10, r8
|
||||||
|
xor r11, r13
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm3, xmm5
|
||||||
|
dec ebp
|
||||||
|
jne cnv2_main_loop_bulldozer
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+64]
|
||||||
|
mov rbx, QWORD PTR [r11+56]
|
||||||
|
mov rbp, QWORD PTR [r11+64]
|
||||||
|
mov rsi, QWORD PTR [r11+72]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-48]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
jmp cnv2_main_loop_bulldozer_endp
|
||||||
|
|
||||||
|
sqrt_fixup_bulldozer:
|
||||||
|
movq r9, xmm5
|
||||||
|
add r9, r15
|
||||||
|
dec rdi
|
||||||
|
mov edx, -1022
|
||||||
|
shl rdx, 32
|
||||||
|
mov rax, rdi
|
||||||
|
shr rdi, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdi
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+rdx+1]
|
||||||
|
add rax, rdx
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdi, 0
|
||||||
|
jmp sqrt_fixup_bulldozer_ret
|
||||||
|
|
||||||
|
cnv2_main_loop_bulldozer_endp:
|
|
@ -50,7 +50,7 @@
|
||||||
punpcklqdq xmm5, xmm0
|
punpcklqdq xmm5, xmm0
|
||||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
main_loop_ivybridge:
|
main_loop_ivybridge:
|
||||||
lea rdx, QWORD PTR [r10+rbx]
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
mov ecx, r10d
|
mov ecx, r10d
|
|
@ -45,7 +45,7 @@
|
||||||
movq xmm0, rcx
|
movq xmm0, rcx
|
||||||
punpcklqdq xmm4, xmm0
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
main_loop_ryzen:
|
main_loop_ryzen:
|
||||||
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
movq xmm0, r11
|
movq xmm0, r11
|
|
@ -1,4 +1,8 @@
|
||||||
#define ALIGN .align
|
#ifdef __APPLE__
|
||||||
|
# define ALIGN(x) .align 6
|
||||||
|
#else
|
||||||
|
# define ALIGN(x) .align 64
|
||||||
|
#endif
|
||||||
.intel_syntax noprefix
|
.intel_syntax noprefix
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
# define FN_PREFIX(fn) _ ## fn
|
# define FN_PREFIX(fn) _ ## fn
|
||||||
|
@ -9,29 +13,42 @@
|
||||||
#endif
|
#endif
|
||||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||||
|
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||||
sub rsp, 48
|
sub rsp, 48
|
||||||
mov rcx, rdi
|
mov rcx, rdi
|
||||||
#include "cnv2_main_loop_ivybridge.inc"
|
#include "cn2/cnv2_main_loop_ivybridge.inc"
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||||
sub rsp, 48
|
sub rsp, 48
|
||||||
mov rcx, rdi
|
mov rcx, rdi
|
||||||
#include "cnv2_main_loop_ryzen.inc"
|
#include "cn2/cnv2_main_loop_ryzen.inc"
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
|
FN_PREFIX(cnv2_mainloop_bulldozer_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cn2/cnv2_main_loop_bulldozer.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||||
sub rsp, 48
|
sub rsp, 48
|
||||||
mov rcx, rdi
|
mov rcx, rdi
|
||||||
mov rdx, rsi
|
mov rdx, rsi
|
||||||
#include "cnv2_double_main_loop_sandybridge.inc"
|
#include "cn2/cnv2_double_main_loop_sandybridge.inc"
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
|
mov eax, 3735929054
|
|
@ -1,24 +1,35 @@
|
||||||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||||
PUBLIC cnv2_mainloop_ryzen_asm
|
PUBLIC cnv2_mainloop_ryzen_asm
|
||||||
|
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||||
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN(64)
|
||||||
cnv2_mainloop_ivybridge_asm PROC
|
cnv2_mainloop_ivybridge_asm PROC
|
||||||
INCLUDE cnv2_main_loop_ivybridge.inc
|
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
|
||||||
ret 0
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
cnv2_mainloop_ivybridge_asm ENDP
|
cnv2_mainloop_ivybridge_asm ENDP
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN(64)
|
||||||
cnv2_mainloop_ryzen_asm PROC
|
cnv2_mainloop_ryzen_asm PROC
|
||||||
INCLUDE cnv2_main_loop_ryzen.inc
|
INCLUDE cn2/cnv2_main_loop_ryzen.inc
|
||||||
ret 0
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
cnv2_mainloop_ryzen_asm ENDP
|
cnv2_mainloop_ryzen_asm ENDP
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN(64)
|
||||||
cnv2_double_mainloop_sandybridge_asm PROC
|
cnv2_mainloop_bulldozer_asm PROC
|
||||||
INCLUDE cnv2_double_main_loop_sandybridge.inc
|
INCLUDE cn2/cnv2_main_loop_bulldozer.inc
|
||||||
ret 0
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
cnv2_mainloop_bulldozer_asm ENDP
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_double_mainloop_sandybridge_asm PROC
|
||||||
|
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
cnv2_double_mainloop_sandybridge_asm ENDP
|
cnv2_double_mainloop_sandybridge_asm ENDP
|
||||||
|
|
||||||
_TEXT_CNV2_MAINLOOP ENDS
|
_TEXT_CNV2_MAINLOOP ENDS
|
31
crypto/asm/win64/cn_main_loop.S
Normal file
31
crypto/asm/win64/cn_main_loop.S
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
/*
 * Preprocessed GAS source (Intel syntax) exporting four global labels:
 * cnv2_mainloop_ivybridge_asm, cnv2_mainloop_ryzen_asm,
 * cnv2_mainloop_bulldozer_asm and cnv2_double_mainloop_sandybridge_asm.
 *
 * Each label is preceded by ALIGN(64); the macro ignores its argument and
 * always expands to `.align 64`. The body of each routine is pulled in from
 * the matching include under ../cn2/ and is followed by `ret 0`.
 *
 * The `mov eax, 3735929054` (0xDEADC0DE) after each `ret 0` is placed after
 * the return - presumably an end-of-routine marker rather than executed
 * code; confirm before removing.
 */
#define ALIGN(x) .align 64
|
||||||
|
.intel_syntax noprefix
|
||||||
|
.section .text
|
||||||
|
.global cnv2_mainloop_ivybridge_asm
|
||||||
|
.global cnv2_mainloop_ryzen_asm
|
||||||
|
.global cnv2_mainloop_bulldozer_asm
|
||||||
|
.global cnv2_double_mainloop_sandybridge_asm
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_mainloop_ivybridge_asm:
|
||||||
|
#include "../cn2/cnv2_main_loop_ivybridge.inc"
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_mainloop_ryzen_asm:
|
||||||
|
#include "../cn2/cnv2_main_loop_ryzen.inc"
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_mainloop_bulldozer_asm:
|
||||||
|
#include "../cn2/cnv2_main_loop_bulldozer.inc"
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_double_mainloop_sandybridge_asm:
|
||||||
|
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
|
@ -1,21 +0,0 @@
|
||||||
#define ALIGN .align
|
|
||||||
.intel_syntax noprefix
|
|
||||||
.section .text
|
|
||||||
.global cnv2_mainloop_ivybridge_asm
|
|
||||||
.global cnv2_mainloop_ryzen_asm
|
|
||||||
.global cnv2_double_mainloop_sandybridge_asm
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
cnv2_mainloop_ivybridge_asm:
|
|
||||||
#include "../cnv2_main_loop_ivybridge.inc"
|
|
||||||
ret 0
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
cnv2_mainloop_ryzen_asm:
|
|
||||||
#include "../cnv2_main_loop_ryzen.inc"
|
|
||||||
ret 0
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
cnv2_double_mainloop_sandybridge_asm:
|
|
||||||
#include "../cnv2_double_main_loop_sandybridge.inc"
|
|
||||||
ret 0
|
|
|
@ -127,7 +127,7 @@ static struct option const options[] = {
|
||||||
{ "cpu-affinity", 1, NULL, 1020 },
|
{ "cpu-affinity", 1, NULL, 1020 },
|
||||||
{ "donate-level", 1, NULL, 1003 },
|
{ "donate-level", 1, NULL, 1003 },
|
||||||
{ "help", 0, NULL, 'h' },
|
{ "help", 0, NULL, 'h' },
|
||||||
{ "keepalive", 0, NULL ,'k' },
|
{ "keepalive", 0, NULL, 'k' },
|
||||||
{ "max-cpu-usage", 1, NULL, 1004 },
|
{ "max-cpu-usage", 1, NULL, 1004 },
|
||||||
{ "nicehash", 0, NULL, 1006 },
|
{ "nicehash", 0, NULL, 1006 },
|
||||||
{ "no-color", 0, NULL, 1002 },
|
{ "no-color", 0, NULL, 1002 },
|
||||||
|
@ -167,7 +167,8 @@ static const char *asm_names[] = {
|
||||||
"none",
|
"none",
|
||||||
"auto",
|
"auto",
|
||||||
"intel",
|
"intel",
|
||||||
"ryzen"
|
"ryzen",
|
||||||
|
"bulldozer"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,7 @@ enum Assembly {
|
||||||
ASM_AUTO,
|
ASM_AUTO,
|
||||||
ASM_INTEL,
|
ASM_INTEL,
|
||||||
ASM_RYZEN,
|
ASM_RYZEN,
|
||||||
|
ASM_BULLDOZER,
|
||||||
ASM_MAX
|
ASM_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue