From 1bb8f77b527b02271c0645ced7063522322d628c Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 13 Mar 2019 22:00:44 +0100 Subject: [PATCH] Unified ASM functions signature --- src/Mem.cpp | 2 +- src/crypto/CryptoNight.h | 5 +- src/crypto/CryptoNight_x86.h | 70 +++++++++---------- .../asm/CryptonightR_soft_aes_template.inc | 2 + .../CryptonightR_soft_aes_template_win.inc | 2 + src/crypto/asm/CryptonightR_template.inc | 5 ++ src/crypto/asm/CryptonightR_template_win.inc | 5 ++ .../asm/CryptonightWOW_soft_aes_template.inc | 2 + .../CryptonightWOW_soft_aes_template_win.inc | 2 + src/crypto/asm/CryptonightWOW_template.inc | 5 ++ .../asm/CryptonightWOW_template_win.inc | 5 ++ .../cn2/cnv2_double_main_loop_sandybridge.inc | 3 + .../asm/cn2/cnv2_main_loop_bulldozer.inc | 2 + .../asm/cn2/cnv2_main_loop_ivybridge.inc | 2 + src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc | 2 + .../asm/cn2/cnv2_rwz_double_main_loop.inc | 3 + src/crypto/asm/cn2/cnv2_rwz_main_loop.inc | 2 + src/crypto/asm/cn_main_loop.S | 2 - .../CryptonightR_soft_aes_template_win.inc | 2 + .../asm/win64/CryptonightR_template_win.inc | 5 ++ .../CryptonightWOW_soft_aes_template_win.inc | 2 + .../asm/win64/CryptonightWOW_template_win.inc | 5 ++ .../cn2/cnv2_double_main_loop_sandybridge.inc | 3 + .../win64/cn2/cnv2_main_loop_bulldozer.inc | 2 + .../win64/cn2/cnv2_main_loop_ivybridge.inc | 2 + .../asm/win64/cn2/cnv2_main_loop_ryzen.inc | 2 + .../win64/cn2/cnv2_rwz_double_main_loop.inc | 3 + .../asm/win64/cn2/cnv2_rwz_main_loop.inc | 2 + src/workers/CpuThread.cpp | 24 +++---- src/workers/CpuThread.h | 3 +- 30 files changed, 121 insertions(+), 55 deletions(-) diff --git a/src/Mem.cpp b/src/Mem.cpp index 4fa794d65..01a2157b3 100644 --- a/src/Mem.cpp +++ b/src/Mem.cpp @@ -53,7 +53,7 @@ MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count) uint8_t* p = reinterpret_cast<uint8_t*>(allocateExecutableMemory(0x4000)); c->generated_code = reinterpret_cast<cn_mainloop_fun_ms_abi>(p); - c->generated_code_double = reinterpret_cast<cn_mainloop_double_fun_ms_abi>(p + 
0x2000); + c->generated_code_double = reinterpret_cast<cn_mainloop_fun_ms_abi>(p + 0x2000); c->generated_code_data.variant = xmrig::VARIANT_MAX; c->generated_code_data.height = (uint64_t)(-1); diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h index 91a4c7b71..b1ec2371d 100644 --- a/src/crypto/CryptoNight.h +++ b/src/crypto/CryptoNight.h @@ -36,8 +36,7 @@ #endif struct cryptonight_ctx; -typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx*) ABI_ATTRIBUTE; -typedef void(*cn_mainloop_double_fun_ms_abi)(cryptonight_ctx*, cryptonight_ctx*) ABI_ATTRIBUTE; +typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx**) ABI_ATTRIBUTE; struct cryptonight_r_data { int variant; @@ -54,7 +53,7 @@ struct cryptonight_ctx { const uint32_t* saes_table; cn_mainloop_fun_ms_abi generated_code; - cn_mainloop_double_fun_ms_abi generated_code_double; + cn_mainloop_fun_ms_abi generated_code_double; cryptonight_r_data generated_code_data; cryptonight_r_data generated_code_double_data; }; diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 8b9ea7836..202b662a2 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -590,7 +590,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si } ctx[0]->saes_table = (const uint32_t*)saes_table; - ctx[0]->generated_code(ctx[0]); + ctx[0]->generated_code(ctx); } else { #endif @@ -750,32 +750,32 @@ inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_ #ifndef XMRIG_NO_ASM -extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); -extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); +extern "C" void 
cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx); extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm; void wow_compile_code(const V4_Instruction* 
code, int code_size, void* machine_code, xmrig::Assembly ASM); void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); @@ -824,64 +824,64 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_ if (VARIANT == xmrig::VARIANT_2) { if (ASM == xmrig::ASM_INTEL) { - cnv2_mainloop_ivybridge_asm(ctx[0]); + cnv2_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cnv2_mainloop_ryzen_asm(ctx[0]); + cnv2_mainloop_ryzen_asm(ctx); } else { - cnv2_mainloop_bulldozer_asm(ctx[0]); + cnv2_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_HALF) { if (ASM == xmrig::ASM_INTEL) { - cn_half_mainloop_ivybridge_asm(ctx[0]); + cn_half_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_half_mainloop_ryzen_asm(ctx[0]); + cn_half_mainloop_ryzen_asm(ctx); } else { - cn_half_mainloop_bulldozer_asm(ctx[0]); + cn_half_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_TRTL) { if (ASM == xmrig::ASM_INTEL) { - cn_trtl_mainloop_ivybridge_asm(ctx[0]); + cn_trtl_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_trtl_mainloop_ryzen_asm(ctx[0]); + cn_trtl_mainloop_ryzen_asm(ctx); } else { - cn_trtl_mainloop_bulldozer_asm(ctx[0]); + cn_trtl_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_mainloop_asm(ctx[0]); + cnv2_rwz_mainloop_asm(ctx); } else if (VARIANT == xmrig::VARIANT_ZLS) { if (ASM == xmrig::ASM_INTEL) { - cn_zls_mainloop_ivybridge_asm(ctx[0]); + cn_zls_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - cn_zls_mainloop_ryzen_asm(ctx[0]); + cn_zls_mainloop_ryzen_asm(ctx); } else { - cn_zls_mainloop_bulldozer_asm(ctx[0]); + cn_zls_mainloop_bulldozer_asm(ctx); } } else if (VARIANT == xmrig::VARIANT_DOUBLE) { if (ASM == xmrig::ASM_INTEL) { - cn_double_mainloop_ivybridge_asm(ctx[0]); + cn_double_mainloop_ivybridge_asm(ctx); } else if (ASM == xmrig::ASM_RYZEN) { - 
cn_double_mainloop_ryzen_asm(ctx[0]); + cn_double_mainloop_ryzen_asm(ctx); } else { - cn_double_mainloop_bulldozer_asm(ctx[0]); + cn_double_mainloop_bulldozer_asm(ctx); } } else if (xmrig::cn_is_cryptonight_r<VARIANT>()) { - ctx[0]->generated_code(ctx[0]); + ctx[0]->generated_code(ctx); } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); @@ -910,25 +910,25 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_ cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory)); if (VARIANT == xmrig::VARIANT_2) { - cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cnv2_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_HALF) { - cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_half_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_TRTL) { - cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_trtl_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_double_mainloop_asm(ctx[0], ctx[1]); + cnv2_rwz_double_mainloop_asm(ctx); } else if (VARIANT == xmrig::VARIANT_ZLS) { - cn_zls_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_zls_double_mainloop_sandybridge_asm(ctx); } else if (VARIANT == xmrig::VARIANT_DOUBLE) { - cn_double_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + cn_double_double_mainloop_sandybridge_asm(ctx); } else if (xmrig::cn_is_cryptonight_r<VARIANT>()) { - ctx[0]->generated_code_double(ctx[0], ctx[1]); + ctx[0]->generated_code_double(ctx); } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); diff --git a/src/crypto/asm/CryptonightR_soft_aes_template.inc b/src/crypto/asm/CryptonightR_soft_aes_template.inc index 40c7874d2..e9e1bb4f6 100644 --- a/src/crypto/asm/CryptonightR_soft_aes_template.inc +++ b/src/crypto/asm/CryptonightR_soft_aes_template.inc @@ -6,6 +6,8 @@ 
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end) ALIGN(64) FN_PREFIX(CryptonightR_soft_aes_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc index d771f69cf..589192cab 100644 --- a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc +++ b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightR_soft_aes_template_end ALIGN(64) CryptonightR_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightR_template.inc b/src/crypto/asm/CryptonightR_template.inc index 8ecab7247..61b6b9855 100644 --- a/src/crypto/asm/CryptonightR_template.inc +++ b/src/crypto/asm/CryptonightR_template.inc @@ -12,6 +12,8 @@ PUBLIC FN_PREFIX(CryptonightR_template_double_end) ALIGN(64) FN_PREFIX(CryptonightR_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -183,6 +185,9 @@ FN_PREFIX(CryptonightR_template_end): ALIGN(64) FN_PREFIX(CryptonightR_template_double_part1): + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/CryptonightR_template_win.inc b/src/crypto/asm/CryptonightR_template_win.inc index a170f2d2b..1bb89eb15 100644 --- a/src/crypto/asm/CryptonightR_template_win.inc +++ b/src/crypto/asm/CryptonightR_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightR_template_double_end ALIGN(64) CryptonightR_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -183,6 +185,9 @@ CryptonightR_template_end: ALIGN(64) CryptonightR_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc 
b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc index feea3949c..53b7016a0 100644 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc +++ b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc @@ -6,6 +6,8 @@ PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end) ALIGN(64) FN_PREFIX(CryptonightWOW_soft_aes_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc index 6ebad99f6..b3202b781 100644 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc +++ b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightWOW_soft_aes_template_end ALIGN(64) CryptonightWOW_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/CryptonightWOW_template.inc b/src/crypto/asm/CryptonightWOW_template.inc index 7183a659f..82d455f6d 100644 --- a/src/crypto/asm/CryptonightWOW_template.inc +++ b/src/crypto/asm/CryptonightWOW_template.inc @@ -12,6 +12,8 @@ PUBLIC FN_PREFIX(CryptonightWOW_template_double_end) ALIGN(64) FN_PREFIX(CryptonightWOW_template_part1): + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -165,6 +167,9 @@ FN_PREFIX(CryptonightWOW_template_end): ALIGN(64) FN_PREFIX(CryptonightWOW_template_double_part1): + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/CryptonightWOW_template_win.inc b/src/crypto/asm/CryptonightWOW_template_win.inc index c5652e278..644c01f13 100644 --- a/src/crypto/asm/CryptonightWOW_template_win.inc +++ b/src/crypto/asm/CryptonightWOW_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightWOW_template_double_end ALIGN(64) CryptonightWOW_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -165,6 +167,9 @@ 
CryptonightWOW_template_end: ALIGN(64) CryptonightWOW_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc index aa5101a83..1710cac70 100644 --- a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc index c764501db..b881b6698 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc index 06f1d28be..863673de1 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc index 5dbf5917f..8ccc5e179 100644 --- a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc index d2d871732..d9bfc9c18 100644 --- a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc +++ b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc 
b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc index 021f787e3..b59c02d63 100644 --- a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc +++ b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index 347f0e088..7aed6c20b 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -49,7 +49,6 @@ ALIGN(64) FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): sub rsp, 48 mov rcx, rdi - mov rdx, rsi #include "cn2/cnv2_double_main_loop_sandybridge.inc" add rsp, 48 ret 0 @@ -68,7 +67,6 @@ ALIGN(64) FN_PREFIX(cnv2_rwz_double_mainloop_asm): sub rsp, 48 mov rcx, rdi - mov rdx, rsi #include "cn2/cnv2_rwz_double_main_loop.inc" add rsp, 48 ret 0 diff --git a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc index d6d393a96..6898a6041 100644 --- a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc +++ b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightR_soft_aes_template_end ALIGN(64) CryptonightR_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/win64/CryptonightR_template_win.inc b/src/crypto/asm/win64/CryptonightR_template_win.inc index 60ee3441b..d24eedaa9 100644 --- a/src/crypto/asm/win64/CryptonightR_template_win.inc +++ b/src/crypto/asm/win64/CryptonightR_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightR_template_double_end ALIGN(64) CryptonightR_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -183,6 +185,9 @@ CryptonightR_template_end: ALIGN(64) CryptonightR_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc 
b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc index 682090367..1c73f77c1 100644 --- a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc +++ b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc @@ -6,6 +6,8 @@ PUBLIC CryptonightWOW_soft_aes_template_end ALIGN(64) CryptonightWOW_soft_aes_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+8], rcx push rbx push rbp diff --git a/src/crypto/asm/win64/CryptonightWOW_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_template_win.inc index 9db2cf397..55c8c8df3 100644 --- a/src/crypto/asm/win64/CryptonightWOW_template_win.inc +++ b/src/crypto/asm/win64/CryptonightWOW_template_win.inc @@ -12,6 +12,8 @@ PUBLIC CryptonightWOW_template_double_end ALIGN(64) CryptonightWOW_template_part1: + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi @@ -165,6 +167,9 @@ CryptonightWOW_template_end: ALIGN(64) CryptonightWOW_template_double_part1: + mov rdx, [rcx+8] + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc index 05af93934..85077a201 100644 --- a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc index 03a36f48d..f17017a07 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc index 
77e28f801..a12ac35c6 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc index 7e5c127f8..044235d84 100644 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+16], rbx mov QWORD PTR [rsp+24], rbp mov QWORD PTR [rsp+32], rsi diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc index 69ca8793c..97fb691b5 100644 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc +++ b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc @@ -1,3 +1,6 @@ + mov rdx, [rcx+8] + mov rcx, [rcx] + mov rax, rsp push rbx push rbp diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc index 993177305..e2b7a5fcd 100644 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc +++ b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc @@ -1,3 +1,5 @@ + mov rcx, [rcx] + mov QWORD PTR [rsp+24], rbx push rbp push rsi diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp index c98b730ad..6548b4617 100644 --- a/src/workers/CpuThread.cpp +++ b/src/workers/CpuThread.cpp @@ -90,31 +90,31 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma } -extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1); +extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); +extern "C" void 
cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); +extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr; void xmrig::CpuThread::patchAsmVariants() @@ -125,22 +125,22 @@ void xmrig::CpuThread::patchAsmVariants() cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000); 
cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000); cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000); - cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x3000); + cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x3000); cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000); cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000); cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000); - cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x7000); + cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000); cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000); cn_zls_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x9000); cn_zls_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xA000); - cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0xB000); + cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xB000); cn_double_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xC000); cn_double_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xD000); cn_double_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xE000); - cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0xF000); + cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xF000); patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.h index 54e98cde5..05d4a0661 100644 --- a/src/workers/CpuThread.h +++ b/src/workers/CpuThread.h @@ -61,8 +61,7 @@ public: CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly); typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, 
uint8_t *output, cryptonight_ctx **ctx, uint64_t height); - typedef void (*cn_mainloop_fun)(cryptonight_ctx *ctx); - typedef void (*cn_mainloop_double_fun)(cryptonight_ctx *ctx1, cryptonight_ctx *ctx2); + typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx); # ifndef XMRIG_NO_ASM static void patchAsmVariants();