From bc26d2ede6f6376f2bae1013e95fb7340f0c586b Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 21 Jan 2019 00:32:24 +0700 Subject: [PATCH] #904 Add ASM implementation. --- src/crypto/CryptoNight_x86.h | 25 +++++++++++++++++++--- src/workers/CpuThread.cpp | 41 +++++++++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 1ba624be..a8c2c309 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -572,11 +572,16 @@ extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1); -extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm; extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; +extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm; + template inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) @@ -608,6 +613,17 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_ cn_half_mainloop_bulldozer_asm(ctx[0]); } } + else if (VARIANT == xmrig::VARIANT_TRTL) { + if (ASM == xmrig::ASM_INTEL) { + cn_trtl_mainloop_ivybridge_asm(ctx[0]); + } + else if (ASM == xmrig::ASM_RYZEN) { + cn_trtl_mainloop_ryzen_asm(ctx[0]); + } + else { + cn_trtl_mainloop_bulldozer_asm(ctx[0]); + } + } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24); @@ -632,6 +648,9 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_ else if (VARIANT == xmrig::VARIANT_HALF) { cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); } + else if (VARIANT == xmrig::VARIANT_TRTL) { + cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + } cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state)); diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp index 552e809a..af55b0da 100644 --- a/src/workers/CpuThread.cpp +++ b/src/workers/CpuThread.cpp @@ -97,11 +97,16 @@ extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1); -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr; +xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; + void xmrig::CpuThread::patchAsmVariants() { @@ -113,11 +118,21 @@ void xmrig::CpuThread::patchAsmVariants() cn_half_mainloop_bulldozer_asm = reinterpret_cast (base + 0x2000); cn_half_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x3000); + cn_trtl_mainloop_ivybridge_asm = reinterpret_cast (base + 0x4000); + cn_trtl_mainloop_ryzen_asm = reinterpret_cast (base + 0x5000); + cn_trtl_mainloop_bulldozer_asm = reinterpret_cast (base + 0x6000); + cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x7000); + patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); + patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); + patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); + patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); + patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); + Mem::protectExecutableMemory(base, allocation_size); Mem::flushInstructionCache(base, allocation_size); } @@ -135,7 +150,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a assert(variant >= VARIANT_0 && variant < VARIANT_MAX); # ifndef XMRIG_NO_ASM - constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX + 8; + constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX + 12; # else constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX; # endif @@ -382,7 +397,12 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a cryptonight_single_hash_asm, cryptonight_single_hash_asm, cryptonight_single_hash_asm, - cryptonight_double_hash_asm + cryptonight_double_hash_asm, + + cryptonight_single_hash_asm, + cryptonight_single_hash_asm, + cryptonight_single_hash_asm, + cryptonight_double_hash_asm # endif }; @@ -587,6 +607,17 @@ size_t xmrig::CpuThread::fnIndex(Algo algorithm, AlgoVariant av, Variant variant return offset + extra_offset + assembly - 2; } + if (av == AV_DOUBLE) { + return offset + 3 + extra_offset; + } + } + else if (algorithm == CRYPTONIGHT_PICO && variant == VARIANT_TRTL) { + extra_offset = 8; + + if (av == AV_SINGLE) { + return offset + extra_offset + assembly - 2; + } + if (av == AV_DOUBLE) { return offset + 3 + extra_offset; }