#904 Add ASM implementation.

This commit is contained in:
XMRig 2019-01-21 00:32:24 +07:00
parent 36b01f136f
commit bc26d2ede6
2 changed files with 58 additions and 8 deletions

View file

@ -572,11 +572,16 @@ extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1); extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm; extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm;
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM> template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
@ -608,6 +613,17 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
cn_half_mainloop_bulldozer_asm(ctx[0]); cn_half_mainloop_bulldozer_asm(ctx[0]);
} }
} }
else if (VARIANT == xmrig::VARIANT_TRTL) {
if (ASM == xmrig::ASM_INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx[0]);
}
else if (ASM == xmrig::ASM_RYZEN) {
cn_trtl_mainloop_ryzen_asm(ctx[0]);
}
else {
cn_trtl_mainloop_bulldozer_asm(ctx[0]);
}
}
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24); xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
@ -632,6 +648,9 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
else if (VARIANT == xmrig::VARIANT_HALF) { else if (VARIANT == xmrig::VARIANT_HALF) {
cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
} }
else if (VARIANT == xmrig::VARIANT_TRTL) {
cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
}
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state)); cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));

View file

@ -97,11 +97,16 @@ extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1); extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr; xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
void xmrig::CpuThread::patchAsmVariants() void xmrig::CpuThread::patchAsmVariants()
{ {
@ -113,11 +118,21 @@ void xmrig::CpuThread::patchAsmVariants()
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000); cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x3000); cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x3000);
cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x7000);
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
Mem::protectExecutableMemory(base, allocation_size); Mem::protectExecutableMemory(base, allocation_size);
Mem::flushInstructionCache(base, allocation_size); Mem::flushInstructionCache(base, allocation_size);
} }
@ -135,7 +150,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
assert(variant >= VARIANT_0 && variant < VARIANT_MAX); assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
# ifndef XMRIG_NO_ASM # ifndef XMRIG_NO_ASM
constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX + 8; constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX + 12;
# else # else
constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX; constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX;
# endif # endif
@ -382,7 +397,12 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>, cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>,
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_RYZEN>, cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_RYZEN>,
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_BULLDOZER>, cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_BULLDOZER>,
cryptonight_double_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL> cryptonight_double_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>,
cryptonight_single_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_INTEL>,
cryptonight_single_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_RYZEN>,
cryptonight_single_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_BULLDOZER>,
cryptonight_double_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_INTEL>
# endif # endif
}; };
@ -587,6 +607,17 @@ size_t xmrig::CpuThread::fnIndex(Algo algorithm, AlgoVariant av, Variant variant
return offset + extra_offset + assembly - 2; return offset + extra_offset + assembly - 2;
} }
if (av == AV_DOUBLE) {
return offset + 3 + extra_offset;
}
}
else if (algorithm == CRYPTONIGHT_PICO && variant == VARIANT_TRTL) {
extra_offset = 8;
if (av == AV_SINGLE) {
return offset + extra_offset + assembly - 2;
}
if (av == AV_DOUBLE) { if (av == AV_DOUBLE) {
return offset + 3 + extra_offset; return offset + 3 + extra_offset;
} }