mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-23 20:19:23 +00:00
#904 Add ASM implementation.
This commit is contained in:
parent
36b01f136f
commit
bc26d2ede6
2 changed files with 58 additions and 8 deletions
|
@ -572,11 +572,16 @@ extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
|
|||
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
|
||||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
|
||||
|
||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
|
||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm;
|
||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
|
||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
|
||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm;
|
||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
|
||||
extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm;
|
||||
|
||||
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
|
||||
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
|
||||
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
|
||||
extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm;
|
||||
|
||||
|
||||
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
|
||||
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
||||
|
@ -608,6 +613,17 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
cn_half_mainloop_bulldozer_asm(ctx[0]);
|
||||
}
|
||||
}
|
||||
else if (VARIANT == xmrig::VARIANT_TRTL) {
|
||||
if (ASM == xmrig::ASM_INTEL) {
|
||||
cn_trtl_mainloop_ivybridge_asm(ctx[0]);
|
||||
}
|
||||
else if (ASM == xmrig::ASM_RYZEN) {
|
||||
cn_trtl_mainloop_ryzen_asm(ctx[0]);
|
||||
}
|
||||
else {
|
||||
cn_trtl_mainloop_bulldozer_asm(ctx[0]);
|
||||
}
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
|
@ -632,6 +648,9 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
else if (VARIANT == xmrig::VARIANT_HALF) {
|
||||
cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
||||
}
|
||||
else if (VARIANT == xmrig::VARIANT_TRTL) {
|
||||
cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
||||
|
|
|
@ -97,11 +97,16 @@ extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
|
|||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
|
||||
|
||||
|
||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
|
||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
|
||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
|
||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
|
||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
|
||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
|
||||
xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
|
||||
|
||||
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
|
||||
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
|
||||
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
|
||||
xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
|
||||
|
||||
|
||||
void xmrig::CpuThread::patchAsmVariants()
|
||||
{
|
||||
|
@ -113,11 +118,21 @@ void xmrig::CpuThread::patchAsmVariants()
|
|||
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
|
||||
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x3000);
|
||||
|
||||
cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
|
||||
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
|
||||
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
|
||||
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x7000);
|
||||
|
||||
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||
|
||||
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||
|
||||
Mem::protectExecutableMemory(base, allocation_size);
|
||||
Mem::flushInstructionCache(base, allocation_size);
|
||||
}
|
||||
|
@ -135,7 +150,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX + 8;
|
||||
constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX + 12;
|
||||
# else
|
||||
constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX;
|
||||
# endif
|
||||
|
@ -382,7 +397,12 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>,
|
||||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_RYZEN>,
|
||||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_BULLDOZER>,
|
||||
cryptonight_double_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>
|
||||
cryptonight_double_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>,
|
||||
|
||||
cryptonight_single_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_INTEL>,
|
||||
cryptonight_single_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_RYZEN>,
|
||||
cryptonight_single_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_BULLDOZER>,
|
||||
cryptonight_double_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_INTEL>
|
||||
# endif
|
||||
};
|
||||
|
||||
|
@ -587,6 +607,17 @@ size_t xmrig::CpuThread::fnIndex(Algo algorithm, AlgoVariant av, Variant variant
|
|||
return offset + extra_offset + assembly - 2;
|
||||
}
|
||||
|
||||
if (av == AV_DOUBLE) {
|
||||
return offset + 3 + extra_offset;
|
||||
}
|
||||
}
|
||||
else if (algorithm == CRYPTONIGHT_PICO && variant == VARIANT_TRTL) {
|
||||
extra_offset = 8;
|
||||
|
||||
if (av == AV_SINGLE) {
|
||||
return offset + extra_offset + assembly - 2;
|
||||
}
|
||||
|
||||
if (av == AV_DOUBLE) {
|
||||
return offset + 3 + extra_offset;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue