mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-18 18:11:05 +00:00
#904 Add ASM implementation.
This commit is contained in:
parent
36b01f136f
commit
bc26d2ede6
2 changed files with 58 additions and 8 deletions
|
@ -572,11 +572,16 @@ extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
|
||||||
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
|
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
|
||||||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
|
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
|
||||||
|
|
||||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
|
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
|
||||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm;
|
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm;
|
||||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
|
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
|
||||||
extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm;
|
extern xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm;
|
||||||
|
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm;
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
|
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
|
||||||
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
||||||
|
@ -608,6 +613,17 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||||
cn_half_mainloop_bulldozer_asm(ctx[0]);
|
cn_half_mainloop_bulldozer_asm(ctx[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (VARIANT == xmrig::VARIANT_TRTL) {
|
||||||
|
if (ASM == xmrig::ASM_INTEL) {
|
||||||
|
cn_trtl_mainloop_ivybridge_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
else if (ASM == xmrig::ASM_RYZEN) {
|
||||||
|
cn_trtl_mainloop_ryzen_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
cn_trtl_mainloop_bulldozer_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||||
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||||
|
@ -632,6 +648,9 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||||
else if (VARIANT == xmrig::VARIANT_HALF) {
|
else if (VARIANT == xmrig::VARIANT_HALF) {
|
||||||
cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
cn_half_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
||||||
}
|
}
|
||||||
|
else if (VARIANT == xmrig::VARIANT_TRTL) {
|
||||||
|
cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
||||||
|
}
|
||||||
|
|
||||||
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||||
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
||||||
|
|
|
@ -97,11 +97,16 @@ extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
|
||||||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
|
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
|
||||||
|
|
||||||
|
|
||||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
|
||||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
|
||||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
|
||||||
xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
|
||||||
|
|
||||||
|
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
|
||||||
|
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
|
||||||
|
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
|
||||||
|
xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
|
||||||
|
|
||||||
|
|
||||||
void xmrig::CpuThread::patchAsmVariants()
|
void xmrig::CpuThread::patchAsmVariants()
|
||||||
{
|
{
|
||||||
|
@ -113,11 +118,21 @@ void xmrig::CpuThread::patchAsmVariants()
|
||||||
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
|
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
|
||||||
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x3000);
|
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x3000);
|
||||||
|
|
||||||
|
cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
|
||||||
|
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
|
||||||
|
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
|
||||||
|
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x7000);
|
||||||
|
|
||||||
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
|
||||||
|
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||||
|
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||||
|
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||||
|
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||||
|
|
||||||
Mem::protectExecutableMemory(base, allocation_size);
|
Mem::protectExecutableMemory(base, allocation_size);
|
||||||
Mem::flushInstructionCache(base, allocation_size);
|
Mem::flushInstructionCache(base, allocation_size);
|
||||||
}
|
}
|
||||||
|
@ -135,7 +150,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
|
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
|
||||||
|
|
||||||
# ifndef XMRIG_NO_ASM
|
# ifndef XMRIG_NO_ASM
|
||||||
constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX + 8;
|
constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX + 12;
|
||||||
# else
|
# else
|
||||||
constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX;
|
constexpr const size_t count = VARIANT_MAX * 10 * CRYPTONIGHT_MAX;
|
||||||
# endif
|
# endif
|
||||||
|
@ -382,7 +397,12 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>,
|
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>,
|
||||||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_RYZEN>,
|
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_RYZEN>,
|
||||||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_BULLDOZER>,
|
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_BULLDOZER>,
|
||||||
cryptonight_double_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>
|
cryptonight_double_hash_asm<CRYPTONIGHT, VARIANT_HALF, ASM_INTEL>,
|
||||||
|
|
||||||
|
cryptonight_single_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_INTEL>,
|
||||||
|
cryptonight_single_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_RYZEN>,
|
||||||
|
cryptonight_single_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_BULLDOZER>,
|
||||||
|
cryptonight_double_hash_asm<CRYPTONIGHT_PICO, VARIANT_TRTL, ASM_INTEL>
|
||||||
# endif
|
# endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -587,6 +607,17 @@ size_t xmrig::CpuThread::fnIndex(Algo algorithm, AlgoVariant av, Variant variant
|
||||||
return offset + extra_offset + assembly - 2;
|
return offset + extra_offset + assembly - 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (av == AV_DOUBLE) {
|
||||||
|
return offset + 3 + extra_offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (algorithm == CRYPTONIGHT_PICO && variant == VARIANT_TRTL) {
|
||||||
|
extra_offset = 8;
|
||||||
|
|
||||||
|
if (av == AV_SINGLE) {
|
||||||
|
return offset + extra_offset + assembly - 2;
|
||||||
|
}
|
||||||
|
|
||||||
if (av == AV_DOUBLE) {
|
if (av == AV_DOUBLE) {
|
||||||
return offset + 3 + extra_offset;
|
return offset + 3 + extra_offset;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue