diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d5f5cb7c..15826ff84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# v5.5.2 +- [#1500](https://github.com/xmrig/xmrig/pull/1500) Removed unnecessary code from RandomX JIT compiler. +- [#1502](https://github.com/xmrig/xmrig/pull/1502) Optimizations for AMD Bulldozer. +- [#1508](https://github.com/xmrig/xmrig/pull/1508) Added support for BMI2 instructions. +- [#1510](https://github.com/xmrig/xmrig/pull/1510) Optimized `CFROUND` instruction for RandomX. +- [#1520](https://github.com/xmrig/xmrig/pull/1520) Fixed thread affinity. + # v5.5.1 - [#1469](https://github.com/xmrig/xmrig/issues/1469) Fixed build with gcc 4.8. - [#1473](https://github.com/xmrig/xmrig/pull/1473) Added RandomX auto-config for mobile Ryzen APUs. diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h index f7e9fcfaa..4aad69447 100644 --- a/src/backend/cpu/interfaces/ICpuInfo.h +++ b/src/backend/cpu/interfaces/ICpuInfo.h @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -62,6 +62,7 @@ public: virtual Assembly::Id assembly() const = 0; virtual bool hasAES() const = 0; virtual bool hasAVX2() const = 0; + virtual bool hasBMI2() const = 0; virtual bool hasOneGbPages() const = 0; virtual const char *backend() const = 0; virtual const char *brand() const = 0; diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.cpp b/src/backend/cpu/platform/AdvancedCpuInfo.cpp index 20496ff13..837e1b000 100644 --- a/src/backend/cpu/platform/AdvancedCpuInfo.cpp +++ b/src/backend/cpu/platform/AdvancedCpuInfo.cpp @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 
2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -153,6 +153,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : } m_avx2 = data.flags[CPU_FEATURE_AVX2] && data.flags[CPU_FEATURE_OSXSAVE]; + m_bmi2 = data.flags[CPU_FEATURE_BMI2]; } diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.h b/src/backend/cpu/platform/AdvancedCpuInfo.h index beafa57ca..30ad3c584 100644 --- a/src/backend/cpu/platform/AdvancedCpuInfo.h +++ b/src/backend/cpu/platform/AdvancedCpuInfo.h @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -43,6 +43,7 @@ protected: inline Assembly::Id assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } + inline bool hasBMI2() const override { return m_bmi2; } inline bool hasOneGbPages() const override { return m_pdpe1gb; } inline const char *backend() const override { return m_backend; } inline const char *brand() const override { return m_brand; } @@ -59,6 +60,7 @@ private: Assembly m_assembly; bool m_aes = false; bool m_avx2 = false; + bool m_bmi2 = false; bool m_L2_exclusive = false; char m_backend[32]{}; char m_brand[64 + 5]{}; diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index 1dfede21c..fa5a43fa7 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -5,8 +5,8 @@ * Copyright 
2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,6 +45,10 @@ # define bit_AVX2 (1 << 5) #endif +#ifndef bit_BMI2 +# define bit_BMI2 (1 << 8) +#endif + #ifndef bit_PDPE1GB # define bit_PDPE1GB (1 << 26) #endif @@ -141,6 +145,12 @@ static inline bool has_avx2() } +static inline bool has_bmi2() +{ + return has_feature(EXTENDED_FEATURES, EBX_Reg, bit_BMI2); +} + + static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, bit_PDPE1GB); @@ -154,6 +164,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : m_threads(std::thread::hardware_concurrency()), m_aes(has_aes_ni()), m_avx2(has_avx2()), + m_bmi2(has_bmi2()), m_pdpe1gb(has_pdpe1gb()) { cpu_brand_string(m_brand); diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h index b1139920d..b553e575e 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.h +++ b/src/backend/cpu/platform/BasicCpuInfo.h @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -44,6 +44,7 @@ protected: inline Assembly::Id assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } + inline bool hasBMI2() const override { return m_bmi2; } inline bool hasOneGbPages() const override { return m_pdpe1gb; } inline const char *brand() const override { return m_brand; } inline MsrMod 
msrMod() const override { return m_msrMod; } @@ -63,6 +64,7 @@ private: Assembly m_assembly = Assembly::NONE; bool m_aes = false; const bool m_avx2 = false; + const bool m_bmi2 = false; const bool m_pdpe1gb = false; MsrMod m_msrMod = MSR_MOD_NONE; Vendor m_vendor = VENDOR_UNKNOWN; diff --git a/src/base/kernel/Platform.h b/src/base/kernel/Platform.h index 3f026f8bd..341b4dfd2 100644 --- a/src/base/kernel/Platform.h +++ b/src/base/kernel/Platform.h @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/base/kernel/Platform_hwloc.cpp b/src/base/kernel/Platform_hwloc.cpp index f4b46ba1e..ca5bfed2c 100644 --- a/src/base/kernel/Platform_hwloc.cpp +++ b/src/base/kernel/Platform_hwloc.cpp @@ -6,8 +6,8 @@ * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ #include +#include bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) @@ -42,8 +43,11 @@ bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) } if (hwloc_set_cpubind(cpu->topology(), pu->cpuset, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT) >= 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); return true; } - return hwloc_set_cpubind(cpu->topology(), pu->cpuset, HWLOC_CPUBIND_THREAD) >= 0; + const bool result = (hwloc_set_cpubind(cpu->topology(), pu->cpuset, HWLOC_CPUBIND_THREAD) >= 0); + 
std::this_thread::sleep_for(std::chrono::milliseconds(1)); + return result; } diff --git a/src/base/kernel/Platform_mac.cpp b/src/base/kernel/Platform_mac.cpp index 2cd3a8314..aab006759 100644 --- a/src/base/kernel/Platform_mac.cpp +++ b/src/base/kernel/Platform_mac.cpp @@ -4,8 +4,8 @@ * Copyright 2014 Lucas Jones * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee - * Copyright 2016-2017 XMRig - * + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #include #include #include +#include #include "base/kernel/Platform.h" @@ -67,7 +68,9 @@ bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) thread_affinity_policy_data_t policy = { static_cast(cpu_id) }; mach_thread = pthread_mach_thread_np(pthread_self()); - return thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, 1) == KERN_SUCCESS; + const bool result = (thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, 1) == KERN_SUCCESS); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + return result; } #endif diff --git a/src/base/kernel/Platform_unix.cpp b/src/base/kernel/Platform_unix.cpp index f449995c6..ba55ed77c 100644 --- a/src/base/kernel/Platform_unix.cpp +++ b/src/base/kernel/Platform_unix.cpp @@ -4,8 +4,8 @@ * Copyright 2014 Lucas Jones * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -37,6 +37,7 @@ #include #include #include +#include #include "base/kernel/Platform.h" @@ -92,10 +93,13 @@ bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) CPU_SET(cpu_id, &mn); # ifndef 
__ANDROID__ - return pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mn) == 0; + const bool result = (pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mn) == 0); # else - return sched_setaffinity(gettid(), sizeof(cpu_set_t), &mn) == 0; + const bool result = (sched_setaffinity(gettid(), sizeof(cpu_set_t), &mn) == 0); # endif + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + return result; } #endif diff --git a/src/base/kernel/Platform_win.cpp b/src/base/kernel/Platform_win.cpp index 064c8352f..ce7e60e21 100644 --- a/src/base/kernel/Platform_win.cpp +++ b/src/base/kernel/Platform_win.cpp @@ -4,9 +4,8 @@ * Copyright 2014 Lucas Jones * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -98,7 +97,9 @@ bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id) LOG_ERR("Unable to set affinity. 
Windows supports only affinity up to 63."); } - return SetThreadAffinityMask(GetCurrentThread(), 1ULL << cpu_id) != 0; + const bool result = (SetThreadAffinityMask(GetCurrentThread(), 1ULL << cpu_id) != 0); + Sleep(1); + return result; } #endif diff --git a/src/crypto/randomx/asm/program_epilogue_store.inc b/src/crypto/randomx/asm/program_epilogue_store.inc index 6fa1a1fcf..82067d191 100644 --- a/src/crypto/randomx/asm/program_epilogue_store.inc +++ b/src/crypto/randomx/asm/program_epilogue_store.inc @@ -1,5 +1,5 @@ ;# save VM register values - add rsp, 24 + add rsp, 40 pop rcx mov qword ptr [rcx+0], r8 mov qword ptr [rcx+8], r9 diff --git a/src/crypto/randomx/asm/program_loop_load.inc b/src/crypto/randomx/asm/program_loop_load.inc index 1c53e8314..5d8a84918 100644 --- a/src/crypto/randomx/asm/program_loop_load.inc +++ b/src/crypto/randomx/asm/program_loop_load.inc @@ -1,5 +1,5 @@ lea rcx, [rsi+rax] - mov [rsp+8], rcx + mov [rsp+16], rcx xor r8, qword ptr [rcx+0] xor r9, qword ptr [rcx+8] xor r10, qword ptr [rcx+16] @@ -9,7 +9,7 @@ xor r14, qword ptr [rcx+48] xor r15, qword ptr [rcx+56] lea rcx, [rsi+rdx] - mov [rsp+16], rcx + mov [rsp+24], rcx cvtdq2pd xmm0, qword ptr [rcx+0] cvtdq2pd xmm1, qword ptr [rcx+8] cvtdq2pd xmm2, qword ptr [rcx+16] @@ -18,11 +18,11 @@ cvtdq2pd xmm5, qword ptr [rcx+40] cvtdq2pd xmm6, qword ptr [rcx+48] cvtdq2pd xmm7, qword ptr [rcx+56] - andps xmm4, xmm13 - andps xmm5, xmm13 - andps xmm6, xmm13 - andps xmm7, xmm13 - orps xmm4, xmm14 - orps xmm5, xmm14 - orps xmm6, xmm14 - orps xmm7, xmm14 + andpd xmm4, xmm13 + andpd xmm5, xmm13 + andpd xmm6, xmm13 + andpd xmm7, xmm13 + orpd xmm4, xmm14 + orpd xmm5, xmm14 + orpd xmm6, xmm14 + orpd xmm7, xmm14 diff --git a/src/crypto/randomx/asm/program_loop_load_xop.inc b/src/crypto/randomx/asm/program_loop_load_xop.inc new file mode 100644 index 000000000..5ea2386e9 --- /dev/null +++ b/src/crypto/randomx/asm/program_loop_load_xop.inc @@ -0,0 +1,24 @@ + lea rcx, [rsi+rax] + mov [rsp+16], rcx + xor r8, qword 
ptr [rcx+0] + xor r9, qword ptr [rcx+8] + xor r10, qword ptr [rcx+16] + xor r11, qword ptr [rcx+24] + xor r12, qword ptr [rcx+32] + xor r13, qword ptr [rcx+40] + xor r14, qword ptr [rcx+48] + xor r15, qword ptr [rcx+56] + lea rcx, [rsi+rdx] + mov [rsp+24], rcx + cvtdq2pd xmm0, qword ptr [rcx+0] + cvtdq2pd xmm1, qword ptr [rcx+8] + cvtdq2pd xmm2, qword ptr [rcx+16] + cvtdq2pd xmm3, qword ptr [rcx+24] + cvtdq2pd xmm4, qword ptr [rcx+32] + cvtdq2pd xmm5, qword ptr [rcx+40] + cvtdq2pd xmm6, qword ptr [rcx+48] + cvtdq2pd xmm7, qword ptr [rcx+56] + vpcmov xmm4, xmm4, xmm14, xmm13 + vpcmov xmm5, xmm5, xmm14, xmm13 + vpcmov xmm6, xmm6, xmm14, xmm13 + vpcmov xmm7, xmm7, xmm14, xmm13 diff --git a/src/crypto/randomx/asm/program_loop_store.inc b/src/crypto/randomx/asm/program_loop_store.inc index f778f134f..f579bb0c5 100644 --- a/src/crypto/randomx/asm/program_loop_store.inc +++ b/src/crypto/randomx/asm/program_loop_store.inc @@ -1,4 +1,4 @@ - mov rcx, [rsp+16] + mov rcx, [rsp+24] mov qword ptr [rcx+0], r8 mov qword ptr [rcx+8], r9 mov qword ptr [rcx+16], r10 @@ -7,7 +7,7 @@ mov qword ptr [rcx+40], r13 mov qword ptr [rcx+48], r14 mov qword ptr [rcx+56], r15 - mov rcx, [rsp+8] + mov rcx, [rsp+16] xorpd xmm0, xmm4 xorpd xmm1, xmm5 xorpd xmm2, xmm6 diff --git a/src/crypto/randomx/asm/program_xmm_constants.inc b/src/crypto/randomx/asm/program_xmm_constants.inc index 296237a45..cb4b5430d 100644 --- a/src/crypto/randomx/asm/program_xmm_constants.inc +++ b/src/crypto/randomx/asm/program_xmm_constants.inc @@ -1,5 +1,5 @@ mantissaMask: - db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0 + db 0, 0, 192, 255, 255, 255, 255, 0, 0, 0, 192, 255, 255, 255, 255, 0 exp240: db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 scaleMask: diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index d0b0114c4..34f98cb96 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -89,6 +89,7 
@@ namespace randomx { const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue; const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin; const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load; + const uint8_t* codeLoopLoadXOP = (uint8_t*)&randomx_program_loop_load_xop; const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start; const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init; const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin; @@ -104,7 +105,8 @@ namespace randomx { const int32_t prefetchScratchpadSize = codePrefetchScratchpadEnd - codePrefetchScratchpad; const int32_t prologueSize = codeLoopBegin - codePrologue; - const int32_t loopLoadSize = codeProgamStart - codeLoopLoad; + const int32_t loopLoadSize = codeLoopLoadXOP - codeLoopLoad; + const int32_t loopLoadXOPSize = codeProgamStart - codeLoopLoadXOP; const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; @@ -184,6 +186,7 @@ namespace randomx { static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 }; static const uint8_t REX_XCHG[] = { 0x4d, 0x87 }; static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 }; + static const uint8_t REX_VPCMOV_XMM12[] = { 0x8F, 0x48, 0x18, 0xA2, 0xE6, 0xD0 }; static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f }; static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 }; static const uint8_t CALL = 0xe8; @@ -295,11 +298,23 @@ namespace randomx { cpuid(1, info); hasAVX = ((info[2] & (1 << 27)) != 0) && ((info[2] & (1 << 28)) != 0); + cpuid(0x80000001, info); + hasXOP = ((info[2] & (1 << 11)) != 0); + allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2); // Shift code base address to improve caching - all threads will 
use different L2/L3 cache sets code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize); memcpy(code, codePrologue, prologueSize); + if (hasXOP) { + memcpy(code + prologueSize, codeLoopLoadXOP, loopLoadXOPSize); + } + else { + memcpy(code + prologueSize, codeLoopLoad, loopLoadSize); + } memcpy(code + epilogueOffset, codeEpilogue, epilogueSize); + + codePosFirst = prologueSize + (hasXOP ? loopLoadXOPSize : loopLoadSize); + # ifdef XMRIG_FIX_RYZEN mainLoopBounds.first = code + prologueSize; mainLoopBounds.second = code + epilogueOffset; @@ -317,7 +332,7 @@ namespace randomx { uint8_t* p; uint32_t n; - if (flags & RANDOMX_FLAG_RYZEN) { + if (flags & RANDOMX_FLAG_AMD) { p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked; n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize; } @@ -385,7 +400,7 @@ namespace randomx { *(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated; *(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated; if (hasAVX) { - uint32_t* p = (uint32_t*)(code + codePos + 32); + uint32_t* p = (uint32_t*)(code + codePos + 67); *p = (*p & 0xFF000000U) | 0x0077F8C5U; } @@ -393,10 +408,8 @@ namespace randomx { xmrig::Rx::setMainLoopBounds(mainLoopBounds); # endif - codePos = prologueSize; - memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); - memcpy(code + codePos, codeLoopLoad, loopLoadSize); - codePos += loopLoadSize; + memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask)); + codePos = codePosFirst; //mark all registers as used uint64_t* r = (uint64_t*)registerUsage; @@ -708,14 +721,31 @@ namespace randomx { uint8_t* const p = code; int pos = codePos; + const uint32_t dst = instr.dst; + emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.dst, p, pos); + emitByte(0xc0 + dst, p, pos); emit(REX_MUL_R, p, pos); emitByte(0xe0 + instr.src, p, pos); emit(REX_MOV_R64R, p, pos); - emitByte(0xc2 + 8 * instr.dst, p, pos); + emitByte(0xc2 + 8 * dst, p, pos); - 
registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; + codePos = pos; + } + + void JitCompilerX86::h_IMULH_R_BMI2(const Instruction& instr) { + uint8_t* const p = code; + int pos = codePos; + + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; + + *(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16); + *(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24); + pos += 8; + + registerUsage[dst] = pos; codePos = pos; } @@ -743,6 +773,29 @@ namespace randomx { codePos = pos; } + void JitCompilerX86::h_IMULH_M_BMI2(const Instruction& instr) { + uint8_t* const p = code; + int pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + if (src != dst) { + genAddressReg(instr, p, pos); + *(uint32_t*)(p + pos) = static_cast(0xC4D08B49 + (dst << 16)); + *(uint64_t*)(p + pos + 4) = 0x0E04F6FB62ULL + (dst << 27); + pos += 9; + } + else { + *(uint64_t*)(p + pos) = 0x86F6FB62C4D08B49ULL + (dst << 16) + (dst << 59); + *(uint32_t*)(p + pos + 8) = instr.getImm32() & ScratchpadL3Mask; + pos += 12; + } + + registerUsage[dst] = pos; + codePos = pos; + } + void JitCompilerX86::h_ISMULH_R(const Instruction& instr) { uint8_t* const p = code; int pos = codePos; @@ -992,7 +1045,12 @@ namespace randomx { const uint32_t dst = instr.dst % RegisterCountFlt; genAddressReg(instr, p, pos); emit(REX_CVTDQ2PD_XMM12, p, pos); - emit(REX_ANDPS_XMM12, p, pos); + if (hasXOP) { + emit(REX_VPCMOV_XMM12, p, pos); + } + else { + emit(REX_ANDPS_XMM12, p, pos); + } emit(REX_DIVPD, p, pos); emitByte(0xe4 + 8 * dst, p, pos); @@ -1014,18 +1072,21 @@ namespace randomx { uint8_t* const p = code; int pos = codePos; - emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.src, p, pos); - int rotate = (13 - (instr.getImm32() & 63)) & 63; - if (rotate != 0) { - emit(ROL_RAX, p, pos); - emitByte(rotate, p, pos); - } - if (vm_flags & RANDOMX_FLAG_RYZEN) { - emit(AND_OR_MOV_LDMXCSR_RYZEN, p, pos); + const uint32_t src = instr.src; + + *(uint32_t*)(p + pos) = 
0x00C08B49 + (src << 16); + const int rotate = (static_cast(instr.getImm32() & 63) - 2) & 63; + *(uint32_t*)(p + pos + 3) = 0x00C8C148 + (rotate << 24); + + if (vm_flags & RANDOMX_FLAG_AMD) { + *(uint64_t*)(p + pos + 7) = 0x742024443B0CE083ULL; + *(uint8_t*)(p + pos + 15) = 8; + *(uint64_t*)(p + pos + 16) = 0x202444890414AE0FULL; + pos += 24; } else { - emit(AND_OR_MOV_LDMXCSR, p, pos); + *(uint64_t*)(p + pos + 7) = 0x0414AE0F0CE083ULL; + pos += 14; } codePos = pos; diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 02b1a80fc..a194f1afb 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -73,10 +73,12 @@ namespace randomx { std::pair mainLoopBounds; # endif int32_t codePos; + int32_t codePosFirst; uint32_t vm_flags; static bool BranchesWithin32B; bool hasAVX; + bool hasXOP; static void applyTweaks(); void generateProgramPrologue(Program&, ProgramConfiguration&); @@ -121,7 +123,9 @@ namespace randomx { void h_IMUL_R(const Instruction&); void h_IMUL_M(const Instruction&); void h_IMULH_R(const Instruction&); + void h_IMULH_R_BMI2(const Instruction&); void h_IMULH_M(const Instruction&); + void h_IMULH_M_BMI2(const Instruction&); void h_ISMULH_R(const Instruction&); void h_ISMULH_M(const Instruction&); void h_IMUL_RCP(const Instruction&); diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index e5709cdc2..9f3a5bf18 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -43,6 +43,7 @@ .global DECL(randomx_program_prologue_first_load) .global DECL(randomx_program_loop_begin) .global DECL(randomx_program_loop_load) +.global DECL(randomx_program_loop_load_xop) .global DECL(randomx_program_start) .global DECL(randomx_program_read_dataset) .global DECL(randomx_program_read_dataset_ryzen) @@ -93,8 +94,12 @@ DECL(randomx_program_prologue_first_load): and eax, 
RANDOMX_SCRATCHPAD_MASK ror rdx, 32 and edx, RANDOMX_SCRATCHPAD_MASK - sub rsp, 24 - stmxcsr dword ptr [rsp] + sub rsp, 40 + mov dword ptr [rsp], 0x9FC0 + mov dword ptr [rsp+4], 0xBFC0 + mov dword ptr [rsp+8], 0xDFC0 + mov dword ptr [rsp+12], 0xFFC0 + mov dword ptr [rsp+32], -1 nop nop nop @@ -110,6 +115,9 @@ DECL(randomx_program_loop_begin): DECL(randomx_program_loop_load): #include "asm/program_loop_load.inc" +DECL(randomx_program_loop_load_xop): + #include "asm/program_loop_load_xop.inc" + DECL(randomx_program_start): nop diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index 4b3542e3a..e36e5aafa 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -34,6 +34,7 @@ PUBLIC randomx_program_prologue PUBLIC randomx_program_prologue_first_load PUBLIC randomx_program_loop_begin PUBLIC randomx_program_loop_load +PUBLIC randomx_program_loop_load_xop PUBLIC randomx_program_start PUBLIC randomx_program_read_dataset PUBLIC randomx_program_read_dataset_ryzen @@ -81,8 +82,12 @@ randomx_program_prologue_first_load PROC and eax, RANDOMX_SCRATCHPAD_MASK ror rdx, 32 and edx, RANDOMX_SCRATCHPAD_MASK - sub rsp, 24 - stmxcsr dword ptr [rsp] + sub rsp, 40 + mov dword ptr [rsp], 9FC0h + mov dword ptr [rsp+4], 0BFC0h + mov dword ptr [rsp+8], 0DFC0h + mov dword ptr [rsp+12], 0FFC0h + mov dword ptr [rsp+32], -1 nop nop nop @@ -101,6 +106,10 @@ randomx_program_loop_load PROC include asm/program_loop_load.inc randomx_program_loop_load ENDP +randomx_program_loop_load_xop PROC + include asm/program_loop_load_xop.inc +randomx_program_loop_load_xop ENDP + randomx_program_start PROC nop randomx_program_start ENDP diff --git a/src/crypto/randomx/jit_compiler_x86_static.hpp b/src/crypto/randomx/jit_compiler_x86_static.hpp index b0a7c5acb..6523f9c47 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.hpp +++ b/src/crypto/randomx/jit_compiler_x86_static.hpp @@ -35,6 +35,7 @@ 
extern "C" { void randomx_program_prologue_first_load(); void randomx_program_loop_begin(); void randomx_program_loop_load(); + void randomx_program_loop_load_xop(); void randomx_program_start(); void randomx_program_read_dataset(); void randomx_program_read_dataset_ryzen(); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index ed9be8c91..88f7b190a 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -41,6 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "crypto/randomx/jit_compiler_a64_static.hpp" #endif +#include "backend/cpu/Cpu.h" + #include RandomX_ConfigurationWownero::RandomX_ConfigurationWownero() @@ -235,14 +237,29 @@ void RandomX_ConfigurationBase::Apply() CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); } +#define INST_HANDLE2(x, func_name, prev) \ + CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ + for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); } + INST_HANDLE(IADD_RS, NULL); INST_HANDLE(IADD_M, IADD_RS); INST_HANDLE(ISUB_R, IADD_M); INST_HANDLE(ISUB_M, ISUB_R); INST_HANDLE(IMUL_R, ISUB_M); INST_HANDLE(IMUL_M, IMUL_R); - INST_HANDLE(IMULH_R, IMUL_M); - INST_HANDLE(IMULH_M, IMULH_R); + +#if defined(_M_X64) || defined(__x86_64__) + if (xmrig::Cpu::info()->hasBMI2()) { + INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M); + INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R); + } + else +#endif + { + INST_HANDLE(IMULH_R, IMUL_M); + INST_HANDLE(IMULH_M, IMULH_R); + } + INST_HANDLE(ISMULH_R, IMULH_M); INST_HANDLE(ISMULH_M, ISMULH_R); INST_HANDLE(IMUL_RCP, ISMULH_M); diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 793e6e1b2..787491ebc 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -49,7 +49,7 @@ enum randomx_flags { RANDOMX_FLAG_FULL_MEM = 4, RANDOMX_FLAG_JIT = 8, RANDOMX_FLAG_1GB_PAGES = 16, - RANDOMX_FLAG_RYZEN = 64, + RANDOMX_FLAG_AMD = 64, }; diff --git 
a/src/crypto/rx/RxConfig.cpp b/src/crypto/rx/RxConfig.cpp index 2ec568f97..01d0616e9 100644 --- a/src/crypto/rx/RxConfig.cpp +++ b/src/crypto/rx/RxConfig.cpp @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/crypto/rx/RxConfig.h b/src/crypto/rx/RxConfig.h index 225c019e0..e42b3a711 100644 --- a/src/crypto/rx/RxConfig.h +++ b/src/crypto/rx/RxConfig.h @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/crypto/rx/RxVm.cpp b/src/crypto/rx/RxVm.cpp index 486d83c2e..654775373 100644 --- a/src/crypto/rx/RxVm.cpp +++ b/src/crypto/rx/RxVm.cpp @@ -7,8 +7,8 @@ * Copyright 2017-2019 XMR-Stak , * Copyright 2018 Lee Clagett * Copyright 2018-2019 tevador - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,8 +45,12 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig:: m_flags |= RANDOMX_FLAG_JIT; } - if ((assembly == Assembly::RYZEN) || ((assembly == Assembly::AUTO) && (Cpu::info()->assembly() == Assembly::RYZEN))) { - m_flags |= RANDOMX_FLAG_RYZEN; + if (assembly == Assembly::AUTO) { + assembly = Cpu::info()->assembly(); + } + + if 
((assembly == Assembly::RYZEN) || (assembly == Assembly::BULLDOZER)) { + m_flags |= RANDOMX_FLAG_AMD; } m_vm = randomx_create_vm(static_cast(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad); diff --git a/src/crypto/rx/RxVm.h b/src/crypto/rx/RxVm.h index 7cddf93bd..79c3b9d66 100644 --- a/src/crypto/rx/RxVm.h +++ b/src/crypto/rx/RxVm.h @@ -7,8 +7,8 @@ * Copyright 2017-2019 XMR-Stak , * Copyright 2018 Lee Clagett * Copyright 2018-2019 tevador - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/crypto/rx/Rx_linux.cpp b/src/crypto/rx/Rx_linux.cpp index c98510bda..2e1fc7b92 100644 --- a/src/crypto/rx/Rx_linux.cpp +++ b/src/crypto/rx/Rx_linux.cpp @@ -7,10 +7,10 @@ * Copyright 2017-2019 XMR-Stak , * Copyright 2018 Lee Clagett * Copyright 2018-2019 tevador - * Copyright 2018-2019 SChernykh * Copyright 2000 Transmeta Corporation * Copyright 2004-2008 H. 
Peter Anvin - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -91,13 +91,19 @@ static MsrItem rdmsr(uint32_t reg) } +static uint64_t get_masked_value(uint64_t old_value, uint64_t new_value, uint64_t mask) +{ + return (new_value & mask) | (old_value & ~mask); +} + + static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value, uint64_t mask) { // If a bit in mask is set to 1, use new value, otherwise use old value if (mask != MsrItem::kNoMask) { uint64_t old_value; if (rdmsr_on_cpu(reg, cpu, old_value)) { - value = (value & mask) | (old_value & ~mask); + value = get_masked_value(old_value, value, mask); } } @@ -162,7 +168,7 @@ static bool wrmsr(const MsrItems &preset, bool save) if (save) { for (const auto &i : preset) { auto item = rdmsr(i.reg()); - LOG_VERBOSE(CLEAR "%s" CYAN_BOLD("0x%08" PRIx32) CYAN(":0x%016" PRIx64) CYAN_BOLD(" -> 0x%016" PRIx64), tag, i.reg(), item.value(), i.value()); + LOG_VERBOSE(CLEAR "%s" CYAN_BOLD("0x%08" PRIx32) CYAN(":0x%016" PRIx64) CYAN_BOLD(" -> 0x%016" PRIx64), tag, i.reg(), item.value(), get_masked_value(item.value(), i.value(), i.mask())); if (item.isValid()) { savedState.emplace_back(item); diff --git a/src/crypto/rx/Rx_win.cpp b/src/crypto/rx/Rx_win.cpp index acfb7f01a..ee8aceb43 100644 --- a/src/crypto/rx/Rx_win.cpp +++ b/src/crypto/rx/Rx_win.cpp @@ -10,8 +10,8 @@ * Copyright 2000 Transmeta Corporation * Copyright 2004-2008 H. 
Peter Anvin * Copyright 2007-2009 hiyohiyo , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -217,6 +217,12 @@ static MsrItem rdmsr(HANDLE driver, uint32_t reg) } +static uint64_t get_masked_value(uint64_t old_value, uint64_t new_value, uint64_t mask) +{ + return (new_value & mask) | (old_value & ~mask); +} + + static bool wrmsr(HANDLE driver, uint32_t reg, uint64_t value, uint64_t mask) { struct { @@ -230,7 +236,7 @@ static bool wrmsr(HANDLE driver, uint32_t reg, uint64_t value, uint64_t mask) if (mask != MsrItem::kNoMask) { uint64_t old_value; if (rdmsr(driver, reg, old_value)) { - value = (value & mask) | (old_value & ~mask); + value = get_masked_value(old_value, value, mask); } } @@ -268,7 +274,7 @@ static bool wrmsr(const MsrItems &preset, bool save) if (save) { for (const auto &i : preset) { auto item = rdmsr(driver, i.reg()); - LOG_VERBOSE(CLEAR "%s" CYAN_BOLD("0x%08" PRIx32) CYAN(":0x%016" PRIx64) CYAN_BOLD(" -> 0x%016" PRIx64), tag, i.reg(), item.value(), i.value()); + LOG_VERBOSE(CLEAR "%s" CYAN_BOLD("0x%08" PRIx32) CYAN(":0x%016" PRIx64) CYAN_BOLD(" -> 0x%016" PRIx64), tag, i.reg(), item.value(), get_masked_value(item.value(), i.value(), i.mask())); if (item.isValid()) { savedState.emplace_back(item); diff --git a/src/version.h b/src/version.h index c1831cd00..5a835bd41 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "5.5.1" +#define APP_VERSION "5.5.2-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com" @@ -36,7 +36,7 @@ #define APP_VER_MAJOR 5 #define APP_VER_MINOR 5 -#define APP_VER_PATCH 1 +#define APP_VER_PATCH 2 #ifdef _MSC_VER # if 
(_MSC_VER >= 1920)