From c7476e076b0e15e0d025f140168f17ca34229943 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Fri, 18 Sep 2020 20:51:25 +0200 Subject: [PATCH] RandomX refactoring, moved more stuff to compile time Small x86 JIT compiler speedup. --- src/backend/cpu/interfaces/ICpuInfo.h | 1 + src/backend/cpu/platform/BasicCpuInfo.cpp | 31 +++++++ src/backend/cpu/platform/BasicCpuInfo.h | 2 + src/crypto/randomx/bytecode_machine.cpp | 100 ++++++++++++++-------- src/crypto/randomx/bytecode_machine.hpp | 2 +- src/crypto/randomx/common.hpp | 4 +- src/crypto/randomx/jit_compiler_a64.cpp | 8 +- src/crypto/randomx/jit_compiler_x86.cpp | 62 +++----------- src/crypto/randomx/jit_compiler_x86.hpp | 5 +- src/crypto/randomx/randomx.cpp | 34 +++----- src/crypto/randomx/randomx.h | 60 ++++--------- 11 files changed, 149 insertions(+), 160 deletions(-) diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h index 20fb62958..ffab2d7d0 100644 --- a/src/backend/cpu/interfaces/ICpuInfo.h +++ b/src/backend/cpu/interfaces/ICpuInfo.h @@ -98,6 +98,7 @@ public: virtual size_t packages() const = 0; virtual size_t threads() const = 0; virtual Vendor vendor() const = 0; + virtual bool jccErratum() const = 0; }; diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index cc03646b5..dac1a4fad 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -212,6 +212,37 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : m_vendor = VENDOR_INTEL; m_assembly = Assembly::INTEL; m_msrMod = MSR_MOD_INTEL; + + struct + { + unsigned int stepping : 4; + unsigned int model : 4; + unsigned int family : 4; + unsigned int processor_type : 2; + unsigned int reserved1 : 2; + unsigned int ext_model : 4; + unsigned int ext_family : 8; + unsigned int reserved2 : 4; + } processor_info; + + cpuid(1, data); + memcpy(&processor_info, data, sizeof(processor_info)); + + // Intel JCC erratum mitigation + if (processor_info.family == 6) { + const uint32_t model = processor_info.model | (processor_info.ext_model << 4); + const uint32_t stepping = processor_info.stepping; + + // Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf + m_jccErratum = + ((model == 0x4E) && (stepping == 0x3)) || + ((model == 0x55) && (stepping == 0x4)) || + ((model == 0x5E) && (stepping == 0x3)) || + ((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) || + ((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) || + ((model == 0xA6) && (stepping == 0x0)) || + ((model == 0xAE) && (stepping == 0xA)); + } } } # endif diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h index 05e5f442a..e3e184bb8 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.h +++ b/src/backend/cpu/platform/BasicCpuInfo.h @@ -61,11 +61,13 @@ protected: inline size_t packages() const override { return 1; } inline size_t threads() const override { return m_threads; } inline Vendor vendor() const override { return m_vendor; } + inline bool jccErratum() const override { return m_jccErratum; } protected: char m_brand[64 + 6]{}; size_t m_threads; Vendor m_vendor = VENDOR_UNKNOWN; + bool m_jccErratum = false; private: Assembly m_assembly = Assembly::NONE; diff --git a/src/crypto/randomx/bytecode_machine.cpp b/src/crypto/randomx/bytecode_machine.cpp index f0b95c30e..c1ef3a0e5 100644 --- a/src/crypto/randomx/bytecode_machine.cpp +++ b/src/crypto/randomx/bytecode_machine.cpp @@ -79,9 +79,9 @@ namespace randomx { } void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) { - int opcode = instr.opcode; + uint32_t opcode = instr.opcode; - if (opcode < RandomX_CurrentConfig.CEIL_IADD_RS) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IADD_RS; @@ -99,8 +99,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS; - if (opcode < RandomX_CurrentConfig.CEIL_IADD_M) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IADD_M; @@ -117,8 +118,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M; - if (opcode < RandomX_CurrentConfig.CEIL_ISUB_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::ISUB_R; @@ -133,8 +135,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R; - if (opcode < RandomX_CurrentConfig.CEIL_ISUB_M) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::ISUB_M; @@ -151,8 +154,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M; - if (opcode < RandomX_CurrentConfig.CEIL_IMUL_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IMUL_R; @@ -167,8 +171,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R; - if (opcode < RandomX_CurrentConfig.CEIL_IMUL_M) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IMUL_M; @@ -185,8 +190,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M; - if (opcode < RandomX_CurrentConfig.CEIL_IMULH_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IMULH_R; @@ -195,8 +201,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R; - if (opcode < RandomX_CurrentConfig.CEIL_IMULH_M) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IMULH_M; @@ -213,8 +220,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M; - if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::ISMULH_R; @@ -223,8 +231,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R; - if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_M) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::ISMULH_M; @@ -241,8 +250,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M; - if (opcode < RandomX_CurrentConfig.CEIL_IMUL_RCP) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP) { uint64_t divisor = instr.getImm32(); if (!isZeroOrPowerOf2(divisor)) { auto dst = instr.dst % RegistersCount; @@ -257,16 +267,18 @@ namespace randomx { } return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP; - if (opcode < RandomX_CurrentConfig.CEIL_INEG_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R) { auto dst = instr.dst % RegistersCount; ibc.type = InstructionType::INEG_R; ibc.idst = &nreg->r[dst]; registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R; - if (opcode < RandomX_CurrentConfig.CEIL_IXOR_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IXOR_R; @@ -281,8 +293,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R; - if (opcode < RandomX_CurrentConfig.CEIL_IXOR_M) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IXOR_M; @@ -299,8 +312,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M; - if (opcode < RandomX_CurrentConfig.CEIL_IROR_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IROR_R; @@ -315,8 +329,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R; - if (opcode < RandomX_CurrentConfig.CEIL_IROL_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::IROL_R; @@ -331,8 +346,9 @@ namespace randomx { registerUsage[dst] = i; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R; - if (opcode < RandomX_CurrentConfig.CEIL_ISWAP_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; if (src != dst) { @@ -347,8 +363,9 @@ namespace randomx { } return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R; - if (opcode < RandomX_CurrentConfig.CEIL_FSWAP_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R) { auto dst = instr.dst % RegistersCount; ibc.type = InstructionType::FSWAP_R; if (dst < RegisterCountFlt) @@ -357,8 +374,9 @@ namespace randomx { ibc.fdst = &nreg->e[dst - RegisterCountFlt]; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R; - if (opcode < RandomX_CurrentConfig.CEIL_FADD_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R) { auto dst = instr.dst % RegisterCountFlt; auto src = instr.src % RegisterCountFlt; ibc.type = InstructionType::FADD_R; @@ -366,8 +384,9 @@ namespace randomx { ibc.fsrc = &nreg->a[src]; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R; - if (opcode < RandomX_CurrentConfig.CEIL_FADD_M) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M) { auto dst = instr.dst % RegisterCountFlt; auto src = instr.src % RegistersCount; ibc.type = InstructionType::FADD_M; @@ -377,8 +396,9 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M; - if (opcode < RandomX_CurrentConfig.CEIL_FSUB_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R) { auto dst = instr.dst % RegisterCountFlt; auto src = instr.src % RegisterCountFlt; ibc.type = InstructionType::FSUB_R; @@ -386,8 +406,9 @@ namespace randomx { ibc.fsrc = &nreg->a[src]; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R; - if (opcode < RandomX_CurrentConfig.CEIL_FSUB_M) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M) { auto dst = instr.dst % RegisterCountFlt; auto src = instr.src % RegistersCount; ibc.type = InstructionType::FSUB_M; @@ -397,15 +418,17 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M; - if (opcode < RandomX_CurrentConfig.CEIL_FSCAL_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R) { auto dst = instr.dst % RegisterCountFlt; ibc.fdst = &nreg->f[dst]; ibc.type = InstructionType::FSCAL_R; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R; - if (opcode < RandomX_CurrentConfig.CEIL_FMUL_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R) { auto dst = instr.dst % RegisterCountFlt; auto src = instr.src % RegisterCountFlt; ibc.type = InstructionType::FMUL_R; @@ -413,8 +436,9 @@ namespace randomx { ibc.fsrc = &nreg->a[src]; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R; - if (opcode < RandomX_CurrentConfig.CEIL_FDIV_M) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M) { auto dst = instr.dst % RegisterCountFlt; auto src = instr.src % RegistersCount; ibc.type = InstructionType::FDIV_M; @@ -424,41 +448,44 @@ namespace randomx { ibc.imm = signExtend2sCompl(instr.getImm32()); return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M; - if (opcode < RandomX_CurrentConfig.CEIL_FSQRT_R) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R) { auto dst = instr.dst % RegisterCountFlt; ibc.type = InstructionType::FSQRT_R; ibc.fdst = &nreg->e[dst]; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R; - if (opcode < RandomX_CurrentConfig.CEIL_CBRANCH) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH) { ibc.type = InstructionType::CBRANCH; //jump condition int creg = instr.dst % RegistersCount; ibc.idst = &nreg->r[creg]; ibc.target = registerUsage[creg]; - int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset; - ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); - if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2 - ibc.imm &= ~(1ULL << (shift - 1)); - ibc.memMask = RandomX_CurrentConfig.ConditionMask_Calculated << shift; + const int shift = instr.getModCond(); + ibc.imm = signExtend2sCompl(instr.getImm32()) | ((1ULL << RandomX_ConfigurationBase::JumpOffset) << shift); + ibc.imm &= ~((1ULL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift); + ibc.memMask = RandomX_ConfigurationBase::ConditionMask_Calculated << shift; //mark all registers as used for (unsigned j = 0; j < RegistersCount; ++j) { registerUsage[j] = i; } return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH; - if (opcode < RandomX_CurrentConfig.CEIL_CFROUND) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND) { auto src = instr.src % RegistersCount; ibc.isrc = &nreg->r[src]; ibc.type = InstructionType::CFROUND; ibc.imm = instr.getImm32() & 63; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND; - if (opcode < RandomX_CurrentConfig.CEIL_ISTORE) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; ibc.type = InstructionType::ISTORE; @@ -471,8 +498,9 @@ namespace randomx { ibc.memMask = ScratchpadL3Mask; return; } + opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE; - if (opcode < RandomX_CurrentConfig.CEIL_NOP) { + if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_NOP) { ibc.type = InstructionType::NOP; return; } diff --git a/src/crypto/randomx/bytecode_machine.hpp b/src/crypto/randomx/bytecode_machine.hpp index 8aee78d89..8852f4d68 100644 --- a/src/crypto/randomx/bytecode_machine.hpp +++ b/src/crypto/randomx/bytecode_machine.hpp @@ -225,7 +225,7 @@ namespace randomx { } static void exe_CFROUND(RANDOMX_EXE_ARGS) { - rx_set_rounding_mode(rotr64(*ibc.isrc, ibc.imm) % 4); + rx_set_rounding_mode(rotr64(*ibc.isrc, static_cast(ibc.imm)) % 4); } static void exe_ISTORE(RANDOMX_EXE_ARGS) { diff --git a/src/crypto/randomx/common.hpp b/src/crypto/randomx/common.hpp index 34c8477c3..aefbad032 100644 --- a/src/crypto/randomx/common.hpp +++ b/src/crypto/randomx/common.hpp @@ -74,8 +74,8 @@ namespace randomx { constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_MAX_LATENCY + 2; constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE; #define ScratchpadSize RandomX_CurrentConfig.ScratchpadL3_Size - #define CacheLineAlignMask RandomX_CurrentConfig.CacheLineAlignMask_Calculated - #define DatasetExtraItems RandomX_CurrentConfig.DatasetExtraItems_Calculated + #define CacheLineAlignMask RandomX_ConfigurationBase::CacheLineAlignMask_Calculated + #define DatasetExtraItems RandomX_ConfigurationBase::DatasetExtraItems_Calculated constexpr int StoreL3Condition = 14; //Prevent some unsafe configurations. diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index d291de4d3..7a601c5b3 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -75,11 +75,11 @@ static size_t CalcDatasetItemSize() // Prologue ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) + // Main loop - RandomX_CurrentConfig.CacheAccesses * ( + RandomX_ConfigurationBase::CacheAccesses * ( // Main loop prologue ((uint8_t*)randomx_calc_dataset_item_aarch64_mix - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch)) + 4 + // Inner main loop (instructions) - ((RandomX_CurrentConfig.SuperscalarLatency * 3) + 2) * 16 + + ((RandomX_ConfigurationBase::SuperscalarLatency * 3) + 2) * 16 + // Main loop epilogue ((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4 ) + @@ -235,7 +235,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s num32bitLiterals = 64; constexpr uint32_t tmp_reg = 12; - for (size_t i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) + for (size_t i = 0; i < RandomX_ConfigurationBase::CacheAccesses; ++i) { // and x11, x10, CacheSize / CacheLineSize - 1 emit32(0x92400000 | 11 | (10 << 5) | ((RandomX_CurrentConfig.Log2_CacheSize - 1) << 10), code, codePos); @@ -946,7 +946,7 @@ void JitCompilerA64::h_CBRANCH(Instruction& instr, uint32_t& codePos) const uint32_t dst = IntRegMap[instr.dst]; const uint32_t modCond = instr.getModCond(); - const uint32_t shift = modCond + RandomX_CurrentConfig.JumpOffset; + const uint32_t shift = modCond + RandomX_ConfigurationBase::JumpOffset; const uint32_t imm = (instr.getImm32() | (1U << shift)) & ~(1U << (shift - 1)); emitAddImmediate(dst, dst, imm, code, k); diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 2eff9ab90..8edf5a720 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "crypto/randomx/reciprocal.h" #include "crypto/randomx/virtual_memory.hpp" #include "base/tools/Profiler.h" +#include "backend/cpu/Cpu.h" #ifdef XMRIG_FIX_RYZEN # include "crypto/rx/Rx.h" @@ -167,55 +168,10 @@ namespace randomx { # endif } - // CPU-specific tweaks - void JitCompilerX86::applyTweaks() { - int32_t info[4]; - cpuid(0, info); - - int32_t manufacturer[4]; - manufacturer[0] = info[1]; - manufacturer[1] = info[3]; - manufacturer[2] = info[2]; - manufacturer[3] = 0; - - if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) { - struct - { - unsigned int stepping : 4; - unsigned int model : 4; - unsigned int family : 4; - unsigned int processor_type : 2; - unsigned int reserved1 : 2; - unsigned int ext_model : 4; - unsigned int ext_family : 8; - unsigned int reserved2 : 4; - } processor_info; - - cpuid(1, info); - memcpy(&processor_info, info, sizeof(processor_info)); - - // Intel JCC erratum mitigation - if (processor_info.family == 6) { - const uint32_t model = processor_info.model | (processor_info.ext_model << 4); - const uint32_t stepping = processor_info.stepping; - - // Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf - BranchesWithin32B = - ((model == 0x4E) && (stepping == 0x3)) || - ((model == 0x55) && (stepping == 0x4)) || - ((model == 0x5E) && (stepping == 0x3)) || - ((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) || - ((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) || - ((model == 0xA6) && (stepping == 0x0)) || - ((model == 0xAE) && (stepping == 0xA)); - } - } - } - static std::atomic codeOffset; JitCompilerX86::JitCompilerX86() { - applyTweaks(); + BranchesWithin32B = xmrig::Cpu::info()->jccErratum(); int32_t info[4]; cpuid(1, info); @@ -1081,6 +1037,7 @@ namespace randomx { codePos = pos; } + template void JitCompilerX86::h_CBRANCH(const Instruction& instr) { uint8_t* const p = code; uint32_t pos = codePos; @@ -1088,7 +1045,7 @@ namespace randomx { const int reg = instr.dst % RegistersCount; int32_t jmp_offset = registerUsage[reg] - (pos + 16); - if (BranchesWithin32B) { + if (jccErratum) { const uint32_t branch_begin = static_cast(pos + 7); const uint32_t branch_end = static_cast(branch_begin + ((jmp_offset >= -128) ? 9 : 13)); @@ -1101,10 +1058,12 @@ namespace randomx { } *(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16); - const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset; - *(uint32_t*)(p + pos + 3) = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1)); + const int shift = instr.getModCond(); + const uint32_t or_mask = (1UL << RandomX_ConfigurationBase::JumpOffset) << shift; + const uint32_t and_mask = ~((1UL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift); + *(uint32_t*)(p + pos + 3) = (instr.getImm32() | or_mask) & and_mask; *(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16); - *(uint32_t*)(p + pos + 10) = RandomX_CurrentConfig.ConditionMask_Calculated << shift; + *(uint32_t*)(p + pos + 10) = RandomX_ConfigurationBase::ConditionMask_Calculated << shift; pos += 14; if (jmp_offset >= -128) { @@ -1127,6 +1086,9 @@ namespace randomx { codePos = pos; } + template void JitCompilerX86::h_CBRANCH(const Instruction&); + template void JitCompilerX86::h_CBRANCH(const Instruction&); + void JitCompilerX86::h_ISTORE(const Instruction& instr) { uint8_t* const p = code; uint32_t pos = codePos; diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index c8a60c1da..3a9163b5e 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -84,7 +84,6 @@ namespace randomx { uint8_t* allocatedCode; - void applyTweaks(); void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&, ProgramConfiguration&); template @@ -148,11 +147,13 @@ namespace randomx { void h_FMUL_R(const Instruction&); void h_FDIV_M(const Instruction&); void h_FSQRT_R(const Instruction&); + + template void h_CBRANCH(const Instruction&); + void h_CFROUND(const Instruction&); void h_CFROUND_BMI2(const Instruction&); void h_ISTORE(const Instruction&); void h_NOP(const Instruction&); }; - } diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 89d319de2..5cfaddca3 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -111,22 +111,15 @@ RandomX_ConfigurationKeva::RandomX_ConfigurationKeva() } RandomX_ConfigurationBase::RandomX_ConfigurationBase() - : ArgonMemory(262144) - , ArgonIterations(3) + : ArgonIterations(3) , ArgonLanes(1) , ArgonSalt("RandomX\x03") - , CacheAccesses(8) - , SuperscalarLatency(170) - , DatasetBaseSize(2147483648) - , DatasetExtraSize(33554368) , ScratchpadL1_Size(16384) , ScratchpadL2_Size(262144) , ScratchpadL3_Size(2097152) , ProgramSize(256) , ProgramIterations(2048) , ProgramCount(8) - , JumpBits(8) - , JumpOffset(8) , RANDOMX_FREQ_IADD_RS(16) , RANDOMX_FREQ_IADD_M(7) , RANDOMX_FREQ_ISUB_R(16) @@ -233,11 +226,6 @@ void RandomX_ConfigurationBase::Apply() ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8); ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64; - CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1); - DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE; - - ConditionMask_Calculated = (1 << JumpBits) - 1; - #if defined(_M_X64) || defined(__x86_64__) *(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1; // Not needed right now because all variants use default dataset base size @@ -295,16 +283,16 @@ void RandomX_ConfigurationBase::Apply() #define JIT_HANDLE(x, prev) #endif - constexpr int CEIL_NULL = 0; - int k = 0; + uint32_t k = 0; + uint32_t freq_sum = 0; #define INST_HANDLE(x, prev) \ - CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ - for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); } + freq_sum += RANDOMX_FREQ_##x; \ + for (; k < freq_sum; ++k) { JIT_HANDLE(x, prev); } #define INST_HANDLE2(x, func_name, prev) \ - CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ - for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); } + freq_sum += RANDOMX_FREQ_##x; \ + for (; k < freq_sum; ++k) { JIT_HANDLE(func_name, prev); } INST_HANDLE(IADD_RS, NULL); INST_HANDLE(IADD_M, IADD_RS); @@ -343,7 +331,13 @@ void RandomX_ConfigurationBase::Apply() INST_HANDLE(FMUL_R, FSCAL_R); INST_HANDLE(FDIV_M, FMUL_R); INST_HANDLE(FSQRT_R, FDIV_M); - INST_HANDLE(CBRANCH, FSQRT_R); + + if (xmrig::Cpu::info()->jccErratum()) { + INST_HANDLE2(CBRANCH, CBRANCH, FSQRT_R); + } + else { + INST_HANDLE2(CBRANCH, CBRANCH, FSQRT_R); + } #if defined(_M_X64) || defined(__x86_64__) if (xmrig::Cpu::info()->hasBMI2()) { diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 3379e2242..4f2804edd 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -64,15 +64,24 @@ struct RandomX_ConfigurationBase void Apply(); - uint32_t ArgonMemory; + // Common parameters for all RandomX variants + enum Params : uint64_t + { + ArgonMemory = 262144, + CacheAccesses = 8, + SuperscalarLatency = 170, + DatasetBaseSize = 2147483648, + DatasetExtraSize = 33554368, + JumpBits = 8, + JumpOffset = 8, + CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1), + DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE, + ConditionMask_Calculated = ((1 << JumpBits) - 1) << JumpOffset, + }; + uint32_t ArgonIterations; uint32_t ArgonLanes; const char* ArgonSalt; - uint32_t CacheAccesses; - uint32_t SuperscalarLatency; - - uint32_t DatasetBaseSize; - uint32_t DatasetExtraSize; uint32_t ScratchpadL1_Size; uint32_t ScratchpadL2_Size; @@ -82,9 +91,6 @@ struct RandomX_ConfigurationBase uint32_t ProgramIterations; uint32_t ProgramCount; - uint32_t JumpBits; - uint32_t JumpOffset; - uint32_t RANDOMX_FREQ_IADD_RS; uint32_t RANDOMX_FREQ_IADD_M; uint32_t RANDOMX_FREQ_ISUB_R; @@ -126,15 +132,10 @@ struct RandomX_ConfigurationBase uint8_t codeReadDatasetLightSshInitTweaked[68]; uint8_t codePrefetchScratchpadTweaked[32]; - uint32_t CacheLineAlignMask_Calculated; - uint32_t DatasetExtraItems_Calculated; - uint32_t AddressMask_Calculated[4]; uint32_t ScratchpadL3Mask_Calculated; uint32_t ScratchpadL3Mask64_Calculated; - uint32_t ConditionMask_Calculated; - #if defined(XMRIG_ARMv8) uint32_t Log2_ScratchpadL1; uint32_t Log2_ScratchpadL2; @@ -142,37 +143,6 @@ struct RandomX_ConfigurationBase uint32_t Log2_DatasetBaseSize; uint32_t Log2_CacheSize; #endif - - int CEIL_IADD_RS; - int CEIL_IADD_M; - int CEIL_ISUB_R; - int CEIL_ISUB_M; - int CEIL_IMUL_R; - int CEIL_IMUL_M; - int CEIL_IMULH_R; - int CEIL_IMULH_M; - int CEIL_ISMULH_R; - int CEIL_ISMULH_M; - int CEIL_IMUL_RCP; - int CEIL_INEG_R; - int CEIL_IXOR_R; - int CEIL_IXOR_M; - int CEIL_IROR_R; - int CEIL_IROL_R; - int CEIL_ISWAP_R; - int CEIL_FSWAP_R; - int CEIL_FADD_R; - int CEIL_FADD_M; - int CEIL_FSUB_R; - int CEIL_FSUB_M; - int CEIL_FSCAL_R; - int CEIL_FMUL_R; - int CEIL_FDIV_M; - int CEIL_FSQRT_R; - int CEIL_CBRANCH; - int CEIL_CFROUND; - int CEIL_ISTORE; - int CEIL_NOP; }; struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};