Merge pull request #1840 from SChernykh/dev

RandomX refactoring, moved more stuff to compile time
xmrig 2020-09-19 02:01:06 +07:00 committed by GitHub
commit 866245b525
11 changed files with 149 additions and 160 deletions
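
Note: the theme of this commit is that RandomX parameters which are identical for every supported variant (cache accesses, superscalar latency, jump bits and offset, dataset sizes) stop being runtime fields of RandomX_ConfigurationBase and become compile-time constants, so values derived from them fold at compile time and the precomputed CEIL_* thresholds can be removed. A minimal sketch of the pattern, using hypothetical ConfigBefore/ConfigAfter names rather than the real xmrig types:

    #include <cstdint>

    // Before: every parameter is a runtime member; derived values are filled in later.
    struct ConfigBefore {
        uint32_t JumpBits   = 8;
        uint32_t JumpOffset = 8;
        uint32_t ConditionMask_Calculated = 0;   // computed at runtime in Apply()
    };

    // After: shared parameters live in a compile-time enum; derived values fold immediately.
    struct ConfigAfter {
        enum Params : uint64_t {
            JumpBits   = 8,
            JumpOffset = 8,
            ConditionMask_Calculated = ((1 << JumpBits) - 1) << JumpOffset,
        };
    };

    static_assert(ConfigAfter::ConditionMask_Calculated == 0xFF00, "folded at compile time");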


@@ -98,6 +98,7 @@ public:
virtual size_t packages() const = 0;
virtual size_t threads() const = 0;
virtual Vendor vendor() const = 0;
virtual bool jccErratum() const = 0;
};


@@ -212,6 +212,37 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
m_vendor = VENDOR_INTEL;
m_assembly = Assembly::INTEL;
m_msrMod = MSR_MOD_INTEL;
struct
{
unsigned int stepping : 4;
unsigned int model : 4;
unsigned int family : 4;
unsigned int processor_type : 2;
unsigned int reserved1 : 2;
unsigned int ext_model : 4;
unsigned int ext_family : 8;
unsigned int reserved2 : 4;
} processor_info;
cpuid(1, data);
memcpy(&processor_info, data, sizeof(processor_info));
// Intel JCC erratum mitigation
if (processor_info.family == 6) {
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
const uint32_t stepping = processor_info.stepping;
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
m_jccErratum =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
((model == 0xA6) && (stepping == 0x0)) ||
((model == 0xAE) && (stepping == 0xA));
}
}
}
# endif
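
Note: this is the same Intel JCC erratum check that used to live in JitCompilerX86::applyTweaks() (deleted further down); it now runs once in BasicCpuInfo and is exposed through the new jccErratum() accessor. A simplified standalone sketch of the detection, assuming a raw CPUID leaf 1 EAX value instead of the bitfield struct above; the model/stepping pairs are the ones listed in the Intel erratum document referenced in the code:

    #include <cstdint>

    // eax is CPUID.(EAX=1):EAX; stepping in bits [3:0], model [7:4], family [11:8], ext_model [19:16]
    static bool has_jcc_erratum(uint32_t eax)
    {
        const uint32_t family   = (eax >> 8) & 0xF;
        const uint32_t model    = ((eax >> 4) & 0xF) | (((eax >> 16) & 0xF) << 4);
        const uint32_t stepping = eax & 0xF;

        if (family != 6) {
            return false;   // the erratum only affects family-6 Intel parts
        }

        return ((model == 0x4E) && (stepping == 0x3)) ||
               ((model == 0x55) && (stepping == 0x4)) ||
               ((model == 0x5E) && (stepping == 0x3)) ||
               ((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
               ((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
               ((model == 0xA6) && (stepping == 0x0)) ||
               ((model == 0xAE) && (stepping == 0xA));
    }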


@@ -61,11 +61,13 @@ protected:
inline size_t packages() const override { return 1; }
inline size_t threads() const override { return m_threads; }
inline Vendor vendor() const override { return m_vendor; }
inline bool jccErratum() const override { return m_jccErratum; }
protected:
char m_brand[64 + 6]{};
size_t m_threads;
Vendor m_vendor = VENDOR_UNKNOWN;
bool m_jccErratum = false;
private:
Assembly m_assembly = Assembly::NONE;


@@ -79,9 +79,9 @@ namespace randomx {
}
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
int opcode = instr.opcode;
uint32_t opcode = instr.opcode;
if (opcode < RandomX_CurrentConfig.CEIL_IADD_RS) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_RS;
@@ -99,8 +99,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS;
if (opcode < RandomX_CurrentConfig.CEIL_IADD_M) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_M;
@@ -117,8 +118,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M;
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_R;
@@ -133,8 +135,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R;
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_M) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_M;
@@ -151,8 +154,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M;
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_R;
@@ -167,8 +171,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R;
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_M) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_M;
@@ -185,8 +190,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M;
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_R;
@@ -195,8 +201,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R;
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_M) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_M;
@@ -213,8 +220,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M;
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_R;
@@ -223,8 +231,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R;
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_M) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_M;
@@ -241,8 +250,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M;
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_RCP) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount;
@@ -257,16 +267,18 @@ namespace randomx {
}
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP;
if (opcode < RandomX_CurrentConfig.CEIL_INEG_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::INEG_R;
ibc.idst = &nreg->r[dst];
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R;
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_R;
@@ -281,8 +293,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R;
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_M) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_M;
@@ -299,8 +312,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M;
if (opcode < RandomX_CurrentConfig.CEIL_IROR_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROR_R;
@@ -315,8 +329,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R;
if (opcode < RandomX_CurrentConfig.CEIL_IROL_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROL_R;
@@ -331,8 +346,9 @@ namespace randomx {
registerUsage[dst] = i;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R;
if (opcode < RandomX_CurrentConfig.CEIL_ISWAP_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
if (src != dst) {
@@ -347,8 +363,9 @@ namespace randomx {
}
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R;
if (opcode < RandomX_CurrentConfig.CEIL_FSWAP_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::FSWAP_R;
if (dst < RegisterCountFlt)
@@ -357,8 +374,9 @@ namespace randomx {
ibc.fdst = &nreg->e[dst - RegisterCountFlt];
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R;
if (opcode < RandomX_CurrentConfig.CEIL_FADD_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FADD_R;
@@ -366,8 +384,9 @@ namespace randomx {
ibc.fsrc = &nreg->a[src];
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R;
if (opcode < RandomX_CurrentConfig.CEIL_FADD_M) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FADD_M;
@@ -377,8 +396,9 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32());
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M;
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FSUB_R;
@@ -386,8 +406,9 @@ namespace randomx {
ibc.fsrc = &nreg->a[src];
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R;
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_M) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FSUB_M;
@@ -397,15 +418,17 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32());
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M;
if (opcode < RandomX_CurrentConfig.CEIL_FSCAL_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R) {
auto dst = instr.dst % RegisterCountFlt;
ibc.fdst = &nreg->f[dst];
ibc.type = InstructionType::FSCAL_R;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R;
if (opcode < RandomX_CurrentConfig.CEIL_FMUL_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FMUL_R;
@@ -413,8 +436,9 @@ namespace randomx {
ibc.fsrc = &nreg->a[src];
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R;
if (opcode < RandomX_CurrentConfig.CEIL_FDIV_M) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M) {
auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FDIV_M;
@@ -424,41 +448,44 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32());
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M;
if (opcode < RandomX_CurrentConfig.CEIL_FSQRT_R) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R) {
auto dst = instr.dst % RegisterCountFlt;
ibc.type = InstructionType::FSQRT_R;
ibc.fdst = &nreg->e[dst];
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R;
if (opcode < RandomX_CurrentConfig.CEIL_CBRANCH) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH) {
ibc.type = InstructionType::CBRANCH;
//jump condition
int creg = instr.dst % RegistersCount;
ibc.idst = &nreg->r[creg];
ibc.target = registerUsage[creg];
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
ibc.imm &= ~(1ULL << (shift - 1));
ibc.memMask = RandomX_CurrentConfig.ConditionMask_Calculated << shift;
const int shift = instr.getModCond();
ibc.imm = signExtend2sCompl(instr.getImm32()) | ((1ULL << RandomX_ConfigurationBase::JumpOffset) << shift);
ibc.imm &= ~((1ULL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift);
ibc.memMask = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;
}
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH;
if (opcode < RandomX_CurrentConfig.CEIL_CFROUND) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND) {
auto src = instr.src % RegistersCount;
ibc.isrc = &nreg->r[src];
ibc.type = InstructionType::CFROUND;
ibc.imm = instr.getImm32() & 63;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND;
if (opcode < RandomX_CurrentConfig.CEIL_ISTORE) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISTORE;
@@ -471,8 +498,9 @@ namespace randomx {
ibc.memMask = ScratchpadL3Mask;
return;
}
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE;
if (opcode < RandomX_CurrentConfig.CEIL_NOP) {
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_NOP) {
ibc.type = InstructionType::NOP;
return;
}
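
Note on this file: the interpreter used to compare opcode against precomputed cumulative thresholds (CEIL_IADD_RS, CEIL_IADD_M, and so on, filled in by Apply()); it now subtracts each instruction's frequency from opcode as it walks the chain, so only the per-instruction RANDOMX_FREQ_* values are needed and the CEIL_* members can be dropped (see the randomx.cpp/randomx.h hunks below). The two dispatch styles are equivalent; a small sketch with a hypothetical freqs[] table:

    #include <cstdint>

    // Old style: build cumulative ceilings and compare against them.
    int decode_with_ceilings(uint32_t opcode, const uint32_t* freqs, int count)
    {
        uint32_t ceil = 0;
        for (int i = 0; i < count; ++i) {
            ceil += freqs[i];                 // CEIL_x = CEIL_prev + RANDOMX_FREQ_x
            if (opcode < ceil) return i;
        }
        return -1;
    }

    // New style used in this commit: compare against one frequency at a time, then subtract it.
    int decode_with_subtraction(uint32_t opcode, const uint32_t* freqs, int count)
    {
        for (int i = 0; i < count; ++i) {
            if (opcode < freqs[i]) return i;  // if (opcode < RANDOMX_FREQ_x) { ... }
            opcode -= freqs[i];               // opcode -= RANDOMX_FREQ_x;
        }
        return -1;
    }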


@@ -225,7 +225,7 @@ namespace randomx {
}
static void exe_CFROUND(RANDOMX_EXE_ARGS) {
rx_set_rounding_mode(rotr64(*ibc.isrc, ibc.imm) % 4);
rx_set_rounding_mode(rotr64(*ibc.isrc, static_cast<uint32_t>(ibc.imm)) % 4);
}
static void exe_ISTORE(RANDOMX_EXE_ARGS) {


@@ -74,8 +74,8 @@ namespace randomx {
constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_MAX_LATENCY + 2;
constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
#define ScratchpadSize RandomX_CurrentConfig.ScratchpadL3_Size
#define CacheLineAlignMask RandomX_CurrentConfig.CacheLineAlignMask_Calculated
#define DatasetExtraItems RandomX_CurrentConfig.DatasetExtraItems_Calculated
#define CacheLineAlignMask RandomX_ConfigurationBase::CacheLineAlignMask_Calculated
#define DatasetExtraItems RandomX_ConfigurationBase::DatasetExtraItems_Calculated
constexpr int StoreL3Condition = 14;
//Prevent some unsafe configurations.


@@ -75,11 +75,11 @@ static size_t CalcDatasetItemSize()
// Prologue
((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) +
// Main loop
RandomX_CurrentConfig.CacheAccesses * (
RandomX_ConfigurationBase::CacheAccesses * (
// Main loop prologue
((uint8_t*)randomx_calc_dataset_item_aarch64_mix - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch)) + 4 +
// Inner main loop (instructions)
((RandomX_CurrentConfig.SuperscalarLatency * 3) + 2) * 16 +
((RandomX_ConfigurationBase::SuperscalarLatency * 3) + 2) * 16 +
// Main loop epilogue
((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4
) +
@@ -235,7 +235,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s
num32bitLiterals = 64;
constexpr uint32_t tmp_reg = 12;
for (size_t i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i)
for (size_t i = 0; i < RandomX_ConfigurationBase::CacheAccesses; ++i)
{
// and x11, x10, CacheSize / CacheLineSize - 1
emit32(0x92400000 | 11 | (10 << 5) | ((RandomX_CurrentConfig.Log2_CacheSize - 1) << 10), code, codePos);
@@ -946,7 +946,7 @@ void JitCompilerA64::h_CBRANCH(Instruction& instr, uint32_t& codePos)
const uint32_t dst = IntRegMap[instr.dst];
const uint32_t modCond = instr.getModCond();
const uint32_t shift = modCond + RandomX_CurrentConfig.JumpOffset;
const uint32_t shift = modCond + RandomX_ConfigurationBase::JumpOffset;
const uint32_t imm = (instr.getImm32() | (1U << shift)) & ~(1U << (shift - 1));
emitAddImmediate(dst, dst, imm, code, k);


@@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/virtual_memory.hpp"
#include "base/tools/Profiler.h"
#include "backend/cpu/Cpu.h"
#ifdef XMRIG_FIX_RYZEN
# include "crypto/rx/Rx.h"
@@ -167,55 +168,10 @@ namespace randomx {
# endif
}
// CPU-specific tweaks
void JitCompilerX86::applyTweaks() {
int32_t info[4];
cpuid(0, info);
int32_t manufacturer[4];
manufacturer[0] = info[1];
manufacturer[1] = info[3];
manufacturer[2] = info[2];
manufacturer[3] = 0;
if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) {
struct
{
unsigned int stepping : 4;
unsigned int model : 4;
unsigned int family : 4;
unsigned int processor_type : 2;
unsigned int reserved1 : 2;
unsigned int ext_model : 4;
unsigned int ext_family : 8;
unsigned int reserved2 : 4;
} processor_info;
cpuid(1, info);
memcpy(&processor_info, info, sizeof(processor_info));
// Intel JCC erratum mitigation
if (processor_info.family == 6) {
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
const uint32_t stepping = processor_info.stepping;
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
BranchesWithin32B =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
((model == 0xA6) && (stepping == 0x0)) ||
((model == 0xAE) && (stepping == 0xA));
}
}
}
static std::atomic<size_t> codeOffset;
JitCompilerX86::JitCompilerX86() {
applyTweaks();
BranchesWithin32B = xmrig::Cpu::info()->jccErratum();
int32_t info[4];
cpuid(1, info);
@@ -1081,6 +1037,7 @@ namespace randomx {
codePos = pos;
}
template<bool jccErratum>
void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
uint8_t* const p = code;
uint32_t pos = codePos;
@@ -1088,7 +1045,7 @@ namespace randomx {
const int reg = instr.dst % RegistersCount;
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
if (BranchesWithin32B) {
if (jccErratum) {
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
@@ -1101,10 +1058,12 @@ namespace randomx {
}
*(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16);
const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
*(uint32_t*)(p + pos + 3) = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1));
const int shift = instr.getModCond();
const uint32_t or_mask = (1UL << RandomX_ConfigurationBase::JumpOffset) << shift;
const uint32_t and_mask = ~((1UL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift);
*(uint32_t*)(p + pos + 3) = (instr.getImm32() | or_mask) & and_mask;
*(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16);
*(uint32_t*)(p + pos + 10) = RandomX_CurrentConfig.ConditionMask_Calculated << shift;
*(uint32_t*)(p + pos + 10) = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;
pos += 14;
if (jmp_offset >= -128) {
@@ -1127,6 +1086,9 @@ namespace randomx {
codePos = pos;
}
template void JitCompilerX86::h_CBRANCH<false>(const Instruction&);
template void JitCompilerX86::h_CBRANCH<true>(const Instruction&);
void JitCompilerX86::h_ISTORE(const Instruction& instr) {
uint8_t* const p = code;
uint32_t pos = codePos;
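
Note on h_CBRANCH: the handler is now templated on the JCC-erratum flag, chosen once when the handler table is filled in randomx.cpp (see below), instead of branching on BranchesWithin32B at run time, and the immediate masks are built from the compile-time JumpOffset constant. Shifting the pre-shifted masks by getModCond() alone produces exactly the same bits as the old modCond + JumpOffset shift; a quick compile-time check, assuming JumpOffset = 8 as in RandomX_ConfigurationBase:

    #include <cstdint>

    constexpr uint32_t JumpOffset = 8;

    constexpr uint64_t or_mask_old(uint32_t modCond)  { return 1ULL << (modCond + JumpOffset); }
    constexpr uint64_t or_mask_new(uint32_t modCond)  { return (1ULL << JumpOffset) << modCond; }
    constexpr uint64_t and_mask_old(uint32_t modCond) { return ~(1ULL << (modCond + JumpOffset - 1)); }
    constexpr uint64_t and_mask_new(uint32_t modCond) { return ~((1ULL << (JumpOffset - 1)) << modCond); }

    // getModCond() returns a value in [0, 15]; spot-check both ends of the range.
    static_assert(or_mask_old(0)  == or_mask_new(0)  && and_mask_old(0)  == and_mask_new(0),  "modCond = 0");
    static_assert(or_mask_old(7)  == or_mask_new(7)  && and_mask_old(7)  == and_mask_new(7),  "modCond = 7");
    static_assert(or_mask_old(15) == or_mask_new(15) && and_mask_old(15) == and_mask_new(15), "modCond = 15");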


@@ -84,7 +84,6 @@ namespace randomx {
uint8_t* allocatedCode;
void applyTweaks();
void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&, ProgramConfiguration&);
template<bool rax>
@@ -148,11 +147,13 @@ namespace randomx {
void h_FMUL_R(const Instruction&);
void h_FDIV_M(const Instruction&);
void h_FSQRT_R(const Instruction&);
template<bool jccErratum>
void h_CBRANCH(const Instruction&);
void h_CFROUND(const Instruction&);
void h_CFROUND_BMI2(const Instruction&);
void h_ISTORE(const Instruction&);
void h_NOP(const Instruction&);
};
}


@@ -111,22 +111,15 @@ RandomX_ConfigurationKeva::RandomX_ConfigurationKeva()
}
RandomX_ConfigurationBase::RandomX_ConfigurationBase()
: ArgonMemory(262144)
, ArgonIterations(3)
: ArgonIterations(3)
, ArgonLanes(1)
, ArgonSalt("RandomX\x03")
, CacheAccesses(8)
, SuperscalarLatency(170)
, DatasetBaseSize(2147483648)
, DatasetExtraSize(33554368)
, ScratchpadL1_Size(16384)
, ScratchpadL2_Size(262144)
, ScratchpadL3_Size(2097152)
, ProgramSize(256)
, ProgramIterations(2048)
, ProgramCount(8)
, JumpBits(8)
, JumpOffset(8)
, RANDOMX_FREQ_IADD_RS(16)
, RANDOMX_FREQ_IADD_M(7)
, RANDOMX_FREQ_ISUB_R(16)
@@ -233,11 +226,6 @@ void RandomX_ConfigurationBase::Apply()
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
ConditionMask_Calculated = (1 << JumpBits) - 1;
#if defined(_M_X64) || defined(__x86_64__)
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
// Not needed right now because all variants use default dataset base size
@@ -295,16 +283,16 @@ void RandomX_ConfigurationBase::Apply()
#define JIT_HANDLE(x, prev)
#endif
constexpr int CEIL_NULL = 0;
int k = 0;
uint32_t k = 0;
uint32_t freq_sum = 0;
#define INST_HANDLE(x, prev) \
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
freq_sum += RANDOMX_FREQ_##x; \
for (; k < freq_sum; ++k) { JIT_HANDLE(x, prev); }
#define INST_HANDLE2(x, func_name, prev) \
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); }
freq_sum += RANDOMX_FREQ_##x; \
for (; k < freq_sum; ++k) { JIT_HANDLE(func_name, prev); }
INST_HANDLE(IADD_RS, NULL);
INST_HANDLE(IADD_M, IADD_RS);
@@ -343,7 +331,13 @@ void RandomX_ConfigurationBase::Apply()
INST_HANDLE(FMUL_R, FSCAL_R);
INST_HANDLE(FDIV_M, FMUL_R);
INST_HANDLE(FSQRT_R, FDIV_M);
INST_HANDLE(CBRANCH, FSQRT_R);
if (xmrig::Cpu::info()->jccErratum()) {
INST_HANDLE2(CBRANCH, CBRANCH<true>, FSQRT_R);
}
else {
INST_HANDLE2(CBRANCH, CBRANCH<false>, FSQRT_R);
}
#if defined(_M_X64) || defined(__x86_64__)
if (xmrig::Cpu::info()->hasBMI2()) {


@@ -64,15 +64,24 @@ struct RandomX_ConfigurationBase
void Apply();
uint32_t ArgonMemory;
// Common parameters for all RandomX variants
enum Params : uint64_t
{
ArgonMemory = 262144,
CacheAccesses = 8,
SuperscalarLatency = 170,
DatasetBaseSize = 2147483648,
DatasetExtraSize = 33554368,
JumpBits = 8,
JumpOffset = 8,
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1),
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE,
ConditionMask_Calculated = ((1 << JumpBits) - 1) << JumpOffset,
};
uint32_t ArgonIterations;
uint32_t ArgonLanes;
const char* ArgonSalt;
uint32_t CacheAccesses;
uint32_t SuperscalarLatency;
uint32_t DatasetBaseSize;
uint32_t DatasetExtraSize;
uint32_t ScratchpadL1_Size;
uint32_t ScratchpadL2_Size;
@@ -82,9 +91,6 @@ struct RandomX_ConfigurationBase
uint32_t ProgramIterations;
uint32_t ProgramCount;
uint32_t JumpBits;
uint32_t JumpOffset;
uint32_t RANDOMX_FREQ_IADD_RS;
uint32_t RANDOMX_FREQ_IADD_M;
uint32_t RANDOMX_FREQ_ISUB_R;
@@ -126,15 +132,10 @@ struct RandomX_ConfigurationBase
uint8_t codeReadDatasetLightSshInitTweaked[68];
uint8_t codePrefetchScratchpadTweaked[32];
uint32_t CacheLineAlignMask_Calculated;
uint32_t DatasetExtraItems_Calculated;
uint32_t AddressMask_Calculated[4];
uint32_t ScratchpadL3Mask_Calculated;
uint32_t ScratchpadL3Mask64_Calculated;
uint32_t ConditionMask_Calculated;
#if defined(XMRIG_ARMv8)
uint32_t Log2_ScratchpadL1;
uint32_t Log2_ScratchpadL2;
@@ -142,37 +143,6 @@ struct RandomX_ConfigurationBase
uint32_t Log2_DatasetBaseSize;
uint32_t Log2_CacheSize;
#endif
int CEIL_IADD_RS;
int CEIL_IADD_M;
int CEIL_ISUB_R;
int CEIL_ISUB_M;
int CEIL_IMUL_R;
int CEIL_IMUL_M;
int CEIL_IMULH_R;
int CEIL_IMULH_M;
int CEIL_ISMULH_R;
int CEIL_ISMULH_M;
int CEIL_IMUL_RCP;
int CEIL_INEG_R;
int CEIL_IXOR_R;
int CEIL_IXOR_M;
int CEIL_IROR_R;
int CEIL_IROL_R;
int CEIL_ISWAP_R;
int CEIL_FSWAP_R;
int CEIL_FADD_R;
int CEIL_FADD_M;
int CEIL_FSUB_R;
int CEIL_FSUB_M;
int CEIL_FSCAL_R;
int CEIL_FMUL_R;
int CEIL_FDIV_M;
int CEIL_FSQRT_R;
int CEIL_CBRANCH;
int CEIL_CFROUND;
int CEIL_ISTORE;
int CEIL_NOP;
};
struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};