mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-18 00:37:46 +00:00
Tuned JIT compiler
0.3-0.4% speedup depending on CPU.
This commit is contained in:
parent
887c891ab2
commit
0caeb41bff
6 changed files with 61 additions and 15 deletions
|
@ -234,7 +234,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
|||
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
|
||||
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
|
||||
|
||||
constexpr int PREFETCH_DISTANCE = 4096;
|
||||
constexpr int PREFETCH_DISTANCE = 7168;
|
||||
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
|
||||
scratchpadEnd -= PREFETCH_DISTANCE;
|
||||
|
||||
|
@ -258,8 +258,25 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
|||
|
||||
rx_prefetch_t0(prefetchPtr);
|
||||
|
||||
scratchpadPtr += 64;
|
||||
prefetchPtr += 64;
|
||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4));
|
||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5));
|
||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6));
|
||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7));
|
||||
|
||||
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
||||
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
||||
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
||||
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3);
|
||||
|
||||
rx_prefetch_t0(prefetchPtr + 64);
|
||||
|
||||
scratchpadPtr += 128;
|
||||
prefetchPtr += 128;
|
||||
}
|
||||
prefetchPtr = (const char*) scratchpad;
|
||||
scratchpadEnd += PREFETCH_DISTANCE;
|
||||
|
|
|
@ -49,6 +49,7 @@ namespace randomx {
|
|||
JitCompilerA64();
|
||||
~JitCompilerA64();
|
||||
|
||||
// Intentionally empty: this compiler does nothing in prepare().
// NOTE(review): presumably kept so callers such as CompiledVm::run() can
// invoke compiler.prepare() regardless of which JIT backend is in use — confirm.
void prepare() {}
|
||||
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
|
||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
||||
|
||||
|
|
|
@ -325,6 +325,13 @@ namespace randomx {
|
|||
freePagedMemory(allocatedCode, CodeSize);
|
||||
}
|
||||
|
||||
// Issues rx_prefetch_nta requests over the static `engine` dispatch table and
// over the global RandomX_CurrentConfig, one request every 64 bytes.
// CompiledVm::run() calls this first, before the program is generated and
// handed to generateProgram().
// NOTE(review): the 64-byte stride is presumably the cache-line size; both
// objects are declared alignas(64) in this change — confirm on target CPUs.
// NOTE(review): `i` is a signed int compared against sizeof(...) (a size_t),
// which triggers sign-compare warnings; harmless for these sizes.
void JitCompilerX86::prepare() {
|
||||
// Step through the bytes of the `engine` table, 64 at a time.
for (int i = 0; i < sizeof(engine); i += 64)
|
||||
rx_prefetch_nta((const char*)(&engine) + i);
|
||||
// Same walk over the active RandomX configuration object.
for (int i = 0; i < sizeof(RandomX_CurrentConfig); i += 64)
|
||||
rx_prefetch_nta((const char*)(&RandomX_CurrentConfig) + i);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
||||
vm_flags = flags;
|
||||
|
||||
|
@ -419,11 +426,29 @@ namespace randomx {
|
|||
r[j] = k;
|
||||
}
|
||||
|
||||
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
const uint8_t opcode = instr.opcode;
|
||||
*((uint64_t*)&instr) &= (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
|
||||
(this->*(engine[opcode]))(instr);
|
||||
constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
|
||||
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) {
|
||||
Instruction& instr1 = prog(i);
|
||||
Instruction& instr2 = prog(i + 1);
|
||||
Instruction& instr3 = prog(i + 2);
|
||||
Instruction& instr4 = prog(i + 3);
|
||||
|
||||
InstructionGeneratorX86 gen1 = engine[instr1.opcode];
|
||||
InstructionGeneratorX86 gen2 = engine[instr2.opcode];
|
||||
InstructionGeneratorX86 gen3 = engine[instr3.opcode];
|
||||
InstructionGeneratorX86 gen4 = engine[instr4.opcode];
|
||||
|
||||
*((uint64_t*)&instr1) &= instr_mask;
|
||||
(this->*gen1)(instr1);
|
||||
|
||||
*((uint64_t*)&instr2) &= instr_mask;
|
||||
(this->*gen2)(instr2);
|
||||
|
||||
*((uint64_t*)&instr3) &= instr_mask;
|
||||
(this->*gen3)(instr3);
|
||||
|
||||
*((uint64_t*)&instr4) &= instr_mask;
|
||||
(this->*gen4)(instr4);
|
||||
}
|
||||
|
||||
emit(REX_MOV_RR, code, codePos);
|
||||
|
@ -609,13 +634,14 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
uint8_t* const p = code + pos;
|
||||
|
||||
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst;
|
||||
*(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24);
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst;
|
||||
*(uint32_t*)(p) = template_IADD_RS[dst] | (sib << 24);
|
||||
*(uint32_t*)(p + 4) = instr.getImm32();
|
||||
|
||||
pos += ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4);
|
||||
pos += ((dst == RegisterNeedsDisplacement) ? 8 : 4);
|
||||
|
||||
registerUsage[instr.dst] = pos;
|
||||
registerUsage[dst] = pos;
|
||||
codePos = pos;
|
||||
}
|
||||
|
||||
|
@ -1152,6 +1178,6 @@ namespace randomx {
|
|||
emit(NOP1, code, codePos);
|
||||
}
|
||||
|
||||
InstructionGeneratorX86 JitCompilerX86::engine[256] = {};
|
||||
alignas(64) InstructionGeneratorX86 JitCompilerX86::engine[256] = {};
|
||||
|
||||
}
|
||||
|
|
|
@ -49,6 +49,7 @@ namespace randomx {
|
|||
public:
|
||||
JitCompilerX86();
|
||||
~JitCompilerX86();
|
||||
void prepare();
|
||||
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
|
||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
||||
template<size_t N>
|
||||
|
@ -65,7 +66,7 @@ namespace randomx {
|
|||
}
|
||||
size_t getCodeSize();
|
||||
|
||||
static InstructionGeneratorX86 engine[256];
|
||||
alignas(64) static InstructionGeneratorX86 engine[256];
|
||||
int registerUsage[RegistersCount];
|
||||
uint8_t* allocatedCode;
|
||||
uint8_t* code;
|
||||
|
|
|
@ -291,7 +291,7 @@ RandomX_ConfigurationLoki RandomX_LokiConfig;
|
|||
RandomX_ConfigurationArqma RandomX_ArqmaConfig;
|
||||
RandomX_ConfigurationSafex RandomX_SafexConfig;
|
||||
|
||||
RandomX_ConfigurationBase RandomX_CurrentConfig;
|
||||
alignas(64) RandomX_ConfigurationBase RandomX_CurrentConfig;
|
||||
|
||||
extern "C" {
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ namespace randomx {
|
|||
|
||||
template<bool softAes>
|
||||
void CompiledVm<softAes>::run(void* seed) {
|
||||
compiler.prepare();
|
||||
VmBase<softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
compiler.generateProgram(program, config, randomx_vm::getFlags());
|
||||
|
|
Loading…
Reference in a new issue