diff --git a/src/crypto/randomx/aes_hash.cpp b/src/crypto/randomx/aes_hash.cpp index 4a400d0a8..1898a2c55 100644 --- a/src/crypto/randomx/aes_hash.cpp +++ b/src/crypto/randomx/aes_hash.cpp @@ -234,7 +234,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2); rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3); - constexpr int PREFETCH_DISTANCE = 4096; + constexpr int PREFETCH_DISTANCE = 7168; /* offset in bytes: prefetchPtr starts this far past the scratchpad start and scratchpadEnd is pulled back by the same amount */ const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE; scratchpadEnd -= PREFETCH_DISTANCE; @@ -258,8 +258,25 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi rx_prefetch_t0(prefetchPtr); - scratchpadPtr += 64; - prefetchPtr += 64; + hash_state0 = aesenc(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4)); /* second group of the iteration: vectors at offsets 4..7 get the same hash / fill / store sequence */ + hash_state1 = aesdec(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5)); + hash_state2 = aesenc(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6)); + hash_state3 = aesdec(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7)); + + fill_state0 = aesdec(fill_state0, key0); + fill_state1 = aesenc(fill_state1, key1); + fill_state2 = aesdec(fill_state2, key2); + fill_state3 = aesenc(fill_state3, key3); + + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0); + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1); + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2); + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3); + + rx_prefetch_t0(prefetchPtr + 64); /* second prefetch of the iteration, 64 bytes past the first */ + + scratchpadPtr += 128; /* both pointers now advance 128 bytes per iteration (was 64). NOTE(review): the loop bound is outside this hunk; confirm the iterated span is a multiple of 128 bytes */ + prefetchPtr += 128; } prefetchPtr = (const char*) scratchpad; scratchpadEnd += PREFETCH_DISTANCE; diff --git a/src/crypto/randomx/jit_compiler_a64.hpp b/src/crypto/randomx/jit_compiler_a64.hpp index 05afdc70d..c589b50d2 100644 --- a/src/crypto/randomx/jit_compiler_a64.hpp +++ b/src/crypto/randomx/jit_compiler_a64.hpp @@ -49,6 
+49,7 @@ namespace randomx { JitCompilerA64(); ~JitCompilerA64(); + void prepare() {} /* empty body in this class; JitCompilerX86::prepare() is the variant that issues prefetches */ void generateProgram(Program&, ProgramConfiguration&, uint32_t); void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 34f98cb96..1307c9a59 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -325,6 +325,13 @@ namespace randomx { freePagedMemory(allocatedCode, CodeSize); } + void JitCompilerX86::prepare() { /* Issues rx_prefetch_nta once per 64-byte step across the engine table and RandomX_CurrentConfig; both are declared alignas(64) by this patch. */ + for (size_t i = 0; i < sizeof(engine); i += 64) /* size_t index: sizeof yields an unsigned value, so the comparison stays same-signed */ + rx_prefetch_nta((const char*)(&engine) + i); + for (size_t i = 0; i < sizeof(RandomX_CurrentConfig); i += 64) + rx_prefetch_nta((const char*)(&RandomX_CurrentConfig) + i); + } + void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) { vm_flags = flags; @@ -419,11 +426,29 @@ namespace randomx { r[j] = k; } - for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; ++i) { - Instruction& instr = prog(i); - const uint8_t opcode = instr.opcode; - *((uint64_t*)&instr) &= (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16); - (this->*(engine[opcode]))(instr); + constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16); /* the mask expression the previous loop applied inline, now named once outside the loop */ + for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) { /* NOTE(review): four instructions per iteration and no tail loop; assumes ProgramSize is a multiple of 4, confirm for every supported configuration */ + Instruction& instr1 = prog(i); + Instruction& instr2 = prog(i + 1); + Instruction& instr3 = prog(i + 2); + Instruction& instr4 = prog(i + 3); + + InstructionGeneratorX86 gen1 = engine[instr1.opcode]; /* all four generator pointers are looked up before any generator is invoked */ + InstructionGeneratorX86 gen2 = engine[instr2.opcode]; + InstructionGeneratorX86 gen3 = engine[instr3.opcode]; + InstructionGeneratorX86 gen4 = engine[instr4.opcode]; + + *((uint64_t*)&instr1) &= instr_mask; + (this->*gen1)(instr1); + + *((uint64_t*)&instr2) &= instr_mask; + (this->*gen2)(instr2); + 
*((uint64_t*)&instr3) &= instr_mask; + (this->*gen3)(instr3); + + *((uint64_t*)&instr4) &= instr_mask; + (this->*gen4)(instr4); } emit(REX_MOV_RR, code, codePos); @@ -609,13 +634,14 @@ namespace randomx { int pos = codePos; uint8_t* const p = code + pos; - const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst; - *(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24); + const uint32_t dst = instr.dst; /* instr.dst is read once here and the local is reused for every later use in this function */ + const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst; + *(uint32_t*)(p) = template_IADD_RS[dst] | (sib << 24); *(uint32_t*)(p + 4) = instr.getImm32(); - pos += ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4); + pos += ((dst == RegisterNeedsDisplacement) ? 8 : 4); - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; codePos = pos; } @@ -1152,6 +1178,6 @@ namespace randomx { emit(NOP1, code, codePos); } - InstructionGeneratorX86 JitCompilerX86::engine[256] = {}; + alignas(64) InstructionGeneratorX86 JitCompilerX86::engine[256] = {}; } diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index a194f1afb..c1a05c873 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -49,6 +49,7 @@ namespace randomx { public: JitCompilerX86(); ~JitCompilerX86(); + void prepare(); void generateProgram(Program&, ProgramConfiguration&, uint32_t); void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); template @@ -65,7 +66,7 @@ namespace randomx { } size_t getCodeSize(); - static InstructionGeneratorX86 engine[256]; + alignas(64) static InstructionGeneratorX86 engine[256]; int registerUsage[RegistersCount]; uint8_t* allocatedCode; uint8_t* code; diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 88f7b190a..9fcaec925 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -291,7 +291,7 @@ RandomX_ConfigurationLoki RandomX_LokiConfig; RandomX_ConfigurationArqma 
RandomX_ArqmaConfig; RandomX_ConfigurationSafex RandomX_SafexConfig; -RandomX_ConfigurationBase RandomX_CurrentConfig; +alignas(64) RandomX_ConfigurationBase RandomX_CurrentConfig; extern "C" { diff --git a/src/crypto/randomx/vm_compiled.cpp b/src/crypto/randomx/vm_compiled.cpp index 2dc0c0250..501bb8c70 100644 --- a/src/crypto/randomx/vm_compiled.cpp +++ b/src/crypto/randomx/vm_compiled.cpp @@ -41,6 +41,7 @@ namespace randomx { template void CompiledVm::run(void* seed) { + compiler.prepare(); /* runs first, ahead of VmBase::generateProgram(seed) and compiler.generateProgram(...) */ VmBase::generateProgram(seed); randomx_vm::initialize(); compiler.generateProgram(program, config, randomx_vm::getFlags());