diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp
index 84cfe39c7..9abd320bb 100644
--- a/src/crypto/randomx/jit_compiler_x86.cpp
+++ b/src/crypto/randomx/jit_compiler_x86.cpp
@@ -289,6 +289,14 @@ namespace randomx {
 
 	JitCompilerX86::JitCompilerX86() {
 		applyTweaks();
+
+		// AVX instructions are only safe when the CPU reports AVX (CPUID.1:ECX bit 28)
+		// AND the OS has enabled XSAVE state management (OSXSAVE, CPUID.1:ECX bit 27).
+		// Without OSXSAVE the vzeroupper patched into the prologue raises #UD.
+		int32_t info[4];
+		cpuid(1, info);
+		hasAVX = ((info[2] & (1 << 27)) != 0) && ((info[2] & (1 << 28)) != 0);
+
 		allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2);
 		// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
 		code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize);
@@ -374,6 +382,11 @@ namespace randomx {
 		code[codePos + 5] = 0xc0 + pcfg.readReg1;
 		*(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
 		*(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
+		if (hasAVX) {
+			// Little-endian bytes C5 F8 77 E9: vzeroupper followed by the jmp rel32 opcode.
+			// Replaces the three placeholder nops after stmxcsr (assumes the jmp is encoded as E9).
+			*(uint32_t*)(code + codePos + 29) = 0xE977F8C5;
+		}
 
 		codePos = prologueSize;
 		memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp
index 9354e5dbc..eabd6e070 100644
--- a/src/crypto/randomx/jit_compiler_x86.hpp
+++ b/src/crypto/randomx/jit_compiler_x86.hpp
@@ -73,6 +73,7 @@ namespace randomx {
 		uint32_t vm_flags;
 
 		static bool BranchesWithin32B;
+		bool hasAVX;
 
 		static void applyTweaks();
 		void generateProgramPrologue(Program&, ProgramConfiguration&);
diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm
index 90395c522..9a4d82b92 100644
--- a/src/crypto/randomx/jit_compiler_x86_static.asm
+++ b/src/crypto/randomx/jit_compiler_x86_static.asm
@@ -82,6 +82,9 @@ randomx_program_prologue_first_load PROC
 	ror rdx, 32
 	and edx, RANDOMX_SCRATCHPAD_MASK
 	stmxcsr dword ptr [rsp-20]
+	nop ; 3 placeholder bytes: the JIT overwrites them with vzeroupper when AVX is usable
+	nop
+	nop
 	jmp randomx_program_loop_begin
 randomx_program_prologue_first_load ENDP
 