From 7459677fd5800c8c6b321abbfad4220c6b4f74eb Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 18 Dec 2019 09:12:25 +0100 Subject: [PATCH] Add vzeroupper for processors with AVX To avoid false dependencies on upper 128 bits of YMM registers. --- src/crypto/randomx/jit_compiler_x86.cpp | 8 ++++++++ src/crypto/randomx/jit_compiler_x86.hpp | 1 + src/crypto/randomx/jit_compiler_x86_static.asm | 3 +++ 3 files changed, 12 insertions(+) diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 84cfe39c7..9abd320bb 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -289,6 +289,11 @@ namespace randomx { JitCompilerX86::JitCompilerX86() { applyTweaks(); + + int32_t info[4]; + cpuid(1, info); + hasAVX = (info[2] & (1 << 28)) != 0; + allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2); // Shift code base address to improve caching - all threads will use different L2/L3 cache sets code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize); @@ -374,6 +379,9 @@ namespace randomx { code[codePos + 5] = 0xc0 + pcfg.readReg1; *(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated; *(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated; + if (hasAVX) { + *(uint32_t*)(code + codePos + 29) = 0xE977F8C5; + } codePos = prologueSize; memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 9354e5dbc..eabd6e070 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -73,6 +73,7 @@ namespace randomx { uint32_t vm_flags; static bool BranchesWithin32B; + bool hasAVX; static void applyTweaks(); void generateProgramPrologue(Program&, ProgramConfiguration&); diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index 90395c522..9a4d82b92 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -82,6 +82,9 @@ randomx_program_prologue_first_load PROC ror rdx, 32 and edx, RANDOMX_SCRATCHPAD_MASK stmxcsr dword ptr [rsp-20] + nop + nop + nop jmp randomx_program_loop_begin randomx_program_prologue_first_load ENDP