mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-10 21:04:37 +00:00
Add vzeroupper for processors with AVX
This avoids false dependencies on the upper 128 bits of the YMM registers.
This commit is contained in:
parent
59e8fdb9ed
commit
7459677fd5
3 changed files with 12 additions and 0 deletions
|
@ -289,6 +289,11 @@ namespace randomx {
|
|||
|
||||
JitCompilerX86::JitCompilerX86() {
|
||||
applyTweaks();
|
||||
|
||||
int32_t info[4];
|
||||
cpuid(1, info);
|
||||
hasAVX = (info[2] & (1 << 28)) != 0;
|
||||
|
||||
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2);
|
||||
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
|
||||
code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize);
|
||||
|
@ -374,6 +379,9 @@ namespace randomx {
|
|||
code[codePos + 5] = 0xc0 + pcfg.readReg1;
|
||||
*(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||
*(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||
if (hasAVX) {
|
||||
*(uint32_t*)(code + codePos + 29) = 0xE977F8C5;
|
||||
}
|
||||
|
||||
codePos = prologueSize;
|
||||
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||
|
|
|
@ -73,6 +73,7 @@ namespace randomx {
|
|||
uint32_t vm_flags;
|
||||
|
||||
static bool BranchesWithin32B;
|
||||
bool hasAVX;
|
||||
|
||||
static void applyTweaks();
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
|
|
|
@ -82,6 +82,9 @@ randomx_program_prologue_first_load PROC
|
|||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
stmxcsr dword ptr [rsp-20]
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
jmp randomx_program_loop_begin
|
||||
randomx_program_prologue_first_load ENDP
|
||||
|
||||
|
|
Loading…
Reference in a new issue