mirror of
https://github.com/xmrig/xmrig.git
synced 2025-03-20 14:19:10 +00:00
Add vzeroupper for processors with AVX
This avoids false dependencies on the upper 128 bits of the YMM registers.
This commit is contained in:
parent
59e8fdb9ed
commit
7459677fd5
3 changed files with 12 additions and 0 deletions
|
@ -289,6 +289,11 @@ namespace randomx {
|
||||||
|
|
||||||
JitCompilerX86::JitCompilerX86() {
|
JitCompilerX86::JitCompilerX86() {
|
||||||
applyTweaks();
|
applyTweaks();
|
||||||
|
|
||||||
|
int32_t info[4];
|
||||||
|
cpuid(1, info);
|
||||||
|
hasAVX = (info[2] & (1 << 28)) != 0;
|
||||||
|
|
||||||
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2);
|
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2);
|
||||||
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
|
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
|
||||||
code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize);
|
code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize);
|
||||||
|
@ -374,6 +379,9 @@ namespace randomx {
|
||||||
code[codePos + 5] = 0xc0 + pcfg.readReg1;
|
code[codePos + 5] = 0xc0 + pcfg.readReg1;
|
||||||
*(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
*(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||||
*(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
*(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||||
|
if (hasAVX) {
|
||||||
|
*(uint32_t*)(code + codePos + 29) = 0xE977F8C5;
|
||||||
|
}
|
||||||
|
|
||||||
codePos = prologueSize;
|
codePos = prologueSize;
|
||||||
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||||
|
|
|
@ -73,6 +73,7 @@ namespace randomx {
|
||||||
uint32_t vm_flags;
|
uint32_t vm_flags;
|
||||||
|
|
||||||
static bool BranchesWithin32B;
|
static bool BranchesWithin32B;
|
||||||
|
bool hasAVX;
|
||||||
|
|
||||||
static void applyTweaks();
|
static void applyTweaks();
|
||||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||||
|
|
|
@ -82,6 +82,9 @@ randomx_program_prologue_first_load PROC
|
||||||
ror rdx, 32
|
ror rdx, 32
|
||||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||||
stmxcsr dword ptr [rsp-20]
|
stmxcsr dword ptr [rsp-20]
|
||||||
|
nop
|
||||||
|
nop
|
||||||
|
nop
|
||||||
jmp randomx_program_loop_begin
|
jmp randomx_program_loop_begin
|
||||||
randomx_program_prologue_first_load ENDP
|
randomx_program_prologue_first_load ENDP
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue