RandomX: further optimized CFROUND elimination

This commit is contained in:
SChernykh 2022-09-16 09:50:58 +02:00
parent f0e7de8c71
commit 93c07e1d34
2 changed files with 24 additions and 23 deletions

View file

@ -424,7 +424,8 @@ namespace randomx {
memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask)); memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask));
codePos = codePosFirst; codePos = codePosFirst;
prevCFROUND = 0; prevCFROUND = -1;
prevFPOperation = -1;
//mark all registers as used //mark all registers as used
uint64_t* r = (uint64_t*)registerUsage; uint64_t* r = (uint64_t*)registerUsage;
@ -1160,7 +1161,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0; prevFPOperation = pos;
const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t dst = instr.dst % RegisterCountFlt;
const uint64_t src = instr.src % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt;
@ -1175,7 +1176,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0; prevFPOperation = pos;
const uint32_t src = instr.src % RegistersCount; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
@ -1192,7 +1193,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0; prevFPOperation = pos;
const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t dst = instr.dst % RegisterCountFlt;
const uint64_t src = instr.src % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt;
@ -1207,7 +1208,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0; prevFPOperation = pos;
const uint32_t src = instr.src % RegistersCount; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
@ -1235,7 +1236,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0; prevFPOperation = pos;
const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t dst = instr.dst % RegisterCountFlt;
const uint64_t src = instr.src % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt;
@ -1250,7 +1251,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0; prevFPOperation = pos;
const uint32_t src = instr.src % RegistersCount; const uint32_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t dst = instr.dst % RegisterCountFlt;
@ -1277,7 +1278,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0; prevFPOperation = pos;
const uint32_t dst = instr.dst % RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
@ -1288,18 +1289,18 @@ namespace randomx {
void JitCompilerX86::h_CFROUND(const Instruction& instr) { void JitCompilerX86::h_CFROUND(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = prevCFROUND; int32_t t = prevCFROUND;
if (pos) { if (t > prevFPOperation) {
if (vm_flags & RANDOMX_FLAG_AMD) { if (vm_flags & RANDOMX_FLAG_AMD) {
memcpy(p + pos, NOP26, 26); memcpy(p + t, NOP26, 26);
} }
else { else {
memcpy(p + pos, NOP14, 14); memcpy(p + t, NOP14, 14);
} }
} }
pos = codePos; uint32_t pos = codePos;
prevCFROUND = pos; prevCFROUND = pos;
const uint32_t src = instr.src % RegistersCount; const uint32_t src = instr.src % RegistersCount;
@ -1324,18 +1325,18 @@ namespace randomx {
void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) { void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = prevCFROUND; int32_t t = prevCFROUND;
if (pos) { if (t > prevFPOperation) {
if (vm_flags & RANDOMX_FLAG_AMD) { if (vm_flags & RANDOMX_FLAG_AMD) {
memcpy(p + pos, NOP25, 25); memcpy(p + t, NOP25, 25);
} }
else { else {
memcpy(p + pos, NOP13, 13); memcpy(p + t, NOP13, 13);
} }
} }
pos = codePos; uint32_t pos = codePos;
prevCFROUND = pos; prevCFROUND = pos;
const uint64_t src = instr.src % RegistersCount; const uint64_t src = instr.src % RegistersCount;
@ -1365,10 +1366,9 @@ namespace randomx {
const int reg = instr.dst % RegistersCount; const int reg = instr.dst % RegistersCount;
int32_t jmp_offset = registerUsage[reg]; int32_t jmp_offset = registerUsage[reg];
// if it jumps over the previous CFROUND, it can't be safely eliminated // if it jumps over the previous FP instruction that uses rounding, treat it as if FP instruction happened now
const uint32_t t = prevCFROUND; if (jmp_offset <= prevFPOperation) {
if (t && (jmp_offset < t)) { prevFPOperation = pos;
prevCFROUND = 0;
} }
jmp_offset -= pos + 16; jmp_offset -= pos + 16;

View file

@ -89,7 +89,8 @@ namespace randomx {
uint32_t codePos = 0; uint32_t codePos = 0;
uint32_t codePosFirst = 0; uint32_t codePosFirst = 0;
uint32_t vm_flags = 0; uint32_t vm_flags = 0;
uint32_t prevCFROUND = 0; int32_t prevCFROUND = -1;
int32_t prevFPOperation = -1;
# ifdef XMRIG_FIX_RYZEN # ifdef XMRIG_FIX_RYZEN
std::pair<const void*, const void*> mainLoopBounds; std::pair<const void*, const void*> mainLoopBounds;