RandomX x86 JIT: remove redundant CFROUND

This commit is contained in:
SChernykh 2021-01-07 16:20:00 +01:00
parent 9f128d1182
commit f62f4e6108
3 changed files with 65 additions and 6 deletions

View file

@@ -254,6 +254,8 @@ public:
return strcmp(a->m_threadId, b->m_threadId) < 0; return strcmp(a->m_threadId, b->m_threadId) < 0;
}); });
std::map<std::string, std::pair<uint32_t, double>> averageTime;
for (uint32_t i = 0; i < n;) for (uint32_t i = 0; i < n;)
{ {
uint32_t n1 = i; uint32_t n1 = i;
@@ -267,19 +269,27 @@ public:
for (uint32_t j = i; j < n1; ++j) { for (uint32_t j = i; j < n1; ++j) {
ProfileScopeData* p = data[j]; ProfileScopeData* p = data[j];
const double t = p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed;
LOG_INFO("%s Thread %6s | %-30s | %7.3f%% | %9.0f ns", LOG_INFO("%s Thread %6s | %-30s | %7.3f%% | %9.0f ns",
Tags::profiler(), Tags::profiler(),
p->m_threadId, p->m_threadId,
p->m_name, p->m_name,
p->m_totalCycles * 100.0 / data[i]->m_totalCycles, p->m_totalCycles * 100.0 / data[i]->m_totalCycles,
p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed t
); );
auto& value = averageTime[p->m_name];
++value.first;
value.second += t;
} }
LOG_INFO("%s --------------|--------------------------------|----------|-------------", Tags::profiler()); LOG_INFO("%s --------------|--------------------------------|----------|-------------", Tags::profiler());
i = n1; i = n1;
} }
for (auto& data : averageTime) {
LOG_INFO("%s %-30s %9.1f ns", Tags::profiler(), data.first.c_str(), data.second.second / data.second.first);
}
# endif # endif
} }

View file

@@ -164,8 +164,9 @@ namespace randomx {
static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 }; static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 }; static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
static const uint8_t NOP9[] = { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 }; static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8, NOP9 };
static const uint8_t JMP_ALIGN_PREFIX[14][16] = { static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
{}, {},
@@ -431,6 +432,7 @@ namespace randomx {
memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask)); memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask));
codePos = codePosFirst; codePos = codePosFirst;
prevCFROUND = 0;
//mark all registers as used //mark all registers as used
uint64_t* r = (uint64_t*)registerUsage; uint64_t* r = (uint64_t*)registerUsage;
@@ -1155,6 +1157,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0;
const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t dst = instr.dst % RegisterCountFlt;
const uint64_t src = instr.src % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt;
@@ -1168,6 +1172,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0;
const uint32_t src = instr.src % RegistersCount; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
@@ -1183,6 +1189,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0;
const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t dst = instr.dst % RegisterCountFlt;
const uint64_t src = instr.src % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt;
@@ -1196,6 +1204,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0;
const uint32_t src = instr.src % RegistersCount; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
@@ -1221,7 +1231,9 @@ namespace randomx {
void JitCompilerX86::h_FMUL_R(const Instruction& instr) { void JitCompilerX86::h_FMUL_R(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0;
const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t dst = instr.dst % RegisterCountFlt;
const uint64_t src = instr.src % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt;
@@ -1235,6 +1247,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0;
const uint32_t src = instr.src % RegistersCount; const uint32_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t dst = instr.dst % RegisterCountFlt;
@@ -1260,6 +1274,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0;
const uint32_t dst = instr.dst % RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
emit32(0xe4510f66 + (((dst << 3) + dst) << 24), p, pos); emit32(0xe4510f66 + (((dst << 3) + dst) << 24), p, pos);
@@ -1269,7 +1285,22 @@ namespace randomx {
void JitCompilerX86::h_CFROUND(const Instruction& instr) { void JitCompilerX86::h_CFROUND(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = prevCFROUND;
if (pos) {
if (vm_flags & RANDOMX_FLAG_AMD) {
memcpy(p + pos + 0, NOP9, 9);
memcpy(p + pos + 9, NOP9, 9);
memcpy(p + pos + 18, NOP8, 8);
}
else {
memcpy(p + pos + 0, NOP8, 8);
memcpy(p + pos + 8, NOP6, 6);
}
}
pos = codePos;
prevCFROUND = pos;
const uint32_t src = instr.src % RegistersCount; const uint32_t src = instr.src % RegistersCount;
@@ -1293,7 +1324,22 @@ namespace randomx {
void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) { void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = prevCFROUND;
if (pos) {
if (vm_flags & RANDOMX_FLAG_AMD) {
memcpy(p + pos + 0, NOP9, 9);
memcpy(p + pos + 9, NOP9, 9);
memcpy(p + pos + 18, NOP7, 7);
}
else {
memcpy(p + pos + 0, NOP8, 8);
memcpy(p + pos + 8, NOP5, 5);
}
}
pos = codePos;
prevCFROUND = pos;
const uint64_t src = instr.src % RegistersCount; const uint64_t src = instr.src % RegistersCount;
@@ -1318,7 +1364,9 @@ namespace randomx {
void JitCompilerX86::h_CBRANCH(const Instruction& instr) { void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
prevCFROUND = 0;
const int reg = instr.dst % RegistersCount; const int reg = instr.dst % RegistersCount;
int32_t jmp_offset = registerUsage[reg] - (pos + 16); int32_t jmp_offset = registerUsage[reg] - (pos + 16);

View file

@@ -89,6 +89,7 @@ namespace randomx {
uint32_t codePos = 0; uint32_t codePos = 0;
uint32_t codePosFirst = 0; uint32_t codePosFirst = 0;
uint32_t vm_flags = 0; uint32_t vm_flags = 0;
uint32_t prevCFROUND = 0;
# ifdef XMRIG_FIX_RYZEN # ifdef XMRIG_FIX_RYZEN
std::pair<const void*, const void*> mainLoopBounds; std::pair<const void*, const void*> mainLoopBounds;