mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-10 21:04:37 +00:00
Merge pull request #2028 from SChernykh/dev
RandomX x86 JIT: remove redundant CFROUND
This commit is contained in:
commit
39bfa0c420
3 changed files with 65 additions and 6 deletions
|
@ -254,6 +254,8 @@ public:
|
|||
return strcmp(a->m_threadId, b->m_threadId) < 0;
|
||||
});
|
||||
|
||||
std::map<std::string, std::pair<uint32_t, double>> averageTime;
|
||||
|
||||
for (uint32_t i = 0; i < n;)
|
||||
{
|
||||
uint32_t n1 = i;
|
||||
|
@ -267,19 +269,27 @@ public:
|
|||
|
||||
for (uint32_t j = i; j < n1; ++j) {
|
||||
ProfileScopeData* p = data[j];
|
||||
const double t = p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed;
|
||||
LOG_INFO("%s Thread %6s | %-30s | %7.3f%% | %9.0f ns",
|
||||
Tags::profiler(),
|
||||
p->m_threadId,
|
||||
p->m_name,
|
||||
p->m_totalCycles * 100.0 / data[i]->m_totalCycles,
|
||||
p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed
|
||||
t
|
||||
);
|
||||
auto& value = averageTime[p->m_name];
|
||||
++value.first;
|
||||
value.second += t;
|
||||
}
|
||||
|
||||
LOG_INFO("%s --------------|--------------------------------|----------|-------------", Tags::profiler());
|
||||
|
||||
i = n1;
|
||||
}
|
||||
|
||||
for (auto& data : averageTime) {
|
||||
LOG_INFO("%s %-30s %9.1f ns", Tags::profiler(), data.first.c_str(), data.second.second / data.second.first);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
|
|
|
@ -164,8 +164,9 @@ namespace randomx {
|
|||
static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
||||
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||
static const uint8_t NOP9[] = { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||
|
||||
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
|
||||
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8, NOP9 };
|
||||
|
||||
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
|
||||
{},
|
||||
|
@ -431,6 +432,7 @@ namespace randomx {
|
|||
|
||||
memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||
codePos = codePosFirst;
|
||||
prevCFROUND = 0;
|
||||
|
||||
//mark all registers as used
|
||||
uint64_t* r = (uint64_t*)registerUsage;
|
||||
|
@ -1155,6 +1157,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
prevCFROUND = 0;
|
||||
|
||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||
const uint64_t src = instr.src % RegisterCountFlt;
|
||||
|
||||
|
@ -1168,6 +1172,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
prevCFROUND = 0;
|
||||
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
|
@ -1183,6 +1189,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
prevCFROUND = 0;
|
||||
|
||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||
const uint64_t src = instr.src % RegisterCountFlt;
|
||||
|
||||
|
@ -1196,6 +1204,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
prevCFROUND = 0;
|
||||
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
|
@ -1222,6 +1232,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
prevCFROUND = 0;
|
||||
|
||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||
const uint64_t src = instr.src % RegisterCountFlt;
|
||||
|
||||
|
@ -1235,6 +1247,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
prevCFROUND = 0;
|
||||
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
|
@ -1260,6 +1274,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
prevCFROUND = 0;
|
||||
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
emit32(0xe4510f66 + (((dst << 3) + dst) << 24), p, pos);
|
||||
|
@ -1269,7 +1285,22 @@ namespace randomx {
|
|||
|
||||
void JitCompilerX86::h_CFROUND(const Instruction& instr) {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
uint32_t pos = prevCFROUND;
|
||||
|
||||
if (pos) {
|
||||
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||
memcpy(p + pos + 0, NOP9, 9);
|
||||
memcpy(p + pos + 9, NOP9, 9);
|
||||
memcpy(p + pos + 18, NOP8, 8);
|
||||
}
|
||||
else {
|
||||
memcpy(p + pos + 0, NOP8, 8);
|
||||
memcpy(p + pos + 8, NOP6, 6);
|
||||
}
|
||||
}
|
||||
|
||||
pos = codePos;
|
||||
prevCFROUND = pos;
|
||||
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
|
||||
|
@ -1293,7 +1324,22 @@ namespace randomx {
|
|||
|
||||
void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
uint32_t pos = prevCFROUND;
|
||||
|
||||
if (pos) {
|
||||
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||
memcpy(p + pos + 0, NOP9, 9);
|
||||
memcpy(p + pos + 9, NOP9, 9);
|
||||
memcpy(p + pos + 18, NOP7, 7);
|
||||
}
|
||||
else {
|
||||
memcpy(p + pos + 0, NOP8, 8);
|
||||
memcpy(p + pos + 8, NOP5, 5);
|
||||
}
|
||||
}
|
||||
|
||||
pos = codePos;
|
||||
prevCFROUND = pos;
|
||||
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
|
||||
|
@ -1319,6 +1365,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
prevCFROUND = 0;
|
||||
|
||||
const int reg = instr.dst % RegistersCount;
|
||||
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
||||
|
||||
|
|
|
@ -89,6 +89,7 @@ namespace randomx {
|
|||
uint32_t codePos = 0;
|
||||
uint32_t codePosFirst = 0;
|
||||
uint32_t vm_flags = 0;
|
||||
uint32_t prevCFROUND = 0;
|
||||
|
||||
# ifdef XMRIG_FIX_RYZEN
|
||||
std::pair<const void*, const void*> mainLoopBounds;
|
||||
|
|
Loading…
Reference in a new issue