mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-12 13:55:19 +00:00
Merge pull request #2028 from SChernykh/dev
RandomX x86 JIT: remove redundant CFROUND
This commit is contained in:
commit
39bfa0c420
3 changed files with 65 additions and 6 deletions
src
|
@ -254,6 +254,8 @@ public:
|
||||||
return strcmp(a->m_threadId, b->m_threadId) < 0;
|
return strcmp(a->m_threadId, b->m_threadId) < 0;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
std::map<std::string, std::pair<uint32_t, double>> averageTime;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < n;)
|
for (uint32_t i = 0; i < n;)
|
||||||
{
|
{
|
||||||
uint32_t n1 = i;
|
uint32_t n1 = i;
|
||||||
|
@ -267,19 +269,27 @@ public:
|
||||||
|
|
||||||
for (uint32_t j = i; j < n1; ++j) {
|
for (uint32_t j = i; j < n1; ++j) {
|
||||||
ProfileScopeData* p = data[j];
|
ProfileScopeData* p = data[j];
|
||||||
|
const double t = p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed;
|
||||||
LOG_INFO("%s Thread %6s | %-30s | %7.3f%% | %9.0f ns",
|
LOG_INFO("%s Thread %6s | %-30s | %7.3f%% | %9.0f ns",
|
||||||
Tags::profiler(),
|
Tags::profiler(),
|
||||||
p->m_threadId,
|
p->m_threadId,
|
||||||
p->m_name,
|
p->m_name,
|
||||||
p->m_totalCycles * 100.0 / data[i]->m_totalCycles,
|
p->m_totalCycles * 100.0 / data[i]->m_totalCycles,
|
||||||
p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed
|
t
|
||||||
);
|
);
|
||||||
|
auto& value = averageTime[p->m_name];
|
||||||
|
++value.first;
|
||||||
|
value.second += t;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_INFO("%s --------------|--------------------------------|----------|-------------", Tags::profiler());
|
LOG_INFO("%s --------------|--------------------------------|----------|-------------", Tags::profiler());
|
||||||
|
|
||||||
i = n1;
|
i = n1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (auto& data : averageTime) {
|
||||||
|
LOG_INFO("%s %-30s %9.1f ns", Tags::profiler(), data.first.c_str(), data.second.second / data.second.first);
|
||||||
|
}
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -164,8 +164,9 @@ namespace randomx {
|
||||||
static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||||
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
||||||
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||||
|
static const uint8_t NOP9[] = { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||||
|
|
||||||
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
|
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8, NOP9 };
|
||||||
|
|
||||||
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
|
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
|
||||||
{},
|
{},
|
||||||
|
@ -431,6 +432,7 @@ namespace randomx {
|
||||||
|
|
||||||
memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||||
codePos = codePosFirst;
|
codePos = codePosFirst;
|
||||||
|
prevCFROUND = 0;
|
||||||
|
|
||||||
//mark all registers as used
|
//mark all registers as used
|
||||||
uint64_t* r = (uint64_t*)registerUsage;
|
uint64_t* r = (uint64_t*)registerUsage;
|
||||||
|
@ -1155,6 +1157,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
|
prevCFROUND = 0;
|
||||||
|
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
const uint64_t src = instr.src % RegisterCountFlt;
|
const uint64_t src = instr.src % RegisterCountFlt;
|
||||||
|
|
||||||
|
@ -1168,6 +1172,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
|
prevCFROUND = 0;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
|
@ -1183,6 +1189,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
|
prevCFROUND = 0;
|
||||||
|
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
const uint64_t src = instr.src % RegisterCountFlt;
|
const uint64_t src = instr.src % RegisterCountFlt;
|
||||||
|
|
||||||
|
@ -1196,6 +1204,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
|
prevCFROUND = 0;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
|
@ -1221,7 +1231,9 @@ namespace randomx {
|
||||||
void JitCompilerX86::h_FMUL_R(const Instruction& instr) {
|
void JitCompilerX86::h_FMUL_R(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
|
prevCFROUND = 0;
|
||||||
|
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
const uint64_t src = instr.src % RegisterCountFlt;
|
const uint64_t src = instr.src % RegisterCountFlt;
|
||||||
|
|
||||||
|
@ -1235,6 +1247,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
|
prevCFROUND = 0;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
|
@ -1260,6 +1274,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
|
prevCFROUND = 0;
|
||||||
|
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
emit32(0xe4510f66 + (((dst << 3) + dst) << 24), p, pos);
|
emit32(0xe4510f66 + (((dst << 3) + dst) << 24), p, pos);
|
||||||
|
@ -1269,7 +1285,22 @@ namespace randomx {
|
||||||
|
|
||||||
void JitCompilerX86::h_CFROUND(const Instruction& instr) {
|
void JitCompilerX86::h_CFROUND(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = prevCFROUND;
|
||||||
|
|
||||||
|
if (pos) {
|
||||||
|
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||||
|
memcpy(p + pos + 0, NOP9, 9);
|
||||||
|
memcpy(p + pos + 9, NOP9, 9);
|
||||||
|
memcpy(p + pos + 18, NOP8, 8);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
memcpy(p + pos + 0, NOP8, 8);
|
||||||
|
memcpy(p + pos + 8, NOP6, 6);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pos = codePos;
|
||||||
|
prevCFROUND = pos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
|
|
||||||
|
@ -1293,7 +1324,22 @@ namespace randomx {
|
||||||
|
|
||||||
void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) {
|
void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = prevCFROUND;
|
||||||
|
|
||||||
|
if (pos) {
|
||||||
|
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||||
|
memcpy(p + pos + 0, NOP9, 9);
|
||||||
|
memcpy(p + pos + 9, NOP9, 9);
|
||||||
|
memcpy(p + pos + 18, NOP7, 7);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
memcpy(p + pos + 0, NOP8, 8);
|
||||||
|
memcpy(p + pos + 8, NOP5, 5);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pos = codePos;
|
||||||
|
prevCFROUND = pos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
|
|
||||||
|
@ -1318,7 +1364,9 @@ namespace randomx {
|
||||||
void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
|
void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
|
prevCFROUND = 0;
|
||||||
|
|
||||||
const int reg = instr.dst % RegistersCount;
|
const int reg = instr.dst % RegistersCount;
|
||||||
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
||||||
|
|
||||||
|
|
|
@ -89,6 +89,7 @@ namespace randomx {
|
||||||
uint32_t codePos = 0;
|
uint32_t codePos = 0;
|
||||||
uint32_t codePosFirst = 0;
|
uint32_t codePosFirst = 0;
|
||||||
uint32_t vm_flags = 0;
|
uint32_t vm_flags = 0;
|
||||||
|
uint32_t prevCFROUND = 0;
|
||||||
|
|
||||||
# ifdef XMRIG_FIX_RYZEN
|
# ifdef XMRIG_FIX_RYZEN
|
||||||
std::pair<const void*, const void*> mainLoopBounds;
|
std::pair<const void*, const void*> mainLoopBounds;
|
||||||
|
|
Loading…
Reference in a new issue