diff --git a/src/core/Miner.cpp b/src/core/Miner.cpp index 9c96d1e36..8d0263e22 100644 --- a/src/core/Miner.cpp +++ b/src/core/Miner.cpp @@ -254,6 +254,8 @@ public: return strcmp(a->m_threadId, b->m_threadId) < 0; }); + std::map<std::string, std::pair<uint32_t, double>> averageTime; + for (uint32_t i = 0; i < n;) { uint32_t n1 = i; @@ -267,19 +269,27 @@ public: for (uint32_t j = i; j < n1; ++j) { ProfileScopeData* p = data[j]; + const double t = p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed; LOG_INFO("%s Thread %6s | %-30s | %7.3f%% | %9.0f ns", Tags::profiler(), p->m_threadId, p->m_name, p->m_totalCycles * 100.0 / data[i]->m_totalCycles, - p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed + t ); + auto& value = averageTime[p->m_name]; + ++value.first; + value.second += t; } LOG_INFO("%s --------------|--------------------------------|----------|-------------", Tags::profiler()); i = n1; } + + for (auto& data : averageTime) { + LOG_INFO("%s %-30s %9.1f ns", Tags::profiler(), data.first.c_str(), data.second.second / data.second.first); + } # endif } diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 0e79d6a40..8c3145b2a 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -164,8 +164,9 @@ namespace randomx { static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 }; static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 }; static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const uint8_t NOP9[] = { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; - static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 }; + static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8, NOP9 }; static const uint8_t JMP_ALIGN_PREFIX[14][16] = { {}, @@ -431,6 +432,7 @@ namespace randomx { memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask)); codePos 
= codePosFirst; + prevCFROUND = 0; //mark all registers as used uint64_t* r = (uint64_t*)registerUsage; @@ -1155,6 +1157,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; + prevCFROUND = 0; + const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt; @@ -1168,6 +1172,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; + prevCFROUND = 0; + const uint32_t src = instr.src % RegistersCount; const uint32_t dst = instr.dst % RegisterCountFlt; @@ -1183,6 +1189,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; + prevCFROUND = 0; + const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt; @@ -1196,6 +1204,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; + prevCFROUND = 0; + const uint32_t src = instr.src % RegistersCount; const uint32_t dst = instr.dst % RegisterCountFlt; @@ -1221,7 +1231,9 @@ namespace randomx { void JitCompilerX86::h_FMUL_R(const Instruction& instr) { uint8_t* const p = code; uint32_t pos = codePos; - + + prevCFROUND = 0; + const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t src = instr.src % RegisterCountFlt; @@ -1235,6 +1247,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; + prevCFROUND = 0; + const uint32_t src = instr.src % RegistersCount; const uint64_t dst = instr.dst % RegisterCountFlt; @@ -1260,6 +1274,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; + prevCFROUND = 0; + const uint32_t dst = instr.dst % RegisterCountFlt; emit32(0xe4510f66 + (((dst << 3) + dst) << 24), p, pos); @@ -1269,7 +1285,22 @@ namespace randomx { void JitCompilerX86::h_CFROUND(const Instruction& instr) { uint8_t* const p = code; - uint32_t pos = codePos; + uint32_t pos = prevCFROUND; + + if (pos) { + if (vm_flags & RANDOMX_FLAG_AMD) { + memcpy(p + pos + 0, NOP9, 9); + memcpy(p + pos + 9, NOP9, 9); + memcpy(p + pos + 18, NOP8, 8); + } 
+ else { + memcpy(p + pos + 0, NOP8, 8); + memcpy(p + pos + 8, NOP6, 6); + } + } + + pos = codePos; + prevCFROUND = pos; const uint32_t src = instr.src % RegistersCount; @@ -1293,7 +1324,22 @@ namespace randomx { void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) { uint8_t* const p = code; - uint32_t pos = codePos; + uint32_t pos = prevCFROUND; + + if (pos) { + if (vm_flags & RANDOMX_FLAG_AMD) { + memcpy(p + pos + 0, NOP9, 9); + memcpy(p + pos + 9, NOP9, 9); + memcpy(p + pos + 18, NOP7, 7); + } + else { + memcpy(p + pos + 0, NOP8, 8); + memcpy(p + pos + 8, NOP5, 5); + } + } + + pos = codePos; + prevCFROUND = pos; const uint64_t src = instr.src % RegistersCount; @@ -1318,7 +1364,9 @@ namespace randomx { void JitCompilerX86::h_CBRANCH(const Instruction& instr) { uint8_t* const p = code; uint32_t pos = codePos; - + + prevCFROUND = 0; + const int reg = instr.dst % RegistersCount; int32_t jmp_offset = registerUsage[reg] - (pos + 16); diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 5c43264c5..0d2b4321b 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -89,6 +89,7 @@ namespace randomx { uint32_t codePos = 0; uint32_t codePosFirst = 0; uint32_t vm_flags = 0; + uint32_t prevCFROUND = 0; # ifdef XMRIG_FIX_RYZEN std::pair mainLoopBounds;