mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-22 10:45:06 +00:00
Auto-detect the fastest code for dataset init
This commit is contained in:
parent
7aba194d3b
commit
410313d933
5 changed files with 67 additions and 3 deletions
|
@ -40,6 +40,14 @@ public:
|
||||||
VENDOR_AMD
|
VENDOR_AMD
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum Arch : uint32_t {
|
||||||
|
ARCH_UNKNOWN,
|
||||||
|
ARCH_ZEN,
|
||||||
|
ARCH_ZEN_PLUS,
|
||||||
|
ARCH_ZEN2,
|
||||||
|
ARCH_ZEN3
|
||||||
|
};
|
||||||
|
|
||||||
enum MsrMod : uint32_t {
|
enum MsrMod : uint32_t {
|
||||||
MSR_MOD_NONE,
|
MSR_MOD_NONE,
|
||||||
MSR_MOD_RYZEN_17H,
|
MSR_MOD_RYZEN_17H,
|
||||||
|
@ -100,6 +108,7 @@ public:
|
||||||
virtual size_t packages() const = 0;
|
virtual size_t packages() const = 0;
|
||||||
virtual size_t threads() const = 0;
|
virtual size_t threads() const = 0;
|
||||||
virtual Vendor vendor() const = 0;
|
virtual Vendor vendor() const = 0;
|
||||||
|
virtual Arch arch() const = 0;
|
||||||
virtual bool jccErratum() const = 0;
|
virtual bool jccErratum() const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -217,9 +217,27 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||||
switch (m_family) {
|
switch (m_family) {
|
||||||
case 0x17:
|
case 0x17:
|
||||||
m_msrMod = MSR_MOD_RYZEN_17H;
|
m_msrMod = MSR_MOD_RYZEN_17H;
|
||||||
|
switch (m_model) {
|
||||||
|
case 1:
|
||||||
|
case 17:
|
||||||
|
case 32:
|
||||||
|
m_arch = ARCH_ZEN;
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
case 24:
|
||||||
|
m_arch = ARCH_ZEN_PLUS;
|
||||||
|
break;
|
||||||
|
case 49:
|
||||||
|
case 96:
|
||||||
|
case 113:
|
||||||
|
case 144:
|
||||||
|
m_arch = ARCH_ZEN2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x19:
|
case 0x19:
|
||||||
|
m_arch = ARCH_ZEN3;
|
||||||
m_msrMod = MSR_MOD_RYZEN_19H;
|
m_msrMod = MSR_MOD_RYZEN_19H;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
@ -64,12 +64,14 @@ protected:
|
||||||
inline size_t packages() const override { return 1; }
|
inline size_t packages() const override { return 1; }
|
||||||
inline size_t threads() const override { return m_threads; }
|
inline size_t threads() const override { return m_threads; }
|
||||||
inline Vendor vendor() const override { return m_vendor; }
|
inline Vendor vendor() const override { return m_vendor; }
|
||||||
|
inline Arch arch() const override { return m_arch; }
|
||||||
inline bool jccErratum() const override { return m_jccErratum; }
|
inline bool jccErratum() const override { return m_jccErratum; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
char m_brand[64 + 6]{};
|
char m_brand[64 + 6]{};
|
||||||
size_t m_threads;
|
size_t m_threads;
|
||||||
Vendor m_vendor = VENDOR_UNKNOWN;
|
Vendor m_vendor = VENDOR_UNKNOWN;
|
||||||
|
Arch m_arch = ARCH_UNKNOWN;
|
||||||
bool m_jccErratum = false;
|
bool m_jccErratum = false;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -214,9 +214,43 @@ namespace randomx {
|
||||||
|
|
||||||
hasAVX = xmrig::Cpu::info()->hasAVX();
|
hasAVX = xmrig::Cpu::info()->hasAVX();
|
||||||
hasAVX2 = xmrig::Cpu::info()->hasAVX2();
|
hasAVX2 = xmrig::Cpu::info()->hasAVX2();
|
||||||
|
|
||||||
|
// Set to false by default
|
||||||
|
initDatasetAVX2 = false;
|
||||||
|
|
||||||
|
xmrig::ICpuInfo::Vendor vendor = xmrig::Cpu::info()->vendor();
|
||||||
|
xmrig::ICpuInfo::Arch arch = xmrig::Cpu::info()->arch();
|
||||||
|
|
||||||
|
if (vendor == xmrig::ICpuInfo::VENDOR_INTEL) {
|
||||||
|
// AVX2 init is faster on Intel CPUs without HT
|
||||||
|
initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads();
|
||||||
|
}
|
||||||
|
else if (vendor == xmrig::ICpuInfo::VENDOR_AMD) {
|
||||||
|
switch (arch) {
|
||||||
|
case xmrig::ICpuInfo::ARCH_ZEN:
|
||||||
|
case xmrig::ICpuInfo::ARCH_ZEN_PLUS:
|
||||||
|
// AVX2 init is slow on Zen/Zen+
|
||||||
|
initDatasetAVX2 = false;
|
||||||
|
break;
|
||||||
|
case xmrig::ICpuInfo::ARCH_ZEN2:
|
||||||
|
// AVX2 init is faster on Zen2 without SMT (mobile CPUs)
|
||||||
|
initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads();
|
||||||
|
break;
|
||||||
|
case xmrig::ICpuInfo::ARCH_ZEN3:
|
||||||
|
// AVX2 init is faster on Zen3
|
||||||
|
initDatasetAVX2 = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sorry low-end Intel CPUs
|
||||||
|
if (!hasAVX2) {
|
||||||
|
initDatasetAVX2 = false;
|
||||||
|
}
|
||||||
|
|
||||||
hasXOP = xmrig::Cpu::info()->hasXOP();
|
hasXOP = xmrig::Cpu::info()->hasXOP();
|
||||||
|
|
||||||
allocatedSize = hasAVX2 ? (CodeSize * 4) : (CodeSize * 2);
|
allocatedSize = initDatasetAVX2 ? (CodeSize * 4) : (CodeSize * 2);
|
||||||
allocatedCode = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize,
|
allocatedCode = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize,
|
||||||
# ifdef XMRIG_SECURE_JIT
|
# ifdef XMRIG_SECURE_JIT
|
||||||
false
|
false
|
||||||
|
@ -299,7 +333,7 @@ namespace randomx {
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) {
|
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) {
|
||||||
uint8_t* p = code;
|
uint8_t* p = code;
|
||||||
if (hasAVX2) {
|
if (initDatasetAVX2) {
|
||||||
codePos = 0;
|
codePos = 0;
|
||||||
emit(codeDatasetInitAVX2_prologue, datasetInitAVX2_prologue_size, code, codePos);
|
emit(codeDatasetInitAVX2_prologue, datasetInitAVX2_prologue_size, code, codePos);
|
||||||
|
|
||||||
|
@ -356,7 +390,7 @@ namespace randomx {
|
||||||
|
|
||||||
void JitCompilerX86::generateDatasetInitCode() {
|
void JitCompilerX86::generateDatasetInitCode() {
|
||||||
// AVX2 code is generated in generateSuperscalarHash()
|
// AVX2 code is generated in generateSuperscalarHash()
|
||||||
if (!hasAVX2) {
|
if (!initDatasetAVX2) {
|
||||||
memcpy(code, codeDatasetInit, datasetInitSize);
|
memcpy(code, codeDatasetInit, datasetInitSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,6 +97,7 @@ namespace randomx {
|
||||||
bool BranchesWithin32B = false;
|
bool BranchesWithin32B = false;
|
||||||
bool hasAVX;
|
bool hasAVX;
|
||||||
bool hasAVX2;
|
bool hasAVX2;
|
||||||
|
bool initDatasetAVX2;
|
||||||
bool hasXOP;
|
bool hasXOP;
|
||||||
|
|
||||||
uint8_t* allocatedCode = nullptr;
|
uint8_t* allocatedCode = nullptr;
|
||||||
|
|
Loading…
Reference in a new issue