mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-17 16:27:44 +00:00
Merge pull request #1508 from SChernykh/dev
Added support for BMI2 instructions
This commit is contained in:
commit
39eafc3255
8 changed files with 81 additions and 5 deletions
|
@ -63,6 +63,7 @@ public:
|
|||
virtual Assembly::Id assembly() const = 0;
|
||||
virtual bool hasAES() const = 0;
|
||||
virtual bool hasAVX2() const = 0;
|
||||
virtual bool hasBMI2() const = 0;
|
||||
virtual bool hasOneGbPages() const = 0;
|
||||
virtual const char *backend() const = 0;
|
||||
virtual const char *brand() const = 0;
|
||||
|
|
|
@ -156,6 +156,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
|
|||
}
|
||||
|
||||
m_avx2 = data.flags[CPU_FEATURE_AVX2] && data.flags[CPU_FEATURE_OSXSAVE];
|
||||
m_bmi2 = data.flags[CPU_FEATURE_BMI2];
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -43,6 +43,7 @@ protected:
|
|||
inline Assembly::Id assembly() const override { return m_assembly; }
|
||||
inline bool hasAES() const override { return m_aes; }
|
||||
inline bool hasAVX2() const override { return m_avx2; }
|
||||
inline bool hasBMI2() const override { return m_bmi2; }
|
||||
inline bool hasOneGbPages() const override { return m_pdpe1gb; }
|
||||
inline const char *backend() const override { return m_backend; }
|
||||
inline const char *brand() const override { return m_brand; }
|
||||
|
@ -59,6 +60,7 @@ private:
|
|||
Assembly m_assembly;
|
||||
bool m_aes = false;
|
||||
bool m_avx2 = false;
|
||||
bool m_bmi2 = false;
|
||||
bool m_L2_exclusive = false;
|
||||
char m_backend[32]{};
|
||||
char m_brand[64 + 5]{};
|
||||
|
|
|
@ -45,6 +45,10 @@
|
|||
# define bit_AVX2 (1 << 5)
|
||||
#endif
|
||||
|
||||
// Fallback definition of the BMI2 feature mask (bit 8), used only when no
// earlier header has already defined bit_BMI2.
#ifndef bit_BMI2
|
||||
# define bit_BMI2 (1 << 8)
|
||||
#endif
|
||||
|
||||
#ifndef bit_PDPE1GB
|
||||
# define bit_PDPE1GB (1 << 26)
|
||||
#endif
|
||||
|
@ -141,6 +145,12 @@ static inline bool has_avx2()
|
|||
}
|
||||
|
||||
|
||||
// Reports whether the CPU advertises BMI2 support.
// Delegates to has_feature(), testing the bit_BMI2 mask against the EBX
// register of the EXTENDED_FEATURES query.
// NOTE(review): EXTENDED_FEATURES is presumably CPUID leaf 7 -- its value is
// defined outside this view, confirm there.
static inline bool has_bmi2()
|
||||
{
|
||||
return has_feature(EXTENDED_FEATURES, EBX_Reg, bit_BMI2);
|
||||
}
|
||||
|
||||
|
||||
static inline bool has_pdpe1gb()
|
||||
{
|
||||
return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, bit_PDPE1GB);
|
||||
|
@ -154,6 +164,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
|||
m_threads(std::thread::hardware_concurrency()),
|
||||
m_aes(has_aes_ni()),
|
||||
m_avx2(has_avx2()),
|
||||
m_bmi2(has_bmi2()),
|
||||
m_pdpe1gb(has_pdpe1gb())
|
||||
{
|
||||
cpu_brand_string(m_brand);
|
||||
|
|
|
@ -44,6 +44,7 @@ protected:
|
|||
inline Assembly::Id assembly() const override { return m_assembly; }
|
||||
inline bool hasAES() const override { return m_aes; }
|
||||
inline bool hasAVX2() const override { return m_avx2; }
|
||||
inline bool hasBMI2() const override { return m_bmi2; }
|
||||
inline bool hasOneGbPages() const override { return m_pdpe1gb; }
|
||||
inline const char *brand() const override { return m_brand; }
|
||||
inline MsrMod msrMod() const override { return m_msrMod; }
|
||||
|
@ -63,6 +64,7 @@ private:
|
|||
Assembly m_assembly = Assembly::NONE;
|
||||
bool m_aes = false;
|
||||
const bool m_avx2 = false;
|
||||
const bool m_bmi2 = false;
|
||||
const bool m_pdpe1gb = false;
|
||||
MsrMod m_msrMod = MSR_MOD_NONE;
|
||||
Vendor m_vendor = VENDOR_UNKNOWN;
|
||||
|
|
|
@ -721,14 +721,31 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
int pos = codePos;
|
||||
|
||||
const uint32_t dst = instr.dst;
|
||||
|
||||
emit(REX_MOV_RR64, p, pos);
|
||||
emitByte(0xc0 + instr.dst, p, pos);
|
||||
emitByte(0xc0 + dst, p, pos);
|
||||
emit(REX_MUL_R, p, pos);
|
||||
emitByte(0xe0 + instr.src, p, pos);
|
||||
emit(REX_MOV_R64R, p, pos);
|
||||
emitByte(0xc2 + 8 * instr.dst, p, pos);
|
||||
emitByte(0xc2 + 8 * dst, p, pos);
|
||||
|
||||
registerUsage[instr.dst] = pos;
|
||||
registerUsage[dst] = pos;
|
||||
codePos = pos;
|
||||
}
|
||||
|
||||
// BMI2 variant of the IMULH_R code generator.
// Emits 8 bytes of machine code at code[codePos] as two little-endian 32-bit
// stores, then advances codePos by 8 and records the new position in
// registerUsage[dst].
// The emitted bytes decode as (assuming instr.src / instr.dst are in 0..7 --
// TODO confirm against the Instruction definition):
//     mov  rdx, r8+dst              ; 49 8B D0+dst
//     mulx r8+dst, rax, r8+src      ; C4 42 FB F6 C0+8*dst+src
// i.e. the high half of the product lands in r8+dst and the low half in rax.
void JitCompilerX86::h_IMULH_R_BMI2(const Instruction& instr) {
|
||||
uint8_t* const p = code;
|
||||
int pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
|
||||
// Bytes 0-2: "mov rdx, r8+dst" (dst << 16 lands in the ModRM byte, D0).
// Byte 3 (0xC4) is already the first byte of the 3-byte VEX prefix of MULX.
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
|
||||
// Remaining VEX bytes (42 FB), opcode F6 and the ModRM byte in the top 8 bits:
// dst << 27 fills ModRM.reg (bits 3-5), src << 24 fills ModRM.rm (bits 0-2).
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
|
||||
pos += 8;
|
||||
|
||||
// Remember the code position right after the last write to register dst.
registerUsage[dst] = pos;
|
||||
codePos = pos;
|
||||
}
|
||||
|
||||
|
@ -756,6 +773,29 @@ namespace randomx {
|
|||
codePos = pos;
|
||||
}
|
||||
|
||||
// BMI2 variant of the IMULH_M code generator (memory source operand).
// Two encodings are emitted depending on whether src and dst name the same
// register:
//   src != dst: address code from genAddressReg<false>(), followed by 9 bytes
//               that decode as
//                   mov  rdx, r8+dst
//                   mulx r8+dst, rax, [rsi+rcx]
//   src == dst: 12 bytes that decode as
//                   mov  rdx, r8+dst
//                   mulx r8+dst, rax, [rsi+disp32]
//               with disp32 = instr.getImm32() & ScratchpadL3Mask.
// In both cases codePos is advanced past the emitted bytes and the new
// position is stored in registerUsage[dst].
// NOTE(review): register decoding assumes instr.dst is in 0..7, and that
// genAddressReg<false>() leaves the address offset in rcx with rsi as the
// base -- both are defined outside this view, confirm there.
void JitCompilerX86::h_IMULH_M_BMI2(const Instruction& instr) {
|
||||
uint8_t* const p = code;
|
||||
int pos = codePos;
|
||||
|
||||
// 64-bit locals so that the (dst << 59) shift below is done in 64-bit arithmetic.
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
|
||||
if (src != dst) {
|
||||
// Helper appends its own code through p/pos before the multiply is emitted.
genAddressReg<false>(instr, p, pos);
|
||||
// "mov rdx, r8+dst" (49 8B D0+dst) plus 0xC4, the first VEX byte of MULX.
*(uint32_t*)(p + pos) = static_cast<uint32_t>(0xC4D08B49 + (dst << 16));
|
||||
// Rest of MULX: 62 FB F6, ModRM 04+8*dst (SIB follows), SIB 0E.
// This is an 8-byte store but only 5 of the bytes are counted in pos below;
// the 3 trailing zero bytes lie beyond the new pos.
*(uint64_t*)(p + pos + 4) = 0x0E04F6FB62ULL + (dst << 27);
|
||||
pos += 9;
|
||||
}
|
||||
else {
|
||||
// One 8-byte store holds both instructions up to the ModRM byte:
// 49 8B D0+dst | C4 62 FB F6 86+8*dst. dst << 16 patches the MOV ModRM,
// dst << 59 patches the MULX ModRM.reg field in the top byte.
*(uint64_t*)(p + pos) = 0x86F6FB62C4D08B49ULL + (dst << 16) + (dst << 59);
|
||||
// 32-bit displacement for the memory operand, masked with ScratchpadL3Mask.
*(uint32_t*)(p + pos + 8) = instr.getImm32() & ScratchpadL3Mask;
|
||||
pos += 12;
|
||||
}
|
||||
|
||||
// Remember the code position right after the last write to register dst.
registerUsage[dst] = pos;
|
||||
codePos = pos;
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISMULH_R(const Instruction& instr) {
|
||||
uint8_t* const p = code;
|
||||
int pos = codePos;
|
||||
|
|
|
@ -123,7 +123,9 @@ namespace randomx {
|
|||
void h_IMUL_R(const Instruction&);
|
||||
void h_IMUL_M(const Instruction&);
|
||||
void h_IMULH_R(const Instruction&);
|
||||
void h_IMULH_R_BMI2(const Instruction&);
|
||||
void h_IMULH_M(const Instruction&);
|
||||
void h_IMULH_M_BMI2(const Instruction&);
|
||||
void h_ISMULH_R(const Instruction&);
|
||||
void h_ISMULH_M(const Instruction&);
|
||||
void h_IMUL_RCP(const Instruction&);
|
||||
|
|
|
@ -41,6 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "crypto/randomx/jit_compiler_a64_static.hpp"
|
||||
#endif
|
||||
|
||||
#include "backend/cpu/Cpu.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
||||
|
@ -235,14 +237,29 @@ void RandomX_ConfigurationBase::Apply()
|
|||
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
|
||||
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
|
||||
|
||||
#define INST_HANDLE2(x, func_name, prev) \
|
||||
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
|
||||
for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); }
|
||||
|
||||
INST_HANDLE(IADD_RS, NULL);
|
||||
INST_HANDLE(IADD_M, IADD_RS);
|
||||
INST_HANDLE(ISUB_R, IADD_M);
|
||||
INST_HANDLE(ISUB_M, ISUB_R);
|
||||
INST_HANDLE(IMUL_R, ISUB_M);
|
||||
INST_HANDLE(IMUL_M, IMUL_R);
|
||||
INST_HANDLE(IMULH_R, IMUL_M);
|
||||
INST_HANDLE(IMULH_M, IMULH_R);
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
||||
INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M);
|
||||
INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
INST_HANDLE(IMULH_R, IMUL_M);
|
||||
INST_HANDLE(IMULH_M, IMULH_R);
|
||||
}
|
||||
|
||||
INST_HANDLE(ISMULH_R, IMULH_M);
|
||||
INST_HANDLE(ISMULH_M, ISMULH_R);
|
||||
INST_HANDLE(IMUL_RCP, ISMULH_M);
|
||||
|
|
Loading…
Reference in a new issue