Merge pull request #1220 from SChernykh/evo

Refactored JIT compiler for x86, small RandomX speedup
This commit is contained in:
xmrig 2019-10-06 08:16:40 +07:00 committed by GitHub
commit 91498ec690
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 123 additions and 118 deletions

View file

@ -268,8 +268,6 @@ namespace randomx {
}
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
memset(registerUsage, -1, sizeof(registerUsage));
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
code[codePos + 2] = 0xc0 + pcfg.readReg0;
code[codePos + 5] = 0xc0 + pcfg.readReg1;
@ -280,13 +278,21 @@ namespace randomx {
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
codePos += loopLoadSize;
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
instructionOffsets[i] = codePos;
(this->*(engine[instr.opcode]))(instr, i);
//mark all registers as used
uint64_t* r = (uint64_t*)registerUsage;
uint64_t k = codePos;
k |= k << 32;
for (unsigned j = 0; j < RegistersCount / 2; ++j) {
r[j] = k;
}
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; ++i) {
Instruction instr = prog(i);
*((uint64_t*)&instr) &= (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
(this->*(engine[instr.opcode]))(instr);
}
emit(REX_MOV_RR, code, codePos);
emitByte(0xc0 + pcfg.readReg2, code, codePos);
emit(REX_XOR_EAX, code, codePos);
@ -402,7 +408,7 @@ namespace randomx {
}
}
void JitCompilerX86::genAddressReg(Instruction& instr, uint8_t* code, int& codePos, bool rax) {
void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos, bool rax) {
emit(LEA_32, code, codePos);
emitByte(0x80 + instr.src + (rax ? 0 : 8), code, codePos);
if (instr.src == RegisterNeedsSib) {
@ -416,7 +422,7 @@ namespace randomx {
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos);
}
void JitCompilerX86::genAddressRegDst(Instruction& instr, uint8_t* code, int& codePos) {
void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, int& codePos) {
emit(LEA_32, code, codePos);
emitByte(0x80 + instr.dst, code, codePos);
if (instr.dst == RegisterNeedsSib) {
@ -432,7 +438,7 @@ namespace randomx {
}
}
void JitCompilerX86::genAddressImm(Instruction& instr, uint8_t* code, int& codePos) {
void JitCompilerX86::genAddressImm(const Instruction& instr, uint8_t* code, int& codePos) {
emit32(instr.getImm32() & ScratchpadL3Mask, code, codePos);
}
@ -447,17 +453,18 @@ namespace randomx {
0x3c8d4f,
};
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
void JitCompilerX86::h_IADD_RS(const Instruction& instr) {
int pos = codePos;
uint8_t* const p = code + pos;
registerUsage[instr.dst] = i;
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst;
*(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24);
*(uint32_t*)(p + 4) = instr.getImm32();
codePos = pos + ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4);
pos += ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4);
registerUsage[instr.dst] = pos;
codePos = pos;
}
static const uint32_t template_IADD_M[8] = {
@ -471,11 +478,10 @@ namespace randomx {
0x063c034c,
};
void JitCompilerX86::h_IADD_M(Instruction& instr, int i) {
void JitCompilerX86::h_IADD_M(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, p, pos);
emit32(template_IADD_M[instr.dst], p, pos);
@ -486,6 +492,7 @@ namespace randomx {
genAddressImm(instr, p, pos);
}
registerUsage[instr.dst] = pos;
codePos = pos;
}
@ -493,11 +500,10 @@ namespace randomx {
emitByte((scale << 6) | (index << 3) | base, code, codePos);
}
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
void JitCompilerX86::h_ISUB_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_SUB_RR, p, pos);
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
@ -508,14 +514,14 @@ namespace randomx {
emit32(instr.getImm32(), p, pos);
}
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) {
void JitCompilerX86::h_ISUB_M(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, p, pos);
emit(REX_SUB_RM, p, pos);
@ -528,14 +534,14 @@ namespace randomx {
genAddressImm(instr, p, pos);
}
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
void JitCompilerX86::h_IMUL_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_IMUL_RR, p, pos);
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
@ -546,14 +552,14 @@ namespace randomx {
emit32(instr.getImm32(), p, pos);
}
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) {
void JitCompilerX86::h_IMUL_M(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, p, pos);
emit(REX_IMUL_RM, p, pos);
@ -566,14 +572,14 @@ namespace randomx {
genAddressImm(instr, p, pos);
}
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
void JitCompilerX86::h_IMULH_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
emit(REX_MOV_RR64, p, pos);
emitByte(0xc0 + instr.dst, p, pos);
emit(REX_MUL_R, p, pos);
@ -581,14 +587,14 @@ namespace randomx {
emit(REX_MOV_R64R, p, pos);
emitByte(0xc2 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) {
void JitCompilerX86::h_IMULH_M(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, p, pos, false);
emit(REX_MOV_RR64, p, pos);
@ -605,14 +611,14 @@ namespace randomx {
emit(REX_MOV_R64R, p, pos);
emitByte(0xc2 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
void JitCompilerX86::h_ISMULH_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
emit(REX_MOV_RR64, p, pos);
emitByte(0xc0 + instr.dst, p, pos);
emit(REX_MUL_R, p, pos);
@ -620,14 +626,14 @@ namespace randomx {
emit(REX_MOV_R64R, p, pos);
emitByte(0xc2 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) {
void JitCompilerX86::h_ISMULH_M(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, p, pos, false);
emit(REX_MOV_RR64, p, pos);
@ -644,41 +650,41 @@ namespace randomx {
emit(REX_MOV_R64R, p, pos);
emitByte(0xc2 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
void JitCompilerX86::h_IMUL_RCP(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
emit(MOV_RAX_I, p, pos);
emit64(randomx_reciprocal_fast(divisor), p, pos);
emit(REX_IMUL_RM, p, pos);
emitByte(0xc0 + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
}
codePos = pos;
}
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
void JitCompilerX86::h_INEG_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
emit(REX_NEG, p, pos);
emitByte(0xd8 + instr.dst, p, pos);
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
void JitCompilerX86::h_IXOR_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_XOR_RR, p, pos);
emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos);
@ -689,14 +695,14 @@ namespace randomx {
emit32(instr.getImm32(), p, pos);
}
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) {
void JitCompilerX86::h_IXOR_M(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, p, pos);
emit(REX_XOR_RM, p, pos);
@ -709,14 +715,14 @@ namespace randomx {
genAddressImm(instr, p, pos);
}
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
void JitCompilerX86::h_IROR_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_MOV_RR, p, pos);
emitByte(0xc8 + instr.src, p, pos);
@ -729,14 +735,14 @@ namespace randomx {
emitByte(instr.getImm32() & 63, p, pos);
}
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IROL_R(Instruction& instr, int i) {
void JitCompilerX86::h_IROL_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_MOV_RR, p, pos);
emitByte(0xc8 + instr.src, p, pos);
@ -749,24 +755,25 @@ namespace randomx {
emitByte(instr.getImm32() & 63, p, pos);
}
registerUsage[instr.dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) {
void JitCompilerX86::h_ISWAP_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
if (instr.src != instr.dst) {
registerUsage[instr.dst] = i;
registerUsage[instr.src] = i;
emit(REX_XCHG, p, pos);
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos);
registerUsage[instr.dst] = pos;
registerUsage[instr.src] = pos;
}
codePos = pos;
}
void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) {
void JitCompilerX86::h_FSWAP_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
@ -777,105 +784,105 @@ namespace randomx {
codePos = pos;
}
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
void JitCompilerX86::h_FADD_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
const uint32_t dst = instr.dst % RegisterCountFlt;
const uint32_t src = instr.src % RegisterCountFlt;
emit(REX_ADDPD, p, pos);
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos);
emitByte(0xc0 + src + 8 * dst, p, pos);
codePos = pos;
}
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
void JitCompilerX86::h_FADD_M(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
instr.dst %= RegisterCountFlt;
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg(instr, p, pos);
emit(REX_CVTDQ2PD_XMM12, p, pos);
emit(REX_ADDPD, p, pos);
emitByte(0xc4 + 8 * instr.dst, p, pos);
emitByte(0xc4 + 8 * dst, p, pos);
codePos = pos;
}
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
void JitCompilerX86::h_FSUB_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
const uint32_t dst = instr.dst % RegisterCountFlt;
const uint32_t src = instr.src % RegisterCountFlt;
emit(REX_SUBPD, p, pos);
emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos);
emitByte(0xc0 + src + 8 * dst, p, pos);
codePos = pos;
}
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
void JitCompilerX86::h_FSUB_M(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
instr.dst %= RegisterCountFlt;
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg(instr, p, pos);
emit(REX_CVTDQ2PD_XMM12, p, pos);
emit(REX_SUBPD, p, pos);
emitByte(0xc4 + 8 * instr.dst, p, pos);
emitByte(0xc4 + 8 * dst, p, pos);
codePos = pos;
}
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
void JitCompilerX86::h_FSCAL_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
instr.dst %= RegisterCountFlt;
const uint32_t dst = instr.dst % RegisterCountFlt;
emit(REX_XORPS, p, pos);
emitByte(0xc7 + 8 * instr.dst, p, pos);
emitByte(0xc7 + 8 * dst, p, pos);
codePos = pos;
}
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
void JitCompilerX86::h_FMUL_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
const uint32_t dst = instr.dst % RegisterCountFlt;
const uint32_t src = instr.src % RegisterCountFlt;
emit(REX_MULPD, p, pos);
emitByte(0xe0 + instr.src + 8 * instr.dst, p, pos);
emitByte(0xe0 + src + 8 * dst, p, pos);
codePos = pos;
}
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
void JitCompilerX86::h_FDIV_M(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
instr.dst %= RegisterCountFlt;
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg(instr, p, pos);
emit(REX_CVTDQ2PD_XMM12, p, pos);
emit(REX_ANDPS_XMM12, p, pos);
emit(REX_DIVPD, p, pos);
emitByte(0xe4 + 8 * instr.dst, p, pos);
emitByte(0xe4 + 8 * dst, p, pos);
codePos = pos;
}
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
void JitCompilerX86::h_FSQRT_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
instr.dst %= RegisterCountFlt;
const uint32_t dst = instr.dst % RegisterCountFlt;
emit(SQRTPD, p, pos);
emitByte(0xe4 + 9 * instr.dst, p, pos);
emitByte(0xe4 + 9 * dst, p, pos);
codePos = pos;
}
void JitCompilerX86::h_CFROUND(Instruction& instr, int i) {
void JitCompilerX86::h_CFROUND(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
@ -891,12 +898,11 @@ namespace randomx {
codePos = pos;
}
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
int reg = instr.dst;
int target = registerUsage[reg] + 1;
emit(REX_ADD_I, p, pos);
emitByte(0xc0 + reg, p, pos);
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
@ -908,10 +914,10 @@ namespace randomx {
emitByte(0xc0 + reg, p, pos);
emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift, p, pos);
emit(JZ, p, pos);
emit32(instructionOffsets[target] - (pos + 4), p, pos);
emit32(registerUsage[reg] - (pos + 4), p, pos);
//mark all registers as used
uint64_t* r = (uint64_t*) registerUsage;
uint64_t k = i;
uint64_t k = pos;
k |= k << 32;
for (unsigned j = 0; j < RegistersCount / 2; ++j) {
r[j] = k;
@ -920,7 +926,7 @@ namespace randomx {
codePos = pos;
}
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
void JitCompilerX86::h_ISTORE(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
@ -932,7 +938,7 @@ namespace randomx {
codePos = pos;
}
void JitCompilerX86::h_NOP(Instruction& instr, int i) {
void JitCompilerX86::h_NOP(const Instruction& instr) {
emit(NOP1, code, codePos);
}

View file

@ -41,7 +41,7 @@ namespace randomx {
class JitCompilerX86;
class Instruction;
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
typedef void(JitCompilerX86::*InstructionGeneratorX86)(const Instruction&);
constexpr uint32_t CodeSize = 64 * 1024;
@ -66,16 +66,15 @@ namespace randomx {
size_t getCodeSize();
static InstructionGeneratorX86 engine[256];
int32_t instructionOffsets[512];
int registerUsage[RegistersCount];
uint8_t* code;
int32_t codePos;
void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&, ProgramConfiguration&);
static void genAddressReg(Instruction&, uint8_t* code, int& codePos, bool rax = true);
static void genAddressRegDst(Instruction&, uint8_t* code, int& codePos);
static void genAddressImm(Instruction&, uint8_t* code, int& codePos);
static void genAddressReg(const Instruction&, uint8_t* code, int& codePos, bool rax = true);
static void genAddressRegDst(const Instruction&, uint8_t* code, int& codePos);
static void genAddressImm(const Instruction&, uint8_t* code, int& codePos);
static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos);
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
@ -105,36 +104,36 @@ namespace randomx {
codePos += count;
}
void h_IADD_RS(Instruction&, int);
void h_IADD_M(Instruction&, int);
void h_ISUB_R(Instruction&, int);
void h_ISUB_M(Instruction&, int);
void h_IMUL_R(Instruction&, int);
void h_IMUL_M(Instruction&, int);
void h_IMULH_R(Instruction&, int);
void h_IMULH_M(Instruction&, int);
void h_ISMULH_R(Instruction&, int);
void h_ISMULH_M(Instruction&, int);
void h_IMUL_RCP(Instruction&, int);
void h_INEG_R(Instruction&, int);
void h_IXOR_R(Instruction&, int);
void h_IXOR_M(Instruction&, int);
void h_IROR_R(Instruction&, int);
void h_IROL_R(Instruction&, int);
void h_ISWAP_R(Instruction&, int);
void h_FSWAP_R(Instruction&, int);
void h_FADD_R(Instruction&, int);
void h_FADD_M(Instruction&, int);
void h_FSUB_R(Instruction&, int);
void h_FSUB_M(Instruction&, int);
void h_FSCAL_R(Instruction&, int);
void h_FMUL_R(Instruction&, int);
void h_FDIV_M(Instruction&, int);
void h_FSQRT_R(Instruction&, int);
void h_CBRANCH(Instruction&, int);
void h_CFROUND(Instruction&, int);
void h_ISTORE(Instruction&, int);
void h_NOP(Instruction&, int);
void h_IADD_RS(const Instruction&);
void h_IADD_M(const Instruction&);
void h_ISUB_R(const Instruction&);
void h_ISUB_M(const Instruction&);
void h_IMUL_R(const Instruction&);
void h_IMUL_M(const Instruction&);
void h_IMULH_R(const Instruction&);
void h_IMULH_M(const Instruction&);
void h_ISMULH_R(const Instruction&);
void h_ISMULH_M(const Instruction&);
void h_IMUL_RCP(const Instruction&);
void h_INEG_R(const Instruction&);
void h_IXOR_R(const Instruction&);
void h_IXOR_M(const Instruction&);
void h_IROR_R(const Instruction&);
void h_IROL_R(const Instruction&);
void h_ISWAP_R(const Instruction&);
void h_FSWAP_R(const Instruction&);
void h_FADD_R(const Instruction&);
void h_FADD_M(const Instruction&);
void h_FSUB_R(const Instruction&);
void h_FSUB_M(const Instruction&);
void h_FSCAL_R(const Instruction&);
void h_FMUL_R(const Instruction&);
void h_FDIV_M(const Instruction&);
void h_FSQRT_R(const Instruction&);
void h_CBRANCH(const Instruction&);
void h_CFROUND(const Instruction&);
void h_ISTORE(const Instruction&);
void h_NOP(const Instruction&);
};
}