Mirror of https://github.com/xmrig/xmrig.git (synced 2024-12-22 19:49:36 +00:00)
Merge pull request #3271 from SChernykh/opt_genprog
RandomX: optimized program generation
This commit is contained in:
commit
5891f1f06b
4 changed files with 113 additions and 73 deletions
|
@ -34,6 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "base/tools/Chrono.h"
|
#include "base/tools/Chrono.h"
|
||||||
#include "crypto/randomx/randomx.h"
|
#include "crypto/randomx/randomx.h"
|
||||||
#include "crypto/randomx/soft_aes.h"
|
#include "crypto/randomx/soft_aes.h"
|
||||||
|
#include "crypto/randomx/instruction.hpp"
|
||||||
|
#include "crypto/randomx/common.hpp"
|
||||||
#include "crypto/rx/Profiler.h"
|
#include "crypto/rx/Profiler.h"
|
||||||
|
|
||||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||||
|
@ -165,6 +167,17 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
|
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||||
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
|
// 128-bit AND-mask holding two copies of a randomx::Instruction pattern, produced by an
// immediately-invoked lambda. fillAes4Rx4 ANDs it with each generated state vector
// before storing (after the first two unmasked 64-byte stores).
// The pattern is all-ones in every initializer except the second and third, which are
// RegistersCount - 1.
// NOTE(review): presumably the initializer order is opcode, dst, src, mod, imm32, so only
// dst/src are narrowed - confirm against instruction.hpp.
// NOTE(review): RegistersCount - 1 only matches the "% RegistersCount" removed from the
// JIT code in this commit if RegistersCount is a power of two - confirm.
// NOTE(review): assumes 2 * sizeof(randomx::Instruction) == sizeof(rx_vec_i128); nothing
// here checks it (no static_assert).
// NOTE(review): `vec` is read although only `mask` was initialized; ISO C++ does not define
// reading a union member other than the active one, so this relies on compiler support.
static const rx_vec_i128 inst_mask = []() {
|
||||||
|
constexpr randomx::Instruction inst{ 0xFF, randomx::RegistersCount - 1, randomx::RegistersCount - 1, 0xFF, 0xFFFFFFFFU };
|
||||||
|
|
||||||
|
union {
|
||||||
|
randomx::Instruction mask[2];
|
||||||
|
rx_vec_i128 vec;
|
||||||
|
} result = { inst, inst };
|
||||||
|
|
||||||
|
return result.vec;
|
||||||
|
}();
|
||||||
|
|
||||||
template<int softAes>
|
template<int softAes>
|
||||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
const uint8_t* outptr = (uint8_t*)buffer;
|
const uint8_t* outptr = (uint8_t*)buffer;
|
||||||
|
@ -187,32 +200,41 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
|
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
|
||||||
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
|
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
|
||||||
|
|
||||||
while (outptr < outputEnd) {
|
#define TRANSFORM do { \
|
||||||
state0 = aesdec<softAes>(state0, key0);
|
state0 = aesdec<softAes>(state0, key0); \
|
||||||
state1 = aesenc<softAes>(state1, key0);
|
state1 = aesenc<softAes>(state1, key0); \
|
||||||
state2 = aesdec<softAes>(state2, key4);
|
state2 = aesdec<softAes>(state2, key4); \
|
||||||
state3 = aesenc<softAes>(state3, key4);
|
state3 = aesenc<softAes>(state3, key4); \
|
||||||
|
state0 = aesdec<softAes>(state0, key1); \
|
||||||
state0 = aesdec<softAes>(state0, key1);
|
state1 = aesenc<softAes>(state1, key1); \
|
||||||
state1 = aesenc<softAes>(state1, key1);
|
state2 = aesdec<softAes>(state2, key5); \
|
||||||
state2 = aesdec<softAes>(state2, key5);
|
state3 = aesenc<softAes>(state3, key5); \
|
||||||
state3 = aesenc<softAes>(state3, key5);
|
state0 = aesdec<softAes>(state0, key2); \
|
||||||
|
state1 = aesenc<softAes>(state1, key2); \
|
||||||
state0 = aesdec<softAes>(state0, key2);
|
state2 = aesdec<softAes>(state2, key6); \
|
||||||
state1 = aesenc<softAes>(state1, key2);
|
state3 = aesenc<softAes>(state3, key6); \
|
||||||
state2 = aesdec<softAes>(state2, key6);
|
state0 = aesdec<softAes>(state0, key3); \
|
||||||
state3 = aesenc<softAes>(state3, key6);
|
state1 = aesenc<softAes>(state1, key3); \
|
||||||
|
state2 = aesdec<softAes>(state2, key7); \
|
||||||
state0 = aesdec<softAes>(state0, key3);
|
state3 = aesenc<softAes>(state3, key7); \
|
||||||
state1 = aesenc<softAes>(state1, key3);
|
} while (0)
|
||||||
state2 = aesdec<softAes>(state2, key7);
|
|
||||||
state3 = aesenc<softAes>(state3, key7);
|
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; ++i, outptr += 64) {
|
||||||
|
TRANSFORM;
|
||||||
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
|
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
|
||||||
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
|
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
|
||||||
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
|
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
|
||||||
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
|
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
|
||||||
|
}
|
||||||
|
|
||||||
|
const rx_vec_i128 mask = inst_mask;
|
||||||
|
|
||||||
|
while (outptr < outputEnd) {
|
||||||
|
TRANSFORM;
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)outptr + 0, rx_and_vec_i128(state0, mask));
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)outptr + 1, rx_and_vec_i128(state1, mask));
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)outptr + 2, rx_and_vec_i128(state2, mask));
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)outptr + 3, rx_and_vec_i128(state3, mask));
|
||||||
outptr += 64;
|
outptr += 64;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -126,6 +126,7 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||||
|
|
||||||
#define rx_xor_vec_f128 _mm_xor_pd
|
#define rx_xor_vec_f128 _mm_xor_pd
|
||||||
#define rx_and_vec_f128 _mm_and_pd
|
#define rx_and_vec_f128 _mm_and_pd
|
||||||
|
// Bitwise AND of two 128-bit integer vectors; on this code path it is a direct alias of
// the _mm_and_si128 intrinsic, mirroring the rx_and_vec_f128 -> _mm_and_pd alias above.
#define rx_and_vec_i128 _mm_and_si128
|
||||||
#define rx_or_vec_f128 _mm_or_pd
|
#define rx_or_vec_f128 _mm_or_pd
|
||||||
|
|
||||||
#ifdef __AES__
|
#ifdef __AES__
|
||||||
|
@ -278,6 +279,10 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
return (rx_vec_f128)vec_and(a,b);
|
return (rx_vec_f128)vec_and(a,b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bitwise AND of two 128-bit integer vectors for the vec_and-based code path.
// Returns vec_and(a, b) cast back to rx_vec_i128, following the same pattern as the
// neighbouring rx_and_vec_f128 / rx_or_vec_f128 wrappers.
FORCE_INLINE rx_vec_i128 rx_and_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
|
||||||
|
return (rx_vec_i128)vec_and(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
return (rx_vec_f128)vec_or(a,b);
|
return (rx_vec_f128)vec_or(a,b);
|
||||||
}
|
}
|
||||||
|
@ -444,6 +449,8 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bitwise AND of two 128-bit integer vectors; aliased directly to the vandq_u8 intrinsic.
// NOTE(review): presumably rx_vec_i128 is a uint8x16_t on this code path, otherwise the
// alias would need vreinterpretq casts like rx_and_vec_f128 uses - confirm.
#define rx_and_vec_i128 vandq_u8
|
||||||
|
|
||||||
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
||||||
}
|
}
|
||||||
|
@ -635,6 +642,13 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bitwise AND of two 128-bit integer vectors for the code path where rx_vec_i128 exposes
// its contents as u64[0] and u64[1]. Each 64-bit half of the result is the AND of the
// corresponding halves of a and b.
FORCE_INLINE rx_vec_i128 rx_and_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
|
||||||
|
rx_vec_i128 x;
|
||||||
|
x.u64[0] = a.u64[0] & b.u64[0];
|
||||||
|
x.u64[1] = a.u64[1] & b.u64[1];
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||||
rx_vec_f128 x;
|
rx_vec_f128 x;
|
||||||
x.i.u64[0] = a.i.u64[0] | b.i.u64[0];
|
x.i.u64[0] = a.i.u64[0] | b.i.u64[0];
|
||||||
|
|
|
@ -144,8 +144,6 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
||||||
for (uint32_t i = 0; i < program.getSize(); ++i)
|
for (uint32_t i = 0; i < program.getSize(); ++i)
|
||||||
{
|
{
|
||||||
Instruction& instr = program(i);
|
Instruction& instr = program(i);
|
||||||
instr.src %= RegistersCount;
|
|
||||||
instr.dst %= RegistersCount;
|
|
||||||
(this->*engine[instr.opcode])(instr, codePos);
|
(this->*engine[instr.opcode])(instr, codePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -204,8 +202,6 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
||||||
for (uint32_t i = 0; i < program.getSize(); ++i)
|
for (uint32_t i = 0; i < program.getSize(); ++i)
|
||||||
{
|
{
|
||||||
Instruction& instr = program(i);
|
Instruction& instr = program(i);
|
||||||
instr.src %= RegistersCount;
|
|
||||||
instr.dst %= RegistersCount;
|
|
||||||
(this->*engine[instr.opcode])(instr, codePos);
|
(this->*engine[instr.opcode])(instr, codePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -312,11 +312,19 @@ namespace randomx {
|
||||||
freePagedMemory(allocatedCode, allocatedSize);
|
freePagedMemory(allocatedCode, allocatedSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Issues rx_prefetch_nta for N addresses spaced 64 bytes apart, starting at `data`.
// Implemented as compile-time recursion: prefetch the current address, then instantiate
// the N - 1 case on data + 64.
// NOTE(review): 64 is presumably the cache-line size - confirm for the targeted CPUs.
template<size_t N>
|
||||||
|
static FORCE_INLINE void prefetch_data(const void* data) {
|
||||||
|
rx_prefetch_nta(data);
|
||||||
|
prefetch_data<N - 1>(reinterpret_cast<const char*>(data) + 64);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursion terminator: with zero chunks left there is nothing to prefetch.
template<> FORCE_INLINE void prefetch_data<0>(const void*) {}
|
||||||
|
|
||||||
|
// Convenience overload: prefetches a whole object. The chunk count is sizeof(T) divided by
// 64 and rounded up, so a trailing partial 64-byte chunk is still covered.
template<typename T> static FORCE_INLINE void prefetch_data(const T& data) { prefetch_data<(sizeof(T) + 63) / 64>(&data); }
|
||||||
|
|
||||||
// Prefetches the `engine` table and `RandomX_CurrentConfig` with rx_prefetch_nta.
// In this rendered diff each row shows the pre-change text first and the post-change text
// second: the two explicit loops that stepped through sizeof(object) in 64-byte strides are
// replaced by prefetch_data(object) calls, which cover the same 64-byte chunks.
void JitCompilerX86::prepare() {
|
void JitCompilerX86::prepare() {
|
||||||
for (size_t i = 0; i < sizeof(engine); i += 64)
|
prefetch_data(engine);
|
||||||
rx_prefetch_nta((const char*)(&engine) + i);
|
prefetch_data(RandomX_CurrentConfig);
|
||||||
for (size_t i = 0; i < sizeof(RandomX_CurrentConfig); i += 64)
|
|
||||||
rx_prefetch_nta((const char*)(&RandomX_CurrentConfig) + i);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
||||||
|
@ -748,7 +756,7 @@ namespace randomx {
|
||||||
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
|
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
|
||||||
|
|
||||||
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
|
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
|
||||||
const uint32_t dst = static_cast<uint32_t>(instr.dst % RegistersCount) << 16;
|
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
|
||||||
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
|
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
|
||||||
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
|
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
|
||||||
|
|
||||||
|
@ -768,8 +776,8 @@ namespace randomx {
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
uint8_t* const p = code + pos;
|
uint8_t* const p = code + pos;
|
||||||
|
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst;
|
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst;
|
||||||
|
|
||||||
uint32_t k = 0x048d4f + (dst << 19);
|
uint32_t k = 0x048d4f + (dst << 19);
|
||||||
if (dst == RegisterNeedsDisplacement)
|
if (dst == RegisterNeedsDisplacement)
|
||||||
|
@ -788,8 +796,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -809,8 +817,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
|
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
|
||||||
|
@ -830,8 +838,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -851,8 +859,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
|
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
|
||||||
|
@ -871,8 +879,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -892,8 +900,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
|
|
||||||
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
|
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
|
||||||
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
|
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
|
||||||
|
@ -908,8 +916,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
|
|
||||||
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
|
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
|
||||||
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
|
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
|
||||||
|
@ -923,8 +931,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<false>(instr, src, p, pos);
|
genAddressReg<false>(instr, src, p, pos);
|
||||||
|
@ -947,8 +955,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<false>(instr, src, p, pos);
|
genAddressReg<false>(instr, src, p, pos);
|
||||||
|
@ -970,8 +978,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
|
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
|
||||||
pos += 8;
|
pos += 8;
|
||||||
|
@ -985,8 +993,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<false>(instr, src, p, pos);
|
genAddressReg<false>(instr, src, p, pos);
|
||||||
|
@ -1011,7 +1019,7 @@ namespace randomx {
|
||||||
|
|
||||||
uint64_t divisor = instr.getImm32();
|
uint64_t divisor = instr.getImm32();
|
||||||
if (!isZeroOrPowerOf2(divisor)) {
|
if (!isZeroOrPowerOf2(divisor)) {
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
|
|
||||||
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
|
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
|
||||||
if (imul_rcp_storage_used < 16) {
|
if (imul_rcp_storage_used < 16) {
|
||||||
|
@ -1040,7 +1048,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
|
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
|
||||||
pos += 3;
|
pos += 3;
|
||||||
|
|
||||||
|
@ -1052,8 +1060,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
|
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
|
||||||
|
@ -1073,8 +1081,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -1094,8 +1102,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
|
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
|
||||||
|
@ -1115,8 +1123,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
|
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
|
||||||
|
@ -1136,8 +1144,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint32_t dst = instr.dst % RegistersCount;
|
const uint32_t dst = instr.dst;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
|
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
|
||||||
|
@ -1153,7 +1161,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t dst = instr.dst % RegistersCount;
|
const uint64_t dst = instr.dst;
|
||||||
|
|
||||||
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
|
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
|
||||||
pos += 5;
|
pos += 5;
|
||||||
|
@ -1182,7 +1190,7 @@ namespace randomx {
|
||||||
|
|
||||||
prevFPOperation = pos;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -1214,7 +1222,7 @@ namespace randomx {
|
||||||
|
|
||||||
prevFPOperation = pos;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -1257,7 +1265,7 @@ namespace randomx {
|
||||||
|
|
||||||
prevFPOperation = pos;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -1307,7 +1315,7 @@ namespace randomx {
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
prevCFROUND = pos;
|
prevCFROUND = pos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src;
|
||||||
|
|
||||||
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
|
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
|
||||||
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
||||||
|
@ -1343,7 +1351,7 @@ namespace randomx {
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
prevCFROUND = pos;
|
prevCFROUND = pos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src;
|
||||||
|
|
||||||
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
||||||
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
|
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
|
||||||
|
@ -1367,7 +1375,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const int reg = instr.dst % RegistersCount;
|
const int reg = instr.dst;
|
||||||
int32_t jmp_offset = registerUsage[reg];
|
int32_t jmp_offset = registerUsage[reg];
|
||||||
|
|
||||||
// if it jumps over the previous FP instruction that uses rounding, treat it as if FP instruction happened now
|
// if it jumps over the previous FP instruction that uses rounding, treat it as if FP instruction happened now
|
||||||
|
@ -1426,7 +1434,7 @@ namespace randomx {
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
genAddressRegDst(instr, p, pos);
|
genAddressRegDst(instr, p, pos);
|
||||||
emit32(0x0604894c + (static_cast<uint32_t>(instr.src % RegistersCount) << 19), p, pos);
|
emit32(0x0604894c + (static_cast<uint32_t>(instr.src) << 19), p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue