RandomX JIT refactoring

- Smaller memory footprint
- A bit faster overall
This commit is contained in:
SChernykh 2020-04-09 14:24:54 +02:00
parent 92810ad761
commit abb3340cc7
9 changed files with 374 additions and 428 deletions

View file

@@ -93,7 +93,7 @@ template<size_t N>
xmrig::CpuWorker<N>::~CpuWorker()
{
# ifdef XMRIG_ALGO_RANDOMX
delete m_vm;
RxVm::Destroy(m_vm);
# endif
CnCtx::release(m_ctx, N);
@@ -118,7 +118,7 @@ void xmrig::CpuWorker<N>::allocateRandomX_VM()
}
if (!m_vm) {
m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_node);
m_vm = RxVm::Create(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_node);
}
}
#endif
@@ -249,14 +249,14 @@ void xmrig::CpuWorker<N>::start()
if (job.algorithm().family() == Algorithm::RANDOM_X) {
if (first) {
first = false;
randomx_calculate_hash_first(m_vm->get(), tempHash, m_job.blob(), job.size());
randomx_calculate_hash_first(m_vm, tempHash, m_job.blob(), job.size());
}
if (!nextRound(m_job)) {
break;
}
randomx_calculate_hash_next(m_vm->get(), tempHash, m_job.blob(), job.size(), m_hash);
randomx_calculate_hash_next(m_vm, tempHash, m_job.blob(), job.size(), m_hash);
}
else
# endif

View file

@@ -34,6 +34,11 @@
#include "net/JobResult.h"
#ifdef XMRIG_ALGO_RANDOMX
class randomx_vm;
#endif
namespace xmrig {
@@ -82,7 +87,7 @@ private:
WorkerJob<N> m_job;
# ifdef XMRIG_ALGO_RANDOMX
RxVm *m_vm = nullptr;
randomx_vm *m_vm = nullptr;
# endif
};

View file

@@ -45,7 +45,7 @@ static const uint64_t blake2b_IV[8] = {
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
static const unsigned int blake2b_sigma[12][16] = {
static const uint8_t blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},

File diff suppressed because it is too large Load diff

View file

@@ -67,52 +67,55 @@ namespace randomx {
size_t getCodeSize();
alignas(64) static InstructionGeneratorX86 engine[256];
int registerUsage[RegistersCount];
uint8_t* allocatedCode;
uint8_t* code;
uint32_t codePos;
uint32_t codePosFirst;
uint32_t vm_flags;
# ifdef XMRIG_FIX_RYZEN
std::pair<const void*, const void*> mainLoopBounds;
# endif
int32_t codePos;
int32_t codePosFirst;
uint32_t vm_flags;
static bool BranchesWithin32B;
bool BranchesWithin32B = false;
bool hasAVX;
bool hasXOP;
static void applyTweaks();
uint8_t* allocatedCode;
void applyTweaks();
void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&, ProgramConfiguration&);
template<bool rax>
static void genAddressReg(const Instruction&, uint8_t* code, int& codePos);
static void genAddressRegDst(const Instruction&, uint8_t* code, int& codePos);
static void genAddressImm(const Instruction&, uint8_t* code, int& codePos);
static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos);
static void genAddressReg(const Instruction&, const uint32_t src, uint8_t* code, uint32_t& codePos);
static void genAddressRegDst(const Instruction&, uint8_t* code, uint32_t& codePos);
static void genAddressImm(const Instruction&, uint8_t* code, uint32_t& codePos);
static void genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos);
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
static void emitByte(uint8_t val, uint8_t* code, int& codePos) {
static void emitByte(uint8_t val, uint8_t* code, uint32_t& codePos) {
code[codePos] = val;
++codePos;
}
static void emit32(uint32_t val, uint8_t* code, int& codePos) {
static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos) {
memcpy(code + codePos, &val, sizeof val);
codePos += sizeof val;
}
static void emit64(uint64_t val, uint8_t* code, int& codePos) {
static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos) {
memcpy(code + codePos, &val, sizeof val);
codePos += sizeof val;
}
template<size_t N>
static void emit(const uint8_t (&src)[N], uint8_t* code, int& codePos) {
static void emit(const uint8_t (&src)[N], uint8_t* code, uint32_t& codePos) {
emit(src, N, code, codePos);
}
static void emit(const uint8_t* src, size_t count, uint8_t* code, int& codePos) {
static void emit(const uint8_t* src, size_t count, uint8_t* code, uint32_t& codePos) {
memcpy(code + codePos, src, count);
codePos += count;
}

View file

@@ -119,9 +119,9 @@ struct RandomX_ConfigurationBase
rx_vec_i128 fillAes4Rx4_Key[8];
uint8_t codeShhPrefetchTweaked[20];
uint8_t codeReadDatasetTweaked[256];
uint8_t codeReadDatasetTweaked[64];
uint32_t codeReadDatasetTweakedSize;
uint8_t codeReadDatasetRyzenTweaked[256];
uint8_t codeReadDatasetRyzenTweaked[76];
uint32_t codeReadDatasetRyzenTweakedSize;
uint8_t codeReadDatasetLightSshInitTweaked[68];
uint8_t codePrefetchScratchpadTweaked[32];

View file

@@ -31,18 +31,20 @@
#include "crypto/rx/RxVm.h"
xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node)
randomx_vm* xmrig::RxVm::Create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node)
{
int flags = 0;
if (!softAes) {
m_flags |= RANDOMX_FLAG_HARD_AES;
flags |= RANDOMX_FLAG_HARD_AES;
}
if (dataset->get()) {
m_flags |= RANDOMX_FLAG_FULL_MEM;
flags |= RANDOMX_FLAG_FULL_MEM;
}
if (!dataset->cache() || dataset->cache()->isJIT()) {
m_flags |= RANDOMX_FLAG_JIT;
flags |= RANDOMX_FLAG_JIT;
}
if (assembly == Assembly::AUTO) {
@@ -50,16 +52,16 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::
}
if ((assembly == Assembly::RYZEN) || (assembly == Assembly::BULLDOZER)) {
m_flags |= RANDOMX_FLAG_AMD;
flags |= RANDOMX_FLAG_AMD;
}
m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
return randomx_create_vm(static_cast<randomx_flags>(flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
}
xmrig::RxVm::~RxVm()
void xmrig::RxVm::Destroy(randomx_vm* vm)
{
if (m_vm) {
randomx_destroy_vm(m_vm);
if (vm) {
randomx_destroy_vm(vm);
}
}

View file

@@ -50,14 +50,8 @@ class RxVm
public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm);
RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node);
~RxVm();
inline randomx_vm *get() const { return m_vm; }
private:
int m_flags = 0;
randomx_vm *m_vm = nullptr;
static randomx_vm* Create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node);
static void Destroy(randomx_vm* vm);
};

View file

@@ -116,17 +116,17 @@ static void getResults(JobBundle &bundle, std::vector<JobResult> &results, uint3
return;
}
auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE, 0);
auto vm = RxVm::Create(dataset, memory->scratchpad(), !hwAES, Assembly::NONE, 0);
for (uint32_t nonce : bundle.nonces) {
*bundle.job.nonce() = nonce;
randomx_calculate_hash(vm->get(), bundle.job.blob(), bundle.job.size(), hash);
randomx_calculate_hash(vm, bundle.job.blob(), bundle.job.size(), hash);
checkHash(bundle, results, nonce, hash, errors);
}
delete vm;
RxVm::Destroy(vm);
# endif
}
else if (algorithm.family() == Algorithm::ARGON2) {