RandomX JIT refactoring

- Smaller memory footprint
- A bit faster overall
This commit is contained in:
SChernykh 2020-04-09 14:24:54 +02:00
parent 92810ad761
commit abb3340cc7
9 changed files with 374 additions and 428 deletions

View file

@ -93,7 +93,7 @@ template<size_t N>
xmrig::CpuWorker<N>::~CpuWorker() xmrig::CpuWorker<N>::~CpuWorker()
{ {
# ifdef XMRIG_ALGO_RANDOMX # ifdef XMRIG_ALGO_RANDOMX
delete m_vm; RxVm::Destroy(m_vm);
# endif # endif
CnCtx::release(m_ctx, N); CnCtx::release(m_ctx, N);
@ -118,7 +118,7 @@ void xmrig::CpuWorker<N>::allocateRandomX_VM()
} }
if (!m_vm) { if (!m_vm) {
m_vm = new RxVm(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_node); m_vm = RxVm::Create(dataset, m_memory->scratchpad(), !m_hwAES, m_assembly, m_node);
} }
} }
#endif #endif
@ -249,14 +249,14 @@ void xmrig::CpuWorker<N>::start()
if (job.algorithm().family() == Algorithm::RANDOM_X) { if (job.algorithm().family() == Algorithm::RANDOM_X) {
if (first) { if (first) {
first = false; first = false;
randomx_calculate_hash_first(m_vm->get(), tempHash, m_job.blob(), job.size()); randomx_calculate_hash_first(m_vm, tempHash, m_job.blob(), job.size());
} }
if (!nextRound(m_job)) { if (!nextRound(m_job)) {
break; break;
} }
randomx_calculate_hash_next(m_vm->get(), tempHash, m_job.blob(), job.size(), m_hash); randomx_calculate_hash_next(m_vm, tempHash, m_job.blob(), job.size(), m_hash);
} }
else else
# endif # endif

View file

@ -34,6 +34,11 @@
#include "net/JobResult.h" #include "net/JobResult.h"
#ifdef XMRIG_ALGO_RANDOMX
class randomx_vm;
#endif
namespace xmrig { namespace xmrig {
@ -82,7 +87,7 @@ private:
WorkerJob<N> m_job; WorkerJob<N> m_job;
# ifdef XMRIG_ALGO_RANDOMX # ifdef XMRIG_ALGO_RANDOMX
RxVm *m_vm = nullptr; randomx_vm *m_vm = nullptr;
# endif # endif
}; };

View file

@ -45,7 +45,7 @@ static const uint64_t blake2b_IV[8] = {
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) }; UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
static const unsigned int blake2b_sigma[12][16] = { static const uint8_t blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},

File diff suppressed because it is too large Load diff

View file

@ -67,52 +67,55 @@ namespace randomx {
size_t getCodeSize(); size_t getCodeSize();
alignas(64) static InstructionGeneratorX86 engine[256]; alignas(64) static InstructionGeneratorX86 engine[256];
int registerUsage[RegistersCount]; int registerUsage[RegistersCount];
uint8_t* allocatedCode;
uint8_t* code; uint8_t* code;
uint32_t codePos;
uint32_t codePosFirst;
uint32_t vm_flags;
# ifdef XMRIG_FIX_RYZEN # ifdef XMRIG_FIX_RYZEN
std::pair<const void*, const void*> mainLoopBounds; std::pair<const void*, const void*> mainLoopBounds;
# endif # endif
int32_t codePos;
int32_t codePosFirst;
uint32_t vm_flags;
static bool BranchesWithin32B; bool BranchesWithin32B = false;
bool hasAVX; bool hasAVX;
bool hasXOP; bool hasXOP;
static void applyTweaks(); uint8_t* allocatedCode;
void applyTweaks();
void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&, ProgramConfiguration&);
template<bool rax> template<bool rax>
static void genAddressReg(const Instruction&, uint8_t* code, int& codePos); static void genAddressReg(const Instruction&, const uint32_t src, uint8_t* code, uint32_t& codePos);
static void genAddressRegDst(const Instruction&, uint8_t* code, int& codePos); static void genAddressRegDst(const Instruction&, uint8_t* code, uint32_t& codePos);
static void genAddressImm(const Instruction&, uint8_t* code, int& codePos); static void genAddressImm(const Instruction&, uint8_t* code, uint32_t& codePos);
static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos); static void genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos);
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &); void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
static void emitByte(uint8_t val, uint8_t* code, int& codePos) { static void emitByte(uint8_t val, uint8_t* code, uint32_t& codePos) {
code[codePos] = val; code[codePos] = val;
++codePos; ++codePos;
} }
static void emit32(uint32_t val, uint8_t* code, int& codePos) { static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos) {
memcpy(code + codePos, &val, sizeof val); memcpy(code + codePos, &val, sizeof val);
codePos += sizeof val; codePos += sizeof val;
} }
static void emit64(uint64_t val, uint8_t* code, int& codePos) { static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos) {
memcpy(code + codePos, &val, sizeof val); memcpy(code + codePos, &val, sizeof val);
codePos += sizeof val; codePos += sizeof val;
} }
template<size_t N> template<size_t N>
static void emit(const uint8_t (&src)[N], uint8_t* code, int& codePos) { static void emit(const uint8_t (&src)[N], uint8_t* code, uint32_t& codePos) {
emit(src, N, code, codePos); emit(src, N, code, codePos);
} }
static void emit(const uint8_t* src, size_t count, uint8_t* code, int& codePos) { static void emit(const uint8_t* src, size_t count, uint8_t* code, uint32_t& codePos) {
memcpy(code + codePos, src, count); memcpy(code + codePos, src, count);
codePos += count; codePos += count;
} }

View file

@ -119,9 +119,9 @@ struct RandomX_ConfigurationBase
rx_vec_i128 fillAes4Rx4_Key[8]; rx_vec_i128 fillAes4Rx4_Key[8];
uint8_t codeShhPrefetchTweaked[20]; uint8_t codeShhPrefetchTweaked[20];
uint8_t codeReadDatasetTweaked[256]; uint8_t codeReadDatasetTweaked[64];
uint32_t codeReadDatasetTweakedSize; uint32_t codeReadDatasetTweakedSize;
uint8_t codeReadDatasetRyzenTweaked[256]; uint8_t codeReadDatasetRyzenTweaked[76];
uint32_t codeReadDatasetRyzenTweakedSize; uint32_t codeReadDatasetRyzenTweakedSize;
uint8_t codeReadDatasetLightSshInitTweaked[68]; uint8_t codeReadDatasetLightSshInitTweaked[68];
uint8_t codePrefetchScratchpadTweaked[32]; uint8_t codePrefetchScratchpadTweaked[32];

View file

@ -31,18 +31,20 @@
#include "crypto/rx/RxVm.h" #include "crypto/rx/RxVm.h"
xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node) randomx_vm* xmrig::RxVm::Create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node)
{ {
int flags = 0;
if (!softAes) { if (!softAes) {
m_flags |= RANDOMX_FLAG_HARD_AES; flags |= RANDOMX_FLAG_HARD_AES;
} }
if (dataset->get()) { if (dataset->get()) {
m_flags |= RANDOMX_FLAG_FULL_MEM; flags |= RANDOMX_FLAG_FULL_MEM;
} }
if (!dataset->cache() || dataset->cache()->isJIT()) { if (!dataset->cache() || dataset->cache()->isJIT()) {
m_flags |= RANDOMX_FLAG_JIT; flags |= RANDOMX_FLAG_JIT;
} }
if (assembly == Assembly::AUTO) { if (assembly == Assembly::AUTO) {
@ -50,16 +52,16 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::
} }
if ((assembly == Assembly::RYZEN) || (assembly == Assembly::BULLDOZER)) { if ((assembly == Assembly::RYZEN) || (assembly == Assembly::BULLDOZER)) {
m_flags |= RANDOMX_FLAG_AMD; flags |= RANDOMX_FLAG_AMD;
} }
m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node); return randomx_create_vm(static_cast<randomx_flags>(flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
} }
xmrig::RxVm::~RxVm() void xmrig::RxVm::Destroy(randomx_vm* vm)
{ {
if (m_vm) { if (vm) {
randomx_destroy_vm(m_vm); randomx_destroy_vm(vm);
} }
} }

View file

@ -50,14 +50,8 @@ class RxVm
public: public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm); XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxVm);
RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node); static randomx_vm* Create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node);
~RxVm(); static void Destroy(randomx_vm* vm);
inline randomx_vm *get() const { return m_vm; }
private:
int m_flags = 0;
randomx_vm *m_vm = nullptr;
}; };

View file

@ -116,17 +116,17 @@ static void getResults(JobBundle &bundle, std::vector<JobResult> &results, uint3
return; return;
} }
auto vm = new RxVm(dataset, memory->scratchpad(), !hwAES, Assembly::NONE, 0); auto vm = RxVm::Create(dataset, memory->scratchpad(), !hwAES, Assembly::NONE, 0);
for (uint32_t nonce : bundle.nonces) { for (uint32_t nonce : bundle.nonces) {
*bundle.job.nonce() = nonce; *bundle.job.nonce() = nonce;
randomx_calculate_hash(vm->get(), bundle.job.blob(), bundle.job.size(), hash); randomx_calculate_hash(vm, bundle.job.blob(), bundle.job.size(), hash);
checkHash(bundle, results, nonce, hash, errors); checkHash(bundle, results, nonce, hash, errors);
} }
delete vm; RxVm::Destroy(vm);
# endif # endif
} }
else if (algorithm.family() == Algorithm::ARGON2) { else if (algorithm.family() == Algorithm::ARGON2) {