From 8d1168385a512ace7128942826cb203633bcc202 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 15 Sep 2020 20:48:27 +0200 Subject: [PATCH] RandomX: returned old soft AES impl and auto-select between the two --- src/crypto/randomx/aes_hash.cpp | 13 +-- src/crypto/randomx/aes_hash.hpp | 8 +- src/crypto/randomx/jit_compiler_x86.cpp | 98 ++++++++++----------- src/crypto/randomx/soft_aes.cpp | 43 +++++++++ src/crypto/randomx/soft_aes.h | 53 +++++++++-- src/crypto/randomx/virtual_machine.cpp | 26 ++++-- src/crypto/randomx/virtual_machine.hpp | 2 +- src/crypto/randomx/vm_compiled.cpp | 6 +- src/crypto/randomx/vm_compiled.hpp | 6 +- src/crypto/randomx/vm_compiled_light.cpp | 4 +- src/crypto/randomx/vm_compiled_light.hpp | 6 +- src/crypto/randomx/vm_interpreted.cpp | 10 +-- src/crypto/randomx/vm_interpreted.hpp | 6 +- src/crypto/randomx/vm_interpreted_light.cpp | 4 +- src/crypto/randomx/vm_interpreted_light.hpp | 6 +- src/crypto/rx/Rx.cpp | 4 + 16 files changed, 194 insertions(+), 101 deletions(-) diff --git a/src/crypto/randomx/aes_hash.cpp b/src/crypto/randomx/aes_hash.cpp index 571b4ca73..7c4b0c818 100644 --- a/src/crypto/randomx/aes_hash.cpp +++ b/src/crypto/randomx/aes_hash.cpp @@ -50,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Hashing throughput: >20 GiB/s per CPU core with hardware AES */ -template +template void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { const uint8_t* inptr = (uint8_t*)input; const uint8_t* inputEnd = inptr + inputSize; @@ -118,7 +118,7 @@ template void hashAes1Rx4(const void *input, size_t inputSize, void *hash) The modified state is written back to 'state' to allow multiple calls to this function. */ -template +template void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { const uint8_t* outptr = (uint8_t*)buffer; const uint8_t* outputEnd = outptr + outputSize; @@ -159,7 +159,7 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); -template +template void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { const uint8_t* outptr = (uint8_t*)buffer; const uint8_t* outputEnd = outptr + outputSize; @@ -214,7 +214,7 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); -template +template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) { PROFILE_SCOPE(RandomX_AES); @@ -311,5 +311,6 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3); } -template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); -template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); +template void hashAndFillAes1Rx4<0>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); +template void hashAndFillAes1Rx4<1>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); +template void hashAndFillAes1Rx4<2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); diff --git a/src/crypto/randomx/aes_hash.hpp b/src/crypto/randomx/aes_hash.hpp index 9f75f73ae..345ec8d99 100644 --- a/src/crypto/randomx/aes_hash.hpp +++ b/src/crypto/randomx/aes_hash.hpp @@ -30,14 +30,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -template +template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); -template +template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); -template +template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); -template +template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 09746b901..2eff9ab90 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -343,7 +343,6 @@ namespace randomx { r[j] = k; } - constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16); for (int i = 0, n = static_cast(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) { Instruction& instr1 = prog(i); Instruction& instr2 = prog(i + 1); @@ -355,16 +354,9 @@ namespace randomx { InstructionGeneratorX86 gen3 = engine[instr3.opcode]; InstructionGeneratorX86 gen4 = engine[instr4.opcode]; - *((uint64_t*)&instr1) &= instr_mask; (this->*gen1)(instr1); - - *((uint64_t*)&instr2) &= instr_mask; (this->*gen2)(instr2); - - *((uint64_t*)&instr3) &= instr_mask; (this->*gen3)(instr3); - - *((uint64_t*)&instr4) &= instr_mask; (this->*gen4)(instr4); } @@ -518,7 +510,7 @@ namespace randomx { template void JitCompilerX86::genAddressReg(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos); FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) { - const uint32_t dst = static_cast(instr.dst) << 16; + const uint32_t dst = static_cast(instr.dst % RegistersCount) << 16; *(uint32_t*)(code + codePos) = 0x24808d41 + dst; codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3; @@ -540,8 +532,8 @@ namespace randomx { uint32_t pos = codePos; uint8_t* const p = code + pos; - const uint32_t dst = instr.dst; - const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst; + const uint32_t dst = instr.dst % RegistersCount; + const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst; uint32_t k = 0x048d4f + (dst << 19); if (dst == RegisterNeedsDisplacement) @@ -560,8 +552,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; - const uint32_t dst = instr.dst; + const uint32_t src = instr.src % RegistersCount; + const uint32_t dst = instr.dst % RegistersCount; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -585,8 +577,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; - const uint32_t dst = instr.dst; + const uint32_t src = instr.src % RegistersCount; + const uint32_t dst = instr.dst % RegistersCount; if (src != dst) { *(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16); @@ -606,8 +598,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; - const uint32_t dst = instr.dst; + const uint32_t src = instr.src % RegistersCount; + const uint32_t dst = instr.dst % RegistersCount; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -627,8 +619,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; - const uint32_t dst = instr.dst; + const uint32_t src = instr.src % RegistersCount; + const uint32_t dst = instr.dst % RegistersCount; if (src != dst) { emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos); @@ -647,8 +639,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; - const uint64_t dst = instr.dst; + const uint64_t src = instr.src % RegistersCount; + const uint64_t dst = instr.dst % RegistersCount; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -668,8 +660,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; - const uint32_t dst = instr.dst; + const uint32_t src = instr.src % RegistersCount; + const uint32_t dst = instr.dst % RegistersCount; *(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16); *(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16); @@ -684,8 +676,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; - const uint32_t dst = instr.dst; + const uint32_t src = instr.src % RegistersCount; + const uint32_t dst = instr.dst % RegistersCount; *(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16); *(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24); @@ -699,8 +691,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; - const uint64_t dst = instr.dst; + const uint64_t src = instr.src % RegistersCount; + const uint64_t dst = instr.dst % RegistersCount; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -723,8 +715,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; - const uint64_t dst = instr.dst; + const uint64_t src = instr.src % RegistersCount; + const uint64_t dst = instr.dst % RegistersCount; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -746,8 +738,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; - const uint64_t dst = instr.dst; + const uint64_t src = instr.src % RegistersCount; + const uint64_t dst = instr.dst % RegistersCount; *(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40); pos += 8; @@ -761,8 +753,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; - const uint64_t dst = instr.dst; + const uint64_t src = instr.src % RegistersCount; + const uint64_t dst = instr.dst % RegistersCount; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -792,7 +784,7 @@ namespace randomx { emit64(randomx_reciprocal_fast(divisor), p, pos); - const uint32_t dst = instr.dst; + const uint32_t dst = instr.dst % RegistersCount; emit32(0xc0af0f4c + (dst << 27), p, pos); registerUsage[dst] = pos; @@ -805,7 +797,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t dst = instr.dst; + const uint32_t dst = instr.dst % RegistersCount; *(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16); pos += 3; @@ -817,8 +809,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; - const uint64_t dst = instr.dst; + const uint64_t src = instr.src % RegistersCount; + const uint64_t dst = instr.dst % RegistersCount; if (src != dst) { *(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16); @@ -838,8 +830,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; - const uint64_t dst = instr.dst; + const uint64_t src = instr.src % RegistersCount; + const uint64_t dst = instr.dst % RegistersCount; if (src != dst) { genAddressReg(instr, src, p, pos); @@ -859,8 +851,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; - const uint64_t dst = instr.dst; + const uint64_t src = instr.src % RegistersCount; + const uint64_t dst = instr.dst % RegistersCount; if (src != dst) { *(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40); @@ -880,8 +872,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; - const uint64_t dst = instr.dst; + const uint64_t src = instr.src % RegistersCount; + const uint64_t dst = instr.dst % RegistersCount; if (src != dst) { *(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40); @@ -901,8 +893,8 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; - const uint32_t dst = instr.dst; + const uint32_t src = instr.src % RegistersCount; + const uint32_t dst = instr.dst % RegistersCount; if (src != dst) { *(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16); @@ -918,7 +910,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t dst = instr.dst; + const uint64_t dst = instr.dst % RegistersCount; *(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24); pos += 5; @@ -943,7 +935,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; + const uint32_t src = instr.src % RegistersCount; const uint32_t dst = instr.dst % RegisterCountFlt; genAddressReg(instr, src, p, pos); @@ -971,7 +963,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; + const uint32_t src = instr.src % RegistersCount; const uint32_t dst = instr.dst % RegisterCountFlt; genAddressReg(instr, src, p, pos); @@ -1010,7 +1002,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; + const uint32_t src = instr.src % RegistersCount; const uint64_t dst = instr.dst % RegisterCountFlt; genAddressReg(instr, src, p, pos); @@ -1046,7 +1038,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint32_t src = instr.src; + const uint32_t src = instr.src % RegistersCount; *(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16); const int rotate = (static_cast(instr.getImm32() & 63) - 2) & 63; @@ -1070,7 +1062,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const uint64_t src = instr.src; + const uint64_t src = instr.src % RegistersCount; const uint64_t rotate = (static_cast(instr.getImm32() & 63) - 2) & 63; *(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40); @@ -1093,7 +1085,7 @@ namespace randomx { uint8_t* const p = code; uint32_t pos = codePos; - const int reg = instr.dst; + const int reg = instr.dst % RegistersCount; int32_t jmp_offset = registerUsage[reg] - (pos + 16); if (BranchesWithin32B) { @@ -1140,7 +1132,7 @@ namespace randomx { uint32_t pos = codePos; genAddressRegDst(instr, p, pos); - emit32(0x0604894c + (static_cast(instr.src) << 19), p, pos); + emit32(0x0604894c + (static_cast(instr.src % RegistersCount) << 19), p, pos); codePos = pos; } diff --git a/src/crypto/randomx/soft_aes.cpp b/src/crypto/randomx/soft_aes.cpp index 04fb7ac0e..a205398c8 100644 --- a/src/crypto/randomx/soft_aes.cpp +++ b/src/crypto/randomx/soft_aes.cpp @@ -28,6 +28,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "crypto/randomx/soft_aes.h" +#include "crypto/randomx/aes_hash.hpp" +#include "base/tools/Chrono.h" +#include alignas(64) uint32_t lutEnc0[256]; alignas(64) uint32_t lutEnc1[256]; @@ -117,3 +120,43 @@ static struct SAESInitializer } } } aes_initializer; + +static uint32_t softAESImpl = 1; + +uint32_t GetSoftAESImpl() +{ + return softAESImpl; +} + +void SelectSoftAESImpl() +{ + constexpr int test_length_ms = 100; + double speed[2]; + + for (int i = 0; i < 2; ++i) + { + std::vector scratchpad(10 * 1024); + uint8_t hash[64] = {}; + uint8_t state[64] = {}; + + uint64_t t1, t2; + + uint32_t count = 0; + t1 = xmrig::Chrono::highResolutionMSecs(); + do { + if (i == 0) { + hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state); + } + else { + hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state); + } + ++count; + + t2 = xmrig::Chrono::highResolutionMSecs(); + } while (t2 - t1 < test_length_ms); + + speed[i] = count * 1e3 / (t2 - t1); + } + + softAESImpl = (speed[0] > speed[1]) ? 1 : 2; +} diff --git a/src/crypto/randomx/soft_aes.h b/src/crypto/randomx/soft_aes.h index f4142aae2..d03a1a279 100644 --- a/src/crypto/randomx/soft_aes.h +++ b/src/crypto/randomx/soft_aes.h @@ -41,11 +41,14 @@ extern uint32_t lutDec1[256]; extern uint32_t lutDec2[256]; extern uint32_t lutDec3[256]; -template rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key); -template rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key); +uint32_t GetSoftAESImpl(); +void SelectSoftAESImpl(); + +template rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key); +template rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key); template<> -FORCE_INLINE rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key) { +FORCE_INLINE rx_vec_i128 aesenc<1>(rx_vec_i128 in, rx_vec_i128 key) { volatile uint8_t s[16]; memcpy((void*) s, &in, 16); @@ -73,7 +76,7 @@ FORCE_INLINE rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key) { } template<> -FORCE_INLINE rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key) { +FORCE_INLINE rx_vec_i128 aesdec<1>(rx_vec_i128 in, rx_vec_i128 key) { volatile uint8_t s[16]; memcpy((void*) s, &in, 16); @@ -101,11 +104,49 @@ FORCE_INLINE rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key) { } template<> -FORCE_INLINE rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key) { +FORCE_INLINE rx_vec_i128 aesenc<2>(rx_vec_i128 in, rx_vec_i128 key) { + uint32_t s0, s1, s2, s3; + + s0 = rx_vec_i128_w(in); + s1 = rx_vec_i128_z(in); + s2 = rx_vec_i128_y(in); + s3 = rx_vec_i128_x(in); + + rx_vec_i128 out = rx_set_int_vec_i128( + (lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]), + (lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]), + (lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]), + (lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24]) + ); + + return rx_xor_vec_i128(out, key); +} + +template<> +FORCE_INLINE rx_vec_i128 aesdec<2>(rx_vec_i128 in, rx_vec_i128 key) { + uint32_t s0, s1, s2, s3; + + s0 = rx_vec_i128_w(in); + s1 = rx_vec_i128_z(in); + s2 = rx_vec_i128_y(in); + s3 = rx_vec_i128_x(in); + + rx_vec_i128 out = rx_set_int_vec_i128( + (lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]), + (lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]), + (lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]), + (lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24]) + ); + + return rx_xor_vec_i128(out, key); +} + +template<> +FORCE_INLINE rx_vec_i128 aesenc<0>(rx_vec_i128 in, rx_vec_i128 key) { return rx_aesenc_vec_i128(in, key); } template<> -FORCE_INLINE rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key) { +FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) { return rx_aesdec_vec_i128(in, key); } diff --git a/src/crypto/randomx/virtual_machine.cpp b/src/crypto/randomx/virtual_machine.cpp index f00213a30..3a2d675c4 100644 --- a/src/crypto/randomx/virtual_machine.cpp +++ b/src/crypto/randomx/virtual_machine.cpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "crypto/randomx/blake2/blake2.h" #include "crypto/randomx/intrin_portable.h" #include "crypto/randomx/allocator.hpp" +#include "crypto/randomx/soft_aes.h" #include "base/tools/Profiler.h" randomx_vm::~randomx_vm() { @@ -96,11 +97,11 @@ void randomx_vm::initialize() { namespace randomx { - template + template VmBase::~VmBase() { } - template + template void VmBase::setScratchpad(uint8_t *scratchpad) { if (datasetPtr == nullptr) { throw std::invalid_argument("Cache/Dataset not set"); @@ -109,24 +110,35 @@ namespace randomx { this->scratchpad = scratchpad; } - template + template void VmBase::getFinalResult(void* out) { hashAes1Rx4(scratchpad, ScratchpadSize, ®.a); rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile)); } - template + template void VmBase::hashAndFill(void* out, uint64_t (&fill_state)[8]) { - hashAndFillAes1Rx4(scratchpad, ScratchpadSize, ®.a, fill_state); + if (!softAes) { + hashAndFillAes1Rx4<0>(scratchpad, ScratchpadSize, ®.a, fill_state); + } + else { + if (GetSoftAESImpl() == 1) { + hashAndFillAes1Rx4<1>(scratchpad, ScratchpadSize, ®.a, fill_state); + } + else { + hashAndFillAes1Rx4<2>(scratchpad, ScratchpadSize, ®.a, fill_state); + } + } + rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile)); } - template + template void VmBase::initScratchpad(void* seed) { fillAes1Rx4(seed, ScratchpadSize, scratchpad); } - template + template void VmBase::generateProgram(void* seed) { PROFILE_SCOPE(RandomX_generate_program); fillAes4Rx4(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program); diff --git a/src/crypto/randomx/virtual_machine.hpp b/src/crypto/randomx/virtual_machine.hpp index a60e693ae..8d44a7f52 100644 --- a/src/crypto/randomx/virtual_machine.hpp +++ b/src/crypto/randomx/virtual_machine.hpp @@ -79,7 +79,7 @@ protected: namespace randomx { - template + template class VmBase : public randomx_vm { public: diff --git a/src/crypto/randomx/vm_compiled.cpp b/src/crypto/randomx/vm_compiled.cpp index a61797e85..c32034b38 100644 --- a/src/crypto/randomx/vm_compiled.cpp +++ b/src/crypto/randomx/vm_compiled.cpp @@ -35,12 +35,12 @@ namespace randomx { static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters"); static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile"); - template + template void CompiledVm::setDataset(randomx_dataset* dataset) { datasetPtr = dataset; } - template + template void CompiledVm::run(void* seed) { PROFILE_SCOPE(RandomX_run); @@ -52,7 +52,7 @@ namespace randomx { execute(); } - template + template void CompiledVm::execute() { PROFILE_SCOPE(RandomX_JIT_execute); diff --git a/src/crypto/randomx/vm_compiled.hpp b/src/crypto/randomx/vm_compiled.hpp index 22c269068..0e9c4eb8e 100644 --- a/src/crypto/randomx/vm_compiled.hpp +++ b/src/crypto/randomx/vm_compiled.hpp @@ -37,7 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { - template + template class CompiledVm : public VmBase { public: @@ -61,6 +61,6 @@ namespace randomx { JitCompiler compiler; }; - using CompiledVmDefault = CompiledVm; - using CompiledVmHardAes = CompiledVm; + using CompiledVmDefault = CompiledVm<1>; + using CompiledVmHardAes = CompiledVm<0>; } diff --git a/src/crypto/randomx/vm_compiled_light.cpp b/src/crypto/randomx/vm_compiled_light.cpp index 02115cefd..d4f6fe50b 100644 --- a/src/crypto/randomx/vm_compiled_light.cpp +++ b/src/crypto/randomx/vm_compiled_light.cpp @@ -32,14 +32,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { - template + template void CompiledLightVm::setCache(randomx_cache* cache) { cachePtr = cache; mem.memory = cache->memory; compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache); } - template + template void CompiledLightVm::run(void* seed) { VmBase::generateProgram(seed); randomx_vm::initialize(); diff --git a/src/crypto/randomx/vm_compiled_light.hpp b/src/crypto/randomx/vm_compiled_light.hpp index 6d11d60ad..8139a5442 100644 --- a/src/crypto/randomx/vm_compiled_light.hpp +++ b/src/crypto/randomx/vm_compiled_light.hpp @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { - template + template class CompiledLightVm : public CompiledVm { public: @@ -52,6 +52,6 @@ namespace randomx { using CompiledVm::datasetOffset; }; - using CompiledLightVmDefault = CompiledLightVm; - using CompiledLightVmHardAes = CompiledLightVm; + using CompiledLightVmDefault = CompiledLightVm<1>; + using CompiledLightVmHardAes = CompiledLightVm<0>; } diff --git a/src/crypto/randomx/vm_interpreted.cpp b/src/crypto/randomx/vm_interpreted.cpp index e21ecfe69..840ea7688 100644 --- a/src/crypto/randomx/vm_interpreted.cpp +++ b/src/crypto/randomx/vm_interpreted.cpp @@ -33,20 +33,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { - template + template void InterpretedVm::setDataset(randomx_dataset* dataset) { datasetPtr = dataset; mem.memory = dataset->memory; } - template + template void InterpretedVm::run(void* seed) { VmBase::generateProgram(seed); randomx_vm::initialize(); execute(); } - template + template void InterpretedVm::execute() { NativeRegisterFile nreg; @@ -106,14 +106,14 @@ namespace randomx { rx_store_vec_f128(®.e[i].lo, nreg.e[i]); } - template + template void InterpretedVm::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) { uint64_t* datasetLine = (uint64_t*)(mem.memory + address); for (int i = 0; i < RegistersCount; ++i) r[i] ^= datasetLine[i]; } - template + template void InterpretedVm::datasetPrefetch(uint64_t address) { rx_prefetch_nta(mem.memory + address); } diff --git a/src/crypto/randomx/vm_interpreted.hpp b/src/crypto/randomx/vm_interpreted.hpp index d928de749..452ef17f5 100644 --- a/src/crypto/randomx/vm_interpreted.hpp +++ b/src/crypto/randomx/vm_interpreted.hpp @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { - template + template class InterpretedVm : public VmBase, public BytecodeMachine { public: using VmBase::mem; @@ -65,6 +65,6 @@ namespace randomx { InstructionByteCode bytecode[RANDOMX_PROGRAM_MAX_SIZE]; }; - using InterpretedVmDefault = InterpretedVm; - using InterpretedVmHardAes = InterpretedVm; + using InterpretedVmDefault = InterpretedVm<1>; + using InterpretedVmHardAes = InterpretedVm<0>; } diff --git a/src/crypto/randomx/vm_interpreted_light.cpp b/src/crypto/randomx/vm_interpreted_light.cpp index bed6f35bc..589c1211d 100644 --- a/src/crypto/randomx/vm_interpreted_light.cpp +++ b/src/crypto/randomx/vm_interpreted_light.cpp @@ -31,13 +31,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { - template + template void InterpretedLightVm::setCache(randomx_cache* cache) { cachePtr = cache; mem.memory = cache->memory; } - template + template void InterpretedLightVm::datasetRead(uint64_t address, int_reg_t(&r)[8]) { uint32_t itemNumber = address / CacheLineSize; int_reg_t rl[8]; diff --git a/src/crypto/randomx/vm_interpreted_light.hpp b/src/crypto/randomx/vm_interpreted_light.hpp index bec7978b1..8a1b5a364 100644 --- a/src/crypto/randomx/vm_interpreted_light.hpp +++ b/src/crypto/randomx/vm_interpreted_light.hpp @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { - template + template class InterpretedLightVm : public InterpretedVm { public: using VmBase::mem; @@ -50,6 +50,6 @@ namespace randomx { void datasetPrefetch(uint64_t address) override { } }; - using InterpretedLightVmDefault = InterpretedLightVm; - using InterpretedLightVmHardAes = InterpretedLightVm; + using InterpretedLightVmDefault = InterpretedLightVm<1>; + using InterpretedLightVmHardAes = InterpretedLightVm<0>; } diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index a6a1f5c95..79354d7e8 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -33,6 +33,7 @@ #include "crypto/rx/RxConfig.h" #include "crypto/rx/RxQueue.h" #include "crypto/randomx/randomx.h" +#include "crypto/randomx/soft_aes.h" namespace xmrig { @@ -113,6 +114,9 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu if (!osInitialized) { setupMainLoopExceptionFrame(); + if (!cpu.isHwAES()) { + SelectSoftAESImpl(); + } osInitialized = true; }