Merge pull request #1835 from SChernykh/dev

RandomX: restored the old soft AES implementation and added auto-selection between the two
This commit is contained in:
xmrig 2020-09-16 01:54:40 +07:00 committed by GitHub
commit d11a313d88
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 194 additions and 101 deletions

View file

@ -50,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Hashing throughput: >20 GiB/s per CPU core with hardware AES
*/
template<bool softAes>
template<int softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
const uint8_t* inptr = (uint8_t*)input;
const uint8_t* inputEnd = inptr + inputSize;
@ -118,7 +118,7 @@ template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash)
The modified state is written back to 'state' to allow multiple
calls to this function.
*/
template<bool softAes>
template<int softAes>
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
@ -159,7 +159,7 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
template<bool softAes>
template<int softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
@ -214,7 +214,7 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
template<bool softAes>
template<int softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
PROFILE_SCOPE(RandomX_AES);
@ -311,5 +311,6 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
}
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<0>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<1>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);

View file

@ -30,14 +30,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstddef>
template<bool softAes>
template<int softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
template<bool softAes>
template<int softAes>
void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes>
template<int softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes>
template<int softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

View file

@ -343,7 +343,6 @@ namespace randomx {
r[j] = k;
}
constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) {
Instruction& instr1 = prog(i);
Instruction& instr2 = prog(i + 1);
@ -355,16 +354,9 @@ namespace randomx {
InstructionGeneratorX86 gen3 = engine[instr3.opcode];
InstructionGeneratorX86 gen4 = engine[instr4.opcode];
*((uint64_t*)&instr1) &= instr_mask;
(this->*gen1)(instr1);
*((uint64_t*)&instr2) &= instr_mask;
(this->*gen2)(instr2);
*((uint64_t*)&instr3) &= instr_mask;
(this->*gen3)(instr3);
*((uint64_t*)&instr4) &= instr_mask;
(this->*gen4)(instr4);
}
@ -518,7 +510,7 @@ namespace randomx {
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
const uint32_t dst = static_cast<uint32_t>(instr.dst % RegistersCount) << 16;
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
@ -540,8 +532,8 @@ namespace randomx {
uint32_t pos = codePos;
uint8_t* const p = code + pos;
const uint32_t dst = instr.dst;
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst;
uint32_t k = 0x048d4f + (dst << 19);
if (dst == RegisterNeedsDisplacement)
@ -560,8 +552,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@ -585,8 +577,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) {
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
@ -606,8 +598,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@ -627,8 +619,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) {
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
@ -647,8 +639,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@ -668,8 +660,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
@ -684,8 +676,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
@ -699,8 +691,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) {
genAddressReg<false>(instr, src, p, pos);
@ -723,8 +715,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) {
genAddressReg<false>(instr, src, p, pos);
@ -746,8 +738,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
pos += 8;
@ -761,8 +753,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) {
genAddressReg<false>(instr, src, p, pos);
@ -792,7 +784,7 @@ namespace randomx {
emit64(randomx_reciprocal_fast(divisor), p, pos);
const uint32_t dst = instr.dst;
const uint32_t dst = instr.dst % RegistersCount;
emit32(0xc0af0f4c + (dst << 27), p, pos);
registerUsage[dst] = pos;
@ -805,7 +797,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t dst = instr.dst;
const uint32_t dst = instr.dst % RegistersCount;
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
pos += 3;
@ -817,8 +809,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) {
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
@ -838,8 +830,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@ -859,8 +851,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) {
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
@ -880,8 +872,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) {
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
@ -901,8 +893,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) {
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
@ -918,7 +910,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t dst = instr.dst;
const uint64_t dst = instr.dst % RegistersCount;
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
pos += 5;
@ -943,7 +935,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos);
@ -971,7 +963,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos);
@ -1010,7 +1002,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos);
@ -1046,7 +1038,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src;
const uint32_t src = instr.src % RegistersCount;
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
@ -1070,7 +1062,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src;
const uint64_t src = instr.src % RegistersCount;
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
@ -1093,7 +1085,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const int reg = instr.dst;
const int reg = instr.dst % RegistersCount;
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
if (BranchesWithin32B) {
@ -1140,7 +1132,7 @@ namespace randomx {
uint32_t pos = codePos;
genAddressRegDst(instr, p, pos);
emit32(0x0604894c + (static_cast<uint32_t>(instr.src) << 19), p, pos);
emit32(0x0604894c + (static_cast<uint32_t>(instr.src % RegistersCount) << 19), p, pos);
codePos = pos;
}

View file

@ -28,6 +28,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "crypto/randomx/soft_aes.h"
#include "crypto/randomx/aes_hash.hpp"
#include "base/tools/Chrono.h"
#include <vector>
alignas(64) uint32_t lutEnc0[256];
alignas(64) uint32_t lutEnc1[256];
@ -117,3 +120,43 @@ static struct SAESInitializer
}
}
} aes_initializer;
// Identifier of the software AES implementation currently in use.
// Starts at 1; SelectSoftAESImpl() overwrites it with 1 or 2 depending on
// which variant benchmarked faster.
static uint32_t softAESImpl = 1;
// Returns the current value of softAESImpl (1 unless SelectSoftAESImpl()
// has stored a different choice).
uint32_t GetSoftAESImpl()
{
return softAESImpl;
}
// Benchmarks the two software AES variants and records the faster one.
//
// Each candidate (hashAndFillAes1Rx4<1>, then hashAndFillAes1Rx4<2>) is called
// repeatedly on a freshly zero-initialised 10 KiB scratchpad until at least
// test_length_ms milliseconds have elapsed. The measured calls-per-second
// figures are compared and the winner's identifier (1 or 2) is stored in
// softAESImpl, which GetSoftAESImpl() returns. A tie selects implementation 2.
void SelectSoftAESImpl()
{
    // Same type as the timestamps so the loop condition compares like with like.
    constexpr uint64_t test_length_ms = 100;

    double speed[2] = {};

    for (int i = 0; i < 2; ++i)
    {
        // NOTE(review): the scratchpad relies on the allocator returning
        // suitably aligned storage for vector loads - confirm on 32-bit targets.
        std::vector<uint8_t> scratchpad(10 * 1024);

        // hashAndFillAes1Rx4 accesses the hash buffer through rx_vec_i128
        // pointers (and presumably the fill state as well), so both must be
        // aligned for 16-byte vector loads/stores. A plain uint8_t array only
        // guarantees 1-byte alignment.
        alignas(16) uint8_t hash[64] = {};
        alignas(16) uint8_t state[64] = {};

        uint32_t count = 0;
        uint64_t t2 = 0;
        const uint64_t t1 = xmrig::Chrono::highResolutionMSecs();

        do {
            if (i == 0) {
                hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
            }
            else {
                hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
            }
            ++count;
            t2 = xmrig::Chrono::highResolutionMSecs();
        } while (t2 - t1 < test_length_ms);

        // The loop only exits once t2 - t1 >= test_length_ms, so the divisor is non-zero.
        speed[i] = count * 1e3 / (t2 - t1);
    }

    softAESImpl = (speed[0] > speed[1]) ? 1 : 2;
}

View file

@ -41,11 +41,14 @@ extern uint32_t lutDec1[256];
extern uint32_t lutDec2[256];
extern uint32_t lutDec3[256];
template<bool soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
template<bool soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
uint32_t GetSoftAESImpl();
void SelectSoftAESImpl();
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
template<>
FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
FORCE_INLINE rx_vec_i128 aesenc<1>(rx_vec_i128 in, rx_vec_i128 key) {
volatile uint8_t s[16];
memcpy((void*) s, &in, 16);
@ -73,7 +76,7 @@ FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
}
template<>
FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
FORCE_INLINE rx_vec_i128 aesdec<1>(rx_vec_i128 in, rx_vec_i128 key) {
volatile uint8_t s[16];
memcpy((void*) s, &in, 16);
@ -101,11 +104,49 @@ FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
}
template<>
FORCE_INLINE rx_vec_i128 aesenc<false>(rx_vec_i128 in, rx_vec_i128 key) {
FORCE_INLINE rx_vec_i128 aesenc<2>(rx_vec_i128 in, rx_vec_i128 key) {
    // Table-driven variant: every output word is the XOR of four lutEnc*
    // lookups, each indexed by one byte taken from a different input word;
    // the assembled vector is then XORed with the key.
    const uint32_t w0 = rx_vec_i128_w(in);
    const uint32_t w1 = rx_vec_i128_z(in);
    const uint32_t w2 = rx_vec_i128_y(in);
    const uint32_t w3 = rx_vec_i128_x(in);

    const uint32_t r0 = lutEnc0[w0 & 0xff] ^ lutEnc1[(w3 >> 8) & 0xff] ^ lutEnc2[(w2 >> 16) & 0xff] ^ lutEnc3[w1 >> 24];
    const uint32_t r1 = lutEnc0[w1 & 0xff] ^ lutEnc1[(w0 >> 8) & 0xff] ^ lutEnc2[(w3 >> 16) & 0xff] ^ lutEnc3[w2 >> 24];
    const uint32_t r2 = lutEnc0[w2 & 0xff] ^ lutEnc1[(w1 >> 8) & 0xff] ^ lutEnc2[(w0 >> 16) & 0xff] ^ lutEnc3[w3 >> 24];
    const uint32_t r3 = lutEnc0[w3 & 0xff] ^ lutEnc1[(w2 >> 8) & 0xff] ^ lutEnc2[(w1 >> 16) & 0xff] ^ lutEnc3[w0 >> 24];

    return rx_xor_vec_i128(rx_set_int_vec_i128(r0, r1, r2, r3), key);
}
// Table-driven variant of aesdec: every output word is the XOR of four
// lutDec* lookups, each indexed by one byte taken from a different input
// word; the assembled vector is then XORed with the key.
template<>
FORCE_INLINE rx_vec_i128 aesdec<2>(rx_vec_i128 in, rx_vec_i128 key) {
    const uint32_t w0 = rx_vec_i128_w(in);
    const uint32_t w1 = rx_vec_i128_z(in);
    const uint32_t w2 = rx_vec_i128_y(in);
    const uint32_t w3 = rx_vec_i128_x(in);

    const uint32_t r0 = lutDec0[w0 & 0xff] ^ lutDec1[(w1 >> 8) & 0xff] ^ lutDec2[(w2 >> 16) & 0xff] ^ lutDec3[w3 >> 24];
    const uint32_t r1 = lutDec0[w1 & 0xff] ^ lutDec1[(w2 >> 8) & 0xff] ^ lutDec2[(w3 >> 16) & 0xff] ^ lutDec3[w0 >> 24];
    const uint32_t r2 = lutDec0[w2 & 0xff] ^ lutDec1[(w3 >> 8) & 0xff] ^ lutDec2[(w0 >> 16) & 0xff] ^ lutDec3[w1 >> 24];
    const uint32_t r3 = lutDec0[w3 & 0xff] ^ lutDec1[(w0 >> 8) & 0xff] ^ lutDec2[(w1 >> 16) & 0xff] ^ lutDec3[w2 >> 24];

    return rx_xor_vec_i128(rx_set_int_vec_i128(r0, r1, r2, r3), key);
}
// Specialization for soft == 0: performs no table lookups and simply forwards
// both arguments to rx_aesenc_vec_i128.
// NOTE(review): rx_aesenc_vec_i128 is presumably the hardware AES wrapper - confirm.
template<>
FORCE_INLINE rx_vec_i128 aesenc<0>(rx_vec_i128 in, rx_vec_i128 key) {
return rx_aesenc_vec_i128(in, key);
}
template<>
FORCE_INLINE rx_vec_i128 aesdec<false>(rx_vec_i128 in, rx_vec_i128 key) {
FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
// Specialization for soft == 0: no table lookups, forwards both arguments
// to rx_aesdec_vec_i128.
// NOTE(review): presumably the hardware AES wrapper - confirm.
return rx_aesdec_vec_i128(in, key);
}

View file

@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/blake2/blake2.h"
#include "crypto/randomx/intrin_portable.h"
#include "crypto/randomx/allocator.hpp"
#include "crypto/randomx/soft_aes.h"
#include "base/tools/Profiler.h"
randomx_vm::~randomx_vm() {
@ -96,11 +97,11 @@ void randomx_vm::initialize() {
namespace randomx {
template<bool softAes>
// Out-of-line definition of the VmBase destructor; the body is intentionally empty.
template<int softAes>
VmBase<softAes>::~VmBase() {
}
template<bool softAes>
template<int softAes>
void VmBase<softAes>::setScratchpad(uint8_t *scratchpad) {
if (datasetPtr == nullptr) {
throw std::invalid_argument("Cache/Dataset not set");
@ -109,24 +110,35 @@ namespace randomx {
this->scratchpad = scratchpad;
}
template<bool softAes>
// Produces the final result in `out`.
// First hashAes1Rx4<softAes> hashes the ScratchpadSize-byte scratchpad and
// writes that hash to reg.a. Then the whole register file (sizeof(RegisterFile)
// bytes starting at &reg) is handed to rx_blake2b_wrapper::run together with
// `out` and RANDOMX_HASH_SIZE (presumably the output buffer and its length).
template<int softAes>
void VmBase<softAes>::getFinalResult(void* out) {
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a);
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile));
}
template<bool softAes>
template<int softAes>
void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a, fill_state);
if (!softAes) {
hashAndFillAes1Rx4<0>(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
else {
if (GetSoftAESImpl() == 1) {
hashAndFillAes1Rx4<1>(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
else {
hashAndFillAes1Rx4<2>(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
}
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile));
}
template<bool softAes>
// Fills the scratchpad (ScratchpadSize bytes) by calling fillAes1Rx4<softAes>
// with `seed` as the generator state. fillAes1Rx4 writes its modified state
// back, so the memory behind `seed` is updated by this call.
template<int softAes>
void VmBase<softAes>::initScratchpad(void* seed) {
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
}
template<bool softAes>
template<int softAes>
void VmBase<softAes>::generateProgram(void* seed) {
PROFILE_SCOPE(RandomX_generate_program);
fillAes4Rx4<softAes>(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program);

View file

@ -79,7 +79,7 @@ protected:
namespace randomx {
template<bool softAes>
template<int softAes>
class VmBase : public randomx_vm
{
public:

View file

@ -35,12 +35,12 @@ namespace randomx {
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
template<bool softAes>
// Remembers the dataset this VM should use by storing the pointer in datasetPtr.
template<int softAes>
void CompiledVm<softAes>::setDataset(randomx_dataset* dataset) {
datasetPtr = dataset;
}
template<bool softAes>
template<int softAes>
void CompiledVm<softAes>::run(void* seed) {
PROFILE_SCOPE(RandomX_run);
@ -52,7 +52,7 @@ namespace randomx {
execute();
}
template<bool softAes>
template<int softAes>
void CompiledVm<softAes>::execute() {
PROFILE_SCOPE(RandomX_JIT_execute);

View file

@ -37,7 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
template<bool softAes>
template<int softAes>
class CompiledVm : public VmBase<softAes>
{
public:
@ -61,6 +61,6 @@ namespace randomx {
JitCompiler compiler;
};
using CompiledVmDefault = CompiledVm<true>;
using CompiledVmHardAes = CompiledVm<false>;
using CompiledVmDefault = CompiledVm<1>;
using CompiledVmHardAes = CompiledVm<0>;
}

View file

@ -32,14 +32,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
template<bool softAes>
// Attaches a cache to this VM: stores the pointer in cachePtr, points
// mem.memory at the cache's memory, and asks the compiler to generate the
// superscalar hash code from the cache's programs and reciprocal cache.
template<int softAes>
void CompiledLightVm<softAes>::setCache(randomx_cache* cache) {
cachePtr = cache;
mem.memory = cache->memory;
compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
}
template<bool softAes>
template<int softAes>
void CompiledLightVm<softAes>::run(void* seed) {
VmBase<softAes>::generateProgram(seed);
randomx_vm::initialize();

View file

@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
template<bool softAes>
template<int softAes>
class CompiledLightVm : public CompiledVm<softAes>
{
public:
@ -52,6 +52,6 @@ namespace randomx {
using CompiledVm<softAes>::datasetOffset;
};
using CompiledLightVmDefault = CompiledLightVm<true>;
using CompiledLightVmHardAes = CompiledLightVm<false>;
using CompiledLightVmDefault = CompiledLightVm<1>;
using CompiledLightVmHardAes = CompiledLightVm<0>;
}

View file

@ -33,20 +33,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
template<bool softAes>
// Attaches a dataset to this VM: stores the pointer in datasetPtr and points
// mem.memory at the dataset's memory.
template<int softAes>
void InterpretedVm<softAes>::setDataset(randomx_dataset* dataset) {
datasetPtr = dataset;
mem.memory = dataset->memory;
}
template<bool softAes>
// Runs one program: generates it from `seed` via VmBase::generateProgram,
// calls randomx_vm::initialize(), then executes it with the interpreter.
template<int softAes>
void InterpretedVm<softAes>::run(void* seed) {
VmBase<softAes>::generateProgram(seed);
randomx_vm::initialize();
execute();
}
template<bool softAes>
template<int softAes>
void InterpretedVm<softAes>::execute() {
NativeRegisterFile nreg;
@ -106,14 +106,14 @@ namespace randomx {
rx_store_vec_f128(&reg.e[i].lo, nreg.e[i]);
}
template<bool softAes>
// XORs RegistersCount consecutive 64-bit words, read from mem.memory at byte
// offset `address`, into the integer registers r[0..RegistersCount-1].
template<int softAes>
void InterpretedVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
    const uint64_t* word = (const uint64_t*)(mem.memory + address);
    for (int_reg_t& regValue : r) {
        regValue ^= *word++;
    }
}
template<bool softAes>
// Issues rx_prefetch_nta for the location at byte offset `address` within mem.memory.
template<int softAes>
void InterpretedVm<softAes>::datasetPrefetch(uint64_t address) {
rx_prefetch_nta(mem.memory + address);
}

View file

@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
template<bool softAes>
template<int softAes>
class InterpretedVm : public VmBase<softAes>, public BytecodeMachine {
public:
using VmBase<softAes>::mem;
@ -65,6 +65,6 @@ namespace randomx {
InstructionByteCode bytecode[RANDOMX_PROGRAM_MAX_SIZE];
};
using InterpretedVmDefault = InterpretedVm<true>;
using InterpretedVmHardAes = InterpretedVm<false>;
using InterpretedVmDefault = InterpretedVm<1>;
using InterpretedVmHardAes = InterpretedVm<0>;
}

View file

@ -31,13 +31,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
template<bool softAes>
// Attaches a cache to this VM: stores the pointer in cachePtr and points
// mem.memory at the cache's memory.
template<int softAes>
void InterpretedLightVm<softAes>::setCache(randomx_cache* cache) {
cachePtr = cache;
mem.memory = cache->memory;
}
template<bool softAes>
template<int softAes>
void InterpretedLightVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[8]) {
uint32_t itemNumber = address / CacheLineSize;
int_reg_t rl[8];

View file

@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {
template<bool softAes>
template<int softAes>
class InterpretedLightVm : public InterpretedVm<softAes> {
public:
using VmBase<softAes>::mem;
@ -50,6 +50,6 @@ namespace randomx {
void datasetPrefetch(uint64_t address) override { }
};
using InterpretedLightVmDefault = InterpretedLightVm<true>;
using InterpretedLightVmHardAes = InterpretedLightVm<false>;
using InterpretedLightVmDefault = InterpretedLightVm<1>;
using InterpretedLightVmHardAes = InterpretedLightVm<0>;
}

View file

@ -33,6 +33,7 @@
#include "crypto/rx/RxConfig.h"
#include "crypto/rx/RxQueue.h"
#include "crypto/randomx/randomx.h"
#include "crypto/randomx/soft_aes.h"
namespace xmrig {
@ -113,6 +114,9 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
if (!osInitialized) {
setupMainLoopExceptionFrame();
if (!cpu.isHwAES()) {
SelectSoftAESImpl();
}
osInitialized = true;
}