mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-23 12:09:22 +00:00
Merge pull request #1835 from SChernykh/dev
RandomX: returned old soft AES impl and auto-select between the two
This commit is contained in:
commit
d11a313d88
16 changed files with 194 additions and 101 deletions
|
@ -50,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
Hashing throughput: >20 GiB/s per CPU core with hardware AES
|
||||
*/
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
|
||||
const uint8_t* inptr = (uint8_t*)input;
|
||||
const uint8_t* inputEnd = inptr + inputSize;
|
||||
|
@ -118,7 +118,7 @@ template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash)
|
|||
The modified state is written back to 'state' to allow multiple
|
||||
calls to this function.
|
||||
*/
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
@ -159,7 +159,7 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
|||
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
@ -214,7 +214,7 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
|||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||
PROFILE_SCOPE(RandomX_AES);
|
||||
|
||||
|
@ -311,5 +311,6 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
|||
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
||||
}
|
||||
|
||||
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<0>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<1>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||
|
|
|
@ -30,14 +30,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include <cstddef>
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
|
|
|
@ -343,7 +343,6 @@ namespace randomx {
|
|||
r[j] = k;
|
||||
}
|
||||
|
||||
constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
|
||||
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) {
|
||||
Instruction& instr1 = prog(i);
|
||||
Instruction& instr2 = prog(i + 1);
|
||||
|
@ -355,16 +354,9 @@ namespace randomx {
|
|||
InstructionGeneratorX86 gen3 = engine[instr3.opcode];
|
||||
InstructionGeneratorX86 gen4 = engine[instr4.opcode];
|
||||
|
||||
*((uint64_t*)&instr1) &= instr_mask;
|
||||
(this->*gen1)(instr1);
|
||||
|
||||
*((uint64_t*)&instr2) &= instr_mask;
|
||||
(this->*gen2)(instr2);
|
||||
|
||||
*((uint64_t*)&instr3) &= instr_mask;
|
||||
(this->*gen3)(instr3);
|
||||
|
||||
*((uint64_t*)&instr4) &= instr_mask;
|
||||
(this->*gen4)(instr4);
|
||||
}
|
||||
|
||||
|
@ -518,7 +510,7 @@ namespace randomx {
|
|||
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
|
||||
|
||||
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
|
||||
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
|
||||
const uint32_t dst = static_cast<uint32_t>(instr.dst % RegistersCount) << 16;
|
||||
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
|
||||
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
|
||||
|
||||
|
@ -540,8 +532,8 @@ namespace randomx {
|
|||
uint32_t pos = codePos;
|
||||
uint8_t* const p = code + pos;
|
||||
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst;
|
||||
|
||||
uint32_t k = 0x048d4f + (dst << 19);
|
||||
if (dst == RegisterNeedsDisplacement)
|
||||
|
@ -560,8 +552,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
|
@ -585,8 +577,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
|
||||
|
@ -606,8 +598,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
|
@ -627,8 +619,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
|
||||
|
@ -647,8 +639,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
|
@ -668,8 +660,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
|
||||
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
|
||||
|
@ -684,8 +676,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
|
||||
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
|
||||
|
@ -699,8 +691,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<false>(instr, src, p, pos);
|
||||
|
@ -723,8 +715,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<false>(instr, src, p, pos);
|
||||
|
@ -746,8 +738,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
|
||||
pos += 8;
|
||||
|
@ -761,8 +753,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<false>(instr, src, p, pos);
|
||||
|
@ -792,7 +784,7 @@ namespace randomx {
|
|||
|
||||
emit64(randomx_reciprocal_fast(divisor), p, pos);
|
||||
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
||||
|
||||
registerUsage[dst] = pos;
|
||||
|
@ -805,7 +797,7 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
|
||||
pos += 3;
|
||||
|
||||
|
@ -817,8 +809,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
|
||||
|
@ -838,8 +830,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
|
@ -859,8 +851,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
|
||||
|
@ -880,8 +872,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
|
||||
|
@ -901,8 +893,8 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t dst = instr.dst;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
|
||||
if (src != dst) {
|
||||
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
|
||||
|
@ -918,7 +910,7 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t dst = instr.dst;
|
||||
const uint64_t dst = instr.dst % RegistersCount;
|
||||
|
||||
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
|
||||
pos += 5;
|
||||
|
@ -943,7 +935,7 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
|
@ -971,7 +963,7 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
|
@ -1010,7 +1002,7 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||
|
||||
genAddressReg<true>(instr, src, p, pos);
|
||||
|
@ -1046,7 +1038,7 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint32_t src = instr.src;
|
||||
const uint32_t src = instr.src % RegistersCount;
|
||||
|
||||
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
|
||||
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
||||
|
@ -1070,7 +1062,7 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const uint64_t src = instr.src;
|
||||
const uint64_t src = instr.src % RegistersCount;
|
||||
|
||||
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
||||
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
|
||||
|
@ -1093,7 +1085,7 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
||||
const int reg = instr.dst;
|
||||
const int reg = instr.dst % RegistersCount;
|
||||
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
||||
|
||||
if (BranchesWithin32B) {
|
||||
|
@ -1140,7 +1132,7 @@ namespace randomx {
|
|||
uint32_t pos = codePos;
|
||||
|
||||
genAddressRegDst(instr, p, pos);
|
||||
emit32(0x0604894c + (static_cast<uint32_t>(instr.src) << 19), p, pos);
|
||||
emit32(0x0604894c + (static_cast<uint32_t>(instr.src % RegistersCount) << 19), p, pos);
|
||||
|
||||
codePos = pos;
|
||||
}
|
||||
|
|
|
@ -28,6 +28,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*/
|
||||
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "crypto/randomx/aes_hash.hpp"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include <vector>
|
||||
|
||||
alignas(64) uint32_t lutEnc0[256];
|
||||
alignas(64) uint32_t lutEnc1[256];
|
||||
|
@ -117,3 +120,43 @@ static struct SAESInitializer
|
|||
}
|
||||
}
|
||||
} aes_initializer;
|
||||
|
||||
static uint32_t softAESImpl = 1;
|
||||
|
||||
uint32_t GetSoftAESImpl()
|
||||
{
|
||||
return softAESImpl;
|
||||
}
|
||||
|
||||
void SelectSoftAESImpl()
|
||||
{
|
||||
constexpr int test_length_ms = 100;
|
||||
double speed[2];
|
||||
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
std::vector<uint8_t> scratchpad(10 * 1024);
|
||||
uint8_t hash[64] = {};
|
||||
uint8_t state[64] = {};
|
||||
|
||||
uint64_t t1, t2;
|
||||
|
||||
uint32_t count = 0;
|
||||
t1 = xmrig::Chrono::highResolutionMSecs();
|
||||
do {
|
||||
if (i == 0) {
|
||||
hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||
}
|
||||
else {
|
||||
hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||
}
|
||||
++count;
|
||||
|
||||
t2 = xmrig::Chrono::highResolutionMSecs();
|
||||
} while (t2 - t1 < test_length_ms);
|
||||
|
||||
speed[i] = count * 1e3 / (t2 - t1);
|
||||
}
|
||||
|
||||
softAESImpl = (speed[0] > speed[1]) ? 1 : 2;
|
||||
}
|
||||
|
|
|
@ -41,11 +41,14 @@ extern uint32_t lutDec1[256];
|
|||
extern uint32_t lutDec2[256];
|
||||
extern uint32_t lutDec3[256];
|
||||
|
||||
template<bool soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
||||
template<bool soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
||||
uint32_t GetSoftAESImpl();
|
||||
void SelectSoftAESImpl();
|
||||
|
||||
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
||||
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
FORCE_INLINE rx_vec_i128 aesenc<1>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
volatile uint8_t s[16];
|
||||
memcpy((void*) s, &in, 16);
|
||||
|
||||
|
@ -73,7 +76,7 @@ FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
|||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
FORCE_INLINE rx_vec_i128 aesdec<1>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
volatile uint8_t s[16];
|
||||
memcpy((void*) s, &in, 16);
|
||||
|
||||
|
@ -101,11 +104,49 @@ FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
|||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesenc<false>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
FORCE_INLINE rx_vec_i128 aesenc<2>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
uint32_t s0, s1, s2, s3;
|
||||
|
||||
s0 = rx_vec_i128_w(in);
|
||||
s1 = rx_vec_i128_z(in);
|
||||
s2 = rx_vec_i128_y(in);
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]),
|
||||
(lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]),
|
||||
(lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]),
|
||||
(lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24])
|
||||
);
|
||||
|
||||
return rx_xor_vec_i128(out, key);
|
||||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesdec<2>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
uint32_t s0, s1, s2, s3;
|
||||
|
||||
s0 = rx_vec_i128_w(in);
|
||||
s1 = rx_vec_i128_z(in);
|
||||
s2 = rx_vec_i128_y(in);
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]),
|
||||
(lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]),
|
||||
(lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]),
|
||||
(lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24])
|
||||
);
|
||||
|
||||
return rx_xor_vec_i128(out, key);
|
||||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesenc<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
return rx_aesenc_vec_i128(in, key);
|
||||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesdec<false>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
return rx_aesdec_vec_i128(in, key);
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "crypto/randomx/blake2/blake2.h"
|
||||
#include "crypto/randomx/intrin_portable.h"
|
||||
#include "crypto/randomx/allocator.hpp"
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "base/tools/Profiler.h"
|
||||
|
||||
randomx_vm::~randomx_vm() {
|
||||
|
@ -96,11 +97,11 @@ void randomx_vm::initialize() {
|
|||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
VmBase<softAes>::~VmBase() {
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::setScratchpad(uint8_t *scratchpad) {
|
||||
if (datasetPtr == nullptr) {
|
||||
throw std::invalid_argument("Cache/Dataset not set");
|
||||
|
@ -109,24 +110,35 @@ namespace randomx {
|
|||
this->scratchpad = scratchpad;
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::getFinalResult(void* out) {
|
||||
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a);
|
||||
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile));
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
|
||||
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
if (!softAes) {
|
||||
hashAndFillAes1Rx4<0>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
}
|
||||
else {
|
||||
if (GetSoftAESImpl() == 1) {
|
||||
hashAndFillAes1Rx4<1>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
}
|
||||
else {
|
||||
hashAndFillAes1Rx4<2>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
}
|
||||
}
|
||||
|
||||
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile));
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::initScratchpad(void* seed) {
|
||||
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void VmBase<softAes>::generateProgram(void* seed) {
|
||||
PROFILE_SCOPE(RandomX_generate_program);
|
||||
fillAes4Rx4<softAes>(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program);
|
||||
|
|
|
@ -79,7 +79,7 @@ protected:
|
|||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class VmBase : public randomx_vm
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -35,12 +35,12 @@ namespace randomx {
|
|||
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
|
||||
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledVm<softAes>::setDataset(randomx_dataset* dataset) {
|
||||
datasetPtr = dataset;
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledVm<softAes>::run(void* seed) {
|
||||
PROFILE_SCOPE(RandomX_run);
|
||||
|
||||
|
@ -52,7 +52,7 @@ namespace randomx {
|
|||
execute();
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledVm<softAes>::execute() {
|
||||
PROFILE_SCOPE(RandomX_JIT_execute);
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class CompiledVm : public VmBase<softAes>
|
||||
{
|
||||
public:
|
||||
|
@ -61,6 +61,6 @@ namespace randomx {
|
|||
JitCompiler compiler;
|
||||
};
|
||||
|
||||
using CompiledVmDefault = CompiledVm<true>;
|
||||
using CompiledVmHardAes = CompiledVm<false>;
|
||||
using CompiledVmDefault = CompiledVm<1>;
|
||||
using CompiledVmHardAes = CompiledVm<0>;
|
||||
}
|
||||
|
|
|
@ -32,14 +32,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledLightVm<softAes>::setCache(randomx_cache* cache) {
|
||||
cachePtr = cache;
|
||||
mem.memory = cache->memory;
|
||||
compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void CompiledLightVm<softAes>::run(void* seed) {
|
||||
VmBase<softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
|
|
|
@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class CompiledLightVm : public CompiledVm<softAes>
|
||||
{
|
||||
public:
|
||||
|
@ -52,6 +52,6 @@ namespace randomx {
|
|||
using CompiledVm<softAes>::datasetOffset;
|
||||
};
|
||||
|
||||
using CompiledLightVmDefault = CompiledLightVm<true>;
|
||||
using CompiledLightVmHardAes = CompiledLightVm<false>;
|
||||
using CompiledLightVmDefault = CompiledLightVm<1>;
|
||||
using CompiledLightVmHardAes = CompiledLightVm<0>;
|
||||
}
|
||||
|
|
|
@ -33,20 +33,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::setDataset(randomx_dataset* dataset) {
|
||||
datasetPtr = dataset;
|
||||
mem.memory = dataset->memory;
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::run(void* seed) {
|
||||
VmBase<softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
execute();
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::execute() {
|
||||
|
||||
NativeRegisterFile nreg;
|
||||
|
@ -106,14 +106,14 @@ namespace randomx {
|
|||
rx_store_vec_f128(®.e[i].lo, nreg.e[i]);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
|
||||
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedVm<softAes>::datasetPrefetch(uint64_t address) {
|
||||
rx_prefetch_nta(mem.memory + address);
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class InterpretedVm : public VmBase<softAes>, public BytecodeMachine {
|
||||
public:
|
||||
using VmBase<softAes>::mem;
|
||||
|
@ -65,6 +65,6 @@ namespace randomx {
|
|||
InstructionByteCode bytecode[RANDOMX_PROGRAM_MAX_SIZE];
|
||||
};
|
||||
|
||||
using InterpretedVmDefault = InterpretedVm<true>;
|
||||
using InterpretedVmHardAes = InterpretedVm<false>;
|
||||
using InterpretedVmDefault = InterpretedVm<1>;
|
||||
using InterpretedVmHardAes = InterpretedVm<0>;
|
||||
}
|
||||
|
|
|
@ -31,13 +31,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedLightVm<softAes>::setCache(randomx_cache* cache) {
|
||||
cachePtr = cache;
|
||||
mem.memory = cache->memory;
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
void InterpretedLightVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[8]) {
|
||||
uint32_t itemNumber = address / CacheLineSize;
|
||||
int_reg_t rl[8];
|
||||
|
|
|
@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
namespace randomx {
|
||||
|
||||
template<bool softAes>
|
||||
template<int softAes>
|
||||
class InterpretedLightVm : public InterpretedVm<softAes> {
|
||||
public:
|
||||
using VmBase<softAes>::mem;
|
||||
|
@ -50,6 +50,6 @@ namespace randomx {
|
|||
void datasetPrefetch(uint64_t address) override { }
|
||||
};
|
||||
|
||||
using InterpretedLightVmDefault = InterpretedLightVm<true>;
|
||||
using InterpretedLightVmHardAes = InterpretedLightVm<false>;
|
||||
using InterpretedLightVmDefault = InterpretedLightVm<1>;
|
||||
using InterpretedLightVmHardAes = InterpretedLightVm<0>;
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "crypto/rx/RxConfig.h"
|
||||
#include "crypto/rx/RxQueue.h"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
@ -113,6 +114,9 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
|
|||
|
||||
if (!osInitialized) {
|
||||
setupMainLoopExceptionFrame();
|
||||
if (!cpu.isHwAES()) {
|
||||
SelectSoftAESImpl();
|
||||
}
|
||||
osInitialized = true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue