mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-23 12:09:22 +00:00
RandomX: AES improvements
- A bit faster hardware AES code when compiled with MSVC
- More reliable software AES benchmark
This commit is contained in:
parent
db920e8006
commit
891a46382e
2 changed files with 44 additions and 49 deletions
|
@ -244,38 +244,29 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
//process 64 bytes at a time in 4 lanes
|
//process 64 bytes at a time in 4 lanes
|
||||||
while (scratchpadPtr < scratchpadEnd) {
|
while (scratchpadPtr < scratchpadEnd) {
|
||||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
|
#define HASH_STATE(k) \
|
||||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
|
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0)); \
|
||||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
|
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1)); \
|
||||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
|
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2)); \
|
||||||
|
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3));
|
||||||
|
|
||||||
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
#define FILL_STATE(k) \
|
||||||
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
fill_state0 = aesdec<softAes>(fill_state0, key0); \
|
||||||
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
fill_state1 = aesenc<softAes>(fill_state1, key1); \
|
||||||
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
fill_state2 = aesdec<softAes>(fill_state2, key2); \
|
||||||
|
fill_state3 = aesenc<softAes>(fill_state3, key3); \
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0, fill_state0); \
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1, fill_state1); \
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3);
|
||||||
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
|
HASH_STATE(0);
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
|
HASH_STATE(1);
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
|
FILL_STATE(0);
|
||||||
|
FILL_STATE(1);
|
||||||
|
|
||||||
rx_prefetch_t0(prefetchPtr);
|
rx_prefetch_t0(prefetchPtr);
|
||||||
|
|
||||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4));
|
|
||||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5));
|
|
||||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6));
|
|
||||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7));
|
|
||||||
|
|
||||||
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
|
||||||
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
|
||||||
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
|
||||||
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
|
||||||
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0);
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1);
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2);
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3);
|
|
||||||
|
|
||||||
rx_prefetch_t0(prefetchPtr + 64);
|
rx_prefetch_t0(prefetchPtr + 64);
|
||||||
|
|
||||||
scratchpadPtr += 128;
|
scratchpadPtr += 128;
|
||||||
|
|
|
@ -131,31 +131,35 @@ uint32_t GetSoftAESImpl()
|
||||||
void SelectSoftAESImpl()
|
void SelectSoftAESImpl()
|
||||||
{
|
{
|
||||||
constexpr int test_length_ms = 100;
|
constexpr int test_length_ms = 100;
|
||||||
double speed[2];
|
double speed[2] = {};
|
||||||
|
|
||||||
for (int i = 0; i < 2; ++i)
|
for (int run = 0; run < 3; ++run) {
|
||||||
{
|
for (int i = 0; i < 2; ++i) {
|
||||||
std::vector<uint8_t> scratchpad(10 * 1024);
|
std::vector<uint8_t> scratchpad(10 * 1024);
|
||||||
uint8_t hash[64] = {};
|
uint8_t hash[64] = {};
|
||||||
uint8_t state[64] = {};
|
uint8_t state[64] = {};
|
||||||
|
|
||||||
uint64_t t1, t2;
|
uint64_t t1, t2;
|
||||||
|
|
||||||
uint32_t count = 0;
|
uint32_t count = 0;
|
||||||
t1 = xmrig::Chrono::highResolutionMSecs();
|
t1 = xmrig::Chrono::highResolutionMSecs();
|
||||||
do {
|
do {
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
|
hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||||
|
}
|
||||||
|
++count;
|
||||||
|
|
||||||
|
t2 = xmrig::Chrono::highResolutionMSecs();
|
||||||
|
} while (t2 - t1 < test_length_ms);
|
||||||
|
|
||||||
|
const double x = count * 1e3 / (t2 - t1);
|
||||||
|
if (x > speed[i]) {
|
||||||
|
speed[i] = x;
|
||||||
}
|
}
|
||||||
else {
|
}
|
||||||
hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
|
|
||||||
}
|
|
||||||
++count;
|
|
||||||
|
|
||||||
t2 = xmrig::Chrono::highResolutionMSecs();
|
|
||||||
} while (t2 - t1 < test_length_ms);
|
|
||||||
|
|
||||||
speed[i] = count * 1e3 / (t2 - t1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
softAESImpl = (speed[0] > speed[1]) ? 1 : 2;
|
softAESImpl = (speed[0] > speed[1]) ? 1 : 2;
|
||||||
|
|
Loading…
Reference in a new issue