From 891a46382e1edb4b6b99b34f2c236cc256b24993 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Mon, 21 Sep 2020 17:51:08 +0200 Subject: [PATCH] RandomX: AES improvements - A bit faster hardware AES code when compiled with MSVC - More reliable software AES benchmark --- src/crypto/randomx/aes_hash.cpp | 47 +++++++++++++-------------------- src/crypto/randomx/soft_aes.cpp | 46 +++++++++++++++++--------------- 2 files changed, 44 insertions(+), 49 deletions(-) diff --git a/src/crypto/randomx/aes_hash.cpp b/src/crypto/randomx/aes_hash.cpp index 7c4b0c818..a15f75ad5 100644 --- a/src/crypto/randomx/aes_hash.cpp +++ b/src/crypto/randomx/aes_hash.cpp @@ -244,38 +244,29 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi for (int i = 0; i < 2; ++i) { //process 64 bytes at a time in 4 lanes while (scratchpadPtr < scratchpadEnd) { - hash_state0 = aesenc(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0)); - hash_state1 = aesdec(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1)); - hash_state2 = aesenc(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2)); - hash_state3 = aesdec(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3)); +#define HASH_STATE(k) \ + hash_state0 = aesenc(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0)); \ + hash_state1 = aesdec(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1)); \ + hash_state2 = aesenc(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2)); \ + hash_state3 = aesdec(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3)); - fill_state0 = aesdec(fill_state0, key0); - fill_state1 = aesenc(fill_state1, key1); - fill_state2 = aesdec(fill_state2, key2); - fill_state3 = aesenc(fill_state3, key3); +#define FILL_STATE(k) \ + fill_state0 = aesdec(fill_state0, key0); \ + fill_state1 = aesenc(fill_state1, key1); \ + fill_state2 = aesdec(fill_state2, key2); \ + fill_state3 = aesenc(fill_state3, key3); \ + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0, fill_state0); \ + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1, fill_state1); \ + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \ + rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3); + HASH_STATE(0); + HASH_STATE(1); + + FILL_STATE(0); + FILL_STATE(1); rx_prefetch_t0(prefetchPtr); - - hash_state0 = aesenc(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4)); - hash_state1 = aesdec(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5)); - hash_state2 = aesenc(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6)); - hash_state3 = aesdec(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7)); - - fill_state0 = aesdec(fill_state0, key0); - fill_state1 = aesenc(fill_state1, key1); - fill_state2 = aesdec(fill_state2, key2); - fill_state3 = aesenc(fill_state3, key3); - - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2); - rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3); - rx_prefetch_t0(prefetchPtr + 64); scratchpadPtr += 128; diff --git a/src/crypto/randomx/soft_aes.cpp b/src/crypto/randomx/soft_aes.cpp index a205398c8..ad6f9ffe6 100644 --- a/src/crypto/randomx/soft_aes.cpp +++ b/src/crypto/randomx/soft_aes.cpp @@ -131,31 +131,35 @@ uint32_t GetSoftAESImpl() void SelectSoftAESImpl() { constexpr int test_length_ms = 100; - double speed[2]; + double speed[2] = {}; - for (int i = 0; i < 2; ++i) - { - std::vector scratchpad(10 * 1024); - uint8_t hash[64] = {}; - uint8_t state[64] = {}; + for (int run = 0; run < 3; ++run) { + for (int i = 0; i < 2; ++i) { + std::vector scratchpad(10 * 1024); + uint8_t hash[64] = {}; + uint8_t state[64] = {}; - uint64_t t1, t2; + uint64_t t1, t2; - uint32_t count = 0; - t1 = xmrig::Chrono::highResolutionMSecs(); - do { - if (i == 0) { - hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state); + uint32_t count = 0; + t1 = xmrig::Chrono::highResolutionMSecs(); + do { + if (i == 0) { + hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state); + } + else { + hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state); + } + ++count; + + t2 = xmrig::Chrono::highResolutionMSecs(); + } while (t2 - t1 < test_length_ms); + + const double x = count * 1e3 / (t2 - t1); + if (x > speed[i]) { + speed[i] = x; } - else { - hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state); - } - ++count; - - t2 = xmrig::Chrono::highResolutionMSecs(); - } while (t2 - t1 < test_length_ms); - - speed[i] = count * 1e3 / (t2 - t1); + } } softAESImpl = (speed[0] > speed[1]) ? 1 : 2;