RandomX: AES improvements

- A bit faster hardware AES code when compiled with MSVC
- More reliable software AES benchmark
2024-12-23 12:09:22 +00:00 · 2020-09-21 17:51:08 +02:00 · 2020-09-21 17:51:08 +02:00 · 891a46382e
commit 891a46382e
parent db920e8006
2 changed files with 44 additions and 49 deletions
--- a/src/crypto/randomx/aes_hash.cpp
+++ b/src/crypto/randomx/aes_hash.cpp
@@ -244,38 +244,29 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
 	for (int i = 0; i < 2; ++i) {
 		//process 64 bytes at a time in 4 lanes
 		while (scratchpadPtr < scratchpadEnd) {
-			hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
+#define HASH_STATE(k) \
-			hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
+			hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0)); \
-			hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
+			hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1)); \
-			hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
+			hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2)); \
 			hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3));
-			fill_state0 = aesdec<softAes>(fill_state0, key0);
+#define FILL_STATE(k) \
-			fill_state1 = aesenc<softAes>(fill_state1, key1);
+			fill_state0 = aesdec<softAes>(fill_state0, key0); \
-			fill_state2 = aesdec<softAes>(fill_state2, key2);
+			fill_state1 = aesenc<softAes>(fill_state1, key1); \
-			fill_state3 = aesenc<softAes>(fill_state3, key3);
+			fill_state2 = aesdec<softAes>(fill_state2, key2); \
 			fill_state3 = aesenc<softAes>(fill_state3, key3); \
 			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0, fill_state0); \
 			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1, fill_state1); \
 			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \
 			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3);
-			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
+			HASH_STATE(0);
-			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
+			HASH_STATE(1);
-			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
+
-			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
+			FILL_STATE(0);
 			FILL_STATE(1);
 			rx_prefetch_t0(prefetchPtr);
 			hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4));
 			hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5));
 			hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6));
 			hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7));
 			fill_state0 = aesdec<softAes>(fill_state0, key0);
 			fill_state1 = aesenc<softAes>(fill_state1, key1);
 			fill_state2 = aesdec<softAes>(fill_state2, key2);
 			fill_state3 = aesenc<softAes>(fill_state3, key3);
 			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0);
 			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1);
 			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2);
 			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3);
 			rx_prefetch_t0(prefetchPtr + 64);
 			scratchpadPtr += 128;
--- a/src/crypto/randomx/soft_aes.cpp
+++ b/src/crypto/randomx/soft_aes.cpp
@@ -131,31 +131,35 @@ uint32_t GetSoftAESImpl()
 void SelectSoftAESImpl()
 {
 	constexpr int test_length_ms = 100;
-	double speed[2];
+	double speed[2] = {};
-	for (int i = 0; i < 2; ++i)
+	for (int run = 0; run < 3; ++run) {
-	{
+		for (int i = 0; i < 2; ++i) {
-		std::vector<uint8_t> scratchpad(10 * 1024);
+			std::vector<uint8_t> scratchpad(10 * 1024);
-		uint8_t hash[64] = {};
+			uint8_t hash[64] = {};
-		uint8_t state[64] = {};
+			uint8_t state[64] = {};
-		uint64_t t1, t2;
+			uint64_t t1, t2;
-		uint32_t count = 0;
+			uint32_t count = 0;
-		t1 = xmrig::Chrono::highResolutionMSecs();
+			t1 = xmrig::Chrono::highResolutionMSecs();
-		do {
+			do {
-			if (i == 0) {
+				if (i == 0) {
-				hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
+					hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
 				}
 				else {
 					hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
 				}
 				++count;
 				t2 = xmrig::Chrono::highResolutionMSecs();
 			} while (t2 - t1 < test_length_ms);
 			const double x = count * 1e3 / (t2 - t1);
 			if (x > speed[i]) {
 				speed[i] = x;
 			}
-			else {
+		}
 				hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
 			}
 			++count;
 			t2 = xmrig::Chrono::highResolutionMSecs();
 		} while (t2 - t1 < test_length_ms);
 		speed[i] = count * 1e3 / (t2 - t1);
 	}
 	softAESImpl = (speed[0] > speed[1]) ? 1 : 2;