diff --git a/src/crypto/randomx/aes_hash.cpp b/src/crypto/randomx/aes_hash.cpp index eed829602..bed473b7b 100644 --- a/src/crypto/randomx/aes_hash.cpp +++ b/src/crypto/randomx/aes_hash.cpp @@ -26,8 +26,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include +#include + +#include "crypto/randomx/aes_hash.hpp" #include "crypto/randomx/soft_aes.h" #include "crypto/randomx/randomx.h" +#include "base/tools/Chrono.h" #include "base/tools/Profiler.h" #define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d @@ -214,7 +219,7 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); -template +template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) { PROFILE_SCOPE(RandomX_AES); @@ -260,7 +265,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \ rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3); - switch(softAes) { + switch (softAes) { case 0: HASH_STATE(0); HASH_STATE(1); @@ -277,13 +282,51 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi break; default: - HASH_STATE(0); - FILL_STATE(0); - rx_prefetch_t0(prefetchPtr); + switch (unroll) { + case 4: + HASH_STATE(0); + FILL_STATE(0); + rx_prefetch_t0(prefetchPtr); - scratchpadPtr += 64; - prefetchPtr += 64; + HASH_STATE(1); + FILL_STATE(1); + rx_prefetch_t0(prefetchPtr + 64); + HASH_STATE(2); + FILL_STATE(2); + rx_prefetch_t0(prefetchPtr + 64 * 2); + + HASH_STATE(3); + FILL_STATE(3); + rx_prefetch_t0(prefetchPtr + 64 * 3); + + scratchpadPtr += 64 * 4; + prefetchPtr += 64 * 4; + break; + + case 2: + HASH_STATE(0); + FILL_STATE(0); + 
rx_prefetch_t0(prefetchPtr); + + HASH_STATE(1); + FILL_STATE(1); + rx_prefetch_t0(prefetchPtr + 64); + + scratchpadPtr += 64 * 2; + prefetchPtr += 64 * 2; + break; + + default: + HASH_STATE(0); + FILL_STATE(0); + rx_prefetch_t0(prefetchPtr); + + scratchpadPtr += 64; + prefetchPtr += 64; + + break; + } break; } } @@ -317,6 +360,53 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3); } -template void hashAndFillAes1Rx4<0>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); -template void hashAndFillAes1Rx4<1>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); -template void hashAndFillAes1Rx4<2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); +template void hashAndFillAes1Rx4<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); +template void hashAndFillAes1Rx4<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); +template void hashAndFillAes1Rx4<2,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); +template void hashAndFillAes1Rx4<2,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); +template void hashAndFillAes1Rx4<2,4>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state); + +hashAndFillAes1Rx4_impl* softAESImpl = &hashAndFillAes1Rx4<1,1>; + +void SelectSoftAESImpl(size_t threadsCount) +{ + constexpr int test_length_ms = 100; + const std::vector impl = { + &hashAndFillAes1Rx4<1,1>, + &hashAndFillAes1Rx4<2,1>, + &hashAndFillAes1Rx4<2,2>, + &hashAndFillAes1Rx4<2,4>, + }; + size_t fast_idx = 0; + double fast_speed = 0.0; + for (size_t run = 0; run < 3; ++run) { + for (size_t i = 0; i < impl.size(); ++i) { + const uint64_t t1 = xmrig::Chrono::highResolutionMSecs(); + std::vector count(threadsCount, 0); + std::vector threads; + for (size_t t = 0; t < threadsCount; ++t) { + threads.emplace_back([&, t]() { + 
std::vector scratchpad(10 * 1024); + uint8_t hash[64] = {}; + uint8_t state[64] = {}; + do { + (*impl[i])(scratchpad.data(), scratchpad.size(), hash, state); + ++count[t]; + } while (xmrig::Chrono::highResolutionMSecs() - t1 < test_length_ms); + }); + } + uint32_t total = 0; + for (size_t t = 0; t < threadsCount; ++t) { + threads[t].join(); + total += count[t]; + } + const uint64_t t2 = xmrig::Chrono::highResolutionMSecs(); + const double speed = total * 1e3 / (t2 - t1); + if (speed > fast_speed) { + fast_idx = i; + fast_speed = speed; + } + } + } + softAESImpl = impl[fast_idx]; +} diff --git a/src/crypto/randomx/aes_hash.hpp b/src/crypto/randomx/aes_hash.hpp index 345ec8d99..b4b57b17c 100644 --- a/src/crypto/randomx/aes_hash.hpp +++ b/src/crypto/randomx/aes_hash.hpp @@ -30,6 +30,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +typedef void (hashAndFillAes1Rx4_impl)(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); + +extern hashAndFillAes1Rx4_impl* softAESImpl; + +inline hashAndFillAes1Rx4_impl* GetSoftAESImpl() +{ + return softAESImpl; +} + +void SelectSoftAESImpl(size_t threadsCount); + template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); @@ -39,5 +50,5 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); -template +template void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); diff --git a/src/crypto/randomx/soft_aes.cpp b/src/crypto/randomx/soft_aes.cpp index ad6f9ffe6..04fb7ac0e 100644 --- a/src/crypto/randomx/soft_aes.cpp +++ b/src/crypto/randomx/soft_aes.cpp @@ -28,9 +28,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "crypto/randomx/soft_aes.h" -#include "crypto/randomx/aes_hash.hpp" -#include "base/tools/Chrono.h" -#include alignas(64) uint32_t lutEnc0[256]; alignas(64) uint32_t lutEnc1[256]; @@ -120,47 +117,3 @@ static struct SAESInitializer } } } aes_initializer; - -static uint32_t softAESImpl = 1; - -uint32_t GetSoftAESImpl() -{ - return softAESImpl; -} - -void SelectSoftAESImpl() -{ - constexpr int test_length_ms = 100; - double speed[2] = {}; - - for (int run = 0; run < 3; ++run) { - for (int i = 0; i < 2; ++i) { - std::vector scratchpad(10 * 1024); - uint8_t hash[64] = {}; - uint8_t state[64] = {}; - - uint64_t t1, t2; - - uint32_t count = 0; - t1 = xmrig::Chrono::highResolutionMSecs(); - do { - if (i == 0) { - hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state); - } - else { - hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state); - } - ++count; - - t2 = xmrig::Chrono::highResolutionMSecs(); - } while (t2 - t1 < test_length_ms); - - const double x = count * 1e3 / (t2 - t1); - if (x > speed[i]) { - speed[i] = x; - } - } - } - - softAESImpl = (speed[0] > speed[1]) ? 
1 : 2; -} diff --git a/src/crypto/randomx/soft_aes.h b/src/crypto/randomx/soft_aes.h index d03a1a279..2b7d5a1e9 100644 --- a/src/crypto/randomx/soft_aes.h +++ b/src/crypto/randomx/soft_aes.h @@ -41,9 +41,6 @@ extern uint32_t lutDec1[256]; extern uint32_t lutDec2[256]; extern uint32_t lutDec3[256]; -uint32_t GetSoftAESImpl(); -void SelectSoftAESImpl(); - template rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key); template rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key); diff --git a/src/crypto/randomx/virtual_machine.cpp b/src/crypto/randomx/virtual_machine.cpp index 3a2d675c4..4a6990b23 100644 --- a/src/crypto/randomx/virtual_machine.cpp +++ b/src/crypto/randomx/virtual_machine.cpp @@ -119,15 +119,10 @@ namespace randomx { template void VmBase::hashAndFill(void* out, uint64_t (&fill_state)[8]) { if (!softAes) { - hashAndFillAes1Rx4<0>(scratchpad, ScratchpadSize, &reg.a, fill_state); + hashAndFillAes1Rx4<0, 2>(scratchpad, ScratchpadSize, &reg.a, fill_state); } else { - if (GetSoftAESImpl() == 1) { - hashAndFillAes1Rx4<1>(scratchpad, ScratchpadSize, &reg.a, fill_state); - } - else { - hashAndFillAes1Rx4<2>(scratchpad, ScratchpadSize, &reg.a, fill_state); - } + (*GetSoftAESImpl())(scratchpad, ScratchpadSize, &reg.a, fill_state); } rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile)); diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index 79354d7e8..c8ecc2b39 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -26,14 +26,12 @@ #include "crypto/rx/Rx.h" -#include "backend/common/Tags.h" #include "backend/cpu/CpuConfig.h" #include "backend/cpu/CpuThreads.h" -#include "base/io/log/Log.h" #include "crypto/rx/RxConfig.h" #include "crypto/rx/RxQueue.h" #include "crypto/randomx/randomx.h" -#include "crypto/randomx/soft_aes.h" +#include "crypto/randomx/aes_hash.hpp" namespace xmrig { @@ -115,7 +113,7 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu if (!osInitialized) { setupMainLoopExceptionFrame(); if 
(!cpu.isHwAES()) { - SelectSoftAESImpl(); + SelectSoftAESImpl(cpu.threads().get(seed.algorithm()).count()); } osInitialized = true; }