mirror of
https://github.com/xmrig/xmrig.git
synced 2025-03-16 16:42:14 +00:00
Merge pull request #1864 from cohcho/soft_aes_optimization2
soft_aes: fix previous optimization
This commit is contained in:
commit
116fb3d3f9
6 changed files with 116 additions and 72 deletions
|
@ -26,8 +26,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "crypto/randomx/aes_hash.hpp"
|
||||||
#include "crypto/randomx/soft_aes.h"
|
#include "crypto/randomx/soft_aes.h"
|
||||||
#include "crypto/randomx/randomx.h"
|
#include "crypto/randomx/randomx.h"
|
||||||
|
#include "base/tools/Chrono.h"
|
||||||
#include "base/tools/Profiler.h"
|
#include "base/tools/Profiler.h"
|
||||||
|
|
||||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||||
|
@ -214,7 +219,7 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
template<int softAes>
|
template<int softAes, int unroll>
|
||||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||||
PROFILE_SCOPE(RandomX_AES);
|
PROFILE_SCOPE(RandomX_AES);
|
||||||
|
|
||||||
|
@ -260,7 +265,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3);
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3);
|
||||||
|
|
||||||
switch(softAes) {
|
switch (softAes) {
|
||||||
case 0:
|
case 0:
|
||||||
HASH_STATE(0);
|
HASH_STATE(0);
|
||||||
HASH_STATE(1);
|
HASH_STATE(1);
|
||||||
|
@ -277,13 +282,51 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
HASH_STATE(0);
|
switch (unroll) {
|
||||||
FILL_STATE(0);
|
case 4:
|
||||||
rx_prefetch_t0(prefetchPtr);
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
rx_prefetch_t0(prefetchPtr);
|
||||||
|
|
||||||
scratchpadPtr += 64;
|
HASH_STATE(1);
|
||||||
prefetchPtr += 64;
|
FILL_STATE(1);
|
||||||
|
rx_prefetch_t0(prefetchPtr + 64);
|
||||||
|
|
||||||
|
HASH_STATE(2);
|
||||||
|
FILL_STATE(2);
|
||||||
|
rx_prefetch_t0(prefetchPtr + 64 * 2);
|
||||||
|
|
||||||
|
HASH_STATE(3);
|
||||||
|
FILL_STATE(3);
|
||||||
|
rx_prefetch_t0(prefetchPtr + 64 * 3);
|
||||||
|
|
||||||
|
scratchpadPtr += 64 * 4;
|
||||||
|
prefetchPtr += 64 * 4;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
rx_prefetch_t0(prefetchPtr);
|
||||||
|
|
||||||
|
HASH_STATE(1);
|
||||||
|
FILL_STATE(1);
|
||||||
|
rx_prefetch_t0(prefetchPtr + 64);
|
||||||
|
|
||||||
|
scratchpadPtr += 64 * 2;
|
||||||
|
prefetchPtr += 64 * 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
rx_prefetch_t0(prefetchPtr);
|
||||||
|
|
||||||
|
scratchpadPtr += 64;
|
||||||
|
prefetchPtr += 64;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -317,6 +360,53 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||||
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void hashAndFillAes1Rx4<0>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
template void hashAndFillAes1Rx4<0,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
template void hashAndFillAes1Rx4<1>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
template void hashAndFillAes1Rx4<1,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
template void hashAndFillAes1Rx4<2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
template void hashAndFillAes1Rx4<2,1>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
|
template void hashAndFillAes1Rx4<2,2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
|
template void hashAndFillAes1Rx4<2,4>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
|
|
||||||
|
hashAndFillAes1Rx4_impl* softAESImpl = &hashAndFillAes1Rx4<1,1>;
|
||||||
|
|
||||||
|
void SelectSoftAESImpl(size_t threadsCount)
|
||||||
|
{
|
||||||
|
constexpr int test_length_ms = 100;
|
||||||
|
const std::vector<hashAndFillAes1Rx4_impl *> impl = {
|
||||||
|
&hashAndFillAes1Rx4<1,1>,
|
||||||
|
&hashAndFillAes1Rx4<2,1>,
|
||||||
|
&hashAndFillAes1Rx4<2,2>,
|
||||||
|
&hashAndFillAes1Rx4<2,4>,
|
||||||
|
};
|
||||||
|
size_t fast_idx = 0;
|
||||||
|
double fast_speed = 0.0;
|
||||||
|
for (size_t run = 0; run < 3; ++run) {
|
||||||
|
for (size_t i = 0; i < impl.size(); ++i) {
|
||||||
|
const uint64_t t1 = xmrig::Chrono::highResolutionMSecs();
|
||||||
|
std::vector<uint32_t> count(threadsCount, 0);
|
||||||
|
std::vector<std::thread> threads;
|
||||||
|
for (size_t t = 0; t < threadsCount; ++t) {
|
||||||
|
threads.emplace_back([&, t]() {
|
||||||
|
std::vector<uint8_t> scratchpad(10 * 1024);
|
||||||
|
uint8_t hash[64] = {};
|
||||||
|
uint8_t state[64] = {};
|
||||||
|
do {
|
||||||
|
(*impl[i])(scratchpad.data(), scratchpad.size(), hash, state);
|
||||||
|
++count[t];
|
||||||
|
} while (xmrig::Chrono::highResolutionMSecs() - t1 < test_length_ms);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
uint32_t total = 0;
|
||||||
|
for (size_t t = 0; t < threadsCount; ++t) {
|
||||||
|
threads[t].join();
|
||||||
|
total += count[t];
|
||||||
|
}
|
||||||
|
const uint64_t t2 = xmrig::Chrono::highResolutionMSecs();
|
||||||
|
const double speed = total * 1e3 / (t2 - t1);
|
||||||
|
if (speed > fast_speed) {
|
||||||
|
fast_idx = i;
|
||||||
|
fast_speed = speed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
softAESImpl = impl[fast_idx];
|
||||||
|
}
|
||||||
|
|
|
@ -30,6 +30,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
|
||||||
|
typedef void (hashAndFillAes1Rx4_impl)(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||||
|
|
||||||
|
extern hashAndFillAes1Rx4_impl* softAESImpl;
|
||||||
|
|
||||||
|
/// Returns the hashAndFillAes1Rx4 variant currently stored in softAESImpl.
inline hashAndFillAes1Rx4_impl* GetSoftAESImpl() { return softAESImpl; }
|
||||||
|
|
||||||
|
void SelectSoftAESImpl(size_t threadsCount);
|
||||||
|
|
||||||
template<int softAes>
|
template<int softAes>
|
||||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
||||||
|
|
||||||
|
@ -39,5 +50,5 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
||||||
template<int softAes>
|
template<int softAes>
|
||||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
template<int softAes>
|
template<int softAes, int unroll>
|
||||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||||
|
|
|
@ -28,9 +28,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "crypto/randomx/soft_aes.h"
|
#include "crypto/randomx/soft_aes.h"
|
||||||
#include "crypto/randomx/aes_hash.hpp"
|
|
||||||
#include "base/tools/Chrono.h"
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
alignas(64) uint32_t lutEnc0[256];
|
alignas(64) uint32_t lutEnc0[256];
|
||||||
alignas(64) uint32_t lutEnc1[256];
|
alignas(64) uint32_t lutEnc1[256];
|
||||||
|
@ -120,47 +117,3 @@ static struct SAESInitializer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} aes_initializer;
|
} aes_initializer;
|
||||||
|
|
||||||
static uint32_t softAESImpl = 1;
|
|
||||||
|
|
||||||
uint32_t GetSoftAESImpl()
|
|
||||||
{
|
|
||||||
return softAESImpl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SelectSoftAESImpl()
|
|
||||||
{
|
|
||||||
constexpr int test_length_ms = 100;
|
|
||||||
double speed[2] = {};
|
|
||||||
|
|
||||||
for (int run = 0; run < 3; ++run) {
|
|
||||||
for (int i = 0; i < 2; ++i) {
|
|
||||||
std::vector<uint8_t> scratchpad(10 * 1024);
|
|
||||||
uint8_t hash[64] = {};
|
|
||||||
uint8_t state[64] = {};
|
|
||||||
|
|
||||||
uint64_t t1, t2;
|
|
||||||
|
|
||||||
uint32_t count = 0;
|
|
||||||
t1 = xmrig::Chrono::highResolutionMSecs();
|
|
||||||
do {
|
|
||||||
if (i == 0) {
|
|
||||||
hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
|
|
||||||
}
|
|
||||||
++count;
|
|
||||||
|
|
||||||
t2 = xmrig::Chrono::highResolutionMSecs();
|
|
||||||
} while (t2 - t1 < test_length_ms);
|
|
||||||
|
|
||||||
const double x = count * 1e3 / (t2 - t1);
|
|
||||||
if (x > speed[i]) {
|
|
||||||
speed[i] = x;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
softAESImpl = (speed[0] > speed[1]) ? 1 : 2;
|
|
||||||
}
|
|
||||||
|
|
|
@ -41,9 +41,6 @@ extern uint32_t lutDec1[256];
|
||||||
extern uint32_t lutDec2[256];
|
extern uint32_t lutDec2[256];
|
||||||
extern uint32_t lutDec3[256];
|
extern uint32_t lutDec3[256];
|
||||||
|
|
||||||
uint32_t GetSoftAESImpl();
|
|
||||||
void SelectSoftAESImpl();
|
|
||||||
|
|
||||||
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
||||||
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
||||||
|
|
||||||
|
|
|
@ -119,15 +119,10 @@ namespace randomx {
|
||||||
template<int softAes>
|
template<int softAes>
|
||||||
void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
|
void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
|
||||||
if (!softAes) {
|
if (!softAes) {
|
||||||
hashAndFillAes1Rx4<0>(scratchpad, ScratchpadSize, &reg.a, fill_state);
|
hashAndFillAes1Rx4<0, 2>(scratchpad, ScratchpadSize, &reg.a, fill_state);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (GetSoftAESImpl() == 1) {
|
(*GetSoftAESImpl())(scratchpad, ScratchpadSize, &reg.a, fill_state);
|
||||||
hashAndFillAes1Rx4<1>(scratchpad, ScratchpadSize, &reg.a, fill_state);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
hashAndFillAes1Rx4<2>(scratchpad, ScratchpadSize, &reg.a, fill_state);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile));
|
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile));
|
||||||
|
|
|
@ -26,14 +26,12 @@
|
||||||
|
|
||||||
|
|
||||||
#include "crypto/rx/Rx.h"
|
#include "crypto/rx/Rx.h"
|
||||||
#include "backend/common/Tags.h"
|
|
||||||
#include "backend/cpu/CpuConfig.h"
|
#include "backend/cpu/CpuConfig.h"
|
||||||
#include "backend/cpu/CpuThreads.h"
|
#include "backend/cpu/CpuThreads.h"
|
||||||
#include "base/io/log/Log.h"
|
|
||||||
#include "crypto/rx/RxConfig.h"
|
#include "crypto/rx/RxConfig.h"
|
||||||
#include "crypto/rx/RxQueue.h"
|
#include "crypto/rx/RxQueue.h"
|
||||||
#include "crypto/randomx/randomx.h"
|
#include "crypto/randomx/randomx.h"
|
||||||
#include "crypto/randomx/soft_aes.h"
|
#include "crypto/randomx/aes_hash.hpp"
|
||||||
|
|
||||||
|
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
@ -115,7 +113,7 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
|
||||||
if (!osInitialized) {
|
if (!osInitialized) {
|
||||||
setupMainLoopExceptionFrame();
|
setupMainLoopExceptionFrame();
|
||||||
if (!cpu.isHwAES()) {
|
if (!cpu.isHwAES()) {
|
||||||
SelectSoftAESImpl();
|
SelectSoftAESImpl(cpu.threads().get(seed.algorithm()).count());
|
||||||
}
|
}
|
||||||
osInitialized = true;
|
osInitialized = true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue