mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-18 00:37:46 +00:00
AstroBWT 20-50% speedup
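Skips hashes with a large stage 2 size. Added a configurable `astrobwt-max-size` parameter: the default value is 550, the minimum is 400, the maximum is 1200, and the optimal value ranges from 500 to 600 depending on the CPU. The value is multiplied by 1000 and compared against the stage 2 size; hashes whose stage 2 size exceeds it are skipped.
- Intel CPUs get a 20-25% speedup
- 1st- and 2nd-gen Ryzens get a 30% speedup
- 3rd-gen Ryzens get up to a 50% speedup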
This commit is contained in:
parent
c80ef54b60
commit
eeadea53e2
11 changed files with 63 additions and 10 deletions
|
@ -416,6 +416,10 @@ rapidjson::Value xmrig::CpuBackend::toJSON(rapidjson::Document &doc) const
|
|||
out.AddMember("argon2-impl", argon2::Impl::name().toJSON(), allocator);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_ASTROBWT
|
||||
out.AddMember("astrobwt-max-size", cpu.astrobwtMaxSize(), allocator);
|
||||
# endif
|
||||
|
||||
out.AddMember("hugepages", d_ptr->hugePages(2, doc), allocator);
|
||||
out.AddMember("memory", static_cast<uint64_t>(d_ptr->algo.isValid() ? (d_ptr->ways() * d_ptr->algo.l3()) : 0), allocator);
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "backend/cpu/Cpu.h"
|
||||
#include "base/io/json/Json.h"
|
||||
#include "rapidjson/document.h"
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
@ -48,6 +49,11 @@ static const char *kAsm = "asm";
|
|||
static const char *kArgon2Impl = "argon2-impl";
|
||||
#endif
|
||||
|
||||
#ifdef XMRIG_ALGO_ASTROBWT
|
||||
static const char* kAstroBWTMaxSize = "astrobwt-max-size";
|
||||
#endif
|
||||
|
||||
|
||||
extern template class Threads<CpuThreads>;
|
||||
|
||||
}
|
||||
|
@ -85,6 +91,10 @@ rapidjson::Value xmrig::CpuConfig::toJSON(rapidjson::Document &doc) const
|
|||
obj.AddMember(StringRef(kArgon2Impl), m_argon2Impl.toJSON(), allocator);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_ASTROBWT
|
||||
obj.AddMember(StringRef(kAstroBWTMaxSize), m_astrobwtMaxSize, allocator);
|
||||
# endif
|
||||
|
||||
m_threads.toJSON(obj, doc);
|
||||
|
||||
return obj;
|
||||
|
@ -136,6 +146,16 @@ void xmrig::CpuConfig::read(const rapidjson::Value &value)
|
|||
m_argon2Impl = Json::getString(value, kArgon2Impl);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_ASTROBWT
|
||||
const auto& obj = Json::getValue(value, kAstroBWTMaxSize);
|
||||
if (obj.IsNull() || !obj.IsInt()) {
|
||||
m_shouldSave = true;
|
||||
}
|
||||
else {
|
||||
m_astrobwtMaxSize = std::min(std::max(obj.GetInt(), 400), 1200);
|
||||
}
|
||||
# endif
|
||||
|
||||
m_threads.read(value);
|
||||
|
||||
generate();
|
||||
|
@ -167,7 +187,7 @@ void xmrig::CpuConfig::generate()
|
|||
count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, m_limit);
|
||||
|
||||
m_shouldSave = count > 0;
|
||||
m_shouldSave |= count > 0;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -58,6 +58,7 @@ public:
|
|||
inline bool isYield() const { return m_yield; }
|
||||
inline const Assembly &assembly() const { return m_assembly; }
|
||||
inline const String &argon2Impl() const { return m_argon2Impl; }
|
||||
inline int astrobwtMaxSize() const { return m_astrobwtMaxSize; }
|
||||
inline const Threads<CpuThreads> &threads() const { return m_threads; }
|
||||
inline int priority() const { return m_priority; }
|
||||
inline uint32_t limit() const { return m_limit; }
|
||||
|
@ -78,6 +79,7 @@ private:
|
|||
int m_memoryPool = 0;
|
||||
int m_priority = -1;
|
||||
String m_argon2Impl;
|
||||
int m_astrobwtMaxSize = 550;
|
||||
Threads<CpuThreads> m_threads;
|
||||
uint32_t m_limit = 100;
|
||||
};
|
||||
|
|
|
@ -42,7 +42,8 @@ xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorit
|
|||
priority(config.priority()),
|
||||
affinity(thread.affinity()),
|
||||
miner(miner),
|
||||
intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity()))
|
||||
intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity())),
|
||||
astrobwtMaxSize(config.astrobwtMaxSize())
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -65,6 +65,7 @@ public:
|
|||
const int64_t affinity;
|
||||
const Miner *miner;
|
||||
const uint32_t intensity;
|
||||
const int astrobwtMaxSize;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include "core/Miner.h"
|
||||
#include "crypto/cn/CnCtx.h"
|
||||
#include "crypto/cn/CryptoNight_test.h"
|
||||
#include "crypto/cn/CryptoNight.h"
|
||||
#include "crypto/common/Nonce.h"
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
#include "crypto/rx/Rx.h"
|
||||
|
@ -76,6 +77,7 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
|
|||
Worker(id, data.affinity, data.priority),
|
||||
m_algorithm(data.algorithm),
|
||||
m_assembly(data.assembly),
|
||||
m_astrobwtMaxSize(data.astrobwtMaxSize * 1000),
|
||||
m_hwAES(data.hwAES),
|
||||
m_yield(data.yield),
|
||||
m_av(data.av()),
|
||||
|
@ -240,6 +242,8 @@ void xmrig::CpuWorker<N>::start()
|
|||
current_job_nonces[i] = *m_job.nonce(i);
|
||||
}
|
||||
|
||||
bool valid = true;
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
if (job.algorithm().family() == Algorithm::RANDOM_X) {
|
||||
if (first) {
|
||||
|
@ -256,20 +260,31 @@ void xmrig::CpuWorker<N>::start()
|
|||
else
|
||||
# endif
|
||||
{
|
||||
fn(job.algorithm())(m_job.blob(), job.size(), m_hash, m_ctx, job.height());
|
||||
# ifdef XMRIG_ALGO_ASTROBWT
|
||||
if (job.algorithm().family() == Algorithm::ASTROBWT) {
|
||||
if (!astrobwt::astrobwt_dero(m_job.blob(), job.size(), m_ctx[0]->memory, m_hash, m_astrobwtMaxSize))
|
||||
valid = false;
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
fn(job.algorithm())(m_job.blob(), job.size(), m_hash, m_ctx, job.height());
|
||||
}
|
||||
|
||||
if (!nextRound(m_job)) {
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < job.target()) {
|
||||
JobResults::submit(job, current_job_nonces[i], m_hash + (i * 32));
|
||||
if (valid) {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < job.target()) {
|
||||
JobResults::submit(job, current_job_nonces[i], m_hash + (i * 32));
|
||||
}
|
||||
}
|
||||
m_count += N;
|
||||
}
|
||||
|
||||
m_count += N;
|
||||
|
||||
if (m_yield) {
|
||||
std::this_thread::yield();
|
||||
}
|
||||
|
|
|
@ -70,6 +70,7 @@ private:
|
|||
|
||||
const Algorithm m_algorithm;
|
||||
const Assembly m_assembly;
|
||||
const int m_astrobwtMaxSize;
|
||||
const bool m_hwAES;
|
||||
const bool m_yield;
|
||||
const CnHash::AlgoVariant m_av;
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
"max-threads-hint": 100,
|
||||
"asm": true,
|
||||
"argon2-impl": null,
|
||||
"astrobwt-max-size": 550,
|
||||
"cn/0": false,
|
||||
"cn-lite/0": false
|
||||
},
|
||||
|
|
|
@ -63,6 +63,7 @@ R"===(
|
|||
"max-threads-hint": 100,
|
||||
"asm": true,
|
||||
"argon2-impl": null,
|
||||
"astrobwt-max-size": 550,
|
||||
"cn/0": false,
|
||||
"cn-lite/0": false
|
||||
},
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "AstroBWT.h"
|
||||
#include "sha3.h"
|
||||
#include "crypto/cn/CryptoNight.h"
|
||||
#include <limits>
|
||||
|
||||
constexpr int STAGE1_SIZE = 147253;
|
||||
constexpr int ALLOCATION_SIZE = (STAGE1_SIZE + 1048576) + (128 - (STAGE1_SIZE & 63));
|
||||
|
@ -152,7 +153,7 @@ void sort_indices(int N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indi
|
|||
}
|
||||
}
|
||||
|
||||
void astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash)
|
||||
bool xmrig::astrobwt::astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash, int stage2_max_size)
|
||||
{
|
||||
uint8_t key[32];
|
||||
uint8_t* scratchpad_ptr = (uint8_t*)(scratchpad) + 64;
|
||||
|
@ -178,6 +179,9 @@ void astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad
|
|||
sha3_HashBuffer(256, SHA3_FLAGS_NONE, stage1_result, STAGE1_SIZE + 1, key, sizeof(key));
|
||||
|
||||
const int stage2_size = STAGE1_SIZE + (*(uint32_t*)(key) & 0xfffff);
|
||||
if (stage2_size > stage2_max_size)
|
||||
return false;
|
||||
|
||||
Salsa20_XORKeyStream(key, stage2_output, stage2_size);
|
||||
|
||||
sort_indices(stage2_size + 1, stage2_output, indices, tmp_indices);
|
||||
|
@ -198,10 +202,12 @@ void astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad
|
|||
}
|
||||
|
||||
sha3_HashBuffer(256, SHA3_FLAGS_NONE, stage2_result, stage2_size + 1, output_hash, 32);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<>
|
||||
void xmrig::astrobwt::single_hash<xmrig::Algorithm::ASTROBWT_DERO>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t)
|
||||
{
|
||||
astrobwt_dero(input, static_cast<uint32_t>(size), ctx[0]->memory, output);
|
||||
astrobwt_dero(input, static_cast<uint32_t>(size), ctx[0]->memory, output, std::numeric_limits<int>::max());
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@ struct cryptonight_ctx;
|
|||
|
||||
namespace xmrig { namespace astrobwt {
|
||||
|
||||
bool astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash, int stage2_max_size);
|
||||
|
||||
template<Algorithm::Id ALGO>
|
||||
void single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t);
|
||||
|
|
Loading…
Reference in a new issue