diff --git a/src/backend/cuda/CudaConfig_gen.h b/src/backend/cuda/CudaConfig_gen.h
index d7c913f59..87e35dc48 100644
--- a/src/backend/cuda/CudaConfig_gen.h
+++ b/src/backend/cuda/CudaConfig_gen.h
@@ -106,6 +106,31 @@ size_t inline generate(Threads &threads, const
 #endif
 
 
+#ifdef XMRIG_ALGO_RANDOMX
+template<>
+size_t inline generate<Algorithm::RANDOM_X>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
+{
+    size_t count = 0;
+
+    auto rx  = CudaThreads(devices, Algorithm::RX_0);
+    auto wow = CudaThreads(devices, Algorithm::RX_WOW);
+    auto arq = CudaThreads(devices, Algorithm::RX_ARQ);
+
+    if (!threads.isExist(Algorithm::RX_WOW) && wow != rx) {
+        count += threads.move("rx/wow", std::move(wow));
+    }
+
+    if (!threads.isExist(Algorithm::RX_ARQ) && arq != rx) {
+        count += threads.move("rx/arq", std::move(arq));
+    }
+
+    count += threads.move("rx", std::move(rx));
+
+    return count;
+}
+#endif
+
+
 } /* namespace xmrig */
 
 
diff --git a/src/backend/cuda/CudaWorker.cpp b/src/backend/cuda/CudaWorker.cpp
index 28a3a7bf5..5e5ad413e 100644
--- a/src/backend/cuda/CudaWorker.cpp
+++ b/src/backend/cuda/CudaWorker.cpp
@@ -61,8 +61,7 @@ static inline uint32_t roundSize(uint32_t intensity) { return kReserveCount / in
 xmrig::CudaWorker::CudaWorker(size_t id, const CudaLaunchData &data) :
     Worker(id, data.thread.affinity(), -1),
     m_algorithm(data.algorithm),
-    m_miner(data.miner),
-    m_intensity(data.thread.threads() * data.thread.blocks())
+    m_miner(data.miner)
 {
     switch (m_algorithm.family()) {
     case Algorithm::RANDOM_X:
@@ -133,7 +132,8 @@ void xmrig::CudaWorker::start()
                 JobResults::submit(m_job.currentJob(), foundNonce, foundCount);
             }
 
-            m_job.nextRound(roundSize(m_intensity), m_intensity);
+            const size_t batch_size = intensity();
+            m_job.nextRound(roundSize(batch_size), batch_size);
 
             storeStats();
             std::this_thread::yield();
@@ -152,7 +152,8 @@ bool xmrig::CudaWorker::consumeJob()
         return false;
     }
 
-    m_job.add(m_miner->job(), Nonce::sequence(Nonce::CUDA), roundSize(m_intensity) * m_intensity);
+    const size_t batch_size = intensity();
+    m_job.add(m_miner->job(), Nonce::sequence(Nonce::CUDA), roundSize(batch_size) * batch_size);
 
     return m_runner->set(m_job.currentJob(), m_job.blob());;
 }
@@ -164,7 +165,7 @@ void xmrig::CudaWorker::storeStats()
         return;
     }
 
-    m_count += m_intensity;
+    m_count += intensity();
 
     Worker::storeStats();
 }
diff --git a/src/backend/cuda/CudaWorker.h b/src/backend/cuda/CudaWorker.h
index 4fb006ba9..f717ca509 100644
--- a/src/backend/cuda/CudaWorker.h
+++ b/src/backend/cuda/CudaWorker.h
@@ -62,7 +62,6 @@ private:
 
     const Algorithm m_algorithm;
     const Miner *m_miner;
-    const uint32_t m_intensity;
     ICudaRunner *m_runner = nullptr;
     WorkerJob<1> m_job;
 };
diff --git a/src/backend/cuda/runners/CudaBaseRunner.cpp b/src/backend/cuda/runners/CudaBaseRunner.cpp
index 191b9e39b..032d50c93 100644
--- a/src/backend/cuda/runners/CudaBaseRunner.cpp
+++ b/src/backend/cuda/runners/CudaBaseRunner.cpp
@@ -51,13 +51,7 @@ bool xmrig::CudaBaseRunner::init()
         return false;
     }
 
-    if (!CudaLib::deviceInit(m_ctx)) {
-        printError(CudaLib::lastError(m_ctx));
-
-        return false;
-    }
-
-    return true;
+    return callWrapper(CudaLib::deviceInit(m_ctx));
 }
 
 
@@ -66,13 +60,7 @@ bool xmrig::CudaBaseRunner::set(const Job &job, uint8_t *blob)
     m_height = job.height();
     m_target = job.target();
 
-    if (!CudaLib::setJob(m_ctx, blob, job.size(), job.algorithm())) {
-        printError(CudaLib::lastError(m_ctx));
-
-        return false;
-    }
-
-    return true;
+    return callWrapper(CudaLib::setJob(m_ctx, blob, job.size(), job.algorithm()));
 }
 
 
@@ -82,9 +70,14 @@ size_t xmrig::CudaBaseRunner::intensity() const
 }
 
 
-void xmrig::CudaBaseRunner::printError(const char *error) const
+bool xmrig::CudaBaseRunner::callWrapper(bool result) const
 {
-    if (error) {
-        LOG_ERR("%s" RED_S " thread " RED_BOLD("#%zu") RED_S " failed with error " RED_BOLD("%s"), cuda_tag(), m_threadId, error);
+    if (!result) {
+        const char *error = CudaLib::lastError(m_ctx);
+        if (error) {
+            LOG_ERR("%s" RED_S " thread " RED_BOLD("#%zu") RED_S " failed with error " RED_BOLD("%s"), cuda_tag(), m_threadId, error);
+        }
     }
+
+    return result;
 }
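The CudaBaseRunner change above collapses the repeated `if (!call) { printError(CudaLib::lastError(m_ctx)); return false; }` blocks into a single callWrapper() helper that forwards the call's boolean result and logs the plugin's last error only on failure. A minimal standalone sketch of the same pattern follows; the backendCall()/backendLastError() stand-ins and the plain fprintf logging are illustrative assumptions, not part of the xmrig API.

#include <cstddef>
#include <cstdio>

// Stand-ins for a backend call and its error query (illustrative only).
static bool backendCall(bool succeed) { return succeed; }
static const char *backendLastError() { return "device not initialized"; }

class Runner
{
public:
    explicit Runner(std::size_t threadId) : m_threadId(threadId) {}

    // Same shape as CudaBaseRunner::callWrapper(): forward the boolean
    // result, logging the backend's last error only when the call failed.
    bool callWrapper(bool result) const
    {
        if (!result) {
            const char *error = backendLastError();
            if (error) {
                std::fprintf(stderr, "thread #%zu failed with error %s\n", m_threadId, error);
            }
        }

        return result;
    }

private:
    const std::size_t m_threadId;
};

int main()
{
    Runner runner(0);

    return runner.callWrapper(backendCall(false)) ? 0 : 1; // logs, then returns 1
}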
diff --git a/src/backend/cuda/runners/CudaBaseRunner.h b/src/backend/cuda/runners/CudaBaseRunner.h
index 4ac04d4de..c0e1aef09 100644
--- a/src/backend/cuda/runners/CudaBaseRunner.h
+++ b/src/backend/cuda/runners/CudaBaseRunner.h
@@ -52,7 +52,7 @@ protected:
     size_t intensity() const override;
 
 protected:
-    void printError(const char *error) const;
+    bool callWrapper(bool result) const;
 
     const CudaLaunchData &m_data;
     const size_t m_threadId;
diff --git a/src/backend/cuda/runners/CudaCnRunner.cpp b/src/backend/cuda/runners/CudaCnRunner.cpp
index c4852972c..4d79efe36 100644
--- a/src/backend/cuda/runners/CudaCnRunner.cpp
+++ b/src/backend/cuda/runners/CudaCnRunner.cpp
@@ -34,11 +34,5 @@ xmrig::CudaCnRunner::CudaCnRunner(size_t index, const CudaLaunchData &data) : Cu
 
 bool xmrig::CudaCnRunner::run(uint32_t startNonce, uint32_t *rescount, uint32_t *resnonce)
 {
-    if (!CudaLib::cnHash(m_ctx, startNonce, m_height, m_target, rescount, resnonce)) {
-        printError(CudaLib::lastError(m_ctx));
-
-        return false;
-    }
-
-    return true;
+    return callWrapper(CudaLib::cnHash(m_ctx, startNonce, m_height, m_target, rescount, resnonce));
 }
diff --git a/src/backend/cuda/runners/CudaRxRunner.cpp b/src/backend/cuda/runners/CudaRxRunner.cpp
index 92b2d9f90..83bf21ff6 100644
--- a/src/backend/cuda/runners/CudaRxRunner.cpp
+++ b/src/backend/cuda/runners/CudaRxRunner.cpp
@@ -24,19 +24,42 @@
 
 
 #include "backend/cuda/runners/CudaRxRunner.h"
+#include "backend/cuda/CudaLaunchData.h"
+#include "backend/cuda/wrappers/CudaLib.h"
+#include "base/net/stratum/Job.h"
+#include "crypto/rx/Rx.h"
+#include "crypto/rx/RxDataset.h"
 
 
 xmrig::CudaRxRunner::CudaRxRunner(size_t index, const CudaLaunchData &data) : CudaBaseRunner(index, data)
 {
-}
+    m_intensity = m_data.thread.threads() * m_data.thread.blocks();
+    const size_t scratchpads_size = m_intensity * m_data.algorithm.l3();
+    const size_t num_scratchpads = scratchpads_size / m_data.algorithm.l3();
+    if (m_intensity > num_scratchpads) {
+        m_intensity = num_scratchpads;
+    }
 
 
-xmrig::CudaRxRunner::~CudaRxRunner()
-{
+    m_intensity -= m_intensity % 32;
 }
 
 
 bool xmrig::CudaRxRunner::run(uint32_t startNonce, uint32_t *rescount, uint32_t *resnonce)
 {
-    return false;
+    return callWrapper(CudaLib::rxHash(m_ctx, startNonce, m_target, rescount, resnonce));
+}
+
+
+bool xmrig::CudaRxRunner::set(const Job &job, uint8_t *blob)
+{
+    const bool rc = CudaBaseRunner::set(job, blob);
+    if (!rc || m_ready) {
+        return rc;
+    }
+
+    auto dataset = Rx::dataset(job, 0);
+    m_ready = callWrapper(CudaLib::rxPrepare(m_ctx, dataset->raw(), dataset->size(false), m_intensity));
+
+    return m_ready;
 }
diff --git a/src/backend/cuda/runners/CudaRxRunner.h b/src/backend/cuda/runners/CudaRxRunner.h
index 8aba75f54..06ed1b90f 100644
--- a/src/backend/cuda/runners/CudaRxRunner.h
+++ b/src/backend/cuda/runners/CudaRxRunner.h
@@ -35,15 +35,17 @@ namespace xmrig {
 class CudaRxRunner : public CudaBaseRunner
 {
 public:
-    XMRIG_DISABLE_COPY_MOVE_DEFAULT(CudaRxRunner)
-
     CudaRxRunner(size_t index, const CudaLaunchData &data);
-    ~CudaRxRunner() override;
 
 protected:
+    inline size_t intensity() const override { return m_intensity; }
+
     bool run(uint32_t startNonce, uint32_t *rescount, uint32_t *resnonce) override;
+    bool set(const Job &job, uint8_t *blob) override;
 
 private:
+    bool m_ready = false;
+    size_t m_intensity = 0;
 };
 
 
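The new CudaRxRunner constructor derives its RandomX batch size from threads * blocks and rounds the result down to a multiple of 32 before it is passed to rxPrepare() and used for each nonce round. A small self-contained sketch of that arithmetic follows; the sample thread/block values are made up and the helper name roundIntensity() is not part of the patch.

#include <cstdint>
#include <cstdio>

// Round a threads * blocks product down to a multiple of 32, mirroring the
// intensity setup in the CudaRxRunner constructor above.
static uint32_t roundIntensity(uint32_t threads, uint32_t blocks)
{
    uint32_t intensity = threads * blocks;
    intensity -= intensity % 32;

    return intensity;
}

int main()
{
    std::printf("%u\n", roundIntensity(32, 30)); // 960, already a multiple of 32
    std::printf("%u\n", roundIntensity(50, 17)); // 850 rounds down to 832

    return 0;
}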
diff --git a/src/backend/cuda/wrappers/CudaLib.cpp b/src/backend/cuda/wrappers/CudaLib.cpp
index f89d1de18..4f8376e3b 100644
--- a/src/backend/cuda/wrappers/CudaLib.cpp
+++ b/src/backend/cuda/wrappers/CudaLib.cpp
@@ -58,6 +58,8 @@ static const char *kInit = "init";
 static const char *kLastError = "lastError";
 static const char *kPluginVersion = "pluginVersion";
 static const char *kRelease = "release";
+static const char *kRxHash = "rxHash";
+static const char *kRxPrepare = "rxPrepare";
 static const char *kSetJob = "setJob";
 static const char *kSymbolNotFound = "symbol not found";
 static const char *kVersion = "version";
@@ -76,6 +78,8 @@ using init_t = void (*)();
 using lastError_t = const char * (*)(nvid_ctx *);
 using pluginVersion_t = const char * (*)();
 using release_t = void (*)(nvid_ctx *);
+using rxHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *);
+using rxPrepare_t = bool (*)(nvid_ctx *, const void *, size_t, uint32_t);
 using setJob_t = bool (*)(nvid_ctx *, const void *, size_t, int32_t);
 using version_t = uint32_t (*)(Version);
 
@@ -93,6 +97,8 @@ static init_t pInit = nullptr;
 static lastError_t pLastError = nullptr;
 static pluginVersion_t pPluginVersion = nullptr;
 static release_t pRelease = nullptr;
+static rxHash_t pRxHash = nullptr;
+static rxPrepare_t pRxPrepare = nullptr;
 static setJob_t pSetJob = nullptr;
 static version_t pVersion = nullptr;
 
@@ -144,6 +150,18 @@ bool xmrig::CudaLib::deviceInit(nvid_ctx *ctx) noexcept
 }
 
 
+bool xmrig::CudaLib::rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept
+{
+    return pRxHash(ctx, startNonce, target, rescount, resnonce);
+}
+
+
+bool xmrig::CudaLib::rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, uint32_t batchSize) noexcept
+{
+    return pRxPrepare(ctx, dataset, datasetSize, batchSize);
+}
+
+
 bool xmrig::CudaLib::setJob(nvid_ctx *ctx, const void *data, size_t size, const Algorithm &algorithm) noexcept
 {
     return pSetJob(ctx, data, size, algorithm);
 }
@@ -267,6 +285,8 @@ bool xmrig::CudaLib::load()
         DLSYM(LastError);
         DLSYM(PluginVersion);
         DLSYM(Release);
+        DLSYM(RxHash);
+        DLSYM(RxPrepare);
         DLSYM(SetJob);
         DLSYM(Version);
     } catch (std::exception &ex) {
diff --git a/src/backend/cuda/wrappers/CudaLib.h b/src/backend/cuda/wrappers/CudaLib.h
index 926fca569..b1215640d 100644
--- a/src/backend/cuda/wrappers/CudaLib.h
+++ b/src/backend/cuda/wrappers/CudaLib.h
@@ -72,6 +72,8 @@ public:
 
     static bool cnHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t height, uint64_t target, uint32_t *rescount, uint32_t *resnonce);
     static bool deviceInit(nvid_ctx *ctx) noexcept;
+    static bool rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept;
+    static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, uint32_t batchSize) noexcept;
     static bool setJob(nvid_ctx *ctx, const void *data, size_t size, const Algorithm &algorithm) noexcept;
     static const char *deviceName(nvid_ctx *ctx) noexcept;
     static const char *lastError(nvid_ctx *ctx) noexcept;
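The CudaLib changes follow the wrapper's existing pattern for every exported plugin function: a name constant (kRxHash, kRxPrepare), a function-pointer alias, a static pointer, and a DLSYM entry in load(). The sketch below shows roughly what that resolution step amounts to on a POSIX system using plain dlopen/dlsym; xmrig itself goes through its own DLSYM macro and dynamic-library wrapper, and the plugin file name here is only an assumption for illustration.

#include <cstdint>
#include <cstdio>
#include <dlfcn.h>

struct nvid_ctx; // opaque plugin context, as in the CUDA wrapper interface

// Function-pointer alias matching the rxHash_t typedef added above.
using rxHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *);

int main()
{
    // Hypothetical plugin name; the real loader resolves it from configuration.
    void *handle = dlopen("./libxmrig-cuda.so", RTLD_LAZY);
    if (!handle) {
        std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
    }

    // Resolve the exported "rxHash" symbol into a typed function pointer.
    auto rxHash = reinterpret_cast<rxHash_t>(dlsym(handle, "rxHash"));
    if (!rxHash) {
        std::fprintf(stderr, "symbol not found: %s\n", dlerror());
        dlclose(handle);
        return 1;
    }

    std::fprintf(stdout, "rxHash resolved at %p\n", reinterpret_cast<void *>(rxHash));
    dlclose(handle);

    return 0;
}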