diff --git a/src/backend/cuda/CudaBackend.cpp b/src/backend/cuda/CudaBackend.cpp index aa0c30a16..a15ba810a 100644 --- a/src/backend/cuda/CudaBackend.cpp +++ b/src/backend/cuda/CudaBackend.cpp @@ -238,7 +238,7 @@ public: # ifdef XMRIG_ALGO_KAWPOW if (algo.family() == Algorithm::KAWPOW) { const uint32_t epoch = job.height() / KPHash::EPOCH_LENGTH; - mem_used = (KPCache::cache_size(epoch) + KPCache::dag_size(epoch)) / oneMiB; + mem_used = (KPCache::dag_size(epoch) + oneMiB - 1) / oneMiB; } # endif diff --git a/src/backend/cuda/runners/CudaKawPowRunner.cpp b/src/backend/cuda/runners/CudaKawPowRunner.cpp index 6cb11a26d..fa78c5f80 100644 --- a/src/backend/cuda/runners/CudaKawPowRunner.cpp +++ b/src/backend/cuda/runners/CudaKawPowRunner.cpp @@ -66,14 +66,15 @@ bool xmrig::CudaKawPowRunner::set(const Job &job, uint8_t *blob) const uint64_t start_ms = Chrono::steadyMSecs(); - const bool result = CudaLib::kawPowPrepare(m_ctx, cache.data(), cache.size(), cache.dag_size(epoch), height, dag_sizes); + const bool result = CudaLib::kawPowPrepare(m_ctx, cache.data(), cache.size(), cache.l1_cache(), cache.dag_size(epoch), height, dag_sizes); if (!result) { LOG_ERR("Failed to initialize DAG: %s", CudaLib::lastError(m_ctx)); } - - const int64_t dt = Chrono::steadyMSecs() - start_ms; - if (dt > 1000) { - LOG_INFO("%s " YELLOW("KawPow") " DAG for epoch " WHITE_BOLD("%u") " calculated " BLACK_BOLD("(%" PRIu64 "ms)"), Tags::nvidia(), epoch, dt); + else { + const int64_t dt = Chrono::steadyMSecs() - start_ms; + if (dt > 1000) { + LOG_INFO("%s " YELLOW("KawPow") " DAG for epoch " WHITE_BOLD("%u") " calculated " BLACK_BOLD("(%" PRIu64 "ms)"), Tags::nvidia(), epoch, dt); + } } return result; diff --git a/src/backend/cuda/wrappers/CudaLib.cpp b/src/backend/cuda/wrappers/CudaLib.cpp index 83625acc2..0829e0bb2 100644 --- a/src/backend/cuda/wrappers/CudaLib.cpp +++ b/src/backend/cuda/wrappers/CudaLib.cpp @@ -66,7 +66,7 @@ static const char *kRelease = "release"; static const char *kRxHash = "rxHash"; static const char *kRxPrepare = "rxPrepare"; static const char *kKawPowHash = "kawPowHash"; -static const char *kKawPowPrepare = "kawPowPrepare"; +static const char *kKawPowPrepare_v2 = "kawPowPrepare_v2"; static const char *kKawPowStopHash = "kawPowStopHash"; static const char *kSetJob = "setJob"; static const char *kSetJob_v2 = "setJob_v2"; @@ -92,7 +92,7 @@ using release_t = void (*)(nvid_ctx *); using rxHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *); using rxPrepare_t = bool (*)(nvid_ctx *, const void *, size_t, bool, uint32_t); using kawPowHash_t = bool (*)(nvid_ctx *, uint8_t*, uint64_t, uint32_t *, uint32_t *, uint32_t *); -using kawPowPrepare_t = bool (*)(nvid_ctx *, const void *, size_t, size_t, uint32_t, const uint64_t*); +using kawPowPrepare_v2_t = bool (*)(nvid_ctx *, const void *, size_t, const void *, size_t, uint32_t, const uint64_t*); using kawPowStopHash_t = bool (*)(nvid_ctx *); using setJob_t = bool (*)(nvid_ctx *, const void *, size_t, int32_t); using setJob_v2_t = bool (*)(nvid_ctx *, const void *, size_t, const char *); @@ -118,7 +118,7 @@ static release_t pRelease = nullptr; static rxHash_t pRxHash = nullptr; static rxPrepare_t pRxPrepare = nullptr; static kawPowHash_t pKawPowHash = nullptr; -static kawPowPrepare_t pKawPowPrepare = nullptr; +static kawPowPrepare_v2_t pKawPowPrepare_v2 = nullptr; static kawPowStopHash_t pKawPowStopHash = nullptr; static setJob_t pSetJob = nullptr; static setJob_v2_t pSetJob_v2 = nullptr; @@ -214,9 +214,9 @@ bool xmrig::CudaLib::kawPowHash(nvid_ctx *ctx, uint8_t* job_blob, uint64_t targe } -bool xmrig::CudaLib::kawPowPrepare(nvid_ctx *ctx, const void* cache, size_t cache_size, size_t dag_size, uint32_t height, const uint64_t* dag_sizes) noexcept +bool xmrig::CudaLib::kawPowPrepare(nvid_ctx *ctx, const void* cache, size_t cache_size, const void* dag_precalc, size_t dag_size, uint32_t height, const uint64_t* dag_sizes) noexcept { - return pKawPowPrepare(ctx, cache, cache_size, dag_size, height, dag_sizes); + return pKawPowPrepare_v2(ctx, cache, cache_size, dag_precalc, dag_size, height, dag_sizes); } @@ -375,7 +375,7 @@ bool xmrig::CudaLib::load() DLSYM(AstroBWTHash); DLSYM(AstroBWTPrepare); DLSYM(KawPowHash); - DLSYM(KawPowPrepare); + DLSYM(KawPowPrepare_v2); DLSYM(KawPowStopHash); DLSYM(Version); diff --git a/src/backend/cuda/wrappers/CudaLib.h b/src/backend/cuda/wrappers/CudaLib.h index f1cd460f8..c058ffd59 100644 --- a/src/backend/cuda/wrappers/CudaLib.h +++ b/src/backend/cuda/wrappers/CudaLib.h @@ -81,7 +81,7 @@ public: static bool rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept; static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept; static bool kawPowHash(nvid_ctx *ctx, uint8_t* job_blob, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t *skipped_hashes) noexcept; - static bool kawPowPrepare(nvid_ctx *ctx, const void* cache, size_t cache_size, size_t dag_size, uint32_t height, const uint64_t* dag_sizes) noexcept; + static bool kawPowPrepare(nvid_ctx *ctx, const void* cache, size_t cache_size, const void* dag_precalc, size_t dag_size, uint32_t height, const uint64_t* dag_sizes) noexcept; static bool kawPowStopHash(nvid_ctx *ctx) noexcept; static bool setJob(nvid_ctx *ctx, const void *data, size_t size, const Algorithm &algorithm) noexcept; static const char *deviceName(nvid_ctx *ctx) noexcept; diff --git a/src/crypto/kawpow/KPCache.cpp b/src/crypto/kawpow/KPCache.cpp index 61e1f9cb7..29bde372f 100644 --- a/src/crypto/kawpow/KPCache.cpp +++ b/src/crypto/kawpow/KPCache.cpp @@ -26,6 +26,7 @@ #include +#include #include "crypto/kawpow/KPCache.h" #include "3rdparty/libethash/data_sizes.h" @@ -83,8 +84,30 @@ bool KPCache::init(uint32_t epoch) cache.num_parent_nodes = cache.cache_size / sizeof(node); calculate_fast_mod_data(cache.num_parent_nodes, cache.reciprocal, cache.increment, cache.shift); - for (uint32_t i = 0; i < sizeof(m_l1Cache) / sizeof(node); ++i) { - ethash_calculate_dag_item_opt(((node*)m_l1Cache) + i, i, num_dataset_parents, &cache); + const uint64_t cache_nodes = (size + sizeof(node) * 4 - 1) / sizeof(node); + m_DAGCache.resize(cache_nodes * (sizeof(node) / sizeof(uint32_t))); + + // Init DAG cache + { + const uint64_t n = std::max(std::thread::hardware_concurrency(), 1U); + + std::vector threads; + threads.reserve(n); + + for (uint64_t i = 0; i < n; ++i) { + const uint32_t a = (cache_nodes * i) / n; + const uint32_t b = (cache_nodes * (i + 1)) / n; + + threads.emplace_back([this, a, b, cache_nodes, &cache]() { + for (uint32_t j = a; j < b; ++j) { + ethash_calculate_dag_item_opt(((node*)m_DAGCache.data()) + j, j, num_dataset_parents, &cache); + } + }); + } + + for (auto& t : threads) { + t.join(); + } } m_size = size; diff --git a/src/crypto/kawpow/KPCache.h b/src/crypto/kawpow/KPCache.h index 3f18b0f50..0522e123f 100644 --- a/src/crypto/kawpow/KPCache.h +++ b/src/crypto/kawpow/KPCache.h @@ -30,6 +30,7 @@ #include "base/tools/Object.h" #include +#include namespace xmrig @@ -57,7 +58,7 @@ public: size_t size() const { return m_size; } uint32_t epoch() const { return m_epoch; } - const uint32_t* l1_cache() const { return m_l1Cache; } + const uint32_t* l1_cache() const { return m_DAGCache.data(); } static uint64_t cache_size(uint32_t epoch); static uint64_t dag_size(uint32_t epoch); @@ -71,7 +72,7 @@ private: VirtualMemory* m_memory = nullptr; size_t m_size = 0; uint32_t m_epoch = 0xFFFFFFFFUL; - uint32_t m_l1Cache[l1_cache_num_items] = {}; + std::vector m_DAGCache; };