// Patch overview (commentary placed ahead of the first "diff --git" header; the hunk text below is unchanged).
//
// This is a unified git diff touching nine files. Its line breaks have been collapsed, and template
// argument lists / <...> include names appear to be missing from the text (e.g. "static_cast(", "std::pair ",
// bare "#include") -- presumably an extraction artifact; compare with the original commit before applying.
//
// What the hunks show, file by file:
//  - backend/cpu/CpuBackend.cpp: the READY log format now prints the huge-pages percentage ("%1.0f%%")
//    before the "%zu/%zu" page counts; the m_hugePages/m_pages arguments move accordingly.
//  - backend/opencl/runners/OclRxBaseRunner.cpp: the size passed to enqueueWriteBuffer() changes from
//    dataset->size() to RxDataset::maxSize().
//  - crypto/rx/RxBasicStorage.cpp: RxBasicStoragePrivate gains XMRIG_DISABLE_COPY_MOVE, a defaulted
//    constructor and a destructor that deletes m_dataset. initDataset() no longer receives a timestamp and
//    takes its own via Chrono::steadyMSecs(). The allocation log prints m_dataset->size() / oneMiB and puts
//    the percentage before the page counts.
//  - crypto/rx/RxCache.{h,cpp}: adds size() (returns maxSize()) and hugePages(), which returns
//    { total, total } when isHugePages() is true and { 0, total } otherwise, where total is maxSize()
//    aligned up to 2 MiB and divided by 2 MiB.
//  - crypto/rx/RxDataset.{h,cpp}: adds a constructor that only stores an RxCache pointer, setCache(),
//    setRaw() (memcpy of maxSize() bytes into randomx_get_dataset_memory(); returns early when m_dataset is
//    null) and size(bool cache), which adds maxSize() only if m_dataset is set and RxCache::maxSize() only
//    if cache is requested and m_cache is set. The inline size() and the m_algorithm member are removed.
//  - crypto/rx/RxNUMAStorage.cpp: RxNUMAStoragePrivate now keeps a map of datasets keyed by node id plus one
//    RxCache. createDatasets() starts one std::thread per entry of m_nodeset running allocate(), joins them,
//    then runs allocateCache() for m_nodeset.front() on another thread. Both helpers call bindToNUMANode()
//    first and publish their result while holding the file-static mutex. If no dataset was allocated, a
//    cache-only RxDataset is inserted for the first node and a "switching to slow mode" warning is logged.
//    initDatasets() initialises the first node's dataset from the seed and then copies its raw memory into
//    every other dataset via setRaw(), one thread per copy. hugePages() sums the cache's pages and each
//    dataset's pages (queried with cache = false). The leftover ">>>>>> %zu" debug LOG_WARN is removed.
//  - crypto/rx/RxVm.cpp: when the dataset has no cache, or the cache reports isJIT(), RANDOMX_FLAG_JIT is set
//    and randomx_create_vm() receives nullptr for the cache; otherwise the cache handle is passed as before.
//
// NOTE(review): RxNUMAStoragePrivate::dataset(nodeId) falls back to m_datasets.at(m_nodeset.front()). If
// allocate() skipped the first node while another node succeeded, that key is absent; createDatasets()
// (else-branch) and initDatasets() both look it up -- confirm this cannot happen, std::map::at would throw.
// NOTE(review): ~RxNUMAStoragePrivate deletes only the m_datasets entries; whether m_cache is released
// elsewhere (e.g. by RxDataset's destructor) is not visible in these hunks -- TODO confirm.
diff --git a/src/backend/cpu/CpuBackend.cpp b/src/backend/cpu/CpuBackend.cpp index d6d3ff14c..78b71e25a 100644 --- a/src/backend/cpu/CpuBackend.cpp +++ b/src/backend/cpu/CpuBackend.cpp @@ -111,13 +111,13 @@ public: return; } - LOG_INFO("%s" GREEN_BOLD(" READY") " threads %s%zu/%zu (%zu)" CLEAR " huge pages %s%zu/%zu %1.0f%%" CLEAR " memory " CYAN_BOLD("%zu KB") BLACK_BOLD(" (%" PRIu64 " ms)"), + LOG_INFO("%s" GREEN_BOLD(" READY") " threads %s%zu/%zu (%zu)" CLEAR " huge pages %s%1.0f%% %zu/%zu" CLEAR " memory " CYAN_BOLD("%zu KB") BLACK_BOLD(" (%" PRIu64 " ms)"), tag, m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S, m_started, m_threads, m_ways, (m_hugePages == m_pages ? GREEN_BOLD_S : (m_hugePages == 0 ? RED_BOLD_S : YELLOW_BOLD_S)), - m_hugePages, m_pages, m_hugePages == 0 ? 0.0 : static_cast(m_hugePages) / m_pages * 100.0, + m_hugePages, m_pages, memory() / 1024, Chrono::steadyMSecs() - m_ts ); diff --git a/src/backend/opencl/runners/OclRxBaseRunner.cpp b/src/backend/opencl/runners/OclRxBaseRunner.cpp index fa0259b49..278c4f768 100644 --- a/src/backend/opencl/runners/OclRxBaseRunner.cpp +++ b/src/backend/opencl/runners/OclRxBaseRunner.cpp @@ -122,7 +122,7 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob) m_seed = job.seed(); auto dataset = Rx::dataset(job, 0); - enqueueWriteBuffer(m_dataset, CL_TRUE, 0, dataset->size(), dataset->raw()); + enqueueWriteBuffer(m_dataset, CL_TRUE, 0, RxDataset::maxSize(), dataset->raw()); } if (job.size() < Job::kMaxBlobSize) { diff --git a/src/crypto/rx/RxBasicStorage.cpp b/src/crypto/rx/RxBasicStorage.cpp index 6b1509cc8..dcabad5b0 100644 --- a/src/crypto/rx/RxBasicStorage.cpp +++ b/src/crypto/rx/RxBasicStorage.cpp @@ -29,6 +29,7 @@ #include "backend/common/Tags.h" #include "base/io/log/Log.h" #include "base/tools/Chrono.h" +#include "base/tools/Object.h" #include "crypto/rx/RxAlgo.h" #include "crypto/rx/RxCache.h" #include "crypto/rx/RxDataset.h" @@ -44,6 +45,14 @@ constexpr size_t oneMiB = 1024 * 1024; class 
RxBasicStoragePrivate { public: + XMRIG_DISABLE_COPY_MOVE(RxBasicStoragePrivate) + + inline RxBasicStoragePrivate() = default; + inline ~RxBasicStoragePrivate() + { + delete m_dataset; + } + inline bool isReady(const Job &job) const { return m_ready && m_seed == job; } inline RxDataset *dataset() const { return m_dataset; } @@ -69,8 +78,10 @@ public: } - inline void initDataset(uint32_t threads, uint64_t ts) + inline void initDataset(uint32_t threads) { + const uint64_t ts = Chrono::steadyMSecs(); + m_dataset->init(m_seed.data(), threads); LOG_INFO("%s" GREEN_BOLD("dataset ready") BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), Chrono::steadyMSecs() - ts); @@ -86,15 +97,15 @@ private: const auto pages = m_dataset->hugePages(); const double percent = pages.first == 0 ? 0.0 : static_cast(pages.first) / pages.second * 100.0; - LOG_INFO("%s" GREEN_BOLD("allocated") CYAN_BOLD(" %zu MB") BLACK_BOLD(" (%zu+%zu)") " huge pages %s%u/%u %1.0f%%" CLEAR " %sJIT" BLACK_BOLD(" (%" PRIu64 " ms)"), + LOG_INFO("%s" GREEN_BOLD("allocated") CYAN_BOLD(" %zu MB") BLACK_BOLD(" (%zu+%zu)") " huge pages %s%1.0f%% %u/%u" CLEAR " %sJIT" BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), - (RxDataset::maxSize() + RxCache::maxSize()) / oneMiB, + m_dataset->size() / oneMiB, RxDataset::maxSize() / oneMiB, RxCache::maxSize() / oneMiB, (pages.first == pages.second ? GREEN_BOLD_S : (pages.first == 0 ? RED_BOLD_S : YELLOW_BOLD_S)), + percent, pages.first, pages.second, - percent, m_dataset->cache()->isJIT() ? 
GREEN_BOLD_S "+" : RED_BOLD_S "-", Chrono::steadyMSecs() - ts ); @@ -148,13 +159,11 @@ std::pair xmrig::RxBasicStorage::hugePages() const void xmrig::RxBasicStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages) { - const uint64_t ts = Chrono::steadyMSecs(); - d_ptr->setSeed(seed); if (!d_ptr->dataset()) { d_ptr->createDataset(hugePages); } - d_ptr->initDataset(threads, ts); + d_ptr->initDataset(threads); } diff --git a/src/crypto/rx/RxCache.cpp b/src/crypto/rx/RxCache.cpp index e8fcb6857..a248ea5cc 100644 --- a/src/crypto/rx/RxCache.cpp +++ b/src/crypto/rx/RxCache.cpp @@ -25,8 +25,9 @@ */ -#include "crypto/randomx/randomx.h" #include "crypto/rx/RxCache.h" +#include "crypto/common/VirtualMemory.h" +#include "crypto/randomx/randomx.h" static_assert(RANDOMX_FLAG_JIT == 8, "RANDOMX_FLAG_JIT flag mismatch"); @@ -72,3 +73,17 @@ bool xmrig::RxCache::init(const Buffer &seed) return true; } + + +std::pair xmrig::RxCache::hugePages() const +{ + constexpr size_t twoMiB = 2u * 1024u * 1024u; + constexpr size_t total = VirtualMemory::align(maxSize(), twoMiB) / twoMiB; + + uint32_t count = 0; + if (isHugePages()) { + count += total; + } + + return { count, total }; +} diff --git a/src/crypto/rx/RxCache.h b/src/crypto/rx/RxCache.h index f01e50872..84635292b 100644 --- a/src/crypto/rx/RxCache.h +++ b/src/crypto/rx/RxCache.h @@ -55,8 +55,10 @@ public: inline bool isJIT() const { return m_flags & 8; } inline const Buffer &seed() const { return m_seed; } inline randomx_cache *get() const { return m_cache; } + inline size_t size() const { return maxSize(); } bool init(const Buffer &seed); + std::pair hugePages() const; static inline constexpr size_t maxSize() { return RANDOMX_CACHE_MAX_SIZE; } diff --git a/src/crypto/rx/RxDataset.cpp b/src/crypto/rx/RxDataset.cpp index f5572c2a8..62887a01c 100644 --- a/src/crypto/rx/RxDataset.cpp +++ b/src/crypto/rx/RxDataset.cpp @@ -48,6 +48,12 @@ xmrig::RxDataset::RxDataset(bool hugePages, bool cache) } 
+xmrig::RxDataset::RxDataset(RxCache *cache) : + m_cache(cache) +{ +} + + xmrig::RxDataset::~RxDataset() { if (m_dataset) { @@ -94,6 +100,22 @@ bool xmrig::RxDataset::init(const Buffer &seed, uint32_t numThreads) } +size_t xmrig::RxDataset::size(bool cache) const +{ + size_t size = 0; + + if (m_dataset) { + size += maxSize(); + } + + if (cache && m_cache) { + size += RxCache::maxSize(); + } + + return size; +} + + std::pair xmrig::RxDataset::hugePages(bool cache) const { constexpr size_t twoMiB = 2u * 1024u * 1024u; @@ -123,6 +145,16 @@ void *xmrig::RxDataset::raw() const } +void xmrig::RxDataset::setRaw(const void *raw) +{ + if (!m_dataset) { + return; + } + + memcpy(randomx_get_dataset_memory(m_dataset), raw, maxSize()); +} + + void xmrig::RxDataset::allocate(bool hugePages) { if (hugePages) { diff --git a/src/crypto/rx/RxDataset.h b/src/crypto/rx/RxDataset.h index 5ca10e004..9b4f41204 100644 --- a/src/crypto/rx/RxDataset.h +++ b/src/crypto/rx/RxDataset.h @@ -50,23 +50,25 @@ public: XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxDataset) RxDataset(bool hugePages, bool cache); + RxDataset(RxCache *cache); ~RxDataset(); - inline bool isHugePages() const { return m_flags & 1; } - inline randomx_dataset *get() const { return m_dataset; } - inline RxCache *cache() const { return m_cache; } - inline size_t size() const { return maxSize(); } + inline bool isHugePages() const { return m_flags & 1; } + inline randomx_dataset *get() const { return m_dataset; } + inline RxCache *cache() const { return m_cache; } + inline void setCache(RxCache *cache) { m_cache = cache; } bool init(const Buffer &seed, uint32_t numThreads); + size_t size(bool cache = true) const; std::pair hugePages(bool cache = true) const; void *raw() const; + void setRaw(const void *raw); static inline constexpr size_t maxSize() { return RANDOMX_DATASET_MAX_SIZE; } private: void allocate(bool hugePages); - Algorithm m_algorithm; int m_flags = 0; randomx_dataset *m_dataset = nullptr; RxCache *m_cache = nullptr; diff 
--git a/src/crypto/rx/RxNUMAStorage.cpp b/src/crypto/rx/RxNUMAStorage.cpp index 3d5b999a9..f6fffadd8 100644 --- a/src/crypto/rx/RxNUMAStorage.cpp +++ b/src/crypto/rx/RxNUMAStorage.cpp @@ -27,8 +27,12 @@ #include "crypto/rx/RxNUMAStorage.h" #include "backend/common/Tags.h" +#include "backend/cpu/Cpu.h" +#include "backend/cpu/platform/HwlocCpuInfo.h" #include "base/io/log/Log.h" +#include "base/kernel/Platform.h" #include "base/tools/Chrono.h" +#include "base/tools/Object.h" #include "crypto/rx/RxAlgo.h" #include "crypto/rx/RxCache.h" #include "crypto/rx/RxDataset.h" @@ -36,19 +40,64 @@ #include +#include +#include +#include namespace xmrig { constexpr size_t oneMiB = 1024 * 1024; +static std::mutex mutex; + + +static bool bindToNUMANode(uint32_t nodeId) +{ + auto cpu = static_cast(Cpu::info()); + hwloc_obj_t node = hwloc_get_numanode_obj_by_os_index(cpu->topology(), nodeId); + if (!node) { + return false; + } + + if (cpu->membind(node->nodeset)) { + Platform::setThreadAffinity(static_cast(hwloc_bitmap_first(node->cpuset))); + + return true; + } + + return false; +} + + +static inline void printSkipped(uint32_t nodeId, const char *reason) +{ + LOG_WARN("%s" CYAN_BOLD("#%u ") RED_BOLD("skipped") YELLOW(" (%s)"), rx_tag(), nodeId, reason); +} + + +static inline void printDatasetReady(uint32_t nodeId, uint64_t ts) +{ + LOG_INFO("%s" CYAN_BOLD("#%u ") GREEN_BOLD("dataset ready") BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), nodeId, Chrono::steadyMSecs() - ts); +} class RxNUMAStoragePrivate { public: - inline bool isReady(const Job &job) const { return m_ready && m_seed == job; } - inline RxDataset *dataset() const { return m_dataset; } + XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxNUMAStoragePrivate) + + inline RxNUMAStoragePrivate(const std::vector &nodeset) : m_nodeset(nodeset) {} + inline ~RxNUMAStoragePrivate() + { + for (auto const &item : m_datasets) { + delete item.second; + } + } + + inline bool isAllocated() const { return m_allocated; } + inline bool isReady(const Job &job) 
const { return m_ready && m_seed == job; } + inline RxDataset *dataset(uint32_t nodeId) const { return m_datasets.count(nodeId) ? m_datasets.at(nodeId) : m_datasets.at(m_nodeset.front()); } inline void setSeed(const RxSeed &seed) @@ -63,53 +112,190 @@ public: } - inline void createDataset(bool hugePages) + inline void createDatasets(bool hugePages) { const uint64_t ts = Chrono::steadyMSecs(); - m_dataset = new RxDataset(hugePages, true); - printAllocStatus(ts); + std::vector threads; + threads.reserve(m_nodeset.size()); + + for (uint32_t node : m_nodeset) { + threads.emplace_back(allocate, this, node, hugePages); + } + + for (auto &thread : threads) { + thread.join(); + } + + std::thread thread(allocateCache, this, m_nodeset.front(), hugePages); + thread.join(); + + if (m_datasets.empty()) { + m_datasets.insert({ m_nodeset.front(), new RxDataset(m_cache) }); + + LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "failed to allocate RandomX datasets, switching to slow mode" BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), Chrono::steadyMSecs() - ts); + } + else { + dataset(m_nodeset.front())->setCache(m_cache); + + printAllocStatus(ts); + } + + m_allocated = true; } - inline void initDataset(uint32_t threads, uint64_t ts) + inline void initDatasets(uint32_t threads) { - m_dataset->init(m_seed.data(), threads); + uint64_t ts = Chrono::steadyMSecs(); + auto id = m_nodeset.front(); + auto primary = dataset(id); - LOG_INFO("%s" GREEN_BOLD("dataset ready") BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), Chrono::steadyMSecs() - ts); + primary->init(m_seed.data(), threads); + + printDatasetReady(id, ts); + + if (m_datasets.size() > 1) { + std::vector threads; + threads.reserve(m_datasets.size() - 1); + + for (auto const &item : m_datasets) { + if (item.first == id) { + continue; + } + + threads.emplace_back(copyDataset, item.second, item.first, primary->raw()); + } + + for (auto &thread : threads) { + thread.join(); + } + } m_ready = true; } -private: - void printAllocStatus(uint64_t ts) + inline 
std::pair hugePages() const { - if (m_dataset->get() != nullptr) { - const auto pages = m_dataset->hugePages(); - const double percent = pages.first == 0 ? 0.0 : static_cast(pages.first) / pages.second * 100.0; + auto pages = m_cache->hugePages(); + for (auto const &item : m_datasets) { + const auto p = item.second->hugePages(false); + pages.first += p.first; + pages.second += p.second; + } - LOG_INFO("%s" GREEN_BOLD("allocated") CYAN_BOLD(" %zu MB") BLACK_BOLD(" (%zu+%zu)") " huge pages %s%u/%u %1.0f%%" CLEAR " %sJIT" BLACK_BOLD(" (%" PRIu64 " ms)"), - rx_tag(), - (RxDataset::maxSize() + RxCache::maxSize()) / oneMiB, - RxDataset::maxSize() / oneMiB, - RxCache::maxSize() / oneMiB, - (pages.first == pages.second ? GREEN_BOLD_S : (pages.first == 0 ? RED_BOLD_S : YELLOW_BOLD_S)), - pages.first, - pages.second, - percent, - m_dataset->cache()->isJIT() ? GREEN_BOLD_S "+" : RED_BOLD_S "-", - Chrono::steadyMSecs() - ts - ); - } - else { - LOG_WARN(CLEAR "%s" YELLOW_BOLD_S "failed to allocate RandomX dataset, switching to slow mode" BLACK_BOLD(" (%" PRIu64 " ms)"), rx_tag(), Chrono::steadyMSecs() - ts); - } + return pages; } - bool m_ready = false; - RxDataset *m_dataset = nullptr; +private: + static void allocate(RxNUMAStoragePrivate *d_ptr, uint32_t nodeId, bool hugePages) + { + const uint64_t ts = Chrono::steadyMSecs(); + + if (!bindToNUMANode(nodeId)) { + printSkipped(nodeId, "can't bind memory"); + + return; + } + + auto dataset = new RxDataset(hugePages, false); + if (!dataset->get()) { + printSkipped(nodeId, "failed to allocate dataset"); + + delete dataset; + return; + } + + std::lock_guard lock(mutex); + d_ptr->m_datasets.insert({ nodeId, dataset }); + d_ptr->printAllocStatus(dataset, nodeId, ts); + } + + + static void allocateCache(RxNUMAStoragePrivate *d_ptr, uint32_t nodeId, bool hugePages) + { + const uint64_t ts = Chrono::steadyMSecs(); + + bindToNUMANode(nodeId); + + auto cache = new RxCache(hugePages); + + std::lock_guard lock(mutex); + d_ptr->m_cache = 
cache; + d_ptr->printAllocStatus(cache, nodeId, ts); + } + + + static void copyDataset(RxDataset *dst, uint32_t nodeId, const void *raw) + { + const uint64_t ts = Chrono::steadyMSecs(); + + dst->setRaw(raw); + + printDatasetReady(nodeId, ts); + } + + + void printAllocStatus(RxDataset *dataset, uint32_t nodeId, uint64_t ts) + { + const auto pages = dataset->hugePages(); + const double percent = pages.first == 0 ? 0.0 : static_cast(pages.first) / pages.second * 100.0; + + LOG_INFO("%s" CYAN_BOLD("#%u ") GREEN_BOLD("allocated") CYAN_BOLD(" %zu MB") " huge pages %s%3.0f%%" CLEAR BLACK_BOLD(" (%" PRIu64 " ms)"), + rx_tag(), + nodeId, + dataset->size() / oneMiB, + (pages.first == pages.second ? GREEN_BOLD_S : RED_BOLD_S), + percent, + Chrono::steadyMSecs() - ts + ); + } + + + void printAllocStatus(RxCache *cache, uint32_t nodeId, uint64_t ts) + { + const auto pages = cache->hugePages(); + const double percent = pages.first == 0 ? 0.0 : static_cast(pages.first) / pages.second * 100.0; + + LOG_INFO("%s" CYAN_BOLD("#%u ") GREEN_BOLD("allocated") CYAN_BOLD(" %4zu MB") " huge pages %s%3.0f%%" CLEAR " %sJIT" BLACK_BOLD(" (%" PRIu64 " ms)"), + rx_tag(), + nodeId, + cache->size() / oneMiB, + (pages.first == pages.second ? GREEN_BOLD_S : RED_BOLD_S), + percent, + cache->isJIT() ? GREEN_BOLD_S "+" : RED_BOLD_S "-", + Chrono::steadyMSecs() - ts + ); + } + + + void printAllocStatus(uint64_t ts) + { + size_t memory = m_cache->size(); + auto pages = hugePages(); + const double percent = pages.first == 0 ? 0.0 : static_cast(pages.first) / pages.second * 100.0; + + for (auto const &item : m_datasets) { + memory += item.second->size(false); + } + + LOG_INFO("%s" CYAN_BOLD("-- ") GREEN_BOLD("allocated") CYAN_BOLD(" %4zu MB") " huge pages %s%3.0f%% %u/%u" CLEAR BLACK_BOLD(" (%" PRIu64 " ms)"), + rx_tag(), + memory / oneMiB, + (pages.first == pages.second ? GREEN_BOLD_S : (pages.first == 0 ? 
RED_BOLD_S : YELLOW_BOLD_S)), + percent, + pages.first, + pages.second, + Chrono::steadyMSecs() - ts + ); + } + + + bool m_allocated = false; + bool m_ready = false; + RxCache *m_cache = nullptr; RxSeed m_seed; std::map m_datasets; std::vector m_nodeset; @@ -120,9 +306,8 @@ private: xmrig::RxNUMAStorage::RxNUMAStorage(const std::vector &nodeset) : - d_ptr(new RxNUMAStoragePrivate()) + d_ptr(new RxNUMAStoragePrivate(nodeset)) { - LOG_WARN(">>>>>> %zu", nodeset.size()); // FIXME } @@ -132,35 +317,33 @@ xmrig::RxNUMAStorage::~RxNUMAStorage() } -xmrig::RxDataset *xmrig::RxNUMAStorage::dataset(const Job &job, uint32_t) const +xmrig::RxDataset *xmrig::RxNUMAStorage::dataset(const Job &job, uint32_t nodeId) const { if (!d_ptr->isReady(job)) { return nullptr; } - return d_ptr->dataset(); + return d_ptr->dataset(nodeId); } std::pair xmrig::RxNUMAStorage::hugePages() const { - if (!d_ptr->dataset()) { + if (!d_ptr->isAllocated()) { return { 0u, 0u }; } - return d_ptr->dataset()->hugePages(); + return d_ptr->hugePages(); } void xmrig::RxNUMAStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages) { - const uint64_t ts = Chrono::steadyMSecs(); - d_ptr->setSeed(seed); - if (!d_ptr->dataset()) { - d_ptr->createDataset(hugePages); + if (!d_ptr->isAllocated()) { + d_ptr->createDatasets(hugePages); } - d_ptr->initDataset(threads, ts); + d_ptr->initDatasets(threads); } diff --git a/src/crypto/rx/RxVm.cpp b/src/crypto/rx/RxVm.cpp index 6426443a4..526b5ce69 100644 --- a/src/crypto/rx/RxVm.cpp +++ b/src/crypto/rx/RxVm.cpp @@ -41,11 +41,13 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes) m_flags |= RANDOMX_FLAG_FULL_MEM; } - if (dataset->cache()->isJIT()) { + if (!dataset->cache() || dataset->cache()->isJIT()) { m_flags |= RANDOMX_FLAG_JIT; + m_vm = randomx_create_vm(static_cast(m_flags), nullptr, dataset->get(), scratchpad); + } + else { + m_vm = randomx_create_vm(static_cast(m_flags), dataset->cache()->get(), dataset->get(), scratchpad); } - - 
m_vm = randomx_create_vm(static_cast(m_flags), dataset->cache()->get(), dataset->get(), scratchpad); }