From d4a302499689036dd7b9d413bbad8a3f707d8a61 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sat, 26 Oct 2019 03:12:55 +0700 Subject: [PATCH] Implemented CUDA config generation. --- src/backend/cuda/CudaBackend.cpp | 10 +++- src/backend/cuda/CudaConfig.cpp | 24 ++++++-- src/backend/cuda/CudaConfig.h | 14 ++++- src/backend/cuda/CudaConfig_gen.h | 76 +++++++++++++++++++++++- src/backend/cuda/CudaThread.cpp | 40 ++++++++++++- src/backend/cuda/CudaThread.h | 29 +++++++-- src/backend/cuda/CudaThreads.cpp | 8 +++ src/backend/cuda/CudaThreads.h | 2 + src/backend/cuda/wrappers/CudaDevice.cpp | 12 ++-- src/backend/cuda/wrappers/CudaDevice.h | 2 +- src/backend/cuda/wrappers/CudaLib.cpp | 16 ++--- src/backend/cuda/wrappers/CudaLib.h | 6 +- src/backend/opencl/OclBackend.cpp | 2 +- src/backend/opencl/OclConfig.cpp | 5 +- src/backend/opencl/OclConfig.h | 2 +- src/backend/opencl/OclThreads.cpp | 2 +- 16 files changed, 208 insertions(+), 42 deletions(-) diff --git a/src/backend/cuda/CudaBackend.cpp b/src/backend/cuda/CudaBackend.cpp index a7c315ec2..1a4f9294b 100644 --- a/src/backend/cuda/CudaBackend.cpp +++ b/src/backend/cuda/CudaBackend.cpp @@ -97,10 +97,14 @@ public: return printDisabled(RED_S " (no devices)"); } + if (!devices.empty()) { + return; + } + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") WHITE_BOLD("%u.%u") "/" WHITE_BOLD("%u.%u") BLACK_BOLD("/%s"), "CUDA", runtimeVersion / 1000, runtimeVersion % 100, driverVersion / 1000, driverVersion % 100, CudaLib::pluginVersion()); - devices = CudaLib::devices(); + devices = CudaLib::devices(cuda.bfactor(), cuda.bsleep()); for (const CudaDevice &device : devices) { Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("#%zu") YELLOW(" %s") GREEN_BOLD(" %s ") WHITE_BOLD("%u/%u MHz") " smx:" WHITE_BOLD("%u") " arch:" WHITE_BOLD("%u%u") " mem:" CYAN("%zu/%zu") " MB", @@ -156,13 +160,13 @@ xmrig::CudaBackend::~CudaBackend() bool xmrig::CudaBackend::isEnabled() const { - return false; + return d_ptr->controller->config()->cuda().isEnabled() && CudaLib::isInitialized() && !d_ptr->devices.empty();; } bool xmrig::CudaBackend::isEnabled(const Algorithm &algorithm) const { - return false; + return !d_ptr->controller->config()->cuda().threads().get(algorithm).isEmpty(); } diff --git a/src/backend/cuda/CudaConfig.cpp b/src/backend/cuda/CudaConfig.cpp index ee4008222..084fb702e 100644 --- a/src/backend/cuda/CudaConfig.cpp +++ b/src/backend/cuda/CudaConfig.cpp @@ -25,6 +25,7 @@ #include "backend/cuda/CudaConfig.h" #include "backend/cuda/CudaConfig_gen.h" +#include "backend/cuda/wrappers/CudaLib.h" #include "base/io/json/Json.h" #include "base/io/log/Log.h" #include "rapidjson/document.h" @@ -91,13 +92,26 @@ void xmrig::CudaConfig::generate() return; } + if (!CudaLib::init(loader())) { + return; + } + + if (!CudaLib::runtimeVersion() || !CudaLib::driverVersion() || !CudaLib::deviceCount()) { + return; + } + + const auto devices = CudaLib::devices(bfactor(), bsleep()); + if (devices.empty()) { + return; + } + size_t count = 0; -// count += xmrig::generate(m_threads, devices); -// count += xmrig::generate(m_threads, devices); -// count += xmrig::generate(m_threads, devices); -// count += xmrig::generate(m_threads, devices); -// count += xmrig::generate(m_threads, devices); + count += xmrig::generate(m_threads, devices); + count += xmrig::generate(m_threads, devices); + count += xmrig::generate(m_threads, devices); + count += xmrig::generate(m_threads, devices); + count += xmrig::generate(m_threads, devices); m_shouldSave = count > 0; } diff --git a/src/backend/cuda/CudaConfig.h b/src/backend/cuda/CudaConfig.h index 16c06e60a..5c392d2c5 100644 --- a/src/backend/cuda/CudaConfig.h +++ b/src/backend/cuda/CudaConfig.h @@ -45,16 +45,26 @@ public: inline bool isShouldSave() const { return m_shouldSave; } inline const String &loader() const { return m_loader; } inline const Threads &threads() const { return m_threads; } + inline int32_t bfactor() const { return m_bfactor; } + inline int32_t bsleep() const { return m_bsleep; } private: void generate(); void setDevicesHint(const char *devicesHint); - bool m_enabled = false; - bool m_shouldSave = false; + bool m_enabled = false; + bool m_shouldSave = false; std::vector m_devicesHint; String m_loader; Threads m_threads; + +# ifdef _WIN32 + int32_t m_bfactor = 6; + int32_t m_bsleep = 25; +# else + int32_t m_bfactor = 0; + int32_t m_bsleep = 0; +# endif }; diff --git a/src/backend/cuda/CudaConfig_gen.h b/src/backend/cuda/CudaConfig_gen.h index 757635ee2..d7c913f59 100644 --- a/src/backend/cuda/CudaConfig_gen.h +++ b/src/backend/cuda/CudaConfig_gen.h @@ -22,12 +22,13 @@ * along with this program. If not, see . */ -#ifndef XMRIG_OCLCONFIG_GEN_H -#define XMRIG_OCLCONFIG_GEN_H +#ifndef XMRIG_CUDACONFIG_GEN_H +#define XMRIG_CUDACONFIG_GEN_H #include "backend/common/Threads.h" #include "backend/cuda/CudaThreads.h" +#include "backend/cuda/wrappers/CudaDevice.h" #include @@ -36,7 +37,76 @@ namespace xmrig { +static inline size_t generate(const char *key, Threads &threads, const Algorithm &algorithm, const std::vector &devices) +{ + if (threads.isExist(algorithm) || threads.has(key)) { + return 0; + } + + return threads.move(key, CudaThreads(devices, algorithm)); +} + + +template +static inline size_t generate(Threads &, const std::vector &) { return 0; } + + +template<> +size_t inline generate(Threads &threads, const std::vector &devices) +{ + size_t count = 0; + + count += generate("cn", threads, Algorithm::CN_1, devices); + count += generate("cn/2", threads, Algorithm::CN_2, devices); + + if (!threads.isExist(Algorithm::CN_0)) { + threads.disable(Algorithm::CN_0); + count++; + } + +# ifdef XMRIG_ALGO_CN_GPU + count += generate("cn/gpu", threads, Algorithm::CN_GPU, devices); +# endif + + return count; +} + + +#ifdef XMRIG_ALGO_CN_LITE +template<> +size_t inline generate(Threads &threads, const std::vector &devices) +{ + size_t count = generate("cn-lite", threads, Algorithm::CN_LITE_1, devices); + + if (!threads.isExist(Algorithm::CN_LITE_0)) { + threads.disable(Algorithm::CN_LITE_0); + ++count; + } + + return count; +} +#endif + + +#ifdef XMRIG_ALGO_CN_HEAVY +template<> +size_t inline generate(Threads &threads, const std::vector &devices) +{ + return generate("cn-heavy", threads, Algorithm::CN_HEAVY_0, devices); +} +#endif + + +#ifdef XMRIG_ALGO_CN_PICO +template<> +size_t inline generate(Threads &threads, const std::vector &devices) +{ + return generate("cn-pico", threads, Algorithm::CN_PICO_0, devices); +} +#endif + + } /* namespace xmrig */ -#endif /* XMRIG_OCLCONFIG_GEN_H */ +#endif /* XMRIG_CUDACONFIG_GEN_H */ diff --git a/src/backend/cuda/CudaThread.cpp b/src/backend/cuda/CudaThread.cpp index 9dc77c4f6..3100b662c 100644 --- a/src/backend/cuda/CudaThread.cpp +++ b/src/backend/cuda/CudaThread.cpp @@ -24,7 +24,7 @@ #include "backend/cuda/CudaThread.h" - +#include "backend/cuda/wrappers/CudaLib.h" #include "base/io/json/Json.h" #include "rapidjson/document.h" @@ -34,6 +34,12 @@ namespace xmrig { +static const char *kAffinity = "affinity"; +static const char *kBFactor = "bfactor"; +static const char *kBlocks = "blocks"; +static const char *kBSleep = "bsleep"; +static const char *kIndex = "index"; +static const char *kThreads = "threads"; } // namespace xmrig @@ -43,12 +49,35 @@ xmrig::CudaThread::CudaThread(const rapidjson::Value &value) if (!value.IsObject()) { return; } + + m_index = Json::getUint(value, kIndex); + m_threads = Json::getInt(value, kThreads); + m_blocks = Json::getInt(value, kBlocks); + m_bfactor = std::min(Json::getUint(value, kBFactor, m_bfactor), 12u); + m_bsleep = Json::getUint(value, kBSleep, m_bsleep); + m_affinity = Json::getUint64(value, kAffinity, m_affinity); +} + + +xmrig::CudaThread::CudaThread(uint32_t index, nvid_ctx *ctx) : + m_blocks(CudaLib::deviceInt(ctx, CudaLib::DeviceBlocks)), + m_threads(CudaLib::deviceInt(ctx, CudaLib::DeviceThreads)), + m_index(index), + m_bfactor(CudaLib::deviceUint(ctx, CudaLib::DeviceBFactor)), + m_bsleep(CudaLib::deviceUint(ctx, CudaLib::DeviceBSleep)) +{ + } bool xmrig::CudaThread::isEqual(const CudaThread &other) const { - return false; + return m_blocks == other.m_blocks && + m_threads == other.m_threads && + m_affinity == other.m_affinity && + m_index == other.m_index && + m_bfactor == other.m_bfactor && + m_bsleep == other.m_bsleep; } @@ -59,5 +88,12 @@ rapidjson::Value xmrig::CudaThread::toJSON(rapidjson::Document &doc) const Value out(kObjectType); + out.AddMember(StringRef(kIndex), index(), allocator); + out.AddMember(StringRef(kThreads), threads(), allocator); + out.AddMember(StringRef(kBlocks), blocks(), allocator); + out.AddMember(StringRef(kBFactor), bfactor(), allocator); + out.AddMember(StringRef(kBSleep), bsleep(), allocator); + out.AddMember(StringRef(kAffinity), affinity(), allocator); + return out; } diff --git a/src/backend/cuda/CudaThread.h b/src/backend/cuda/CudaThread.h index ececf3ada..f6523d955 100644 --- a/src/backend/cuda/CudaThread.h +++ b/src/backend/cuda/CudaThread.h @@ -26,14 +26,13 @@ #define XMRIG_CUDATHREAD_H +using nvid_ctx = struct nvid_ctx; + + #include "crypto/common/Algorithm.h" #include "rapidjson/fwd.h" -#include -#include - - namespace xmrig { @@ -41,10 +40,16 @@ class CudaThread { public: CudaThread() = delete; - CudaThread(const rapidjson::Value &value); + CudaThread(uint32_t index, nvid_ctx *ctx); - inline bool isValid() const { return false; } + inline bool isValid() const { return m_blocks > 0 && m_threads > 0; } + inline int32_t bfactor() const { return static_cast(m_bfactor); } + inline int32_t blocks() const { return m_blocks; } + inline int32_t bsleep() const { return static_cast(m_bsleep); } + inline int32_t threads() const { return m_threads; } + inline int64_t affinity() const { return m_affinity; } + inline uint32_t index() const { return m_index; } inline bool operator!=(const CudaThread &other) const { return !isEqual(other); } inline bool operator==(const CudaThread &other) const { return isEqual(other); } @@ -53,6 +58,18 @@ public: rapidjson::Value toJSON(rapidjson::Document &doc) const; private: + int32_t m_blocks = 0; + int32_t m_threads = 0; + int64_t m_affinity = -1; + uint32_t m_index = 0; + +# ifdef _WIN32 + uint32_t m_bfactor = 6; + uint32_t m_bsleep = 25; +# else + uint32_t m_bfactor = 0; + uint32_t m_bsleep = 0; +# endif }; diff --git a/src/backend/cuda/CudaThreads.cpp b/src/backend/cuda/CudaThreads.cpp index 9c8b1531b..5ff4cb24b 100644 --- a/src/backend/cuda/CudaThreads.cpp +++ b/src/backend/cuda/CudaThreads.cpp @@ -44,6 +44,14 @@ xmrig::CudaThreads::CudaThreads(const rapidjson::Value &value) } +xmrig::CudaThreads::CudaThreads(const std::vector &devices, const Algorithm &algorithm) +{ + for (const auto &device : devices) { + device.generate(algorithm, *this); + } +} + + bool xmrig::CudaThreads::isEqual(const CudaThreads &other) const { if (isEmpty() && other.isEmpty()) { diff --git a/src/backend/cuda/CudaThreads.h b/src/backend/cuda/CudaThreads.h index 3c7627228..5f174d8eb 100644 --- a/src/backend/cuda/CudaThreads.h +++ b/src/backend/cuda/CudaThreads.h @@ -30,6 +30,7 @@ #include "backend/cuda/CudaThread.h" +#include "backend/cuda/wrappers/CudaDevice.h" namespace xmrig { @@ -40,6 +41,7 @@ class CudaThreads public: CudaThreads() = default; CudaThreads(const rapidjson::Value &value); + CudaThreads(const std::vector &devices, const Algorithm &algorithm); inline bool isEmpty() const { return m_data.empty(); } inline const std::vector &data() const { return m_data; } diff --git a/src/backend/cuda/wrappers/CudaDevice.cpp b/src/backend/cuda/wrappers/CudaDevice.cpp index 03646eae3..7eb947fc2 100644 --- a/src/backend/cuda/wrappers/CudaDevice.cpp +++ b/src/backend/cuda/wrappers/CudaDevice.cpp @@ -34,12 +34,11 @@ #include - -xmrig::CudaDevice::CudaDevice(uint32_t index) : +xmrig::CudaDevice::CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep) : m_index(index) { - auto ctx = CudaLib::alloc(index, 0, 0, 0, 0, Algorithm::INVALID); - if (CudaLib::deviceInfo(ctx) != 0) { + auto ctx = CudaLib::alloc(index, bfactor, bsleep); + if (CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID) != 0) { CudaLib::release(ctx); return; @@ -105,6 +104,11 @@ uint32_t xmrig::CudaDevice::smx() const void xmrig::CudaDevice::generate(const Algorithm &algorithm, CudaThreads &threads) const { + if (CudaLib::deviceInfo(m_ctx, -1, -1, algorithm) != 0) { + return; + } + + threads.add(CudaThread(m_index, m_ctx)); } diff --git a/src/backend/cuda/wrappers/CudaDevice.h b/src/backend/cuda/wrappers/CudaDevice.h index c0df83c83..40b8ee11f 100644 --- a/src/backend/cuda/wrappers/CudaDevice.h +++ b/src/backend/cuda/wrappers/CudaDevice.h @@ -46,7 +46,7 @@ public: CudaDevice() = delete; CudaDevice(const CudaDevice &other) = delete; CudaDevice(CudaDevice &&other) noexcept; - CudaDevice(uint32_t index); + CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep); ~CudaDevice(); size_t freeMemSize() const; diff --git a/src/backend/cuda/wrappers/CudaLib.cpp b/src/backend/cuda/wrappers/CudaLib.cpp index a8ca56015..58ce66bb5 100644 --- a/src/backend/cuda/wrappers/CudaLib.cpp +++ b/src/backend/cuda/wrappers/CudaLib.cpp @@ -58,9 +58,9 @@ static const char *kSymbolNotFound = "symbol not found"; static const char *kVersion = "version"; -using alloc_t = nvid_ctx * (*)(size_t, int32_t, int32_t, int32_t, int32_t, int32_t); +using alloc_t = nvid_ctx * (*)(uint32_t, int32_t, int32_t); using deviceCount_t = uint32_t (*)(); -using deviceInfo_t = int32_t (*)(nvid_ctx *); +using deviceInfo_t = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t); using deviceInt_t = int32_t (*)(nvid_ctx *, CudaLib::DeviceProperty); using deviceName_t = const char * (*)(nvid_ctx *); using deviceUint_t = uint32_t (*)(nvid_ctx *, CudaLib::DeviceProperty); @@ -129,9 +129,9 @@ const char *xmrig::CudaLib::pluginVersion() noexcept } -int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx) noexcept +int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept { - return pDeviceInfo(ctx); + return pDeviceInfo(ctx, blocks, threads, algorithm); } @@ -141,13 +141,13 @@ int32_t xmrig::CudaLib::deviceInt(nvid_ctx *ctx, DeviceProperty property) noexce } -nvid_ctx *xmrig::CudaLib::alloc(size_t id, int blocks, int threads, int bfactor, int bsleep, const Algorithm &algorithm) noexcept +nvid_ctx *xmrig::CudaLib::alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept { - return pAlloc(id, blocks, threads, bfactor, bsleep, algorithm); + return pAlloc(id, bfactor, bsleep); } -std::vector xmrig::CudaLib::devices() noexcept +std::vector xmrig::CudaLib::devices(int32_t bfactor, int32_t bsleep) noexcept { const uint32_t count = deviceCount(); if (!count) { @@ -158,7 +158,7 @@ std::vector xmrig::CudaLib::devices() noexcept out.reserve(count); for (uint32_t i = 0; i < count; ++i) { - CudaDevice device(i); + CudaDevice device(i, bfactor, bsleep); if (device.isValid()) { out.emplace_back(std::move(device)); } diff --git a/src/backend/cuda/wrappers/CudaLib.h b/src/backend/cuda/wrappers/CudaLib.h index e479102b6..253a15a4f 100644 --- a/src/backend/cuda/wrappers/CudaLib.h +++ b/src/backend/cuda/wrappers/CudaLib.h @@ -72,10 +72,10 @@ public: static const char *deviceName(nvid_ctx *ctx) noexcept; static const char *pluginVersion() noexcept; - static int deviceInfo(nvid_ctx *ctx) noexcept; + static int deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept; static int32_t deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept; - static nvid_ctx *alloc(size_t id, int blocks, int threads, int bfactor, int bsleep, const Algorithm &algorithm) noexcept; - static std::vector devices() noexcept; + static nvid_ctx *alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept; + static std::vector devices(int32_t bfactor, int32_t bsleep) noexcept; static uint32_t deviceCount() noexcept; static uint32_t deviceUint(nvid_ctx *ctx, DeviceProperty property) noexcept; static uint32_t driverVersion() noexcept; diff --git a/src/backend/opencl/OclBackend.cpp b/src/backend/opencl/OclBackend.cpp index e0c197d8b..91c1eb6c5 100644 --- a/src/backend/opencl/OclBackend.cpp +++ b/src/backend/opencl/OclBackend.cpp @@ -319,7 +319,7 @@ void xmrig::OclBackend::setJob(const Job &job) return stop(); } - std::vector threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices, tag); + std::vector threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices); if (!d_ptr->threads.empty() && d_ptr->threads.size() == threads.size() && std::equal(d_ptr->threads.begin(), d_ptr->threads.end(), threads.begin())) { return; } diff --git a/src/backend/opencl/OclConfig.cpp b/src/backend/opencl/OclConfig.cpp index 71a669d6d..21cccabfa 100644 --- a/src/backend/opencl/OclConfig.cpp +++ b/src/backend/opencl/OclConfig.cpp @@ -24,6 +24,7 @@ #include "backend/opencl/OclConfig.h" +#include "backend/common/Tags.h" #include "backend/opencl/OclConfig_gen.h" #include "backend/opencl/wrappers/OclLib.h" #include "base/io/json/Json.h" @@ -113,7 +114,7 @@ rapidjson::Value xmrig::OclConfig::toJSON(rapidjson::Document &doc) const } -std::vector xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector &devices, const char *tag) const +std::vector xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector &devices) const { std::vector out; const OclThreads &threads = m_threads.get(algorithm); @@ -126,7 +127,7 @@ std::vector xmrig::OclConfig::get(const Miner *miner, cons for (const OclThread &thread : threads.data()) { if (thread.index() >= devices.size()) { - LOG_INFO("%s" YELLOW(" skip non-existing device with index ") YELLOW_BOLD("%u"), tag, thread.index()); + LOG_INFO("%s" YELLOW(" skip non-existing device with index ") YELLOW_BOLD("%u"), ocl_tag(), thread.index()); continue; } diff --git a/src/backend/opencl/OclConfig.h b/src/backend/opencl/OclConfig.h index 9dd5ad1da..8e2db0424 100644 --- a/src/backend/opencl/OclConfig.h +++ b/src/backend/opencl/OclConfig.h @@ -42,7 +42,7 @@ public: OclPlatform platform() const; rapidjson::Value toJSON(rapidjson::Document &doc) const; - std::vector get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector &devices, const char *tag) const; + std::vector get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector &devices) const; void read(const rapidjson::Value &value); inline bool isCacheEnabled() const { return m_cache; } diff --git a/src/backend/opencl/OclThreads.cpp b/src/backend/opencl/OclThreads.cpp index 167ccfc0b..3e53a5f5d 100644 --- a/src/backend/opencl/OclThreads.cpp +++ b/src/backend/opencl/OclThreads.cpp @@ -46,7 +46,7 @@ xmrig::OclThreads::OclThreads(const rapidjson::Value &value) xmrig::OclThreads::OclThreads(const std::vector &devices, const Algorithm &algorithm) { - for (const OclDevice &device : devices) { + for (const auto &device : devices) { device.generate(algorithm, *this); } }