Implemented CUDA config generation.

XMRig 2019-10-26 03:12:55 +07:00
parent 77d5b73724
commit d4a3024996
16 changed files with 208 additions and 42 deletions

src/backend/cuda/CudaBackend.cpp View file

@@ -97,10 +97,14 @@ public:
return printDisabled(RED_S " (no devices)");
}
if (!devices.empty()) {
return;
}
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") WHITE_BOLD("%u.%u") "/" WHITE_BOLD("%u.%u") BLACK_BOLD("/%s"), "CUDA",
runtimeVersion / 1000, runtimeVersion % 100, driverVersion / 1000, driverVersion % 100, CudaLib::pluginVersion());
devices = CudaLib::devices();
devices = CudaLib::devices(cuda.bfactor(), cuda.bsleep());
for (const CudaDevice &device : devices) {
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("#%zu") YELLOW(" %s") GREEN_BOLD(" %s ") WHITE_BOLD("%u/%u MHz") " smx:" WHITE_BOLD("%u") " arch:" WHITE_BOLD("%u%u") " mem:" CYAN("%zu/%zu") " MB",
@@ -156,13 +160,13 @@ xmrig::CudaBackend::~CudaBackend()
bool xmrig::CudaBackend::isEnabled() const
{
return false;
return d_ptr->controller->config()->cuda().isEnabled() && CudaLib::isInitialized() && !d_ptr->devices.empty();
}
bool xmrig::CudaBackend::isEnabled(const Algorithm &algorithm) const
{
return false;
return !d_ptr->controller->config()->cuda().threads().get(algorithm).isEmpty();
}

src/backend/cuda/CudaConfig.cpp View file

@@ -25,6 +25,7 @@
#include "backend/cuda/CudaConfig.h"
#include "backend/cuda/CudaConfig_gen.h"
#include "backend/cuda/wrappers/CudaLib.h"
#include "base/io/json/Json.h"
#include "base/io/log/Log.h"
#include "rapidjson/document.h"
@@ -91,13 +92,26 @@ void xmrig::CudaConfig::generate()
return;
}
if (!CudaLib::init(loader())) {
return;
}
if (!CudaLib::runtimeVersion() || !CudaLib::driverVersion() || !CudaLib::deviceCount()) {
return;
}
const auto devices = CudaLib::devices(bfactor(), bsleep());
if (devices.empty()) {
return;
}
size_t count = 0;
// count += xmrig::generate<Algorithm::CN>(m_threads, devices);
// count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
// count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
// count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
// count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
count += xmrig::generate<Algorithm::CN>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
m_shouldSave = count > 0;
}

src/backend/cuda/CudaConfig.h View file

@@ -45,6 +45,8 @@ public:
inline bool isShouldSave() const { return m_shouldSave; }
inline const String &loader() const { return m_loader; }
inline const Threads<CudaThreads> &threads() const { return m_threads; }
inline int32_t bfactor() const { return m_bfactor; }
inline int32_t bsleep() const { return m_bsleep; }
private:
void generate();
@@ -55,6 +57,14 @@ private:
std::vector<uint32_t> m_devicesHint;
String m_loader;
Threads<CudaThreads> m_threads;
# ifdef _WIN32
int32_t m_bfactor = 6;
int32_t m_bsleep = 25;
# else
int32_t m_bfactor = 0;
int32_t m_bsleep = 0;
# endif
};
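
The platform-specific defaults above (bfactor 6 / bsleep 25 on Windows, 0 / 0 elsewhere) follow the usual CUDA-miner convention: bfactor asks the plugin to split each kernel launch into 2^bfactor smaller launches, and bsleep inserts a pause of that many microseconds between them, so a single long-running kernel cannot freeze the desktop or trip the Windows display-driver watchdog. A minimal stand-alone sketch of that splitting scheme, not code from this commit (runInPieces and launchPiece are illustrative names):

#include <chrono>
#include <cstdint>
#include <thread>

// Run one unit of GPU work in 2^bfactor pieces, pausing bsleep microseconds
// between pieces; launchPiece stands in for a partial kernel launch.
template<typename F>
void runInPieces(uint32_t bfactor, uint32_t bsleep, F &&launchPiece)
{
    const uint32_t pieces = 1u << bfactor;

    for (uint32_t i = 0; i < pieces; ++i) {
        launchPiece(i, pieces);

        if (bsleep > 0 && i + 1 < pieces) {
            std::this_thread::sleep_for(std::chrono::microseconds(bsleep));
        }
    }
}

int main()
{
    runInPieces(6, 25, [](uint32_t piece, uint32_t total) { /* launch kernel piece `piece` of `total` */ });
}

With bfactor 0 the loop degenerates to a single launch, which is why the non-Windows defaults are zero.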

src/backend/cuda/CudaConfig_gen.h View file

@@ -22,12 +22,13 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_OCLCONFIG_GEN_H
#define XMRIG_OCLCONFIG_GEN_H
#ifndef XMRIG_CUDACONFIG_GEN_H
#define XMRIG_CUDACONFIG_GEN_H
#include "backend/common/Threads.h"
#include "backend/cuda/CudaThreads.h"
#include "backend/cuda/wrappers/CudaDevice.h"
#include <algorithm>
@@ -36,7 +37,76 @@
namespace xmrig {
static inline size_t generate(const char *key, Threads<CudaThreads> &threads, const Algorithm &algorithm, const std::vector<CudaDevice> &devices)
{
if (threads.isExist(algorithm) || threads.has(key)) {
return 0;
}
return threads.move(key, CudaThreads(devices, algorithm));
}
template<Algorithm::Family FAMILY>
static inline size_t generate(Threads<CudaThreads> &, const std::vector<CudaDevice> &) { return 0; }
template<>
size_t inline generate<Algorithm::CN>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
{
size_t count = 0;
count += generate("cn", threads, Algorithm::CN_1, devices);
count += generate("cn/2", threads, Algorithm::CN_2, devices);
if (!threads.isExist(Algorithm::CN_0)) {
threads.disable(Algorithm::CN_0);
count++;
}
# ifdef XMRIG_ALGO_CN_GPU
count += generate("cn/gpu", threads, Algorithm::CN_GPU, devices);
# endif
return count;
}
#ifdef XMRIG_ALGO_CN_LITE
template<>
size_t inline generate<Algorithm::CN_LITE>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
{
size_t count = generate("cn-lite", threads, Algorithm::CN_LITE_1, devices);
if (!threads.isExist(Algorithm::CN_LITE_0)) {
threads.disable(Algorithm::CN_LITE_0);
++count;
}
return count;
}
#endif
#ifdef XMRIG_ALGO_CN_HEAVY
template<>
size_t inline generate<Algorithm::CN_HEAVY>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
{
return generate("cn-heavy", threads, Algorithm::CN_HEAVY_0, devices);
}
#endif
#ifdef XMRIG_ALGO_CN_PICO
template<>
size_t inline generate<Algorithm::CN_PICO>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
{
return generate("cn-pico", threads, Algorithm::CN_PICO_0, devices);
}
#endif
} /* namespace xmrig */
#endif /* XMRIG_OCLCONFIG_GEN_H */
#endif /* XMRIG_CUDACONFIG_GEN_H */
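
This header uses a common idiom: a generic generate<FAMILY>() that produces nothing, plus explicit specializations that are only compiled when the matching XMRIG_ALGO_* macro is defined. CudaConfig::generate() can therefore call every family unconditionally; families disabled at build time fall back to the generic template and contribute zero profiles. A self-contained sketch of that idiom with illustrative names (Family, the return values and the printed count are not from this commit):

#include <cstddef>
#include <iostream>

enum class Family { CN, CN_LITE, RANDOM_X };

// Generic template: a family without a specialization generates no profiles.
template<Family F>
static inline size_t generate() { return 0; }

// Specializations stand in for the XMRIG_ALGO_*-guarded versions above.
template<> size_t inline generate<Family::CN>()       { return 2; }  // e.g. "cn" and "cn/2"
template<> size_t inline generate<Family::RANDOM_X>() { return 1; }  // e.g. "rx"

int main()
{
    size_t count = 0;
    count += generate<Family::CN>();
    count += generate<Family::CN_LITE>();   // no specialization -> generic template -> 0
    count += generate<Family::RANDOM_X>();

    std::cout << "generated " << count << " profiles\n";   // prints: generated 3 profiles
}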

src/backend/cuda/CudaThread.cpp View file

@@ -24,7 +24,7 @@
#include "backend/cuda/CudaThread.h"
#include "backend/cuda/wrappers/CudaLib.h"
#include "base/io/json/Json.h"
#include "rapidjson/document.h"
@@ -34,6 +34,12 @@
namespace xmrig {
static const char *kAffinity = "affinity";
static const char *kBFactor = "bfactor";
static const char *kBlocks = "blocks";
static const char *kBSleep = "bsleep";
static const char *kIndex = "index";
static const char *kThreads = "threads";
} // namespace xmrig
@@ -43,12 +49,35 @@ xmrig::CudaThread::CudaThread(const rapidjson::Value &value)
if (!value.IsObject()) {
return;
}
m_index = Json::getUint(value, kIndex);
m_threads = Json::getInt(value, kThreads);
m_blocks = Json::getInt(value, kBlocks);
m_bfactor = std::min(Json::getUint(value, kBFactor, m_bfactor), 12u);
m_bsleep = Json::getUint(value, kBSleep, m_bsleep);
m_affinity = Json::getUint64(value, kAffinity, m_affinity);
}
xmrig::CudaThread::CudaThread(uint32_t index, nvid_ctx *ctx) :
m_blocks(CudaLib::deviceInt(ctx, CudaLib::DeviceBlocks)),
m_threads(CudaLib::deviceInt(ctx, CudaLib::DeviceThreads)),
m_index(index),
m_bfactor(CudaLib::deviceUint(ctx, CudaLib::DeviceBFactor)),
m_bsleep(CudaLib::deviceUint(ctx, CudaLib::DeviceBSleep))
{
}
bool xmrig::CudaThread::isEqual(const CudaThread &other) const
{
return false;
return m_blocks == other.m_blocks &&
m_threads == other.m_threads &&
m_affinity == other.m_affinity &&
m_index == other.m_index &&
m_bfactor == other.m_bfactor &&
m_bsleep == other.m_bsleep;
}
@@ -59,5 +88,12 @@ rapidjson::Value xmrig::CudaThread::toJSON(rapidjson::Document &doc) const
Value out(kObjectType);
out.AddMember(StringRef(kIndex), index(), allocator);
out.AddMember(StringRef(kThreads), threads(), allocator);
out.AddMember(StringRef(kBlocks), blocks(), allocator);
out.AddMember(StringRef(kBFactor), bfactor(), allocator);
out.AddMember(StringRef(kBSleep), bsleep(), allocator);
out.AddMember(StringRef(kAffinity), affinity(), allocator);
return out;
}
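
Given the keys registered above, each auto-generated thread ends up in config.json as a small object holding index, threads, blocks, bfactor, bsleep and affinity. A stand-alone rapidjson sketch that reproduces the same shape (the numeric values are made up for illustration, not defaults chosen by this commit):

#include "rapidjson/document.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"

#include <cstdio>

int main()
{
    using namespace rapidjson;

    Document doc(kObjectType);
    auto &allocator = doc.GetAllocator();

    doc.AddMember(StringRef("index"),    0u, allocator);
    doc.AddMember(StringRef("threads"),  32, allocator);
    doc.AddMember(StringRef("blocks"),   60, allocator);
    doc.AddMember(StringRef("bfactor"),  6u, allocator);
    doc.AddMember(StringRef("bsleep"),   25u, allocator);
    doc.AddMember(StringRef("affinity"), -1, allocator);

    StringBuffer buffer;
    Writer<StringBuffer> writer(buffer);
    doc.Accept(writer);

    // {"index":0,"threads":32,"blocks":60,"bfactor":6,"bsleep":25,"affinity":-1}
    std::printf("%s\n", buffer.GetString());

    return 0;
}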

src/backend/cuda/CudaThread.h View file

@@ -26,14 +26,13 @@
#define XMRIG_CUDATHREAD_H
using nvid_ctx = struct nvid_ctx;
#include "crypto/common/Algorithm.h"
#include "rapidjson/fwd.h"
#include <bitset>
#include <vector>
namespace xmrig {
@@ -41,10 +40,16 @@ class CudaThread
{
public:
CudaThread() = delete;
CudaThread(const rapidjson::Value &value);
CudaThread(uint32_t index, nvid_ctx *ctx);
inline bool isValid() const { return false; }
inline bool isValid() const { return m_blocks > 0 && m_threads > 0; }
inline int32_t bfactor() const { return static_cast<int32_t>(m_bfactor); }
inline int32_t blocks() const { return m_blocks; }
inline int32_t bsleep() const { return static_cast<int32_t>(m_bsleep); }
inline int32_t threads() const { return m_threads; }
inline int64_t affinity() const { return m_affinity; }
inline uint32_t index() const { return m_index; }
inline bool operator!=(const CudaThread &other) const { return !isEqual(other); }
inline bool operator==(const CudaThread &other) const { return isEqual(other); }
@@ -53,6 +58,18 @@ public:
rapidjson::Value toJSON(rapidjson::Document &doc) const;
private:
int32_t m_blocks = 0;
int32_t m_threads = 0;
int64_t m_affinity = -1;
uint32_t m_index = 0;
# ifdef _WIN32
uint32_t m_bfactor = 6;
uint32_t m_bsleep = 25;
# else
uint32_t m_bfactor = 0;
uint32_t m_bsleep = 0;
# endif
};

src/backend/cuda/CudaThreads.cpp View file

@@ -44,6 +44,14 @@ xmrig::CudaThreads::CudaThreads(const rapidjson::Value &value)
}
xmrig::CudaThreads::CudaThreads(const std::vector<CudaDevice> &devices, const Algorithm &algorithm)
{
for (const auto &device : devices) {
device.generate(algorithm, *this);
}
}
bool xmrig::CudaThreads::isEqual(const CudaThreads &other) const
{
if (isEmpty() && other.isEmpty()) {

src/backend/cuda/CudaThreads.h View file

@@ -30,6 +30,7 @@
#include "backend/cuda/CudaThread.h"
#include "backend/cuda/wrappers/CudaDevice.h"
namespace xmrig {
@@ -40,6 +41,7 @@ class CudaThreads
public:
CudaThreads() = default;
CudaThreads(const rapidjson::Value &value);
CudaThreads(const std::vector<CudaDevice> &devices, const Algorithm &algorithm);
inline bool isEmpty() const { return m_data.empty(); }
inline const std::vector<CudaThread> &data() const { return m_data; }

src/backend/cuda/wrappers/CudaDevice.cpp View file

@@ -34,12 +34,11 @@
#include <algorithm>
xmrig::CudaDevice::CudaDevice(uint32_t index) :
xmrig::CudaDevice::CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep) :
m_index(index)
{
auto ctx = CudaLib::alloc(index, 0, 0, 0, 0, Algorithm::INVALID);
if (CudaLib::deviceInfo(ctx) != 0) {
auto ctx = CudaLib::alloc(index, bfactor, bsleep);
if (CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID) != 0) {
CudaLib::release(ctx);
return;
@@ -105,6 +104,11 @@ uint32_t xmrig::CudaDevice::smx() const
void xmrig::CudaDevice::generate(const Algorithm &algorithm, CudaThreads &threads) const
{
if (CudaLib::deviceInfo(m_ctx, -1, -1, algorithm) != 0) {
return;
}
threads.add(CudaThread(m_index, m_ctx));
}

src/backend/cuda/wrappers/CudaDevice.h View file

@@ -46,7 +46,7 @@ public:
CudaDevice() = delete;
CudaDevice(const CudaDevice &other) = delete;
CudaDevice(CudaDevice &&other) noexcept;
CudaDevice(uint32_t index);
CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep);
~CudaDevice();
size_t freeMemSize() const;

src/backend/cuda/wrappers/CudaLib.cpp View file

@@ -58,9 +58,9 @@ static const char *kSymbolNotFound = "symbol not found";
static const char *kVersion = "version";
using alloc_t = nvid_ctx * (*)(size_t, int32_t, int32_t, int32_t, int32_t, int32_t);
using alloc_t = nvid_ctx * (*)(uint32_t, int32_t, int32_t);
using deviceCount_t = uint32_t (*)();
using deviceInfo_t = int32_t (*)(nvid_ctx *);
using deviceInfo_t = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t);
using deviceInt_t = int32_t (*)(nvid_ctx *, CudaLib::DeviceProperty);
using deviceName_t = const char * (*)(nvid_ctx *);
using deviceUint_t = uint32_t (*)(nvid_ctx *, CudaLib::DeviceProperty);
@@ -129,9 +129,9 @@ const char *xmrig::CudaLib::pluginVersion() noexcept
}
int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx) noexcept
int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept
{
return pDeviceInfo(ctx);
return pDeviceInfo(ctx, blocks, threads, algorithm);
}
@@ -141,13 +141,13 @@ int32_t xmrig::CudaLib::deviceInt(nvid_ctx *ctx, DeviceProperty property) noexce
}
nvid_ctx *xmrig::CudaLib::alloc(size_t id, int blocks, int threads, int bfactor, int bsleep, const Algorithm &algorithm) noexcept
nvid_ctx *xmrig::CudaLib::alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept
{
return pAlloc(id, blocks, threads, bfactor, bsleep, algorithm);
return pAlloc(id, bfactor, bsleep);
}
std::vector<xmrig::CudaDevice> xmrig::CudaLib::devices() noexcept
std::vector<xmrig::CudaDevice> xmrig::CudaLib::devices(int32_t bfactor, int32_t bsleep) noexcept
{
const uint32_t count = deviceCount();
if (!count) {
@@ -158,7 +158,7 @@ std::vector<xmrig::CudaDevice> xmrig::CudaLib::devices() noexcept
out.reserve(count);
for (uint32_t i = 0; i < count; ++i) {
CudaDevice device(i);
CudaDevice device(i, bfactor, bsleep);
if (device.isValid()) {
out.emplace_back(std::move(device));
}

src/backend/cuda/wrappers/CudaLib.h View file

@@ -72,10 +72,10 @@ public:
static const char *deviceName(nvid_ctx *ctx) noexcept;
static const char *pluginVersion() noexcept;
static int deviceInfo(nvid_ctx *ctx) noexcept;
static int deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept;
static int32_t deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept;
static nvid_ctx *alloc(size_t id, int blocks, int threads, int bfactor, int bsleep, const Algorithm &algorithm) noexcept;
static std::vector<CudaDevice> devices() noexcept;
static nvid_ctx *alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept;
static std::vector<CudaDevice> devices(int32_t bfactor, int32_t bsleep) noexcept;
static uint32_t deviceCount() noexcept;
static uint32_t deviceUint(nvid_ctx *ctx, DeviceProperty property) noexcept;
static uint32_t driverVersion() noexcept;

src/backend/opencl/OclBackend.cpp View file

@@ -319,7 +319,7 @@ void xmrig::OclBackend::setJob(const Job &job)
return stop();
}
std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices, tag);
std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices);
if (!d_ptr->threads.empty() && d_ptr->threads.size() == threads.size() && std::equal(d_ptr->threads.begin(), d_ptr->threads.end(), threads.begin())) {
return;
}

src/backend/opencl/OclConfig.cpp View file

@@ -24,6 +24,7 @@
#include "backend/opencl/OclConfig.h"
#include "backend/common/Tags.h"
#include "backend/opencl/OclConfig_gen.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/json/Json.h"
@@ -113,7 +114,7 @@ rapidjson::Value xmrig::OclConfig::toJSON(rapidjson::Document &doc) const
}
std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const
std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices) const
{
std::vector<OclLaunchData> out;
const OclThreads &threads = m_threads.get(algorithm);
@@ -126,7 +127,7 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
for (const OclThread &thread : threads.data()) {
if (thread.index() >= devices.size()) {
LOG_INFO("%s" YELLOW(" skip non-existing device with index ") YELLOW_BOLD("%u"), tag, thread.index());
LOG_INFO("%s" YELLOW(" skip non-existing device with index ") YELLOW_BOLD("%u"), ocl_tag(), thread.index());
continue;
}

src/backend/opencl/OclConfig.h View file

@@ -42,7 +42,7 @@ public:
OclPlatform platform() const;
rapidjson::Value toJSON(rapidjson::Document &doc) const;
std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const;
std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices) const;
void read(const rapidjson::Value &value);
inline bool isCacheEnabled() const { return m_cache; }

src/backend/opencl/OclThreads.cpp View file

@@ -46,7 +46,7 @@ xmrig::OclThreads::OclThreads(const rapidjson::Value &value)
xmrig::OclThreads::OclThreads(const std::vector<OclDevice> &devices, const Algorithm &algorithm)
{
for (const OclDevice &device : devices) {
for (const auto &device : devices) {
device.generate(algorithm, *this);
}
}