Merge pull request #1263 from SChernykh/evo

RandomX: added support for dataset on host
2025-04-22 06:28:09 +00:00 · 2019-11-06 17:53:06 +07:00 · 2019-11-06 17:53:06 +07:00 · f8d1488e33
commit f8d1488e33
parent 4c4a674a4b 0013e610d5
8 changed files with 33 additions and 16 deletions
--- a/src/backend/cuda/CudaThread.cpp
+++ b/src/backend/cuda/CudaThread.cpp
@ -40,6 +40,7 @@ static const char *kBlocks      = "blocks";
 static const char *kBSleep      = "bsleep";
 static const char *kIndex       = "index";
 static const char *kThreads     = "threads";
+static const char *kDatasetHost = "dataset_host";

 } // namespace xmrig

@ -56,6 +57,13 @@ xmrig::CudaThread::CudaThread(const rapidjson::Value &value)
    m_bfactor   = std::min(Json::getUint(value, kBFactor, m_bfactor), 12u);
    m_bsleep    = Json::getUint(value, kBSleep, m_bsleep);
    m_affinity  = Json::getUint64(value, kAffinity, m_affinity);
+
+    if (Json::getValue(value, kDatasetHost).IsInt()) {
+        m_dataset_host = Json::getInt(value, kDatasetHost) != 0;
+    }
+    else {
+        m_dataset_host = Json::getBool(value, kDatasetHost);
+    }
 }


@ -64,7 +72,8 @@ xmrig::CudaThread::CudaThread(uint32_t index, nvid_ctx *ctx) :
    m_threads(CudaLib::deviceInt(ctx, CudaLib::DeviceThreads)),
    m_index(index),
    m_bfactor(CudaLib::deviceUint(ctx, CudaLib::DeviceBFactor)),
-    m_bsleep(CudaLib::deviceUint(ctx, CudaLib::DeviceBSleep))
+    m_bsleep(CudaLib::deviceUint(ctx, CudaLib::DeviceBSleep)),
+    m_dataset_host(CudaLib::deviceInt(ctx, CudaLib::DeviceDatasetHost) != 0)
 {

 }
@ -77,7 +86,8 @@ bool xmrig::CudaThread::isEqual(const CudaThread &other) const
           m_affinity   == other.m_affinity &&
           m_index      == other.m_index &&
           m_bfactor    == other.m_bfactor &&
-           m_bsleep     == other.m_bsleep;
+           m_bsleep     == other.m_bsleep &&
+           m_dataset_host == other.m_dataset_host;
 }


@ -94,6 +104,7 @@ rapidjson::Value xmrig::CudaThread::toJSON(rapidjson::Document &doc) const
    out.AddMember(StringRef(kBFactor),      bfactor(), allocator);
    out.AddMember(StringRef(kBSleep),       bsleep(), allocator);
    out.AddMember(StringRef(kAffinity),     affinity(), allocator);
+    out.AddMember(StringRef(kDatasetHost),  dataset_host(), allocator);

    return out;
 }
--- a/src/backend/cuda/CudaThread.h
+++ b/src/backend/cuda/CudaThread.h
@ -50,6 +50,7 @@ public:
    inline int32_t threads() const                           { return m_threads; }
    inline int64_t affinity() const                          { return m_affinity; }
    inline uint32_t index() const                            { return m_index; }
+    inline bool dataset_host() const                         { return m_dataset_host; }

    inline bool operator!=(const CudaThread &other) const    { return !isEqual(other); }
    inline bool operator==(const CudaThread &other) const    { return isEqual(other); }
@ -62,6 +63,7 @@ private:
    int32_t m_threads   = 0;
    int64_t m_affinity  = -1;
    uint32_t m_index    = 0;
+    bool m_dataset_host = false;

 #   ifdef _WIN32
    uint32_t m_bfactor      = 6;
--- a/src/backend/cuda/runners/CudaBaseRunner.cpp
+++ b/src/backend/cuda/runners/CudaBaseRunner.cpp
@ -47,7 +47,7 @@ xmrig::CudaBaseRunner::~CudaBaseRunner()
 bool xmrig::CudaBaseRunner::init()
 {
    m_ctx = CudaLib::alloc(m_data.thread.index(), m_data.thread.bfactor(), m_data.thread.bsleep());
-    if (CudaLib::deviceInfo(m_ctx, m_data.thread.blocks(), m_data.thread.threads(), m_data.algorithm) != 0) {
+    if (CudaLib::deviceInfo(m_ctx, m_data.thread.blocks(), m_data.thread.threads(), m_data.algorithm, m_data.thread.dataset_host() ? 1 : 0) != 0) {
        return false;
    }

--- a/src/backend/cuda/runners/CudaRxRunner.cpp
+++ b/src/backend/cuda/runners/CudaRxRunner.cpp
@ -42,6 +42,8 @@ xmrig::CudaRxRunner::CudaRxRunner(size_t index, const CudaLaunchData &data) : Cu
    }

    m_intensity -= m_intensity % 32;
+
+    m_dataset_host = m_data.thread.dataset_host();
 }


@ -59,7 +61,7 @@ bool xmrig::CudaRxRunner::set(const Job &job, uint8_t *blob)
    }

    auto dataset = Rx::dataset(job, 0);
-    m_ready = callWrapper(CudaLib::rxPrepare(m_ctx, dataset->raw(), dataset->size(false), m_intensity));
+    m_ready = callWrapper(CudaLib::rxPrepare(m_ctx, dataset->raw(), dataset->size(false), m_dataset_host, m_intensity));

    return m_ready;
 }
--- a/src/backend/cuda/runners/CudaRxRunner.h
+++ b/src/backend/cuda/runners/CudaRxRunner.h
@ -46,6 +46,7 @@ protected:
 private:
    bool m_ready        = false;
    size_t m_intensity  = 0;
+    bool m_dataset_host = false;
 };


--- a/src/backend/cuda/wrappers/CudaDevice.cpp
+++ b/src/backend/cuda/wrappers/CudaDevice.cpp
@ -41,7 +41,7 @@ xmrig::CudaDevice::CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep) :
    m_index(index)
 {
    auto ctx = CudaLib::alloc(index, bfactor, bsleep);
-    if (CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID) != 0) {
+    if (CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID, 0) != 0) {
        CudaLib::release(ctx);

        return;
@ -107,7 +107,7 @@ uint32_t xmrig::CudaDevice::smx() const

 void xmrig::CudaDevice::generate(const Algorithm &algorithm, CudaThreads &threads) const
 {
-    if (CudaLib::deviceInfo(m_ctx, -1, -1, algorithm) != 0) {
+    if (CudaLib::deviceInfo(m_ctx, -1, -1, algorithm, 0) != 0) {
        return;
    }

--- a/src/backend/cuda/wrappers/CudaLib.cpp
+++ b/src/backend/cuda/wrappers/CudaLib.cpp
@ -67,7 +67,7 @@ static const char *kVersion                             = "version";
 using alloc_t                                           = nvid_ctx * (*)(uint32_t, int32_t, int32_t);
 using cnHash_t                                          = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint64_t, uint32_t *, uint32_t *);
 using deviceCount_t                                     = uint32_t (*)();
-using deviceInfo_t                                      = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t);
+using deviceInfo_t                                      = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t, int32_t);
 using deviceInit_t                                      = bool (*)(nvid_ctx *);
 using deviceInt_t                                       = int32_t (*)(nvid_ctx *, CudaLib::DeviceProperty);
 using deviceName_t                                      = const char * (*)(nvid_ctx *);
@ -78,7 +78,7 @@ using lastError_t                                       = const char * (*)(nvid_
 using pluginVersion_t                                   = const char * (*)();
 using release_t                                         = void (*)(nvid_ctx *);
 using rxHash_t                                          = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *);
-using rxPrepare_t                                       = bool (*)(nvid_ctx *, const void *, size_t, uint32_t);
+using rxPrepare_t                                       = bool (*)(nvid_ctx *, const void *, size_t, bool, uint32_t);
 using setJob_t                                          = bool (*)(nvid_ctx *, const void *, size_t, int32_t);
 using version_t                                         = uint32_t (*)(Version);

@ -155,9 +155,9 @@ bool xmrig::CudaLib::rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target,
 }


-bool xmrig::CudaLib::rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, uint32_t batchSize) noexcept
+bool xmrig::CudaLib::rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept
 {
-    return pRxPrepare(ctx, dataset, datasetSize, batchSize);
+    return pRxPrepare(ctx, dataset, datasetSize, dataset_host, batchSize);
 }


@ -185,9 +185,9 @@ const char *xmrig::CudaLib::pluginVersion() noexcept
 }


-int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept
+int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm, int32_t dataset_host) noexcept
 {
-    return pDeviceInfo(ctx, blocks, threads, algorithm);
+    return pDeviceInfo(ctx, blocks, threads, algorithm, dataset_host);
 }


@ -272,7 +272,7 @@ bool xmrig::CudaLib::load()
        return false;
    }

-    if (pVersion(ApiVersion) != 1u) {
+    if (pVersion(ApiVersion) != 2u) {
        return false;
    }

--- a/src/backend/cuda/wrappers/CudaLib.h
+++ b/src/backend/cuda/wrappers/CudaLib.h
@ -61,7 +61,8 @@ public:
        DeviceMemoryFree,
        DevicePciBusID,
        DevicePciDeviceID,
-        DevicePciDomainID
+        DevicePciDomainID,
+        DeviceDatasetHost,
    };

    static bool init(const char *fileName = nullptr);
@ -74,12 +75,12 @@ public:
    static bool cnHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t height, uint64_t target, uint32_t *rescount, uint32_t *resnonce);
    static bool deviceInit(nvid_ctx *ctx) noexcept;
    static bool rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept;
-    static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, uint32_t batchSize) noexcept;
+    static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept;
    static bool setJob(nvid_ctx *ctx, const void *data, size_t size, const Algorithm &algorithm) noexcept;
    static const char *deviceName(nvid_ctx *ctx) noexcept;
    static const char *lastError(nvid_ctx *ctx) noexcept;
    static const char *pluginVersion() noexcept;
-    static int deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept;
+    static int deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm, int32_t dataset_host) noexcept;
    static int32_t deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept;
    static nvid_ctx *alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept;
    static std::string version(uint32_t version);