diff --git a/src/backend/cuda/CudaWorker.cpp b/src/backend/cuda/CudaWorker.cpp index b280e2942..c9cb602c0 100644 --- a/src/backend/cuda/CudaWorker.cpp +++ b/src/backend/cuda/CudaWorker.cpp @@ -78,9 +78,15 @@ xmrig::CudaWorker::CudaWorker(size_t id, const CudaLaunchData &data) : break; } - if (!m_runner || !m_runner->init()) { + if (!m_runner) { return; } + + if (!m_runner->init()) { + delete m_runner; + + m_runner = nullptr; + } } diff --git a/src/backend/cuda/runners/CudaBaseRunner.cpp b/src/backend/cuda/runners/CudaBaseRunner.cpp index 757f91de9..60a7a7c9e 100644 --- a/src/backend/cuda/runners/CudaBaseRunner.cpp +++ b/src/backend/cuda/runners/CudaBaseRunner.cpp @@ -47,7 +47,7 @@ xmrig::CudaBaseRunner::~CudaBaseRunner() bool xmrig::CudaBaseRunner::init() { m_ctx = CudaLib::alloc(m_data.thread.index(), m_data.thread.bfactor(), m_data.thread.bsleep()); - if (CudaLib::deviceInfo(m_ctx, m_data.thread.blocks(), m_data.thread.threads(), m_data.algorithm, m_data.thread.datasetHost()) != 0) { + if (!callWrapper(CudaLib::deviceInfo(m_ctx, m_data.thread.blocks(), m_data.thread.threads(), m_data.algorithm, m_data.thread.datasetHost()))) { return false; } diff --git a/src/backend/cuda/runners/CudaRxRunner.cpp b/src/backend/cuda/runners/CudaRxRunner.cpp index 20603e760..dd64e8654 100644 --- a/src/backend/cuda/runners/CudaRxRunner.cpp +++ b/src/backend/cuda/runners/CudaRxRunner.cpp @@ -31,8 +31,9 @@ #include "crypto/rx/RxDataset.h" -xmrig::CudaRxRunner::CudaRxRunner(size_t index, const CudaLaunchData &data) : CudaBaseRunner(index, data), - m_datasetHost(data.thread.datasetHost() > 0) +xmrig::CudaRxRunner::CudaRxRunner(size_t index, const CudaLaunchData &data) : + CudaBaseRunner(index, data), + m_datasetHost(data.thread.datasetHost() > 0) { m_intensity = m_data.thread.threads() * m_data.thread.blocks(); const size_t scratchpads_size = m_intensity * m_data.algorithm.l3(); diff --git a/src/backend/cuda/wrappers/CudaDevice.cpp b/src/backend/cuda/wrappers/CudaDevice.cpp index 
efacc800e..93234d536 100644 --- a/src/backend/cuda/wrappers/CudaDevice.cpp +++ b/src/backend/cuda/wrappers/CudaDevice.cpp @@ -41,7 +41,7 @@ xmrig::CudaDevice::CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep) : m_index(index) { auto ctx = CudaLib::alloc(index, bfactor, bsleep); - if (CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID) != 0) { + if (!CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID)) { CudaLib::release(ctx); return; @@ -107,7 +107,7 @@ uint32_t xmrig::CudaDevice::smx() const void xmrig::CudaDevice::generate(const Algorithm &algorithm, CudaThreads &threads) const { - if (CudaLib::deviceInfo(m_ctx, -1, -1, algorithm) != 0) { + if (!CudaLib::deviceInfo(m_ctx, -1, -1, algorithm)) { return; } diff --git a/src/backend/cuda/wrappers/CudaLib.cpp b/src/backend/cuda/wrappers/CudaLib.cpp index c148d9f89..e6eb27578 100644 --- a/src/backend/cuda/wrappers/CudaLib.cpp +++ b/src/backend/cuda/wrappers/CudaLib.cpp @@ -50,6 +50,7 @@ static const char *kAlloc = "alloc"; static const char *kCnHash = "cnHash"; static const char *kDeviceCount = "deviceCount"; static const char *kDeviceInfo = "deviceInfo"; +static const char *kDeviceInfo_v2 = "deviceInfo_v2"; static const char *kDeviceInit = "deviceInit"; static const char *kDeviceInt = "deviceInt"; static const char *kDeviceName = "deviceName"; @@ -62,6 +63,7 @@ static const char *kRelease = "release"; static const char *kRxHash = "rxHash"; static const char *kRxPrepare = "rxPrepare"; static const char *kSetJob = "setJob"; +static const char *kSetJob_v2 = "setJob_v2"; static const char *kSymbolNotFound = "symbol not found"; static const char *kVersion = "version"; @@ -70,6 +72,7 @@ using alloc_t = nvid_ctx * (*)(uint32_ using cnHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint64_t, uint32_t *, uint32_t *); using deviceCount_t = uint32_t (*)(); using deviceInfo_t = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t, int32_t); +using deviceInfo_v2_t = bool (*)(nvid_ctx *, int32_t, int32_t, const char *, 
int32_t); using deviceInit_t = bool (*)(nvid_ctx *); using deviceInt_t = int32_t (*)(nvid_ctx *, CudaLib::DeviceProperty); using deviceName_t = const char * (*)(nvid_ctx *); @@ -82,6 +85,7 @@ using release_t = void (*)(nvid_ctx *); using rxHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *); using rxPrepare_t = bool (*)(nvid_ctx *, const void *, size_t, bool, uint32_t); using setJob_t = bool (*)(nvid_ctx *, const void *, size_t, int32_t); +using setJob_v2_t = bool (*)(nvid_ctx *, const void *, size_t, const char *); using version_t = uint32_t (*)(Version); @@ -89,6 +93,7 @@ static alloc_t pAlloc = nullptr; static cnHash_t pCnHash = nullptr; static deviceCount_t pDeviceCount = nullptr; static deviceInfo_t pDeviceInfo = nullptr; +static deviceInfo_v2_t pDeviceInfo_v2 = nullptr; static deviceInit_t pDeviceInit = nullptr; static deviceInt_t pDeviceInt = nullptr; static deviceName_t pDeviceName = nullptr; @@ -101,6 +106,7 @@ static release_t pRelease = nullptr; static rxHash_t pRxHash = nullptr; static rxPrepare_t pRxPrepare = nullptr; static setJob_t pSetJob = nullptr; +static setJob_v2_t pSetJob_v2 = nullptr; static version_t pVersion = nullptr; @@ -145,6 +151,18 @@ bool xmrig::CudaLib::cnHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t height, } +bool xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm, int32_t dataset_host) noexcept +{ + const Algorithm algo = RxAlgo::id(algorithm); + + if (pDeviceInfo_v2) { + return pDeviceInfo_v2(ctx, blocks, threads, algo.isValid() ? 
algo.shortName() : nullptr, dataset_host); + } + + return pDeviceInfo(ctx, blocks, threads, algo, dataset_host) == 0; +} + + bool xmrig::CudaLib::deviceInit(nvid_ctx *ctx) noexcept { return pDeviceInit(ctx); @@ -165,7 +183,12 @@ bool xmrig::CudaLib::rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datase bool xmrig::CudaLib::setJob(nvid_ctx *ctx, const void *data, size_t size, const Algorithm &algorithm) noexcept { - return pSetJob(ctx, data, size, RxAlgo::id(algorithm)); + const Algorithm algo = RxAlgo::id(algorithm); + if (pSetJob_v2) { + return pSetJob_v2(ctx, data, size, algo.shortName()); + } + + return pSetJob(ctx, data, size, algo); } @@ -187,12 +210,6 @@ const char *xmrig::CudaLib::pluginVersion() noexcept } -int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm, int32_t dataset_host) noexcept -{ - return pDeviceInfo(ctx, blocks, threads, RxAlgo::id(algorithm), dataset_host); -} - - int32_t xmrig::CudaLib::deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept { return pDeviceInt(ctx, property); @@ -292,11 +309,13 @@ bool xmrig::CudaLib::load() return false; } + uv_dlsym(&cudaLib, kDeviceInfo_v2, reinterpret_cast<void**>(&pDeviceInfo_v2)); + uv_dlsym(&cudaLib, kSetJob_v2, reinterpret_cast<void**>(&pSetJob_v2)); + try { DLSYM(Alloc); DLSYM(CnHash); DLSYM(DeviceCount); - DLSYM(DeviceInfo); DLSYM(DeviceInit); DLSYM(DeviceInt); DLSYM(DeviceName); @@ -308,8 +327,15 @@ bool xmrig::CudaLib::load() DLSYM(Release); DLSYM(RxHash); DLSYM(RxPrepare); - DLSYM(SetJob); DLSYM(Version); + + if (!pDeviceInfo_v2) { + DLSYM(DeviceInfo); + } + + if (!pSetJob_v2) { + DLSYM(SetJob); + } } catch (std::exception &ex) { return false; } diff --git a/src/backend/cuda/wrappers/CudaLib.h b/src/backend/cuda/wrappers/CudaLib.h index 6202e0491..99824f809 100644 --- a/src/backend/cuda/wrappers/CudaLib.h +++ b/src/backend/cuda/wrappers/CudaLib.h @@ -74,6 +74,7 @@ public: static inline const String &loader() { return m_loader; } static bool 
cnHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t height, uint64_t target, uint32_t *rescount, uint32_t *resnonce); + static bool deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm, int32_t dataset_host = -1) noexcept; static bool deviceInit(nvid_ctx *ctx) noexcept; static bool rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept; static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept; @@ -81,7 +82,6 @@ public: static const char *deviceName(nvid_ctx *ctx) noexcept; static const char *lastError(nvid_ctx *ctx) noexcept; static const char *pluginVersion() noexcept; - static int deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm, int32_t dataset_host = -1) noexcept; static int32_t deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept; static nvid_ctx *alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept; static std::string version(uint32_t version);