diff --git a/src/backend/opencl/OclBackend.cpp b/src/backend/opencl/OclBackend.cpp index 9b226811f..5b0780069 100644 --- a/src/backend/opencl/OclBackend.cpp +++ b/src/backend/opencl/OclBackend.cpp @@ -35,6 +35,7 @@ #include "backend/opencl/OclConfig.h" #include "backend/opencl/OclLaunchData.h" #include "backend/opencl/OclWorker.h" +#include "backend/opencl/runners/tools/OclSharedState.h" #include "backend/opencl/wrappers/OclContext.h" #include "backend/opencl/wrappers/OclLib.h" #include "base/io/log/Log.h" @@ -164,7 +165,7 @@ public: } - inline void start() + inline void start(const Job &job) { LOG_INFO("%s use profile " BLUE_BG(WHITE_BOLD_S " %s ") WHITE_BOLD_S " (" CYAN_BOLD("%zu") WHITE_BOLD(" threads)") " scratchpad " CYAN_BOLD("%zu KB"), tag, @@ -194,6 +195,8 @@ public: i++; } + OclSharedState::start(threads, job); + status.start(threads.size()); workers.start(threads); } @@ -329,7 +332,7 @@ void xmrig::OclBackend::setJob(const Job &job) return stop(); } - if (!d_ptr->context.init(d_ptr->devices, threads, job)) { + if (!d_ptr->context.init(d_ptr->devices, threads)) { LOG_WARN("%s " RED_BOLD("disabled") YELLOW(" (OpenCL context unavailable)"), tag); return stop(); @@ -338,7 +341,7 @@ void xmrig::OclBackend::setJob(const Job &job) stop(); d_ptr->threads = std::move(threads); - d_ptr->start(); + d_ptr->start(job); } @@ -371,6 +374,8 @@ void xmrig::OclBackend::stop() d_ptr->workers.stop(); d_ptr->threads.clear(); + OclSharedState::release(); + LOG_INFO("%s" YELLOW(" stopped") BLACK_BOLD(" (%" PRIu64 " ms)"), tag, Chrono::steadyMSecs() - ts); } diff --git a/src/backend/opencl/OclConfig.cpp b/src/backend/opencl/OclConfig.cpp index 7424dba7a..bdefcaae3 100644 --- a/src/backend/opencl/OclConfig.cpp +++ b/src/backend/opencl/OclConfig.cpp @@ -193,32 +193,13 @@ std::vector xmrig::OclConfig::get(const Miner *miner, cons continue; } -# ifdef XMRIG_ALGO_RANDOMX - auto dataset = algorithm.family() == Algorithm::RANDOM_X ? std::make_shared() : nullptr; -# endif - if (thread.threads().size() > 1) { - auto interleave = std::make_shared(thread.threads().size()); - for (int64_t affinity : thread.threads()) { - OclLaunchData data(miner, algorithm, *this, platform, thread, devices[thread.index()], affinity); - data.interleave = interleave; - -# ifdef XMRIG_ALGO_RANDOMX - data.dataset = dataset; -# endif - - out.emplace_back(std::move(data)); + out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()], affinity); } } else { - OclLaunchData data(miner, algorithm, *this, platform, thread, devices[thread.index()], thread.threads().front()); - -# ifdef XMRIG_ALGO_RANDOMX - data.dataset = dataset; -# endif - - out.emplace_back(std::move(data)); + out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()], thread.threads().front()); } } diff --git a/src/backend/opencl/OclLaunchData.h b/src/backend/opencl/OclLaunchData.h index ab4ca09b9..644023be8 100644 --- a/src/backend/opencl/OclLaunchData.h +++ b/src/backend/opencl/OclLaunchData.h @@ -27,19 +27,14 @@ #define XMRIG_OCLLAUNCHDATA_H -#include "backend/opencl/OclInterleave.h" #include "backend/opencl/OclThread.h" +#include "backend/opencl/runners/tools/OclSharedData.h" #include "backend/opencl/wrappers/OclDevice.h" #include "backend/opencl/wrappers/OclPlatform.h" #include "crypto/common/Algorithm.h" #include "crypto/common/Nonce.h" -#ifdef XMRIG_ALGO_RANDOMX -# include "backend/opencl/runners/tools/OclRxDataset.h" -#endif - - using cl_context = struct _cl_context *; @@ -72,11 +67,6 @@ public: const OclDevice device; const OclPlatform platform; const OclThread thread; - OclInterleavePtr interleave; - -# ifdef XMRIG_ALGO_RANDOMX - OclRxDatasetPtr dataset; -# endif }; diff --git a/src/backend/opencl/OclWorker.cpp b/src/backend/opencl/OclWorker.cpp index b4132e3dd..f79bc59b6 100644 --- a/src/backend/opencl/OclWorker.cpp +++ b/src/backend/opencl/OclWorker.cpp @@ -28,6 +28,8 @@ #include "backend/common/Tags.h" #include "backend/opencl/runners/OclCnRunner.h" +#include "backend/opencl/runners/tools/OclSharedData.h" +#include "backend/opencl/runners/tools/OclSharedState.h" #include "base/io/log/Log.h" #include "base/tools/Chrono.h" #include "core/Miner.h" @@ -75,7 +77,7 @@ xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) : m_algorithm(data.algorithm), m_miner(data.miner), m_intensity(data.thread.intensity()), - m_interleave(data.interleave) + m_sharedData(OclSharedState::get(data.device.index())) { switch (m_algorithm.family()) { case Algorithm::RANDOM_X: @@ -149,9 +151,7 @@ void xmrig::OclWorker::start() while (Nonce::sequence(Nonce::OPENCL) > 0) { if (!isReady()) { - if (m_interleave) { - m_interleave->setResumeCounter(0); - } + m_sharedData.setResumeCounter(0); do { std::this_thread::sleep_for(std::chrono::milliseconds(200)); @@ -162,9 +162,7 @@ void xmrig::OclWorker::start() break; } - if (m_interleave) { - m_interleave->resumeDelay(m_id); - } + m_sharedData.resumeDelay(m_id); if (!consumeJob()) { return; @@ -172,9 +170,7 @@ void xmrig::OclWorker::start() } while (!Nonce::isOutdated(Nonce::OPENCL, m_job.sequence())) { - if (m_interleave) { - m_interleave->adjustDelay(m_id); - } + m_sharedData.adjustDelay(m_id); const uint64_t t = Chrono::steadyMSecs(); @@ -233,9 +229,7 @@ void xmrig::OclWorker::storeStats(uint64_t t) m_count += m_intensity; - if (m_interleave) { - m_interleave->setRunTime(Chrono::steadyMSecs() - t); - } + m_sharedData.setRunTime(Chrono::steadyMSecs() - t); Worker::storeStats(); } diff --git a/src/backend/opencl/OclWorker.h b/src/backend/opencl/OclWorker.h index 93de8afd7..76cb63dbc 100644 --- a/src/backend/opencl/OclWorker.h +++ b/src/backend/opencl/OclWorker.h @@ -67,7 +67,7 @@ private: const Miner *m_miner; const uint32_t m_intensity; IOclRunner *m_runner = nullptr; - OclInterleavePtr m_interleave; + OclSharedData &m_sharedData; WorkerJob<1> m_job; }; diff --git a/src/backend/opencl/opencl.cmake b/src/backend/opencl/opencl.cmake index c5a6335fb..376757170 100644 --- a/src/backend/opencl/opencl.cmake +++ b/src/backend/opencl/opencl.cmake @@ -14,7 +14,6 @@ if (WITH_OPENCL) src/backend/opencl/OclCache.h src/backend/opencl/OclConfig.h src/backend/opencl/OclGenerator.h - src/backend/opencl/OclInterleave.h src/backend/opencl/OclLaunchData.h src/backend/opencl/OclThread.h src/backend/opencl/OclThreads.h @@ -22,6 +21,8 @@ if (WITH_OPENCL) src/backend/opencl/runners/OclBaseRunner.h src/backend/opencl/runners/OclCnRunner.h src/backend/opencl/runners/tools/OclCnR.h + src/backend/opencl/runners/tools/OclSharedData.h + src/backend/opencl/runners/tools/OclSharedState.h src/backend/opencl/wrappers/OclContext.h src/backend/opencl/wrappers/OclDevice.h src/backend/opencl/wrappers/OclError.h @@ -42,7 +43,6 @@ if (WITH_OPENCL) src/backend/opencl/OclBackend.cpp src/backend/opencl/OclCache.cpp src/backend/opencl/OclConfig.cpp - src/backend/opencl/OclInterleave.cpp src/backend/opencl/OclLaunchData.cpp src/backend/opencl/OclThread.cpp src/backend/opencl/OclThreads.cpp @@ -50,6 +50,8 @@ if (WITH_OPENCL) src/backend/opencl/runners/OclBaseRunner.cpp src/backend/opencl/runners/OclCnRunner.cpp src/backend/opencl/runners/tools/OclCnR.cpp + src/backend/opencl/runners/tools/OclSharedData.cpp + src/backend/opencl/runners/tools/OclSharedState.cpp src/backend/opencl/wrappers/OclContext.cpp src/backend/opencl/wrappers/OclDevice.cpp src/backend/opencl/wrappers/OclError.cpp @@ -78,7 +80,6 @@ if (WITH_OPENCL) src/backend/opencl/runners/OclRxBaseRunner.h src/backend/opencl/runners/OclRxJitRunner.h src/backend/opencl/runners/OclRxVmRunner.h - src/backend/opencl/runners/tools/OclRxDataset.h ) list(APPEND SOURCES_BACKEND_OPENCL @@ -95,7 +96,6 @@ if (WITH_OPENCL) src/backend/opencl/runners/OclRxBaseRunner.cpp src/backend/opencl/runners/OclRxJitRunner.cpp src/backend/opencl/runners/OclRxVmRunner.cpp - src/backend/opencl/runners/tools/OclRxDataset.cpp ) endif() diff --git a/src/backend/opencl/runners/OclBaseRunner.cpp b/src/backend/opencl/runners/OclBaseRunner.cpp index 71b35edc4..d8497e94f 100644 --- a/src/backend/opencl/runners/OclBaseRunner.cpp +++ b/src/backend/opencl/runners/OclBaseRunner.cpp @@ -23,10 +23,11 @@ */ +#include "backend/opencl/runners/OclBaseRunner.h" #include "backend/opencl/cl/OclSource.h" #include "backend/opencl/OclCache.h" #include "backend/opencl/OclLaunchData.h" -#include "backend/opencl/runners/OclBaseRunner.h" +#include "backend/opencl/runners/tools/OclSharedState.h" #include "backend/opencl/wrappers/OclError.h" #include "backend/opencl/wrappers/OclLib.h" #include "base/io/log/Log.h" @@ -34,6 +35,9 @@ #include "crypto/common/VirtualMemory.h" +constexpr size_t oneGiB = 1024 * 1024 * 1024; + + xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) : m_algorithm(data.algorithm), m_ctx(data.ctx), @@ -93,16 +97,17 @@ void xmrig::OclBaseRunner::build() void xmrig::OclBaseRunner::init() { - m_queue = OclLib::createCommandQueue(m_ctx, data().device.id()); + m_queue = OclLib::createCommandQueue(m_ctx, data().device.id()); - constexpr size_t oneGiB = 1024 * 1024 * 1024; - size_t size = bufferSize(); + size_t size = align(bufferSize()); if (size < oneGiB && data().device.vendorId() == OCL_VENDOR_AMD && data().device.freeMemSize() >= oneGiB) { - size = oneGiB; + m_buffer = OclSharedState::get(data().device.index()).createBuffer(m_ctx, size, m_offset); + } + else { + m_buffer = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, size); } - m_buffer = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, size); m_input = createSubBuffer(CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, Job::kMaxBlobSize); m_output = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100); } diff --git a/src/backend/opencl/runners/OclRxBaseRunner.cpp b/src/backend/opencl/runners/OclRxBaseRunner.cpp index 767b10834..fa0259b49 100644 --- a/src/backend/opencl/runners/OclRxBaseRunner.cpp +++ b/src/backend/opencl/runners/OclRxBaseRunner.cpp @@ -30,6 +30,7 @@ #include "backend/opencl/kernels/rx/FindSharesKernel.h" #include "backend/opencl/kernels/rx/HashAesKernel.h" #include "backend/opencl/OclLaunchData.h" +#include "backend/opencl/runners/tools/OclSharedState.h" #include "backend/opencl/wrappers/OclLib.h" #include "base/net/stratum/Job.h" #include "crypto/rx/Rx.h" @@ -75,6 +76,7 @@ xmrig::OclRxBaseRunner::~OclRxBaseRunner() OclLib::release(m_hashes); OclLib::release(m_rounding); OclLib::release(m_scratchpads); + OclLib::release(m_dataset); } @@ -120,7 +122,7 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob) m_seed = job.seed(); auto dataset = Rx::dataset(job, 0); - enqueueWriteBuffer(data().dataset->get(), CL_TRUE, 0, dataset->size(), dataset->raw()); + enqueueWriteBuffer(m_dataset, CL_TRUE, 0, dataset->size(), dataset->raw()); } if (job.size() < Job::kMaxBlobSize) { @@ -177,4 +179,5 @@ void xmrig::OclRxBaseRunner::init() m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * m_intensity); m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * m_intensity); m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * m_intensity); + m_dataset = OclSharedState::get(data().device.index()).dataset(); } diff --git a/src/backend/opencl/runners/OclRxBaseRunner.h b/src/backend/opencl/runners/OclRxBaseRunner.h index 48b0ff8e8..c7770e6c7 100644 --- a/src/backend/opencl/runners/OclRxBaseRunner.h +++ b/src/backend/opencl/runners/OclRxBaseRunner.h @@ -62,6 +62,7 @@ protected: Blake2bHashRegistersKernel *m_blake2b_hash_registers_64 = nullptr; Blake2bInitialHashKernel *m_blake2b_initial_hash = nullptr; Buffer m_seed; + cl_mem m_dataset = nullptr; cl_mem m_entropy = nullptr; cl_mem m_hashes = nullptr; cl_mem m_rounding = nullptr; diff --git a/src/backend/opencl/runners/OclRxJitRunner.cpp b/src/backend/opencl/runners/OclRxJitRunner.cpp index 29020fe17..ca3163336 100644 --- a/src/backend/opencl/runners/OclRxJitRunner.cpp +++ b/src/backend/opencl/runners/OclRxJitRunner.cpp @@ -74,7 +74,7 @@ void xmrig::OclRxJitRunner::build() } m_randomx_run = new RxRunKernel(m_asmProgram); - m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, m_intensity, m_algorithm); + m_randomx_run->setArgs(m_dataset, m_scratchpads, m_registers, m_rounding, m_programs, m_intensity, m_algorithm); } diff --git a/src/backend/opencl/runners/OclRxVmRunner.cpp b/src/backend/opencl/runners/OclRxVmRunner.cpp index 193b68f02..3a30d5610 100644 --- a/src/backend/opencl/runners/OclRxVmRunner.cpp +++ b/src/backend/opencl/runners/OclRxVmRunner.cpp @@ -70,7 +70,7 @@ void xmrig::OclRxVmRunner::build() m_init_vm->setArgs(m_entropy, m_vm_states, m_rounding); m_execute_vm = new ExecuteVmKernel(m_program); - m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), m_intensity); + m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, m_dataset, m_intensity); } diff --git a/src/backend/opencl/OclInterleave.cpp b/src/backend/opencl/runners/tools/OclSharedData.cpp similarity index 65% rename from src/backend/opencl/OclInterleave.cpp rename to src/backend/opencl/runners/tools/OclSharedData.cpp index d7226f416..355ab7755 100644 --- a/src/backend/opencl/OclInterleave.cpp +++ b/src/backend/opencl/runners/tools/OclSharedData.cpp @@ -23,17 +23,44 @@ */ -#include "backend/opencl/OclInterleave.h" +#include "backend/opencl/runners/tools/OclSharedData.h" +#include "backend/opencl/wrappers/OclLib.h" #include "base/io/log/Log.h" #include "base/tools/Chrono.h" +#include "crypto/rx/Rx.h" +#include "crypto/rx/RxDataset.h" +#include #include +#include #include -uint64_t xmrig::OclInterleave::adjustDelay(size_t id) +constexpr size_t oneGiB = 1024 * 1024 * 1024; + + +cl_mem xmrig::OclSharedData::createBuffer(cl_context context, size_t size, size_t &offset) { + std::lock_guard lock(m_mutex); + + offset += size * m_offset++; + size = std::max(size * m_threads, oneGiB); + + if (!m_buffer) { + m_buffer = OclLib::createBuffer(context, CL_MEM_READ_WRITE, size); + } + + return OclLib::retain(m_buffer); +} + + +uint64_t xmrig::OclSharedData::adjustDelay(size_t id) +{ + if (m_threads < 2) { + return 0; + } + const uint64_t t0 = Chrono::steadyMSecs(); uint64_t delay = 0; @@ -69,8 +96,12 @@ uint64_t xmrig::OclInterleave::adjustDelay(size_t id) } -uint64_t xmrig::OclInterleave::resumeDelay(size_t id) +uint64_t xmrig::OclSharedData::resumeDelay(size_t id) { + if (m_threads < 2) { + return 0; + } + uint64_t delay = 0; { @@ -99,14 +130,28 @@ uint64_t xmrig::OclInterleave::resumeDelay(size_t id) } -void xmrig::OclInterleave::setResumeCounter(uint32_t value) +void xmrig::OclSharedData::release() { + OclLib::release(m_buffer); + +# ifdef XMRIG_ALGO_RANDOMX + OclLib::release(m_dataset); +# endif +} + + +void xmrig::OclSharedData::setResumeCounter(uint32_t value) +{ + if (m_threads < 2) { + return; + } + std::lock_guard lock(m_mutex); m_resumeCounter = value; } -void xmrig::OclInterleave::setRunTime(uint64_t time) +void xmrig::OclSharedData::setRunTime(uint64_t time) { // averagingBias = 1.0 - only the last delta time is taken into account // averagingBias = 0.5 - the last delta time has the same weight as all the previous ones combined @@ -116,3 +161,34 @@ void xmrig::OclInterleave::setRunTime(uint64_t time) std::lock_guard lock(m_mutex); m_averageRunTime = m_averageRunTime * (1.0 - averagingBias) + time * averagingBias; } + + +#ifdef XMRIG_ALGO_RANDOMX +cl_mem xmrig::OclSharedData::dataset() const +{ + if (!m_dataset) { + throw std::runtime_error("RandomX dataset is not available"); + } + + return OclLib::retain(m_dataset); +} + + +void xmrig::OclSharedData::createDataset(cl_context ctx, const Job &job, bool host) +{ + if (m_dataset) { + return; + } + + cl_int ret; + + if (host) { + auto dataset = Rx::dataset(job, 0); + + m_dataset = OclLib::createBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, RxDataset::maxSize(), dataset->raw(), &ret); + } + else { + m_dataset = OclLib::createBuffer(ctx, CL_MEM_READ_ONLY, RxDataset::maxSize(), nullptr, &ret); + } +} +#endif diff --git a/src/backend/opencl/OclInterleave.h b/src/backend/opencl/runners/tools/OclSharedData.h similarity index 67% rename from src/backend/opencl/OclInterleave.h rename to src/backend/opencl/runners/tools/OclSharedData.h index 663272955..caf7b4d66 100644 --- a/src/backend/opencl/OclInterleave.h +++ b/src/backend/opencl/runners/tools/OclSharedData.h @@ -22,42 +22,62 @@ * along with this program. If not, see . */ -#ifndef XMRIG_OCLINTERLEAVE_H -#define XMRIG_OCLINTERLEAVE_H +#ifndef XMRIG_OCLSHAREDDATA_H +#define XMRIG_OCLSHAREDDATA_H #include #include +using cl_context = struct _cl_context *; +using cl_mem = struct _cl_mem *; + + namespace xmrig { -class OclInterleave +class Job; + + +class OclSharedData { public: - OclInterleave() = delete; - inline OclInterleave(size_t threads) : m_threads(threads) {} + OclSharedData() = default; + cl_mem createBuffer(cl_context context, size_t size, size_t &offset); uint64_t adjustDelay(size_t id); uint64_t resumeDelay(size_t id); + void release(); void setResumeCounter(uint32_t value); void setRunTime(uint64_t time); + inline size_t threads() const { return m_threads; } + + inline OclSharedData &operator++() { ++m_threads; return *this; } + +# ifdef XMRIG_ALGO_RANDOMX + cl_mem dataset() const; + void createDataset(cl_context ctx, const Job &job, bool host); +# endif + private: - const size_t m_threads; + cl_mem m_buffer = nullptr; double m_averageRunTime = 0.0; double m_threshold = 0.95; + size_t m_offset = 0; + size_t m_threads = 0; std::mutex m_mutex; uint32_t m_resumeCounter = 0; uint64_t m_timestamp = 0; + +# ifdef XMRIG_ALGO_RANDOMX + cl_mem m_dataset = nullptr; +# endif }; -using OclInterleavePtr = std::shared_ptr; - - } /* namespace xmrig */ -#endif /* XMRIG_OCLINTERLEAVE_H */ +#endif /* XMRIG_OCLSHAREDDATA_H */ diff --git a/src/backend/opencl/runners/tools/OclRxDataset.cpp b/src/backend/opencl/runners/tools/OclSharedState.cpp similarity index 59% rename from src/backend/opencl/runners/tools/OclRxDataset.cpp rename to src/backend/opencl/runners/tools/OclSharedState.cpp index b1d83bd0b..0b16a301f 100644 --- a/src/backend/opencl/runners/tools/OclRxDataset.cpp +++ b/src/backend/opencl/runners/tools/OclSharedState.cpp @@ -23,32 +23,52 @@ */ -#include "backend/opencl/runners/tools/OclRxDataset.h" -#include "backend/opencl/wrappers/OclLib.h" -#include "crypto/rx/Rx.h" -#include "crypto/rx/RxDataset.h" +#include "backend/opencl/runners/tools/OclSharedState.h" +#include "backend/opencl/runners/tools/OclSharedData.h" -void xmrig::OclRxDataset::createBuffer(cl_context ctx, const Job &job, bool host) +#include +#include + + +namespace xmrig { + + +static std::map map; + + +} // namespace xmrig + + +xmrig::OclSharedData &xmrig::OclSharedState::get(uint32_t index) { - if (m_dataset) { - return; - } - - cl_int ret; - - if (host) { - auto dataset = Rx::dataset(job, 0); - - m_dataset = OclLib::createBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, RxDataset::maxSize(), dataset->raw(), &ret); - } - else { - m_dataset = OclLib::createBuffer(ctx, CL_MEM_READ_ONLY, RxDataset::maxSize(), nullptr, &ret); - } + return map[index]; } -xmrig::OclRxDataset::~OclRxDataset() +void xmrig::OclSharedState::release() { - OclLib::release(m_dataset); + for (auto &kv : map) { + kv.second.release(); + } + + map.clear(); +} + + +void xmrig::OclSharedState::start(const std::vector &threads, const Job &job) +{ + assert(map.empty()); + + for (const auto &data : threads) { + auto &sharedData = map[data.device.index()]; + + ++sharedData; + +# ifdef XMRIG_ALGO_RANDOMX + if (data.algorithm.family() == Algorithm::RANDOM_X) { + sharedData.createDataset(data.ctx, job, data.thread.isDatasetHost()); + } +# endif + } } diff --git a/src/backend/opencl/runners/tools/OclRxDataset.h b/src/backend/opencl/runners/tools/OclSharedState.h similarity index 69% rename from src/backend/opencl/runners/tools/OclRxDataset.h rename to src/backend/opencl/runners/tools/OclSharedState.h index 314a2f5d6..610056cd8 100644 --- a/src/backend/opencl/runners/tools/OclRxDataset.h +++ b/src/backend/opencl/runners/tools/OclSharedState.h @@ -22,47 +22,26 @@ * along with this program. If not, see . */ -#ifndef XMRIG_OCLRXDATASET_H -#define XMRIG_OCLRXDATASET_H +#ifndef XMRIG_OCLSHAREDSTATE_H +#define XMRIG_OCLSHAREDSTATE_H -#include "base/tools/Object.h" - - -#include - - -using cl_context = struct _cl_context *; -using cl_mem = struct _cl_mem *; +#include "backend/opencl/OclLaunchData.h" namespace xmrig { -class Job; - - -class OclRxDataset +class OclSharedState { public: - XMRIG_DISABLE_COPY_MOVE(OclRxDataset) - - OclRxDataset() = default; - ~OclRxDataset(); - - inline cl_mem get() const { return m_dataset; } - - void createBuffer(cl_context ctx,const Job &job, bool host); - -private: - cl_mem m_dataset = nullptr; + static OclSharedData &get(uint32_t index); + static void release(); + static void start(const std::vector &threads, const Job &job); }; -using OclRxDatasetPtr = std::shared_ptr; - - } /* namespace xmrig */ -#endif /* XMRIG_OCLINTERLEAVE_H */ +#endif /* XMRIG_OCLSHAREDSTATE_H */ diff --git a/src/backend/opencl/wrappers/OclContext.cpp b/src/backend/opencl/wrappers/OclContext.cpp index a25593350..eed4272c8 100644 --- a/src/backend/opencl/wrappers/OclContext.cpp +++ b/src/backend/opencl/wrappers/OclContext.cpp @@ -24,6 +24,7 @@ #include "backend/opencl/wrappers/OclContext.h" +#include "backend/opencl/runners/tools/OclSharedState.h" #include "backend/opencl/wrappers/OclLib.h" @@ -42,7 +43,7 @@ xmrig::OclContext::~OclContext() } -bool xmrig::OclContext::init(const std::vector &devices, std::vector &threads, const Job &job) +bool xmrig::OclContext::init(const std::vector &devices, std::vector &threads) { if (!m_ctx) { std::vector ids(devices.size()); @@ -59,12 +60,6 @@ bool xmrig::OclContext::init(const std::vector &devices, std::vector< for (OclLaunchData &data : threads) { data.ctx = m_ctx; - -# ifdef XMRIG_ALGO_RANDOMX - if (data.algorithm.family() == Algorithm::RANDOM_X) { - data.dataset->createBuffer(m_ctx, job, data.thread.isDatasetHost()); - } -# endif } return true; diff --git a/src/backend/opencl/wrappers/OclContext.h b/src/backend/opencl/wrappers/OclContext.h index 8f2cc432c..fb45358e9 100644 --- a/src/backend/opencl/wrappers/OclContext.h +++ b/src/backend/opencl/wrappers/OclContext.h @@ -49,7 +49,7 @@ public: OclContext(const OclDevice &device); ~OclContext(); - bool init(const std::vector &devices, std::vector &threads, const Job &job); + bool init(const std::vector &devices, std::vector &threads); inline bool isValid() const { return m_ctx != nullptr; } inline cl_context ctx() const { return m_ctx; } diff --git a/src/backend/opencl/wrappers/OclLib.cpp b/src/backend/opencl/wrappers/OclLib.cpp index 826b88c5d..1b530bae9 100644 --- a/src/backend/opencl/wrappers/OclLib.cpp +++ b/src/backend/opencl/wrappers/OclLib.cpp @@ -73,6 +73,7 @@ static const char *kReleaseDevice = "clReleaseDevice"; static const char *kReleaseKernel = "clReleaseKernel"; static const char *kReleaseMemObject = "clReleaseMemObject"; static const char *kReleaseProgram = "clReleaseProgram"; +static const char *kRetainMemObject = "clRetainMemObject"; static const char *kRetainProgram = "clRetainProgram"; static const char *kSetKernelArg = "clSetKernelArg"; static const char *kSetMemObjectDestructorCallback = "clSetMemObjectDestructorCallback"; @@ -106,6 +107,7 @@ typedef cl_int (CL_API_CALL *releaseDevice_t)(cl_device_id device); typedef cl_int (CL_API_CALL *releaseKernel_t)(cl_kernel); typedef cl_int (CL_API_CALL *releaseMemObject_t)(cl_mem); typedef cl_int (CL_API_CALL *releaseProgram_t)(cl_program); +typedef cl_int (CL_API_CALL *retainMemObject_t)(cl_mem); typedef cl_int (CL_API_CALL *retainProgram_t)(cl_program); typedef cl_int (CL_API_CALL *setKernelArg_t)(cl_kernel, cl_uint, size_t, const void *); typedef cl_int (CL_API_CALL *setMemObjectDestructorCallback_t)(cl_mem, void (CL_CALLBACK *)(cl_mem, void *), void *); @@ -148,6 +150,7 @@ static releaseDevice_t pReleaseDevice = nu static releaseKernel_t pReleaseKernel = nullptr; static releaseMemObject_t pReleaseMemObject = nullptr; static releaseProgram_t pReleaseProgram = nullptr; +static retainMemObject_t pRetainMemObject = nullptr; static retainProgram_t pRetainProgram = nullptr; static setKernelArg_t pSetKernelArg = nullptr; static setMemObjectDestructorCallback_t pSetMemObjectDestructorCallback = nullptr; @@ -239,6 +242,7 @@ bool xmrig::OclLib::load() DLSYM(SetMemObjectDestructorCallback); DLSYM(CreateSubBuffer); DLSYM(RetainProgram); + DLSYM(RetainMemObject); # if defined(CL_VERSION_2_0) uv_dlsym(&oclLib, kCreateCommandQueueWithProperties, reinterpret_cast(&pCreateCommandQueueWithProperties)); @@ -670,6 +674,18 @@ cl_mem xmrig::OclLib::createSubBuffer(cl_mem buffer, cl_mem_flags flags, size_t } +cl_mem xmrig::OclLib::retain(cl_mem memobj) noexcept +{ + assert(pRetainMemObject != nullptr); + + if (memobj != nullptr) { + pRetainMemObject(memobj); + } + + return memobj; +} + + cl_program xmrig::OclLib::createProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) noexcept { assert(pCreateProgramWithBinary != nullptr); diff --git a/src/backend/opencl/wrappers/OclLib.h b/src/backend/opencl/wrappers/OclLib.h index e9e4b8f3d..0c0eb0d4d 100644 --- a/src/backend/opencl/wrappers/OclLib.h +++ b/src/backend/opencl/wrappers/OclLib.h @@ -79,6 +79,7 @@ public: static cl_mem createBuffer(cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) noexcept; static cl_mem createSubBuffer(cl_mem buffer, cl_mem_flags flags, size_t offset, size_t size, cl_int *errcode_ret) noexcept; static cl_mem createSubBuffer(cl_mem buffer, cl_mem_flags flags, size_t offset, size_t size); + static cl_mem retain(cl_mem memobj) noexcept; static cl_program createProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) noexcept; static cl_program createProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) noexcept; static cl_program retain(cl_program program) noexcept;