// Patch summary (unified git diff, three files). The patch text below is kept verbatim.
//
// 1) src/backend/opencl/runners/OclBaseRunner.cpp
//    - Moves the file's own header include to the top and adds an include of
//      runners/tools/OclSharedState.h.
//    - Hoists `oneGiB` (1024 * 1024 * 1024) from a local in init() to file scope.
//    - init(): `size` is now align(bufferSize()). When size < oneGiB, the device vendor is
//      OCL_VENDOR_AMD and freeMemSize() >= oneGiB, the runner no longer bumps `size` to oneGiB
//      and allocates privately; instead it asks
//      OclSharedState::get(device index).createBuffer(m_ctx, size, m_offset) for a buffer.
//      In every other case it allocates its own buffer with OclLib::createBuffer().
//
// 2) src/backend/opencl/runners/tools/OclSharedData.cpp
//    - Adds OclSharedData::createBuffer(context, size, offset). While holding m_mutex it:
//        * adds size * m_offset to the caller's `offset`, then increments m_offset;
//        * computes the allocation size as max(size * m_threads, oneGiB);
//        * creates m_buffer with OclLib::createBuffer() only if m_buffer is still null;
//        * returns OclLib::retain(m_buffer).
//    - release() now also calls OclLib::release(m_buffer).
//
// 3) src/backend/opencl/runners/tools/OclSharedData.h
//    - Declares createBuffer(), adds a threads() accessor returning m_threads, and adds the
//      members m_buffer (initialised to nullptr) and m_offset (initialised to 0).
//
// NOTE(review): this patch text looks damaged — line breaks are collapsed and the `#include`
//   directives in the OclSharedData.cpp hunk carry no header names, and `std::lock_guard` has no
//   template argument. Presumably everything between angle brackets was stripped during
//   extraction; verify against the upstream commit before applying.
// NOTE(review): the -/+ pairs for `m_queue = ...` and `operator++` are textually identical here,
//   so they are presumably whitespace-only changes whose original spacing has been lost.
// NOTE(review): createBuffer() sizes the allocation only on the first call, from that caller's
//   `size` and the value of m_threads at that moment. Confirm every caller passes the same size
//   and that m_threads is final before the first call; otherwise later offsets could fall
//   outside the buffer.
// NOTE(review): m_offset is incremented before OclLib::createBuffer() is attempted; if that call
//   can fail, the slot stays consumed — TODO confirm the failure behaviour of OclLib.
// NOTE(review): release() as shown does not reset m_buffer or m_offset; confirm the object is
//   not reused after release().
// NOTE(review): OclBaseRunner.cpp calls OclSharedState::get(), while createBuffer() is defined
//   on OclSharedData; the relationship between the two classes is not visible in this patch.
diff --git a/src/backend/opencl/runners/OclBaseRunner.cpp b/src/backend/opencl/runners/OclBaseRunner.cpp index 71b35edc4..d8497e94f 100644 --- a/src/backend/opencl/runners/OclBaseRunner.cpp +++ b/src/backend/opencl/runners/OclBaseRunner.cpp @@ -23,10 +23,11 @@ */ +#include "backend/opencl/runners/OclBaseRunner.h" #include "backend/opencl/cl/OclSource.h" #include "backend/opencl/OclCache.h" #include "backend/opencl/OclLaunchData.h" -#include "backend/opencl/runners/OclBaseRunner.h" +#include "backend/opencl/runners/tools/OclSharedState.h" #include "backend/opencl/wrappers/OclError.h" #include "backend/opencl/wrappers/OclLib.h" #include "base/io/log/Log.h" @@ -34,6 +35,9 @@ #include "crypto/common/VirtualMemory.h" +constexpr size_t oneGiB = 1024 * 1024 * 1024; + + xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) : m_algorithm(data.algorithm), m_ctx(data.ctx), @@ -93,16 +97,17 @@ void xmrig::OclBaseRunner::build() void xmrig::OclBaseRunner::init() { - m_queue = OclLib::createCommandQueue(m_ctx, data().device.id()); + m_queue = OclLib::createCommandQueue(m_ctx, data().device.id()); - constexpr size_t oneGiB = 1024 * 1024 * 1024; - size_t size = bufferSize(); + size_t size = align(bufferSize()); if (size < oneGiB && data().device.vendorId() == OCL_VENDOR_AMD && data().device.freeMemSize() >= oneGiB) { - size = oneGiB; + m_buffer = OclSharedState::get(data().device.index()).createBuffer(m_ctx, size, m_offset); + } + else { + m_buffer = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, size); } - m_buffer = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, size); m_input = createSubBuffer(CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, Job::kMaxBlobSize); m_output = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100); } diff --git a/src/backend/opencl/runners/tools/OclSharedData.cpp b/src/backend/opencl/runners/tools/OclSharedData.cpp index 51571c027..355ab7755 100644 --- a/src/backend/opencl/runners/tools/OclSharedData.cpp +++ 
b/src/backend/opencl/runners/tools/OclSharedData.cpp @@ -31,11 +31,30 @@ #include "crypto/rx/RxDataset.h" +#include #include #include #include +constexpr size_t oneGiB = 1024 * 1024 * 1024; + + +cl_mem xmrig::OclSharedData::createBuffer(cl_context context, size_t size, size_t &offset) +{ + std::lock_guard lock(m_mutex); + + offset += size * m_offset++; + size = std::max(size * m_threads, oneGiB); + + if (!m_buffer) { + m_buffer = OclLib::createBuffer(context, CL_MEM_READ_WRITE, size); + } + + return OclLib::retain(m_buffer); +} + + uint64_t xmrig::OclSharedData::adjustDelay(size_t id) { if (m_threads < 2) { @@ -113,6 +132,8 @@ uint64_t xmrig::OclSharedData::resumeDelay(size_t id) void xmrig::OclSharedData::release() { + OclLib::release(m_buffer); + # ifdef XMRIG_ALGO_RANDOMX OclLib::release(m_dataset); # endif diff --git a/src/backend/opencl/runners/tools/OclSharedData.h b/src/backend/opencl/runners/tools/OclSharedData.h index 75ce04830..caf7b4d66 100644 --- a/src/backend/opencl/runners/tools/OclSharedData.h +++ b/src/backend/opencl/runners/tools/OclSharedData.h @@ -45,13 +45,16 @@ class OclSharedData public: OclSharedData() = default; + cl_mem createBuffer(cl_context context, size_t size, size_t &offset); uint64_t adjustDelay(size_t id); uint64_t resumeDelay(size_t id); void release(); void setResumeCounter(uint32_t value); void setRunTime(uint64_t time); - inline OclSharedData &operator++() { ++m_threads; return *this; } + inline size_t threads() const { return m_threads; } + + inline OclSharedData &operator++() { ++m_threads; return *this; } # ifdef XMRIG_ALGO_RANDOMX cl_mem dataset() const; @@ -59,8 +62,10 @@ public: # endif private: + cl_mem m_buffer = nullptr; double m_averageRunTime = 0.0; double m_threshold = 0.95; + size_t m_offset = 0; size_t m_threads = 0; std::mutex m_mutex; uint32_t m_resumeCounter = 0;