diff --git a/src/backend/opencl/interfaces/IOclRunner.h b/src/backend/opencl/interfaces/IOclRunner.h index 005c1b086..c1718a2a7 100644 --- a/src/backend/opencl/interfaces/IOclRunner.h +++ b/src/backend/opencl/interfaces/IOclRunner.h @@ -63,6 +63,9 @@ public: virtual uint32_t deviceIndex() const = 0; virtual void build() = 0; virtual void init() = 0; + +protected: + virtual size_t bufferSize() const = 0; }; diff --git a/src/backend/opencl/runners/OclBaseRunner.cpp b/src/backend/opencl/runners/OclBaseRunner.cpp index 3e8225a91..43cffab21 100644 --- a/src/backend/opencl/runners/OclBaseRunner.cpp +++ b/src/backend/opencl/runners/OclBaseRunner.cpp @@ -27,10 +27,11 @@ #include "backend/opencl/OclCache.h" #include "backend/opencl/OclLaunchData.h" #include "backend/opencl/runners/OclBaseRunner.h" +#include "backend/opencl/wrappers/OclError.h" #include "backend/opencl/wrappers/OclLib.h" #include "base/io/log/Log.h" #include "base/net/stratum/Job.h" -#include "backend/opencl/wrappers/OclError.h" +#include "crypto/common/VirtualMemory.h" xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) : @@ -38,6 +39,7 @@ xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) : m_ctx(data.ctx), m_source(OclSource::get(data.algorithm)), m_data(data), + m_align(OclLib::getUint(data.device.id(), CL_DEVICE_MEM_BASE_ADDR_ALIGN)), m_threadId(id) { m_deviceKey = data.device.name(); @@ -61,10 +63,17 @@ xmrig::OclBaseRunner::~OclBaseRunner() OclLib::release(m_program); OclLib::release(m_input); OclLib::release(m_output); + OclLib::release(m_buffer); OclLib::release(m_queue); } +size_t xmrig::OclBaseRunner::bufferSize() const +{ + return align(Job::kMaxBlobSize) + align(sizeof(cl_uint) * 0x100); +} + + uint32_t xmrig::OclBaseRunner::deviceIndex() const { return data().thread.index(); @@ -84,8 +93,33 @@ void xmrig::OclBaseRunner::build() void xmrig::OclBaseRunner::init() { m_queue = OclLib::createCommandQueue(m_ctx, data().device.id()); - m_input = OclLib::createBuffer(m_ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize); - m_output = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100); + + constexpr size_t oneGiB = 1024 * 1024 * 1024; + size_t size = bufferSize(); + + if (size < oneGiB && data().device.freeMemSize() >= oneGiB) { + size = oneGiB; + } + + m_buffer = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, size); + m_input = createSubBuffer(CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, Job::kMaxBlobSize); + m_output = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100); +} + + +cl_mem xmrig::OclBaseRunner::createSubBuffer(cl_mem_flags flags, size_t size) +{ + auto mem = OclLib::createSubBuffer(m_buffer, flags, m_offset, size); + + m_offset += align(size); + + return mem; +} + + +size_t xmrig::OclBaseRunner::align(size_t size) const +{ + return VirtualMemory::align(size, m_align); } diff --git a/src/backend/opencl/runners/OclBaseRunner.h b/src/backend/opencl/runners/OclBaseRunner.h index 530bdce96..a6ab05877 100644 --- a/src/backend/opencl/runners/OclBaseRunner.h +++ b/src/backend/opencl/runners/OclBaseRunner.h @@ -57,11 +57,14 @@ protected: inline const OclLaunchData &data() const override { return m_data; } inline size_t threadId() const override { return m_threadId; } + size_t bufferSize() const override; uint32_t deviceIndex() const override; void build() override; void init() override; protected: + cl_mem createSubBuffer(cl_mem_flags flags, size_t size); + size_t align(size_t size) const; void enqueueReadBuffer(cl_mem buffer, cl_bool blocking_read, size_t offset, size_t size, void *ptr); void enqueueWriteBuffer(cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void *ptr); void finalize(uint32_t *hashOutput); @@ -69,12 +72,15 @@ protected: Algorithm m_algorithm; cl_command_queue m_queue = nullptr; cl_context m_ctx; + cl_mem m_buffer = nullptr; cl_mem m_input = nullptr; cl_mem m_output = nullptr; cl_program m_program = nullptr; const char *m_source; const OclLaunchData &m_data; + const size_t m_align; const size_t m_threadId; + size_t m_offset = 0; std::string m_deviceKey; std::string m_options; }; diff --git a/src/backend/opencl/runners/OclCnRunner.cpp b/src/backend/opencl/runners/OclCnRunner.cpp index ab5aff15b..65e9611f3 100644 --- a/src/backend/opencl/runners/OclCnRunner.cpp +++ b/src/backend/opencl/runners/OclCnRunner.cpp @@ -81,6 +81,17 @@ xmrig::OclCnRunner::~OclCnRunner() } +size_t xmrig::OclCnRunner::bufferSize() const +{ + const size_t g_thd = data().thread.intensity(); + + return OclBaseRunner::bufferSize() + + align(m_algorithm.l3() * g_thd) + + align(200 * g_thd) + + (align(sizeof(cl_uint) * (g_thd + 2)) * BRANCH_MAX); +} + + void xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput) { static const cl_uint zero = 0; @@ -167,10 +178,10 @@ void xmrig::OclCnRunner::init() const size_t g_thd = data().thread.intensity(); - m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, m_algorithm.l3() * g_thd); - m_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 200 * g_thd); + m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * g_thd); + m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd); for (size_t i = 0; i < BRANCH_MAX; ++i) { - m_branches[i] = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2)); + m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2)); } } diff --git a/src/backend/opencl/runners/OclCnRunner.h b/src/backend/opencl/runners/OclCnRunner.h index e08496c09..bd20019fc 100644 --- a/src/backend/opencl/runners/OclCnRunner.h +++ b/src/backend/opencl/runners/OclCnRunner.h @@ -47,6 +47,7 @@ public: ~OclCnRunner() override; protected: + size_t bufferSize() const override; void run(uint32_t nonce, uint32_t *hashOutput) override; void set(const Job &job, uint8_t *blob) override; void build() override; diff --git a/src/backend/opencl/runners/OclRxBaseRunner.cpp b/src/backend/opencl/runners/OclRxBaseRunner.cpp index 8fe9a4793..640803dae 100644 --- a/src/backend/opencl/runners/OclRxBaseRunner.cpp +++ b/src/backend/opencl/runners/OclRxBaseRunner.cpp @@ -136,6 +136,18 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob) } +size_t xmrig::OclRxBaseRunner::bufferSize() const +{ + const size_t g_thd = data().thread.intensity(); + + return OclBaseRunner::bufferSize() + + align((m_algorithm.l3() + 64) * g_thd) + + align(64 * g_thd) + + align((128 + 2560) * g_thd) + + align(sizeof(uint32_t) * g_thd); +} + + void xmrig::OclRxBaseRunner::build() { OclBaseRunner::build(); @@ -168,8 +180,8 @@ void xmrig::OclRxBaseRunner::init() const size_t g_thd = data().thread.intensity(); - m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, (m_algorithm.l3() + 64) * g_thd); - m_hashes = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 64 * g_thd); - m_entropy = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, (128 + 2560) * g_thd); - m_rounding = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(uint32_t) * g_thd); + m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * g_thd); + m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * g_thd); + m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * g_thd); + m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * g_thd); } diff --git a/src/backend/opencl/runners/OclRxBaseRunner.h b/src/backend/opencl/runners/OclRxBaseRunner.h index d8afdd702..48b0ff8e8 100644 --- a/src/backend/opencl/runners/OclRxBaseRunner.h +++ b/src/backend/opencl/runners/OclRxBaseRunner.h @@ -49,10 +49,11 @@ public: ~OclRxBaseRunner() override; protected: - void run(uint32_t nonce, uint32_t *hashOutput) override; - void set(const Job &job, uint8_t *blob) override; + size_t bufferSize() const override; void build() override; void init() override; + void run(uint32_t nonce, uint32_t *hashOutput) override; + void set(const Job &job, uint8_t *blob) override; protected: virtual void execute(uint32_t iteration) = 0; diff --git a/src/backend/opencl/runners/OclRxJitRunner.cpp b/src/backend/opencl/runners/OclRxJitRunner.cpp index 58cbb7c42..0bf39437c 100644 --- a/src/backend/opencl/runners/OclRxJitRunner.cpp +++ b/src/backend/opencl/runners/OclRxJitRunner.cpp @@ -52,6 +52,14 @@ xmrig::OclRxJitRunner::~OclRxJitRunner() } +size_t xmrig::OclRxJitRunner::bufferSize() const +{ + const size_t g_thd = data().thread.intensity(); + + return OclRxBaseRunner::bufferSize() + align(256 * g_thd) + align(5120 * g_thd) + align(10048 * g_thd); +} + + void xmrig::OclRxJitRunner::build() { OclRxBaseRunner::build(); @@ -92,9 +100,9 @@ void xmrig::OclRxJitRunner::init() const size_t g_thd = data().thread.intensity(); - m_registers = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 256 * g_thd, nullptr); - m_intermediate_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 5120 * g_thd, nullptr); - m_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 10048 * g_thd, nullptr); + m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * g_thd); + m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * g_thd); + m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * g_thd); } diff --git a/src/backend/opencl/runners/OclRxJitRunner.h b/src/backend/opencl/runners/OclRxJitRunner.h index 0ae1d6a4f..db885bab4 100644 --- a/src/backend/opencl/runners/OclRxJitRunner.h +++ b/src/backend/opencl/runners/OclRxJitRunner.h @@ -45,6 +45,7 @@ public: ~OclRxJitRunner() override; protected: + size_t bufferSize() const override; void build() override; void execute(uint32_t iteration) override; void init() override; diff --git a/src/backend/opencl/runners/OclRxVmRunner.cpp b/src/backend/opencl/runners/OclRxVmRunner.cpp index 6ed1e397a..76009e89a 100644 --- a/src/backend/opencl/runners/OclRxVmRunner.cpp +++ b/src/backend/opencl/runners/OclRxVmRunner.cpp @@ -47,6 +47,12 @@ xmrig::OclRxVmRunner::~OclRxVmRunner() } +size_t xmrig::OclRxVmRunner::bufferSize() const +{ + return OclRxBaseRunner::bufferSize() + (align(2560 * data().thread.intensity())); +} + + void xmrig::OclRxVmRunner::build() { OclRxBaseRunner::build(); @@ -94,5 +100,5 @@ void xmrig::OclRxVmRunner::init() { OclRxBaseRunner::init(); - m_vm_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 2560 * data().thread.intensity()); + m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * data().thread.intensity()); } diff --git a/src/backend/opencl/runners/OclRxVmRunner.h b/src/backend/opencl/runners/OclRxVmRunner.h index a1cbd1880..8d044e744 100644 --- a/src/backend/opencl/runners/OclRxVmRunner.h +++ b/src/backend/opencl/runners/OclRxVmRunner.h @@ -45,6 +45,7 @@ public: ~OclRxVmRunner() override; protected: + size_t bufferSize() const override; void build() override; void execute(uint32_t iteration) override; void init() override; diff --git a/src/backend/opencl/runners/OclRyoRunner.cpp b/src/backend/opencl/runners/OclRyoRunner.cpp index 1b3d08c76..47e9098d6 100644 --- a/src/backend/opencl/runners/OclRyoRunner.cpp +++ b/src/backend/opencl/runners/OclRyoRunner.cpp @@ -61,6 +61,14 @@ xmrig::OclRyoRunner::~OclRyoRunner() } +size_t xmrig::OclRyoRunner::bufferSize() const +{ + const size_t g_thd = data().thread.intensity(); + + return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * g_thd) + align(200 * g_thd); +} + + void xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput) { static const cl_uint zero = 0; @@ -123,6 +131,6 @@ void xmrig::OclRyoRunner::init() const size_t g_thd = data().thread.intensity(); - m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, data().algorithm.l3() * g_thd); - m_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 200 * g_thd); + m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * g_thd); + m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd); } diff --git a/src/backend/opencl/runners/OclRyoRunner.h b/src/backend/opencl/runners/OclRyoRunner.h index e236a3d60..fd9f41c31 100644 --- a/src/backend/opencl/runners/OclRyoRunner.h +++ b/src/backend/opencl/runners/OclRyoRunner.h @@ -48,6 +48,7 @@ public: ~OclRyoRunner() override; protected: + size_t bufferSize() const override; void run(uint32_t nonce, uint32_t *hashOutput) override; void set(const Job &job, uint8_t *blob) override; void build() override; diff --git a/src/backend/opencl/wrappers/OclLib.cpp b/src/backend/opencl/wrappers/OclLib.cpp index a9e41a943..687c4c0ae 100644 --- a/src/backend/opencl/wrappers/OclLib.cpp +++ b/src/backend/opencl/wrappers/OclLib.cpp @@ -51,6 +51,7 @@ static const char *kCreateContext = "clCreateContext"; static const char *kCreateKernel = "clCreateKernel"; static const char *kCreateProgramWithBinary = "clCreateProgramWithBinary"; static const char *kCreateProgramWithSource = "clCreateProgramWithSource"; +static const char *kCreateSubBuffer = "clCreateSubBuffer"; static const char *kEnqueueNDRangeKernel = "clEnqueueNDRangeKernel"; static const char *kEnqueueReadBuffer = "clEnqueueReadBuffer"; static const char *kEnqueueWriteBuffer = "clEnqueueWriteBuffer"; @@ -72,6 +73,8 @@ static const char *kReleaseKernel = "clReleaseKernel"; static const char *kReleaseMemObject = "clReleaseMemObject"; static const char *kReleaseProgram = "clReleaseProgram"; static const char *kSetKernelArg = "clSetKernelArg"; +static const char *kSetMemObjectDestructorCallback = "clSetMemObjectDestructorCallback"; +static const char *kUnloadPlatformCompiler = "clUnloadPlatformCompiler"; #if defined(CL_VERSION_2_0) @@ -102,8 +105,11 @@ typedef cl_int (CL_API_CALL *releaseKernel_t)(cl_kernel); typedef cl_int (CL_API_CALL *releaseMemObject_t)(cl_mem); typedef cl_int (CL_API_CALL *releaseProgram_t)(cl_program); typedef cl_int (CL_API_CALL *setKernelArg_t)(cl_kernel, cl_uint, size_t, const void *); +typedef cl_int (CL_API_CALL *setMemObjectDestructorCallback_t)(cl_mem, void (CL_CALLBACK *)(cl_mem, void *), void *); +typedef cl_int (CL_API_CALL *unloadPlatformCompiler_t)(cl_platform_id); typedef cl_kernel (CL_API_CALL *createKernel_t)(cl_program, const char *, cl_int *); typedef cl_mem (CL_API_CALL *createBuffer_t)(cl_context, cl_mem_flags, size_t, void *, cl_int *); +typedef cl_mem (CL_API_CALL *createSubBuffer_t)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void *, cl_int *); typedef cl_program (CL_API_CALL *createProgramWithBinary_t)(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *); typedef cl_program (CL_API_CALL *createProgramWithSource_t)(cl_context, cl_uint, const char **, const size_t *, cl_int *); @@ -118,6 +124,7 @@ static createContext_t pCreateContext = nu static createKernel_t pCreateKernel = nullptr; static createProgramWithBinary_t pCreateProgramWithBinary = nullptr; static createProgramWithSource_t pCreateProgramWithSource = nullptr; +static createSubBuffer_t pCreateSubBuffer = nullptr; static enqueueNDRangeKernel_t pEnqueueNDRangeKernel = nullptr; static enqueueReadBuffer_t pEnqueueReadBuffer = nullptr; static enqueueWriteBuffer_t pEnqueueWriteBuffer = nullptr; @@ -139,6 +146,8 @@ static releaseKernel_t pReleaseKernel = nu static releaseMemObject_t pReleaseMemObject = nullptr; static releaseProgram_t pReleaseProgram = nullptr; static setKernelArg_t pSetKernelArg = nullptr; +static setMemObjectDestructorCallback_t pSetMemObjectDestructorCallback = nullptr; +static unloadPlatformCompiler_t pUnloadPlatformCompiler = nullptr; #define DLSYM(x) if (uv_dlsym(&oclLib, k##x, reinterpret_cast(&p##x)) == -1) { return false; } @@ -222,6 +231,9 @@ bool xmrig::OclLib::load() DLSYM(GetMemObjectInfo); DLSYM(GetContextInfo); DLSYM(ReleaseDevice); + DLSYM(UnloadPlatformCompiler); + DLSYM(SetMemObjectDestructorCallback); + DLSYM(CreateSubBuffer); # if defined(CL_VERSION_2_0) uv_dlsym(&oclLib, kCreateCommandQueueWithProperties, reinterpret_cast(&pCreateCommandQueueWithProperties)); @@ -563,6 +575,12 @@ cl_int xmrig::OclLib::setKernelArg(cl_kernel kernel, cl_uint arg_index, size_t a } +cl_int xmrig::OclLib::unloadPlatformCompiler(cl_platform_id platform) noexcept +{ + return pUnloadPlatformCompiler(platform); +} + + cl_kernel xmrig::OclLib::createKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) noexcept { assert(pCreateKernel != nullptr); @@ -619,6 +637,34 @@ cl_mem xmrig::OclLib::createBuffer(cl_context context, cl_mem_flags flags, size_ } +cl_mem xmrig::OclLib::createSubBuffer(cl_mem buffer, cl_mem_flags flags, size_t offset, size_t size, cl_int *errcode_ret) noexcept +{ + const cl_buffer_region region = { offset, size }; + + auto result = pCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, errcode_ret); + if (*errcode_ret != CL_SUCCESS) { + LOG_ERR("%s" RED(" error ") RED_BOLD("%s") RED(" when calling ") RED_BOLD("%s") RED(" with offset ") RED_BOLD("%zu") RED(" and size ") RED_BOLD("%zu"), + ocl_tag(), OclError::toString(*errcode_ret), kCreateSubBuffer, offset, size); + + return nullptr; + } + + return result; +} + + +cl_mem xmrig::OclLib::createSubBuffer(cl_mem buffer, cl_mem_flags flags, size_t offset, size_t size) +{ + cl_int ret; + cl_mem mem = createSubBuffer(buffer, flags, offset, size, &ret); + if (ret != CL_SUCCESS) { + throw std::runtime_error(OclError::toString(ret)); + } + + return mem; +} + + cl_program xmrig::OclLib::createProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) noexcept { assert(pCreateProgramWithBinary != nullptr); diff --git a/src/backend/opencl/wrappers/OclLib.h b/src/backend/opencl/wrappers/OclLib.h index c181e553c..893e2a25b 100644 --- a/src/backend/opencl/wrappers/OclLib.h +++ b/src/backend/opencl/wrappers/OclLib.h @@ -72,10 +72,13 @@ public: static cl_int release(cl_mem mem_obj) noexcept; static cl_int release(cl_program program) noexcept; static cl_int setKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) noexcept; + static cl_int unloadPlatformCompiler(cl_platform_id platform) noexcept; static cl_kernel createKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) noexcept; static cl_kernel createKernel(cl_program program, const char *kernel_name); static cl_mem createBuffer(cl_context context, cl_mem_flags flags, size_t size, void *host_ptr = nullptr); static cl_mem createBuffer(cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) noexcept; + static cl_mem createSubBuffer(cl_mem buffer, cl_mem_flags flags, size_t offset, size_t size, cl_int *errcode_ret) noexcept; + static cl_mem createSubBuffer(cl_mem buffer, cl_mem_flags flags, size_t offset, size_t size); static cl_program createProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) noexcept; static cl_program createProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) noexcept; static cl_uint getNumPlatforms() noexcept;