From 29790da63dbc4328b52092755eceac4bd3bdc6a0 Mon Sep 17 00:00:00 2001
From: XMRig
Date: Sun, 8 Sep 2019 16:28:51 +0700
Subject: [PATCH] Added autoconfig for cn/gpu.

---
 src/backend/opencl/OclBackend.cpp | 4 +-
 src/backend/opencl/OclConfig.cpp | 4 +-
 src/backend/opencl/OclThread.cpp | 15 ++--
 src/backend/opencl/OclThread.h | 44 +++++++---
 .../generators/ocl_generic_cn_generator.cpp | 6 +-
 .../ocl_generic_cn_gpu_generator.cpp | 84 +++++++++++++++++++
 .../generators/ocl_vega_cn_generator.cpp | 4 +-
 src/backend/opencl/opencl.cmake | 5 +-
 src/backend/opencl/wrappers/OclDevice.cpp | 19 ++++-
 src/backend/opencl/wrappers/OclDevice.h | 5 +-
 10 files changed, 157 insertions(+), 33 deletions(-)
 create mode 100644 src/backend/opencl/generators/ocl_generic_cn_gpu_generator.cpp

diff --git a/src/backend/opencl/OclBackend.cpp b/src/backend/opencl/OclBackend.cpp
index 0ec6cc8bf..2d066e7c3 100644
--- a/src/backend/opencl/OclBackend.cpp
+++ b/src/backend/opencl/OclBackend.cpp
@@ -136,8 +136,8 @@ public:
                    device.printableName().data(),
                    device.clock(),
                    device.computeUnits(),
-                   device.freeMem() / oneMiB,
-                   device.globalMem() / oneMiB);
+                   device.freeMemSize() / oneMiB,
+                   device.globalMemSize() / oneMiB);
         }
     }
diff --git a/src/backend/opencl/OclConfig.cpp b/src/backend/opencl/OclConfig.cpp
index 4bd2ab848..bca6e30f6 100644
--- a/src/backend/opencl/OclConfig.cpp
+++ b/src/backend/opencl/OclConfig.cpp
@@ -44,7 +44,7 @@ static const char *kPlatform = "platform";
 
 
 #ifdef XMRIG_ALGO_CN_GPU
-//static const char *kCnGPU = "cn/gpu";
+static const char *kCnGPU = "cn/gpu";
 #endif
 
 #ifdef XMRIG_ALGO_CN_LITE
@@ -225,7 +225,7 @@ void xmrig::OclConfig::generate()
     m_threads.move(kCn2, xmrig::generate(Algorithm::CN_2, devices));
 
 # ifdef XMRIG_ALGO_CN_GPU
-//  m_threads.move(kCnGPU, xmrig::generate(Algorithm::CN_GPU, devices));
+    m_threads.move(kCnGPU, xmrig::generate(Algorithm::CN_GPU, devices));
 # endif
 
 # ifdef XMRIG_ALGO_CN_LITE
diff --git a/src/backend/opencl/OclThread.cpp b/src/backend/opencl/OclThread.cpp
index 940a907a9..517e98d4d 100644
--- a/src/backend/opencl/OclThread.cpp
+++ b/src/backend/opencl/OclThread.cpp
@@ -69,6 +69,9 @@ xmrig::OclThread::OclThread(const rapidjson::Value &value)
         m_stridedIndex = std::min(si[0].GetUint(), 2u);
         m_memChunk = std::min(si[1].GetUint(), 18u);
     }
+    else {
+        m_fields.set(STRIDED_INDEX_FIELD, false);
+    }
 
     const rapidjson::Value &threads = Json::getArray(value, kThreads);
     if (threads.IsArray()) {
@@ -112,10 +115,13 @@ rapidjson::Value xmrig::OclThread::toJSON(rapidjson::Document &doc) const
     out.AddMember(StringRef(kIntensity), intensity(), allocator);
     out.AddMember(StringRef(kWorksize), worksize(), allocator);
 
-    Value si(kArrayType);
-    si.Reserve(2, allocator);
-    si.PushBack(stridedIndex(), allocator);
-    si.PushBack(memChunk(), allocator);
+    if (m_fields.test(STRIDED_INDEX_FIELD)) {
+        Value si(kArrayType);
+        si.Reserve(2, allocator);
+        si.PushBack(stridedIndex(), allocator);
+        si.PushBack(memChunk(), allocator);
+        out.AddMember(StringRef(kStridedIndex), si, allocator);
+    }
 
     Value threads(kArrayType);
     threads.Reserve(m_threads.size(), allocator);
@@ -124,7 +130,6 @@ rapidjson::Value xmrig::OclThread::toJSON(rapidjson::Document &doc) const
         threads.PushBack(thread, allocator);
     }
 
-    out.AddMember(StringRef(kStridedIndex), si, allocator);
     out.AddMember(StringRef(kThreads), threads, allocator);
     out.AddMember(StringRef(kUnroll), unrollFactor(), allocator);
diff --git a/src/backend/opencl/OclThread.h b/src/backend/opencl/OclThread.h
index 1de7c35d3..824216da1 100644
--- a/src/backend/opencl/OclThread.h
+++ b/src/backend/opencl/OclThread.h
@@ -30,6 +30,7 @@
 #include "rapidjson/fwd.h"
 
 
+#include <bitset>
 #include <vector>
 
 
@@ -40,8 +41,7 @@ class OclThread
 {
 public:
     OclThread() = delete;
-    OclThread(uint32_t index, uint32_t intensity, uint32_t worksize, uint32_t stridedIndex, uint32_t memChunk, uint32_t threads, uint32_t unrollFactor, const Algorithm &algorithm) :
-        m_algorithm(algorithm),
+    OclThread(uint32_t index, uint32_t intensity, uint32_t worksize, uint32_t stridedIndex, uint32_t memChunk, uint32_t threads, uint32_t unrollFactor) :
         m_threads(threads, -1),
         m_index(index),
         m_memChunk(memChunk),
@@ -52,6 +52,20 @@ public:
         setIntensity(intensity);
     }
 
+# ifdef XMRIG_ALGO_CN_GPU
+    OclThread(uint32_t index, uint32_t intensity, uint32_t worksize, uint32_t threads, uint32_t unrollFactor) :
+        m_fields(0),
+        m_threads(threads, -1),
+        m_index(index),
+        m_memChunk(0),
+        m_stridedIndex(0),
+        m_unrollFactor(unrollFactor),
+        m_worksize(worksize)
+    {
+        setIntensity(intensity);
+    }
+# endif
+
     OclThread(const rapidjson::Value &value);
 
     inline bool isValid() const { return m_intensity > 0; }
@@ -73,19 +87,25 @@ public:
     rapidjson::Value toJSON(rapidjson::Document &doc) const;
 
 private:
+    enum Fields {
+        STRIDED_INDEX_FIELD,
+        RANDOMX_FIELDS,
+        FIELD_MAX
+    };
+
     inline void setIntensity(uint32_t intensity) { m_intensity = intensity / m_worksize * m_worksize; }
 
-    Algorithm m_algorithm;
-    int m_datasetHost = -1;
+    int m_datasetHost               = -1;
+    std::bitset<FIELD_MAX> m_fields = 1;
     std::vector<int64_t> m_threads;
-    uint32_t m_bfactor      = 6;
-    uint32_t m_gcnAsm       = 1;
-    uint32_t m_index        = 0;
-    uint32_t m_intensity    = 0;
-    uint32_t m_memChunk     = 2;
-    uint32_t m_stridedIndex = 2;
-    uint32_t m_unrollFactor = 8;
-    uint32_t m_worksize     = 0;
+    uint32_t m_bfactor              = 6;
+    uint32_t m_gcnAsm               = 1;
+    uint32_t m_index                = 0;
+    uint32_t m_intensity            = 0;
+    uint32_t m_memChunk             = 2;
+    uint32_t m_stridedIndex         = 2;
+    uint32_t m_unrollFactor         = 8;
+    uint32_t m_worksize             = 0;
 };
diff --git a/src/backend/opencl/generators/ocl_generic_cn_generator.cpp b/src/backend/opencl/generators/ocl_generic_cn_generator.cpp
index a6d598517..7bdaf027c 100644
--- a/src/backend/opencl/generators/ocl_generic_cn_generator.cpp
+++ b/src/backend/opencl/generators/ocl_generic_cn_generator.cpp
@@ -50,7 +50,7 @@ static inline uint32_t getPossibleIntensity(const OclDevice &device, const Algorithm &algorithm)
 {
     const uint32_t maxThreads = getMaxThreads(device, algorithm);
     const size_t minFreeMem = (maxThreads == 40000u ? 512u : 128u) * oneMiB;
-    const size_t availableMem = device.freeMem() - minFreeMem;
+    const size_t availableMem = device.freeMemSize() - minFreeMem;
     const size_t perThread = algorithm.l3() + 224u;
     const auto maxIntensity = static_cast<uint32_t>(availableMem / perThread);
@@ -104,9 +104,9 @@ bool ocl_generic_cn_generator(const OclDevice &device, const Algorithm &algorithm, OclThreads &threads)
         return false;
     }
 
-    const uint32_t threadCount = ((device.globalMem() - intensity * 2 * algorithm.l3()) > 128 * oneMiB) ? 2 : 1;
+    const uint32_t threadCount = ((device.globalMemSize() - intensity * 2 * algorithm.l3()) > 128 * oneMiB) ? 2 : 1;
 
-    threads.add(OclThread(device.index(), intensity, 8, getStridedIndex(device, algorithm), 2, threadCount, 8, algorithm));
+    threads.add(OclThread(device.index(), intensity, 8, getStridedIndex(device, algorithm), 2, threadCount, 8));
 
     return true;
 }
diff --git a/src/backend/opencl/generators/ocl_generic_cn_gpu_generator.cpp b/src/backend/opencl/generators/ocl_generic_cn_gpu_generator.cpp
new file mode 100644
index 000000000..cbb91a25a
--- /dev/null
+++ b/src/backend/opencl/generators/ocl_generic_cn_gpu_generator.cpp
@@ -0,0 +1,84 @@
+/* XMRig
+ * Copyright 2010      Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014      Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016      Jay D Dee
+ * Copyright 2017-2018 XMR-Stak
+ * Copyright 2018      Lee Clagett
+ * Copyright 2018-2019 SChernykh
+ * Copyright 2016-2019 XMRig
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "backend/opencl/OclThreads.h"
+#include "backend/opencl/wrappers/OclDevice.h"
+#include "crypto/common/Algorithm.h"
+
+
+namespace xmrig {
+
+
+constexpr const size_t oneMiB = 1024u * 1024u;
+
+
+
+bool ocl_generic_cn_gpu_generator(const OclDevice &device, const Algorithm &algorithm, OclThreads &threads)
+{
+    if (algorithm != Algorithm::CN_GPU) {
+        return false;
+    }
+
+    uint32_t worksize = 8;
+    uint32_t numThreads = 1u;
+    size_t minFreeMem = 128u * oneMiB;
+
+    if (device.type() == OclDevice::Vega_10 || device.type() == OclDevice::Vega_20) {
+        minFreeMem = oneMiB;
+        worksize = 16;
+    }
+    else if (device.type() == OclDevice::Navi_10) {
+        numThreads = 2u;
+    }
+    else if (device.name() == "Fiji") {
+        worksize = 16;
+    }
+
+    size_t maxThreads = device.computeUnits() * 6 * 8;
+
+    const size_t maxAvailableFreeMem = device.freeMemSize() - minFreeMem;
+    const size_t memPerThread = std::min(device.maxMemAllocSize(), maxAvailableFreeMem);
+
+    size_t memPerHash = algorithm.l3() + 240u;
+    size_t maxIntensity = memPerThread / memPerHash;
+    size_t possibleIntensity = std::min(maxThreads, maxIntensity);
+    size_t intensity = 0;
+    size_t cuUtilization = ((possibleIntensity * 100) / (worksize * device.computeUnits())) % 100;
+
+    if (cuUtilization >= 75) {
+        intensity = (possibleIntensity / worksize) * worksize;
+    }
+    else {
+        intensity = (possibleIntensity / (worksize * device.computeUnits())) * device.computeUnits() * worksize;
+    }
+
+    threads.add(OclThread(device.index(), intensity, worksize, numThreads, 1));
+
+    return true;
+}
+
+
+} // namespace xmrig
diff --git a/src/backend/opencl/generators/ocl_vega_cn_generator.cpp b/src/backend/opencl/generators/ocl_vega_cn_generator.cpp
index f9e51b18a..6c6f73047 100644
--- a/src/backend/opencl/generators/ocl_vega_cn_generator.cpp
+++ b/src/backend/opencl/generators/ocl_vega_cn_generator.cpp
@@ -61,7 +61,7 @@ static inline uint32_t getMaxThreads(const OclDevice &device, const Algorithm &algorithm)
 
 static inline uint32_t getPossibleIntensity(const OclDevice &device, const Algorithm &algorithm)
 {
     const uint32_t maxThreads = getMaxThreads(device, algorithm);
-    const size_t availableMem = device.freeMem() - (128u * oneMiB);
+    const size_t availableMem = device.freeMemSize() - (128u * oneMiB);
     const size_t perThread = algorithm.l3() + 224u;
     const auto maxIntensity = static_cast<uint32_t>(availableMem / perThread);
@@ -123,7 +123,7 @@ bool ocl_vega_cn_generator(const OclDevice &device, const Algorithm &algorithm, OclThreads &threads)
     const uint32_t worksize = getWorksize(algorithm);
     const uint32_t memChunk = getMemChunk(algorithm);
 
-    threads.add(OclThread(device.index(), intensity, worksize, getStridedIndex(algorithm), memChunk, 2, 8, algorithm));
+    threads.add(OclThread(device.index(), intensity, worksize, getStridedIndex(algorithm), memChunk, 2, 8));
 
     return true;
 }
diff --git a/src/backend/opencl/opencl.cmake b/src/backend/opencl/opencl.cmake
index b40f5d33b..3a8c9596b 100644
--- a/src/backend/opencl/opencl.cmake
+++ b/src/backend/opencl/opencl.cmake
@@ -5,8 +5,6 @@ if (WITH_OPENCL)
 
     set(HEADERS_BACKEND_OPENCL
         src/backend/opencl/cl/OclSource.h
-        src/backend/opencl/generators/ocl_generic_cn_generator.cpp
-        src/backend/opencl/generators/ocl_vega_cn_generator.cpp
         src/backend/opencl/interfaces/IOclRunner.h
         src/backend/opencl/kernels/Cn0Kernel.h
         src/backend/opencl/kernels/Cn1Kernel.h
@@ -35,6 +33,8 @@ if (WITH_OPENCL)
 
     set(SOURCES_BACKEND_OPENCL
         src/backend/opencl/cl/OclSource.cpp
+        src/backend/opencl/generators/ocl_generic_cn_generator.cpp
+        src/backend/opencl/generators/ocl_vega_cn_generator.cpp
         src/backend/opencl/kernels/Cn0Kernel.cpp
         src/backend/opencl/kernels/Cn1Kernel.cpp
         src/backend/opencl/kernels/Cn2Kernel.cpp
@@ -78,6 +78,7 @@ if (WITH_OPENCL)
     )
 
     list(APPEND SOURCES_BACKEND_OPENCL
+        src/backend/opencl/generators/ocl_generic_cn_gpu_generator.cpp
         src/backend/opencl/kernels/Cn00RyoKernel.cpp
         src/backend/opencl/kernels/Cn1RyoKernel.cpp
         src/backend/opencl/kernels/Cn2RyoKernel.cpp
diff --git a/src/backend/opencl/wrappers/OclDevice.cpp b/src/backend/opencl/wrappers/OclDevice.cpp
index e95c9d43a..b005d4902 100644
--- a/src/backend/opencl/wrappers/OclDevice.cpp
+++ b/src/backend/opencl/wrappers/OclDevice.cpp
@@ -47,11 +47,18 @@ typedef union
 namespace xmrig {
 
 
+#ifdef XMRIG_ALGO_CN_GPU
+extern bool ocl_generic_cn_gpu_generator(const OclDevice &device, const Algorithm &algorithm, OclThreads &threads);
+#endif
+
 extern bool ocl_vega_cn_generator(const OclDevice &device, const Algorithm &algorithm, OclThreads &threads);
 extern bool ocl_generic_cn_generator(const OclDevice &device, const Algorithm &algorithm, OclThreads &threads);
 
 
 ocl_gen_config_fun generators[] = {
+# ifdef XMRIG_ALGO_CN_GPU
+    ocl_generic_cn_gpu_generator,
+# endif
     ocl_vega_cn_generator,
     ocl_generic_cn_generator
 };
@@ -149,18 +156,24 @@ xmrig::OclDevice::OclDevice(uint32_t index, cl_device_id id, cl_platform_id platform) :
 }
 
 
-size_t xmrig::OclDevice::freeMem() const
+size_t xmrig::OclDevice::freeMemSize() const
 {
-    return std::min(OclLib::getDeviceUlong(id(), CL_DEVICE_MAX_MEM_ALLOC_SIZE), globalMem());
+    return std::min(maxMemAllocSize(), globalMemSize());
 }
 
 
-size_t xmrig::OclDevice::globalMem() const
+size_t xmrig::OclDevice::globalMemSize() const
 {
     return OclLib::getDeviceUlong(id(), CL_DEVICE_GLOBAL_MEM_SIZE);
 }
 
 
+size_t xmrig::OclDevice::maxMemAllocSize() const
+{
+    return OclLib::getDeviceUlong(id(), CL_DEVICE_MAX_MEM_ALLOC_SIZE);
+}
+
+
 xmrig::String xmrig::OclDevice::printableName() const
 {
     const size_t size = m_board.size() + m_name.size() + 64;
diff --git a/src/backend/opencl/wrappers/OclDevice.h b/src/backend/opencl/wrappers/OclDevice.h
index 0790199d5..517fa2bdb 100644
--- a/src/backend/opencl/wrappers/OclDevice.h
+++ b/src/backend/opencl/wrappers/OclDevice.h
@@ -62,8 +62,9 @@ public:
     OclDevice() = delete;
     OclDevice(uint32_t index, cl_device_id id, cl_platform_id platform);
 
-    size_t freeMem() const;
-    size_t globalMem() const;
+    size_t freeMemSize() const;
+    size_t globalMemSize() const;
+    size_t maxMemAllocSize() const;
     String printableName() const;
     uint32_t clock() const;
     void generate(const Algorithm &algorithm, OclThreads &threads) const;
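
Note on the new generator: ocl_generic_cn_gpu_generator.cpp budgets per-thread memory as min(maxMemAllocSize(), freeMemSize() - reserve), divides by the cn/gpu per-hash footprint (algorithm.l3() + 240 bytes), caps the result at computeUnits() * 6 * 8, and rounds the intensity down to a multiple of the worksize; when the tail over a full worksize * computeUnits() block would be under 75% it rounds down to a whole multiple of worksize * computeUnits() instead. The standalone C++ sketch below reproduces only that arithmetic with hypothetical device numbers (1 GiB max allocation, 2 GiB free memory, 36 compute units, worksize 8, 2 MiB scratchpad) so the rounding can be checked in isolation; the values and variable names are illustrative and not part of the patch.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main()
{
    // Hypothetical device numbers, not queried from OpenCL.
    const std::size_t maxMemAlloc    = 1024u * 1024u * 1024u;   // CL_DEVICE_MAX_MEM_ALLOC_SIZE
    const std::size_t freeMem        = 2048ull * 1024u * 1024u; // free device memory
    const std::size_t minFreeMem     = 128u * 1024u * 1024u;    // reserve kept free (generic branch)
    const std::uint32_t computeUnits = 36u;
    const std::uint32_t worksize     = 8u;
    const std::size_t scratchpad     = 2u * 1024u * 1024u;      // cn/gpu l3 per hash

    // Same arithmetic as ocl_generic_cn_gpu_generator().
    const std::size_t memPerThread      = std::min(maxMemAlloc, freeMem - minFreeMem);
    const std::size_t memPerHash        = scratchpad + 240u;
    const std::size_t maxThreads        = computeUnits * 6u * 8u;
    const std::size_t possibleIntensity = std::min(maxThreads, memPerThread / memPerHash);

    // Percentage of a full (worksize * computeUnits) block covered by the tail.
    const std::size_t cuUtilization = ((possibleIntensity * 100u) / (worksize * computeUnits)) % 100u;

    const std::size_t intensity = (cuUtilization >= 75u)
        ? (possibleIntensity / worksize) * worksize
        : (possibleIntensity / (worksize * computeUnits)) * computeUnits * worksize;

    std::printf("intensity = %zu\n", intensity); // prints 504 for these numbers
    return 0;
}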