Implemented CUDA config generation.

XMRig 2019-10-26 03:12:55 +07:00
parent 77d5b73724
commit d4a3024996
16 changed files with 208 additions and 42 deletions

src/backend/cuda/CudaBackend.cpp View file

@@ -97,10 +97,14 @@ public:
return printDisabled(RED_S " (no devices)");
}
if (!devices.empty()) {
return;
}
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") WHITE_BOLD("%u.%u") "/" WHITE_BOLD("%u.%u") BLACK_BOLD("/%s"), "CUDA",
runtimeVersion / 1000, runtimeVersion % 100, driverVersion / 1000, driverVersion % 100, CudaLib::pluginVersion());
devices = CudaLib::devices();
devices = CudaLib::devices(cuda.bfactor(), cuda.bsleep());
for (const CudaDevice &device : devices) {
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("#%zu") YELLOW(" %s") GREEN_BOLD(" %s ") WHITE_BOLD("%u/%u MHz") " smx:" WHITE_BOLD("%u") " arch:" WHITE_BOLD("%u%u") " mem:" CYAN("%zu/%zu") " MB",
@@ -156,13 +160,13 @@ xmrig::CudaBackend::~CudaBackend()
bool xmrig::CudaBackend::isEnabled() const
{
return false;
return d_ptr->controller->config()->cuda().isEnabled() && CudaLib::isInitialized() && !d_ptr->devices.empty();
}
bool xmrig::CudaBackend::isEnabled(const Algorithm &algorithm) const
{
return false;
return !d_ptr->controller->config()->cuda().threads().get(algorithm).isEmpty();
}

src/backend/cuda/CudaConfig.cpp View file

@@ -25,6 +25,7 @@
#include "backend/cuda/CudaConfig.h"
#include "backend/cuda/CudaConfig_gen.h"
#include "backend/cuda/wrappers/CudaLib.h"
#include "base/io/json/Json.h"
#include "base/io/log/Log.h"
#include "rapidjson/document.h"
@@ -91,13 +92,26 @@ void xmrig::CudaConfig::generate()
return;
}
if (!CudaLib::init(loader())) {
return;
}
if (!CudaLib::runtimeVersion() || !CudaLib::driverVersion() || !CudaLib::deviceCount()) {
return;
}
const auto devices = CudaLib::devices(bfactor(), bsleep());
if (devices.empty()) {
return;
}
size_t count = 0;
// count += xmrig::generate<Algorithm::CN>(m_threads, devices);
// count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
// count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
// count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
// count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
count += xmrig::generate<Algorithm::CN>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
m_shouldSave = count > 0;
}

src/backend/cuda/CudaConfig.h View file

@@ -45,6 +45,8 @@ public:
inline bool isShouldSave() const { return m_shouldSave; }
inline const String &loader() const { return m_loader; }
inline const Threads<CudaThreads> &threads() const { return m_threads; }
inline int32_t bfactor() const { return m_bfactor; }
inline int32_t bsleep() const { return m_bsleep; }
private:
void generate();
@@ -55,6 +57,14 @@ private:
std::vector<uint32_t> m_devicesHint;
String m_loader;
Threads<CudaThreads> m_threads;
# ifdef _WIN32
int32_t m_bfactor = 6;
int32_t m_bsleep = 25;
# else
int32_t m_bfactor = 0;
int32_t m_bsleep = 0;
# endif
};
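
The platform-specific defaults above (bfactor 6 / bsleep 25 on Windows, 0 / 0 elsewhere) follow the usual CUDA-miner convention: bfactor asks the plugin to split each kernel launch into 2^bfactor smaller launches, and bsleep inserts a pause of that many microseconds between them, so a single long-running kernel cannot freeze the desktop or trip the Windows display-driver watchdog. A minimal stand-alone sketch of that splitting scheme, not code from this commit (runInPieces and launchPiece are illustrative names):

#include <chrono>
#include <cstdint>
#include <thread>

// Run one unit of GPU work in 2^bfactor pieces, pausing bsleep microseconds
// between pieces; launchPiece stands in for a partial kernel launch.
template<typename F>
void runInPieces(uint32_t bfactor, uint32_t bsleep, F &&launchPiece)
{
    const uint32_t pieces = 1u << bfactor;

    for (uint32_t i = 0; i < pieces; ++i) {
        launchPiece(i, pieces);

        if (bsleep > 0 && i + 1 < pieces) {
            std::this_thread::sleep_for(std::chrono::microseconds(bsleep));
        }
    }
}

int main()
{
    runInPieces(6, 25, [](uint32_t piece, uint32_t total) { /* launch kernel piece `piece` of `total` */ });
}

With bfactor 0 the loop degenerates to a single launch, which is why the non-Windows defaults are zero.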

src/backend/cuda/CudaConfig_gen.h View file

@@ -22,12 +22,13 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_OCLCONFIG_GEN_H
#define XMRIG_OCLCONFIG_GEN_H
#ifndef XMRIG_CUDACONFIG_GEN_H
#define XMRIG_CUDACONFIG_GEN_H
#include "backend/common/Threads.h"
#include "backend/cuda/CudaThreads.h"
#include "backend/cuda/wrappers/CudaDevice.h"
#include <algorithm>
@@ -36,7 +37,76 @@
namespace xmrig {
static inline size_t generate(const char *key, Threads<CudaThreads> &threads, const Algorithm &algorithm, const std::vector<CudaDevice> &devices)
{
if (threads.isExist(algorithm) || threads.has(key)) {
return 0;
}
return threads.move(key, CudaThreads(devices, algorithm));
}
template<Algorithm::Family FAMILY>
static inline size_t generate(Threads<CudaThreads> &, const std::vector<CudaDevice> &) { return 0; }
template<>
size_t inline generate<Algorithm::CN>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
{
size_t count = 0;
count += generate("cn", threads, Algorithm::CN_1, devices);
count += generate("cn/2", threads, Algorithm::CN_2, devices);
if (!threads.isExist(Algorithm::CN_0)) {
threads.disable(Algorithm::CN_0);
count++;
}
# ifdef XMRIG_ALGO_CN_GPU
count += generate("cn/gpu", threads, Algorithm::CN_GPU, devices);
# endif
return count;
}
#ifdef XMRIG_ALGO_CN_LITE
template<>
size_t inline generate<Algorithm::CN_LITE>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
{
size_t count = generate("cn-lite", threads, Algorithm::CN_LITE_1, devices);
if (!threads.isExist(Algorithm::CN_LITE_0)) {
threads.disable(Algorithm::CN_LITE_0);
++count;
}
return count;
}
#endif
#ifdef XMRIG_ALGO_CN_HEAVY
template<>
size_t inline generate<Algorithm::CN_HEAVY>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
{
return generate("cn-heavy", threads, Algorithm::CN_HEAVY_0, devices);
}
#endif
#ifdef XMRIG_ALGO_CN_PICO
template<>
size_t inline generate<Algorithm::CN_PICO>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
{
return generate("cn-pico", threads, Algorithm::CN_PICO_0, devices);
}
#endif
} /* namespace xmrig */
#endif /* XMRIG_OCLCONFIG_GEN_H */
#endif /* XMRIG_CUDACONFIG_GEN_H */
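
This header uses a common idiom: a generic generate<FAMILY>() that produces nothing, plus explicit specializations that are only compiled when the matching XMRIG_ALGO_* macro is defined. CudaConfig::generate() can therefore call every family unconditionally; families disabled at build time fall back to the generic template and contribute zero profiles. A self-contained sketch of that idiom with illustrative names (Family, the return values and the printed count are not from this commit):

#include <cstddef>
#include <iostream>

enum class Family { CN, CN_LITE, RANDOM_X };

// Generic template: a family without a specialization generates no profiles.
template<Family F>
static inline size_t generate() { return 0; }

// Specializations stand in for the XMRIG_ALGO_*-guarded versions above.
template<> size_t inline generate<Family::CN>()       { return 2; }  // e.g. "cn" and "cn/2"
template<> size_t inline generate<Family::RANDOM_X>() { return 1; }  // e.g. "rx"

int main()
{
    size_t count = 0;
    count += generate<Family::CN>();
    count += generate<Family::CN_LITE>();   // no specialization -> generic template -> 0
    count += generate<Family::RANDOM_X>();

    std::cout << "generated " << count << " profiles\n";   // prints: generated 3 profiles
}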

src/backend/cuda/CudaThread.cpp View file

@@ -24,7 +24,7 @@
#include "backend/cuda/CudaThread.h"
#include "backend/cuda/wrappers/CudaLib.h"
#include "base/io/json/Json.h"
#include "rapidjson/document.h"
@@ -34,6 +34,12 @@
namespace xmrig {
static const char *kAffinity = "affinity";
static const char *kBFactor = "bfactor";
static const char *kBlocks = "blocks";
static const char *kBSleep = "bsleep";
static const char *kIndex = "index";
static const char *kThreads = "threads";
} // namespace xmrig
@@ -43,12 +49,35 @@ xmrig::CudaThread::CudaThread(const rapidjson::Value &value)
if (!value.IsObject()) {
return;
}
m_index = Json::getUint(value, kIndex);
m_threads = Json::getInt(value, kThreads);
m_blocks = Json::getInt(value, kBlocks);
m_bfactor = std::min(Json::getUint(value, kBFactor, m_bfactor), 12u);
m_bsleep = Json::getUint(value, kBSleep, m_bsleep);
m_affinity = Json::getUint64(value, kAffinity, m_affinity);
}
xmrig::CudaThread::CudaThread(uint32_t index, nvid_ctx *ctx) :
m_blocks(CudaLib::deviceInt(ctx, CudaLib::DeviceBlocks)),
m_threads(CudaLib::deviceInt(ctx, CudaLib::DeviceThreads)),
m_index(index),
m_bfactor(CudaLib::deviceUint(ctx, CudaLib::DeviceBFactor)),
m_bsleep(CudaLib::deviceUint(ctx, CudaLib::DeviceBSleep))
{
}
bool xmrig::CudaThread::isEqual(const CudaThread &other) const
{
return false;
return m_blocks == other.m_blocks &&
m_threads == other.m_threads &&
m_affinity == other.m_affinity &&
m_index == other.m_index &&
m_bfactor == other.m_bfactor &&
m_bsleep == other.m_bsleep;
}
@@ -59,5 +88,12 @@ rapidjson::Value xmrig::CudaThread::toJSON(rapidjson::Document &doc) const
Value out(kObjectType);
out.AddMember(StringRef(kIndex), index(), allocator);
out.AddMember(StringRef(kThreads), threads(), allocator);
out.AddMember(StringRef(kBlocks), blocks(), allocator);
out.AddMember(StringRef(kBFactor), bfactor(), allocator);
out.AddMember(StringRef(kBSleep), bsleep(), allocator);
out.AddMember(StringRef(kAffinity), affinity(), allocator);
return out;
}
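
Given the keys registered above, each auto-generated thread ends up in config.json as a small object holding index, threads, blocks, bfactor, bsleep and affinity. A stand-alone rapidjson sketch that reproduces the same shape (the numeric values are made up for illustration, not defaults chosen by this commit):

#include "rapidjson/document.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"

#include <cstdio>

int main()
{
    using namespace rapidjson;

    Document doc(kObjectType);
    auto &allocator = doc.GetAllocator();

    doc.AddMember(StringRef("index"),    0u, allocator);
    doc.AddMember(StringRef("threads"),  32, allocator);
    doc.AddMember(StringRef("blocks"),   60, allocator);
    doc.AddMember(StringRef("bfactor"),  6u, allocator);
    doc.AddMember(StringRef("bsleep"),   25u, allocator);
    doc.AddMember(StringRef("affinity"), -1, allocator);

    StringBuffer buffer;
    Writer<StringBuffer> writer(buffer);
    doc.Accept(writer);

    // {"index":0,"threads":32,"blocks":60,"bfactor":6,"bsleep":25,"affinity":-1}
    std::printf("%s\n", buffer.GetString());

    return 0;
}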

src/backend/cuda/CudaThread.h View file

@@ -26,14 +26,13 @@
#define XMRIG_CUDATHREAD_H
using nvid_ctx = struct nvid_ctx;
#include "crypto/common/Algorithm.h"
#include "rapidjson/fwd.h"
#include <bitset>
#include <vector>
namespace xmrig {
@@ -41,10 +40,16 @@ class CudaThread
{
public:
CudaThread() = delete;
CudaThread(const rapidjson::Value &value);
CudaThread(uint32_t index, nvid_ctx *ctx);
inline bool isValid() const { return false; }
inline bool isValid() const { return m_blocks > 0 && m_threads > 0; }
inline int32_t bfactor() const { return static_cast<int32_t>(m_bfactor); }
inline int32_t blocks() const { return m_blocks; }
inline int32_t bsleep() const { return static_cast<int32_t>(m_bsleep); }
inline int32_t threads() const { return m_threads; }
inline int64_t affinity() const { return m_affinity; }
inline uint32_t index() const { return m_index; }
inline bool operator!=(const CudaThread &other) const { return !isEqual(other); }
inline bool operator==(const CudaThread &other) const { return isEqual(other); }
@@ -53,6 +58,18 @@ public:
rapidjson::Value toJSON(rapidjson::Document &doc) const;
private:
int32_t m_blocks = 0;
int32_t m_threads = 0;
int64_t m_affinity = -1;
uint32_t m_index = 0;
# ifdef _WIN32
uint32_t m_bfactor = 6;
uint32_t m_bsleep = 25;
# else
uint32_t m_bfactor = 0;
uint32_t m_bsleep = 0;
# endif
};

src/backend/cuda/CudaThreads.cpp View file

@@ -44,6 +44,14 @@ xmrig::CudaThreads::CudaThreads(const rapidjson::Value &value)
}
xmrig::CudaThreads::CudaThreads(const std::vector<CudaDevice> &devices, const Algorithm &algorithm)
{
for (const auto &device : devices) {
device.generate(algorithm, *this);
}
}
bool xmrig::CudaThreads::isEqual(const CudaThreads &other) const
{
if (isEmpty() && other.isEmpty()) {

src/backend/cuda/CudaThreads.h View file

@@ -30,6 +30,7 @@
#include "backend/cuda/CudaThread.h"
#include "backend/cuda/wrappers/CudaDevice.h"
namespace xmrig {
@@ -40,6 +41,7 @@ class CudaThreads
public:
CudaThreads() = default;
CudaThreads(const rapidjson::Value &value);
CudaThreads(const std::vector<CudaDevice> &devices, const Algorithm &algorithm);
inline bool isEmpty() const { return m_data.empty(); }
inline const std::vector<CudaThread> &data() const { return m_data; }

src/backend/cuda/wrappers/CudaDevice.cpp View file

@@ -34,12 +34,11 @@
#include <algorithm>
xmrig::CudaDevice::CudaDevice(uint32_t index) :
xmrig::CudaDevice::CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep) :
m_index(index)
{
auto ctx = CudaLib::alloc(index, 0, 0, 0, 0, Algorithm::INVALID);
if (CudaLib::deviceInfo(ctx) != 0) {
auto ctx = CudaLib::alloc(index, bfactor, bsleep);
if (CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID) != 0) {
CudaLib::release(ctx);
return;
@@ -105,6 +104,11 @@ uint32_t xmrig::CudaDevice::smx() const
void xmrig::CudaDevice::generate(const Algorithm &algorithm, CudaThreads &threads) const
{
if (CudaLib::deviceInfo(m_ctx, -1, -1, algorithm) != 0) {
return;
}
threads.add(CudaThread(m_index, m_ctx));
}

src/backend/cuda/wrappers/CudaDevice.h View file

@@ -46,7 +46,7 @@ public:
CudaDevice() = delete;
CudaDevice(const CudaDevice &other) = delete;
CudaDevice(CudaDevice &&other) noexcept;
CudaDevice(uint32_t index);
CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep);
~CudaDevice();
size_t freeMemSize() const;

src/backend/cuda/wrappers/CudaLib.cpp View file

@@ -58,9 +58,9 @@ static const char *kSymbolNotFound = "symbol not found";
static const char *kVersion = "version";
using alloc_t = nvid_ctx * (*)(size_t, int32_t, int32_t, int32_t, int32_t, int32_t);
using alloc_t = nvid_ctx * (*)(uint32_t, int32_t, int32_t);
using deviceCount_t = uint32_t (*)();
using deviceInfo_t = int32_t (*)(nvid_ctx *);
using deviceInfo_t = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t);
using deviceInt_t = int32_t (*)(nvid_ctx *, CudaLib::DeviceProperty);
using deviceName_t = const char * (*)(nvid_ctx *);
using deviceUint_t = uint32_t (*)(nvid_ctx *, CudaLib::DeviceProperty);
@@ -129,9 +129,9 @@ const char *xmrig::CudaLib::pluginVersion() noexcept
}
int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx) noexcept
int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept
{
return pDeviceInfo(ctx);
return pDeviceInfo(ctx, blocks, threads, algorithm);
}
@@ -141,13 +141,13 @@ int32_t xmrig::CudaLib::deviceInt(nvid_ctx *ctx, DeviceProperty property) noexce
}
nvid_ctx *xmrig::CudaLib::alloc(size_t id, int blocks, int threads, int bfactor, int bsleep, const Algorithm &algorithm) noexcept
nvid_ctx *xmrig::CudaLib::alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept
{
return pAlloc(id, blocks, threads, bfactor, bsleep, algorithm);
return pAlloc(id, bfactor, bsleep);
}
std::vector<xmrig::CudaDevice> xmrig::CudaLib::devices() noexcept
std::vector<xmrig::CudaDevice> xmrig::CudaLib::devices(int32_t bfactor, int32_t bsleep) noexcept
{
const uint32_t count = deviceCount();
if (!count) {
@@ -158,7 +158,7 @@ std::vector<xmrig::CudaDevice> xmrig::CudaLib::devices() noexcept
out.reserve(count);
for (uint32_t i = 0; i < count; ++i) {
CudaDevice device(i);
CudaDevice device(i, bfactor, bsleep);
if (device.isValid()) {
out.emplace_back(std::move(device));
}

src/backend/cuda/wrappers/CudaLib.h View file

@@ -72,10 +72,10 @@ public:
static const char *deviceName(nvid_ctx *ctx) noexcept;
static const char *pluginVersion() noexcept;
static int deviceInfo(nvid_ctx *ctx) noexcept;
static int deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept;
static int32_t deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept;
static nvid_ctx *alloc(size_t id, int blocks, int threads, int bfactor, int bsleep, const Algorithm &algorithm) noexcept;
static std::vector<CudaDevice> devices() noexcept;
static nvid_ctx *alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept;
static std::vector<CudaDevice> devices(int32_t bfactor, int32_t bsleep) noexcept;
static uint32_t deviceCount() noexcept;
static uint32_t deviceUint(nvid_ctx *ctx, DeviceProperty property) noexcept;
static uint32_t driverVersion() noexcept;

src/backend/opencl/OclBackend.cpp View file

@@ -319,7 +319,7 @@ void xmrig::OclBackend::setJob(const Job &job)
return stop();
}
std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices, tag);
std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices);
if (!d_ptr->threads.empty() && d_ptr->threads.size() == threads.size() && std::equal(d_ptr->threads.begin(), d_ptr->threads.end(), threads.begin())) {
return;
}

src/backend/opencl/OclConfig.cpp View file

@@ -24,6 +24,7 @@
#include "backend/opencl/OclConfig.h"
#include "backend/common/Tags.h"
#include "backend/opencl/OclConfig_gen.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/json/Json.h"
@@ -113,7 +114,7 @@ rapidjson::Value xmrig::OclConfig::toJSON(rapidjson::Document &doc) const
}
std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const
std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices) const
{
std::vector<OclLaunchData> out;
const OclThreads &threads = m_threads.get(algorithm);
@@ -126,7 +127,7 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
for (const OclThread &thread : threads.data()) {
if (thread.index() >= devices.size()) {
LOG_INFO("%s" YELLOW(" skip non-existing device with index ") YELLOW_BOLD("%u"), tag, thread.index());
LOG_INFO("%s" YELLOW(" skip non-existing device with index ") YELLOW_BOLD("%u"), ocl_tag(), thread.index());
continue;
}

src/backend/opencl/OclConfig.h View file

@@ -42,7 +42,7 @@ public:
OclPlatform platform() const;
rapidjson::Value toJSON(rapidjson::Document &doc) const;
std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const;
std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices) const;
void read(const rapidjson::Value &value);
inline bool isCacheEnabled() const { return m_cache; }

src/backend/opencl/OclThreads.cpp View file

@@ -46,7 +46,7 @@ xmrig::OclThreads::OclThreads(const rapidjson::Value &value)
xmrig::OclThreads::OclThreads(const std::vector<OclDevice> &devices, const Algorithm &algorithm)
{
for (const OclDevice &device : devices) {
for (const auto &device : devices) {
device.generate(algorithm, *this);
}
}