mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-22 11:39:33 +00:00
Restored OclCache.
This commit is contained in:
parent
47b8cb6044
commit
ec1839d580
27 changed files with 290 additions and 72 deletions
|
@ -15,6 +15,7 @@ option(WITH_TLS "Enable OpenSSL support" ON)
|
|||
option(WITH_ASM "Enable ASM PoW implementations" ON)
|
||||
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
|
||||
option(WITH_OPENCL "Enable OpenCL backend" OFF)
|
||||
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
||||
|
||||
option(BUILD_STATIC "Build static binary" OFF)
|
||||
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
||||
|
|
6
src/3rdparty/base32/base32.h
vendored
6
src/3rdparty/base32/base32.h
vendored
|
@ -25,8 +25,8 @@
|
|||
// All functions return the number of output bytes or -1 on error. If the
|
||||
// output buffer is too small, the result will silently be truncated.
|
||||
|
||||
#ifndef _BASE32_H_
|
||||
#define _BASE32_H_
|
||||
#ifndef XMRIG_BASE32_H
|
||||
#define XMRIG_BASE32_H
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
@ -65,4 +65,4 @@ int base32_encode(const uint8_t *data, int length, uint8_t *result, int bufSize)
|
|||
}
|
||||
|
||||
|
||||
#endif /* _BASE32_H_ */
|
||||
#endif /* XMRIG_BASE32_H */
|
||||
|
|
|
@ -42,18 +42,18 @@ template<class T>
|
|||
class Thread
|
||||
{
|
||||
public:
|
||||
inline Thread(IBackend *backend, size_t index, const T &config) : m_index(index), m_config(config), m_backend(backend) {}
|
||||
inline Thread(IBackend *backend, size_t id, const T &config) : m_id(id), m_config(config), m_backend(backend) {}
|
||||
inline ~Thread() { m_thread.join(); delete m_worker; }
|
||||
|
||||
inline const T &config() const { return m_config; }
|
||||
inline IBackend *backend() const { return m_backend; }
|
||||
inline IWorker *worker() const { return m_worker; }
|
||||
inline size_t index() const { return m_index; }
|
||||
inline size_t id() const { return m_id; }
|
||||
inline void setWorker(IWorker *worker) { m_worker = worker; }
|
||||
inline void start(void (*callback) (void *)) { m_thread = std::thread(callback, this); }
|
||||
|
||||
private:
|
||||
const size_t m_index = 0;
|
||||
const size_t m_id = 0;
|
||||
const T m_config;
|
||||
IBackend *m_backend;
|
||||
IWorker *m_worker = nullptr;
|
||||
|
|
|
@ -134,7 +134,7 @@ void xmrig::Workers<T>::tick(uint64_t)
|
|||
return;
|
||||
}
|
||||
|
||||
d_ptr->hashrate->add(handle->index(), handle->worker()->hashCount(), handle->worker()->timestamp());
|
||||
d_ptr->hashrate->add(handle->id(), handle->worker()->hashCount(), handle->worker()->timestamp());
|
||||
}
|
||||
|
||||
d_ptr->hashrate->updateHighest();
|
||||
|
@ -175,19 +175,19 @@ xmrig::IWorker *xmrig::Workers<CpuLaunchData>::create(Thread<CpuLaunchData> *han
|
|||
{
|
||||
switch (handle->config().intensity) {
|
||||
case 1:
|
||||
return new CpuWorker<1>(handle->index(), handle->config());
|
||||
return new CpuWorker<1>(handle->id(), handle->config());
|
||||
|
||||
case 2:
|
||||
return new CpuWorker<2>(handle->index(), handle->config());
|
||||
return new CpuWorker<2>(handle->id(), handle->config());
|
||||
|
||||
case 3:
|
||||
return new CpuWorker<3>(handle->index(), handle->config());
|
||||
return new CpuWorker<3>(handle->id(), handle->config());
|
||||
|
||||
case 4:
|
||||
return new CpuWorker<4>(handle->index(), handle->config());
|
||||
return new CpuWorker<4>(handle->id(), handle->config());
|
||||
|
||||
case 5:
|
||||
return new CpuWorker<5>(handle->index(), handle->config());
|
||||
return new CpuWorker<5>(handle->id(), handle->config());
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
|
@ -201,7 +201,7 @@ template class Workers<CpuLaunchData>;
|
|||
template<>
|
||||
xmrig::IWorker *xmrig::Workers<OclLaunchData>::create(Thread<OclLaunchData> *handle)
|
||||
{
|
||||
return new OclWorker(handle->index(), handle->config());
|
||||
return new OclWorker(handle->id(), handle->config());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -53,8 +53,8 @@ static constexpr uint32_t kReserveCount = 4096;
|
|||
|
||||
|
||||
template<size_t N>
|
||||
xmrig::CpuWorker<N>::CpuWorker(size_t index, const CpuLaunchData &data) :
|
||||
Worker(index, data.affinity, data.priority),
|
||||
xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
|
||||
Worker(id, data.affinity, data.priority),
|
||||
m_algorithm(data.algorithm),
|
||||
m_assembly(data.assembly),
|
||||
m_hwAES(data.hwAES),
|
||||
|
|
|
@ -43,7 +43,7 @@ template<size_t N>
|
|||
class CpuWorker : public Worker
|
||||
{
|
||||
public:
|
||||
CpuWorker(size_t index, const CpuLaunchData &data);
|
||||
CpuWorker(size_t id, const CpuLaunchData &data);
|
||||
~CpuWorker() override;
|
||||
|
||||
protected:
|
||||
|
|
|
@ -260,7 +260,7 @@ void xmrig::OclBackend::setJob(const Job &job)
|
|||
|
||||
const OclConfig &cl = d_ptr->controller->config()->cl();
|
||||
|
||||
std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->devices, tag);
|
||||
std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices, tag);
|
||||
if (!d_ptr->threads.empty() && d_ptr->threads.size() == threads.size() && std::equal(d_ptr->threads.begin(), d_ptr->threads.end(), threads.begin())) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -23,4 +23,162 @@
|
|||
*/
|
||||
|
||||
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
|
||||
|
||||
#include "3rdparty/base32/base32.h"
|
||||
#include "backend/opencl/interfaces/IOclRunner.h"
|
||||
#include "backend/opencl/OclCache.h"
|
||||
#include "backend/opencl/OclLaunchData.h"
|
||||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "crypto/common/keccak.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
static const char *tag = MAGENTA_BG_BOLD(WHITE_BOLD_S " ocl ");
|
||||
static std::mutex mutex;
|
||||
|
||||
|
||||
static cl_program createFromSource(const IOclRunner *runner)
|
||||
{
|
||||
LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " YELLOW_BOLD("compiling..."), tag, runner->data().device.index());
|
||||
|
||||
cl_int ret;
|
||||
cl_device_id device = runner->data().device.id();
|
||||
const char *source = runner->source();
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
|
||||
cl_program program = OclLib::createProgramWithSource(runner->data().ctx, 1, &source, nullptr, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (OclLib::buildProgram(program, 1, &device, runner->buildOptions()) != CL_SUCCESS) {
|
||||
printf("BUILD LOG:\n%s\n", OclLib::getProgramBuildLog(program, device).data());
|
||||
|
||||
OclLib::releaseProgram(program);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " GREEN_BOLD("compilation completed") BLACK_BOLD( " (%.3fs)"),
|
||||
tag, runner->data().device.index(), (Chrono::steadyMSecs() - ts) / 1000.0);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
|
||||
static cl_program createFromBinary(const IOclRunner *runner, const std::string &fileName)
|
||||
{
|
||||
std::ifstream file(fileName, std::ofstream::in | std::ofstream::binary);
|
||||
if (!file.good()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::ostringstream ss;
|
||||
ss << file.rdbuf();
|
||||
|
||||
const std::string s = ss.str();
|
||||
const size_t bin_size = s.size();
|
||||
auto data_ptr = s.data();
|
||||
cl_device_id device = runner->data().device.id();
|
||||
|
||||
cl_int clStatus;
|
||||
cl_int ret;
|
||||
cl_program program = OclLib::createProgramWithBinary(runner->data().ctx, 1, &device, &bin_size, reinterpret_cast<const unsigned char **>(&data_ptr), &clStatus, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (OclLib::buildProgram(program, 1, &device) != CL_SUCCESS) {
|
||||
OclLib::releaseProgram(program);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
cl_program xmrig::OclCache::build(const IOclRunner *runner)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
|
||||
if (Nonce::sequence(Nonce::OPENCL) == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::string fileName;
|
||||
if (runner->data().cache) {
|
||||
# ifdef _WIN32
|
||||
fileName = prefix() + "\\xmrig\\.cache\\" + cacheKey(runner) + ".bin";
|
||||
# else
|
||||
fileName = prefix() + "/.cache/" + cacheKey(runner) + ".bin";
|
||||
# endif
|
||||
|
||||
cl_program program = createFromBinary(runner, fileName);
|
||||
if (program) {
|
||||
return program;
|
||||
}
|
||||
}
|
||||
|
||||
cl_program program = createFromSource(runner);
|
||||
if (runner->data().cache && program) {
|
||||
save(program, fileName);
|
||||
}
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
|
||||
std::string xmrig::OclCache::cacheKey(const char *deviceKey, const char *options, const char *source)
|
||||
{
|
||||
std::string in(source);
|
||||
in += options;
|
||||
in += deviceKey;
|
||||
|
||||
uint8_t hash[200];
|
||||
keccak(in.c_str(), in.size(), hash);
|
||||
|
||||
uint8_t result[32] = { 0 };
|
||||
base32_encode(hash, 12, result, sizeof(result));
|
||||
|
||||
return reinterpret_cast<char *>(result);
|
||||
}
|
||||
|
||||
|
||||
std::string xmrig::OclCache::cacheKey(const IOclRunner *runner)
|
||||
{
|
||||
return cacheKey(runner->deviceKey(), runner->buildOptions(), runner->source());
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclCache::save(cl_program program, const std::string &fileName)
|
||||
{
|
||||
size_t size = 0;
|
||||
if (OclLib::getProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size), &size) != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<char> binary(size);
|
||||
|
||||
char *data = binary.data();
|
||||
if (OclLib::getProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *), &data) != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
createDirectory();
|
||||
|
||||
std::ofstream file_stream;
|
||||
file_stream.open(fileName, std::ofstream::out | std::ofstream::binary);
|
||||
file_stream.write(binary.data(), static_cast<int64_t>(binary.size()));
|
||||
file_stream.close();
|
||||
}
|
||||
|
|
|
@ -29,17 +29,26 @@
|
|||
#include <string>
|
||||
|
||||
|
||||
typedef struct _cl_program *cl_program;
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class IOclRunner;
|
||||
|
||||
|
||||
class OclCache
|
||||
{
|
||||
public:
|
||||
|
||||
static cl_program build(const IOclRunner *runner);
|
||||
static std::string cacheKey(const char *deviceKey, const char *options, const char *source);
|
||||
static std::string cacheKey(const IOclRunner *runner);
|
||||
|
||||
private:
|
||||
void createDirectory() const;
|
||||
static std::string prefix();
|
||||
static void createDirectory();
|
||||
static void save(cl_program program, const std::string &fileName);
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include "backend/opencl/OclCache.h"
|
||||
|
||||
|
||||
void xmrig::OclCache::createDirectory() const
|
||||
void xmrig::OclCache::createDirectory()
|
||||
{
|
||||
std::string path = prefix() + "/.cache";
|
||||
mkdir(path.c_str(), 0744);
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
#include "backend/opencl/OclCache.h"
|
||||
|
||||
|
||||
void xmrig::OclCache::createDirectory() const
|
||||
void xmrig::OclCache::createDirectory()
|
||||
{
|
||||
std::string path = prefix() + "/xmrig";
|
||||
_mkdir(path.c_str());
|
||||
|
|
|
@ -149,7 +149,7 @@ rapidjson::Value xmrig::OclConfig::toJSON(rapidjson::Document &doc) const
|
|||
}
|
||||
|
||||
|
||||
std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const std::vector<OclDevice> &devices, const char *tag) const
|
||||
std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const
|
||||
{
|
||||
std::vector<OclLaunchData> out;
|
||||
const OclThreads &threads = m_threads.get(algorithm);
|
||||
|
@ -166,7 +166,7 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
|
|||
continue;
|
||||
}
|
||||
|
||||
out.emplace_back(miner, algorithm, *this, thread, devices[thread.index()]);
|
||||
out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()]);
|
||||
}
|
||||
|
||||
return out;
|
||||
|
|
|
@ -42,7 +42,7 @@ public:
|
|||
|
||||
OclPlatform platform() const;
|
||||
rapidjson::Value toJSON(rapidjson::Document &doc) const;
|
||||
std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const std::vector<OclDevice> &devices, const char *tag) const;
|
||||
std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const;
|
||||
void read(const rapidjson::Value &value);
|
||||
|
||||
inline bool isCacheEnabled() const { return m_cache; }
|
||||
|
|
|
@ -28,11 +28,12 @@
|
|||
#include "backend/opencl/OclConfig.h"
|
||||
|
||||
|
||||
xmrig::OclLaunchData::OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclThread &thread, const OclDevice &device) :
|
||||
xmrig::OclLaunchData::OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclPlatform &platform, const OclThread &thread, const OclDevice &device) :
|
||||
algorithm(algorithm),
|
||||
cache(config.isCacheEnabled()),
|
||||
miner(miner),
|
||||
device(device),
|
||||
platform(platform),
|
||||
thread(thread)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
|
||||
#include "backend/opencl/OclThread.h"
|
||||
#include "backend/opencl/wrappers/OclDevice.h"
|
||||
#include "backend/opencl/wrappers/OclPlatform.h"
|
||||
#include "crypto/common/Algorithm.h"
|
||||
#include "crypto/common/Nonce.h"
|
||||
|
||||
|
@ -46,7 +47,7 @@ class Miner;
|
|||
class OclLaunchData
|
||||
{
|
||||
public:
|
||||
OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclThread &thread, const OclDevice &device);
|
||||
OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclPlatform &platform, const OclThread &thread, const OclDevice &device);
|
||||
|
||||
bool isEqual(const OclLaunchData &other) const;
|
||||
|
||||
|
@ -60,6 +61,7 @@ public:
|
|||
const bool cache;
|
||||
const Miner *miner;
|
||||
const OclDevice device;
|
||||
const OclPlatform platform;
|
||||
const OclThread thread;
|
||||
};
|
||||
|
||||
|
|
|
@ -48,15 +48,15 @@ static constexpr uint32_t kReserveCount = 4096;
|
|||
|
||||
|
||||
|
||||
xmrig::OclWorker::OclWorker(size_t index, const OclLaunchData &data) :
|
||||
Worker(index, data.thread.affinity(), -1),
|
||||
xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) :
|
||||
Worker(id, data.thread.affinity(), -1),
|
||||
m_algorithm(data.algorithm),
|
||||
m_miner(data.miner)
|
||||
{
|
||||
switch (m_algorithm.family()) {
|
||||
case Algorithm::RANDOM_X:
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
m_runner = new OclRxRunner(index, data);
|
||||
m_runner = new OclRxRunner(id, data);
|
||||
# endif
|
||||
break;
|
||||
|
||||
|
@ -67,9 +67,13 @@ xmrig::OclWorker::OclWorker(size_t index, const OclLaunchData &data) :
|
|||
break;
|
||||
|
||||
default:
|
||||
m_runner = new OclCnRunner(index, data);
|
||||
m_runner = new OclCnRunner(id, data);
|
||||
break;
|
||||
}
|
||||
|
||||
if (m_runner) {
|
||||
m_runner->build();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ class IOclRunner;
|
|||
class OclWorker : public Worker
|
||||
{
|
||||
public:
|
||||
OclWorker(size_t index, const OclLaunchData &data);
|
||||
OclWorker(size_t id, const OclLaunchData &data);
|
||||
~OclWorker() override;
|
||||
|
||||
protected:
|
||||
|
|
|
@ -573,7 +573,7 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, uin
|
|||
if (gIdx < Threads)
|
||||
# endif
|
||||
{
|
||||
#pragma unroll UNROLL_FACTOR
|
||||
#pragma unroll CN_UNROLL
|
||||
for (int i = 0; i < ITERATIONS; ++i) {
|
||||
ulong c[2];
|
||||
|
||||
|
@ -686,7 +686,7 @@ __kernel void cn1_v2_monero(__global uint4 *Scratchpad, __global ulong *states,
|
|||
uint2 division_result = as_uint2(states[12]);
|
||||
uint sqrt_result = as_uint2(states[13]).s0;
|
||||
|
||||
#pragma unroll UNROLL_FACTOR
|
||||
#pragma unroll CN_UNROLL
|
||||
for(int i = 0; i < ITERATIONS; ++i)
|
||||
{
|
||||
# ifdef __NV_CL_C_VERSION
|
||||
|
@ -846,7 +846,7 @@ __kernel void cn1_v2_half(__global uint4 *Scratchpad, __global ulong *states, ui
|
|||
uint2 division_result = as_uint2(states[12]);
|
||||
uint sqrt_result = as_uint2(states[13]).s0;
|
||||
|
||||
#pragma unroll UNROLL_FACTOR
|
||||
#pragma unroll CN_UNROLL
|
||||
for(int i = 0; i < 0x40000; ++i)
|
||||
{
|
||||
# ifdef __NV_CL_C_VERSION
|
||||
|
@ -1074,7 +1074,7 @@ __kernel void cn1_tube(__global uint4 *Scratchpad, __global ulong *states, uint
|
|||
{
|
||||
uint idx0 = a[0];
|
||||
|
||||
#pragma unroll UNROLL_FACTOR
|
||||
#pragma unroll CN_UNROLL
|
||||
for (int i = 0; i < ITERATIONS; ++i) {
|
||||
ulong c[2];
|
||||
|
||||
|
@ -1171,7 +1171,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
|
|||
{
|
||||
uint idx0 = a[0];
|
||||
|
||||
#pragma unroll UNROLL_FACTOR
|
||||
#pragma unroll CN_UNROLL
|
||||
for (int i = 0; i < ITERATIONS; ++i) {
|
||||
ulong c[2];
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ __kernel void cn1_v2_rwz(__global uint4 *Scratchpad, __global ulong *states, uin
|
|||
uint2 division_result = as_uint2(states[12]);
|
||||
uint sqrt_result = as_uint2(states[13]).s0;
|
||||
|
||||
#pragma unroll UNROLL_FACTOR
|
||||
#pragma unroll CN_UNROLL
|
||||
for(int i = 0; i < 0x60000; ++i)
|
||||
{
|
||||
# ifdef __NV_CL_C_VERSION
|
||||
|
@ -235,7 +235,7 @@ __kernel void cn1_v2_zls(__global uint4 *Scratchpad, __global ulong *states, uin
|
|||
uint2 division_result = as_uint2(states[12]);
|
||||
uint sqrt_result = as_uint2(states[13]).s0;
|
||||
|
||||
#pragma unroll UNROLL_FACTOR
|
||||
#pragma unroll CN_UNROLL
|
||||
for(int i = 0; i < 0x60000; ++i)
|
||||
{
|
||||
# ifdef __NV_CL_C_VERSION
|
||||
|
@ -395,7 +395,7 @@ __kernel void cn1_v2_double(__global uint4 *Scratchpad, __global ulong *states,
|
|||
uint2 division_result = as_uint2(states[12]);
|
||||
uint sqrt_result = as_uint2(states[13]).s0;
|
||||
|
||||
#pragma unroll UNROLL_FACTOR
|
||||
#pragma unroll CN_UNROLL
|
||||
for(int i = 0; i < 0x100000; ++i)
|
||||
{
|
||||
# ifdef __NV_CL_C_VERSION
|
||||
|
|
|
@ -199,7 +199,7 @@ struct SharedMemChunk
|
|||
float4 va[16];
|
||||
};
|
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE_GPU * 16, 1, 1)))
|
||||
__attribute__((reqd_work_group_size(WORKSIZE * 16, 1, 1)))
|
||||
__kernel void cn1_cn_gpu(__global int *lpad_in, __global int *spad, uint numThreads)
|
||||
{
|
||||
const uint gIdx = getIdx();
|
||||
|
@ -214,7 +214,7 @@ __kernel void cn1_cn_gpu(__global int *lpad_in, __global int *spad, uint numThre
|
|||
|
||||
__global int* lpad = (__global int*)((__global char*)lpad_in + MEMORY * (gIdx/16));
|
||||
|
||||
__local struct SharedMemChunk smem_in[WORKSIZE_GPU];
|
||||
__local struct SharedMemChunk smem_in[WORKSIZE];
|
||||
__local struct SharedMemChunk* smem = smem_in + chunk;
|
||||
|
||||
uint tid = get_local_id(0) % 16;
|
||||
|
|
|
@ -66,7 +66,7 @@ __kernel void cn1_cryptonight_r_N(__global uint4 *Scratchpad, __global ulong *st
|
|||
uint r2 = as_uint2(states[13]).s0;
|
||||
uint r3 = as_uint2(states[13]).s1;
|
||||
|
||||
#pragma unroll UNROLL_FACTOR
|
||||
#pragma unroll CN_UNROLL
|
||||
for(int i = 0; i < ITERATIONS; ++i)
|
||||
{
|
||||
# ifdef __NV_CL_C_VERSION
|
||||
|
|
|
@ -33,6 +33,7 @@ namespace xmrig {
|
|||
|
||||
|
||||
class Job;
|
||||
class OclLaunchData;
|
||||
|
||||
|
||||
class IOclRunner
|
||||
|
@ -42,6 +43,11 @@ public:
|
|||
|
||||
virtual bool selfTest() const = 0;
|
||||
virtual const char *buildOptions() const = 0;
|
||||
virtual const char *deviceKey() const = 0;
|
||||
virtual const char *source() const = 0;
|
||||
virtual const OclLaunchData &data() const = 0;
|
||||
virtual size_t threadId() const = 0;
|
||||
virtual void build() = 0;
|
||||
virtual void run(uint32_t *hashOutput) = 0;
|
||||
virtual void set(const Job &job) = 0;
|
||||
};
|
||||
|
|
|
@ -51,6 +51,12 @@ if (WITH_OPENCL)
|
|||
list(APPEND HEADERS_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.h)
|
||||
list(APPEND SOURCES_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.cpp)
|
||||
endif()
|
||||
|
||||
if (WITH_STRICT_CACHE)
|
||||
add_definitions(/DXMRIG_STRICT_OPENCL_CACHE)
|
||||
else()
|
||||
remove_definitions(/DXMRIG_STRICT_OPENCL_CACHE)
|
||||
endif()
|
||||
else()
|
||||
remove_definitions(/DXMRIG_FEATURE_OPENCL)
|
||||
|
||||
|
|
|
@ -23,29 +23,49 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "backend/opencl/cl/OclSource.h"
|
||||
#include "backend/opencl/OclCache.h"
|
||||
#include "backend/opencl/OclLaunchData.h"
|
||||
#include "backend/opencl/runners/OclBaseRunner.h"
|
||||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/net/stratum/Job.h"
|
||||
|
||||
|
||||
xmrig::OclBaseRunner::OclBaseRunner(size_t, const OclLaunchData &data) :
|
||||
xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) :
|
||||
m_algorithm(data.algorithm),
|
||||
m_ctx(data.ctx)
|
||||
m_source(OclSource::get(data.algorithm)),
|
||||
m_data(data),
|
||||
m_threadId(id)
|
||||
{
|
||||
cl_int ret;
|
||||
m_queue = OclLib::createCommandQueue(m_ctx, data.device.id(), &ret);
|
||||
m_queue = OclLib::createCommandQueue(data.ctx, data.device.id(), &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_input = OclLib::createBuffer(m_ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize, nullptr, &ret);
|
||||
m_output = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, nullptr, &ret);
|
||||
m_input = OclLib::createBuffer(data.ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize, nullptr, &ret);
|
||||
m_output = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, nullptr, &ret);
|
||||
|
||||
m_deviceKey = data.device.name();
|
||||
|
||||
# ifdef XMRIG_STRICT_OPENCL_CACHE
|
||||
m_deviceKey += ":";
|
||||
m_deviceKey += data.platform.version();
|
||||
|
||||
m_deviceKey += ":";
|
||||
m_deviceKey += OclLib::getDeviceString(data.device.id(), CL_DRIVER_VERSION);
|
||||
# endif
|
||||
|
||||
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__)
|
||||
m_deviceKey += ":64";
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
xmrig::OclBaseRunner::~OclBaseRunner()
|
||||
{
|
||||
OclLib::releaseProgram(m_program);
|
||||
OclLib::releaseMemObject(m_input);
|
||||
OclLib::releaseMemObject(m_output);
|
||||
|
||||
|
@ -55,14 +75,17 @@ xmrig::OclBaseRunner::~OclBaseRunner()
|
|||
|
||||
bool xmrig::OclBaseRunner::selfTest() const
|
||||
{
|
||||
return m_queue != nullptr && m_input != nullptr && m_output != nullptr && !m_options.empty();
|
||||
return m_queue != nullptr && m_input != nullptr && m_output != nullptr && !m_options.empty() && m_source != nullptr;
|
||||
}
|
||||
|
||||
|
||||
|
||||
const char *xmrig::OclBaseRunner::buildOptions() const
|
||||
void xmrig::OclBaseRunner::build()
|
||||
{
|
||||
return m_options.c_str();
|
||||
if (!selfTest()) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_program = OclCache::build(this);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -43,21 +43,31 @@ class OclLaunchData;
|
|||
class OclBaseRunner : public IOclRunner
|
||||
{
|
||||
public:
|
||||
OclBaseRunner(size_t index, const OclLaunchData &data);
|
||||
OclBaseRunner(size_t id, const OclLaunchData &data);
|
||||
~OclBaseRunner() override;
|
||||
|
||||
protected:
|
||||
inline const char *buildOptions() const override { return m_options.c_str(); }
|
||||
inline const char *deviceKey() const override { return m_deviceKey.c_str(); }
|
||||
inline const char *source() const override { return m_source; }
|
||||
inline const OclLaunchData &data() const override { return m_data; }
|
||||
inline size_t threadId() const override { return m_threadId; }
|
||||
|
||||
bool selfTest() const override;
|
||||
const char *buildOptions() const override;
|
||||
void build() override;
|
||||
void run(uint32_t *hashOutput) override;
|
||||
void set(const Job &job) override;
|
||||
|
||||
protected:
|
||||
Algorithm m_algorithm;
|
||||
cl_command_queue m_queue = nullptr;
|
||||
cl_context m_ctx;
|
||||
cl_mem m_input = nullptr;
|
||||
cl_mem m_output = nullptr;
|
||||
cl_program m_program = nullptr;
|
||||
const char *m_source;
|
||||
const OclLaunchData &m_data;
|
||||
const size_t m_threadId;
|
||||
std::string m_deviceKey;
|
||||
std::string m_options;
|
||||
};
|
||||
|
||||
|
|
|
@ -38,16 +38,16 @@ xmrig::OclCnRunner::OclCnRunner(size_t index, const OclLaunchData &data) : OclBa
|
|||
const size_t g_thd = data.thread.intensity();
|
||||
|
||||
cl_int ret;
|
||||
m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, data.algorithm.l3() * g_thd, nullptr, &ret);
|
||||
m_scratchpads = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, data.algorithm.l3() * g_thd, nullptr, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 200 * g_thd, nullptr, &ret);
|
||||
m_blake256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
|
||||
m_groestl256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
|
||||
m_jh256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
|
||||
m_skein512 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
|
||||
m_states = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, 200 * g_thd, nullptr, &ret);
|
||||
m_blake256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
|
||||
m_groestl256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
|
||||
m_jh256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
|
||||
m_skein512 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
|
||||
|
||||
uint32_t stridedIndex = data.thread.stridedIndex();
|
||||
if (data.device.vendorId() == OCL_VENDOR_NVIDIA) {
|
||||
|
|
|
@ -375,23 +375,13 @@ cl_int xmrig::OclLib::releaseCommandQueue(cl_command_queue command_queue)
|
|||
assert(pReleaseCommandQueue != nullptr);
|
||||
assert(pGetCommandQueueInfo != nullptr);
|
||||
|
||||
finish(command_queue);
|
||||
|
||||
cl_int ret = pReleaseCommandQueue(command_queue);
|
||||
if (ret != CL_SUCCESS) {
|
||||
LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseCommandQueue);
|
||||
}
|
||||
|
||||
cl_uint refs = 0;
|
||||
ret = pGetCommandQueueInfo(command_queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refs), &refs, nullptr);
|
||||
if (ret == CL_SUCCESS && refs > 0) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200));
|
||||
}
|
||||
|
||||
# ifndef NDEBUG
|
||||
ret = pGetCommandQueueInfo(command_queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refs), &refs, nullptr);
|
||||
assert(ret == CL_SUCCESS);
|
||||
assert(refs == 0);
|
||||
# endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -447,6 +437,10 @@ cl_int xmrig::OclLib::releaseProgram(cl_program program)
|
|||
{
|
||||
assert(pReleaseProgram != nullptr);
|
||||
|
||||
if (program == nullptr) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
const cl_int ret = pReleaseProgram(program);
|
||||
if (ret != CL_SUCCESS) {
|
||||
LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseProgram);
|
||||
|
@ -500,6 +494,8 @@ cl_program xmrig::OclLib::createProgramWithBinary(cl_context context, cl_uint nu
|
|||
auto result = pCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret);
|
||||
if (*errcode_ret != CL_SUCCESS) {
|
||||
LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithBinary);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -513,6 +509,8 @@ cl_program xmrig::OclLib::createProgramWithSource(cl_context context, cl_uint co
|
|||
auto result = pCreateProgramWithSource(context, count, strings, lengths, errcode_ret);
|
||||
if (*errcode_ret != CL_SUCCESS) {
|
||||
LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithSource);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
Loading…
Reference in a new issue