cn/r part 2 of 2.

This commit is contained in:
XMRig 2019-09-04 11:23:04 +07:00
parent b9e15389ca
commit 57f82f7504
16 changed files with 287 additions and 139 deletions

View file

@ -63,7 +63,7 @@ static cl_program createFromSource(const IOclRunner *runner)
if (OclLib::buildProgram(program, 1, &device, runner->buildOptions()) != CL_SUCCESS) {
printf("BUILD LOG:\n%s\n", OclLib::getProgramBuildLog(program, device).data());
OclLib::releaseProgram(program);
OclLib::release(program);
return nullptr;
}
@ -97,7 +97,7 @@ static cl_program createFromBinary(const IOclRunner *runner, const std::string &
}
if (OclLib::buildProgram(program, 1, &device) != CL_SUCCESS) {
OclLib::releaseProgram(program);
OclLib::release(program);
return nullptr;
}

View file

@ -55,6 +55,7 @@ public:
virtual const char *source() const = 0;
virtual const OclLaunchData &data() const = 0;
virtual size_t threadId() const = 0;
virtual uint32_t deviceIndex() const = 0;
virtual void build() = 0;
protected:

View file

@ -66,11 +66,10 @@ xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) :
xmrig::OclBaseRunner::~OclBaseRunner()
{
OclLib::releaseProgram(m_program);
OclLib::releaseMemObject(m_input);
OclLib::releaseMemObject(m_output);
OclLib::releaseCommandQueue(m_queue);
OclLib::release(m_program);
OclLib::release(m_input);
OclLib::release(m_output);
OclLib::release(m_queue);
}
@ -86,6 +85,12 @@ bool xmrig::OclBaseRunner::selfTest() const
}
// Returns the index stored in the thread configuration of this runner's
// launch data, i.e. data().thread.index().
uint32_t xmrig::OclBaseRunner::deviceIndex() const
{
    const auto &launch = data();

    return launch.thread.index();
}
void xmrig::OclBaseRunner::build()
{
if (!isReadyToBuild()) {

View file

@ -57,6 +57,7 @@ protected:
bool isReadyToBuild() const override;
bool selfTest() const override;
uint32_t deviceIndex() const override;
void build() override;
protected:

View file

@ -95,12 +95,17 @@ xmrig::OclCnRunner::~OclCnRunner()
delete m_cn1;
delete m_cn2;
OclLib::releaseMemObject(m_scratchpads);
OclLib::releaseMemObject(m_states);
OclLib::release(m_scratchpads);
OclLib::release(m_states);
for (size_t i = 0; i < BRANCH_MAX; ++i) {
delete m_branchKernels[i];
OclLib::releaseMemObject(m_branches[i]);
OclLib::release(m_branches[i]);
}
if (m_algorithm == Algorithm::CN_R) {
OclLib::release(m_cnr);
OclCnR::clear();
}
}
@ -205,7 +210,8 @@ bool xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
delete m_cn1;
m_height = job.height();
m_cn1 = new Cn1Kernel(OclCnR::get(*this, m_height), m_height);
m_cnr = OclCnR::get(*this, m_height);
m_cn1 = new Cn1Kernel(m_cnr, m_height);
}
if (!m_cn1->setArgs(m_input, m_scratchpads, m_states, intensity)) {

View file

@ -63,6 +63,7 @@ private:
cl_mem m_scratchpads = nullptr;
cl_mem m_states = nullptr;
cl_program m_cnr = nullptr;
Cn0Kernel *m_cn0 = nullptr;
Cn1Kernel *m_cn1 = nullptr;
Cn2Kernel *m_cn2 = nullptr;

View file

@ -22,157 +22,286 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cstring>
#include <mutex>
#include <regex>
#include <sstream>
#include <string>
#include <thread>
#include "backend/opencl/runners/tools/OclCnR.h"
#include "backend/opencl/cl/cn/cryptonight_r_cl.h"
#include "backend/opencl/interfaces/IOclRunner.h"
#include "backend/opencl/OclCache.h"
#include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/OclThread.h"
#include "backend/opencl/runners/tools/OclCnR.h"
#include "backend/opencl/wrappers/OclError.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/log/Log.h"
#include "base/tools/Baton.h"
#include "base/tools/Chrono.h"
#include "crypto/cn/CryptoNight_monero.h"
#include <cstring>
#include <mutex>
#include <regex>
#include <sstream>
#include <string>
#include <thread>
#include <uv.h>
namespace xmrig {
static std::string getCode(const V4_Instruction *code, int code_size)
{
std::stringstream s;
for (int i = 0; i < code_size; ++i) {
const V4_Instruction inst = code[i];
const uint32_t a = inst.dst_index;
const uint32_t b = inst.src_index;
switch (inst.opcode)
{
case MUL:
s << 'r' << a << "*=r" << b << ';';
break;
case ADD:
s << 'r' << a << "+=r" << b << '+' << inst.C << "U;";
break;
case SUB:
s << 'r' << a << "-=r" << b << ';';
break;
case ROR:
case ROL:
s << 'r' << a << "=rotate(r" << a << ((inst.opcode == ROR) ? ",ROT_BITS-r" : ",r") << b << ");";
break;
case XOR:
s << 'r' << a << "^=r" << b << ';';
break;
}
s << '\n';
}
return s.str();
}
class CacheEntry
class CnrCacheEntry
{
public:
inline CacheEntry(const Algorithm &algorithm, uint64_t heightOffset, uint32_t deviceIndex, cl_program program) :
algorithm(algorithm),
inline CnrCacheEntry(const Algorithm &algo, uint64_t offset, uint32_t index, cl_program program) :
program(program),
deviceIndex(deviceIndex),
heightOffset(heightOffset)
m_algo(algo),
m_index(index),
m_offset(offset)
{}
const Algorithm algorithm;
const cl_program program;
const uint32_t deviceIndex;
const uint64_t heightOffset;
inline bool isExpired(uint64_t offset) const { return m_offset + OclCnR::kHeightChunkSize < offset; }
inline bool match(const Algorithm &algo, uint64_t offset, uint32_t index) const { return m_algo == algo && m_offset == offset && m_index == index; }
inline bool match(const IOclRunner &runner, uint64_t offset) const { return match(runner.algorithm(), offset, runner.deviceIndex()); }
inline void release() { OclLib::release(program); }
cl_program program;
private:
const Algorithm m_algo;
const uint32_t m_index;
const uint64_t m_offset;
};
static std::mutex mutex;
static std::vector<CacheEntry> cache;
static cl_program search(const Algorithm &algorithm, uint64_t offset, uint32_t index)
class CnrCache
{
std::lock_guard<std::mutex> lock(mutex);
public:
CnrCache() = default;
for (const CacheEntry &entry : cache) {
if (entry.heightOffset == offset && entry.deviceIndex == index && entry.algorithm == algorithm) {
return entry.program;
// Convenience overload: takes the algorithm and device index from the runner
// and forwards to the three-argument search().
inline cl_program search(const IOclRunner &runner, uint64_t offset)
{
    return search(runner.algorithm(), offset, runner.deviceIndex());
}
// Looks up a cached program by (algorithm, height offset, device index).
// The scan runs with m_mutex held. Returns the stored program handle of the
// first matching entry, or nullptr when no entry matches.
inline cl_program search(const Algorithm &algo, uint64_t offset, uint32_t index)
{
    std::lock_guard<std::mutex> guard(m_mutex);

    for (auto it = m_data.cbegin(); it != m_data.cend(); ++it) {
        if (!it->match(algo, offset, index)) {
            continue;
        }

        return it->program;
    }

    return nullptr;
}
// Stores `program` under the key (algo, offset, index).
//
// If an entry with the same key is already cached, `program` is released and
// nothing is stored. Otherwise expired entries are collected via gc(offset)
// and the new entry is appended.
//
// The duplicate check and the insertion happen under one lock acquisition so
// that no other thread can insert the same key in between. The check is done
// inline instead of through search(), because search() locks m_mutex itself
// and std::mutex must not be locked twice by the same thread.
void add(const Algorithm &algo, uint64_t offset, uint32_t index, cl_program program)
{
    std::lock_guard<std::mutex> lock(m_mutex);

    for (const auto &entry : m_data) {
        if (entry.match(algo, offset, index)) {
            OclLib::release(program);
            return;
        }
    }

    gc(offset);
    m_data.emplace_back(algo, offset, index, program);
}
// Releases every cached program and empties the cache, all while holding
// m_mutex.
void clear()
{
    std::lock_guard<std::mutex> guard(m_mutex);

    for (size_t i = 0; i < m_data.size(); ++i) {
        m_data[i].release();
    }

    m_data.clear();
}
private:
void gc(uint64_t offset)
{
for (size_t i = 0; i < m_data.size();) {
const auto &entry = m_data[i];
if (entry.isExpired(offset)) {
m_data.back().release();
m_data.pop_back();
}
else {
++i;
}
}
}
return nullptr;
}
std::mutex m_mutex;
std::vector<CnrCacheEntry> m_data;
};
static inline cl_program search(const IOclRunner &runner, uint64_t offset) { return search(runner.algorithm(), offset, runner.data().thread.index()); }
static CnrCache cache;
cl_program build(const IOclRunner &runner, const std::string &source, uint64_t offset)
class CnrBuilder
{
std::lock_guard<std::mutex> lock(mutex);
public:
CnrBuilder() = default;
cl_int ret;
cl_device_id device = runner.data().device.id();
const char *s = source.c_str();
cl_program build(const IOclRunner &runner, uint64_t offset)
{
# ifdef APP_DEBUG
const uint64_t ts = Chrono::steadyMSecs();
# endif
cl_program program = OclLib::createProgramWithSource(runner.ctx(), 1, &s, nullptr, &ret);
if (ret != CL_SUCCESS) {
return nullptr;
std::lock_guard<std::mutex> lock(m_mutex);
cl_program program = cache.search(runner, offset);
if (program) {
return program;
}
cl_int ret;
const std::string source = getSource(offset);
cl_device_id device = runner.data().device.id();
const char *s = source.c_str();
program = OclLib::createProgramWithSource(runner.ctx(), 1, &s, nullptr, &ret);
if (ret != CL_SUCCESS) {
return nullptr;
}
if (OclLib::buildProgram(program, 1, &device, runner.buildOptions()) != CL_SUCCESS) {
printf("BUILD LOG:\n%s\n", OclLib::getProgramBuildLog(program, device).data());
OclLib::release(program);
return nullptr;
}
LOG_DEBUG(GREEN_BOLD("[ocl]") " programs for heights %" PRIu64 " - %" PRIu64 " compiled. (%" PRIu64 "ms)", offset, offset + OclCnR::kHeightChunkSize - 1, Chrono::steadyMSecs() - ts);
cache.add(runner.algorithm(), offset, runner.deviceIndex(), program);
return program;
}
if (OclLib::buildProgram(program, 1, &device, runner.buildOptions()) != CL_SUCCESS) {
printf("BUILD LOG:\n%s\n", OclLib::getProgramBuildLog(program, device).data());
private:
std::string getCode(const V4_Instruction *code, int code_size) const
{
std::stringstream s;
OclLib::releaseProgram(program);
return nullptr;
for (int i = 0; i < code_size; ++i) {
const V4_Instruction inst = code[i];
const uint32_t a = inst.dst_index;
const uint32_t b = inst.src_index;
switch (inst.opcode)
{
case MUL:
s << 'r' << a << "*=r" << b << ';';
break;
case ADD:
s << 'r' << a << "+=r" << b << '+' << inst.C << "U;";
break;
case SUB:
s << 'r' << a << "-=r" << b << ';';
break;
case ROR:
case ROL:
s << 'r' << a << "=rotate(r" << a << ((inst.opcode == ROR) ? ",ROT_BITS-r" : ",r") << b << ");";
break;
case XOR:
s << 'r' << a << "^=r" << b << ';';
break;
}
s << '\n';
}
return s.str();
}
cache.emplace_back(runner.algorithm(), offset, runner.data().thread.index(), program);
return program;
}
// Assembles the OpenCL source for one chunk of block heights.
//
// offset - first height of the chunk; kernels are generated for
//          offset .. offset + OclCnR::kHeightChunkSize - 1.
//
// The result starts with cryptonight_r_defines_cl, followed by one copy of
// cryptonight_r_cl per height in which XMRIG_INCLUDE_RANDOM_MATH is replaced
// by that height's generated code and KERNEL_NAME by "cn1_<height>".
std::string getSource(uint64_t offset) const
{
    // The patterns are constant, so the regexes are compiled once here
    // instead of once per generated kernel.
    const std::regex mathMarker("XMRIG_INCLUDE_RANDOM_MATH");
    const std::regex nameMarker("KERNEL_NAME");

    std::string source(cryptonight_r_defines_cl);

    for (size_t i = 0; i < OclCnR::kHeightChunkSize; ++i) {
        V4_Instruction code[256];
        const int code_size = v4_random_math_init<Algorithm::CN_R>(code, offset + i);
        const std::string kernel = std::regex_replace(cryptonight_r_cl, mathMarker, getCode(code, code_size));

        source += std::regex_replace(kernel, nameMarker, "cn1_" + std::to_string(offset + i));
    }

    return source;
}
std::mutex m_mutex;
};
// Payload attached to a uv_work_t request (via Baton<uv_work_t>): the runner
// to build for and the height offset of the chunk to build.
//
// The runner is stored as a reference, not a copy, so the referenced object
// has to stay alive for as long as this baton is used.
class CnrBaton : public Baton<uv_work_t>
{
public:
    inline CnrBaton(const IOclRunner &oclRunner, uint64_t heightOffset) :
        runner(oclRunner),
        offset(heightOffset)
    {
    }

    const IOclRunner &runner;   // borrowed, not owned
    const uint64_t offset;      // height offset handed to the builder
};
static CnrBuilder builder;
static std::mutex bg_mutex;
} // namespace xmrig
cl_program xmrig::OclCnR::get(const IOclRunner &runner, uint64_t height, bool background)
cl_program xmrig::OclCnR::get(const IOclRunner &runner, uint64_t height)
{
const uint64_t offset = (height / kHeightChunkSize) * kHeightChunkSize;
cl_program program = search(runner, offset);
if (offset + kHeightChunkSize - height == 1) {
auto baton = new CnrBaton(runner, offset + kHeightChunkSize);
uv_queue_work(uv_default_loop(), &baton->req,
[](uv_work_t *req) {
auto baton = static_cast<CnrBaton*>(req->data);
std::lock_guard<std::mutex> lock(bg_mutex);
builder.build(baton->runner, baton->offset);
},
[](uv_work_t *req, int) { delete static_cast<CnrBaton*>(req->data); }
);
}
cl_program program = cache.search(runner, offset);
if (program) {
return program;
}
std::string source(cryptonight_r_defines_cl);
for (size_t i = 0; i < kHeightChunkSize; ++i) {
V4_Instruction code[256];
const int code_size = v4_random_math_init<Algorithm::CN_R>(code, offset + i);
const std::string kernel = std::regex_replace(cryptonight_r_cl, std::regex("XMRIG_INCLUDE_RANDOM_MATH"), getCode(code, code_size));
source += std::regex_replace(kernel, std::regex("KERNEL_NAME"), "cn1_" + std::to_string(offset + i));
}
return build(runner, source, offset);;
return builder.build(runner, offset);;
}
// Empties the program cache. bg_mutex is held for the duration of the call;
// the queued background build also takes bg_mutex, so the two do not overlap.
void xmrig::OclCnR::clear()
{
    std::lock_guard<std::mutex> guard(bg_mutex);

    cache.clear();
}

View file

@ -42,10 +42,10 @@ class IOclRunner;
class OclCnR
{
public:
constexpr static size_t kPrecompilationDepth = 1;
constexpr static size_t kHeightChunkSize = 10;
constexpr static size_t kHeightChunkSize = 10;
static cl_program get(const IOclRunner &runner, uint64_t height, bool background = false);
static cl_program get(const IOclRunner &runner, uint64_t height);
static void clear();
};

View file

@ -37,7 +37,7 @@ xmrig::OclContext::OclContext(const OclDevice &device)
xmrig::OclContext::~OclContext()
{
if (m_ctx) {
OclLib::releaseContext(m_ctx);
OclLib::release(m_ctx);
}
}

View file

@ -39,7 +39,7 @@ xmrig::OclKernel::OclKernel(cl_program program, const char *name) :
xmrig::OclKernel::~OclKernel()
{
OclLib::releaseKernel(m_kernel);
OclLib::release(m_kernel);
}

View file

@ -375,7 +375,7 @@ cl_int xmrig::OclLib::getProgramInfo(cl_program program, cl_program_info param_n
}
cl_int xmrig::OclLib::releaseCommandQueue(cl_command_queue command_queue)
cl_int xmrig::OclLib::release(cl_command_queue command_queue)
{
assert(pReleaseCommandQueue != nullptr);
assert(pGetCommandQueueInfo != nullptr);
@ -391,7 +391,7 @@ cl_int xmrig::OclLib::releaseCommandQueue(cl_command_queue command_queue)
}
cl_int xmrig::OclLib::releaseContext(cl_context context)
cl_int xmrig::OclLib::release(cl_context context)
{
assert(pReleaseContext != nullptr);
@ -404,7 +404,7 @@ cl_int xmrig::OclLib::releaseContext(cl_context context)
}
cl_int xmrig::OclLib::releaseKernel(cl_kernel kernel)
cl_int xmrig::OclLib::release(cl_kernel kernel)
{
assert(pReleaseKernel != nullptr);
@ -421,7 +421,7 @@ cl_int xmrig::OclLib::releaseKernel(cl_kernel kernel)
}
cl_int xmrig::OclLib::releaseMemObject(cl_mem mem_obj)
cl_int xmrig::OclLib::release(cl_mem mem_obj)
{
assert(pReleaseMemObject != nullptr);
@ -438,7 +438,7 @@ cl_int xmrig::OclLib::releaseMemObject(cl_mem mem_obj)
}
cl_int xmrig::OclLib::releaseProgram(cl_program program)
cl_int xmrig::OclLib::release(cl_program program)
{
assert(pReleaseProgram != nullptr);
@ -550,6 +550,15 @@ cl_uint xmrig::OclLib::getNumPlatforms()
}
// Queries CL_PROGRAM_REFERENCE_COUNT for `program` through getProgramInfo().
// The status code of the query is not inspected; the result starts at 0 and
// holds whatever the query stored in it.
cl_uint xmrig::OclLib::getReferenceCount(cl_program program)
{
    cl_uint count = 0;

    OclLib::getProgramInfo(program, CL_PROGRAM_REFERENCE_COUNT, sizeof(count), &count);

    return count;
}
cl_ulong xmrig::OclLib::getDeviceUlong(cl_device_id id, cl_device_info param, cl_ulong defaultValue)
{
OclLib::getDeviceInfo(id, param, sizeof(cl_ulong), &defaultValue);

View file

@ -60,11 +60,11 @@ public:
static cl_int getPlatformInfo(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret);
static cl_int getProgramBuildInfo(cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret);
static cl_int getProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret = nullptr);
static cl_int releaseCommandQueue(cl_command_queue command_queue);
static cl_int releaseContext(cl_context context);
static cl_int releaseKernel(cl_kernel kernel);
static cl_int releaseMemObject(cl_mem mem_obj);
static cl_int releaseProgram(cl_program program);
static cl_int release(cl_command_queue command_queue);
static cl_int release(cl_context context);
static cl_int release(cl_kernel kernel);
static cl_int release(cl_mem mem_obj);
static cl_int release(cl_program program);
static cl_int setKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value);
static cl_kernel createKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret);
static cl_mem createBuffer(cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret);
@ -72,6 +72,7 @@ public:
static cl_program createProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret);
static cl_uint getDeviceUint(cl_device_id id, cl_device_info param, cl_uint defaultValue = 0);
static cl_uint getNumPlatforms();
static cl_uint getReferenceCount(cl_program program);
static cl_ulong getDeviceUlong(cl_device_id id, cl_device_info param, cl_ulong defaultValue = 0);
static std::vector<cl_platform_id> getPlatformIDs();
static String getDeviceString(cl_device_id id, cl_device_info param);

View file

@ -31,7 +31,6 @@
xmrig::String::String(const char *str) :
m_data(nullptr),
m_size(str == nullptr ? 0 : strlen(str))
{
if (m_size == 0) {
@ -44,7 +43,6 @@ xmrig::String::String(const char *str) :
xmrig::String::String(const char *str, size_t size) :
m_data(nullptr),
m_size(size)
{
if (str == nullptr) {
@ -60,7 +58,6 @@ xmrig::String::String(const char *str, size_t size) :
xmrig::String::String(const String &other) :
m_data(nullptr),
m_size(other.m_size)
{
if (other.m_data == nullptr) {
@ -117,7 +114,7 @@ std::vector<xmrig::String> xmrig::String::split(char sep) const
for (pos = 0; pos < m_size; ++pos) {
if (m_data[pos] == sep) {
if ((pos - start) > 0) {
out.push_back(String(m_data + start, pos - start));
out.emplace_back(m_data + start, pos - start);
}
start = pos + 1;
@ -125,7 +122,7 @@ std::vector<xmrig::String> xmrig::String::split(char sep) const
}
if ((pos - start) > 0) {
out.push_back(String(m_data + start, pos - start));
out.emplace_back(m_data + start, pos - start);
}
return out;

View file

@ -46,9 +46,9 @@ namespace xmrig {
class String
{
public:
inline String() : m_data(nullptr), m_size(0) {}
inline String(char *str) : m_data(str), m_size(str == nullptr ? 0 : strlen(str)) {}
inline String(String &&other) : m_data(other.m_data), m_size(other.m_size) { other.m_data = nullptr; other.m_size = 0; }
inline String() = default;
inline String(char *str) : m_data(str), m_size(str == nullptr ? 0 : strlen(str)) {}
inline String(String &&other) noexcept : m_data(other.m_data), m_size(other.m_size) { other.m_data = nullptr; other.m_size = 0; }
String(const char *str);
String(const char *str, size_t size);
@ -81,7 +81,7 @@ public:
inline String &operator=(const char *str) { copy(str); return *this; }
inline String &operator=(const String &str) { copy(str); return *this; }
inline String &operator=(std::nullptr_t) { delete [] m_data; m_data = nullptr; m_size = 0; return *this; }
inline String &operator=(String &&other) { move(std::move(other)); return *this; }
inline String &operator=(String &&other) noexcept { move(std::move(other)); return *this; }
rapidjson::Value toJSON() const;
rapidjson::Value toJSON(rapidjson::Document &doc) const;
@ -97,8 +97,8 @@ private:
void move(char *str);
void move(String &&other);
char *m_data;
size_t m_size;
char *m_data = nullptr;
size_t m_size = 0;
};

View file

@ -77,7 +77,7 @@ public:
ARGON2
};
inline Algorithm() {}
inline Algorithm() = default;
inline Algorithm(const char *algo) : m_id(parse(algo)) {}
inline Algorithm(Id id) : m_id(id) {}
@ -109,7 +109,7 @@ private:
};
typedef std::vector<Algorithm> Algorithms;
using Algorithms = std::vector<Algorithm>;
} /* namespace xmrig */

View file

@ -43,8 +43,6 @@
# include "crypto/common/VirtualMemory.h"
#endif
#include "base/tools/Buffer.h"
namespace xmrig {