Implemented cn0 kernel launch.

This commit is contained in:
XMRig 2019-09-01 07:05:49 +07:00
parent ce3e2401cb
commit b541960611
15 changed files with 108 additions and 49 deletions

View file

@ -64,18 +64,18 @@ public:
}
inline void nextRound(uint32_t reserveCount)
inline void nextRound(uint32_t rounds, uint32_t roundSize)
{
m_rounds[index()]++;
if ((m_rounds[index()] % reserveCount) == 0) {
if ((m_rounds[index()] % rounds) == 0) {
for (size_t i = 0; i < N; ++i) {
*nonce(i) = Nonce::next(index(), *nonce(i), reserveCount, currentJob().isNicehash());
*nonce(i) = Nonce::next(index(), *nonce(i), rounds * roundSize, currentJob().isNicehash());
}
}
else {
for (size_t i = 0; i < N; ++i) {
*nonce(i) += 1;
*nonce(i) += roundSize;
}
}
}
@ -112,15 +112,15 @@ inline uint32_t *xmrig::WorkerJob<1>::nonce(size_t)
template<>
inline void xmrig::WorkerJob<1>::nextRound(uint32_t reserveCount)
inline void xmrig::WorkerJob<1>::nextRound(uint32_t rounds, uint32_t roundSize)
{
m_rounds[index()]++;
if ((m_rounds[index()] % reserveCount) == 0) {
*nonce() = Nonce::next(index(), *nonce(), reserveCount, currentJob().isNicehash());
if ((m_rounds[index()] % rounds) == 0) {
*nonce() = Nonce::next(index(), *nonce(), rounds * roundSize, currentJob().isNicehash());
}
else {
*nonce() += 1;
*nonce() += roundSize;
}
}

View file

@ -46,7 +46,7 @@
namespace xmrig {
static constexpr uint32_t kReserveCount = 4096;
static constexpr uint32_t kReserveCount = 32768;
} // namespace xmrig
@ -212,7 +212,7 @@ void xmrig::CpuWorker<N>::start()
}
}
m_job.nextRound(kReserveCount);
m_job.nextRound(kReserveCount, 1);
m_count += N;
std::this_thread::yield();

View file

@ -42,7 +42,12 @@
namespace xmrig {
static constexpr uint32_t kReserveCount = 4096;
static constexpr uint32_t kReserveCount = 32768;
static inline uint32_t roundSize(uint32_t intensity) { return kReserveCount / intensity + 1; }
} // namespace xmrig
@ -51,7 +56,8 @@ static constexpr uint32_t kReserveCount = 4096;
xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) :
Worker(id, data.thread.affinity(), -1),
m_algorithm(data.algorithm),
m_miner(data.miner)
m_miner(data.miner),
m_intensity(data.thread.intensity())
{
switch (m_algorithm.family()) {
case Algorithm::RANDOM_X:
@ -108,20 +114,14 @@ void xmrig::OclWorker::start()
}
while (!Nonce::isOutdated(Nonce::OPENCL, m_job.sequence())) {
if ((m_count & 0x7) == 0) {
storeStats();
if (!m_runner->run(*m_job.nonce(), results)) {
return;
}
const Job &job = m_job.currentJob();
if (job.algorithm().l3() != m_algorithm.l3()) {
break;
}
m_runner->run(results);
std::this_thread::sleep_for(std::chrono::milliseconds(2000)); // FIXME
m_job.nextRound(kReserveCount);
m_job.nextRound(roundSize(m_intensity), m_intensity);
m_count += m_intensity;
std::this_thread::yield();
}
@ -137,6 +137,6 @@ void xmrig::OclWorker::consumeJob()
return;
}
m_job.add(m_miner->job(), Nonce::sequence(Nonce::OPENCL), kReserveCount);
m_job.add(m_miner->job(), Nonce::sequence(Nonce::OPENCL), roundSize(m_intensity) * m_intensity);
m_runner->set(m_job.currentJob(), m_job.blob());
}

View file

@ -54,6 +54,7 @@ private:
const Algorithm m_algorithm;
const Miner *m_miner;
const uint32_t m_intensity;
IOclRunner *m_runner = nullptr;
WorkerJob<1> m_job;
};

View file

@ -41,6 +41,7 @@ class IOclRunner
public:
virtual ~IOclRunner() = default;
virtual bool run(uint32_t nonce, uint32_t *hashOutput) = 0;
virtual bool selfTest() const = 0;
virtual bool set(const Job &job, uint8_t *blob) = 0;
virtual const char *buildOptions() const = 0;
@ -49,7 +50,6 @@ public:
virtual const OclLaunchData &data() const = 0;
virtual size_t threadId() const = 0;
virtual void build() = 0;
virtual void run(uint32_t *hashOutput) = 0;
protected:
virtual bool isReadyToBuild() const = 0;

View file

@ -32,6 +32,16 @@ xmrig::Cn0Kernel::Cn0Kernel(cl_program program) : OclKernel(program, "cn0")
}
// Enqueues the cn0 kernel as a 2D NDRange: the first dimension walks the
// nonce space (offset by the starting nonce), the second is fixed at 8 lanes
// with an 8x8 work-group. Returns false if the underlying enqueue fails.
bool xmrig::Cn0Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads)
{
static const size_t local_dims[2] = { 8, 8 };

const size_t global_offset[2] = { nonce, 1 };
const size_t global_dims[2]   = { threads, 8 };

return enqueueNDRange(queue, 2, global_offset, global_dims, local_dims);
}
// __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
bool xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
{

View file

@ -29,9 +29,6 @@
#include "backend/opencl/wrappers/OclKernel.h"
typedef struct _cl_mem *cl_mem;
namespace xmrig {
@ -39,6 +36,7 @@ class Cn0Kernel : public OclKernel
{
public:
Cn0Kernel(cl_program program);
bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
};

View file

@ -93,9 +93,3 @@ void xmrig::OclBaseRunner::build()
m_program = OclCache::build(this);
}
void xmrig::OclBaseRunner::run(uint32_t *hashOutput)
{
}

View file

@ -56,7 +56,6 @@ protected:
bool isReadyToBuild() const override;
bool selfTest() const override;
void build() override;
void run(uint32_t *hashOutput) override;
protected:
Algorithm m_algorithm;

View file

@ -110,6 +110,37 @@ bool xmrig::OclCnRunner::isReadyToBuild() const
}
// Runs one round of the CryptoNight pipeline for `g_intensity` nonces starting
// at `nonce`. Currently only the cn0 stage is launched; later stages and the
// readback into `hashOutput` are not yet wired up in this hunk.
bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
{
static const cl_uint zero = 0;
// NOTE(review): `ret` and `branchNonces` are not used in the visible body —
// presumably reserved for the branch-kernel stages; confirm before removing.
cl_int ret;
size_t branchNonces[4] = { 0 };
const size_t g_intensity = data().thread.intensity();
const size_t w_size = data().thread.worksize();
// Round the global size up to a multiple of the work-group size.
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
// Zero one cl_uint at offset g_intensity in each branch buffer — presumably
// the per-branch result counter stored after the nonce list; TODO confirm
// the buffer layout. Writes are non-blocking (CL_FALSE); the finish() below
// guarantees completion before return.
for (size_t i = 0; i < BRANCH_MAX; ++i) {
if (OclLib::enqueueWriteBuffer(m_queue, m_branches[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero, 0, nullptr, nullptr) != CL_SUCCESS) {
return false;
}
}
// Zero the slot at index 0xFF of the output buffer — presumably the found-
// results counter; TODO confirm against the kernel source.
if (OclLib::enqueueWriteBuffer(m_queue, m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero, 0, nullptr, nullptr) != CL_SUCCESS) {
return false;
}
// Launch the first CryptoNight stage over the rounded-up thread count.
if (!m_cn0->enqueue(m_queue, nonce, g_thd)) {
return false;
}
// Block until all queued work has completed.
OclLib::finish(m_queue);
return true;
}
bool xmrig::OclCnRunner::selfTest() const
{
return OclBaseRunner::selfTest() && m_cn0->isValid();
@ -133,8 +164,7 @@ bool xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
return false;
}
LOG_WARN(GREEN_S "OK");
return false;
return true;
}

View file

@ -43,6 +43,7 @@ public:
protected:
bool isReadyToBuild() const override;
bool run(uint32_t nonce, uint32_t *hashOutput) override;
bool selfTest() const override;
bool set(const Job &job, uint8_t *blob) override;
void build() override;

View file

@ -30,6 +30,12 @@ xmrig::OclRxRunner::OclRxRunner(size_t index, const OclLaunchData &data) : OclBa
}
// Stub: RandomX OpenCL execution is not implemented yet, so every round
// reports failure (matching the TODO in selfTest() below).
bool xmrig::OclRxRunner::run(uint32_t nonce, uint32_t *hashOutput)
{
return false;
}
bool xmrig::OclRxRunner::selfTest() const
{
return false; // TODO

View file

@ -38,6 +38,7 @@ public:
OclRxRunner(size_t index, const OclLaunchData &data);
protected:
bool run(uint32_t nonce, uint32_t *hashOutput) override;
bool selfTest() const override;
bool set(const Job &job, uint8_t *blob) override;
};

View file

@ -43,6 +43,22 @@ xmrig::OclKernel::~OclKernel()
}
// Thin wrapper over clEnqueueNDRangeKernel: refuses to enqueue an invalid
// kernel, and logs the OpenCL error (with the kernel name) on failure.
// Returns true only when the enqueue succeeded.
bool xmrig::OclKernel::enqueueNDRange(cl_command_queue queue, uint32_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size)
{
if (!isValid()) {
return false;
}

const cl_int result = OclLib::enqueueNDRangeKernel(queue, m_kernel, work_dim, global_work_offset, global_work_size, local_work_size, 0, nullptr, nullptr);
if (result == CL_SUCCESS) {
return true;
}

LOG_ERR(MAGENTA_BG_BOLD(WHITE_BOLD_S " ocl ") RED(" error ") RED_BOLD("%s") RED(" when calling ") RED_BOLD("clEnqueueNDRangeKernel") RED(" for kernel ") RED_BOLD("%s"),
OclError::toString(result), name().data());

return false;
}
bool xmrig::OclKernel::setArg(uint32_t index, size_t size, const void *value)
{
if (!isValid()) {

View file

@ -29,7 +29,9 @@
#include "base/tools/String.h"
typedef struct _cl_command_queue *cl_command_queue;
typedef struct _cl_kernel *cl_kernel;
typedef struct _cl_mem *cl_mem;
typedef struct _cl_program *cl_program;
@ -46,6 +48,7 @@ public:
inline cl_kernel kernel() const { return m_kernel; }
inline const String &name() const { return m_name; }
bool enqueueNDRange(cl_command_queue queue, uint32_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size);
bool setArg(uint32_t index, size_t size, const void *value);
private: