Simplify code.

This commit is contained in:
XMRig 2019-09-16 23:53:39 +07:00
parent 2a107cc463
commit e8acb8a2a9
7 changed files with 58 additions and 87 deletions

View file

@@ -40,7 +40,8 @@ xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) :
m_source(OclSource::get(data.algorithm)),
m_data(data),
m_align(OclLib::getUint(data.device.id(), CL_DEVICE_MEM_BASE_ADDR_ALIGN)),
m_threadId(id)
m_threadId(id),
m_intensity(data.thread.intensity())
{
m_deviceKey = data.device.name();

View file

@@ -83,6 +83,7 @@ protected:
size_t m_offset = 0;
std::string m_deviceKey;
std::string m_options;
uint32_t m_intensity;
};

View file

@@ -83,12 +83,10 @@ xmrig::OclCnRunner::~OclCnRunner()
size_t xmrig::OclCnRunner::bufferSize() const
{
const size_t g_thd = data().thread.intensity();
return OclBaseRunner::bufferSize() +
align(m_algorithm.l3() * g_thd) +
align(200 * g_thd) +
(align(sizeof(cl_uint) * (g_thd + 2)) * BRANCH_MAX);
align(m_algorithm.l3() * m_intensity) +
align(200 * m_intensity) +
(align(sizeof(cl_uint) * (m_intensity + 2)) * BRANCH_MAX);
}
@@ -96,14 +94,13 @@ void xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
{
static const cl_uint zero = 0;
const size_t g_intensity = data().thread.intensity();
const size_t w_size = data().thread.worksize();
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
const size_t g_thd = ((m_intensity + w_size - 1u) / w_size) * w_size;
assert(g_thd % w_size == 0);
for (size_t i = 0; i < BRANCH_MAX; ++i) {
enqueueWriteBuffer(m_branches[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero);
enqueueWriteBuffer(m_branches[i], CL_FALSE, sizeof(cl_uint) * m_intensity, sizeof(cl_uint), &zero);
}
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero);
@@ -137,7 +134,7 @@ void xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
m_height = job.height();
m_cnr = OclCnR::get(*this, m_height);
m_cn1 = new Cn1Kernel(m_cnr, m_height);
m_cn1->setArgs(m_input, m_scratchpads, m_states, data().thread.intensity());
m_cn1->setArgs(m_input, m_scratchpads, m_states, m_intensity);
}
for (auto kernel : m_branchKernels) {
@@ -150,22 +147,20 @@ void xmrig::OclCnRunner::build()
{
OclBaseRunner::build();
const uint32_t intensity = data().thread.intensity();
m_cn0 = new Cn0Kernel(m_program);
m_cn0->setArgs(m_input, m_scratchpads, m_states, intensity);
m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity);
m_cn2 = new Cn2Kernel(m_program);
m_cn2->setArgs(m_scratchpads, m_states, m_branches, intensity);
m_cn2->setArgs(m_scratchpads, m_states, m_branches, m_intensity);
if (m_algorithm != Algorithm::CN_R) {
m_cn1 = new Cn1Kernel(m_program);
m_cn1->setArgs(m_input, m_scratchpads, m_states, intensity);
m_cn1->setArgs(m_input, m_scratchpads, m_states, m_intensity);
}
for (size_t i = 0; i < BRANCH_MAX; ++i) {
auto kernel = new CnBranchKernel(i, m_program);
kernel->setArgs(m_states, m_branches[i], m_output, intensity);
kernel->setArgs(m_states, m_branches[i], m_output, m_intensity);
m_branchKernels[i] = kernel;
}
@@ -176,12 +171,10 @@ void xmrig::OclCnRunner::init()
{
OclBaseRunner::init();
const size_t g_thd = data().thread.intensity();
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * g_thd);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd);
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * m_intensity);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * m_intensity);
for (size_t i = 0; i < BRANCH_MAX; ++i) {
m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2));
m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (m_intensity + 2));
}
}

View file

@@ -87,28 +87,26 @@ void xmrig::OclRxBaseRunner::run(uint32_t nonce, uint32_t *hashOutput)
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(uint32_t), &zero);
const uint32_t g_intensity = data().thread.intensity();
m_blake2b_initial_hash->enqueue(m_queue, g_intensity);
m_fillAes1Rx4_scratchpad->enqueue(m_queue, g_intensity);
m_blake2b_initial_hash->enqueue(m_queue, m_intensity);
m_fillAes1Rx4_scratchpad->enqueue(m_queue, m_intensity);
const uint32_t programCount = RxAlgo::programCount(m_algorithm);
for (uint32_t i = 0; i < programCount; ++i) {
m_fillAes4Rx4_entropy->enqueue(m_queue, g_intensity);
m_fillAes4Rx4_entropy->enqueue(m_queue, m_intensity);
execute(i);
if (i == programCount - 1) {
m_hashAes1Rx4->enqueue(m_queue, g_intensity);
m_blake2b_hash_registers_32->enqueue(m_queue, g_intensity);
m_hashAes1Rx4->enqueue(m_queue, m_intensity);
m_blake2b_hash_registers_32->enqueue(m_queue, m_intensity);
}
else {
m_blake2b_hash_registers_64->enqueue(m_queue, g_intensity);
m_blake2b_hash_registers_64->enqueue(m_queue, m_intensity);
}
}
m_find_shares->enqueue(m_queue, g_intensity);
m_find_shares->enqueue(m_queue, m_intensity);
finalize(hashOutput);
@@ -138,13 +136,11 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob)
size_t xmrig::OclRxBaseRunner::bufferSize() const
{
const size_t g_thd = data().thread.intensity();
return OclBaseRunner::bufferSize() +
align((m_algorithm.l3() + 64) * g_thd) +
align(64 * g_thd) +
align((128 + 2560) * g_thd) +
align(sizeof(uint32_t) * g_thd);
align((m_algorithm.l3() + 64) * m_intensity) +
align(64 * m_intensity) +
align((128 + 2560) * m_intensity) +
align(sizeof(uint32_t) * m_intensity);
}
@@ -152,14 +148,13 @@ void xmrig::OclRxBaseRunner::build()
{
OclBaseRunner::build();
const uint32_t batch_size = data().thread.intensity();
const uint32_t rx_version = RxAlgo::version(m_algorithm);
m_fillAes1Rx4_scratchpad = new FillAesKernel(m_program, "fillAes1Rx4_scratchpad");
m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_scratchpads, batch_size, rx_version);
m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_scratchpads, m_intensity, rx_version);
m_fillAes4Rx4_entropy = new FillAesKernel(m_program, "fillAes4Rx4_entropy");
m_fillAes4Rx4_entropy->setArgs(m_hashes, m_entropy, batch_size, rx_version);
m_fillAes4Rx4_entropy->setArgs(m_hashes, m_entropy, m_intensity, rx_version);
m_hashAes1Rx4 = new HashAesKernel(m_program);
@@ -178,10 +173,8 @@ void xmrig::OclRxBaseRunner::init()
{
OclBaseRunner::init();
const size_t g_thd = data().thread.intensity();
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * g_thd);
m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * g_thd);
m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * g_thd);
m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * g_thd);
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * m_intensity);
m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * m_intensity);
m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * m_intensity);
m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * m_intensity);
}

View file

@@ -54,9 +54,7 @@ xmrig::OclRxJitRunner::~OclRxJitRunner()
size_t xmrig::OclRxJitRunner::bufferSize() const
{
const size_t g_thd = data().thread.intensity();
return OclRxBaseRunner::bufferSize() + align(256 * g_thd) + align(5120 * g_thd) + align(10048 * g_thd);
return OclRxBaseRunner::bufferSize() + align(256 * m_intensity) + align(5120 * m_intensity) + align(10048 * m_intensity);
}
@@ -64,33 +62,29 @@ void xmrig::OclRxJitRunner::build()
{
OclRxBaseRunner::build();
const uint32_t batch_size = data().thread.intensity();
m_hashAes1Rx4->setArgs(m_scratchpads, m_registers, 256, batch_size);
m_hashAes1Rx4->setArgs(m_scratchpads, m_registers, 256, m_intensity);
m_blake2b_hash_registers_32->setArgs(m_hashes, m_registers, 256);
m_blake2b_hash_registers_64->setArgs(m_hashes, m_registers, 256);
m_randomx_jit = new RxJitKernel(m_program);
m_randomx_jit->setArgs(m_entropy, m_registers, m_intermediate_programs, m_programs, batch_size, m_rounding);
m_randomx_jit->setArgs(m_entropy, m_registers, m_intermediate_programs, m_programs, m_intensity, m_rounding);
if (!loadAsmProgram()) {
throw std::runtime_error(OclError::toString(CL_INVALID_PROGRAM));
}
m_randomx_run = new RxRunKernel(m_asmProgram);
m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, batch_size, m_algorithm);
m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, m_intensity, m_algorithm);
}
void xmrig::OclRxJitRunner::execute(uint32_t iteration)
{
const uint32_t g_intensity = data().thread.intensity();
m_randomx_jit->enqueue(m_queue, g_intensity, iteration);
m_randomx_jit->enqueue(m_queue, m_intensity, iteration);
OclLib::finish(m_queue);
m_randomx_run->enqueue(m_queue, g_intensity);
m_randomx_run->enqueue(m_queue, m_intensity);
}
@@ -98,11 +92,9 @@ void xmrig::OclRxJitRunner::init()
{
OclRxBaseRunner::init();
const size_t g_thd = data().thread.intensity();
m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * g_thd);
m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * g_thd);
m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * g_thd);
m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * m_intensity);
m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * m_intensity);
m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * m_intensity);
}

View file

@@ -52,7 +52,7 @@ xmrig::OclRxVmRunner::~OclRxVmRunner()
size_t xmrig::OclRxVmRunner::bufferSize() const
{
return OclRxBaseRunner::bufferSize() + (align(2560 * data().thread.intensity()));
return OclRxBaseRunner::bufferSize() + (align(2560 * m_intensity));
}
@@ -60,10 +60,9 @@ void xmrig::OclRxVmRunner::build()
{
OclRxBaseRunner::build();
const uint32_t batch_size = data().thread.intensity();
const uint32_t hashStrideBytes = RxAlgo::programSize(m_algorithm) * 8;
m_hashAes1Rx4->setArgs(m_scratchpads, m_vm_states, hashStrideBytes, batch_size);
m_hashAes1Rx4->setArgs(m_scratchpads, m_vm_states, hashStrideBytes, m_intensity);
m_blake2b_hash_registers_32->setArgs(m_hashes, m_vm_states, hashStrideBytes);
m_blake2b_hash_registers_64->setArgs(m_hashes, m_vm_states, hashStrideBytes);
@@ -71,7 +70,7 @@ void xmrig::OclRxVmRunner::build()
m_init_vm->setArgs(m_entropy, m_vm_states, m_rounding);
m_execute_vm = new ExecuteVmKernel(m_program);
m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), batch_size);
m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), m_intensity);
}
@@ -79,9 +78,8 @@ void xmrig::OclRxVmRunner::execute(uint32_t iteration)
{
const uint32_t bfactor = std::min(data().thread.bfactor(), 8u);
const uint32_t num_iterations = RxAlgo::programIterations(m_algorithm) >> bfactor;
const uint32_t g_intensity = data().thread.intensity();
m_init_vm->enqueue(m_queue, g_intensity, iteration);
m_init_vm->enqueue(m_queue, m_intensity, iteration);
m_execute_vm->setIterations(num_iterations);
@@ -90,7 +88,7 @@ void xmrig::OclRxVmRunner::execute(uint32_t iteration)
m_execute_vm->setLast(1);
}
m_execute_vm->enqueue(m_queue, g_intensity, m_worksize);
m_execute_vm->enqueue(m_queue, m_intensity, m_worksize);
if (j == 0) {
m_execute_vm->setFirst(0);
@@ -103,5 +101,5 @@ void xmrig::OclRxVmRunner::init()
{
OclRxBaseRunner::init();
m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * data().thread.intensity());
m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * m_intensity);
}

View file

@@ -63,9 +63,7 @@ xmrig::OclRyoRunner::~OclRyoRunner()
size_t xmrig::OclRyoRunner::bufferSize() const
{
const size_t g_thd = data().thread.intensity();
return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * g_thd) + align(200 * g_thd);
return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * m_intensity) + align(200 * m_intensity);
}
@@ -73,9 +71,8 @@ void xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput)
{
static const cl_uint zero = 0;
const size_t g_intensity = data().thread.intensity();
const size_t w_size = data().thread.worksize();
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
const size_t g_thd = ((m_intensity + w_size - 1u) / w_size) * w_size;
assert(g_thd % w_size == 0);
@@ -109,19 +106,17 @@ void xmrig::OclRyoRunner::build()
{
OclBaseRunner::build();
const uint32_t intensity = data().thread.intensity();
m_cn00 = new Cn00RyoKernel(m_program);
m_cn00->setArgs(m_scratchpads, m_states);
m_cn0 = new Cn0Kernel(m_program);
m_cn0->setArgs(m_input, m_scratchpads, m_states, intensity);
m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity);
m_cn1 = new Cn1RyoKernel(m_program);
m_cn1->setArgs(m_scratchpads, m_states, intensity);
m_cn1->setArgs(m_scratchpads, m_states, m_intensity);
m_cn2 = new Cn2RyoKernel(m_program);
m_cn2->setArgs(m_scratchpads, m_states, m_output, intensity);
m_cn2->setArgs(m_scratchpads, m_states, m_output, m_intensity);
}
@@ -129,8 +124,6 @@ void xmrig::OclRyoRunner::init()
{
OclBaseRunner::init();
const size_t g_thd = data().thread.intensity();
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * g_thd);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd);
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * m_intensity);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * m_intensity);
}