mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-18 10:01:06 +00:00
Simplify code.
This commit is contained in:
parent
2a107cc463
commit
e8acb8a2a9
7 changed files with 58 additions and 87 deletions
|
@ -40,7 +40,8 @@ xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) :
|
|||
m_source(OclSource::get(data.algorithm)),
|
||||
m_data(data),
|
||||
m_align(OclLib::getUint(data.device.id(), CL_DEVICE_MEM_BASE_ADDR_ALIGN)),
|
||||
m_threadId(id)
|
||||
m_threadId(id),
|
||||
m_intensity(data.thread.intensity())
|
||||
{
|
||||
m_deviceKey = data.device.name();
|
||||
|
||||
|
|
|
@ -83,6 +83,7 @@ protected:
|
|||
size_t m_offset = 0;
|
||||
std::string m_deviceKey;
|
||||
std::string m_options;
|
||||
uint32_t m_intensity;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -83,12 +83,10 @@ xmrig::OclCnRunner::~OclCnRunner()
|
|||
|
||||
size_t xmrig::OclCnRunner::bufferSize() const
|
||||
{
|
||||
const size_t g_thd = data().thread.intensity();
|
||||
|
||||
return OclBaseRunner::bufferSize() +
|
||||
align(m_algorithm.l3() * g_thd) +
|
||||
align(200 * g_thd) +
|
||||
(align(sizeof(cl_uint) * (g_thd + 2)) * BRANCH_MAX);
|
||||
align(m_algorithm.l3() * m_intensity) +
|
||||
align(200 * m_intensity) +
|
||||
(align(sizeof(cl_uint) * (m_intensity + 2)) * BRANCH_MAX);
|
||||
}
|
||||
|
||||
|
||||
|
@ -96,14 +94,13 @@ void xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
|||
{
|
||||
static const cl_uint zero = 0;
|
||||
|
||||
const size_t g_intensity = data().thread.intensity();
|
||||
const size_t w_size = data().thread.worksize();
|
||||
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
|
||||
const size_t w_size = data().thread.worksize();
|
||||
const size_t g_thd = ((m_intensity + w_size - 1u) / w_size) * w_size;
|
||||
|
||||
assert(g_thd % w_size == 0);
|
||||
|
||||
for (size_t i = 0; i < BRANCH_MAX; ++i) {
|
||||
enqueueWriteBuffer(m_branches[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero);
|
||||
enqueueWriteBuffer(m_branches[i], CL_FALSE, sizeof(cl_uint) * m_intensity, sizeof(cl_uint), &zero);
|
||||
}
|
||||
|
||||
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero);
|
||||
|
@ -137,7 +134,7 @@ void xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
|
|||
m_height = job.height();
|
||||
m_cnr = OclCnR::get(*this, m_height);
|
||||
m_cn1 = new Cn1Kernel(m_cnr, m_height);
|
||||
m_cn1->setArgs(m_input, m_scratchpads, m_states, data().thread.intensity());
|
||||
m_cn1->setArgs(m_input, m_scratchpads, m_states, m_intensity);
|
||||
}
|
||||
|
||||
for (auto kernel : m_branchKernels) {
|
||||
|
@ -150,22 +147,20 @@ void xmrig::OclCnRunner::build()
|
|||
{
|
||||
OclBaseRunner::build();
|
||||
|
||||
const uint32_t intensity = data().thread.intensity();
|
||||
|
||||
m_cn0 = new Cn0Kernel(m_program);
|
||||
m_cn0->setArgs(m_input, m_scratchpads, m_states, intensity);
|
||||
m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity);
|
||||
|
||||
m_cn2 = new Cn2Kernel(m_program);
|
||||
m_cn2->setArgs(m_scratchpads, m_states, m_branches, intensity);
|
||||
m_cn2->setArgs(m_scratchpads, m_states, m_branches, m_intensity);
|
||||
|
||||
if (m_algorithm != Algorithm::CN_R) {
|
||||
m_cn1 = new Cn1Kernel(m_program);
|
||||
m_cn1->setArgs(m_input, m_scratchpads, m_states, intensity);
|
||||
m_cn1->setArgs(m_input, m_scratchpads, m_states, m_intensity);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < BRANCH_MAX; ++i) {
|
||||
auto kernel = new CnBranchKernel(i, m_program);
|
||||
kernel->setArgs(m_states, m_branches[i], m_output, intensity);
|
||||
kernel->setArgs(m_states, m_branches[i], m_output, m_intensity);
|
||||
|
||||
m_branchKernels[i] = kernel;
|
||||
}
|
||||
|
@ -176,12 +171,10 @@ void xmrig::OclCnRunner::init()
|
|||
{
|
||||
OclBaseRunner::init();
|
||||
|
||||
const size_t g_thd = data().thread.intensity();
|
||||
|
||||
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * g_thd);
|
||||
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd);
|
||||
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * m_intensity);
|
||||
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * m_intensity);
|
||||
|
||||
for (size_t i = 0; i < BRANCH_MAX; ++i) {
|
||||
m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2));
|
||||
m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (m_intensity + 2));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,28 +87,26 @@ void xmrig::OclRxBaseRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
|||
|
||||
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(uint32_t), &zero);
|
||||
|
||||
const uint32_t g_intensity = data().thread.intensity();
|
||||
|
||||
m_blake2b_initial_hash->enqueue(m_queue, g_intensity);
|
||||
m_fillAes1Rx4_scratchpad->enqueue(m_queue, g_intensity);
|
||||
m_blake2b_initial_hash->enqueue(m_queue, m_intensity);
|
||||
m_fillAes1Rx4_scratchpad->enqueue(m_queue, m_intensity);
|
||||
|
||||
const uint32_t programCount = RxAlgo::programCount(m_algorithm);
|
||||
|
||||
for (uint32_t i = 0; i < programCount; ++i) {
|
||||
m_fillAes4Rx4_entropy->enqueue(m_queue, g_intensity);
|
||||
m_fillAes4Rx4_entropy->enqueue(m_queue, m_intensity);
|
||||
|
||||
execute(i);
|
||||
|
||||
if (i == programCount - 1) {
|
||||
m_hashAes1Rx4->enqueue(m_queue, g_intensity);
|
||||
m_blake2b_hash_registers_32->enqueue(m_queue, g_intensity);
|
||||
m_hashAes1Rx4->enqueue(m_queue, m_intensity);
|
||||
m_blake2b_hash_registers_32->enqueue(m_queue, m_intensity);
|
||||
}
|
||||
else {
|
||||
m_blake2b_hash_registers_64->enqueue(m_queue, g_intensity);
|
||||
m_blake2b_hash_registers_64->enqueue(m_queue, m_intensity);
|
||||
}
|
||||
}
|
||||
|
||||
m_find_shares->enqueue(m_queue, g_intensity);
|
||||
m_find_shares->enqueue(m_queue, m_intensity);
|
||||
|
||||
finalize(hashOutput);
|
||||
|
||||
|
@ -138,13 +136,11 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob)
|
|||
|
||||
size_t xmrig::OclRxBaseRunner::bufferSize() const
|
||||
{
|
||||
const size_t g_thd = data().thread.intensity();
|
||||
|
||||
return OclBaseRunner::bufferSize() +
|
||||
align((m_algorithm.l3() + 64) * g_thd) +
|
||||
align(64 * g_thd) +
|
||||
align((128 + 2560) * g_thd) +
|
||||
align(sizeof(uint32_t) * g_thd);
|
||||
align((m_algorithm.l3() + 64) * m_intensity) +
|
||||
align(64 * m_intensity) +
|
||||
align((128 + 2560) * m_intensity) +
|
||||
align(sizeof(uint32_t) * m_intensity);
|
||||
}
|
||||
|
||||
|
||||
|
@ -152,14 +148,13 @@ void xmrig::OclRxBaseRunner::build()
|
|||
{
|
||||
OclBaseRunner::build();
|
||||
|
||||
const uint32_t batch_size = data().thread.intensity();
|
||||
const uint32_t rx_version = RxAlgo::version(m_algorithm);
|
||||
|
||||
m_fillAes1Rx4_scratchpad = new FillAesKernel(m_program, "fillAes1Rx4_scratchpad");
|
||||
m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_scratchpads, batch_size, rx_version);
|
||||
m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_scratchpads, m_intensity, rx_version);
|
||||
|
||||
m_fillAes4Rx4_entropy = new FillAesKernel(m_program, "fillAes4Rx4_entropy");
|
||||
m_fillAes4Rx4_entropy->setArgs(m_hashes, m_entropy, batch_size, rx_version);
|
||||
m_fillAes4Rx4_entropy->setArgs(m_hashes, m_entropy, m_intensity, rx_version);
|
||||
|
||||
m_hashAes1Rx4 = new HashAesKernel(m_program);
|
||||
|
||||
|
@ -178,10 +173,8 @@ void xmrig::OclRxBaseRunner::init()
|
|||
{
|
||||
OclBaseRunner::init();
|
||||
|
||||
const size_t g_thd = data().thread.intensity();
|
||||
|
||||
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * g_thd);
|
||||
m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * g_thd);
|
||||
m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * g_thd);
|
||||
m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * g_thd);
|
||||
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * m_intensity);
|
||||
m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * m_intensity);
|
||||
m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * m_intensity);
|
||||
m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * m_intensity);
|
||||
}
|
||||
|
|
|
@ -54,9 +54,7 @@ xmrig::OclRxJitRunner::~OclRxJitRunner()
|
|||
|
||||
size_t xmrig::OclRxJitRunner::bufferSize() const
|
||||
{
|
||||
const size_t g_thd = data().thread.intensity();
|
||||
|
||||
return OclRxBaseRunner::bufferSize() + align(256 * g_thd) + align(5120 * g_thd) + align(10048 * g_thd);
|
||||
return OclRxBaseRunner::bufferSize() + align(256 * m_intensity) + align(5120 * m_intensity) + align(10048 * m_intensity);
|
||||
}
|
||||
|
||||
|
||||
|
@ -64,33 +62,29 @@ void xmrig::OclRxJitRunner::build()
|
|||
{
|
||||
OclRxBaseRunner::build();
|
||||
|
||||
const uint32_t batch_size = data().thread.intensity();
|
||||
|
||||
m_hashAes1Rx4->setArgs(m_scratchpads, m_registers, 256, batch_size);
|
||||
m_hashAes1Rx4->setArgs(m_scratchpads, m_registers, 256, m_intensity);
|
||||
m_blake2b_hash_registers_32->setArgs(m_hashes, m_registers, 256);
|
||||
m_blake2b_hash_registers_64->setArgs(m_hashes, m_registers, 256);
|
||||
|
||||
m_randomx_jit = new RxJitKernel(m_program);
|
||||
m_randomx_jit->setArgs(m_entropy, m_registers, m_intermediate_programs, m_programs, batch_size, m_rounding);
|
||||
m_randomx_jit->setArgs(m_entropy, m_registers, m_intermediate_programs, m_programs, m_intensity, m_rounding);
|
||||
|
||||
if (!loadAsmProgram()) {
|
||||
throw std::runtime_error(OclError::toString(CL_INVALID_PROGRAM));
|
||||
}
|
||||
|
||||
m_randomx_run = new RxRunKernel(m_asmProgram);
|
||||
m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, batch_size, m_algorithm);
|
||||
m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, m_intensity, m_algorithm);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclRxJitRunner::execute(uint32_t iteration)
|
||||
{
|
||||
const uint32_t g_intensity = data().thread.intensity();
|
||||
|
||||
m_randomx_jit->enqueue(m_queue, g_intensity, iteration);
|
||||
m_randomx_jit->enqueue(m_queue, m_intensity, iteration);
|
||||
|
||||
OclLib::finish(m_queue);
|
||||
|
||||
m_randomx_run->enqueue(m_queue, g_intensity);
|
||||
m_randomx_run->enqueue(m_queue, m_intensity);
|
||||
}
|
||||
|
||||
|
||||
|
@ -98,11 +92,9 @@ void xmrig::OclRxJitRunner::init()
|
|||
{
|
||||
OclRxBaseRunner::init();
|
||||
|
||||
const size_t g_thd = data().thread.intensity();
|
||||
|
||||
m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * g_thd);
|
||||
m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * g_thd);
|
||||
m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * g_thd);
|
||||
m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * m_intensity);
|
||||
m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * m_intensity);
|
||||
m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * m_intensity);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ xmrig::OclRxVmRunner::~OclRxVmRunner()
|
|||
|
||||
size_t xmrig::OclRxVmRunner::bufferSize() const
|
||||
{
|
||||
return OclRxBaseRunner::bufferSize() + (align(2560 * data().thread.intensity()));
|
||||
return OclRxBaseRunner::bufferSize() + (align(2560 * m_intensity));
|
||||
}
|
||||
|
||||
|
||||
|
@ -60,10 +60,9 @@ void xmrig::OclRxVmRunner::build()
|
|||
{
|
||||
OclRxBaseRunner::build();
|
||||
|
||||
const uint32_t batch_size = data().thread.intensity();
|
||||
const uint32_t hashStrideBytes = RxAlgo::programSize(m_algorithm) * 8;
|
||||
const uint32_t hashStrideBytes = RxAlgo::programSize(m_algorithm) * 8;
|
||||
|
||||
m_hashAes1Rx4->setArgs(m_scratchpads, m_vm_states, hashStrideBytes, batch_size);
|
||||
m_hashAes1Rx4->setArgs(m_scratchpads, m_vm_states, hashStrideBytes, m_intensity);
|
||||
m_blake2b_hash_registers_32->setArgs(m_hashes, m_vm_states, hashStrideBytes);
|
||||
m_blake2b_hash_registers_64->setArgs(m_hashes, m_vm_states, hashStrideBytes);
|
||||
|
||||
|
@ -71,7 +70,7 @@ void xmrig::OclRxVmRunner::build()
|
|||
m_init_vm->setArgs(m_entropy, m_vm_states, m_rounding);
|
||||
|
||||
m_execute_vm = new ExecuteVmKernel(m_program);
|
||||
m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), batch_size);
|
||||
m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), m_intensity);
|
||||
}
|
||||
|
||||
|
||||
|
@ -79,9 +78,8 @@ void xmrig::OclRxVmRunner::execute(uint32_t iteration)
|
|||
{
|
||||
const uint32_t bfactor = std::min(data().thread.bfactor(), 8u);
|
||||
const uint32_t num_iterations = RxAlgo::programIterations(m_algorithm) >> bfactor;
|
||||
const uint32_t g_intensity = data().thread.intensity();
|
||||
|
||||
m_init_vm->enqueue(m_queue, g_intensity, iteration);
|
||||
m_init_vm->enqueue(m_queue, m_intensity, iteration);
|
||||
|
||||
m_execute_vm->setIterations(num_iterations);
|
||||
|
||||
|
@ -90,7 +88,7 @@ void xmrig::OclRxVmRunner::execute(uint32_t iteration)
|
|||
m_execute_vm->setLast(1);
|
||||
}
|
||||
|
||||
m_execute_vm->enqueue(m_queue, g_intensity, m_worksize);
|
||||
m_execute_vm->enqueue(m_queue, m_intensity, m_worksize);
|
||||
|
||||
if (j == 0) {
|
||||
m_execute_vm->setFirst(0);
|
||||
|
@ -103,5 +101,5 @@ void xmrig::OclRxVmRunner::init()
|
|||
{
|
||||
OclRxBaseRunner::init();
|
||||
|
||||
m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * data().thread.intensity());
|
||||
m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * m_intensity);
|
||||
}
|
||||
|
|
|
@ -63,9 +63,7 @@ xmrig::OclRyoRunner::~OclRyoRunner()
|
|||
|
||||
size_t xmrig::OclRyoRunner::bufferSize() const
|
||||
{
|
||||
const size_t g_thd = data().thread.intensity();
|
||||
|
||||
return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * g_thd) + align(200 * g_thd);
|
||||
return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * m_intensity) + align(200 * m_intensity);
|
||||
}
|
||||
|
||||
|
||||
|
@ -73,9 +71,8 @@ void xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
|||
{
|
||||
static const cl_uint zero = 0;
|
||||
|
||||
const size_t g_intensity = data().thread.intensity();
|
||||
const size_t w_size = data().thread.worksize();
|
||||
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
|
||||
const size_t w_size = data().thread.worksize();
|
||||
const size_t g_thd = ((m_intensity + w_size - 1u) / w_size) * w_size;
|
||||
|
||||
assert(g_thd % w_size == 0);
|
||||
|
||||
|
@ -109,19 +106,17 @@ void xmrig::OclRyoRunner::build()
|
|||
{
|
||||
OclBaseRunner::build();
|
||||
|
||||
const uint32_t intensity = data().thread.intensity();
|
||||
|
||||
m_cn00 = new Cn00RyoKernel(m_program);
|
||||
m_cn00->setArgs(m_scratchpads, m_states);
|
||||
|
||||
m_cn0 = new Cn0Kernel(m_program);
|
||||
m_cn0->setArgs(m_input, m_scratchpads, m_states, intensity);
|
||||
m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity);
|
||||
|
||||
m_cn1 = new Cn1RyoKernel(m_program);
|
||||
m_cn1->setArgs(m_scratchpads, m_states, intensity);
|
||||
m_cn1->setArgs(m_scratchpads, m_states, m_intensity);
|
||||
|
||||
m_cn2 = new Cn2RyoKernel(m_program);
|
||||
m_cn2->setArgs(m_scratchpads, m_states, m_output, intensity);
|
||||
m_cn2->setArgs(m_scratchpads, m_states, m_output, m_intensity);
|
||||
}
|
||||
|
||||
|
||||
|
@ -129,8 +124,6 @@ void xmrig::OclRyoRunner::init()
|
|||
{
|
||||
OclBaseRunner::init();
|
||||
|
||||
const size_t g_thd = data().thread.intensity();
|
||||
|
||||
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * g_thd);
|
||||
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd);
|
||||
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * m_intensity);
|
||||
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * m_intensity);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue