Implemented cn1 kernel launch.

This commit is contained in:
XMRig 2019-09-01 09:34:37 +07:00
parent fdaa0b7ba1
commit 138304ff51
9 changed files with 1062 additions and 981 deletions

View file

@ -74,7 +74,7 @@ inline ulong getIdx()
__attribute__((reqd_work_group_size(8, 8, 1)))
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
{
uint ExpandedKey1[40];
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
@ -94,10 +94,6 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
__local ulong State_buf[8 * 25];
# if (COMP_MODE == 1)
// do not use early return here
if (gIdx < Threads)
# endif
{
states += 25 * gIdx;
@ -154,10 +150,6 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
barrier(CLK_GLOBAL_MEM_FENCE);
# if (COMP_MODE == 1)
// do not use early return here
if (gIdx < Threads)
# endif
{
text = vload4(get_local_id(1) + 4, (__global uint *)(states));
@ -198,10 +190,6 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
}
# endif
# if (COMP_MODE == 1)
// do not use early return here
if (gIdx < Threads)
# endif
{
const uint local_id1 = get_local_id(1);
#pragma unroll 2
@ -488,7 +476,7 @@ __kernel void cn1_v2(__global uint4 *Scratchpad, __global ulong *states, uint va
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
__kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
{
ulong a[2], b[2];
__local uint AES0[256], AES1[256];
@ -504,10 +492,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
barrier(CLK_LOCAL_MEM_FENCE);
uint4 b_x;
# if (COMP_MODE == 1)
// do not use early return here
if (gIdx < Threads)
# endif
{
states += 25 * gIdx;
# if (STRIDED_INDEX == 0)
@ -532,10 +517,6 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
mem_fence(CLK_LOCAL_MEM_FENCE);
# if (COMP_MODE == 1)
// do not use early return here
if (gIdx < Threads)
# endif
{
uint idx0 = a[0];
@ -576,6 +557,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
# endif
}
}
mem_fence(CLK_GLOBAL_MEM_FENCE);
}

File diff suppressed because it is too large Load diff

View file

@ -42,11 +42,10 @@ bool xmrig::Cn0Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t th
}
// __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
bool xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
// __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
bool xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states)
{
return setArg(0, sizeof(cl_mem), &input) &&
setArg(1, sizeof(cl_mem), &scratchpads) &&
setArg(2, sizeof(cl_mem), &states) &&
setArg(3, sizeof(uint32_t), &threads);
setArg(2, sizeof(cl_mem), &states);
}

View file

@ -37,7 +37,7 @@ class Cn0Kernel : public OclKernel
public:
Cn0Kernel(cl_program program);
bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states);
};

View file

@ -0,0 +1,51 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "backend/opencl/kernels/Cn1Kernel.h"
#include "backend/opencl/wrappers/OclLib.h"
xmrig::Cn1Kernel::Cn1Kernel(cl_program program) : OclKernel(program, "cn1")
{
}
bool xmrig::Cn1Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize)
{
const size_t offset = nonce;
const size_t gthreads = threads;
const size_t lthreads = worksize;
return enqueueNDRange(queue, 1, &offset, &gthreads, &lthreads);
}
// __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
bool xmrig::Cn1Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states)
{
return setArg(0, sizeof(cl_mem), &input) &&
setArg(1, sizeof(cl_mem), &scratchpads) &&
setArg(2, sizeof(cl_mem), &states);
}

View file

@ -0,0 +1,47 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_CN1KERNEL_H
#define XMRIG_CN1KERNEL_H
#include "backend/opencl/wrappers/OclKernel.h"
namespace xmrig {
class Cn1Kernel : public OclKernel
{
public:
Cn1Kernel(cl_program program);
bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize);
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states);
};
} // namespace xmrig
#endif /* XMRIG_CN1KERNEL_H */

View file

@ -7,6 +7,7 @@ if (WITH_OPENCL)
src/backend/opencl/cl/OclSource.h
src/backend/opencl/interfaces/IOclRunner.h
src/backend/opencl/kernels/Cn0Kernel.h
src/backend/opencl/kernels/Cn1Kernel.h
src/backend/opencl/OclBackend.h
src/backend/opencl/OclCache.h
src/backend/opencl/OclConfig.h
@ -28,6 +29,7 @@ if (WITH_OPENCL)
set(SOURCES_BACKEND_OPENCL
src/backend/opencl/cl/OclSource.cpp
src/backend/opencl/kernels/Cn0Kernel.cpp
src/backend/opencl/kernels/Cn1Kernel.cpp
src/backend/opencl/OclBackend.cpp
src/backend/opencl/OclCache.cpp
src/backend/opencl/OclConfig.cpp

View file

@ -24,6 +24,7 @@
#include "backend/opencl/kernels/Cn0Kernel.h"
#include "backend/opencl/kernels/Cn1Kernel.h"
#include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/runners/OclCnRunner.h"
#include "backend/opencl/wrappers/OclLib.h"
@ -121,6 +122,8 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
const size_t w_size = data().thread.worksize();
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
assert(g_thd % w_size == 0);
for (size_t i = 0; i < BRANCH_MAX; ++i) {
if (OclLib::enqueueWriteBuffer(m_queue, m_branches[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero, 0, nullptr, nullptr) != CL_SUCCESS) {
return false;
@ -135,6 +138,10 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
return false;
}
if (!m_cn1->enqueue(m_queue, nonce, g_thd, w_size)) {
return false;
}
OclLib::finish(m_queue);
return true;
@ -143,7 +150,9 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
bool xmrig::OclCnRunner::selfTest() const
{
return OclBaseRunner::selfTest() && m_cn0->isValid();
return OclBaseRunner::selfTest() &&
m_cn0->isValid() &&
m_cn1->isValid();
}
@ -160,7 +169,11 @@ bool xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
return false;
}
if (!m_cn0->setArgs(m_input, m_scratchpads, m_states, data().thread.intensity())) {
if (!m_cn0->setArgs(m_input, m_scratchpads, m_states)) {
return false;
}
if (!m_cn1->setArgs(m_input, m_scratchpads, m_states)) {
return false;
}
@ -177,4 +190,5 @@ void xmrig::OclCnRunner::build()
}
m_cn0 = new Cn0Kernel(m_program);
m_cn1 = new Cn1Kernel(m_program);
}

View file

@ -33,6 +33,7 @@ namespace xmrig {
class Cn0Kernel;
class Cn1Kernel;
class OclCnRunner : public OclBaseRunner
@ -62,6 +63,7 @@ private:
cl_mem m_scratchpads = nullptr;
cl_mem m_states = nullptr;
Cn0Kernel *m_cn0 = nullptr;
Cn1Kernel *m_cn1 = nullptr;
};