mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-11 05:14:40 +00:00
Implemented cn1 kernel launch.
This commit is contained in:
parent
fdaa0b7ba1
commit
138304ff51
9 changed files with 1062 additions and 981 deletions
|
@ -74,7 +74,7 @@ inline ulong getIdx()
|
|||
|
||||
|
||||
__attribute__((reqd_work_group_size(8, 8, 1)))
|
||||
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
||||
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
|
||||
{
|
||||
uint ExpandedKey1[40];
|
||||
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
|
||||
|
@ -94,10 +94,6 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
|||
|
||||
__local ulong State_buf[8 * 25];
|
||||
|
||||
# if (COMP_MODE == 1)
|
||||
// do not use early return here
|
||||
if (gIdx < Threads)
|
||||
# endif
|
||||
{
|
||||
states += 25 * gIdx;
|
||||
|
||||
|
@ -154,10 +150,6 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
|||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
# if (COMP_MODE == 1)
|
||||
// do not use early return here
|
||||
if (gIdx < Threads)
|
||||
# endif
|
||||
{
|
||||
text = vload4(get_local_id(1) + 4, (__global uint *)(states));
|
||||
|
||||
|
@ -198,10 +190,6 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
|||
}
|
||||
# endif
|
||||
|
||||
# if (COMP_MODE == 1)
|
||||
// do not use early return here
|
||||
if (gIdx < Threads)
|
||||
# endif
|
||||
{
|
||||
const uint local_id1 = get_local_id(1);
|
||||
#pragma unroll 2
|
||||
|
@ -488,7 +476,7 @@ __kernel void cn1_v2(__global uint4 *Scratchpad, __global ulong *states, uint va
|
|||
|
||||
|
||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||
__kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
|
||||
__kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
|
||||
{
|
||||
ulong a[2], b[2];
|
||||
__local uint AES0[256], AES1[256];
|
||||
|
@ -504,10 +492,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
|
|||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
uint4 b_x;
|
||||
# if (COMP_MODE == 1)
|
||||
// do not use early return here
|
||||
if (gIdx < Threads)
|
||||
# endif
|
||||
|
||||
{
|
||||
states += 25 * gIdx;
|
||||
# if (STRIDED_INDEX == 0)
|
||||
|
@ -532,10 +517,6 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
|
|||
|
||||
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
# if (COMP_MODE == 1)
|
||||
// do not use early return here
|
||||
if (gIdx < Threads)
|
||||
# endif
|
||||
{
|
||||
uint idx0 = a[0];
|
||||
|
||||
|
@ -576,6 +557,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
|
|||
# endif
|
||||
}
|
||||
}
|
||||
|
||||
mem_fence(CLK_GLOBAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -42,11 +42,10 @@ bool xmrig::Cn0Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t th
|
|||
}
|
||||
|
||||
|
||||
// __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
||||
bool xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
|
||||
// __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
|
||||
bool xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states)
|
||||
{
|
||||
return setArg(0, sizeof(cl_mem), &input) &&
|
||||
setArg(1, sizeof(cl_mem), &scratchpads) &&
|
||||
setArg(2, sizeof(cl_mem), &states) &&
|
||||
setArg(3, sizeof(uint32_t), &threads);
|
||||
setArg(2, sizeof(cl_mem), &states);
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ class Cn0Kernel : public OclKernel
|
|||
public:
|
||||
Cn0Kernel(cl_program program);
|
||||
bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
|
||||
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
|
||||
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states);
|
||||
};
|
||||
|
||||
|
||||
|
|
51
src/backend/opencl/kernels/Cn1Kernel.cpp
Normal file
51
src/backend/opencl/kernels/Cn1Kernel.cpp
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "backend/opencl/kernels/Cn1Kernel.h"
|
||||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
|
||||
|
||||
xmrig::Cn1Kernel::Cn1Kernel(cl_program program) : OclKernel(program, "cn1")
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
bool xmrig::Cn1Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize)
|
||||
{
|
||||
const size_t offset = nonce;
|
||||
const size_t gthreads = threads;
|
||||
const size_t lthreads = worksize;
|
||||
|
||||
return enqueueNDRange(queue, 1, &offset, >hreads, <hreads);
|
||||
}
|
||||
|
||||
|
||||
// __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
|
||||
bool xmrig::Cn1Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states)
|
||||
{
|
||||
return setArg(0, sizeof(cl_mem), &input) &&
|
||||
setArg(1, sizeof(cl_mem), &scratchpads) &&
|
||||
setArg(2, sizeof(cl_mem), &states);
|
||||
}
|
47
src/backend/opencl/kernels/Cn1Kernel.h
Normal file
47
src/backend/opencl/kernels/Cn1Kernel.h
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CN1KERNEL_H
|
||||
#define XMRIG_CN1KERNEL_H
|
||||
|
||||
|
||||
#include "backend/opencl/wrappers/OclKernel.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class Cn1Kernel : public OclKernel
|
||||
{
|
||||
public:
|
||||
Cn1Kernel(cl_program program);
|
||||
bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize);
|
||||
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states);
|
||||
};
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_CN1KERNEL_H */
|
|
@ -7,6 +7,7 @@ if (WITH_OPENCL)
|
|||
src/backend/opencl/cl/OclSource.h
|
||||
src/backend/opencl/interfaces/IOclRunner.h
|
||||
src/backend/opencl/kernels/Cn0Kernel.h
|
||||
src/backend/opencl/kernels/Cn1Kernel.h
|
||||
src/backend/opencl/OclBackend.h
|
||||
src/backend/opencl/OclCache.h
|
||||
src/backend/opencl/OclConfig.h
|
||||
|
@ -28,6 +29,7 @@ if (WITH_OPENCL)
|
|||
set(SOURCES_BACKEND_OPENCL
|
||||
src/backend/opencl/cl/OclSource.cpp
|
||||
src/backend/opencl/kernels/Cn0Kernel.cpp
|
||||
src/backend/opencl/kernels/Cn1Kernel.cpp
|
||||
src/backend/opencl/OclBackend.cpp
|
||||
src/backend/opencl/OclCache.cpp
|
||||
src/backend/opencl/OclConfig.cpp
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
|
||||
#include "backend/opencl/kernels/Cn0Kernel.h"
|
||||
#include "backend/opencl/kernels/Cn1Kernel.h"
|
||||
#include "backend/opencl/OclLaunchData.h"
|
||||
#include "backend/opencl/runners/OclCnRunner.h"
|
||||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
|
@ -121,6 +122,8 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
|||
const size_t w_size = data().thread.worksize();
|
||||
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
|
||||
|
||||
assert(g_thd % w_size == 0);
|
||||
|
||||
for (size_t i = 0; i < BRANCH_MAX; ++i) {
|
||||
if (OclLib::enqueueWriteBuffer(m_queue, m_branches[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero, 0, nullptr, nullptr) != CL_SUCCESS) {
|
||||
return false;
|
||||
|
@ -135,6 +138,10 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!m_cn1->enqueue(m_queue, nonce, g_thd, w_size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
OclLib::finish(m_queue);
|
||||
|
||||
return true;
|
||||
|
@ -143,7 +150,9 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
|||
|
||||
bool xmrig::OclCnRunner::selfTest() const
|
||||
{
|
||||
return OclBaseRunner::selfTest() && m_cn0->isValid();
|
||||
return OclBaseRunner::selfTest() &&
|
||||
m_cn0->isValid() &&
|
||||
m_cn1->isValid();
|
||||
}
|
||||
|
||||
|
||||
|
@ -160,7 +169,11 @@ bool xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!m_cn0->setArgs(m_input, m_scratchpads, m_states, data().thread.intensity())) {
|
||||
if (!m_cn0->setArgs(m_input, m_scratchpads, m_states)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!m_cn1->setArgs(m_input, m_scratchpads, m_states)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -177,4 +190,5 @@ void xmrig::OclCnRunner::build()
|
|||
}
|
||||
|
||||
m_cn0 = new Cn0Kernel(m_program);
|
||||
m_cn1 = new Cn1Kernel(m_program);
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ namespace xmrig {
|
|||
|
||||
|
||||
class Cn0Kernel;
|
||||
class Cn1Kernel;
|
||||
|
||||
|
||||
class OclCnRunner : public OclBaseRunner
|
||||
|
@ -62,6 +63,7 @@ private:
|
|||
cl_mem m_scratchpads = nullptr;
|
||||
cl_mem m_states = nullptr;
|
||||
Cn0Kernel *m_cn0 = nullptr;
|
||||
Cn1Kernel *m_cn1 = nullptr;
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue