mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-11 05:14:40 +00:00
Implemented cn1 kernel launch.
This commit is contained in:
parent
fdaa0b7ba1
commit
138304ff51
9 changed files with 1062 additions and 981 deletions
|
@ -74,7 +74,7 @@ inline ulong getIdx()
|
||||||
|
|
||||||
|
|
||||||
__attribute__((reqd_work_group_size(8, 8, 1)))
|
__attribute__((reqd_work_group_size(8, 8, 1)))
|
||||||
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
|
||||||
{
|
{
|
||||||
uint ExpandedKey1[40];
|
uint ExpandedKey1[40];
|
||||||
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
|
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
|
||||||
|
@ -94,10 +94,6 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
||||||
|
|
||||||
__local ulong State_buf[8 * 25];
|
__local ulong State_buf[8 * 25];
|
||||||
|
|
||||||
# if (COMP_MODE == 1)
|
|
||||||
// do not use early return here
|
|
||||||
if (gIdx < Threads)
|
|
||||||
# endif
|
|
||||||
{
|
{
|
||||||
states += 25 * gIdx;
|
states += 25 * gIdx;
|
||||||
|
|
||||||
|
@ -154,10 +150,6 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
||||||
|
|
||||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||||
|
|
||||||
# if (COMP_MODE == 1)
|
|
||||||
// do not use early return here
|
|
||||||
if (gIdx < Threads)
|
|
||||||
# endif
|
|
||||||
{
|
{
|
||||||
text = vload4(get_local_id(1) + 4, (__global uint *)(states));
|
text = vload4(get_local_id(1) + 4, (__global uint *)(states));
|
||||||
|
|
||||||
|
@ -198,10 +190,6 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# if (COMP_MODE == 1)
|
|
||||||
// do not use early return here
|
|
||||||
if (gIdx < Threads)
|
|
||||||
# endif
|
|
||||||
{
|
{
|
||||||
const uint local_id1 = get_local_id(1);
|
const uint local_id1 = get_local_id(1);
|
||||||
#pragma unroll 2
|
#pragma unroll 2
|
||||||
|
@ -488,7 +476,7 @@ __kernel void cn1_v2(__global uint4 *Scratchpad, __global ulong *states, uint va
|
||||||
|
|
||||||
|
|
||||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||||
__kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
|
__kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
|
||||||
{
|
{
|
||||||
ulong a[2], b[2];
|
ulong a[2], b[2];
|
||||||
__local uint AES0[256], AES1[256];
|
__local uint AES0[256], AES1[256];
|
||||||
|
@ -504,10 +492,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
uint4 b_x;
|
uint4 b_x;
|
||||||
# if (COMP_MODE == 1)
|
|
||||||
// do not use early return here
|
|
||||||
if (gIdx < Threads)
|
|
||||||
# endif
|
|
||||||
{
|
{
|
||||||
states += 25 * gIdx;
|
states += 25 * gIdx;
|
||||||
# if (STRIDED_INDEX == 0)
|
# if (STRIDED_INDEX == 0)
|
||||||
|
@ -532,10 +517,6 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
|
||||||
|
|
||||||
mem_fence(CLK_LOCAL_MEM_FENCE);
|
mem_fence(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
# if (COMP_MODE == 1)
|
|
||||||
// do not use early return here
|
|
||||||
if (gIdx < Threads)
|
|
||||||
# endif
|
|
||||||
{
|
{
|
||||||
uint idx0 = a[0];
|
uint idx0 = a[0];
|
||||||
|
|
||||||
|
@ -576,6 +557,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mem_fence(CLK_GLOBAL_MEM_FENCE);
|
mem_fence(CLK_GLOBAL_MEM_FENCE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -42,11 +42,10 @@ bool xmrig::Cn0Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t th
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
// __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
|
||||||
bool xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
|
bool xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states)
|
||||||
{
|
{
|
||||||
return setArg(0, sizeof(cl_mem), &input) &&
|
return setArg(0, sizeof(cl_mem), &input) &&
|
||||||
setArg(1, sizeof(cl_mem), &scratchpads) &&
|
setArg(1, sizeof(cl_mem), &scratchpads) &&
|
||||||
setArg(2, sizeof(cl_mem), &states) &&
|
setArg(2, sizeof(cl_mem), &states);
|
||||||
setArg(3, sizeof(uint32_t), &threads);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ class Cn0Kernel : public OclKernel
|
||||||
public:
|
public:
|
||||||
Cn0Kernel(cl_program program);
|
Cn0Kernel(cl_program program);
|
||||||
bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
|
bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
|
||||||
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
|
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
51
src/backend/opencl/kernels/Cn1Kernel.cpp
Normal file
51
src/backend/opencl/kernels/Cn1Kernel.cpp
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
/* XMRig
|
||||||
|
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||||
|
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||||
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "backend/opencl/kernels/Cn1Kernel.h"
|
||||||
|
#include "backend/opencl/wrappers/OclLib.h"
|
||||||
|
|
||||||
|
|
||||||
|
xmrig::Cn1Kernel::Cn1Kernel(cl_program program) : OclKernel(program, "cn1")
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool xmrig::Cn1Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize)
|
||||||
|
{
|
||||||
|
const size_t offset = nonce;
|
||||||
|
const size_t gthreads = threads;
|
||||||
|
const size_t lthreads = worksize;
|
||||||
|
|
||||||
|
return enqueueNDRange(queue, 1, &offset, &gthreads, &lthreads);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states)
|
||||||
|
bool xmrig::Cn1Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states)
|
||||||
|
{
|
||||||
|
return setArg(0, sizeof(cl_mem), &input) &&
|
||||||
|
setArg(1, sizeof(cl_mem), &scratchpads) &&
|
||||||
|
setArg(2, sizeof(cl_mem), &states);
|
||||||
|
}
|
47
src/backend/opencl/kernels/Cn1Kernel.h
Normal file
47
src/backend/opencl/kernels/Cn1Kernel.h
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
/* XMRig
|
||||||
|
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||||
|
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||||
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XMRIG_CN1KERNEL_H
|
||||||
|
#define XMRIG_CN1KERNEL_H
|
||||||
|
|
||||||
|
|
||||||
|
#include "backend/opencl/wrappers/OclKernel.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
|
class Cn1Kernel : public OclKernel
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Cn1Kernel(cl_program program);
|
||||||
|
bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize);
|
||||||
|
bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace xmrig
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* XMRIG_CN1KERNEL_H */
|
|
@ -7,6 +7,7 @@ if (WITH_OPENCL)
|
||||||
src/backend/opencl/cl/OclSource.h
|
src/backend/opencl/cl/OclSource.h
|
||||||
src/backend/opencl/interfaces/IOclRunner.h
|
src/backend/opencl/interfaces/IOclRunner.h
|
||||||
src/backend/opencl/kernels/Cn0Kernel.h
|
src/backend/opencl/kernels/Cn0Kernel.h
|
||||||
|
src/backend/opencl/kernels/Cn1Kernel.h
|
||||||
src/backend/opencl/OclBackend.h
|
src/backend/opencl/OclBackend.h
|
||||||
src/backend/opencl/OclCache.h
|
src/backend/opencl/OclCache.h
|
||||||
src/backend/opencl/OclConfig.h
|
src/backend/opencl/OclConfig.h
|
||||||
|
@ -28,6 +29,7 @@ if (WITH_OPENCL)
|
||||||
set(SOURCES_BACKEND_OPENCL
|
set(SOURCES_BACKEND_OPENCL
|
||||||
src/backend/opencl/cl/OclSource.cpp
|
src/backend/opencl/cl/OclSource.cpp
|
||||||
src/backend/opencl/kernels/Cn0Kernel.cpp
|
src/backend/opencl/kernels/Cn0Kernel.cpp
|
||||||
|
src/backend/opencl/kernels/Cn1Kernel.cpp
|
||||||
src/backend/opencl/OclBackend.cpp
|
src/backend/opencl/OclBackend.cpp
|
||||||
src/backend/opencl/OclCache.cpp
|
src/backend/opencl/OclCache.cpp
|
||||||
src/backend/opencl/OclConfig.cpp
|
src/backend/opencl/OclConfig.cpp
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
|
|
||||||
#include "backend/opencl/kernels/Cn0Kernel.h"
|
#include "backend/opencl/kernels/Cn0Kernel.h"
|
||||||
|
#include "backend/opencl/kernels/Cn1Kernel.h"
|
||||||
#include "backend/opencl/OclLaunchData.h"
|
#include "backend/opencl/OclLaunchData.h"
|
||||||
#include "backend/opencl/runners/OclCnRunner.h"
|
#include "backend/opencl/runners/OclCnRunner.h"
|
||||||
#include "backend/opencl/wrappers/OclLib.h"
|
#include "backend/opencl/wrappers/OclLib.h"
|
||||||
|
@ -121,6 +122,8 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||||
const size_t w_size = data().thread.worksize();
|
const size_t w_size = data().thread.worksize();
|
||||||
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
|
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
|
||||||
|
|
||||||
|
assert(g_thd % w_size == 0);
|
||||||
|
|
||||||
for (size_t i = 0; i < BRANCH_MAX; ++i) {
|
for (size_t i = 0; i < BRANCH_MAX; ++i) {
|
||||||
if (OclLib::enqueueWriteBuffer(m_queue, m_branches[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero, 0, nullptr, nullptr) != CL_SUCCESS) {
|
if (OclLib::enqueueWriteBuffer(m_queue, m_branches[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero, 0, nullptr, nullptr) != CL_SUCCESS) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -135,6 +138,10 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!m_cn1->enqueue(m_queue, nonce, g_thd, w_size)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
OclLib::finish(m_queue);
|
OclLib::finish(m_queue);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -143,7 +150,9 @@ bool xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||||
|
|
||||||
bool xmrig::OclCnRunner::selfTest() const
|
bool xmrig::OclCnRunner::selfTest() const
|
||||||
{
|
{
|
||||||
return OclBaseRunner::selfTest() && m_cn0->isValid();
|
return OclBaseRunner::selfTest() &&
|
||||||
|
m_cn0->isValid() &&
|
||||||
|
m_cn1->isValid();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -160,7 +169,11 @@ bool xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m_cn0->setArgs(m_input, m_scratchpads, m_states, data().thread.intensity())) {
|
if (!m_cn0->setArgs(m_input, m_scratchpads, m_states)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!m_cn1->setArgs(m_input, m_scratchpads, m_states)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -177,4 +190,5 @@ void xmrig::OclCnRunner::build()
|
||||||
}
|
}
|
||||||
|
|
||||||
m_cn0 = new Cn0Kernel(m_program);
|
m_cn0 = new Cn0Kernel(m_program);
|
||||||
|
m_cn1 = new Cn1Kernel(m_program);
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,7 @@ namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
class Cn0Kernel;
|
class Cn0Kernel;
|
||||||
|
class Cn1Kernel;
|
||||||
|
|
||||||
|
|
||||||
class OclCnRunner : public OclBaseRunner
|
class OclCnRunner : public OclBaseRunner
|
||||||
|
@ -62,6 +63,7 @@ private:
|
||||||
cl_mem m_scratchpads = nullptr;
|
cl_mem m_scratchpads = nullptr;
|
||||||
cl_mem m_states = nullptr;
|
cl_mem m_states = nullptr;
|
||||||
Cn0Kernel *m_cn0 = nullptr;
|
Cn0Kernel *m_cn0 = nullptr;
|
||||||
|
Cn1Kernel *m_cn1 = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue