Implemented OpenCL JIT mode.

This commit is contained in:
XMRig 2019-09-12 13:10:50 +07:00
parent db79911c4b
commit 04a4a6cadc
10 changed files with 322 additions and 60 deletions

View file

@ -26,9 +26,6 @@
#include "backend/opencl/kernels/rx/InitVmKernel.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/log/Log.h"
#include <thread>
void xmrig::InitVmKernel::enqueue(cl_command_queue queue, size_t threads, uint32_t iteration)
{
@ -37,10 +34,6 @@ void xmrig::InitVmKernel::enqueue(cl_command_queue queue, size_t threads, uint32
const size_t gthreads = threads * 8;
static const size_t lthreads = 32;
// LOG_WARN("%zu %zu %u", gthreads, lthreads, iteration);
// std::this_thread::sleep_for(std::chrono::milliseconds(500));
enqueueNDRange(queue, 1, nullptr, &gthreads, &lthreads);
}

View file

@ -0,0 +1,50 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "backend/opencl/kernels/rx/RxJitKernel.h"
#include "backend/opencl/wrappers/OclLib.h"
void xmrig::RxJitKernel::enqueue(cl_command_queue queue, size_t threads, uint32_t iteration)
{
setArg(6, sizeof(uint32_t), &iteration);
const size_t gthreads = threads * 32;
static const size_t lthreads = 64;
enqueueNDRange(queue, 1, nullptr, &gthreads, &lthreads);
}
// __kernel void randomx_jit(__global ulong* entropy, __global ulong* registers, __global uint2* intermediate_programs, __global uint* programs, uint batch_size, __global uint32_t* rounding, uint32_t iteration)
void xmrig::RxJitKernel::setArgs(cl_mem entropy, cl_mem registers, cl_mem intermediate_programs, cl_mem programs, uint32_t batch_size, cl_mem rounding)
{
setArg(0, sizeof(cl_mem), &entropy);
setArg(1, sizeof(cl_mem), &registers);
setArg(2, sizeof(cl_mem), &intermediate_programs);
setArg(3, sizeof(cl_mem), &programs);
setArg(4, sizeof(uint32_t), &batch_size);
setArg(5, sizeof(cl_mem), &rounding);
}

View file

@ -0,0 +1,48 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_RXJITKERNEL_H
#define XMRIG_RXJITKERNEL_H
#include "backend/opencl/wrappers/OclKernel.h"
namespace xmrig {
class RxJitKernel : public OclKernel
{
public:
inline RxJitKernel(cl_program program) : OclKernel(program, "randomx_jit") {}
void enqueue(cl_command_queue queue, size_t threads, uint32_t iteration);
void setArgs(cl_mem entropy, cl_mem registers, cl_mem intermediate_programs, cl_mem programs, uint32_t batch_size, cl_mem rounding);
};
} // namespace xmrig
#endif /* XMRIG_RXJITKERNEL_H */

View file

@ -0,0 +1,70 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "backend/opencl/kernels/rx/RxRunKernel.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "crypto/common/Algorithm.h"
#include "crypto/randomx/randomx.h"
#include "crypto/rx/RxAlgo.h"
void xmrig::RxRunKernel::enqueue(cl_command_queue queue, size_t threads)
{
const size_t gthreads = threads * 64;
static const size_t lthreads = 64;
enqueueNDRange(queue, 1, nullptr, &gthreads, &lthreads);
}
void xmrig::RxRunKernel::setArgs(cl_mem dataset, cl_mem scratchpads, cl_mem registers, cl_mem rounding, cl_mem programs, uint32_t batch_size, const Algorithm &algorithm)
{
setArg(0, sizeof(cl_mem), &dataset);
setArg(1, sizeof(cl_mem), &scratchpads);
setArg(2, sizeof(cl_mem), &registers);
setArg(3, sizeof(cl_mem), &rounding);
setArg(4, sizeof(cl_mem), &programs);
setArg(5, sizeof(uint32_t), &batch_size);
auto PowerOf2 = [](size_t N)
{
uint32_t result = 0;
while (N > 1) {
++result;
N >>= 1;
}
return result;
};
const auto *rx_conf = RxAlgo::base(algorithm);
const uint32_t rx_parameters =
(PowerOf2(rx_conf->ScratchpadL1_Size) << 0) |
(PowerOf2(rx_conf->ScratchpadL2_Size) << 5) |
(PowerOf2(rx_conf->ScratchpadL3_Size) << 10) |
(PowerOf2(rx_conf->ProgramIterations) << 15);
setArg(6, sizeof(uint32_t), &rx_parameters);
}

View file

@ -0,0 +1,51 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_RXRUNKERNEL_H
#define XMRIG_RXRUNKERNEL_H
#include "backend/opencl/wrappers/OclKernel.h"
namespace xmrig {
class Algorithm;
class RxRunKernel : public OclKernel
{
public:
inline RxRunKernel(cl_program program) : OclKernel(program, "randomx_run") {}
void enqueue(cl_command_queue queue, size_t threads);
void setArgs(cl_mem dataset, cl_mem scratchpads, cl_mem registers, cl_mem rounding, cl_mem programs, uint32_t batch_size, const Algorithm &algorithm);
};
} // namespace xmrig
#endif /* XMRIG_RXRUNKERNEL_H */

View file

@ -73,6 +73,8 @@ if (WITH_OPENCL)
src/backend/opencl/kernels/rx/FindSharesKernel.h
src/backend/opencl/kernels/rx/HashAesKernel.cpp
src/backend/opencl/kernels/rx/InitVmKernel.h
src/backend/opencl/kernels/rx/RxJitKernel.h
src/backend/opencl/kernels/rx/RxRunKernel.h
src/backend/opencl/runners/OclRxBaseRunner.h
src/backend/opencl/runners/OclRxJitRunner.h
src/backend/opencl/runners/OclRxVmRunner.h
@ -87,6 +89,8 @@ if (WITH_OPENCL)
src/backend/opencl/kernels/rx/FindSharesKernel.cpp
src/backend/opencl/kernels/rx/HashAesKernel.cpp
src/backend/opencl/kernels/rx/InitVmKernel.cpp
src/backend/opencl/kernels/rx/RxJitKernel.cpp
src/backend/opencl/kernels/rx/RxRunKernel.cpp
src/backend/opencl/runners/OclRxBaseRunner.cpp
src/backend/opencl/runners/OclRxJitRunner.cpp
src/backend/opencl/runners/OclRxVmRunner.cpp

View file

@ -24,10 +24,15 @@
#include "backend/opencl/runners/OclRxJitRunner.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/kernels/rx/HashAesKernel.h"
#include "backend/opencl/cl/rx/randomx_run_gfx803.h"
#include "backend/opencl/cl/rx/randomx_run_gfx900.h"
#include "backend/opencl/kernels/rx/Blake2bHashRegistersKernel.h"
#include "backend/opencl/kernels/rx/HashAesKernel.h"
#include "backend/opencl/kernels/rx/RxJitKernel.h"
#include "backend/opencl/kernels/rx/RxRunKernel.h"
#include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "backend/opencl/wrappers/OclError.h"
xmrig::OclRxJitRunner::OclRxJitRunner(size_t index, const OclLaunchData &data) : OclRxBaseRunner(index, data)
@ -37,6 +42,10 @@ xmrig::OclRxJitRunner::OclRxJitRunner(size_t index, const OclLaunchData &data) :
xmrig::OclRxJitRunner::~OclRxJitRunner()
{
delete m_randomx_jit;
delete m_randomx_run;
OclLib::release(m_asmProgram);
OclLib::release(m_intermediate_programs);
OclLib::release(m_programs);
OclLib::release(m_registers);
@ -52,11 +61,28 @@ void xmrig::OclRxJitRunner::build()
m_hashAes1Rx4->setArgs(m_scratchpads, m_registers, 256, batch_size);
m_blake2b_hash_registers_32->setArgs(m_hashes, m_registers, 256);
m_blake2b_hash_registers_64->setArgs(m_hashes, m_registers, 256);
m_randomx_jit = new RxJitKernel(m_program);
m_randomx_jit->setArgs(m_entropy, m_registers, m_intermediate_programs, m_programs, batch_size, m_rounding);
if (!loadAsmProgram()) {
throw std::runtime_error(OclError::toString(CL_INVALID_PROGRAM));
}
m_randomx_run = new RxRunKernel(m_asmProgram);
m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, batch_size, m_algorithm);
}
void xmrig::OclRxJitRunner::execute(uint32_t iteration)
{
const uint32_t g_intensity = data().thread.intensity();
m_randomx_jit->enqueue(m_queue, g_intensity, iteration);
OclLib::finish(m_queue);
m_randomx_run->enqueue(m_queue, g_intensity);
}
@ -70,3 +96,46 @@ void xmrig::OclRxJitRunner::init()
m_intermediate_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 5120 * g_thd, nullptr);
m_programs = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 10048 * g_thd, nullptr);
}
bool xmrig::OclRxJitRunner::loadAsmProgram()
{
// Adrenaline drivers on Windows and amdgpu-pro drivers on Linux use ELF header's flags (offset 0x30) to store internal device ID
// Read it from compiled OpenCL code and substitute this ID into pre-compiled binary to make sure the driver accepts it
uint32_t elf_header_flags = 0;
const uint32_t elf_header_flags_offset = 0x30;
size_t bin_size;
if (OclLib::getProgramInfo(m_program, CL_PROGRAM_BINARY_SIZES, sizeof(bin_size), &bin_size) != CL_SUCCESS) {
return false;
}
std::vector<char> binary_data(bin_size);
char* tmp[1] = { binary_data.data() };
if (OclLib::getProgramInfo(m_program, CL_PROGRAM_BINARIES, sizeof(char*), tmp) != CL_SUCCESS) {
return false;
}
if (bin_size >= elf_header_flags_offset + sizeof(uint32_t)) {
elf_header_flags = *reinterpret_cast<uint32_t*>((binary_data.data() + elf_header_flags_offset));
}
const size_t len = (m_gcn_version == 14) ? randomx_run_gfx900_bin_size : randomx_run_gfx803_bin_size;
unsigned char *binary = (m_gcn_version == 14) ? randomx_run_gfx900_bin : randomx_run_gfx803_bin;
// Set correct internal device ID in the pre-compiled binary
if (elf_header_flags) {
*reinterpret_cast<uint32_t*>(binary + elf_header_flags_offset) = elf_header_flags;
}
cl_int status;
cl_int ret;
cl_device_id device = data().device.id();
m_asmProgram = OclLib::createProgramWithBinary(ctx(), 1, &device, &len, (const unsigned char**) &binary, &status, &ret);
if (ret != CL_SUCCESS) {
return false;
}
return OclLib::buildProgram(m_asmProgram, 1, &device) == CL_SUCCESS;
}

View file

@ -32,6 +32,10 @@
namespace xmrig {
class RxJitKernel;
class RxRunKernel;
class OclRxJitRunner : public OclRxBaseRunner
{
public:
@ -46,9 +50,14 @@ protected:
void init() override;
private:
bool loadAsmProgram();
cl_mem m_intermediate_programs = nullptr;
cl_mem m_programs = nullptr;
cl_mem m_registers = nullptr;
cl_program m_asmProgram = nullptr;
RxJitKernel *m_randomx_jit = nullptr;
RxRunKernel *m_randomx_run = nullptr;
};

View file

@ -30,22 +30,28 @@
xmrig::Algorithm::Id xmrig::RxAlgo::apply(Algorithm::Id algorithm)
{
randomx_apply_config(*base(algorithm));
return algorithm;
}
RandomX_ConfigurationBase *xmrig::RxAlgo::base(Algorithm::Id algorithm)
{
switch (algorithm) {
case Algorithm::RX_WOW:
randomx_apply_config(RandomX_WowneroConfig);
break;
return &RandomX_WowneroConfig;
case Algorithm::RX_LOKI:
randomx_apply_config(RandomX_LokiConfig);
return &RandomX_LokiConfig;
break;
default:
randomx_apply_config(RandomX_MoneroConfig);
break;
}
return algorithm;
return &RandomX_MoneroConfig;
}
@ -57,59 +63,17 @@ uint32_t xmrig::RxAlgo::version(Algorithm::Id algorithm)
uint32_t xmrig::RxAlgo::programCount(Algorithm::Id algorithm)
{
switch (algorithm) {
case Algorithm::RX_0:
return RandomX_MoneroConfig.ProgramCount;
case Algorithm::RX_WOW:
return RandomX_WowneroConfig.ProgramCount;
case Algorithm::RX_LOKI:
return RandomX_LokiConfig.ProgramCount;
default:
break;
}
return 0;
return base(algorithm)->ProgramCount;
}
uint32_t xmrig::RxAlgo::programIterations(Algorithm::Id algorithm)
{
switch (algorithm) {
case Algorithm::RX_0:
return RandomX_MoneroConfig.ProgramIterations;
case Algorithm::RX_WOW:
return RandomX_WowneroConfig.ProgramIterations;
case Algorithm::RX_LOKI:
return RandomX_LokiConfig.ProgramIterations;
default:
break;
}
return 0;
return base(algorithm)->ProgramIterations;
}
uint32_t xmrig::RxAlgo::programSize(Algorithm::Id algorithm)
{
switch (algorithm) {
case Algorithm::RX_0:
return RandomX_MoneroConfig.ProgramSize;
case Algorithm::RX_WOW:
return RandomX_WowneroConfig.ProgramSize;
case Algorithm::RX_LOKI:
return RandomX_LokiConfig.ProgramSize;
default:
break;
}
return 0;
return base(algorithm)->ProgramSize;
}

View file

@ -35,6 +35,9 @@
#include "crypto/common/Algorithm.h"
struct RandomX_ConfigurationBase;
namespace xmrig
{
@ -43,6 +46,7 @@ class RxAlgo
{
public:
static Algorithm::Id apply(Algorithm::Id algorithm);
static RandomX_ConfigurationBase *base(Algorithm::Id algorithm);
static uint32_t programCount(Algorithm::Id algorithm);
static uint32_t programIterations(Algorithm::Id algorithm);
static uint32_t programSize(Algorithm::Id algorithm);