Merge branch 'evo' into beta

This commit is contained in:
XMRig 2019-09-20 15:44:21 +07:00
commit 3445f47482
42 changed files with 3043 additions and 2916 deletions

View file

@ -1,3 +1,9 @@
# v4.0.1-beta
- [#1177](https://github.com/xmrig/xmrig/issues/1177) Fixed compatibility with old AMD drivers.
- [#1180](https://github.com/xmrig/xmrig/issues/1180) Fixed possible duplicated shares after algorithm switching.
- Added support for the case when not all backend threads start successfully.
- Fixed wrong config file permissions after write (only gcc builds on recent Windows 10 affected).
# v4.0.0-beta
- [#1172](https://github.com/xmrig/xmrig/issues/1172) **Added OpenCL mining backend.**
- [#268](https://github.com/xmrig/xmrig-amd/pull/268) [#270](https://github.com/xmrig/xmrig-amd/pull/270) [#271](https://github.com/xmrig/xmrig-amd/pull/271) [#273](https://github.com/xmrig/xmrig-amd/pull/273) [#274](https://github.com/xmrig/xmrig-amd/pull/274) [#1171](https://github.com/xmrig/xmrig/pull/1171) Added RandomX support for OpenCL, thanks [@SChernykh](https://github.com/SChernykh).

View file

@ -23,10 +23,10 @@
*/
#include <assert.h>
#include <cassert>
#include <cmath>
#include <memory.h>
#include <stdio.h>
#include <cstdio>
#include "backend/common/Hashrate.h"
@ -133,8 +133,8 @@ double xmrig::Hashrate::calc(size_t threadId, size_t ms) const
return nan("");
}
const double hashes = static_cast<double>(lastestHashCnt - earliestHashCount);
const double time = static_cast<double>(lastestStamp - earliestStamp) / 1000.0;
const auto hashes = static_cast<double>(lastestHashCnt - earliestHashCount);
const auto time = static_cast<double>(lastestStamp - earliestStamp) / 1000.0;
return hashes / time;
}
@ -175,3 +175,33 @@ rapidjson::Value xmrig::Hashrate::normalize(double d)
return Value(floor(d * 100.0) / 100.0);
}
#ifdef XMRIG_FEATURE_API
// Builds a JSON array with the normalized result of calc() for the short,
// medium and large intervals, in that order. Array storage comes from the
// allocator of `doc`.
rapidjson::Value xmrig::Hashrate::toJSON(rapidjson::Document &doc) const
{
    using namespace rapidjson;

    const Intervals intervals[] = { ShortInterval, MediumInterval, LargeInterval };
    auto &allocator             = doc.GetAllocator();

    Value out(kArrayType);
    for (const Intervals interval : intervals) {
        out.PushBack(normalize(calc(interval)), allocator);
    }

    return out;
}
// Per-thread variant: builds a JSON array with the normalized result of
// calc(threadId, ...) for the short, medium and large intervals, in that
// order. Array storage comes from the allocator of `doc`.
rapidjson::Value xmrig::Hashrate::toJSON(size_t threadId, rapidjson::Document &doc) const
{
    using namespace rapidjson;

    const Intervals intervals[] = { ShortInterval, MediumInterval, LargeInterval };
    auto &allocator             = doc.GetAllocator();

    Value out(kArrayType);
    for (const Intervals interval : intervals) {
        out.PushBack(normalize(calc(threadId, interval)), allocator);
    }

    return out;
}
#endif

View file

@ -26,10 +26,11 @@
#define XMRIG_HASHRATE_H
#include <stddef.h>
#include <stdint.h>
#include <cstddef>
#include <cstdint>
#include "base/tools/Object.h"
#include "rapidjson/fwd.h"
@ -39,6 +40,8 @@ namespace xmrig {
class Hashrate
{
public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(Hashrate)
enum Intervals {
ShortInterval = 10000,
MediumInterval = 60000,
@ -58,6 +61,11 @@ public:
static const char *format(double h, char *buf, size_t size);
static rapidjson::Value normalize(double d);
# ifdef XMRIG_FEATURE_API
rapidjson::Value toJSON(rapidjson::Document &doc) const;
rapidjson::Value toJSON(size_t threadId, rapidjson::Document &doc) const;
# endif
private:
constexpr static size_t kBucketSize = 2 << 11;
constexpr static size_t kBucketMask = kBucketSize - 1;

View file

@ -26,10 +26,11 @@
#define XMRIG_THREAD_H
#include <thread>
#include "backend/common/interfaces/IWorker.h"
#include "base/tools/Object.h"
#include <thread>
namespace xmrig {
@ -42,6 +43,8 @@ template<class T>
class Thread
{
public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(Thread)
inline Thread(IBackend *backend, size_t id, const T &config) : m_id(id), m_config(config), m_backend(backend) {}
inline ~Thread() { m_thread.join(); delete m_worker; }

View file

@ -29,6 +29,7 @@
#include "backend/common/Workers.h"
#include "backend/cpu/CpuWorker.h"
#include "base/io/log/Log.h"
#include "base/tools/Object.h"
#ifdef XMRIG_FEATURE_OPENCL
@ -42,9 +43,10 @@ namespace xmrig {
class WorkersPrivate
{
public:
inline WorkersPrivate()
{
}
XMRIG_DISABLE_COPY_MOVE(WorkersPrivate)
WorkersPrivate() = default;
inline ~WorkersPrivate()
@ -131,7 +133,7 @@ void xmrig::Workers<T>::tick(uint64_t)
for (Thread<T> *handle : m_workers) {
if (!handle->worker()) {
return;
continue;
}
d_ptr->hashrate->add(handle->id(), handle->worker()->hashCount(), handle->worker()->timestamp());
@ -154,17 +156,21 @@ void xmrig::Workers<T>::onReady(void *arg)
auto handle = static_cast<Thread<T>* >(arg);
IWorker *worker = create(handle);
if (!worker || !worker->selfTest()) {
LOG_ERR("thread %zu error: \"hash self-test failed\".", worker->id());
assert(worker != nullptr);
if (!worker || !worker->selfTest()) {
LOG_ERR("%s " RED("thread ") RED_BOLD("#%zu") RED(" self-test failed"), T::tag(), worker->id());
handle->backend()->start(worker, false);
delete worker;
return;
}
assert(handle->backend() != nullptr);
handle->setWorker(worker);
handle->backend()->start(worker);
handle->backend()->start(worker, true);
}

View file

@ -29,6 +29,7 @@
#include "backend/common/Thread.h"
#include "backend/cpu/CpuLaunchData.h"
#include "base/tools/Object.h"
#ifdef XMRIG_FEATURE_OPENCL
@ -47,6 +48,8 @@ template<class T>
class Workers
{
public:
XMRIG_DISABLE_COPY_MOVE(Workers)
Workers();
~Workers();

View file

@ -26,7 +26,7 @@
#define XMRIG_IBACKEND_H
#include <stdint.h>
#include <cstdint>
#include "rapidjson/fwd.h"
@ -56,7 +56,7 @@ public:
virtual void prepare(const Job &nextJob) = 0;
virtual void printHashrate(bool details) = 0;
virtual void setJob(const Job &job) = 0;
virtual void start(IWorker *worker) = 0;
virtual void start(IWorker *worker, bool ready) = 0;
virtual void stop() = 0;
virtual void tick(uint64_t ticks) = 0;

View file

@ -26,8 +26,8 @@
#define XMRIG_IWORKER_H
#include <stdint.h>
#include <stddef.h>
#include <cstdint>
#include <cstddef>
namespace xmrig {
@ -44,6 +44,7 @@ public:
virtual bool selfTest() = 0;
virtual const VirtualMemory *memory() const = 0;
virtual size_t id() const = 0;
virtual size_t intensity() const = 0;
virtual uint64_t hashCount() const = 0;
virtual uint64_t timestamp() const = 0;
virtual void start() = 0;

View file

@ -60,7 +60,7 @@ namespace xmrig {
extern template class Threads<CpuThreads>;
static const char *tag = CYAN_BG_BOLD(" cpu ");
static const char *tag = CYAN_BG_BOLD(WHITE_BOLD_S " cpu ");
static const String kType = "cpu";
static std::mutex mutex;
@ -80,38 +80,51 @@ public:
m_memory = memory;
m_pages = 0;
m_started = 0;
m_errors = 0;
m_threads = threads.size();
m_ways = 0;
m_ts = Chrono::steadyMSecs();
for (const CpuLaunchData &data : threads) {
m_ways += data.intensity;
}
}
inline bool started(const std::pair<size_t, size_t> &hugePages)
inline bool started(IWorker *worker, bool ready)
{
if (ready) {
auto hugePages = worker->memory()->hugePages();
m_started++;
m_hugePages += hugePages.first;
m_pages += hugePages.second;
m_ways += worker->intensity();
}
else {
m_errors++;
}
return m_started == m_threads;
return (m_started + m_errors) == m_threads;
}
inline void print() const
{
LOG_INFO("%s" GREEN_BOLD(" READY") " threads " CYAN_BOLD("%zu(%zu)") " huge pages %s%zu/%zu %1.0f%%\x1B[0m memory " CYAN_BOLD("%zu KB") BLACK_BOLD(" (%" PRIu64 " ms)"),
if (m_started == 0) {
LOG_ERR("%s " RED_BOLD("disabled") YELLOW(" (failed to start threads)"), tag);
return;
}
LOG_INFO("%s" GREEN_BOLD(" READY") " threads %s%zu/%zu (%zu)" CLEAR " huge pages %s%zu/%zu %1.0f%%" CLEAR " memory " CYAN_BOLD("%zu KB") BLACK_BOLD(" (%" PRIu64 " ms)"),
tag,
m_threads, m_ways,
m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S,
m_started, m_threads, m_ways,
(m_hugePages == m_pages ? GREEN_BOLD_S : (m_hugePages == 0 ? RED_BOLD_S : YELLOW_BOLD_S)),
m_hugePages, m_pages,
m_hugePages == 0 ? 0.0 : static_cast<double>(m_hugePages) / m_pages * 100.0,
m_ways * m_memory / 1024,
memory() / 1024,
Chrono::steadyMSecs() - m_ts
);
}
private:
size_t m_errors = 0;
size_t m_hugePages = 0;
size_t m_memory = 0;
size_t m_pages = 0;
@ -322,17 +335,19 @@ void xmrig::CpuBackend::setJob(const Job &job)
}
void xmrig::CpuBackend::start(IWorker *worker)
void xmrig::CpuBackend::start(IWorker *worker, bool ready)
{
mutex.lock();
if (d_ptr->status.started(worker->memory()->hugePages())) {
if (d_ptr->status.started(worker, ready)) {
d_ptr->status.print();
}
mutex.unlock();
if (ready) {
worker->start();
}
}
@ -390,8 +405,9 @@ rapidjson::Value xmrig::CpuBackend::toJSON(rapidjson::Document &doc) const
return out;
}
out.AddMember("hashrate", hashrate()->toJSON(doc), allocator);
Value threads(kArrayType);
const Hashrate *hr = hashrate();
size_t i = 0;
for (const CpuLaunchData &data : d_ptr->threads) {
@ -399,15 +415,9 @@ rapidjson::Value xmrig::CpuBackend::toJSON(rapidjson::Document &doc) const
thread.AddMember("intensity", data.intensity, allocator);
thread.AddMember("affinity", data.affinity, allocator);
thread.AddMember("av", data.av(), allocator);
Value hashrate(kArrayType);
hashrate.PushBack(Hashrate::normalize(hr->calc(i, Hashrate::ShortInterval)), allocator);
hashrate.PushBack(Hashrate::normalize(hr->calc(i, Hashrate::MediumInterval)), allocator);
hashrate.PushBack(Hashrate::normalize(hr->calc(i, Hashrate::LargeInterval)), allocator);
thread.AddMember("hashrate", hashrate()->toJSON(i, doc), allocator);
i++;
thread.AddMember("hashrate", hashrate, allocator);
threads.PushBack(thread, allocator);
}

View file

@ -26,10 +26,11 @@
#define XMRIG_CPUBACKEND_H
#include <utility>
#include "backend/common/interfaces/IBackend.h"
#include "base/tools/Object.h"
#include <utility>
namespace xmrig {
@ -43,6 +44,8 @@ class Miner;
class CpuBackend : public IBackend
{
public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(CpuBackend)
CpuBackend(Controller *controller);
~CpuBackend() override;
@ -55,7 +58,7 @@ protected:
void prepare(const Job &nextJob) override;
void printHashrate(bool details) override;
void setJob(const Job &job) override;
void start(IWorker *worker) override;
void start(IWorker *worker, bool ready) override;
void stop() override;
void tick(uint64_t ticks) override;

View file

@ -24,13 +24,15 @@
*/
#include <algorithm>
#include "backend/cpu/CpuLaunchData.h"
#include "backend/common/Tags.h"
#include "backend/cpu/CpuConfig.h"
#include <algorithm>
xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread) :
algorithm(algorithm),
assembly(config.assembly()),
@ -65,3 +67,9 @@ xmrig::CnHash::AlgoVariant xmrig::CpuLaunchData::av() const
return static_cast<CnHash::AlgoVariant>(!hwAES ? (intensity + 5) : (intensity + 2));
}
// Returns the tag string supplied by cpu_tag().
const char *xmrig::CpuLaunchData::tag()
{
    return cpu_tag();
}

View file

@ -54,6 +54,8 @@ public:
inline bool operator!=(const CpuLaunchData &other) const { return !isEqual(other); }
inline bool operator==(const CpuLaunchData &other) const { return isEqual(other); }
static const char *tag();
const Algorithm algorithm;
const Assembly assembly;
const bool hugePages;

View file

@ -54,6 +54,7 @@ protected:
void start() override;
inline const VirtualMemory *memory() const override { return m_memory; }
inline size_t intensity() const override { return N; }
private:
inline cn_hash_fun fn(const Algorithm &algorithm) const { return CnHash::fn(algorithm, m_av, m_assembly); }

View file

@ -72,12 +72,19 @@ static void printDisabled(const char *reason)
struct OclLaunchStatus
{
public:
inline bool started() { m_started++; return m_started == m_threads; }
inline size_t threads() const { return m_threads; }
// Records the outcome of one thread launch: a ready thread increments
// m_started, a failed one increments m_errors. Returns true once both
// counters together equal m_threads.
inline bool started(bool ready)
{
    if (ready) {
        ++m_started;
    }
    else {
        ++m_errors;
    }

    return m_threads == (m_started + m_errors);
}
inline void start(size_t threads)
{
m_started = 0;
m_errors = 0;
m_threads = threads;
m_ts = Chrono::steadyMSecs();
OclWorker::ready = false;
@ -85,14 +92,23 @@ public:
inline void print() const
{
LOG_INFO("%s" GREEN_BOLD(" READY") " threads " CYAN_BOLD("%zu") BLACK_BOLD(" (%" PRIu64 " ms)"),
if (m_started == 0) {
LOG_ERR("%s " RED_BOLD("disabled") YELLOW(" (failed to start threads)"), tag);
return;
}
LOG_INFO("%s" GREEN_BOLD(" READY") " threads " "%s%zu/%zu" BLACK_BOLD(" (%" PRIu64 " ms)"),
tag,
m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S,
m_started,
m_threads,
Chrono::steadyMSecs() - m_ts
);
}
private:
size_t m_errors = 0;
size_t m_started = 0;
size_t m_threads = 0;
uint64_t m_ts = 0;
@ -319,11 +335,11 @@ void xmrig::OclBackend::setJob(const Job &job)
}
void xmrig::OclBackend::start(IWorker *worker)
void xmrig::OclBackend::start(IWorker *worker, bool ready)
{
mutex.lock();
if (d_ptr->status.started()) {
if (d_ptr->status.started(ready)) {
d_ptr->status.print();
OclWorker::ready = true;
@ -331,7 +347,9 @@ void xmrig::OclBackend::start(IWorker *worker)
mutex.unlock();
if (ready) {
worker->start();
}
}
@ -373,22 +391,17 @@ rapidjson::Value xmrig::OclBackend::toJSON(rapidjson::Document &doc) const
return out;
}
out.AddMember("hashrate", hashrate()->toJSON(doc), allocator);
Value threads(kArrayType);
const Hashrate *hr = hashrate();
size_t i = 0;
for (const OclLaunchData &data : d_ptr->threads) {
Value thread = data.thread.toJSON(doc);
thread.AddMember("affinity", data.affinity, allocator);
Value hashrate(kArrayType);
hashrate.PushBack(Hashrate::normalize(hr->calc(i, Hashrate::ShortInterval)), allocator);
hashrate.PushBack(Hashrate::normalize(hr->calc(i, Hashrate::MediumInterval)), allocator);
hashrate.PushBack(Hashrate::normalize(hr->calc(i, Hashrate::LargeInterval)), allocator);
thread.AddMember("hashrate", hashrate()->toJSON(i, doc), allocator);
i++;
thread.AddMember("hashrate", hashrate, allocator);
threads.PushBack(thread, allocator);
}

View file

@ -62,7 +62,7 @@ protected:
void prepare(const Job &nextJob) override;
void printHashrate(bool details) override;
void setJob(const Job &job) override;
void start(IWorker *worker) override;
void start(IWorker *worker, bool ready) override;
void stop() override;
void tick(uint64_t ticks) override;

View file

@ -25,6 +25,8 @@
#include "backend/opencl/OclLaunchData.h"
#include "backend/common/Tags.h"
#include "backend/opencl/OclConfig.h"
@ -45,3 +47,9 @@ bool xmrig::OclLaunchData::isEqual(const OclLaunchData &other) const
return (other.algorithm == algorithm &&
other.thread == thread);
}
// Returns the tag string supplied by ocl_tag().
const char *xmrig::OclLaunchData::tag()
{
    return ocl_tag();
}

View file

@ -62,6 +62,8 @@ public:
inline bool operator!=(const OclLaunchData &other) const { return !isEqual(other); }
inline bool operator==(const OclLaunchData &other) const { return isEqual(other); }
static const char *tag();
cl_context ctx = nullptr;
const Algorithm algorithm;
const bool cache;

View file

@ -137,6 +137,12 @@ bool xmrig::OclWorker::selfTest()
}
// Reports the intensity of the underlying runner, or 0 when no runner is set.
size_t xmrig::OclWorker::intensity() const
{
    if (!m_runner) {
        return 0;
    }

    return m_runner->intensity();
}
void xmrig::OclWorker::start()
{
cl_uint results[0x100];

View file

@ -56,6 +56,7 @@ public:
protected:
bool selfTest() override;
size_t intensity() const override;
void start() override;
private:

View file

@ -604,7 +604,7 @@ __kernel void hashAes1Rx4(__global const void* input, __global void* hash, uint
__local const uint* const t2 = ((sub & 1) == 0) ? (T + 512) : (T + 1536);
__local const uint* const t3 = ((sub & 1) == 0) ? (T + 768) : (T + 1280);
#pragma unroll(8)
#pragma unroll 8
for (uint i = 0; i < inputSize / sizeof(uint4); i += 4, p += 4)
{
uint k[4], y[4];

View file

@ -72,7 +72,7 @@ __kernel void fillAes_name(__global void* state, __global void* out, uint batch_
const __local uint* const t2 = (sub & 1) ? (T + 512) : (T + 1536);
const __local uint* const t3 = (sub & 1) ? (T + 768) : (T + 1280);
#pragma unroll(unroll_factor)
#pragma unroll unroll_factor
for (uint i = 0; i < outputSize / sizeof(uint4); i += 4, p += 4)
{
uint y[4];

File diff suppressed because it is too large Load diff

View file

@ -902,7 +902,7 @@ __global uint* generate_jit_code(__global uint2* e, __global uint2* p0, __global
{
int prefetch_data_count;
#pragma unroll(1)
#pragma unroll 1
for (int pass = 0; pass < 2; ++pass)
{
#if RANDOMX_PROGRAM_SIZE > 256
@ -929,7 +929,7 @@ __global uint* generate_jit_code(__global uint2* e, __global uint2* p0, __global
prefetch_data_count = 0;
#pragma unroll(1)
#pragma unroll 1
for (uint i = 0; i < RANDOMX_PROGRAM_SIZE; ++i)
{
// Clean flags
@ -1314,7 +1314,7 @@ __global uint* generate_jit_code(__global uint2* e, __global uint2* p0, __global
// Sort p0
uint prev = p0[0].x;
#pragma unroll(1)
#pragma unroll 1
for (int j = 1; j < prefetch_data_count; ++j)
{
uint2 cur = p0[j];
@ -1344,7 +1344,7 @@ __global uint* generate_jit_code(__global uint2* e, __global uint2* p0, __global
__global int* prefetched_vgprs = prefecth_vgprs_stack + num_prefetch_vgprs;
#pragma unroll(8)
#pragma unroll 8
for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i)
prefetched_vgprs[i] = 0;
@ -1359,7 +1359,7 @@ __global uint* generate_jit_code(__global uint2* e, __global uint2* p0, __global
const uint size_limit = (COMPILED_PROGRAM_SIZE - 200) / sizeof(uint);
__global uint* start_p = p;
#pragma unroll(1)
#pragma unroll 1
for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i)
{
const uint2 inst = e[i];

View file

@ -1658,7 +1658,7 @@ uint32_t inner_loop(
const int32_t sub2 = sub >> 1;
imm_buf[IMM_INDEX_COUNT + 1] = fprc;
#pragma unroll(1)
#pragma unroll 1
for (int32_t ip = 0; ip < program_length;)
{
imm_buf[IMM_INDEX_COUNT] = ip;
@ -1934,7 +1934,7 @@ __kernel void execute_vm(__global void* vm_states, __global void* rounding, __gl
const uint32_t workers_mask = ((1 << WORKERS_PER_HASH) - 1) << ((get_local_id(0) / IDX_WIDTH) * IDX_WIDTH);
const uint32_t fp_workers_mask = 3 << (((sub >> 1) << 1) + (get_local_id(0) / IDX_WIDTH) * IDX_WIDTH);
#pragma unroll(1)
#pragma unroll 1
for (int ic = 0; ic < num_iterations; ++ic)
{
__local uint64_t *r;

View file

@ -45,7 +45,13 @@ static inline uint32_t getMaxThreads(const OclDevice &device, const Algorithm &a
return 40000u;
}
return ((algorithm.l3() <= oneMiB) ? 2u : 1u) * 1000u;
const uint32_t ratio = (algorithm.l3() <= oneMiB) ? 2u : 1u;
if (device.vendorId() == OCL_VENDOR_INTEL) {
return ratio * device.computeUnits() * 8;
}
return ratio * 1000u;
}
@ -107,7 +113,7 @@ bool ocl_generic_cn_generator(const OclDevice &device, const Algorithm &algorith
return false;
}
const uint32_t threadCount = ((device.globalMemSize() - intensity * 2 * algorithm.l3()) > 128 * oneMiB) ? 2 : 1;
const uint32_t threadCount = (device.vendorId() == OCL_VENDOR_AMD && (device.globalMemSize() - intensity * 2 * algorithm.l3()) > 128 * oneMiB) ? 2 : 1;
threads.add(OclThread(device.index(), intensity, 8, getStridedIndex(device, algorithm), 2, threadCount, 8));

View file

@ -86,7 +86,7 @@ bool ocl_generic_rx_generator(const OclDevice &device, const Algorithm &algorith
return false;
}
threads.add(OclThread(device.index(), intensity, 8, 2, gcnAsm, datasetHost, 6));
threads.add(OclThread(device.index(), intensity, 8, device.vendorId() == OCL_VENDOR_AMD ? 2 : 1, gcnAsm, datasetHost, 6));
return true;
}

View file

@ -51,18 +51,19 @@ public:
IOclRunner() = default;
virtual ~IOclRunner() = default;
virtual void run(uint32_t nonce, uint32_t *hashOutput) = 0;
virtual void set(const Job &job, uint8_t *blob) = 0;
virtual cl_context ctx() const = 0;
virtual const Algorithm &algorithm() const = 0;
virtual const char *buildOptions() const = 0;
virtual const char *deviceKey() const = 0;
virtual const char *source() const = 0;
virtual const OclLaunchData &data() const = 0;
virtual size_t intensity() const = 0;
virtual size_t threadId() const = 0;
virtual uint32_t deviceIndex() const = 0;
virtual void build() = 0;
virtual void init() = 0;
virtual void run(uint32_t nonce, uint32_t *hashOutput) = 0;
virtual void set(const Job &job, uint8_t *blob) = 0;
protected:
virtual size_t bufferSize() const = 0;

View file

@ -40,7 +40,8 @@ xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) :
m_source(OclSource::get(data.algorithm)),
m_data(data),
m_align(OclLib::getUint(data.device.id(), CL_DEVICE_MEM_BASE_ADDR_ALIGN)),
m_threadId(id)
m_threadId(id),
m_intensity(data.thread.intensity())
{
m_deviceKey = data.device.name();
@ -97,7 +98,7 @@ void xmrig::OclBaseRunner::init()
constexpr size_t oneGiB = 1024 * 1024 * 1024;
size_t size = bufferSize();
if (size < oneGiB && data().device.freeMemSize() >= oneGiB) {
if (size < oneGiB && data().device.vendorId() == OCL_VENDOR_AMD && data().device.freeMemSize() >= oneGiB) {
size = oneGiB;
}

View file

@ -55,6 +55,7 @@ protected:
inline const char *deviceKey() const override { return m_deviceKey.c_str(); }
inline const char *source() const override { return m_source; }
inline const OclLaunchData &data() const override { return m_data; }
inline size_t intensity() const override { return m_intensity; }
inline size_t threadId() const override { return m_threadId; }
size_t bufferSize() const override;
@ -83,6 +84,7 @@ protected:
size_t m_offset = 0;
std::string m_deviceKey;
std::string m_options;
uint32_t m_intensity;
};

View file

@ -83,12 +83,10 @@ xmrig::OclCnRunner::~OclCnRunner()
size_t xmrig::OclCnRunner::bufferSize() const
{
const size_t g_thd = data().thread.intensity();
return OclBaseRunner::bufferSize() +
align(m_algorithm.l3() * g_thd) +
align(200 * g_thd) +
(align(sizeof(cl_uint) * (g_thd + 2)) * BRANCH_MAX);
align(m_algorithm.l3() * m_intensity) +
align(200 * m_intensity) +
(align(sizeof(cl_uint) * (m_intensity + 2)) * BRANCH_MAX);
}
@ -96,14 +94,13 @@ void xmrig::OclCnRunner::run(uint32_t nonce, uint32_t *hashOutput)
{
static const cl_uint zero = 0;
const size_t g_intensity = data().thread.intensity();
const size_t w_size = data().thread.worksize();
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
const size_t g_thd = ((m_intensity + w_size - 1u) / w_size) * w_size;
assert(g_thd % w_size == 0);
for (size_t i = 0; i < BRANCH_MAX; ++i) {
enqueueWriteBuffer(m_branches[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero);
enqueueWriteBuffer(m_branches[i], CL_FALSE, sizeof(cl_uint) * m_intensity, sizeof(cl_uint), &zero);
}
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero);
@ -135,9 +132,14 @@ void xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
delete m_cn1;
m_height = job.height();
m_cnr = OclCnR::get(*this, m_height);
m_cn1 = new Cn1Kernel(m_cnr, m_height);
m_cn1->setArgs(m_input, m_scratchpads, m_states, data().thread.intensity());
auto program = OclCnR::get(*this, m_height);
m_cn1 = new Cn1Kernel(program, m_height);
m_cn1->setArgs(m_input, m_scratchpads, m_states, m_intensity);
if (m_cnr != program) {
OclLib::release(m_cnr);
m_cnr = OclLib::retain(program);
}
}
for (auto kernel : m_branchKernels) {
@ -150,22 +152,20 @@ void xmrig::OclCnRunner::build()
{
OclBaseRunner::build();
const uint32_t intensity = data().thread.intensity();
m_cn0 = new Cn0Kernel(m_program);
m_cn0->setArgs(m_input, m_scratchpads, m_states, intensity);
m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity);
m_cn2 = new Cn2Kernel(m_program);
m_cn2->setArgs(m_scratchpads, m_states, m_branches, intensity);
m_cn2->setArgs(m_scratchpads, m_states, m_branches, m_intensity);
if (m_algorithm != Algorithm::CN_R) {
m_cn1 = new Cn1Kernel(m_program);
m_cn1->setArgs(m_input, m_scratchpads, m_states, intensity);
m_cn1->setArgs(m_input, m_scratchpads, m_states, m_intensity);
}
for (size_t i = 0; i < BRANCH_MAX; ++i) {
auto kernel = new CnBranchKernel(i, m_program);
kernel->setArgs(m_states, m_branches[i], m_output, intensity);
kernel->setArgs(m_states, m_branches[i], m_output, m_intensity);
m_branchKernels[i] = kernel;
}
@ -176,12 +176,10 @@ void xmrig::OclCnRunner::init()
{
OclBaseRunner::init();
const size_t g_thd = data().thread.intensity();
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * g_thd);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd);
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, m_algorithm.l3() * m_intensity);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * m_intensity);
for (size_t i = 0; i < BRANCH_MAX; ++i) {
m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2));
m_branches[i] = createSubBuffer(CL_MEM_READ_WRITE, sizeof(cl_uint) * (m_intensity + 2));
}
}

View file

@ -87,28 +87,26 @@ void xmrig::OclRxBaseRunner::run(uint32_t nonce, uint32_t *hashOutput)
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(uint32_t), &zero);
const uint32_t g_intensity = data().thread.intensity();
m_blake2b_initial_hash->enqueue(m_queue, g_intensity);
m_fillAes1Rx4_scratchpad->enqueue(m_queue, g_intensity);
m_blake2b_initial_hash->enqueue(m_queue, m_intensity);
m_fillAes1Rx4_scratchpad->enqueue(m_queue, m_intensity);
const uint32_t programCount = RxAlgo::programCount(m_algorithm);
for (uint32_t i = 0; i < programCount; ++i) {
m_fillAes4Rx4_entropy->enqueue(m_queue, g_intensity);
m_fillAes4Rx4_entropy->enqueue(m_queue, m_intensity);
execute(i);
if (i == programCount - 1) {
m_hashAes1Rx4->enqueue(m_queue, g_intensity);
m_blake2b_hash_registers_32->enqueue(m_queue, g_intensity);
m_hashAes1Rx4->enqueue(m_queue, m_intensity);
m_blake2b_hash_registers_32->enqueue(m_queue, m_intensity);
}
else {
m_blake2b_hash_registers_64->enqueue(m_queue, g_intensity);
m_blake2b_hash_registers_64->enqueue(m_queue, m_intensity);
}
}
m_find_shares->enqueue(m_queue, g_intensity);
m_find_shares->enqueue(m_queue, m_intensity);
finalize(hashOutput);
@ -138,13 +136,11 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob)
size_t xmrig::OclRxBaseRunner::bufferSize() const
{
const size_t g_thd = data().thread.intensity();
return OclBaseRunner::bufferSize() +
align((m_algorithm.l3() + 64) * g_thd) +
align(64 * g_thd) +
align((128 + 2560) * g_thd) +
align(sizeof(uint32_t) * g_thd);
align((m_algorithm.l3() + 64) * m_intensity) +
align(64 * m_intensity) +
align((128 + 2560) * m_intensity) +
align(sizeof(uint32_t) * m_intensity);
}
@ -152,14 +148,13 @@ void xmrig::OclRxBaseRunner::build()
{
OclBaseRunner::build();
const uint32_t batch_size = data().thread.intensity();
const uint32_t rx_version = RxAlgo::version(m_algorithm);
m_fillAes1Rx4_scratchpad = new FillAesKernel(m_program, "fillAes1Rx4_scratchpad");
m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_scratchpads, batch_size, rx_version);
m_fillAes1Rx4_scratchpad->setArgs(m_hashes, m_scratchpads, m_intensity, rx_version);
m_fillAes4Rx4_entropy = new FillAesKernel(m_program, "fillAes4Rx4_entropy");
m_fillAes4Rx4_entropy->setArgs(m_hashes, m_entropy, batch_size, rx_version);
m_fillAes4Rx4_entropy->setArgs(m_hashes, m_entropy, m_intensity, rx_version);
m_hashAes1Rx4 = new HashAesKernel(m_program);
@ -178,10 +173,8 @@ void xmrig::OclRxBaseRunner::init()
{
OclBaseRunner::init();
const size_t g_thd = data().thread.intensity();
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * g_thd);
m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * g_thd);
m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * g_thd);
m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * g_thd);
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (m_algorithm.l3() + 64) * m_intensity);
m_hashes = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 64 * m_intensity);
m_entropy = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, (128 + 2560) * m_intensity);
m_rounding = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, sizeof(uint32_t) * m_intensity);
}

View file

@ -54,9 +54,7 @@ xmrig::OclRxJitRunner::~OclRxJitRunner()
size_t xmrig::OclRxJitRunner::bufferSize() const
{
const size_t g_thd = data().thread.intensity();
return OclRxBaseRunner::bufferSize() + align(256 * g_thd) + align(5120 * g_thd) + align(10048 * g_thd);
return OclRxBaseRunner::bufferSize() + align(256 * m_intensity) + align(5120 * m_intensity) + align(10048 * m_intensity);
}
@ -64,33 +62,29 @@ void xmrig::OclRxJitRunner::build()
{
OclRxBaseRunner::build();
const uint32_t batch_size = data().thread.intensity();
m_hashAes1Rx4->setArgs(m_scratchpads, m_registers, 256, batch_size);
m_hashAes1Rx4->setArgs(m_scratchpads, m_registers, 256, m_intensity);
m_blake2b_hash_registers_32->setArgs(m_hashes, m_registers, 256);
m_blake2b_hash_registers_64->setArgs(m_hashes, m_registers, 256);
m_randomx_jit = new RxJitKernel(m_program);
m_randomx_jit->setArgs(m_entropy, m_registers, m_intermediate_programs, m_programs, batch_size, m_rounding);
m_randomx_jit->setArgs(m_entropy, m_registers, m_intermediate_programs, m_programs, m_intensity, m_rounding);
if (!loadAsmProgram()) {
throw std::runtime_error(OclError::toString(CL_INVALID_PROGRAM));
}
m_randomx_run = new RxRunKernel(m_asmProgram);
m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, batch_size, m_algorithm);
m_randomx_run->setArgs(data().dataset->get(), m_scratchpads, m_registers, m_rounding, m_programs, m_intensity, m_algorithm);
}
void xmrig::OclRxJitRunner::execute(uint32_t iteration)
{
const uint32_t g_intensity = data().thread.intensity();
m_randomx_jit->enqueue(m_queue, g_intensity, iteration);
m_randomx_jit->enqueue(m_queue, m_intensity, iteration);
OclLib::finish(m_queue);
m_randomx_run->enqueue(m_queue, g_intensity);
m_randomx_run->enqueue(m_queue, m_intensity);
}
@ -98,11 +92,9 @@ void xmrig::OclRxJitRunner::init()
{
OclRxBaseRunner::init();
const size_t g_thd = data().thread.intensity();
m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * g_thd);
m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * g_thd);
m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * g_thd);
m_registers = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 256 * m_intensity);
m_intermediate_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 5120 * m_intensity);
m_programs = createSubBuffer(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 10048 * m_intensity);
}

View file

@ -52,7 +52,7 @@ xmrig::OclRxVmRunner::~OclRxVmRunner()
size_t xmrig::OclRxVmRunner::bufferSize() const
{
return OclRxBaseRunner::bufferSize() + (align(2560 * data().thread.intensity()));
return OclRxBaseRunner::bufferSize() + (align(2560 * m_intensity));
}
@ -60,10 +60,9 @@ void xmrig::OclRxVmRunner::build()
{
OclRxBaseRunner::build();
const uint32_t batch_size = data().thread.intensity();
const uint32_t hashStrideBytes = RxAlgo::programSize(m_algorithm) * 8;
m_hashAes1Rx4->setArgs(m_scratchpads, m_vm_states, hashStrideBytes, batch_size);
m_hashAes1Rx4->setArgs(m_scratchpads, m_vm_states, hashStrideBytes, m_intensity);
m_blake2b_hash_registers_32->setArgs(m_hashes, m_vm_states, hashStrideBytes);
m_blake2b_hash_registers_64->setArgs(m_hashes, m_vm_states, hashStrideBytes);
@ -71,7 +70,7 @@ void xmrig::OclRxVmRunner::build()
m_init_vm->setArgs(m_entropy, m_vm_states, m_rounding);
m_execute_vm = new ExecuteVmKernel(m_program);
m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), batch_size);
m_execute_vm->setArgs(m_vm_states, m_rounding, m_scratchpads, data().dataset->get(), m_intensity);
}
@ -79,9 +78,8 @@ void xmrig::OclRxVmRunner::execute(uint32_t iteration)
{
const uint32_t bfactor = std::min(data().thread.bfactor(), 8u);
const uint32_t num_iterations = RxAlgo::programIterations(m_algorithm) >> bfactor;
const uint32_t g_intensity = data().thread.intensity();
m_init_vm->enqueue(m_queue, g_intensity, iteration);
m_init_vm->enqueue(m_queue, m_intensity, iteration);
m_execute_vm->setIterations(num_iterations);
@ -90,7 +88,7 @@ void xmrig::OclRxVmRunner::execute(uint32_t iteration)
m_execute_vm->setLast(1);
}
m_execute_vm->enqueue(m_queue, g_intensity, m_worksize);
m_execute_vm->enqueue(m_queue, m_intensity, m_worksize);
if (j == 0) {
m_execute_vm->setFirst(0);
@ -103,5 +101,5 @@ void xmrig::OclRxVmRunner::init()
{
OclRxBaseRunner::init();
m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * data().thread.intensity());
m_vm_states = createSubBuffer(CL_MEM_READ_WRITE, 2560 * m_intensity);
}

View file

@ -63,9 +63,7 @@ xmrig::OclRyoRunner::~OclRyoRunner()
// Returns the total buffer size this runner needs: the base runner's size plus
// two aligned regions, one of algorithm.l3() * intensity and one of
// 200 * intensity (the sizes init() requests for m_scratchpads and m_states).
// NOTE(review): this listing is a diff rendered without +/- markers. The g_thd
// local and the first return are the pre-change lines; the m_intensity return
// is the post-change line. Read literally, only the first return is reachable.
size_t xmrig::OclRyoRunner::bufferSize() const
{
const size_t g_thd = data().thread.intensity();
return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * g_thd) + align(200 * g_thd);
return OclBaseRunner::bufferSize() + align(data().algorithm.l3() * m_intensity) + align(200 * m_intensity);
}
@ -73,9 +71,8 @@ void xmrig::OclRyoRunner::run(uint32_t nonce, uint32_t *hashOutput)
{
static const cl_uint zero = 0;
const size_t g_intensity = data().thread.intensity();
const size_t w_size = data().thread.worksize();
const size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
const size_t g_thd = ((m_intensity + w_size - 1u) / w_size) * w_size;
assert(g_thd % w_size == 0);
@ -109,19 +106,17 @@ void xmrig::OclRyoRunner::build()
{
// Calls the base-class build(), then creates the four kernel wrappers
// (m_cn00, m_cn0, m_cn1, m_cn2) from m_program and binds their arguments.
// NOTE(review): this is a diff rendered without +/- markers. The `intensity`
// local and each setArgs(..., intensity) call are the pre-change lines; the
// setArgs(..., m_intensity) call that follows each one is its post-change
// replacement.
OclBaseRunner::build();
const uint32_t intensity = data().thread.intensity();
m_cn00 = new Cn00RyoKernel(m_program);
m_cn00->setArgs(m_scratchpads, m_states);
m_cn0 = new Cn0Kernel(m_program);
m_cn0->setArgs(m_input, m_scratchpads, m_states, intensity);
m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity);
m_cn1 = new Cn1RyoKernel(m_program);
m_cn1->setArgs(m_scratchpads, m_states, intensity);
m_cn1->setArgs(m_scratchpads, m_states, m_intensity);
m_cn2 = new Cn2RyoKernel(m_program);
m_cn2->setArgs(m_scratchpads, m_states, m_output, intensity);
m_cn2->setArgs(m_scratchpads, m_states, m_output, m_intensity);
}
@ -129,8 +124,6 @@ void xmrig::OclRyoRunner::init()
{
// Runs the base-class init(), then creates two read/write sub-buffers:
// m_scratchpads (algorithm.l3() * intensity) and m_states (200 * intensity).
// These sizes mirror the two aligned terms added in bufferSize().
// NOTE(review): the g_thd local and the first pair of assignments are the
// pre-change diff lines; the m_intensity pair is the post-change replacement.
OclBaseRunner::init();
const size_t g_thd = data().thread.intensity();
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * g_thd);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * g_thd);
m_scratchpads = createSubBuffer(CL_MEM_READ_WRITE, data().algorithm.l3() * m_intensity);
m_states = createSubBuffer(CL_MEM_READ_WRITE, 200 * m_intensity);
}

View file

@ -73,6 +73,7 @@ static const char *kReleaseDevice = "clReleaseDevice";
static const char *kReleaseKernel = "clReleaseKernel";
static const char *kReleaseMemObject = "clReleaseMemObject";
static const char *kReleaseProgram = "clReleaseProgram";
static const char *kRetainProgram = "clRetainProgram";
static const char *kSetKernelArg = "clSetKernelArg";
static const char *kSetMemObjectDestructorCallback = "clSetMemObjectDestructorCallback";
static const char *kUnloadPlatformCompiler = "clUnloadPlatformCompiler";
@ -105,6 +106,7 @@ typedef cl_int (CL_API_CALL *releaseDevice_t)(cl_device_id device);
typedef cl_int (CL_API_CALL *releaseKernel_t)(cl_kernel);
typedef cl_int (CL_API_CALL *releaseMemObject_t)(cl_mem);
typedef cl_int (CL_API_CALL *releaseProgram_t)(cl_program);
typedef cl_int (CL_API_CALL *retainProgram_t)(cl_program);
typedef cl_int (CL_API_CALL *setKernelArg_t)(cl_kernel, cl_uint, size_t, const void *);
typedef cl_int (CL_API_CALL *setMemObjectDestructorCallback_t)(cl_mem, void (CL_CALLBACK *)(cl_mem, void *), void *);
typedef cl_int (CL_API_CALL *unloadPlatformCompiler_t)(cl_platform_id);
@ -146,6 +148,7 @@ static releaseDevice_t pReleaseDevice = nu
static releaseKernel_t pReleaseKernel = nullptr;
static releaseMemObject_t pReleaseMemObject = nullptr;
static releaseProgram_t pReleaseProgram = nullptr;
static retainProgram_t pRetainProgram = nullptr;
static setKernelArg_t pSetKernelArg = nullptr;
static setMemObjectDestructorCallback_t pSetMemObjectDestructorCallback = nullptr;
static unloadPlatformCompiler_t pUnloadPlatformCompiler = nullptr;
@ -235,6 +238,7 @@ bool xmrig::OclLib::load()
DLSYM(UnloadPlatformCompiler);
DLSYM(SetMemObjectDestructorCallback);
DLSYM(CreateSubBuffer);
DLSYM(RetainProgram);
# if defined(CL_VERSION_2_0)
uv_dlsym(&oclLib, kCreateCommandQueueWithProperties, reinterpret_cast<void**>(&pCreateCommandQueueWithProperties));
@ -696,6 +700,18 @@ cl_program xmrig::OclLib::createProgramWithSource(cl_context context, cl_uint co
}
// Retains an OpenCL program through the runtime-resolved clRetainProgram entry
// point and hands the same handle back so the call can be used inline.
// A null handle is passed through untouched; the status code returned by
// clRetainProgram is not inspected.
cl_program xmrig::OclLib::retain(cl_program program) noexcept
{
    // The entry point must have been resolved by load() before any use.
    assert(pRetainProgram != nullptr);

    if (program == nullptr) {
        return program;
    }

    pRetainProgram(program);

    return program;
}
cl_uint xmrig::OclLib::getNumPlatforms() noexcept
{
cl_uint count = 0;

View file

@ -81,6 +81,7 @@ public:
static cl_mem createSubBuffer(cl_mem buffer, cl_mem_flags flags, size_t offset, size_t size);
static cl_program createProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) noexcept;
static cl_program createProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) noexcept;
static cl_program retain(cl_program program) noexcept;
static cl_uint getNumPlatforms() noexcept;
static cl_uint getUint(cl_command_queue command_queue, cl_command_queue_info param_name, cl_uint defaultValue = 0) noexcept;
static cl_uint getUint(cl_context context, cl_context_info param_name, cl_uint defaultValue = 0) noexcept;

View file

@ -28,6 +28,7 @@
#ifdef __GNUC__
# include <fcntl.h>
# include <sys/stat.h>
# include <ext/stdio_filebuf.h>
#endif
@ -102,7 +103,7 @@ bool xmrig::Json::save(const char *fileName, const rapidjson::Document &doc)
return false;
}
# elif defined(__GNUC__)
const int fd = _wopen(toUtf16(fileName).c_str(), _O_WRONLY | _O_BINARY | _O_CREAT | _O_TRUNC);
const int fd = _wopen(toUtf16(fileName).c_str(), _O_WRONLY | _O_BINARY | _O_CREAT | _O_TRUNC, _S_IWRITE);
if (fd == -1) {
return false;
}

View file

@ -28,6 +28,14 @@
"cn/0": false,
"cn-lite/0": false
},
"opencl": {
"enabled": true,
"cache": true,
"loader": null,
"platform": "AMD",
"cn/0": false,
"cn-lite/0": false
},
"donate-level": 5,
"donate-over-proxy": 1,
"log-file": null,

View file

@ -124,16 +124,15 @@ public:
{
active = true;
if (reset) {
Nonce::reset(job.index());
}
for (IBackend *backend : backends) {
backend->setJob(job);
}
if (reset) {
Nonce::reset(job.index());
}
else {
Nonce::touch();
}
if (enabled) {
Nonce::pause(false);;

View file

@ -62,6 +62,14 @@ R"===(
"cn/0": false,
"cn-lite/0": false
},
"opencl": {
"enabled": true,
"cache": true,
"loader": null,
"platform": "AMD",
"cn/0": false,
"cn-lite/0": false
},
"donate-level": 5,
"donate-over-proxy": 1,
"log-file": null,

View file

@ -78,7 +78,6 @@ void xmrig::Nonce::reset(uint8_t index)
std::lock_guard<std::mutex> lock(mutex);
m_nonces[index] = 0;
touch();
}

View file

@ -28,7 +28,7 @@
// Application identity strings.
#define APP_ID "xmrig"
#define APP_NAME "XMRig"
#define APP_DESC "XMRig miner"
// NOTE(review): this is a diff rendered without +/- markers, so APP_VERSION
// appears twice: "4.0.0-beta" is the pre-change value, "4.0.1-evo" the
// post-change one. The changelog heading for this release reads "v4.0.1-beta";
// confirm the "-evo" suffix is intended on this branch.
#define APP_VERSION "4.0.0-beta"
#define APP_VERSION "4.0.1-evo"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com"
@ -36,7 +36,7 @@
#define APP_VER_MAJOR 4
#define APP_VER_MINOR 0
#define APP_VER_PATCH 0
#define APP_VER_PATCH 1
#ifdef _MSC_VER
# if (_MSC_VER >= 1920)