KawPow: tuned work group size for OpenCL

This commit is contained in:
SChernykh 2020-05-28 10:58:06 +02:00
parent 22b937cc1c
commit e3d727cdb6
11 changed files with 162 additions and 117 deletions

View file

@ -56,7 +56,7 @@ xmrig::OclThread::OclThread(const rapidjson::Value &value)
} }
m_index = Json::getUint(value, kIndex); m_index = Json::getUint(value, kIndex);
m_worksize = std::max(std::min(Json::getUint(value, kWorksize), 128u), 1u); m_worksize = std::max(std::min(Json::getUint(value, kWorksize), 512u), 1u);
m_unrollFactor = std::max(std::min(Json::getUint(value, kUnroll, m_unrollFactor), 128u), 1u); m_unrollFactor = std::max(std::min(Json::getUint(value, kUnroll, m_unrollFactor), 128u), 1u);
setIntensity(Json::getUint(value, kIntensity)); setIntensity(Json::getUint(value, kIntensity));
@ -151,7 +151,7 @@ rapidjson::Value xmrig::OclThread::toJSON(rapidjson::Document &doc) const
out.AddMember(StringRef(kDatasetHost), isDatasetHost(), allocator); out.AddMember(StringRef(kDatasetHost), isDatasetHost(), allocator);
# endif # endif
} }
else if (!m_fields.test(ASTROBWT_FIELDS)) { else if (!m_fields.test(ASTROBWT_FIELDS) && !m_fields.test(KAWPOW_FIELDS)) {
out.AddMember(StringRef(kUnroll), unrollFactor(), allocator); out.AddMember(StringRef(kUnroll), unrollFactor(), allocator);
} }

View file

@ -67,7 +67,7 @@ public:
} }
# endif # endif
# if defined XMRIG_ALGO_ASTROBWT || defined XMRIG_ALGO_KAWPOW # ifdef XMRIG_ALGO_ASTROBWT
OclThread(uint32_t index, uint32_t intensity, uint32_t threads) : OclThread(uint32_t index, uint32_t intensity, uint32_t threads) :
m_fields(4), m_fields(4),
m_threads(threads, -1), m_threads(threads, -1),
@ -81,6 +81,20 @@ public:
} }
# endif # endif
# ifdef XMRIG_ALGO_KAWPOW
OclThread(uint32_t index, uint32_t intensity, uint32_t worksize, uint32_t threads) :
m_fields(8),
m_threads(threads, -1),
m_index(index),
m_memChunk(0),
m_stridedIndex(0),
m_unrollFactor(1),
m_worksize(worksize)
{
setIntensity(intensity);
}
# endif
OclThread(const rapidjson::Value &value); OclThread(const rapidjson::Value &value);
inline bool isAsm() const { return m_gcnAsm; } inline bool isAsm() const { return m_gcnAsm; }
@ -106,6 +120,7 @@ private:
STRIDED_INDEX_FIELD, STRIDED_INDEX_FIELD,
RANDOMX_FIELDS, RANDOMX_FIELDS,
ASTROBWT_FIELDS, ASTROBWT_FIELDS,
KAWPOW_FIELDS,
FIELD_MAX FIELD_MAX
}; };

View file

@ -3,7 +3,7 @@
#endif #endif
#ifndef GROUP_SIZE #ifndef GROUP_SIZE
#define GROUP_SIZE 128 #define GROUP_SIZE 256
#endif #endif
#define GROUP_SHARE (GROUP_SIZE / 16) #define GROUP_SHARE (GROUP_SIZE / 16)

View file

@ -6,7 +6,7 @@ inline void progPowLoop(const uint32_t loop,
volatile uint32_t mix_arg[PROGPOW_REGS], volatile uint32_t mix_arg[PROGPOW_REGS],
__global const dag_t *g_dag, __global const dag_t *g_dag,
__local const uint32_t c_dag[PROGPOW_CACHE_WORDS], __local const uint32_t c_dag[PROGPOW_CACHE_WORDS],
__local uint64_t share[GROUP_SHARE], __local uint32_t share[GROUP_SHARE],
const bool hack_false) const bool hack_false)
{ {
dag_t data_dag; dag_t data_dag;
@ -166,7 +166,6 @@ void fill_mix(local uint32_t* seed, uint32_t lane_id, uint32_t* mix)
typedef struct typedef struct
{ {
uint32_t uint32s[PROGPOW_LANES]; uint32_t uint32s[PROGPOW_LANES];
uint64_t uint64s[PROGPOW_LANES / 2];
} shuffle_t; } shuffle_t;
typedef struct typedef struct
@ -247,7 +246,7 @@ __kernel void progpow_search(__global dag_t const* g_dag, __global uint* job_blo
#pragma unroll 1 #pragma unroll 1
for (uint32_t l = 0; l < PROGPOW_CNT_DAG; l++) for (uint32_t l = 0; l < PROGPOW_CNT_DAG; l++)
progPowLoop(l, mix, g_dag, c_dag, share[0].uint64s, hack_false); progPowLoop(l, mix, g_dag, c_dag, share[0].uint32s, hack_false);
// Reduce mix data to a per-lane 32-bit digest // Reduce mix data to a per-lane 32-bit digest
uint32_t mix_hash = FNV_OFFSET_BASIS; uint32_t mix_hash = FNV_OFFSET_BASIS;

View file

@ -2,12 +2,12 @@
namespace xmrig { namespace xmrig {
static char kawpow_cl[6348] = { static char kawpow_cl[6313] = {
0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70, 0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,
0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f, 0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,
0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69, 0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69,
0x65,0x72,0x73,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x23,0x69,0x66,0x6e,0x64,0x65,0x66,0x20,0x47,0x52,0x4f,0x55, 0x65,0x72,0x73,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x23,0x69,0x66,0x6e,0x64,0x65,0x66,0x20,0x47,0x52,0x4f,0x55,
0x50,0x5f,0x53,0x49,0x5a,0x45,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x20,0x31,0x32,0x38,0x0a,0x23,0x65, 0x50,0x5f,0x53,0x49,0x5a,0x45,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x20,0x32,0x35,0x36,0x0a,0x23,0x65,
0x6e,0x64,0x69,0x66,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x20,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f, 0x6e,0x64,0x69,0x66,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x20,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f,
0x53,0x49,0x5a,0x45,0x20,0x2f,0x20,0x31,0x36,0x29,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20, 0x53,0x49,0x5a,0x45,0x20,0x2f,0x20,0x31,0x36,0x29,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,
0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x6c,0x6f,0x6e,0x67,0x20, 0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x6c,0x6f,0x6e,0x67,0x20,
@ -39,7 +39,7 @@ static char kawpow_cl[6348] = {
0x47,0x53,0x5d,0x2c,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x64,0x61,0x67,0x5f,0x74,0x20,0x2a,0x67,0x5f,0x64,0x61,0x67, 0x47,0x53,0x5d,0x2c,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x64,0x61,0x67,0x5f,0x74,0x20,0x2a,0x67,0x5f,0x64,0x61,0x67,
0x2c,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x63,0x5f,0x64,0x61,0x67,0x5b,0x50, 0x2c,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x63,0x5f,0x64,0x61,0x67,0x5b,0x50,
0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x5d,0x2c,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e, 0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x5d,0x2c,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,
0x74,0x36,0x34,0x5f,0x74,0x20,0x73,0x68,0x61,0x72,0x65,0x5b,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x5d,0x2c,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20, 0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x68,0x61,0x72,0x65,0x5b,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x5d,0x2c,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,
0x62,0x6f,0x6f,0x6c,0x20,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x29,0x0a,0x7b,0x0a,0x64,0x61,0x67,0x5f,0x74,0x20,0x64,0x61,0x74,0x61,0x5f,0x64,0x61, 0x62,0x6f,0x6f,0x6c,0x20,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x29,0x0a,0x7b,0x0a,0x64,0x61,0x67,0x5f,0x74,0x20,0x64,0x61,0x74,0x61,0x5f,0x64,0x61,
0x67,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x64,0x61,0x74,0x61,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f, 0x67,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x64,0x61,0x74,0x61,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
0x74,0x20,0x6d,0x69,0x78,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x52,0x45,0x47,0x53,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30, 0x74,0x20,0x6d,0x69,0x78,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x52,0x45,0x47,0x53,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,
@ -128,80 +128,79 @@ static char kawpow_cl[6348] = {
0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f, 0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,
0x57,0x5f,0x52,0x45,0x47,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x6d,0x69,0x78,0x5b,0x69,0x5d,0x3d,0x6b,0x69,0x73,0x73,0x39,0x39,0x28,0x26,0x73,0x74,0x29,0x3b, 0x57,0x5f,0x52,0x45,0x47,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x6d,0x69,0x78,0x5b,0x69,0x5d,0x3d,0x6b,0x69,0x73,0x73,0x39,0x39,0x28,0x26,0x73,0x74,0x29,0x3b,
0x0a,0x7d,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x73,0x74,0x72,0x75,0x63,0x74,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x75,0x69,0x6e, 0x0a,0x7d,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x73,0x74,0x72,0x75,0x63,0x74,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x75,0x69,0x6e,
0x74,0x33,0x32,0x73,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x75,0x69, 0x74,0x33,0x32,0x73,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x5d,0x3b,0x0a,0x7d,0x20,0x73,0x68,0x75,0x66,0x66,0x6c,0x65,0x5f,0x74,
0x6e,0x74,0x36,0x34,0x73,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x2f,0x32,0x5d,0x3b,0x0a,0x7d,0x20,0x73,0x68,0x75,0x66,0x66,0x6c, 0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x73,0x74,0x72,0x75,0x63,0x74,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x75,0x69,0x6e,0x74,
0x65,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x73,0x74,0x72,0x75,0x63,0x74,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x75, 0x33,0x32,0x73,0x5b,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68,0x61,0x73,0x68,
0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68, 0x33,0x32,0x5f,0x74,0x3b,0x0a,0x23,0x69,0x66,0x20,0x50,0x4c,0x41,0x54,0x46,0x4f,0x52,0x4d,0x20,0x21,0x3d,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x5f,0x50,0x4c,0x41,
0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x23,0x69,0x66,0x20,0x50,0x4c,0x41,0x54,0x46,0x4f,0x52,0x4d,0x20,0x21,0x3d,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x5f, 0x54,0x46,0x4f,0x52,0x4d,0x5f,0x4e,0x56,0x49,0x44,0x49,0x41,0x20,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,
0x50,0x4c,0x41,0x54,0x46,0x4f,0x52,0x4d,0x5f,0x4e,0x56,0x49,0x44,0x49,0x41,0x20,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28, 0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x2c,0x31,0x2c,0x31,
0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x2c, 0x29,0x29,0x29,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x70,0x72,0x6f,0x67,0x70,0x6f,0x77,
0x31,0x2c,0x31,0x29,0x29,0x29,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x70,0x72,0x6f,0x67, 0x5f,0x73,0x65,0x61,0x72,0x63,0x68,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x64,0x61,0x67,0x5f,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x2a,0x20,0x67,0x5f,
0x70,0x6f,0x77,0x5f,0x73,0x65,0x61,0x72,0x63,0x68,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x64,0x61,0x67,0x5f,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x2a, 0x64,0x61,0x67,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x2a,0x20,0x6a,0x6f,0x62,0x5f,0x62,0x6c,0x6f,0x62,0x2c,0x75,0x6c,0x6f,0x6e,
0x20,0x67,0x5f,0x64,0x61,0x67,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x2a,0x20,0x6a,0x6f,0x62,0x5f,0x62,0x6c,0x6f,0x62,0x2c,0x75, 0x67,0x20,0x74,0x61,0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74,0x20,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x2c,0x76,0x6f,0x6c,0x61,0x74,0x69,0x6c,
0x6c,0x6f,0x6e,0x67,0x20,0x74,0x61,0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74,0x20,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x2c,0x76,0x6f,0x6c,0x61, 0x65,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x2a,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x29,0x0a,0x7b,0x0a,0x5f,0x5f,0x6c,0x6f,
0x74,0x69,0x6c,0x65,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x2a,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x29,0x0a,0x7b,0x0a,0x5f, 0x63,0x61,0x6c,0x20,0x73,0x68,0x75,0x66,0x66,0x6c,0x65,0x5f,0x74,0x20,0x73,0x68,0x61,0x72,0x65,0x5b,0x48,0x41,0x53,0x48,0x45,0x53,0x5f,0x50,0x45,0x52,0x5f,0x47,
0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x68,0x75,0x66,0x66,0x6c,0x65,0x5f,0x74,0x20,0x73,0x68,0x61,0x72,0x65,0x5b,0x48,0x41,0x53,0x48,0x45,0x53,0x5f,0x50,0x45, 0x52,0x4f,0x55,0x50,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x63,0x5f,0x64,0x61,0x67,0x5b,0x50,0x52,
0x52,0x5f,0x47,0x52,0x4f,0x55,0x50,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x63,0x5f,0x64,0x61,0x67, 0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x63,0x6f,0x6e,
0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20, 0x73,0x74,0x20,0x6c,0x69,0x64,0x3d,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,
0x63,0x6f,0x6e,0x73,0x74,0x20,0x6c,0x69,0x64,0x3d,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33, 0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x67,0x69,0x64,0x3d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,
0x32,0x5f,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x67,0x69,0x64,0x3d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a, 0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x3d,0x6c,0x69,0x64,0x26,0x28,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x3d,0x6c,0x69,0x64,0x26,0x28,0x50,0x52,0x4f,0x47, 0x5f,0x4c,0x41,0x4e,0x45,0x53,0x2d,0x31,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x72,0x6f,0x75,0x70,0x5f,
0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x2d,0x31,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x72,0x6f, 0x69,0x64,0x3d,0x6c,0x69,0x64,0x2f,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,
0x75,0x70,0x5f,0x69,0x64,0x3d,0x6c,0x69,0x64,0x2f,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69, 0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x3d,0x6c,0x69,0x64,0x2a,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x3b,0x20,
0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x3d,0x6c,0x69,0x64,0x2a,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44, 0x77,0x6f,0x72,0x64,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x3b,0x20,0x77,0x6f,0x72,0x64,0x2b,0x3d,
0x53,0x3b,0x20,0x77,0x6f,0x72,0x64,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x3b,0x20,0x77,0x6f,0x72, 0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x2a,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x29,0x0a,0x7b,0x0a,
0x64,0x2b,0x3d,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x2a,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x29, 0x64,0x61,0x67,0x5f,0x74,0x20,0x6c,0x6f,0x61,0x64,0x3d,0x67,0x5f,0x64,0x61,0x67,0x5b,0x77,0x6f,0x72,0x64,0x2f,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,
0x0a,0x7b,0x0a,0x64,0x61,0x67,0x5f,0x74,0x20,0x6c,0x6f,0x61,0x64,0x3d,0x67,0x5f,0x64,0x61,0x67,0x5b,0x77,0x6f,0x72,0x64,0x2f,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57, 0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,
0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f, 0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x63,0x5f,0x64,0x61,0x67,0x5b,0x77,0x6f,0x72,0x64,0x2b,0x69,0x5d,0x3d,
0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x63,0x5f,0x64,0x61,0x67,0x5b,0x77,0x6f,0x72,0x64,0x2b, 0x6c,0x6f,0x61,0x64,0x2e,0x73,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,
0x69,0x5d,0x3d,0x6c,0x6f,0x61,0x64,0x2e,0x73,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41, 0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x68,0x61,0x73,0x68,0x5f,0x73,0x65,0x65,0x64,0x5b,0x32,0x5d,
0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x68,0x61,0x73,0x68,0x5f,0x73,0x65,0x65,0x64, 0x3b,0x20,0x0a,0x68,0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x20,0x64,0x69,0x67,0x65,0x73,0x74,0x3b,0x20,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74,
0x5b,0x32,0x5d,0x3b,0x20,0x0a,0x68,0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x20,0x64,0x69,0x67,0x65,0x73,0x74,0x3b,0x20,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74, 0x61,0x74,0x65,0x32,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74,0x61,0x74,0x65,0x5b,0x32,0x35,0x5d,0x3b,0x20,0x0a,
0x20,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74,0x61,0x74,0x65,0x5b,0x32,0x35,0x5d, 0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x31,0x30,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,
0x3b,0x20,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x31,0x30,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74, 0x5d,0x3d,0x6a,0x6f,0x62,0x5f,0x62,0x6c,0x6f,0x62,0x5b,0x69,0x5d,0x3b,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x67,0x69,0x64,0x3b,0x0a,0x66,0x6f,0x72,
0x65,0x5b,0x69,0x5d,0x3d,0x6a,0x6f,0x62,0x5f,0x62,0x6c,0x6f,0x62,0x5b,0x69,0x5d,0x3b,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x67,0x69,0x64,0x3b,0x0a, 0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x31,0x30,0x3b,0x20,0x69,0x3c,0x32,0x35,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,
0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x31,0x30,0x3b,0x20,0x69,0x3c,0x32,0x35,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b, 0x72,0x61,0x76,0x65,0x6e,0x63,0x6f,0x69,0x6e,0x5f,0x72,0x6e,0x64,0x63,0x5b,0x69,0x2d,0x31,0x30,0x5d,0x3b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x38,0x30,
0x69,0x5d,0x3d,0x72,0x61,0x76,0x65,0x6e,0x63,0x6f,0x69,0x6e,0x5f,0x72,0x6e,0x64,0x63,0x5b,0x69,0x2d,0x31,0x30,0x5d,0x3b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f, 0x30,0x28,0x73,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b,
0x66,0x38,0x30,0x30,0x28,0x73,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20, 0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x69,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,
0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x69,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x23,0x70,0x72,0x61,0x67, 0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x68,0x3d,0x30,0x3b,0x20,0x68,0x3c,0x50,0x52,
0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x68,0x3d,0x30,0x3b,0x20,0x68, 0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6d,0x69,0x78,
0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20, 0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x52,0x45,0x47,0x53,0x5d,0x3b,0x0a,0x69,0x66,0x28,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x3d,0x3d,0x68,0x29,0x20,0x7b,
0x6d,0x69,0x78,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x52,0x45,0x47,0x53,0x5d,0x3b,0x0a,0x69,0x66,0x28,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x3d,0x3d,0x68, 0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x30,0x5d,0x3d,0x73,0x74,0x61,0x74,
0x29,0x20,0x7b,0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x30,0x5d,0x3d,0x73, 0x65,0x32,0x5b,0x30,0x5d,0x3b,0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x31,
0x74,0x61,0x74,0x65,0x32,0x5b,0x30,0x5d,0x3b,0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32, 0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,
0x73,0x5b,0x31,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f, 0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0x0a,0x66,0x69,0x6c,0x6c,0x5f,0x6d,0x69,0x78,0x28,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,
0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0x0a,0x66,0x69,0x6c,0x6c,0x5f,0x6d,0x69,0x78,0x28,0x73,0x68,0x61,0x72,0x65,0x5b,0x67, 0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x2c,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x2c,0x6d,0x69,0x78,0x29,0x3b,0x0a,0x23,0x70,0x72,0x61,
0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x2c,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x2c,0x6d,0x69,0x78,0x29,0x3b,0x0a,0x23, 0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6c,0x3d,0x30,0x3b,0x20,
0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6c,0x3d, 0x6c,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x4e,0x54,0x5f,0x44,0x41,0x47,0x3b,0x20,0x6c,0x2b,0x2b,0x29,0x0a,0x70,0x72,0x6f,0x67,0x50,0x6f,0x77,0x4c,
0x30,0x3b,0x20,0x6c,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x4e,0x54,0x5f,0x44,0x41,0x47,0x3b,0x20,0x6c,0x2b,0x2b,0x29,0x0a,0x70,0x72,0x6f,0x67,0x50, 0x6f,0x6f,0x70,0x28,0x6c,0x2c,0x6d,0x69,0x78,0x2c,0x67,0x5f,0x64,0x61,0x67,0x2c,0x63,0x5f,0x64,0x61,0x67,0x2c,0x73,0x68,0x61,0x72,0x65,0x5b,0x30,0x5d,0x2e,0x75,
0x6f,0x77,0x4c,0x6f,0x6f,0x70,0x28,0x6c,0x2c,0x6d,0x69,0x78,0x2c,0x67,0x5f,0x64,0x61,0x67,0x2c,0x63,0x5f,0x64,0x61,0x67,0x2c,0x73,0x68,0x61,0x72,0x65,0x5b,0x30, 0x69,0x6e,0x74,0x33,0x32,0x73,0x2c,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6d,0x69,0x78,
0x5d,0x2e,0x75,0x69,0x6e,0x74,0x36,0x34,0x73,0x2c,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20, 0x5f,0x68,0x61,0x73,0x68,0x3d,0x46,0x4e,0x56,0x5f,0x4f,0x46,0x46,0x53,0x45,0x54,0x5f,0x42,0x41,0x53,0x49,0x53,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,
0x6d,0x69,0x78,0x5f,0x68,0x61,0x73,0x68,0x3d,0x46,0x4e,0x56,0x5f,0x4f,0x46,0x46,0x53,0x45,0x54,0x5f,0x42,0x41,0x53,0x49,0x53,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67, 0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x52,
0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f, 0x45,0x47,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x66,0x6e,0x76,0x31,0x61,0x28,0x6d,0x69,0x78,0x5f,0x68,0x61,0x73,0x68,0x2c,0x6d,0x69,0x78,0x5b,0x69,0x5d,0x29,
0x57,0x5f,0x52,0x45,0x47,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x66,0x6e,0x76,0x31,0x61,0x28,0x6d,0x69,0x78,0x5f,0x68,0x61,0x73,0x68,0x2c,0x6d,0x69,0x78,0x5b, 0x3b,0x0a,0x68,0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x20,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,
0x69,0x5d,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x20,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28, 0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x2e,0x75,0x69,0x6e,0x74,
0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x2e,0x75, 0x33,0x32,0x73,0x5b,0x69,0x5d,0x3d,0x46,0x4e,0x56,0x5f,0x4f,0x46,0x46,0x53,0x45,0x54,0x5f,0x42,0x41,0x53,0x49,0x53,0x3b,0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,
0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x5d,0x3d,0x46,0x4e,0x56,0x5f,0x4f,0x46,0x46,0x53,0x45,0x54,0x5f,0x42,0x41,0x53,0x49,0x53,0x3b,0x0a,0x73,0x68,0x61,0x72, 0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x5d,0x3d,0x6d,0x69,0x78,0x5f,0x68,0x61,
0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x5d,0x3d,0x6d,0x69,0x78, 0x73,0x68,0x3b,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,
0x5f,0x68,0x61,0x73,0x68,0x3b,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e, 0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,
0x43,0x45,0x29,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30, 0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x66,0x6e,0x76,0x31,0x61,0x28,0x64,0x69,0x67,0x65,0x73,
0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x66,0x6e,0x76,0x31,0x61,0x28,0x64,0x69, 0x74,0x5f,0x74,0x65,0x6d,0x70,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x20,0x25,0x20,0x38,0x5d,0x2c,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,
0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x20,0x25,0x20,0x38,0x5d,0x2c,0x73,0x68,0x61,0x72,0x65,0x5b,0x67, 0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x68,0x3d,0x3d,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,
0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x68,0x3d,0x3d,0x6c,0x61,0x6e,0x65, 0x29,0x0a,0x64,0x69,0x67,0x65,0x73,0x74,0x3d,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x3b,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,
0x5f,0x69,0x64,0x29,0x0a,0x64,0x69,0x67,0x65,0x73,0x74,0x3d,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x3b,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36, 0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x3b,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74,0x61,0x74,0x65,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x30,
0x34,0x5f,0x74,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x3b,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74,0x61,0x74,0x65,0x5b,0x32,0x35,0x5d, 0x78,0x30,0x7d,0x3b,0x20,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,
0x3d,0x7b,0x30,0x78,0x30,0x7d,0x3b,0x20,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b,0x29, 0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x69,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x38,0x3b,0x20,
0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x69,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d, 0x69,0x3c,0x31,0x36,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x64,0x69,0x67,0x65,0x73,0x74,0x2e,0x75,0x69,0x6e,0x74,0x33,
0x38,0x3b,0x20,0x69,0x3c,0x31,0x36,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x64,0x69,0x67,0x65,0x73,0x74,0x2e,0x75,0x69, 0x32,0x73,0x5b,0x69,0x2d,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x31,0x36,0x3b,0x20,0x69,0x3c,0x32,0x35,0x3b,0x20,0x69,0x2b,
0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x2d,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x31,0x36,0x3b,0x20,0x69,0x3c,0x32,0x35,0x3b, 0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x72,0x61,0x76,0x65,0x6e,0x63,0x6f,0x69,0x6e,0x5f,0x72,0x6e,0x64,0x63,0x5b,0x69,0x2d,0x31,0x36,0x5d,
0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x72,0x61,0x76,0x65,0x6e,0x63,0x6f,0x69,0x6e,0x5f,0x72,0x6e,0x64,0x63,0x5b,0x69,0x2d, 0x3b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x38,0x30,0x30,0x28,0x73,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x72,
0x31,0x36,0x5d,0x3b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x38,0x30,0x30,0x28,0x73,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f, 0x65,0x73,0x3d,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x73,0x74,0x61,0x74,0x65,0x5b,0x31,0x5d,0x3c,0x3c,0x33,0x32,0x7c,0x73,0x74,0x61,0x74,0x65,0x5b,
0x74,0x20,0x72,0x65,0x73,0x3d,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x73,0x74,0x61,0x74,0x65,0x5b,0x31,0x5d,0x3c,0x3c,0x33,0x32,0x7c,0x73,0x74,0x61, 0x30,0x5d,0x3b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x3d,0x61,0x73,0x5f,0x75,0x6c,0x6f,0x6e,0x67,0x28,0x61,0x73,0x5f,0x75,0x63,0x68,0x61,0x72,0x38,0x28,0x72,0x65,
0x74,0x65,0x5b,0x30,0x5d,0x3b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x3d,0x61,0x73,0x5f,0x75,0x6c,0x6f,0x6e,0x67,0x28,0x61,0x73,0x5f,0x75,0x63,0x68,0x61,0x72,0x38, 0x73,0x29,0x2e,0x73,0x37,0x36,0x35,0x34,0x33,0x32,0x31,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x72,0x65,0x73,0x75,0x6c,0x74,0x3c,0x3d,0x74,0x61,0x72,0x67,
0x28,0x72,0x65,0x73,0x29,0x2e,0x73,0x37,0x36,0x35,0x34,0x33,0x32,0x31,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x72,0x65,0x73,0x75,0x6c,0x74,0x3c,0x3d,0x74, 0x65,0x74,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x6b,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x72,0x65,
0x61,0x72,0x67,0x65,0x74,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x6b,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63, 0x73,0x75,0x6c,0x74,0x73,0x29,0x2b,0x31,0x3b,0x0a,0x69,0x66,0x28,0x6b,0x3c,0x3d,0x31,0x35,0x29,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x5b,0x6b,0x5d,0x3d,0x67,
0x28,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x29,0x2b,0x31,0x3b,0x0a,0x69,0x66,0x28,0x6b,0x3c,0x3d,0x31,0x35,0x29,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x5b,0x6b, 0x69,0x64,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
0x5d,0x3d,0x67,0x69,0x64,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
}; };
} // namespace xmrig } // namespace xmrig

View file

@ -7,7 +7,7 @@ static char kawpow_dag_cl[5990] = {
0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f, 0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,
0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69, 0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69,
0x65,0x72,0x73,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x23,0x69,0x66,0x6e,0x64,0x65,0x66,0x20,0x47,0x52,0x4f,0x55, 0x65,0x72,0x73,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x23,0x69,0x66,0x6e,0x64,0x65,0x66,0x20,0x47,0x52,0x4f,0x55,
0x50,0x5f,0x53,0x49,0x5a,0x45,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x20,0x31,0x32,0x38,0x0a,0x23,0x65, 0x50,0x5f,0x53,0x49,0x5a,0x45,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x20,0x32,0x35,0x36,0x0a,0x23,0x65,
0x6e,0x64,0x69,0x66,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x20,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f, 0x6e,0x64,0x69,0x66,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x20,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f,
0x53,0x49,0x5a,0x45,0x20,0x2f,0x20,0x31,0x36,0x29,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20, 0x53,0x49,0x5a,0x45,0x20,0x2f,0x20,0x31,0x36,0x29,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,
0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x6c,0x6f,0x6e,0x67,0x20, 0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x6c,0x6f,0x6e,0x67,0x20,

View file

@ -40,7 +40,20 @@ bool ocl_generic_kawpow_generator(const OclDevice &device, const Algorithm &algo
return false; return false;
} }
threads.add(OclThread(device.index(), device.computeUnits() * 262144, 1)); bool isNavi = false;
switch (device.type()) {
case OclDevice::Navi_10:
case OclDevice::Navi_12:
case OclDevice::Navi_14:
isNavi = true;
break;
default:
break;
}
threads.add(OclThread(device.index(), device.computeUnits() * 262144, isNavi ? 128 : 256, 1));
return true; return true;
} }

View file

@ -47,9 +47,19 @@ namespace xmrig {
OclKawPowRunner::OclKawPowRunner(size_t index, const OclLaunchData &data) : OclBaseRunner(index, data) OclKawPowRunner::OclKawPowRunner(size_t index, const OclLaunchData &data) : OclBaseRunner(index, data)
{ {
switch (data.thread.worksize())
{
case 64:
case 128:
case 256:
case 512:
m_workGroupSize = data.thread.worksize();
break;
}
if (data.device.vendorId() == OclVendor::OCL_VENDOR_NVIDIA) { if (data.device.vendorId() == OclVendor::OCL_VENDOR_NVIDIA) {
m_options += " -DPLATFORM=OPENCL_PLATFORM_NVIDIA"; m_options += " -DPLATFORM=OPENCL_PLATFORM_NVIDIA";
m_workGroupSize = 32; m_dagWorkGroupSize = 32;
} }
} }
@ -69,9 +79,9 @@ OclKawPowRunner::~OclKawPowRunner()
void OclKawPowRunner::run(uint32_t nonce, uint32_t *hashOutput) void OclKawPowRunner::run(uint32_t nonce, uint32_t *hashOutput)
{ {
const size_t local_work_size = 128; const size_t local_work_size = m_workGroupSize;
const size_t global_work_offset = nonce; const size_t global_work_offset = nonce;
const size_t global_work_size = m_intensity - (m_intensity % local_work_size); const size_t global_work_size = m_intensity - (m_intensity % m_workGroupSize);
enqueueWriteBuffer(m_input, CL_FALSE, 0, 40, m_blob); enqueueWriteBuffer(m_input, CL_FALSE, 0, 40, m_blob);
@ -101,7 +111,7 @@ void OclKawPowRunner::run(uint32_t nonce, uint32_t *hashOutput)
void OclKawPowRunner::set(const Job &job, uint8_t *blob) void OclKawPowRunner::set(const Job &job, uint8_t *blob)
{ {
m_blockHeight = static_cast<uint32_t>(job.height()); m_blockHeight = static_cast<uint32_t>(job.height());
m_searchProgram = OclKawPow::get(*this, m_blockHeight); m_searchProgram = OclKawPow::get(*this, m_blockHeight, m_workGroupSize);
m_searchKernel = OclLib::createKernel(m_searchProgram, "progpow_search"); m_searchKernel = OclLib::createKernel(m_searchProgram, "progpow_search");
const uint32_t epoch = m_blockHeight / KPHash::EPOCH_LENGTH; const uint32_t epoch = m_blockHeight / KPHash::EPOCH_LENGTH;
@ -138,11 +148,11 @@ void OclKawPowRunner::set(const Job &job, uint8_t *blob)
const uint32_t dag_words = dag_size / sizeof(node); const uint32_t dag_words = dag_size / sizeof(node);
m_calculateDagKernel->setArgs(0, m_lightCache, m_dag, dag_words, m_lightCacheSize / sizeof(node)); m_calculateDagKernel->setArgs(0, m_lightCache, m_dag, dag_words, m_lightCacheSize / sizeof(node));
constexpr uint32_t N = 1 << 18; constexpr uint32_t N = 1 << 20;
for (uint32_t start = 0; start < dag_words; start += N) { for (uint32_t start = 0; start < dag_words; start += N) {
m_calculateDagKernel->setArg(0, sizeof(start), &start); m_calculateDagKernel->setArg(0, sizeof(start), &start);
m_calculateDagKernel->enqueue(m_queue, N, m_workGroupSize); m_calculateDagKernel->enqueue(m_queue, N, m_dagWorkGroupSize);
} }
OclLib::finish(m_queue); OclLib::finish(m_queue);

View file

@ -52,7 +52,7 @@ protected:
void init() override; void init() override;
private: private:
uint8_t* m_blob; uint8_t* m_blob = nullptr;
uint32_t m_blockHeight = 0; uint32_t m_blockHeight = 0;
uint32_t m_epoch = 0xFFFFFFFFUL; uint32_t m_epoch = 0xFFFFFFFFUL;
@ -69,7 +69,8 @@ private:
cl_program m_searchProgram = nullptr; cl_program m_searchProgram = nullptr;
cl_kernel m_searchKernel = nullptr; cl_kernel m_searchKernel = nullptr;
size_t m_workGroupSize = 64; size_t m_workGroupSize = 256;
size_t m_dagWorkGroupSize = 64;
}; };

View file

@ -55,17 +55,18 @@ namespace xmrig {
class KawPowCacheEntry class KawPowCacheEntry
{ {
public: public:
inline KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t index, cl_program program) : inline KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program) :
program(program), program(program),
m_algo(algo), m_algo(algo),
m_index(index), m_index(index),
m_period(period) m_period(period),
m_worksize(worksize)
{} {}
inline bool isExpired(uint64_t period) const { return m_period + 1 < period; } inline bool isExpired(uint64_t period) const { return m_period + 1 < period; }
inline bool match(const Algorithm &algo, uint64_t period, uint32_t index) const { return m_algo == algo && m_period == period && m_index == index; } inline bool match(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index) const { return m_algo == algo && m_period == period && m_worksize == worksize && m_index == index; }
inline bool match(const IOclRunner &runner, uint64_t period) const { return match(runner.algorithm(), period, runner.deviceIndex()); } inline bool match(const IOclRunner &runner, uint64_t period, uint32_t worksize) const { return match(runner.algorithm(), period, worksize, runner.deviceIndex()); }
inline void release() { OclLib::release(program); } inline void release() { OclLib::release(program); }
cl_program program; cl_program program;
@ -73,6 +74,7 @@ private:
Algorithm m_algo; Algorithm m_algo;
uint32_t m_index; uint32_t m_index;
uint64_t m_period; uint64_t m_period;
uint32_t m_worksize;
}; };
@ -81,15 +83,15 @@ class KawPowCache
public: public:
KawPowCache() = default; KawPowCache() = default;
inline cl_program search(const IOclRunner &runner, uint64_t period) { return search(runner.algorithm(), period, runner.deviceIndex()); } inline cl_program search(const IOclRunner &runner, uint64_t period, uint32_t worksize) { return search(runner.algorithm(), period, worksize, runner.deviceIndex()); }
inline cl_program search(const Algorithm &algo, uint64_t period, uint32_t index) inline cl_program search(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index)
{ {
std::lock_guard<std::mutex> lock(m_mutex); std::lock_guard<std::mutex> lock(m_mutex);
for (const auto &entry : m_data) { for (const auto &entry : m_data) {
if (entry.match(algo, period, index)) { if (entry.match(algo, period, worksize, index)) {
return entry.program; return entry.program;
} }
} }
@ -98,9 +100,9 @@ public:
} }
void add(const Algorithm &algo, uint64_t period, uint32_t index, cl_program program) void add(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program)
{ {
if (search(algo, period, index)) { if (search(algo, period, worksize, index)) {
OclLib::release(program); OclLib::release(program);
return; return;
} }
@ -108,7 +110,7 @@ public:
std::lock_guard<std::mutex> lock(m_mutex); std::lock_guard<std::mutex> lock(m_mutex);
gc(period); gc(period);
m_data.emplace_back(algo, period, index, program); m_data.emplace_back(algo, period, worksize, index, program);
} }
@ -158,13 +160,13 @@ static KawPowCache cache;
class KawPowBuilder class KawPowBuilder
{ {
public: public:
cl_program build(const IOclRunner &runner, uint64_t period) cl_program build(const IOclRunner &runner, uint64_t period, uint32_t worksize)
{ {
std::lock_guard<std::mutex> lock(m_mutex); std::lock_guard<std::mutex> lock(m_mutex);
const uint64_t ts = Chrono::steadyMSecs(); const uint64_t ts = Chrono::steadyMSecs();
cl_program program = cache.search(runner, period); cl_program program = cache.search(runner, period, worksize);
if (program) { if (program) {
return program; return program;
} }
@ -185,6 +187,10 @@ public:
const uint64_t dag_elements = dag_sizes[epoch] / 256; const uint64_t dag_elements = dag_sizes[epoch] / 256;
options += std::to_string(dag_elements); options += std::to_string(dag_elements);
options += " -DGROUP_SIZE=";
options += std::to_string(worksize);
options += runner.buildOptions(); options += runner.buildOptions();
if (OclLib::buildProgram(program, 1, &device, options.c_str()) != CL_SUCCESS) { if (OclLib::buildProgram(program, 1, &device, options.c_str()) != CL_SUCCESS) {
@ -196,7 +202,7 @@ public:
LOG_INFO("KawPow program for period %" PRIu64 " compiled. (%" PRIu64 "ms)", period, Chrono::steadyMSecs() - ts); LOG_INFO("KawPow program for period %" PRIu64 " compiled. (%" PRIu64 "ms)", period, Chrono::steadyMSecs() - ts);
cache.add(runner.algorithm(), period, runner.deviceIndex(), program); cache.add(runner.algorithm(), period, worksize, runner.deviceIndex(), program);
return program; return program;
} }
@ -362,39 +368,41 @@ private:
class KawPowBaton : public Baton<uv_work_t> class KawPowBaton : public Baton<uv_work_t>
{ {
public: public:
inline KawPowBaton(const IOclRunner &runner, uint64_t period) : inline KawPowBaton(const IOclRunner &runner, uint64_t period, uint32_t worksize) :
runner(runner), runner(runner),
period(period) period(period),
worksize(worksize)
{} {}
const IOclRunner &runner; const IOclRunner &runner;
const uint64_t period; const uint64_t period;
const uint32_t worksize;
}; };
static KawPowBuilder builder; static KawPowBuilder builder;
cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height) cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height, uint32_t worksize)
{ {
const uint64_t period = height / KPHash::PERIOD_LENGTH; const uint64_t period = height / KPHash::PERIOD_LENGTH;
KawPowBaton* baton = new KawPowBaton(runner, period + 1); KawPowBaton* baton = new KawPowBaton(runner, period + 1, worksize);
uv_queue_work(uv_default_loop(), &baton->req, uv_queue_work(uv_default_loop(), &baton->req,
[](uv_work_t *req) { [](uv_work_t *req) {
KawPowBaton* baton = static_cast<KawPowBaton*>(req->data); KawPowBaton* baton = static_cast<KawPowBaton*>(req->data);
builder.build(baton->runner, baton->period); builder.build(baton->runner, baton->period, baton->worksize);
}, },
[](uv_work_t *req, int) { delete static_cast<KawPowBaton*>(req->data); } [](uv_work_t *req, int) { delete static_cast<KawPowBaton*>(req->data); }
); );
cl_program program = cache.search(runner, period); cl_program program = cache.search(runner, period, worksize);
if (program) { if (program) {
return program; return program;
} }
return builder.build(runner, period); return builder.build(runner, period, worksize);
} }

View file

@ -42,7 +42,7 @@ class IOclRunner;
class OclKawPow class OclKawPow
{ {
public: public:
static cl_program get(const IOclRunner &runner, uint64_t height); static cl_program get(const IOclRunner &runner, uint64_t height, uint32_t worksize);
static void clear(); static void clear();
}; };