diff --git a/src/backend/opencl/OclThread.cpp b/src/backend/opencl/OclThread.cpp index 087b2cf7e..77f6ef11b 100644 --- a/src/backend/opencl/OclThread.cpp +++ b/src/backend/opencl/OclThread.cpp @@ -56,7 +56,7 @@ xmrig::OclThread::OclThread(const rapidjson::Value &value) } m_index = Json::getUint(value, kIndex); - m_worksize = std::max(std::min(Json::getUint(value, kWorksize), 128u), 1u); + m_worksize = std::max(std::min(Json::getUint(value, kWorksize), 512u), 1u); m_unrollFactor = std::max(std::min(Json::getUint(value, kUnroll, m_unrollFactor), 128u), 1u); setIntensity(Json::getUint(value, kIntensity)); @@ -151,7 +151,7 @@ rapidjson::Value xmrig::OclThread::toJSON(rapidjson::Document &doc) const out.AddMember(StringRef(kDatasetHost), isDatasetHost(), allocator); # endif } - else if (!m_fields.test(ASTROBWT_FIELDS)) { + else if (!m_fields.test(ASTROBWT_FIELDS) && !m_fields.test(KAWPOW_FIELDS)) { out.AddMember(StringRef(kUnroll), unrollFactor(), allocator); } diff --git a/src/backend/opencl/OclThread.h b/src/backend/opencl/OclThread.h index 628e69b9a..0c3f03e23 100644 --- a/src/backend/opencl/OclThread.h +++ b/src/backend/opencl/OclThread.h @@ -67,7 +67,7 @@ public: } # endif -# if defined XMRIG_ALGO_ASTROBWT || defined XMRIG_ALGO_KAWPOW +# ifdef XMRIG_ALGO_ASTROBWT OclThread(uint32_t index, uint32_t intensity, uint32_t threads) : m_fields(4), m_threads(threads, -1), @@ -81,6 +81,20 @@ public: } # endif +# ifdef XMRIG_ALGO_KAWPOW + OclThread(uint32_t index, uint32_t intensity, uint32_t worksize, uint32_t threads) : + m_fields(8), + m_threads(threads, -1), + m_index(index), + m_memChunk(0), + m_stridedIndex(0), + m_unrollFactor(1), + m_worksize(worksize) + { + setIntensity(intensity); + } +# endif + OclThread(const rapidjson::Value &value); inline bool isAsm() const { return m_gcnAsm; } @@ -106,6 +120,7 @@ private: STRIDED_INDEX_FIELD, RANDOMX_FIELDS, ASTROBWT_FIELDS, + KAWPOW_FIELDS, FIELD_MAX }; diff --git a/src/backend/opencl/cl/kawpow/defs.h b/src/backend/opencl/cl/kawpow/defs.h index 0664f486d..bd8985d60 100644 --- a/src/backend/opencl/cl/kawpow/defs.h +++ b/src/backend/opencl/cl/kawpow/defs.h @@ -3,7 +3,7 @@ #endif #ifndef GROUP_SIZE -#define GROUP_SIZE 128 +#define GROUP_SIZE 256 #endif #define GROUP_SHARE (GROUP_SIZE / 16) diff --git a/src/backend/opencl/cl/kawpow/kawpow.cl b/src/backend/opencl/cl/kawpow/kawpow.cl index f9bc9e466..b726240f2 100644 --- a/src/backend/opencl/cl/kawpow/kawpow.cl +++ b/src/backend/opencl/cl/kawpow/kawpow.cl @@ -6,7 +6,7 @@ inline void progPowLoop(const uint32_t loop, volatile uint32_t mix_arg[PROGPOW_REGS], __global const dag_t *g_dag, __local const uint32_t c_dag[PROGPOW_CACHE_WORDS], - __local uint64_t share[GROUP_SHARE], + __local uint32_t share[GROUP_SHARE], const bool hack_false) { dag_t data_dag; @@ -166,7 +166,6 @@ void fill_mix(local uint32_t* seed, uint32_t lane_id, uint32_t* mix) typedef struct { uint32_t uint32s[PROGPOW_LANES]; - uint64_t uint64s[PROGPOW_LANES / 2]; } shuffle_t; typedef struct @@ -247,7 +246,7 @@ __kernel void progpow_search(__global dag_t const* g_dag, __global uint* job_blo #pragma unroll 1 for (uint32_t l = 0; l < PROGPOW_CNT_DAG; l++) - progPowLoop(l, mix, g_dag, c_dag, share[0].uint64s, hack_false); + progPowLoop(l, mix, g_dag, c_dag, share[0].uint32s, hack_false); // Reduce mix data to a per-lane 32-bit digest uint32_t mix_hash = FNV_OFFSET_BASIS; diff --git a/src/backend/opencl/cl/kawpow/kawpow_cl.h b/src/backend/opencl/cl/kawpow/kawpow_cl.h index 0aabd16d6..64ef8b5bb 100644 --- a/src/backend/opencl/cl/kawpow/kawpow_cl.h +++ b/src/backend/opencl/cl/kawpow/kawpow_cl.h @@ -2,12 +2,12 @@ namespace xmrig { -static char kawpow_cl[6348] = { +static char kawpow_cl[6313] = { 0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70, 0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f, 0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69, 0x65,0x72,0x73,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x23,0x69,0x66,0x6e,0x64,0x65,0x66,0x20,0x47,0x52,0x4f,0x55, - 0x50,0x5f,0x53,0x49,0x5a,0x45,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x20,0x31,0x32,0x38,0x0a,0x23,0x65, + 0x50,0x5f,0x53,0x49,0x5a,0x45,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x20,0x32,0x35,0x36,0x0a,0x23,0x65, 0x6e,0x64,0x69,0x66,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x20,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f, 0x53,0x49,0x5a,0x45,0x20,0x2f,0x20,0x31,0x36,0x29,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20, 0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x6c,0x6f,0x6e,0x67,0x20, @@ -39,7 +39,7 @@ static char kawpow_cl[6348] = { 0x47,0x53,0x5d,0x2c,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x64,0x61,0x67,0x5f,0x74,0x20,0x2a,0x67,0x5f,0x64,0x61,0x67, 0x2c,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x63,0x5f,0x64,0x61,0x67,0x5b,0x50, 0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x5d,0x2c,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e, - 0x74,0x36,0x34,0x5f,0x74,0x20,0x73,0x68,0x61,0x72,0x65,0x5b,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x5d,0x2c,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20, + 0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x68,0x61,0x72,0x65,0x5b,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x5d,0x2c,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20, 0x62,0x6f,0x6f,0x6c,0x20,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x29,0x0a,0x7b,0x0a,0x64,0x61,0x67,0x5f,0x74,0x20,0x64,0x61,0x74,0x61,0x5f,0x64,0x61, 0x67,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6f,0x66,0x66,0x73,0x65,0x74,0x2c,0x64,0x61,0x74,0x61,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f, 0x74,0x20,0x6d,0x69,0x78,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x52,0x45,0x47,0x53,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30, @@ -128,80 +128,79 @@ static char kawpow_cl[6348] = { 0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f, 0x57,0x5f,0x52,0x45,0x47,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x6d,0x69,0x78,0x5b,0x69,0x5d,0x3d,0x6b,0x69,0x73,0x73,0x39,0x39,0x28,0x26,0x73,0x74,0x29,0x3b, 0x0a,0x7d,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x73,0x74,0x72,0x75,0x63,0x74,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x75,0x69,0x6e, - 0x74,0x33,0x32,0x73,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x75,0x69, - 0x6e,0x74,0x36,0x34,0x73,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x2f,0x32,0x5d,0x3b,0x0a,0x7d,0x20,0x73,0x68,0x75,0x66,0x66,0x6c, - 0x65,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x73,0x74,0x72,0x75,0x63,0x74,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x75, - 0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68, - 0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x23,0x69,0x66,0x20,0x50,0x4c,0x41,0x54,0x46,0x4f,0x52,0x4d,0x20,0x21,0x3d,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x5f, - 0x50,0x4c,0x41,0x54,0x46,0x4f,0x52,0x4d,0x5f,0x4e,0x56,0x49,0x44,0x49,0x41,0x20,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28, - 0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x2c, - 0x31,0x2c,0x31,0x29,0x29,0x29,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x70,0x72,0x6f,0x67, - 0x70,0x6f,0x77,0x5f,0x73,0x65,0x61,0x72,0x63,0x68,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x64,0x61,0x67,0x5f,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x2a, - 0x20,0x67,0x5f,0x64,0x61,0x67,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x2a,0x20,0x6a,0x6f,0x62,0x5f,0x62,0x6c,0x6f,0x62,0x2c,0x75, - 0x6c,0x6f,0x6e,0x67,0x20,0x74,0x61,0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74,0x20,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x2c,0x76,0x6f,0x6c,0x61, - 0x74,0x69,0x6c,0x65,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x2a,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x29,0x0a,0x7b,0x0a,0x5f, - 0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x73,0x68,0x75,0x66,0x66,0x6c,0x65,0x5f,0x74,0x20,0x73,0x68,0x61,0x72,0x65,0x5b,0x48,0x41,0x53,0x48,0x45,0x53,0x5f,0x50,0x45, - 0x52,0x5f,0x47,0x52,0x4f,0x55,0x50,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x63,0x5f,0x64,0x61,0x67, - 0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20, - 0x63,0x6f,0x6e,0x73,0x74,0x20,0x6c,0x69,0x64,0x3d,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33, - 0x32,0x5f,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x67,0x69,0x64,0x3d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a, - 0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x3d,0x6c,0x69,0x64,0x26,0x28,0x50,0x52,0x4f,0x47, - 0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x2d,0x31,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x72,0x6f, - 0x75,0x70,0x5f,0x69,0x64,0x3d,0x6c,0x69,0x64,0x2f,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69, - 0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x3d,0x6c,0x69,0x64,0x2a,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44, - 0x53,0x3b,0x20,0x77,0x6f,0x72,0x64,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x3b,0x20,0x77,0x6f,0x72, - 0x64,0x2b,0x3d,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x2a,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x29, - 0x0a,0x7b,0x0a,0x64,0x61,0x67,0x5f,0x74,0x20,0x6c,0x6f,0x61,0x64,0x3d,0x67,0x5f,0x64,0x61,0x67,0x5b,0x77,0x6f,0x72,0x64,0x2f,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57, - 0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f, - 0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x63,0x5f,0x64,0x61,0x67,0x5b,0x77,0x6f,0x72,0x64,0x2b, - 0x69,0x5d,0x3d,0x6c,0x6f,0x61,0x64,0x2e,0x73,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41, - 0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x68,0x61,0x73,0x68,0x5f,0x73,0x65,0x65,0x64, - 0x5b,0x32,0x5d,0x3b,0x20,0x0a,0x68,0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x20,0x64,0x69,0x67,0x65,0x73,0x74,0x3b,0x20,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74, - 0x20,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74,0x61,0x74,0x65,0x5b,0x32,0x35,0x5d, - 0x3b,0x20,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x31,0x30,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74, - 0x65,0x5b,0x69,0x5d,0x3d,0x6a,0x6f,0x62,0x5f,0x62,0x6c,0x6f,0x62,0x5b,0x69,0x5d,0x3b,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x67,0x69,0x64,0x3b,0x0a, - 0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x31,0x30,0x3b,0x20,0x69,0x3c,0x32,0x35,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b, - 0x69,0x5d,0x3d,0x72,0x61,0x76,0x65,0x6e,0x63,0x6f,0x69,0x6e,0x5f,0x72,0x6e,0x64,0x63,0x5b,0x69,0x2d,0x31,0x30,0x5d,0x3b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f, - 0x66,0x38,0x30,0x30,0x28,0x73,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20, - 0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x69,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x23,0x70,0x72,0x61,0x67, - 0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x68,0x3d,0x30,0x3b,0x20,0x68, - 0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20, - 0x6d,0x69,0x78,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x52,0x45,0x47,0x53,0x5d,0x3b,0x0a,0x69,0x66,0x28,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x3d,0x3d,0x68, - 0x29,0x20,0x7b,0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x30,0x5d,0x3d,0x73, - 0x74,0x61,0x74,0x65,0x32,0x5b,0x30,0x5d,0x3b,0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32, - 0x73,0x5b,0x31,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f, - 0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0x0a,0x66,0x69,0x6c,0x6c,0x5f,0x6d,0x69,0x78,0x28,0x73,0x68,0x61,0x72,0x65,0x5b,0x67, - 0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x2c,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x2c,0x6d,0x69,0x78,0x29,0x3b,0x0a,0x23, - 0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6c,0x3d, - 0x30,0x3b,0x20,0x6c,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x4e,0x54,0x5f,0x44,0x41,0x47,0x3b,0x20,0x6c,0x2b,0x2b,0x29,0x0a,0x70,0x72,0x6f,0x67,0x50, - 0x6f,0x77,0x4c,0x6f,0x6f,0x70,0x28,0x6c,0x2c,0x6d,0x69,0x78,0x2c,0x67,0x5f,0x64,0x61,0x67,0x2c,0x63,0x5f,0x64,0x61,0x67,0x2c,0x73,0x68,0x61,0x72,0x65,0x5b,0x30, - 0x5d,0x2e,0x75,0x69,0x6e,0x74,0x36,0x34,0x73,0x2c,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20, - 0x6d,0x69,0x78,0x5f,0x68,0x61,0x73,0x68,0x3d,0x46,0x4e,0x56,0x5f,0x4f,0x46,0x46,0x53,0x45,0x54,0x5f,0x42,0x41,0x53,0x49,0x53,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67, - 0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f, - 0x57,0x5f,0x52,0x45,0x47,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x66,0x6e,0x76,0x31,0x61,0x28,0x6d,0x69,0x78,0x5f,0x68,0x61,0x73,0x68,0x2c,0x6d,0x69,0x78,0x5b, - 0x69,0x5d,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x20,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28, - 0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x2e,0x75, - 0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x5d,0x3d,0x46,0x4e,0x56,0x5f,0x4f,0x46,0x46,0x53,0x45,0x54,0x5f,0x42,0x41,0x53,0x49,0x53,0x3b,0x0a,0x73,0x68,0x61,0x72, - 0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x5d,0x3d,0x6d,0x69,0x78, - 0x5f,0x68,0x61,0x73,0x68,0x3b,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e, - 0x43,0x45,0x29,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30, - 0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x66,0x6e,0x76,0x31,0x61,0x28,0x64,0x69, - 0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x20,0x25,0x20,0x38,0x5d,0x2c,0x73,0x68,0x61,0x72,0x65,0x5b,0x67, - 0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x68,0x3d,0x3d,0x6c,0x61,0x6e,0x65, - 0x5f,0x69,0x64,0x29,0x0a,0x64,0x69,0x67,0x65,0x73,0x74,0x3d,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x3b,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36, - 0x34,0x5f,0x74,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x3b,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74,0x61,0x74,0x65,0x5b,0x32,0x35,0x5d, - 0x3d,0x7b,0x30,0x78,0x30,0x7d,0x3b,0x20,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b,0x29, - 0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x69,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d, - 0x38,0x3b,0x20,0x69,0x3c,0x31,0x36,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x64,0x69,0x67,0x65,0x73,0x74,0x2e,0x75,0x69, - 0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x2d,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x31,0x36,0x3b,0x20,0x69,0x3c,0x32,0x35,0x3b, - 0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x72,0x61,0x76,0x65,0x6e,0x63,0x6f,0x69,0x6e,0x5f,0x72,0x6e,0x64,0x63,0x5b,0x69,0x2d, - 0x31,0x36,0x5d,0x3b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x38,0x30,0x30,0x28,0x73,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f, - 0x74,0x20,0x72,0x65,0x73,0x3d,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x73,0x74,0x61,0x74,0x65,0x5b,0x31,0x5d,0x3c,0x3c,0x33,0x32,0x7c,0x73,0x74,0x61, - 0x74,0x65,0x5b,0x30,0x5d,0x3b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x3d,0x61,0x73,0x5f,0x75,0x6c,0x6f,0x6e,0x67,0x28,0x61,0x73,0x5f,0x75,0x63,0x68,0x61,0x72,0x38, - 0x28,0x72,0x65,0x73,0x29,0x2e,0x73,0x37,0x36,0x35,0x34,0x33,0x32,0x31,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x72,0x65,0x73,0x75,0x6c,0x74,0x3c,0x3d,0x74, - 0x61,0x72,0x67,0x65,0x74,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x6b,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63, - 0x28,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x29,0x2b,0x31,0x3b,0x0a,0x69,0x66,0x28,0x6b,0x3c,0x3d,0x31,0x35,0x29,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x5b,0x6b, - 0x5d,0x3d,0x67,0x69,0x64,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00 + 0x74,0x33,0x32,0x73,0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x5d,0x3b,0x0a,0x7d,0x20,0x73,0x68,0x75,0x66,0x66,0x6c,0x65,0x5f,0x74, + 0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x73,0x74,0x72,0x75,0x63,0x74,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x75,0x69,0x6e,0x74, + 0x33,0x32,0x73,0x5b,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x29,0x5d,0x3b,0x0a,0x7d,0x20,0x68,0x61,0x73,0x68, + 0x33,0x32,0x5f,0x74,0x3b,0x0a,0x23,0x69,0x66,0x20,0x50,0x4c,0x41,0x54,0x46,0x4f,0x52,0x4d,0x20,0x21,0x3d,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x5f,0x50,0x4c,0x41, + 0x54,0x46,0x4f,0x52,0x4d,0x5f,0x4e,0x56,0x49,0x44,0x49,0x41,0x20,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71, + 0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x2c,0x31,0x2c,0x31, + 0x29,0x29,0x29,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x70,0x72,0x6f,0x67,0x70,0x6f,0x77, + 0x5f,0x73,0x65,0x61,0x72,0x63,0x68,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x64,0x61,0x67,0x5f,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x2a,0x20,0x67,0x5f, + 0x64,0x61,0x67,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x2a,0x20,0x6a,0x6f,0x62,0x5f,0x62,0x6c,0x6f,0x62,0x2c,0x75,0x6c,0x6f,0x6e, + 0x67,0x20,0x74,0x61,0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74,0x20,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x2c,0x76,0x6f,0x6c,0x61,0x74,0x69,0x6c, + 0x65,0x20,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x2a,0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x29,0x0a,0x7b,0x0a,0x5f,0x5f,0x6c,0x6f, + 0x63,0x61,0x6c,0x20,0x73,0x68,0x75,0x66,0x66,0x6c,0x65,0x5f,0x74,0x20,0x73,0x68,0x61,0x72,0x65,0x5b,0x48,0x41,0x53,0x48,0x45,0x53,0x5f,0x50,0x45,0x52,0x5f,0x47, + 0x52,0x4f,0x55,0x50,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x63,0x5f,0x64,0x61,0x67,0x5b,0x50,0x52, + 0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x63,0x6f,0x6e, + 0x73,0x74,0x20,0x6c,0x69,0x64,0x3d,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74, + 0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x67,0x69,0x64,0x3d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e, + 0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x3d,0x6c,0x69,0x64,0x26,0x28,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57, + 0x5f,0x4c,0x41,0x4e,0x45,0x53,0x2d,0x31,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x72,0x6f,0x75,0x70,0x5f, + 0x69,0x64,0x3d,0x6c,0x69,0x64,0x2f,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33, + 0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x3d,0x6c,0x69,0x64,0x2a,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x3b,0x20, + 0x77,0x6f,0x72,0x64,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x41,0x43,0x48,0x45,0x5f,0x57,0x4f,0x52,0x44,0x53,0x3b,0x20,0x77,0x6f,0x72,0x64,0x2b,0x3d, + 0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x2a,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x29,0x0a,0x7b,0x0a, + 0x64,0x61,0x67,0x5f,0x74,0x20,0x6c,0x6f,0x61,0x64,0x3d,0x67,0x5f,0x64,0x61,0x67,0x5b,0x77,0x6f,0x72,0x64,0x2f,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x44,0x41, + 0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f, + 0x57,0x5f,0x44,0x41,0x47,0x5f,0x4c,0x4f,0x41,0x44,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x63,0x5f,0x64,0x61,0x67,0x5b,0x77,0x6f,0x72,0x64,0x2b,0x69,0x5d,0x3d, + 0x6c,0x6f,0x61,0x64,0x2e,0x73,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d, + 0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x68,0x61,0x73,0x68,0x5f,0x73,0x65,0x65,0x64,0x5b,0x32,0x5d, + 0x3b,0x20,0x0a,0x68,0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x20,0x64,0x69,0x67,0x65,0x73,0x74,0x3b,0x20,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74, + 0x61,0x74,0x65,0x32,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74,0x61,0x74,0x65,0x5b,0x32,0x35,0x5d,0x3b,0x20,0x0a, + 0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x31,0x30,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69, + 0x5d,0x3d,0x6a,0x6f,0x62,0x5f,0x62,0x6c,0x6f,0x62,0x5b,0x69,0x5d,0x3b,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x67,0x69,0x64,0x3b,0x0a,0x66,0x6f,0x72, + 0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x31,0x30,0x3b,0x20,0x69,0x3c,0x32,0x35,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d, + 0x72,0x61,0x76,0x65,0x6e,0x63,0x6f,0x69,0x6e,0x5f,0x72,0x6e,0x64,0x63,0x5b,0x69,0x2d,0x31,0x30,0x5d,0x3b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x38,0x30, + 0x30,0x28,0x73,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b, + 0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x69,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20, + 0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x68,0x3d,0x30,0x3b,0x20,0x68,0x3c,0x50,0x52, + 0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x20,0x68,0x2b,0x2b,0x29,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6d,0x69,0x78, + 0x5b,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x52,0x45,0x47,0x53,0x5d,0x3b,0x0a,0x69,0x66,0x28,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x3d,0x3d,0x68,0x29,0x20,0x7b, + 0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x30,0x5d,0x3d,0x73,0x74,0x61,0x74, + 0x65,0x32,0x5b,0x30,0x5d,0x3b,0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x31, + 0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c, + 0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29,0x3b,0x0a,0x66,0x69,0x6c,0x6c,0x5f,0x6d,0x69,0x78,0x28,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75, + 0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x2c,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x2c,0x6d,0x69,0x78,0x29,0x3b,0x0a,0x23,0x70,0x72,0x61, + 0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6c,0x3d,0x30,0x3b,0x20, + 0x6c,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x43,0x4e,0x54,0x5f,0x44,0x41,0x47,0x3b,0x20,0x6c,0x2b,0x2b,0x29,0x0a,0x70,0x72,0x6f,0x67,0x50,0x6f,0x77,0x4c, + 0x6f,0x6f,0x70,0x28,0x6c,0x2c,0x6d,0x69,0x78,0x2c,0x67,0x5f,0x64,0x61,0x67,0x2c,0x63,0x5f,0x64,0x61,0x67,0x2c,0x73,0x68,0x61,0x72,0x65,0x5b,0x30,0x5d,0x2e,0x75, + 0x69,0x6e,0x74,0x33,0x32,0x73,0x2c,0x68,0x61,0x63,0x6b,0x5f,0x66,0x61,0x6c,0x73,0x65,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6d,0x69,0x78, + 0x5f,0x68,0x61,0x73,0x68,0x3d,0x46,0x4e,0x56,0x5f,0x4f,0x46,0x46,0x53,0x45,0x54,0x5f,0x42,0x41,0x53,0x49,0x53,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20, + 0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x52, + 0x45,0x47,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x66,0x6e,0x76,0x31,0x61,0x28,0x6d,0x69,0x78,0x5f,0x68,0x61,0x73,0x68,0x2c,0x6d,0x69,0x78,0x5b,0x69,0x5d,0x29, + 0x3b,0x0a,0x68,0x61,0x73,0x68,0x33,0x32,0x5f,0x74,0x20,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74, + 0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x2e,0x75,0x69,0x6e,0x74, + 0x33,0x32,0x73,0x5b,0x69,0x5d,0x3d,0x46,0x4e,0x56,0x5f,0x4f,0x46,0x46,0x53,0x45,0x54,0x5f,0x42,0x41,0x53,0x49,0x53,0x3b,0x0a,0x73,0x68,0x61,0x72,0x65,0x5b,0x67, + 0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64,0x5d,0x3d,0x6d,0x69,0x78,0x5f,0x68,0x61, + 0x73,0x68,0x3b,0x0a,0x62,0x61,0x72,0x72,0x69,0x65,0x72,0x28,0x43,0x4c,0x4b,0x5f,0x4c,0x4f,0x43,0x41,0x4c,0x5f,0x4d,0x45,0x4d,0x5f,0x46,0x45,0x4e,0x43,0x45,0x29, + 0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69, + 0x3c,0x50,0x52,0x4f,0x47,0x50,0x4f,0x57,0x5f,0x4c,0x41,0x4e,0x45,0x53,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x66,0x6e,0x76,0x31,0x61,0x28,0x64,0x69,0x67,0x65,0x73, + 0x74,0x5f,0x74,0x65,0x6d,0x70,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x20,0x25,0x20,0x38,0x5d,0x2c,0x73,0x68,0x61,0x72,0x65,0x5b,0x67,0x72,0x6f,0x75, + 0x70,0x5f,0x69,0x64,0x5d,0x2e,0x75,0x69,0x6e,0x74,0x33,0x32,0x73,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x68,0x3d,0x3d,0x6c,0x61,0x6e,0x65,0x5f,0x69,0x64, + 0x29,0x0a,0x64,0x69,0x67,0x65,0x73,0x74,0x3d,0x64,0x69,0x67,0x65,0x73,0x74,0x5f,0x74,0x65,0x6d,0x70,0x3b,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74, + 0x20,0x72,0x65,0x73,0x75,0x6c,0x74,0x3b,0x0a,0x7b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x74,0x61,0x74,0x65,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x30, + 0x78,0x30,0x7d,0x3b,0x20,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74, + 0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x32,0x5b,0x69,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x38,0x3b,0x20, + 0x69,0x3c,0x31,0x36,0x3b,0x20,0x69,0x2b,0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x64,0x69,0x67,0x65,0x73,0x74,0x2e,0x75,0x69,0x6e,0x74,0x33, + 0x32,0x73,0x5b,0x69,0x2d,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x31,0x36,0x3b,0x20,0x69,0x3c,0x32,0x35,0x3b,0x20,0x69,0x2b, + 0x2b,0x29,0x0a,0x73,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x3d,0x72,0x61,0x76,0x65,0x6e,0x63,0x6f,0x69,0x6e,0x5f,0x72,0x6e,0x64,0x63,0x5b,0x69,0x2d,0x31,0x36,0x5d, + 0x3b,0x0a,0x6b,0x65,0x63,0x63,0x61,0x6b,0x5f,0x66,0x38,0x30,0x30,0x28,0x73,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x72, + 0x65,0x73,0x3d,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x73,0x74,0x61,0x74,0x65,0x5b,0x31,0x5d,0x3c,0x3c,0x33,0x32,0x7c,0x73,0x74,0x61,0x74,0x65,0x5b, + 0x30,0x5d,0x3b,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x3d,0x61,0x73,0x5f,0x75,0x6c,0x6f,0x6e,0x67,0x28,0x61,0x73,0x5f,0x75,0x63,0x68,0x61,0x72,0x38,0x28,0x72,0x65, + 0x73,0x29,0x2e,0x73,0x37,0x36,0x35,0x34,0x33,0x32,0x31,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x72,0x65,0x73,0x75,0x6c,0x74,0x3c,0x3d,0x74,0x61,0x72,0x67, + 0x65,0x74,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x6b,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x72,0x65, + 0x73,0x75,0x6c,0x74,0x73,0x29,0x2b,0x31,0x3b,0x0a,0x69,0x66,0x28,0x6b,0x3c,0x3d,0x31,0x35,0x29,0x0a,0x72,0x65,0x73,0x75,0x6c,0x74,0x73,0x5b,0x6b,0x5d,0x3d,0x67, + 0x69,0x64,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00 }; } // namespace xmrig diff --git a/src/backend/opencl/cl/kawpow/kawpow_dag_cl.h b/src/backend/opencl/cl/kawpow/kawpow_dag_cl.h index 26cc5ef2d..03edb8b62 100644 --- a/src/backend/opencl/cl/kawpow/kawpow_dag_cl.h +++ b/src/backend/opencl/cl/kawpow/kawpow_dag_cl.h @@ -7,7 +7,7 @@ static char kawpow_dag_cl[5990] = { 0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f, 0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69, 0x65,0x72,0x73,0x20,0x3a,0x20,0x65,0x6e,0x61,0x62,0x6c,0x65,0x0a,0x23,0x65,0x6e,0x64,0x69,0x66,0x0a,0x23,0x69,0x66,0x6e,0x64,0x65,0x66,0x20,0x47,0x52,0x4f,0x55, - 0x50,0x5f,0x53,0x49,0x5a,0x45,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x20,0x31,0x32,0x38,0x0a,0x23,0x65, + 0x50,0x5f,0x53,0x49,0x5a,0x45,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x49,0x5a,0x45,0x20,0x32,0x35,0x36,0x0a,0x23,0x65, 0x6e,0x64,0x69,0x66,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x47,0x52,0x4f,0x55,0x50,0x5f,0x53,0x48,0x41,0x52,0x45,0x20,0x28,0x47,0x52,0x4f,0x55,0x50,0x5f, 0x53,0x49,0x5a,0x45,0x20,0x2f,0x20,0x31,0x36,0x29,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20, 0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x6c,0x6f,0x6e,0x67,0x20, diff --git a/src/backend/opencl/generators/ocl_generic_kawpow_generator.cpp b/src/backend/opencl/generators/ocl_generic_kawpow_generator.cpp index a03d6c2dd..5356a16b5 100644 --- a/src/backend/opencl/generators/ocl_generic_kawpow_generator.cpp +++ b/src/backend/opencl/generators/ocl_generic_kawpow_generator.cpp @@ -40,7 +40,20 @@ bool ocl_generic_kawpow_generator(const OclDevice &device, const Algorithm &algo return false; } - threads.add(OclThread(device.index(), device.computeUnits() * 262144, 1)); + bool isNavi = false; + + switch (device.type()) { + case OclDevice::Navi_10: + case OclDevice::Navi_12: + case OclDevice::Navi_14: + isNavi = true; + break; + + default: + break; + } + + threads.add(OclThread(device.index(), device.computeUnits() * 262144, isNavi ? 128 : 256, 1)); return true; } diff --git a/src/backend/opencl/runners/OclKawPowRunner.cpp b/src/backend/opencl/runners/OclKawPowRunner.cpp index 3dc49cace..131582766 100644 --- a/src/backend/opencl/runners/OclKawPowRunner.cpp +++ b/src/backend/opencl/runners/OclKawPowRunner.cpp @@ -46,9 +46,19 @@ namespace xmrig { OclKawPowRunner::OclKawPowRunner(size_t index, const OclLaunchData &data) : OclBaseRunner(index, data) { + switch (data.thread.worksize()) + { + case 64: + case 128: + case 256: + case 512: + m_workGroupSize = data.thread.worksize(); + break; + } + if (data.device.vendorId() == OclVendor::OCL_VENDOR_NVIDIA) { m_options += " -DPLATFORM=OPENCL_PLATFORM_NVIDIA"; - m_workGroupSize = 32; + m_dagWorkGroupSize = 32; } } @@ -68,9 +78,9 @@ OclKawPowRunner::~OclKawPowRunner() void OclKawPowRunner::run(uint32_t nonce, uint32_t *hashOutput) { - const size_t local_work_size = 128; + const size_t local_work_size = m_workGroupSize; const size_t global_work_offset = nonce; - const size_t global_work_size = m_intensity - (m_intensity % local_work_size); + const size_t global_work_size = m_intensity - (m_intensity % m_workGroupSize); enqueueWriteBuffer(m_input, CL_FALSE, 0, 40, m_blob); @@ -100,7 +110,7 @@ void OclKawPowRunner::run(uint32_t nonce, uint32_t *hashOutput) void OclKawPowRunner::set(const Job &job, uint8_t *blob) { m_blockHeight = static_cast(job.height()); - m_searchProgram = OclKawPow::get(*this, m_blockHeight); + m_searchProgram = OclKawPow::get(*this, m_blockHeight, m_workGroupSize); m_searchKernel = OclLib::createKernel(m_searchProgram, "progpow_search"); const uint32_t epoch = m_blockHeight / KPHash::EPOCH_LENGTH; @@ -137,11 +147,11 @@ void OclKawPowRunner::set(const Job &job, uint8_t *blob) const uint32_t dag_words = dag_size / sizeof(node); m_calculateDagKernel->setArgs(0, m_lightCache, m_dag, dag_words, m_lightCacheSize / sizeof(node)); - constexpr uint32_t N = 1 << 18; + constexpr uint32_t N = 1 << 20; for (uint32_t start = 0; start < dag_words; start += N) { m_calculateDagKernel->setArg(0, sizeof(start), &start); - m_calculateDagKernel->enqueue(m_queue, N, m_workGroupSize); + m_calculateDagKernel->enqueue(m_queue, N, m_dagWorkGroupSize); } OclLib::finish(m_queue); diff --git a/src/backend/opencl/runners/OclKawPowRunner.h b/src/backend/opencl/runners/OclKawPowRunner.h index 4d869a3e1..81b827073 100644 --- a/src/backend/opencl/runners/OclKawPowRunner.h +++ b/src/backend/opencl/runners/OclKawPowRunner.h @@ -52,7 +52,7 @@ protected: void init() override; private: - uint8_t* m_blob; + uint8_t* m_blob = nullptr; uint32_t m_blockHeight = 0; uint32_t m_epoch = 0xFFFFFFFFUL; @@ -69,7 +69,8 @@ private: cl_program m_searchProgram = nullptr; cl_kernel m_searchKernel = nullptr; - size_t m_workGroupSize = 64; + size_t m_workGroupSize = 256; + size_t m_dagWorkGroupSize = 64; }; diff --git a/src/backend/opencl/runners/tools/OclKawPow.cpp b/src/backend/opencl/runners/tools/OclKawPow.cpp index 99369a70d..8051f5293 100644 --- a/src/backend/opencl/runners/tools/OclKawPow.cpp +++ b/src/backend/opencl/runners/tools/OclKawPow.cpp @@ -55,17 +55,18 @@ namespace xmrig { class KawPowCacheEntry { public: - inline KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t index, cl_program program) : + inline KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program) : program(program), m_algo(algo), m_index(index), - m_period(period) + m_period(period), + m_worksize(worksize) {} - inline bool isExpired(uint64_t period) const { return m_period + 1 < period; } - inline bool match(const Algorithm &algo, uint64_t period, uint32_t index) const { return m_algo == algo && m_period == period && m_index == index; } - inline bool match(const IOclRunner &runner, uint64_t period) const { return match(runner.algorithm(), period, runner.deviceIndex()); } - inline void release() { OclLib::release(program); } + inline bool isExpired(uint64_t period) const { return m_period + 1 < period; } + inline bool match(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index) const { return m_algo == algo && m_period == period && m_worksize == worksize && m_index == index; } + inline bool match(const IOclRunner &runner, uint64_t period, uint32_t worksize) const { return match(runner.algorithm(), period, worksize, runner.deviceIndex()); } + inline void release() { OclLib::release(program); } cl_program program; @@ -73,6 +74,7 @@ private: Algorithm m_algo; uint32_t m_index; uint64_t m_period; + uint32_t m_worksize; }; @@ -81,15 +83,15 @@ class KawPowCache public: KawPowCache() = default; - inline cl_program search(const IOclRunner &runner, uint64_t period) { return search(runner.algorithm(), period, runner.deviceIndex()); } + inline cl_program search(const IOclRunner &runner, uint64_t period, uint32_t worksize) { return search(runner.algorithm(), period, worksize, runner.deviceIndex()); } - inline cl_program search(const Algorithm &algo, uint64_t period, uint32_t index) + inline cl_program search(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index) { std::lock_guard lock(m_mutex); for (const auto &entry : m_data) { - if (entry.match(algo, period, index)) { + if (entry.match(algo, period, worksize, index)) { return entry.program; } } @@ -98,9 +100,9 @@ public: } - void add(const Algorithm &algo, uint64_t period, uint32_t index, cl_program program) + void add(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program) { - if (search(algo, period, index)) { + if (search(algo, period, worksize, index)) { OclLib::release(program); return; } @@ -108,7 +110,7 @@ public: std::lock_guard lock(m_mutex); gc(period); - m_data.emplace_back(algo, period, index, program); + m_data.emplace_back(algo, period, worksize, index, program); } @@ -158,13 +160,13 @@ static KawPowCache cache; class KawPowBuilder { public: - cl_program build(const IOclRunner &runner, uint64_t period) + cl_program build(const IOclRunner &runner, uint64_t period, uint32_t worksize) { std::lock_guard lock(m_mutex); const uint64_t ts = Chrono::steadyMSecs(); - cl_program program = cache.search(runner, period); + cl_program program = cache.search(runner, period, worksize); if (program) { return program; } @@ -185,6 +187,10 @@ public: const uint64_t dag_elements = dag_sizes[epoch] / 256; options += std::to_string(dag_elements); + + options += " -DGROUP_SIZE="; + options += std::to_string(worksize); + options += runner.buildOptions(); if (OclLib::buildProgram(program, 1, &device, options.c_str()) != CL_SUCCESS) { @@ -196,7 +202,7 @@ public: LOG_INFO("KawPow program for period %" PRIu64 " compiled. (%" PRIu64 "ms)", period, Chrono::steadyMSecs() - ts); - cache.add(runner.algorithm(), period, runner.deviceIndex(), program); + cache.add(runner.algorithm(), period, worksize, runner.deviceIndex(), program); return program; } @@ -362,39 +368,41 @@ private: class KawPowBaton : public Baton { public: - inline KawPowBaton(const IOclRunner &runner, uint64_t period) : + inline KawPowBaton(const IOclRunner &runner, uint64_t period, uint32_t worksize) : runner(runner), - period(period) + period(period), + worksize(worksize) {} const IOclRunner &runner; const uint64_t period; + const uint32_t worksize; }; static KawPowBuilder builder; -cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height) +cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height, uint32_t worksize) { const uint64_t period = height / KPHash::PERIOD_LENGTH; - KawPowBaton* baton = new KawPowBaton(runner, period + 1); + KawPowBaton* baton = new KawPowBaton(runner, period + 1, worksize); uv_queue_work(uv_default_loop(), &baton->req, [](uv_work_t *req) { KawPowBaton* baton = static_cast(req->data); - builder.build(baton->runner, baton->period); + builder.build(baton->runner, baton->period, baton->worksize); }, [](uv_work_t *req, int) { delete static_cast(req->data); } ); - cl_program program = cache.search(runner, period); + cl_program program = cache.search(runner, period, worksize); if (program) { return program; } - return builder.build(runner, period); + return builder.build(runner, period, worksize); } diff --git a/src/backend/opencl/runners/tools/OclKawPow.h b/src/backend/opencl/runners/tools/OclKawPow.h index 53bb8b90a..9e07d70ce 100644 --- a/src/backend/opencl/runners/tools/OclKawPow.h +++ b/src/backend/opencl/runners/tools/OclKawPow.h @@ -42,7 +42,7 @@ class IOclRunner; class OclKawPow { public: - static cl_program get(const IOclRunner &runner, uint64_t height); + static cl_program get(const IOclRunner &runner, uint64_t height, uint32_t worksize); static void clear(); };