Restored all cn/0 based algorithms (cn/0, cn-lite/0, cn/xao, cn-heavy/0, cn-heavy/xhv)

This commit is contained in:
XMRig 2019-09-02 12:55:41 +07:00
parent fc4f43ac7f
commit 28d1eaf8da
5 changed files with 2618 additions and 2657 deletions

View file

@ -1,38 +1,31 @@
enum Algorithm { #define ALGO_CN_0 0
ALGO_INVALID = -1, #define ALGO_CN_1 1
ALGO_CN_0, // "cn/0" CryptoNight (original). #define ALGO_CN_2 2
ALGO_CN_1, // "cn/1" CryptoNight variant 1 also known as Monero7 and CryptoNightV7. #define ALGO_CN_R 3
ALGO_CN_2, // "cn/2" CryptoNight variant 2. #define ALGO_CN_FAST 4
ALGO_CN_R, // "cn/r" CryptoNightR (Monero's variant 4). #define ALGO_CN_HALF 5
ALGO_CN_FAST, // "cn/fast" CryptoNight variant 1 with half iterations. #define ALGO_CN_XAO 6
ALGO_CN_HALF, // "cn/half" CryptoNight variant 2 with half iterations (Masari/Torque). #define ALGO_CN_RTO 7
ALGO_CN_XAO, // "cn/xao" CryptoNight variant 0 (modified, Alloy only). #define ALGO_CN_RWZ 8
ALGO_CN_RTO, // "cn/rto" CryptoNight variant 1 (modified, Arto only). #define ALGO_CN_ZLS 9
ALGO_CN_RWZ, // "cn/rwz" CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft). #define ALGO_CN_DOUBLE 10
ALGO_CN_ZLS, // "cn/zls" CryptoNight variant 2 with 3/4 iterations (Zelerius). #define ALGO_CN_GPU 11
ALGO_CN_DOUBLE, // "cn/double" CryptoNight variant 2 with double iterations (X-CASH). #define ALGO_CN_LITE_0 12
ALGO_CN_GPU, // "cn/gpu" CryptoNight-GPU (Ryo). #define ALGO_CN_LITE_1 13
ALGO_CN_LITE_0, // "cn-lite/0" CryptoNight-Lite variant 0. #define ALGO_CN_HEAVY_0 14
ALGO_CN_LITE_1, // "cn-lite/1" CryptoNight-Lite variant 1. #define ALGO_CN_HEAVY_TUBE 15
ALGO_CN_HEAVY_0, // "cn-heavy/0" CryptoNight-Heavy (4 MB). #define ALGO_CN_HEAVY_XHV 16
ALGO_CN_HEAVY_TUBE, // "cn-heavy/tube" CryptoNight-Heavy (modified, TUBE only). #define ALGO_CN_PICO_0 17
ALGO_CN_HEAVY_XHV, // "cn-heavy/xhv" CryptoNight-Heavy (modified, Haven Protocol only). #define ALGO_RX_0 18
ALGO_CN_PICO_0, // "cn-pico" CryptoNight Turtle (TRTL) #define ALGO_RX_WOW 19
ALGO_RX_0, // "rx/0" RandomX (reference configuration). #define ALGO_RX_LOKI 20
ALGO_RX_WOW, // "rx/wow" RandomWOW (Wownero). #define ALGO_AR2_CHUKWA 21
ALGO_RX_LOKI, // "rx/loki" RandomXL (Loki). #define ALGO_AR2_WRKZ 22
ALGO_AR2_CHUKWA, // "argon2/chukwa" Argon2id (Chukwa).
ALGO_AR2_WRKZ, // "argon2/wrkz" Argon2id (WRKZ)
ALGO_MAX
};
#define FAMILY_UNKNOWN 0
enum AlgorithmFamily { #define FAMILY_CN 1
FAMILY_UNKNOWN, #define FAMILY_CN_LITE 2
FAMILY_CN, #define FAMILY_CN_HEAVY 3
FAMILY_CN_LITE, #define FAMILY_CN_PICO 4
FAMILY_CN_HEAVY, #define FAMILY_RANDOM_X 5
FAMILY_CN_PICO, #define FAMILY_ARGON2 6
FAMILY_RANDOM_X,
FAMILY_ARGON2
};

View file

@ -51,7 +51,7 @@
#if (STRIDED_INDEX == 0) #if (STRIDED_INDEX == 0)
# define IDX(x) (x) # define IDX(x) (x)
#elif (STRIDED_INDEX == 1) #elif (STRIDED_INDEX == 1)
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
# define IDX(x) ((x) * WORKSIZE) # define IDX(x) ((x) * WORKSIZE)
# else # else
# define IDX(x) mul24((x), Threads) # define IDX(x) mul24((x), Threads)
@ -100,7 +100,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
# if (STRIDED_INDEX == 0) # if (STRIDED_INDEX == 0)
Scratchpad += gIdx * (MEMORY >> 4); Scratchpad += gIdx * (MEMORY >> 4);
# elif (STRIDED_INDEX == 1) # elif (STRIDED_INDEX == 1)
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
Scratchpad += (gIdx / WORKSIZE) * (MEMORY >> 4) * WORKSIZE + (gIdx % WORKSIZE); Scratchpad += (gIdx / WORKSIZE) * (MEMORY >> 4) * WORKSIZE + (gIdx % WORKSIZE);
# else # else
Scratchpad += gIdx; Scratchpad += gIdx;
@ -163,7 +163,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
mem_fence(CLK_LOCAL_MEM_FENCE); mem_fence(CLK_LOCAL_MEM_FENCE);
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
{ {
__local uint4 xin[8][8]; __local uint4 xin[8][8];
@ -256,7 +256,7 @@ __kernel void cn1_v1(__global uint4 *Scratchpad, __global ulong *states, uint va
# if (STRIDED_INDEX == 0) # if (STRIDED_INDEX == 0)
Scratchpad += gIdx * (MEMORY >> 4); Scratchpad += gIdx * (MEMORY >> 4);
# elif (STRIDED_INDEX == 1) # elif (STRIDED_INDEX == 1)
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
Scratchpad += (gIdx / WORKSIZE) * (MEMORY >> 4) * WORKSIZE + (gIdx % WORKSIZE); Scratchpad += (gIdx / WORKSIZE) * (MEMORY >> 4) * WORKSIZE + (gIdx % WORKSIZE);
# else # else
Scratchpad += gIdx; Scratchpad += gIdx;
@ -498,7 +498,7 @@ __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ul
# if (STRIDED_INDEX == 0) # if (STRIDED_INDEX == 0)
Scratchpad += gIdx * (MEMORY >> 4); Scratchpad += gIdx * (MEMORY >> 4);
# elif (STRIDED_INDEX == 1) # elif (STRIDED_INDEX == 1)
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + get_local_id(0); Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + get_local_id(0);
# else # else
Scratchpad += gIdx; Scratchpad += gIdx;
@ -542,13 +542,13 @@ __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ul
b_x = ((uint4 *)c)[0]; b_x = ((uint4 *)c)[0];
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
{ {
const long2 n = *((__global long2*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))); const long2 n = *((__global long2*)(Scratchpad + (IDX((idx0 & MASK) >> 4))));
long q = fast_div_heavy(n.s0, as_int4(n).s2 | 0x5); long q = fast_div_heavy(n.s0, as_int4(n).s2 | 0x5);
*((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))) = n.s0 ^ q; *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))) = n.s0 ^ q;
# if (ALGO == ALGO_CN_HEAVY_XHV) { # if (ALGO == ALGO_CN_HEAVY_XHV)
idx0 = (~as_int4(n).s2) ^ q; idx0 = (~as_int4(n).s2) ^ q;
# else # else
idx0 = as_int4(n).s2 ^ q; idx0 = as_int4(n).s2 ^ q;
@ -590,7 +590,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
# if (STRIDED_INDEX == 0) # if (STRIDED_INDEX == 0)
Scratchpad += gIdx * (MEMORY >> 4); Scratchpad += gIdx * (MEMORY >> 4);
# elif (STRIDED_INDEX == 1) # elif (STRIDED_INDEX == 1)
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
Scratchpad += (gIdx / WORKSIZE) * (MEMORY >> 4) * WORKSIZE + (gIdx % WORKSIZE); Scratchpad += (gIdx / WORKSIZE) * (MEMORY >> 4) * WORKSIZE + (gIdx % WORKSIZE);
# else # else
Scratchpad += gIdx; Scratchpad += gIdx;
@ -616,7 +616,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
__local uint4 xin1[8][8]; __local uint4 xin1[8][8];
__local uint4 xin2[8][8]; __local uint4 xin2[8][8];
__local uint4* xin1_store = &xin1[get_local_id(1)][get_local_id(0)]; __local uint4* xin1_store = &xin1[get_local_id(1)][get_local_id(0)];
@ -631,7 +631,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
if (gIdx < Threads) if (gIdx < Threads)
# endif # endif
{ {
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
#pragma unroll 2 #pragma unroll 2
for(int i = 0, i1 = get_local_id(1); i < (MEMORY >> 7); ++i, i1 = (i1 + 16) % (MEMORY >> 4)) for(int i = 0, i1 = get_local_id(1); i < (MEMORY >> 7); ++i, i1 = (i1 + 16) % (MEMORY >> 4))
{ {
@ -672,7 +672,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
# endif # endif
} }
# if (ALGO_FAMILY == CN_HEAVY) # if (ALGO_FAMILY == FAMILY_CN_HEAVY)
/* Also left over threads perform this loop. /* Also left over threads perform this loop.
* The left over thread results will be ignored * The left over thread results will be ignored
*/ */

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,8 @@
#ifndef FAST_DIV_HEAVY_CL #ifndef FAST_DIV_HEAVY_CL
#define FAST_DIV_HEAVY_CL #define FAST_DIV_HEAVY_CL
#if (ALGO_FAMILY == FAMILY_CN_HEAVY)
inline long fast_div_heavy(long _a, int _b) inline long fast_div_heavy(long _a, int _b)
{ {
long a = abs(_a); long a = abs(_a);
@ -25,3 +27,5 @@ inline long fast_div_heavy(long _a, int _b)
} }
#endif #endif
#endif

View file

@ -74,7 +74,6 @@ xmrig::OclCnRunner::OclCnRunner(size_t index, const OclLaunchData &data) : OclBa
m_options += " -DWORKSIZE=" + std::to_string(data.thread.worksize()) + "U"; m_options += " -DWORKSIZE=" + std::to_string(data.thread.worksize()) + "U";
m_options += " -DSTRIDED_INDEX=" + std::to_string(stridedIndex) + "U"; m_options += " -DSTRIDED_INDEX=" + std::to_string(stridedIndex) + "U";
m_options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(1u << data.thread.memChunk()) + "U"; m_options += " -DMEM_CHUNK_EXPONENT=" + std::to_string(1u << data.thread.memChunk()) + "U";
m_options += " -DCOMP_MODE=" + std::to_string(data.thread.isCompMode() && g_thd % data.thread.worksize() != 0 ? 1u : 0u) + "U";
m_options += " -DMEMORY=" + std::to_string(m_algorithm.l3()) + "LU"; m_options += " -DMEMORY=" + std::to_string(m_algorithm.l3()) + "LU";
m_options += " -DALGO=" + std::to_string(m_algorithm.id()); m_options += " -DALGO=" + std::to_string(m_algorithm.id());
m_options += " -DALGO_FAMILY=" + std::to_string(m_algorithm.family()); m_options += " -DALGO_FAMILY=" + std::to_string(m_algorithm.family());