Cryptonight variant 2 - final version

Reference code: https://github.com/monero-project/monero/pull/4404

I tested it on x86 with av=1-10 and on ARM with av=1-4, self test passed.
This commit is contained in:
SChernykh 2018-09-22 17:21:32 +02:00
parent 0adad68471
commit d8ca5ef205
4 changed files with 41 additions and 17 deletions

View file

@ -487,7 +487,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
}
else {
lo = __umul128(idx0, cl, &hi);
@ -619,7 +619,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
} else {
lo = __umul128(idx0, cl, &hi);
}
@ -662,7 +662,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = __umul128(idx1, cl, &hi);
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11);
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
} else {
lo = __umul128(idx1, cl, &hi);
}

View file

@ -93,6 +93,18 @@
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
} while (0)
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
do { \
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
} while (0)
#else
# define VARIANT2_INIT(part) \
uint64_t division_result_##part = h##part[12]; \
@ -122,5 +134,17 @@
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
} while (0)
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
do { \
const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
} while (0)
#endif
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */

View file

@ -86,16 +86,16 @@ const static uint8_t test_output_v1[160] = {
// Cryptonight variant 2 (Monero v8)
const static uint8_t test_output_v2[160] = {
0x6E, 0xEE, 0x53, 0xA3, 0xDA, 0xD1, 0x8C, 0x05, 0xB8, 0xCB, 0x32, 0x17, 0xAA, 0xEA, 0xEA, 0xB4,
0x16, 0x11, 0x01, 0xA9, 0x08, 0x76, 0x37, 0x36, 0x6F, 0xDC, 0xCA, 0xC6, 0x92, 0x0D, 0xEA, 0x09,
0x91, 0x03, 0x2F, 0x5B, 0x27, 0x4D, 0x94, 0x1D, 0x60, 0x50, 0xDC, 0x1F, 0x35, 0x57, 0xEC, 0x20,
0xA6, 0xAC, 0x10, 0xDB, 0xCF, 0x36, 0x23, 0x8F, 0x96, 0xC7, 0x72, 0x8B, 0xF9, 0xE7, 0x30, 0xEB,
0x50, 0x58, 0x4B, 0xFE, 0xAD, 0xC5, 0x13, 0x79, 0x50, 0x98, 0x1C, 0x67, 0xB2, 0xEB, 0xDA, 0x64,
0xD4, 0xAA, 0xC4, 0xE8, 0xE5, 0xC9, 0xE7, 0x6B, 0x84, 0xC2, 0xD2, 0xE9, 0x1F, 0xA1, 0x0F, 0xDF,
0x45, 0x06, 0x80, 0x25, 0x32, 0x6B, 0xC4, 0x66, 0x2A, 0x69, 0x9F, 0x1E, 0x1F, 0x4C, 0xBE, 0x89,
0xFE, 0x61, 0xBB, 0x04, 0x79, 0xB5, 0x3B, 0x45, 0x58, 0xD9, 0x9C, 0x18, 0x7C, 0x48, 0x1B, 0x44,
0x92, 0xC4, 0x4C, 0xD0, 0x8F, 0x16, 0x44, 0x79, 0x71, 0x48, 0x63, 0x0B, 0x51, 0xB6, 0x33, 0x8B,
0x6B, 0x3F, 0xCC, 0x0A, 0x3A, 0x14, 0x3B, 0x49, 0x68, 0x46, 0xB9, 0x46, 0xC6, 0xA3, 0x03, 0x41
0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78,
0xE6, 0x0D, 0x24, 0x0F, 0x65, 0x85, 0x60, 0x3A, 0x4A, 0xE5, 0x5F, 0x54, 0x9B, 0xC8, 0x79, 0x93,
0xEB, 0x3D, 0x98, 0x2C, 0xFE, 0x9B, 0xFB, 0x15, 0xB6, 0x88, 0x21, 0x94, 0xB0, 0x05, 0x86, 0x5C,
0x59, 0x8B, 0x93, 0x7A, 0xDA, 0xD2, 0xA2, 0x14, 0xED, 0xB7, 0xC4, 0x5D, 0xA1, 0xEF, 0x26, 0xF3,
0xC7, 0x73, 0x29, 0x4D, 0xF1, 0xC8, 0x2C, 0xE0, 0xD0, 0xE9, 0xED, 0x0C, 0x70, 0x75, 0x05, 0x3E,
0x5B, 0xF6, 0xA0, 0x6E, 0xEA, 0xDE, 0x87, 0x0B, 0x06, 0x29, 0x03, 0xBF, 0xB4, 0x85, 0x9D, 0x04,
0x75, 0x1A, 0xCD, 0x1E, 0xD6, 0xAA, 0x1B, 0x05, 0x24, 0x6A, 0x2C, 0x80, 0x69, 0x68, 0xDC, 0x97
};

View file

@ -512,7 +512,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
}
else {
lo = __umul128(idx0, cl, &hi);
@ -643,7 +643,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
} else {
lo = __umul128(idx0, cl, &hi);
}
@ -684,7 +684,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = __umul128(idx1, cl, &hi);
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11);
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
} else {
lo = __umul128(idx1, cl, &hi);
}
@ -772,7 +772,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) { \
VARIANT2_INTEGER_MATH(part, cl##part, c); \
lo = __umul128(idx, cl##part, &hi); \
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1); \
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo); \
} else { \
lo = __umul128(idx, cl##part, &hi); \
} \