Merge pull request #761 from SChernykh/dev

Cryptonight variant 2 - final version
This commit is contained in:
xmrig 2018-09-23 07:47:02 +03:00 committed by GitHub
commit cf76d9254a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 17 deletions

View file

@ -487,7 +487,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
}
else {
lo = __umul128(idx0, cl, &hi);
@ -619,7 +619,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
} else {
lo = __umul128(idx0, cl, &hi);
}
@ -662,7 +662,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = __umul128(idx1, cl, &hi);
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11);
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
} else {
lo = __umul128(idx1, cl, &hi);
}

View file

@ -93,6 +93,18 @@
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
} while (0)
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
do { \
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
} while (0)
#else
# define VARIANT2_INIT(part) \
uint64_t division_result_##part = h##part[12]; \
@ -122,5 +134,17 @@
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
} while (0)
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
do { \
const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
} while (0)
#endif
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */

View file

@ -86,16 +86,16 @@ const static uint8_t test_output_v1[160] = {
// Cryptonight variant 2 (Monero v8)
const static uint8_t test_output_v2[160] = {
0x6E, 0xEE, 0x53, 0xA3, 0xDA, 0xD1, 0x8C, 0x05, 0xB8, 0xCB, 0x32, 0x17, 0xAA, 0xEA, 0xEA, 0xB4,
0x16, 0x11, 0x01, 0xA9, 0x08, 0x76, 0x37, 0x36, 0x6F, 0xDC, 0xCA, 0xC6, 0x92, 0x0D, 0xEA, 0x09,
0x91, 0x03, 0x2F, 0x5B, 0x27, 0x4D, 0x94, 0x1D, 0x60, 0x50, 0xDC, 0x1F, 0x35, 0x57, 0xEC, 0x20,
0xA6, 0xAC, 0x10, 0xDB, 0xCF, 0x36, 0x23, 0x8F, 0x96, 0xC7, 0x72, 0x8B, 0xF9, 0xE7, 0x30, 0xEB,
0x50, 0x58, 0x4B, 0xFE, 0xAD, 0xC5, 0x13, 0x79, 0x50, 0x98, 0x1C, 0x67, 0xB2, 0xEB, 0xDA, 0x64,
0xD4, 0xAA, 0xC4, 0xE8, 0xE5, 0xC9, 0xE7, 0x6B, 0x84, 0xC2, 0xD2, 0xE9, 0x1F, 0xA1, 0x0F, 0xDF,
0x45, 0x06, 0x80, 0x25, 0x32, 0x6B, 0xC4, 0x66, 0x2A, 0x69, 0x9F, 0x1E, 0x1F, 0x4C, 0xBE, 0x89,
0xFE, 0x61, 0xBB, 0x04, 0x79, 0xB5, 0x3B, 0x45, 0x58, 0xD9, 0x9C, 0x18, 0x7C, 0x48, 0x1B, 0x44,
0x92, 0xC4, 0x4C, 0xD0, 0x8F, 0x16, 0x44, 0x79, 0x71, 0x48, 0x63, 0x0B, 0x51, 0xB6, 0x33, 0x8B,
0x6B, 0x3F, 0xCC, 0x0A, 0x3A, 0x14, 0x3B, 0x49, 0x68, 0x46, 0xB9, 0x46, 0xC6, 0xA3, 0x03, 0x41
0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78,
0xE6, 0x0D, 0x24, 0x0F, 0x65, 0x85, 0x60, 0x3A, 0x4A, 0xE5, 0x5F, 0x54, 0x9B, 0xC8, 0x79, 0x93,
0xEB, 0x3D, 0x98, 0x2C, 0xFE, 0x9B, 0xFB, 0x15, 0xB6, 0x88, 0x21, 0x94, 0xB0, 0x05, 0x86, 0x5C,
0x59, 0x8B, 0x93, 0x7A, 0xDA, 0xD2, 0xA2, 0x14, 0xED, 0xB7, 0xC4, 0x5D, 0xA1, 0xEF, 0x26, 0xF3,
0xC7, 0x73, 0x29, 0x4D, 0xF1, 0xC8, 0x2C, 0xE0, 0xD0, 0xE9, 0xED, 0x0C, 0x70, 0x75, 0x05, 0x3E,
0x5B, 0xF6, 0xA0, 0x6E, 0xEA, 0xDE, 0x87, 0x0B, 0x06, 0x29, 0x03, 0xBF, 0xB4, 0x85, 0x9D, 0x04,
0x75, 0x1A, 0xCD, 0x1E, 0xD6, 0xAA, 0x1B, 0x05, 0x24, 0x6A, 0x2C, 0x80, 0x69, 0x68, 0xDC, 0x97
};

View file

@ -512,7 +512,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
}
else {
lo = __umul128(idx0, cl, &hi);
@ -643,7 +643,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(0, cl, cx0);
lo = __umul128(idx0, cl, &hi);
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01);
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
} else {
lo = __umul128(idx0, cl, &hi);
}
@ -684,7 +684,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) {
VARIANT2_INTEGER_MATH(1, cl, cx1);
lo = __umul128(idx1, cl, &hi);
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11);
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
} else {
lo = __umul128(idx1, cl, &hi);
}
@ -772,7 +772,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (VARIANT == xmrig::VARIANT_2) { \
VARIANT2_INTEGER_MATH(part, cl##part, c); \
lo = __umul128(idx, cl##part, &hi); \
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1); \
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo); \
} else { \
lo = __umul128(idx, cl##part, &hi); \
} \