mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-12 05:44:52 +00:00
KawPow: optimized CPU share verification
- 2 times faster CPU share verification (11 -> 5 ms)
- 1.5 times faster light cache initialization
This commit is contained in:
parent
03e9797b92
commit
5724d8beb6
4 changed files with 61 additions and 38 deletions
81
src/3rdparty/libethash/ethash_internal.c
vendored
81
src/3rdparty/libethash/ethash_internal.c
vendored
|
@ -33,6 +33,18 @@
|
||||||
#include "data_sizes.h"
|
#include "data_sizes.h"
|
||||||
#include "base/crypto/sha3.h"
|
#include "base/crypto/sha3.h"
|
||||||
|
|
||||||
|
#if defined(_M_X64) || defined(__x86_64__) || defined(__SSE2__)
#ifdef __GNUC__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif

/*
 * kp_prefetch(x): hint the CPU to pull the cache line at address x into all
 * cache levels (_MM_HINT_T0).
 *
 * The macro expands to a single expression with no trailing semicolon, so
 * `kp_prefetch(p);` is exactly one statement and stays valid inside an
 * unbraced if/else. The pointer is cast to `const char*` because some
 * toolchains declare _mm_prefetch with that parameter type, which would
 * otherwise reject or warn on arbitrary object pointers.
 */
#define kp_prefetch(x) _mm_prefetch((const char*)(x), _MM_HINT_T0)
#else
/* No prefetch intrinsic on this target: expand to a no-op expression. */
#define kp_prefetch(x) ((void)0)
#endif
|
||||||
|
|
||||||
#define SHA3_256(a, b, c) sha3_HashBuffer(256, SHA3_FLAGS_KECCAK, b, c, a, 32)
|
#define SHA3_256(a, b, c) sha3_HashBuffer(256, SHA3_FLAGS_KECCAK, b, c, a, 32)
|
||||||
#define SHA3_512(a, b, c) sha3_HashBuffer(512, SHA3_FLAGS_KECCAK, b, c, a, 64)
|
#define SHA3_512(a, b, c) sha3_HashBuffer(512, SHA3_FLAGS_KECCAK, b, c, a, 64)
|
||||||
|
|
||||||
|
@ -157,46 +169,53 @@ void ethash_calculate_dag_item_opt(
|
||||||
memcpy(ret, init, sizeof(node));
|
memcpy(ret, init, sizeof(node));
|
||||||
ret->words[0] ^= node_index;
|
ret->words[0] ^= node_index;
|
||||||
SHA3_512(ret->bytes, ret->bytes, sizeof(node));
|
SHA3_512(ret->bytes, ret->bytes, sizeof(node));
|
||||||
#if defined(_M_X64) && ENABLE_SSE
|
|
||||||
__m128i const fnv_prime = _mm_set1_epi32(FNV_PRIME);
|
|
||||||
__m128i xmm0 = ret->xmm[0];
|
|
||||||
__m128i xmm1 = ret->xmm[1];
|
|
||||||
__m128i xmm2 = ret->xmm[2];
|
|
||||||
__m128i xmm3 = ret->xmm[3];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i != num_parents; ++i) {
|
for (uint32_t i = 0; i != num_parents; ++i) {
|
||||||
uint32_t parent_index = fast_mod(fnv_hash(node_index ^ i, ret->words[i % NODE_WORDS]), light->num_parent_nodes, light->reciprocal, light->increment, light->shift);
|
uint32_t parent_index = fast_mod(fnv_hash(node_index ^ i, ret->words[i % NODE_WORDS]), light->num_parent_nodes, light->reciprocal, light->increment, light->shift);
|
||||||
node const* parent = &cache_nodes[parent_index];
|
node const* parent = &cache_nodes[parent_index];
|
||||||
|
for (unsigned w = 0; w != NODE_WORDS; ++w) {
|
||||||
#if defined(_M_X64) && ENABLE_SSE
|
ret->words[w] = fnv_hash(ret->words[w], parent->words[w]);
|
||||||
{
|
|
||||||
xmm0 = _mm_mullo_epi32(xmm0, fnv_prime);
|
|
||||||
xmm1 = _mm_mullo_epi32(xmm1, fnv_prime);
|
|
||||||
xmm2 = _mm_mullo_epi32(xmm2, fnv_prime);
|
|
||||||
xmm3 = _mm_mullo_epi32(xmm3, fnv_prime);
|
|
||||||
xmm0 = _mm_xor_si128(xmm0, parent->xmm[0]);
|
|
||||||
xmm1 = _mm_xor_si128(xmm1, parent->xmm[1]);
|
|
||||||
xmm2 = _mm_xor_si128(xmm2, parent->xmm[2]);
|
|
||||||
xmm3 = _mm_xor_si128(xmm3, parent->xmm[3]);
|
|
||||||
|
|
||||||
// have to write to ret as values are used to compute index
|
|
||||||
ret->xmm[0] = xmm0;
|
|
||||||
ret->xmm[1] = xmm1;
|
|
||||||
ret->xmm[2] = xmm2;
|
|
||||||
ret->xmm[3] = xmm3;
|
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
{
|
|
||||||
for (unsigned w = 0; w != NODE_WORDS; ++w) {
|
|
||||||
ret->words[w] = fnv_hash(ret->words[w], parent->words[w]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
SHA3_512(ret->bytes, ret->bytes, sizeof(node));
|
SHA3_512(ret->bytes, ret->bytes, sizeof(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ethash_calculate_dag_item4_opt(
|
||||||
|
node* ret,
|
||||||
|
uint32_t node_index,
|
||||||
|
uint32_t num_parents,
|
||||||
|
ethash_light_t const light
|
||||||
|
)
|
||||||
|
{
|
||||||
|
node const* cache_nodes = (node const*)light->cache;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 4; ++i) {
|
||||||
|
node const* init = &cache_nodes[fast_mod(node_index + i, light->num_parent_nodes, light->reciprocal, light->increment, light->shift)];
|
||||||
|
memcpy(ret + i, init, sizeof(node));
|
||||||
|
ret[i].words[0] ^= node_index + i;
|
||||||
|
SHA3_512(ret[i].bytes, ret[i].bytes, sizeof(node));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i != num_parents; ++i) {
|
||||||
|
node* parent[4];
|
||||||
|
|
||||||
|
for (uint32_t j = 0; j < 4; ++j) {
|
||||||
|
const uint32_t parent_index = fast_mod(fnv_hash((node_index + j) ^ i, ret[j].words[i % NODE_WORDS]), light->num_parent_nodes, light->reciprocal, light->increment, light->shift);
|
||||||
|
parent[j] = &cache_nodes[parent_index];
|
||||||
|
kp_prefetch(parent[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned w = 0; w != NODE_WORDS; ++w) ret[0].words[w] = fnv_hash(ret[0].words[w], parent[0]->words[w]);
|
||||||
|
for (unsigned w = 0; w != NODE_WORDS; ++w) ret[1].words[w] = fnv_hash(ret[1].words[w], parent[1]->words[w]);
|
||||||
|
for (unsigned w = 0; w != NODE_WORDS; ++w) ret[2].words[w] = fnv_hash(ret[2].words[w], parent[2]->words[w]);
|
||||||
|
for (unsigned w = 0; w != NODE_WORDS; ++w) ret[3].words[w] = fnv_hash(ret[3].words[w], parent[3]->words[w]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 4; ++i) {
|
||||||
|
SHA3_512(ret[i].bytes, ret[i].bytes, sizeof(node));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool ethash_compute_full_data(
|
bool ethash_compute_full_data(
|
||||||
void* mem,
|
void* mem,
|
||||||
uint64_t full_size,
|
uint64_t full_size,
|
||||||
|
|
7
src/3rdparty/libethash/ethash_internal.h
vendored
7
src/3rdparty/libethash/ethash_internal.h
vendored
|
@ -161,6 +161,13 @@ void ethash_calculate_dag_item_opt(
|
||||||
ethash_light_t const cache
|
ethash_light_t const cache
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/*
 * Batched variant of ethash_calculate_dag_item_opt: computes the four
 * consecutive DAG items node_index .. node_index + 3 and stores them in
 * ret[0] .. ret[3]. `ret` must therefore point to storage for at least
 * four nodes. `num_parents` is the number of parent-mixing rounds applied
 * to each item.
 */
void ethash_calculate_dag_item4_opt(
|
||||||
|
node* ret,
|
||||||
|
uint32_t node_index,
|
||||||
|
uint32_t num_parents,
|
||||||
|
ethash_light_t const cache
|
||||||
|
);
|
||||||
|
|
||||||
void ethash_quick_hash(
|
void ethash_quick_hash(
|
||||||
ethash_h256_t* return_hash,
|
ethash_h256_t* return_hash,
|
||||||
ethash_h256_t const* header_hash,
|
ethash_h256_t const* header_hash,
|
||||||
|
|
|
@ -92,9 +92,9 @@ bool KPCache::init(uint32_t epoch)
|
||||||
const uint32_t b = (cache_nodes * (i + 1)) / n;
|
const uint32_t b = (cache_nodes * (i + 1)) / n;
|
||||||
|
|
||||||
threads.emplace_back([this, a, b, cache_nodes, &cache]() {
|
threads.emplace_back([this, a, b, cache_nodes, &cache]() {
|
||||||
for (uint32_t j = a; j < b; ++j) {
|
uint32_t j = a;
|
||||||
ethash_calculate_dag_item_opt(((node*)m_DAGCache.data()) + j, j, num_dataset_parents, &cache);
|
for (; j + 4 <= b; j += 4) ethash_calculate_dag_item4_opt(((node*)m_DAGCache.data()) + j, j, num_dataset_parents, &cache);
|
||||||
}
|
for (; j < b; ++j) ethash_calculate_dag_item_opt(((node*)m_DAGCache.data()) + j, j, num_dataset_parents, &cache);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -285,10 +285,7 @@ void KPHash::calculate(const KPCache& light_cache, uint32_t block_height, const
|
||||||
uint32_t item_index = (mix[r % LANES][0] % num_items) * 4;
|
uint32_t item_index = (mix[r % LANES][0] % num_items) * 4;
|
||||||
|
|
||||||
node item[4];
|
node item[4];
|
||||||
ethash_calculate_dag_item_opt(item + 0, item_index + 0, KPCache::num_dataset_parents, &cache);
|
ethash_calculate_dag_item4_opt(item, item_index, KPCache::num_dataset_parents, &cache);
|
||||||
ethash_calculate_dag_item_opt(item + 1, item_index + 1, KPCache::num_dataset_parents, &cache);
|
|
||||||
ethash_calculate_dag_item_opt(item + 2, item_index + 2, KPCache::num_dataset_parents, &cache);
|
|
||||||
ethash_calculate_dag_item_opt(item + 3, item_index + 3, KPCache::num_dataset_parents, &cache);
|
|
||||||
|
|
||||||
uint32_t dst_counter = 0;
|
uint32_t dst_counter = 0;
|
||||||
uint32_t src_counter = 0;
|
uint32_t src_counter = 0;
|
||||||
|
|
Loading…
Reference in a new issue