mirror of
https://github.com/SChernykh/p2pool.git
synced 2024-11-16 15:57:39 +00:00
Faster Keccak code (BMI instructions)
This commit is contained in:
parent
33e1ebd3fe
commit
bd6f68790e
9 changed files with 194 additions and 5 deletions
|
@ -146,6 +146,13 @@ set(SOURCES
|
||||||
src/zmq_reader.cpp
|
src/zmq_reader.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (AMD64)
|
||||||
|
set(SOURCES ${SOURCES} src/keccak_bmi.cpp)
|
||||||
|
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||||
|
set_source_files_properties(src/keccak_bmi.cpp PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -mbmi")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if (WITH_RANDOMX)
|
if (WITH_RANDOMX)
|
||||||
set(HEADERS ${HEADERS} src/miner.h)
|
set(HEADERS ${HEADERS} src/miner.h)
|
||||||
set(SOURCES ${SOURCES} src/miner.cpp)
|
set(SOURCES ${SOURCES} src/miner.cpp)
|
||||||
|
|
|
@ -15,3 +15,7 @@ endif()
|
||||||
if (ARCH_ID MATCHES "^(aarch64|arm64|armv8-a)$")
|
if (ARCH_ID MATCHES "^(aarch64|arm64|armv8-a)$")
|
||||||
set(ARMv8 1)
|
set(ARMv8 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (ARCH_ID MATCHES "^(x86_64|x86-64|amd64)$")
|
||||||
|
set(AMD64 1)
|
||||||
|
endif()
|
||||||
|
|
2
external/src/RandomX
vendored
2
external/src/RandomX
vendored
|
@ -1 +1 @@
|
||||||
Subproject commit 121f6dda58e6b3d0bc428f0dede1a11728acd8fe
|
Subproject commit 7ee603afebc063fa9c28a5350daee40be831d468
|
|
@ -17,6 +17,9 @@
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "keccak.h"
|
#include "keccak.h"
|
||||||
|
#ifdef WITH_RANDOMX
|
||||||
|
#include "RandomX/src/cpu.hpp"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace p2pool {
|
namespace p2pool {
|
||||||
|
|
||||||
|
@ -24,7 +27,7 @@ namespace p2pool {
|
||||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static const uint64_t keccakf_rndc[24] =
|
const uint64_t keccakf_rndc[24] =
|
||||||
{
|
{
|
||||||
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
||||||
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
||||||
|
@ -36,7 +39,7 @@ static const uint64_t keccakf_rndc[24] =
|
||||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||||
};
|
};
|
||||||
|
|
||||||
NOINLINE void keccakf(std::array<uint64_t, 25>& st)
|
NOINLINE void keccakf_plain(std::array<uint64_t, 25>& st)
|
||||||
{
|
{
|
||||||
for (int round = 0; round < KeccakParams::ROUNDS; ++round) {
|
for (int round = 0; round < KeccakParams::ROUNDS; ++round) {
|
||||||
uint64_t bc[5];
|
uint64_t bc[5];
|
||||||
|
@ -115,6 +118,18 @@ NOINLINE void keccakf(std::array<uint64_t, 25>& st)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void (*keccakf)(std::array<uint64_t, 25>&) = keccakf_plain;
|
||||||
|
|
||||||
|
#if defined(WITH_RANDOMX) && (defined(__x86_64__) || defined(_M_AMD64))
|
||||||
|
static struct KeccakBMI_Check {
|
||||||
|
KeccakBMI_Check() {
|
||||||
|
if (randomx::Cpu().hasBmi()) {
|
||||||
|
keccakf = keccakf_bmi;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} keccak_bmi_check;
|
||||||
|
#endif
|
||||||
|
|
||||||
NOINLINE void keccak_step(const uint8_t* &in, int &inlen, std::array<uint64_t, 25>& st)
|
NOINLINE void keccak_step(const uint8_t* &in, int &inlen, std::array<uint64_t, 25>& st)
|
||||||
{
|
{
|
||||||
constexpr int rsiz = KeccakParams::HASH_DATA_AREA;
|
constexpr int rsiz = KeccakParams::HASH_DATA_AREA;
|
||||||
|
|
|
@ -24,7 +24,12 @@ enum KeccakParams {
|
||||||
ROUNDS = 24,
|
ROUNDS = 24,
|
||||||
};
|
};
|
||||||
|
|
||||||
void keccakf(std::array<uint64_t, 25> &st);
|
extern const uint64_t keccakf_rndc[24];
|
||||||
|
extern void (*keccakf)(std::array<uint64_t, 25>& st);
|
||||||
|
|
||||||
|
void keccakf_plain(std::array<uint64_t, 25>& st);
|
||||||
|
void keccakf_bmi(std::array<uint64_t, 25>& st);
|
||||||
|
|
||||||
void keccak_step(const uint8_t* &in, int &inlen, std::array<uint64_t, 25>& st);
|
void keccak_step(const uint8_t* &in, int &inlen, std::array<uint64_t, 25>& st);
|
||||||
void keccak_finish(const uint8_t* in, int inlen, std::array<uint64_t, 25>& st);
|
void keccak_finish(const uint8_t* in, int inlen, std::array<uint64_t, 25>& st);
|
||||||
|
|
||||||
|
|
112
src/keccak_bmi.cpp
Normal file
112
src/keccak_bmi.cpp
Normal file
|
@ -0,0 +1,112 @@
|
||||||
|
/*
|
||||||
|
* This file is part of the Monero P2Pool <https://github.com/SChernykh/p2pool>
|
||||||
|
* Copyright (c) 2021-2024 SChernykh <https://github.com/SChernykh>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, version 3.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but
|
||||||
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "keccak.h"
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
namespace p2pool {
|
||||||
|
|
||||||
|
#ifndef ROTL64
|
||||||
|
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && (__GNUC__ < 12) && !defined(_andn_u64)
|
||||||
|
#define _andn_u64 __andn_u64
|
||||||
|
#endif
|
||||||
|
|
||||||
|
NOINLINE void keccakf_bmi(std::array<uint64_t, 25>& st)
|
||||||
|
{
|
||||||
|
for (int round = 0; round < KeccakParams::ROUNDS; ++round) {
|
||||||
|
uint64_t bc[5];
|
||||||
|
|
||||||
|
// Theta
|
||||||
|
bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
|
||||||
|
bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
|
||||||
|
bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
|
||||||
|
bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
|
||||||
|
bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
|
||||||
|
|
||||||
|
#define THETA(i) { \
|
||||||
|
const uint64_t t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); \
|
||||||
|
st[i + 0 ] ^= t; \
|
||||||
|
st[i + 5] ^= t; \
|
||||||
|
st[i + 10] ^= t; \
|
||||||
|
st[i + 15] ^= t; \
|
||||||
|
st[i + 20] ^= t; \
|
||||||
|
}
|
||||||
|
|
||||||
|
THETA(0);
|
||||||
|
THETA(1);
|
||||||
|
THETA(2);
|
||||||
|
THETA(3);
|
||||||
|
THETA(4);
|
||||||
|
|
||||||
|
// Rho Pi
|
||||||
|
const uint64_t t = st[1];
|
||||||
|
st[1] = ROTL64(st[6], 44);
|
||||||
|
st[6] = ROTL64(st[9], 20);
|
||||||
|
st[9] = ROTL64(st[22], 61);
|
||||||
|
st[22] = ROTL64(st[14], 39);
|
||||||
|
st[14] = ROTL64(st[20], 18);
|
||||||
|
st[20] = ROTL64(st[2], 62);
|
||||||
|
st[2] = ROTL64(st[12], 43);
|
||||||
|
st[12] = ROTL64(st[13], 25);
|
||||||
|
st[13] = ROTL64(st[19], 8);
|
||||||
|
st[19] = ROTL64(st[23], 56);
|
||||||
|
st[23] = ROTL64(st[15], 41);
|
||||||
|
st[15] = ROTL64(st[4], 27);
|
||||||
|
st[4] = ROTL64(st[24], 14);
|
||||||
|
st[24] = ROTL64(st[21], 2);
|
||||||
|
st[21] = ROTL64(st[8], 55);
|
||||||
|
st[8] = ROTL64(st[16], 45);
|
||||||
|
st[16] = ROTL64(st[5], 36);
|
||||||
|
st[5] = ROTL64(st[3], 28);
|
||||||
|
st[3] = ROTL64(st[18], 21);
|
||||||
|
st[18] = ROTL64(st[17], 15);
|
||||||
|
st[17] = ROTL64(st[11], 10);
|
||||||
|
st[11] = ROTL64(st[7], 6);
|
||||||
|
st[7] = ROTL64(st[10], 3);
|
||||||
|
st[10] = ROTL64(t, 1);
|
||||||
|
|
||||||
|
// Chi
|
||||||
|
#define CHI(j) { \
|
||||||
|
const uint64_t st0 = st[j ]; \
|
||||||
|
const uint64_t st1 = st[j + 1]; \
|
||||||
|
const uint64_t st2 = st[j + 2]; \
|
||||||
|
const uint64_t st3 = st[j + 3]; \
|
||||||
|
const uint64_t st4 = st[j + 4]; \
|
||||||
|
st[j ] ^= _andn_u64(st1, st2); \
|
||||||
|
st[j + 1] ^= _andn_u64(st2, st3); \
|
||||||
|
st[j + 2] ^= _andn_u64(st3, st4); \
|
||||||
|
st[j + 3] ^= _andn_u64(st4, st0); \
|
||||||
|
st[j + 4] ^= _andn_u64(st0, st1); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CHI(0);
|
||||||
|
CHI(5);
|
||||||
|
CHI(10);
|
||||||
|
CHI(15);
|
||||||
|
CHI(20);
|
||||||
|
|
||||||
|
// Iota
|
||||||
|
st[0] ^= keccakf_rndc[round];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace p2pool
|
|
@ -74,6 +74,13 @@ set(SOURCES
|
||||||
../src/zmq_reader.cpp
|
../src/zmq_reader.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (AMD64)
|
||||||
|
set(SOURCES ${SOURCES} ../src/keccak_bmi.cpp)
|
||||||
|
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||||
|
set_source_files_properties(../src/keccak_bmi.cpp PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -mbmi")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if (NOT STATIC_BINARY AND NOT STATIC_LIBS)
|
if (NOT STATIC_BINARY AND NOT STATIC_LIBS)
|
||||||
include(FindCURL)
|
include(FindCURL)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -15,3 +15,7 @@ endif()
|
||||||
if (ARCH_ID MATCHES "^(aarch64|arm64|armv8-a)$")
|
if (ARCH_ID MATCHES "^(aarch64|arm64|armv8-a)$")
|
||||||
set(ARMv8 1)
|
set(ARMv8 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (ARCH_ID MATCHES "^(x86_64|x86-64|amd64)$")
|
||||||
|
set(AMD64 1)
|
||||||
|
endif()
|
||||||
|
|
|
@ -17,11 +17,12 @@
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "keccak.h"
|
#include "keccak.h"
|
||||||
|
#include "RandomX/src/cpu.hpp"
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
namespace p2pool {
|
namespace p2pool {
|
||||||
|
|
||||||
TEST(keccak, hashing)
|
static void test_keccak()
|
||||||
{
|
{
|
||||||
auto check = [](const void* input, size_t size, const char* expected_output) {
|
auto check = [](const void* input, size_t size, const char* expected_output) {
|
||||||
hash output;
|
hash output;
|
||||||
|
@ -53,6 +54,40 @@ TEST(keccak, hashing)
|
||||||
|
|
||||||
std::vector<uint8_t> v(1000000, 'a');
|
std::vector<uint8_t> v(1000000, 'a');
|
||||||
check(v.data(), v.size(), "fadae6b49f129bbb812be8407b7b2894f34aecf6dbd1f9b0f0c7e9853098fc96");
|
check(v.data(), v.size(), "fadae6b49f129bbb812be8407b7b2894f34aecf6dbd1f9b0f0c7e9853098fc96");
|
||||||
|
|
||||||
|
hash test;
|
||||||
|
for (int i = 0; i < 1000000; ++i) {
|
||||||
|
keccak(test.h, HASH_SIZE, test.h);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char buf[log::Stream::BUF_SIZE + 1];
|
||||||
|
log::Stream s(buf);
|
||||||
|
s << test;
|
||||||
|
ASSERT_EQ(memcmp(buf, "16e199635319b8c568a0405a570382994a90a56d5f116892d8cbcb3b13cda0eb", HASH_SIZE * 2), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(keccak, hashing)
|
||||||
|
{
|
||||||
|
auto t = keccakf;
|
||||||
|
keccakf = keccakf_plain;
|
||||||
|
|
||||||
|
test_keccak();
|
||||||
|
|
||||||
|
keccakf = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__x86_64__) || defined(_M_AMD64)
|
||||||
|
TEST(keccak, hashing_bmi)
|
||||||
|
{
|
||||||
|
if (randomx::Cpu().hasBmi()) {
|
||||||
|
auto t = keccakf;
|
||||||
|
keccakf = keccakf_bmi;
|
||||||
|
|
||||||
|
test_keccak();
|
||||||
|
|
||||||
|
keccakf = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue