From bd6f68790e42e021b8d6dd97a435bf823bf2b4b8 Mon Sep 17 00:00:00 2001 From: SChernykh <15806605+SChernykh@users.noreply.github.com> Date: Thu, 30 May 2024 23:11:14 +0200 Subject: [PATCH] Faster Keccak code (BMI instructions) --- CMakeLists.txt | 7 +++ cmake/standard.cmake | 4 ++ external/src/RandomX | 2 +- src/keccak.cpp | 19 ++++++- src/keccak.h | 7 ++- src/keccak_bmi.cpp | 112 +++++++++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 7 +++ tests/cmake/standard.cmake | 4 ++ tests/src/keccak_tests.cpp | 37 +++++++++++- 9 files changed, 194 insertions(+), 5 deletions(-) create mode 100644 src/keccak_bmi.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 15b5c81..a68018a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,6 +146,13 @@ set(SOURCES src/zmq_reader.cpp ) +if (AMD64) + set(SOURCES ${SOURCES} src/keccak_bmi.cpp) + if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang) + set_source_files_properties(src/keccak_bmi.cpp PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -mbmi") + endif() +endif() + if (WITH_RANDOMX) set(HEADERS ${HEADERS} src/miner.h) set(SOURCES ${SOURCES} src/miner.cpp) diff --git a/cmake/standard.cmake b/cmake/standard.cmake index 6b679f5..d2f1e1b 100644 --- a/cmake/standard.cmake +++ b/cmake/standard.cmake @@ -15,3 +15,7 @@ endif() if (ARCH_ID MATCHES "^(aarch64|arm64|armv8-a)$") set(ARMv8 1) endif() + +if (ARCH_ID MATCHES "^(x86_64|x86-64|amd64)$") + set(AMD64 1) +endif() diff --git a/external/src/RandomX b/external/src/RandomX index 121f6dd..7ee603a 160000 --- a/external/src/RandomX +++ b/external/src/RandomX @@ -1 +1 @@ -Subproject commit 121f6dda58e6b3d0bc428f0dede1a11728acd8fe +Subproject commit 7ee603afebc063fa9c28a5350daee40be831d468 diff --git a/src/keccak.cpp b/src/keccak.cpp index 6c90292..96046ce 100644 --- a/src/keccak.cpp +++ b/src/keccak.cpp @@ -17,6 +17,9 @@ #include "common.h" #include "keccak.h" +#ifdef WITH_RANDOMX +#include "RandomX/src/cpu.hpp" +#endif namespace p2pool { @@ -24,7 +27,7 @@ namespace p2pool { #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) #endif -static const uint64_t keccakf_rndc[24] = +const uint64_t keccakf_rndc[24] = { 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, @@ -36,7 +39,7 @@ static const uint64_t keccakf_rndc[24] = 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 }; -NOINLINE void keccakf(std::array& st) +NOINLINE void keccakf_plain(std::array& st) { for (int round = 0; round < KeccakParams::ROUNDS; ++round) { uint64_t bc[5]; @@ -115,6 +118,18 @@ NOINLINE void keccakf(std::array& st) } } +void (*keccakf)(std::array&) = keccakf_plain; + +#if defined(WITH_RANDOMX) && (defined(__x86_64__) || defined(_M_AMD64)) +static struct KeccakBMI_Check { + KeccakBMI_Check() { + if (randomx::Cpu().hasBmi()) { + keccakf = keccakf_bmi; + } + } +} keccak_bmi_check; +#endif + NOINLINE void keccak_step(const uint8_t* &in, int &inlen, std::array& st) { constexpr int rsiz = KeccakParams::HASH_DATA_AREA; diff --git a/src/keccak.h b/src/keccak.h index d839359..b735ed9 100644 --- a/src/keccak.h +++ b/src/keccak.h @@ -24,7 +24,12 @@ enum KeccakParams { ROUNDS = 24, }; -void keccakf(std::array &st); +extern const uint64_t keccakf_rndc[24]; +extern void (*keccakf)(std::array& st); + +void keccakf_plain(std::array& st); +void keccakf_bmi(std::array& st); + void keccak_step(const uint8_t* &in, int &inlen, std::array& st); void keccak_finish(const uint8_t* in, int inlen, std::array& st); diff --git a/src/keccak_bmi.cpp b/src/keccak_bmi.cpp new file mode 100644 index 0000000..63f3fac --- /dev/null +++ b/src/keccak_bmi.cpp @@ -0,0 +1,112 @@ +/* + * This file is part of the Monero P2Pool + * Copyright (c) 2021-2024 SChernykh + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "common.h" +#include "keccak.h" + +#include + +namespace p2pool { + +#ifndef ROTL64 +#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) +#endif + +#if defined(__GNUC__) && (__GNUC__ < 12) && !defined(_andn_u64) +#define _andn_u64 __andn_u64 +#endif + +NOINLINE void keccakf_bmi(std::array& st) +{ + for (int round = 0; round < KeccakParams::ROUNDS; ++round) { + uint64_t bc[5]; + + // Theta + bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; + bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; + bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22]; + bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; + bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; + +#define THETA(i) { \ + const uint64_t t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); \ + st[i + 0 ] ^= t; \ + st[i + 5] ^= t; \ + st[i + 10] ^= t; \ + st[i + 15] ^= t; \ + st[i + 20] ^= t; \ + } + + THETA(0); + THETA(1); + THETA(2); + THETA(3); + THETA(4); + + // Rho Pi + const uint64_t t = st[1]; + st[1] = ROTL64(st[6], 44); + st[6] = ROTL64(st[9], 20); + st[9] = ROTL64(st[22], 61); + st[22] = ROTL64(st[14], 39); + st[14] = ROTL64(st[20], 18); + st[20] = ROTL64(st[2], 62); + st[2] = ROTL64(st[12], 43); + st[12] = ROTL64(st[13], 25); + st[13] = ROTL64(st[19], 8); + st[19] = ROTL64(st[23], 56); + st[23] = ROTL64(st[15], 41); + st[15] = ROTL64(st[4], 27); + st[4] = ROTL64(st[24], 14); + st[24] = ROTL64(st[21], 2); + st[21] = ROTL64(st[8], 55); + st[8] = ROTL64(st[16], 45); + st[16] = ROTL64(st[5], 36); + st[5] = ROTL64(st[3], 28); + st[3] = ROTL64(st[18], 21); + st[18] = ROTL64(st[17], 15); + st[17] = ROTL64(st[11], 10); + st[11] = ROTL64(st[7], 6); + st[7] = ROTL64(st[10], 3); + st[10] = ROTL64(t, 1); + + // Chi +#define CHI(j) { \ + const uint64_t st0 = st[j ]; \ + const uint64_t st1 = st[j + 1]; \ + const uint64_t st2 = st[j + 2]; \ + const uint64_t st3 = st[j + 3]; \ + const uint64_t st4 = st[j + 4]; \ + st[j ] ^= _andn_u64(st1, st2); \ + st[j + 1] ^= _andn_u64(st2, st3); \ + st[j + 2] ^= _andn_u64(st3, st4); \ + st[j + 3] ^= _andn_u64(st4, st0); \ + st[j + 4] ^= _andn_u64(st0, st1); \ + } + + CHI(0); + CHI(5); + CHI(10); + CHI(15); + CHI(20); + + // Iota + st[0] ^= keccakf_rndc[round]; + } +} + +} // namespace p2pool diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b5e5d8b..2d04ebe 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -74,6 +74,13 @@ set(SOURCES ../src/zmq_reader.cpp ) +if (AMD64) + set(SOURCES ${SOURCES} ../src/keccak_bmi.cpp) + if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang) + set_source_files_properties(../src/keccak_bmi.cpp PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -mbmi") + endif() +endif() + if (NOT STATIC_BINARY AND NOT STATIC_LIBS) include(FindCURL) endif() diff --git a/tests/cmake/standard.cmake b/tests/cmake/standard.cmake index 6b679f5..d2f1e1b 100644 --- a/tests/cmake/standard.cmake +++ b/tests/cmake/standard.cmake @@ -15,3 +15,7 @@ endif() if (ARCH_ID MATCHES "^(aarch64|arm64|armv8-a)$") set(ARMv8 1) endif() + +if (ARCH_ID MATCHES "^(x86_64|x86-64|amd64)$") + set(AMD64 1) +endif() diff --git a/tests/src/keccak_tests.cpp b/tests/src/keccak_tests.cpp index 211d41f..a390b8f 100644 --- a/tests/src/keccak_tests.cpp +++ b/tests/src/keccak_tests.cpp @@ -17,11 +17,12 @@ #include "common.h" #include "keccak.h" +#include "RandomX/src/cpu.hpp" #include "gtest/gtest.h" namespace p2pool { -TEST(keccak, hashing) +static void test_keccak() { auto check = [](const void* input, size_t size, const char* expected_output) { hash output; @@ -53,6 +54,40 @@ TEST(keccak, hashing) std::vector v(1000000, 'a'); check(v.data(), v.size(), "fadae6b49f129bbb812be8407b7b2894f34aecf6dbd1f9b0f0c7e9853098fc96"); + + hash test; + for (int i = 0; i < 1000000; ++i) { + keccak(test.h, HASH_SIZE, test.h); + } + + char buf[log::Stream::BUF_SIZE + 1]; + log::Stream s(buf); + s << test; + ASSERT_EQ(memcmp(buf, "16e199635319b8c568a0405a570382994a90a56d5f116892d8cbcb3b13cda0eb", HASH_SIZE * 2), 0); } +TEST(keccak, hashing) +{ + auto t = keccakf; + keccakf = keccakf_plain; + + test_keccak(); + + keccakf = t; +} + +#if defined(__x86_64__) || defined(_M_AMD64) +TEST(keccak, hashing_bmi) +{ + if (randomx::Cpu().hasBmi()) { + auto t = keccakf; + keccakf = keccakf_bmi; + + test_keccak(); + + keccakf = t; + } +} +#endif + }