diff --git a/CMakeLists.txt b/CMakeLists.txt index e20cd42ae..74c546d7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,7 @@ option(WITH_CN_PICO "Enable CryptoNight-Pico algorithm" ON) option(WITH_CN_GPU "Enable CryptoNight-GPU algorithm" ON) option(WITH_RANDOMX "Enable RandomX algorithms family" ON) option(WITH_ARGON2 "Enable Argon2 algorithms family" ON) +option(WITH_ASTROBWT "Enable AstroBWT algorithms family" ON) option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON) option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_TLS "Enable OpenSSL support" ON) @@ -176,6 +177,7 @@ find_package(UV REQUIRED) include(cmake/flags.cmake) include(cmake/randomx.cmake) include(cmake/argon2.cmake) +include(cmake/astrobwt.cmake) include(cmake/OpenSSL.cmake) include(cmake/asm.cmake) include(cmake/cn-gpu.cmake) diff --git a/cmake/astrobwt.cmake b/cmake/astrobwt.cmake new file mode 100644 index 000000000..8c89da00d --- /dev/null +++ b/cmake/astrobwt.cmake @@ -0,0 +1,36 @@ +if (WITH_ASTROBWT) + add_definitions(/DXMRIG_ALGO_ASTROBWT) + + list(APPEND HEADERS_CRYPTO + src/crypto/astrobwt/AstroBWT.h + src/crypto/astrobwt/sha3.h + ) + + list(APPEND SOURCES_CRYPTO + src/crypto/astrobwt/AstroBWT.cpp + src/crypto/astrobwt/sha3.cpp + ) + + if (XMRIG_ARM) + list(APPEND HEADERS_CRYPTO + src/crypto/astrobwt/salsa20_ref/ecrypt-config.h + src/crypto/astrobwt/salsa20_ref/ecrypt-machine.h + src/crypto/astrobwt/salsa20_ref/ecrypt-portable.h + src/crypto/astrobwt/salsa20_ref/ecrypt-sync.h + ) + + list(APPEND SOURCES_CRYPTO + src/crypto/astrobwt/salsa20_ref/salsa20.c + ) + else() + list(APPEND HEADERS_CRYPTO + src/crypto/astrobwt/Salsa20.hpp + ) + + list(APPEND SOURCES_CRYPTO + src/crypto/astrobwt/Salsa20.cpp + ) + endif() +else() + remove_definitions(/DXMRIG_ALGO_ASTROBWT) +endif() diff --git a/src/backend/cpu/CpuConfig.cpp b/src/backend/cpu/CpuConfig.cpp index a9e10338c..31921f183 100644 --- a/src/backend/cpu/CpuConfig.cpp +++ b/src/backend/cpu/CpuConfig.cpp @@ 
-165,6 +165,7 @@ void xmrig::CpuConfig::generate() count += xmrig::generate<Algorithm::CN_PICO>(m_threads, m_limit); count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, m_limit); count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit); + count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, m_limit); m_shouldSave = count > 0; } diff --git a/src/backend/cpu/CpuConfig_gen.h b/src/backend/cpu/CpuConfig_gen.h index a7319eea0..568559565 100644 --- a/src/backend/cpu/CpuConfig_gen.h +++ b/src/backend/cpu/CpuConfig_gen.h @@ -143,6 +143,14 @@ size_t inline generate<Algorithm::ARGON2>(Threads<CpuThreads> &threads, uint32_t #endif +#ifdef XMRIG_ALGO_ASTROBWT +template<> +size_t inline generate<Algorithm::ASTROBWT>(Threads<CpuThreads>& threads, uint32_t limit) +{ + return generate("astrobwt", threads, Algorithm::ASTROBWT_DERO, limit); +} +#endif + } /* namespace xmrig */ diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 535ecd6b4..e74ee6ff9 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -44,6 +44,11 @@ #endif +#ifdef XMRIG_ALGO_ASTROBWT +# include "crypto/astrobwt/AstroBWT.h" +#endif + + namespace xmrig { static constexpr uint32_t kReserveCount = 32768; @@ -180,6 +185,12 @@ bool xmrig::CpuWorker<N>::selfTest() } # endif +# ifdef XMRIG_ALGO_ASTROBWT + if (m_algorithm.family() == Algorithm::ASTROBWT) { + return verify(Algorithm::ASTROBWT_DERO, astrobwt_dero_test_out); + } +# endif + return false; } diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.cpp b/src/backend/cpu/platform/AdvancedCpuInfo.cpp index 837e1b000..0632c1d19 100644 --- a/src/backend/cpu/platform/AdvancedCpuInfo.cpp +++ b/src/backend/cpu/platform/AdvancedCpuInfo.cpp @@ -172,6 +172,17 @@ xmrig::CpuThreads xmrig::AdvancedCpuInfo::threads(const Algorithm &algorithm, ui size_t cache = 0; size_t count = 0; +# ifdef XMRIG_ALGO_ASTROBWT + if (algorithm == Algorithm::ASTROBWT_DERO) { + CpuThreads t; + count = threads(); + for (size_t i = 
0; i < count; ++i) { + t.add(i, 0); + } + return t; + } +# endif + if (m_L3) { cache = m_L2_exclusive ? (m_L2 + m_L3) : m_L3; } diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index fa5a43fa7..d288ee28e 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -258,5 +258,15 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3 } # endif +# ifdef XMRIG_ALGO_ASTROBWT + if (algorithm.family() == Algorithm::ASTROBWT) { + CpuThreads threads; + for (size_t i = 0; i < count; ++i) { + threads.add(i, 0); + } + return threads; + } +# endif + return CpuThreads(std::max<size_t>(count / 2, 1), 1); } diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp index a66bf9fdd..527284505 100644 --- a/src/backend/cpu/platform/HwlocCpuInfo.cpp +++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp @@ -216,6 +216,12 @@ bool xmrig::HwlocCpuInfo::membind(hwloc_const_bitmap_t nodeset) xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const { +# ifdef XMRIG_ALGO_ASTROBWT + if (algorithm == Algorithm::ASTROBWT_DERO) { + return BasicCpuInfo::threads(algorithm, limit); + } +# endif + if (L2() == 0 && L3() == 0) { return BasicCpuInfo::threads(algorithm, limit); } diff --git a/src/backend/opencl/cl/cn/algorithm.cl b/src/backend/opencl/cl/cn/algorithm.cl index 80b57afd6..cf932c73c 100644 --- a/src/backend/opencl/cl/cn/algorithm.cl +++ b/src/backend/opencl/cl/cn/algorithm.cl @@ -24,6 +24,7 @@ #define ALGO_RX_SFX 23 #define ALGO_AR2_CHUKWA 24 #define ALGO_AR2_WRKZ 25 +#define ALGO_ASTROBWT_DERO 26 #define FAMILY_UNKNOWN 0 #define FAMILY_CN 1 @@ -32,3 +33,4 @@ #define FAMILY_CN_PICO 4 #define FAMILY_RANDOM_X 5 #define FAMILY_ARGON2 6 +#define FAMILY_ASTROBWT 7 diff --git a/src/base/net/http/HttpClient.cpp b/src/base/net/http/HttpClient.cpp index 364b3248a..24c3f3caf 100644 --- 
a/src/base/net/http/HttpClient.cpp +++ b/src/base/net/http/HttpClient.cpp @@ -140,6 +140,7 @@ void xmrig::HttpClient::handshake() if (!body.empty()) { headers.insert({ "Content-Length", std::to_string(body.size()) }); + headers.insert({ "Content-Type", "application/json" }); } std::stringstream ss; diff --git a/src/base/net/stratum/DaemonClient.cpp b/src/base/net/stratum/DaemonClient.cpp index bf22e6111..b59272c76 100644 --- a/src/base/net/stratum/DaemonClient.cpp +++ b/src/base/net/stratum/DaemonClient.cpp @@ -59,12 +59,14 @@ static const char *kHash = "hash"; static const char *kHeight = "height"; static const char *kJsonRPC = "/json_rpc"; +static const size_t BlobReserveSize = 8; + } xmrig::DaemonClient::DaemonClient(int id, IClientListener *listener) : BaseClient(id, listener), - m_monero(true) + m_apiVersion(API_MONERO) { m_httpListener = std::make_shared<HttpListener>(this); m_timer = new Timer(this); @@ -106,14 +108,21 @@ int64_t xmrig::DaemonClient::submit(const JobResult &result) # ifdef XMRIG_PROXY_PROJECT memcpy(m_blocktemplate.data() + 78, result.nonce, 8); # else - Buffer::toHex(reinterpret_cast<const uint8_t *>(&result.nonce), 4, m_blocktemplate.data() + 78); + char* data = (m_apiVersion == API_DERO) ? 
m_blockhashingblob.data() : m_blocktemplate.data(); + Buffer::toHex(reinterpret_cast<const uint8_t *>(&result.nonce), 4, data + 78); # endif using namespace rapidjson; Document doc(kObjectType); Value params(kArrayType); - params.PushBack(m_blocktemplate.toJSON(), doc.GetAllocator()); + if (m_apiVersion == API_DERO) { + params.PushBack(m_blocktemplate.toJSON(), doc.GetAllocator()); + params.PushBack(m_blockhashingblob.toJSON(), doc.GetAllocator()); + } + else { + params.PushBack(m_blocktemplate.toJSON(), doc.GetAllocator()); + } JsonRequest::create(doc, m_sequence, "submitblock", params); @@ -131,6 +140,10 @@ int64_t xmrig::DaemonClient::submit(const JobResult &result) void xmrig::DaemonClient::connect() { + if ((m_pool.algorithm() == Algorithm::ASTROBWT_DERO) || (m_pool.coin() == Coin::DERO)) { + m_apiVersion = API_DERO; + } + setState(ConnectingState); getBlockTemplate(); } @@ -172,7 +185,7 @@ void xmrig::DaemonClient::onHttpData(const HttpData &data) if (data.method == HTTP_GET) { if (data.url == kGetHeight) { if (!doc.HasMember(kHash)) { - m_monero = false; + m_apiVersion = API_CRYPTONOTE_DEFAULT; return send(HTTP_GET, kGetInfo); } @@ -200,7 +213,21 @@ void xmrig::DaemonClient::onTimer(const Timer *) getBlockTemplate(); } else if (m_state == ConnectedState) { - send(HTTP_GET, m_monero ? kGetHeight : kGetInfo); + if (m_apiVersion == API_DERO) { + using namespace rapidjson; + Document doc(kObjectType); + auto& allocator = doc.GetAllocator(); + + doc.AddMember("id", m_sequence, allocator); + doc.AddMember("jsonrpc", "2.0", allocator); + doc.AddMember("method", "get_info", allocator); + + send(HTTP_POST, kJsonRPC, doc); + ++m_sequence; + } + else { + send(HTTP_GET, (m_apiVersion == API_MONERO) ? 
kGetHeight : kGetInfo); + } + } } } @@ -216,7 +243,19 @@ bool xmrig::DaemonClient::parseJob(const rapidjson::Value &params, int *code) Job job(false, m_pool.algorithm(), String()); String blocktemplate = Json::getString(params, kBlocktemplateBlob); - if (blocktemplate.isNull() || !job.setBlob(Json::getString(params, "blockhashing_blob"))) { + + m_blockhashingblob = Json::getString(params, "blockhashing_blob"); + if (m_apiVersion == API_DERO) { + const uint64_t offset = Json::getUint64(params, "reserved_offset"); + /* reserved_offset comes from the daemon: reject the job unless the BlobReserveSize * 2 hex characters written below fit inside the (non-null) hashing blob */ + if (m_blockhashingblob.isNull() || m_blockhashingblob.size() < BlobReserveSize * 2 || offset > (m_blockhashingblob.size() - BlobReserveSize * 2) / 2) { + *code = 4; + return false; + } + Buffer::toHex(Buffer::randomBytes(BlobReserveSize).data(), BlobReserveSize, m_blockhashingblob.data() + offset * 2); + } + + if (blocktemplate.isNull() || !job.setBlob(m_blockhashingblob)) { *code = 4; return false; } @@ -263,6 +302,13 @@ bool xmrig::DaemonClient::parseResponse(int64_t id, const rapidjson::Value &resu return false; } + if (result.HasMember("top_block_hash")) { + if (m_prevHash != Json::getString(result, "top_block_hash")) { + getBlockTemplate(); + } + return true; + } + int code = -1; if (result.HasMember(kBlocktemplateBlob) && parseJob(result, &code)) { return true; @@ -286,7 +332,12 @@ int64_t xmrig::DaemonClient::getBlockTemplate() Value params(kObjectType); params.AddMember("wallet_address", m_user.toJSON(), allocator); - params.AddMember("extra_nonce", Buffer::randomBytes(8).toHex().toJSON(doc), allocator); + if (m_apiVersion == API_DERO) { + params.AddMember("reserve_size", BlobReserveSize, allocator); + } + else { + params.AddMember("extra_nonce", Buffer::randomBytes(BlobReserveSize).toHex().toJSON(doc), allocator); + } JsonRequest::create(doc, m_sequence, "getblocktemplate", params); diff --git a/src/base/net/stratum/DaemonClient.h b/src/base/net/stratum/DaemonClient.h index adaef14e0..0b14f5dda 100644 --- a/src/base/net/stratum/DaemonClient.h +++ b/src/base/net/stratum/DaemonClient.h @@ -76,9 +76,15 @@ private: void send(int method, const char *url, const rapidjson::Document &doc); void setState(SocketState state); - bool m_monero; +
enum { + API_CRYPTONOTE_DEFAULT, + API_MONERO, + API_DERO, + } m_apiVersion; + std::shared_ptr<IHttpListener> m_httpListener; String m_blocktemplate; + String m_blockhashingblob; String m_prevHash; String m_tlsFingerprint; String m_tlsVersion; diff --git a/src/crypto/astrobwt/AstroBWT.cpp b/src/crypto/astrobwt/AstroBWT.cpp new file mode 100644 index 000000000..b4595f87a --- /dev/null +++ b/src/crypto/astrobwt/AstroBWT.cpp @@ -0,0 +1,207 @@ +/* XMRig + * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> + * Copyright 2012-2014 pooler <pooler@litecoinpool.org> + * Copyright 2014 Lucas Jones <https://github.com/lucasjones> + * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> + * Copyright 2016 Jay D Dee <jayddee246@gmail.com> + * Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt> + * Copyright 2018 Lee Clagett <https://github.com/vtnerd> + * Copyright 2018-2019 tevador <tevador@gmail.com> + * Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools> + * Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools> + * Copyright 2018-2020 SChernykh <https://github.com/SChernykh> + * Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + + +#include "AstroBWT.h" +#include "sha3.h" +#include "crypto/cn/CryptoNight.h" + +constexpr int STAGE1_SIZE = 147253; /* number of Salsa20 keystream bytes generated for stage 1 */ +constexpr int ALLOCATION_SIZE = (STAGE1_SIZE + 1048576) + (128 - (STAGE1_SIZE & 63)); /* byte offset unit used by astrobwt_dero to place indices (x1) and tmp_indices (x9) inside the scratchpad */ + +constexpr int COUNTING_SORT_BITS = 10; /* width of one counting-sort digit */ +constexpr int COUNTING_SORT_SIZE = 1 << COUNTING_SORT_BITS; /* number of buckets per digit */ + +#ifdef _MSC_VER + +#include <stdlib.h> +#define bswap_64(x) _byteswap_uint64(x) + +#elif defined __GNUC__ + +#define bswap_64(x) __builtin_bswap64(x) + +#else + +#include <byteswap.h> + +#endif + +#ifdef XMRIG_ARM +extern "C" { +#include "salsa20_ref/ecrypt-sync.h" +} + +static void Salsa20_XORKeyStream(const void* key, void* output, size_t size) /* ARM build: salsa20_ref implementation, 256-bit key, all-zero 64-bit IV */ +{ + uint8_t iv[8] = {}; + ECRYPT_ctx ctx; + ECRYPT_keysetup(&ctx, static_cast<const uint8_t*>(key), 256, 64); + ECRYPT_ivsetup(&ctx, iv); + ECRYPT_keystream_bytes(&ctx, static_cast<uint8_t*>(output), size); + memset(static_cast<uint8_t*>(output) + size, 0, 16); /* zero the 16 bytes after the keystream, so output must have room for size + 16 bytes; presumably this keeps the 8-byte key reads in sort_indices near the end of the data deterministic */ +} +#else +#include "Salsa20.hpp" + +static void Salsa20_XORKeyStream(const void* key, void* output, size_t size) /* non-ARM build: ZeroTier::Salsa20 with an all-zero IV */ +{ + const uint64_t iv = 0; + ZeroTier::Salsa20 s(key, &iv); + s.XORKeyStream(output, size); + memset(static_cast<uint8_t*>(output) + size, 0, 16); /* zero the 16 bytes after the keystream, so output must have room for size + 16 bytes */ +} +#endif + +void sort_indices(int N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indices) /* Orders the positions 0..N-1 of v by the big-endian bytes starting at each position. Each entry written to indices[] is (key with its low 21 bits cleared) | position; tmp_indices is scratch space for N entries. Reads up to 13 bytes past v + N - 1. */ +{ + uint32_t counters[2][COUNTING_SORT_SIZE] = {}; + + for (int i = 0; i < N; ++i) /* histogram of the top two 10-bit digits of every 64-bit key: counters[1] = most significant digit, counters[0] = the digit below it */ + { + const uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + i)); + ++counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]; + ++counters[1][k >> (64 - COUNTING_SORT_BITS)]; + } + + uint32_t prev[2] = { counters[0][0], counters[1][0] }; /* running inclusive prefix sums for both digits */ + counters[0][0] = prev[0] - 1; + counters[1][0] = prev[1] - 1; + for (int i = 1; i < COUNTING_SORT_SIZE; ++i) /* after this loop each counter holds the index of the last output slot of its bucket */ + { + const uint32_t cur[2] = { counters[0][i] + prev[0], counters[1][i] + prev[1] }; + counters[0][i] = cur[0] - 1; + counters[1][i] = cur[1] - 1; + prev[0] = cur[0]; + prev[1] = cur[1]; + } + + for (int i = N - 1; i >= 0; --i) /* pass 1: scatter by the lower digit, walking backwards and filling each bucket from its end */ + { + const
uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + i)); + tmp_indices[counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]--] = (k & (static_cast<uint64_t>(-1) << 21)) | i; + } + + for (int i = N - 1; i >= 0; --i) /* pass 2: scatter by the most significant digit, again backwards so the order from pass 1 is kept inside each bucket */ + { + const uint64_t data = tmp_indices[i]; + indices[counters[1][data >> (64 - COUNTING_SORT_BITS)]--] = data; + } + + auto smaller = [v](uint64_t a, uint64_t b) /* strict ordering of two packed entries: first by the 43 key bits stored in the entry, then by the 8 bytes found 5 bytes after each entry's position in v */ + { + const uint64_t value_a = a >> 21; + const uint64_t value_b = b >> 21; + + if (value_a < value_b) + return true; + + if (value_a > value_b) + return false; + + const uint64_t data_a = bswap_64(*reinterpret_cast<const uint64_t*>(v + (a % (1 << 21)) + 5)); + const uint64_t data_b = bswap_64(*reinterpret_cast<const uint64_t*>(v + (b % (1 << 21)) + 5)); + return (data_a < data_b); + }; + + uint64_t prev_t = indices[0]; + for (int i = 1; i < N; ++i) /* insertion sort over the radix-sorted array, using smaller() to fix the order the two 10-bit passes could not decide */ + { + uint64_t t = indices[i]; + if (smaller(t, prev_t)) + { + const uint64_t t2 = prev_t; + int j = i - 1; + do + { + indices[j + 1] = prev_t; + --j; + if (j < 0) + break; + prev_t = indices[j]; + } while (smaller(t, prev_t)); + indices[j + 1] = t; + t = t2; + } + prev_t = t; + } +} + +void astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash) /* Hashes input_size bytes of input_data and writes 32 bytes to output_hash. scratchpad is working memory; the code addresses it up to 64 + ALLOCATION_SIZE * 9 bytes and beyond (tmp_indices / result area), so the caller must supply a buffer large enough (size not visible here). */ +{ + uint8_t key[32]; + uint8_t* scratchpad_ptr = (uint8_t*)(scratchpad) + 64; /* leave a 64-byte lead-in so that stage output - 1 (read below) is still inside the scratchpad */ + uint8_t* stage1_output = scratchpad_ptr; + uint8_t* stage2_output = scratchpad_ptr; /* both stages reuse the same keystream area */ + uint64_t* indices = (uint64_t*)(scratchpad_ptr + ALLOCATION_SIZE); + uint64_t* tmp_indices = (uint64_t*)(scratchpad_ptr + ALLOCATION_SIZE * 9); + uint8_t* stage1_result = (uint8_t*)(tmp_indices); /* the result bytes overwrite tmp_indices, which is no longer needed once sort_indices has returned */ + uint8_t* stage2_result = (uint8_t*)(tmp_indices); + + sha3_HashBuffer(256, SHA3_FLAGS_NONE, input_data, input_size, key, sizeof(key)); /* key = SHA3-256(input) */ + + Salsa20_XORKeyStream(key, stage1_output, STAGE1_SIZE); + + sort_indices(STAGE1_SIZE + 1, stage1_output, indices, tmp_indices); /* STAGE1_SIZE + 1 positions: the keystream plus the first zero byte appended after it */ + + { + const uint8_t* tmp = stage1_output - 1; /* result[i] is the byte immediately preceding the i-th sorted position; position 0 reads the byte just before stage1_output */ + for (int i = 0; i <= STAGE1_SIZE; ++i) + stage1_result[i] =
tmp[indices[i] & ((1 << 21) - 1)]; + } + + sha3_HashBuffer(256, SHA3_FLAGS_NONE, stage1_result, STAGE1_SIZE + 1, key, sizeof(key)); /* key = SHA3-256(stage-1 result) */ + + const int stage2_size = STAGE1_SIZE + (*(uint32_t*)(key) & 0xfffff); /* stage-2 length varies with the digest: STAGE1_SIZE plus the low 20 bits of its first 32-bit word */ + Salsa20_XORKeyStream(key, stage2_output, stage2_size); + + sort_indices(stage2_size + 1, stage2_output, indices, tmp_indices); + + { + const uint8_t* tmp = stage2_output - 1; /* same preceding-byte gather as stage 1, manually unrolled by four */ + int i = 0; + const int n = ((stage2_size + 1) / 4) * 4; + for (; i < n; i += 4) + { + stage2_result[i + 0] = tmp[indices[i + 0] & ((1 << 21) - 1)]; + stage2_result[i + 1] = tmp[indices[i + 1] & ((1 << 21) - 1)]; + stage2_result[i + 2] = tmp[indices[i + 2] & ((1 << 21) - 1)]; + stage2_result[i + 3] = tmp[indices[i + 3] & ((1 << 21) - 1)]; + } + for (; i <= stage2_size; ++i) /* remaining 0..3 entries */ + stage2_result[i] = tmp[indices[i] & ((1 << 21) - 1)]; + } + + sha3_HashBuffer(256, SHA3_FLAGS_NONE, stage2_result, stage2_size + 1, output_hash, 32); /* final digest = SHA3-256(stage-2 result) */ +} + +template<> +void xmrig::astrobwt::single_hash<xmrig::Algorithm::ASTROBWT_DERO>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t) /* adapter: uses ctx[0]->memory as the scratchpad; the trailing uint64_t argument is ignored */ +{ + astrobwt_dero(input, static_cast<uint32_t>(size), ctx[0]->memory, output); +} diff --git a/src/crypto/astrobwt/AstroBWT.h b/src/crypto/astrobwt/AstroBWT.h new file mode 100644 index 000000000..66a69cd5a --- /dev/null +++ b/src/crypto/astrobwt/AstroBWT.h @@ -0,0 +1,45 @@ +/* XMRig + * Copyright 2010 Jeff Garzik <jgarzik@pobox.com> + * Copyright 2012-2014 pooler <pooler@litecoinpool.org> + * Copyright 2014 Lucas Jones <https://github.com/lucasjones> + * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> + * Copyright 2016 Jay D Dee <jayddee246@gmail.com> + * Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt> + * Copyright 2018 Lee Clagett <https://github.com/vtnerd> + * Copyright 2018-2019 tevador <tevador@gmail.com> + * Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools> + * Copyright 2004-2008 H.
Peter Anvin <https://github.com/intel/msr-tools> + * Copyright 2018-2020 SChernykh <https://github.com/SChernykh> + * Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +#include "crypto/common/Algorithm.h" + +struct cryptonight_ctx; + + +namespace xmrig { namespace astrobwt { + + +template<Algorithm::Id ALGO> +void single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t); /* primary template: declared only; each supported algorithm provides an explicit specialization */ + +template<> +void single_hash<Algorithm::ASTROBWT_DERO>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t); /* AstroBWT (DERO) specialization, defined in AstroBWT.cpp */ + + +}} // namespace xmrig::astrobwt diff --git a/src/crypto/astrobwt/Salsa20.cpp b/src/crypto/astrobwt/Salsa20.cpp new file mode 100644 index 000000000..a355b187d --- /dev/null +++ b/src/crypto/astrobwt/Salsa20.cpp @@ -0,0 +1,352 @@ +/* + * Based on public domain code available at: http://cr.yp.to/snuffle.html + * + * Modifications and C-native SSE macro based SSE implementation by + * Adam Ierymenko <adam.ierymenko@zerotier.com>. + * + * Additional modifications and code cleanup for AstroBWT by + * SChernykh <https://github.com/SChernykh> + * + * Since the original was public domain, this is too.
+ */ + +#include "Salsa20.hpp" + +// Statically compute and define SSE constants +class _s20sseconsts /* NOTE(review): global-scope names starting with an underscore, and any name starting with underscore + uppercase such as _S20SSECONSTANTS below, are reserved for the implementation */ +{ +public: + _s20sseconsts() + { + maskLo32 = _mm_shuffle_epi32(_mm_cvtsi32_si128(-1), _MM_SHUFFLE(1, 0, 1, 0)); + maskHi32 = _mm_slli_epi64(maskLo32, 32); + } + __m128i maskLo32,maskHi32; +}; +static const _s20sseconsts _S20SSECONSTANTS; + +namespace ZeroTier { + +void Salsa20::init(const void *key,const void *iv) /* Loads the four Salsa20 constants, the eight 32-bit key words and the two IV words into the 16-word state, using the permuted word order the SSE code in XORKeyStream works on. Words 8 and 5 (the block counter incremented in XORKeyStream) start at zero. */ +{ + const uint32_t *const k = (const uint32_t *)key; + _state.i[0] = 0x61707865; + _state.i[1] = 0x3320646e; + _state.i[2] = 0x79622d32; + _state.i[3] = 0x6b206574; + _state.i[4] = k[3]; + _state.i[5] = 0; + _state.i[6] = k[7]; + _state.i[7] = k[2]; + _state.i[8] = 0; + _state.i[9] = k[6]; + _state.i[10] = k[1]; + _state.i[11] = ((const uint32_t *)iv)[1]; + _state.i[12] = k[5]; + _state.i[13] = k[0]; + _state.i[14] = ((const uint32_t *)iv)[0]; + _state.i[15] = k[4]; +} + +void Salsa20::XORKeyStream(void *out,unsigned int bytes) /* NOTE: despite the name, this variant never reads out: it overwrites the first bytes bytes of out with raw keystream (nothing is XORed), advancing the block counter once per 64-byte block. Does nothing when bytes is 0. */ +{ + uint8_t tmp[64]; + uint8_t *c = (uint8_t *)out; + uint8_t *ctarget = c; + unsigned int i; + + if (!bytes) + return; + + for (;;) { + if (bytes < 64) { /* tail shorter than one block: generate into tmp, then copy only bytes bytes to the caller's buffer at the end of the iteration */ + for (i = 0;i < bytes;++i) + tmp[i] = 0; + ctarget = c; + c = tmp; + } + + __m128i X0 = _mm_loadu_si128((const __m128i *)&(_state.v[0])); + __m128i X1 = _mm_loadu_si128((const __m128i *)&(_state.v[1])); + __m128i X2 = _mm_loadu_si128((const __m128i *)&(_state.v[2])); + __m128i X3 = _mm_loadu_si128((const __m128i *)&(_state.v[3])); + __m128i T; + __m128i X0s = X0; + __m128i X1s = X1; + __m128i X2s = X2; + __m128i X3s = X3; + + // 2X round ------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 =
_mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + // 2X round ------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + // 2X round
------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + // 2X round ------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2
= _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + // 2X round ------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + // 2X round ------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 =
_mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + // 2X round ------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 =
_mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + // 2X round ------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + // 2X round ------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); +
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + // 2X round ------------------------------------------------------------- + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14)); + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + + X0 = _mm_add_epi32(X0s,X0); + X1 = _mm_add_epi32(X1s,X1); + X2 = _mm_add_epi32(X2s,X2); + X3 = _mm_add_epi32(X3s,X3); + + __m128i k02 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X0, 32), _mm_srli_epi64(X3, 32)), _MM_SHUFFLE(0, 1, 2,
3)); + __m128i k13 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X1, 32), _mm_srli_epi64(X0, 32)), _MM_SHUFFLE(0, 1, 2, 3)); + __m128i k20 = _mm_or_si128(_mm_and_si128(X2, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X1, _S20SSECONSTANTS.maskHi32)); + __m128i k31 = _mm_or_si128(_mm_and_si128(X3, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X2, _S20SSECONSTANTS.maskHi32)); + _mm_storeu_ps(reinterpret_cast<float *>(c),_mm_castsi128_ps(_mm_unpackhi_epi64(k02,k20))); /* the four stores write the 64-byte keystream block straight to c; nothing is loaded from or XORed with the destination */ + _mm_storeu_ps(reinterpret_cast<float *>(c) + 4,_mm_castsi128_ps(_mm_unpackhi_epi64(k13,k31))); + _mm_storeu_ps(reinterpret_cast<float *>(c) + 8,_mm_castsi128_ps(_mm_unpacklo_epi64(k20,k02))); + _mm_storeu_ps(reinterpret_cast<float *>(c) + 12,_mm_castsi128_ps(_mm_unpacklo_epi64(k31,k13))); + + if (!(++_state.i[8])) { + ++_state.i[5]; // state reordered for SSE + /* stopping at 2^70 bytes per nonce is user's responsibility */ + } + + if (bytes <= 64) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) + ctarget[i] = c[i]; + } + + return; + } + + bytes -= 64; + c += 64; + } +} + +} // namespace ZeroTier diff --git a/src/crypto/astrobwt/Salsa20.hpp b/src/crypto/astrobwt/Salsa20.hpp new file mode 100644 index 000000000..1313fdb4e --- /dev/null +++ b/src/crypto/astrobwt/Salsa20.hpp @@ -0,0 +1,52 @@ +/* + * Based on public domain code available at: http://cr.yp.to/snuffle.html + * + * This therefore is public domain.
+ */ + +#ifndef ZT_SALSA20_HPP +#define ZT_SALSA20_HPP + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <emmintrin.h> + +namespace ZeroTier { + +/** + * Salsa20 stream cipher + */ +class Salsa20 +{ +public: + /** + * @param key 256-bit (32 byte) key + * @param iv 64-bit initialization vector + */ + Salsa20(const void *key,const void *iv) + { + init(key,iv); + } + + /** + * Initialize cipher + * + * @param key Key bits + * @param iv 64-bit initialization vector + */ + void init(const void *key,const void *iv); + + void XORKeyStream(void *out,unsigned int bytes); + +private: + union { + __m128i v[4]; + uint32_t i[16]; + } _state; +}; + +} // namespace ZeroTier + +#endif diff --git a/src/crypto/astrobwt/salsa20_ref/ecrypt-config.h b/src/crypto/astrobwt/salsa20_ref/ecrypt-config.h new file mode 100644 index 000000000..a37e5e2cf --- /dev/null +++ b/src/crypto/astrobwt/salsa20_ref/ecrypt-config.h @@ -0,0 +1,272 @@ +/* ecrypt-config.h */ + +/* *** Normally, it should not be necessary to edit this file. *** */ + +#ifndef ECRYPT_CONFIG +#define ECRYPT_CONFIG + +/* ------------------------------------------------------------------------- */ + +/* Guess the endianness of the target architecture. 
*/ + +/* + * The LITTLE endian machines: + */ +#if defined(__ultrix) /* Older MIPS */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(__alpha) /* Alpha */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(i386) /* x86 (gcc) */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(__i386) /* x86 (gcc) */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(_M_IX86) /* x86 (MSC, Borland) */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(_MSC_VER) /* x86 (surely MSC) */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */ +#define ECRYPT_LITTLE_ENDIAN + +/* + * The BIG endian machines: + */ +#elif defined(sun) /* Newer Sparc's */ +#define ECRYPT_BIG_ENDIAN +#elif defined(__ppc__) /* PowerPC */ +#define ECRYPT_BIG_ENDIAN + +/* + * Finally machines with UNKNOWN endianness: + */ +#elif defined (_AIX) /* RS6000 */ +#define ECRYPT_UNKNOWN +#elif defined(__hpux) /* HP-PA */ +#define ECRYPT_UNKNOWN +#elif defined(__aux) /* 68K */ +#define ECRYPT_UNKNOWN +#elif defined(__dgux) /* 88K (but P6 in latest boxes) */ +#define ECRYPT_UNKNOWN +#elif defined(__sgi) /* Newer MIPS */ +#define ECRYPT_UNKNOWN +#else /* Any other processor */ +#define ECRYPT_UNKNOWN +#endif + +/* ------------------------------------------------------------------------- */ + +/* + * Find minimal-width types to store 8-bit, 16-bit, 32-bit, and 64-bit + * integers. + * + * Note: to enable 64-bit types on 32-bit compilers, it might be + * necessary to switch from ISO C90 mode to ISO C99 mode (e.g., gcc + * -std=c99). 
+ */ + +#include <limits.h> + +/* --- check char --- */ + +#if (UCHAR_MAX / 0xFU > 0xFU) +#ifndef I8T +#define I8T char +#define U8C(v) (v##U) + +#if (UCHAR_MAX == 0xFFU) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (UCHAR_MAX / 0xFFU > 0xFFU) +#ifndef I16T +#define I16T char +#define U16C(v) (v##U) +#endif + +#if (UCHAR_MAX / 0xFFFFU > 0xFFFFU) +#ifndef I32T +#define I32T char +#define U32C(v) (v##U) +#endif + +#if (UCHAR_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU) +#ifndef I64T +#define I64T char +#define U64C(v) (v##U) +#define ECRYPT_NATIVE64 +#endif + +#endif +#endif +#endif +#endif + +/* --- check short --- */ + +#if (USHRT_MAX / 0xFU > 0xFU) +#ifndef I8T +#define I8T short +#define U8C(v) (v##U) + +#if (USHRT_MAX == 0xFFU) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (USHRT_MAX / 0xFFU > 0xFFU) +#ifndef I16T +#define I16T short +#define U16C(v) (v##U) +#endif + +#if (USHRT_MAX / 0xFFFFU > 0xFFFFU) +#ifndef I32T +#define I32T short +#define U32C(v) (v##U) +#endif + +#if (USHRT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU) +#ifndef I64T +#define I64T short +#define U64C(v) (v##U) +#define ECRYPT_NATIVE64 +#endif + +#endif +#endif +#endif +#endif + +/* --- check int --- */ + +#if (UINT_MAX / 0xFU > 0xFU) +#ifndef I8T +#define I8T int +#define U8C(v) (v##U) + +#if (ULONG_MAX == 0xFFU) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (UINT_MAX / 0xFFU > 0xFFU) +#ifndef I16T +#define I16T int +#define U16C(v) (v##U) +#endif + +#if (UINT_MAX / 0xFFFFU > 0xFFFFU) +#ifndef I32T +#define I32T int +#define U32C(v) (v##U) +#endif + +#if (UINT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU) +#ifndef I64T +#define I64T int +#define U64C(v) (v##U) +#define ECRYPT_NATIVE64 +#endif + +#endif +#endif +#endif +#endif + +/* --- check long --- */ + +#if (ULONG_MAX / 0xFUL > 0xFUL) +#ifndef I8T +#define I8T long +#define U8C(v) (v##UL) + +#if (ULONG_MAX == 0xFFUL) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (ULONG_MAX / 0xFFUL > 0xFFUL) +#ifndef I16T +#define I16T long +#define U16C(v) 
(v##UL) +#endif + +#if (ULONG_MAX / 0xFFFFUL > 0xFFFFUL) +#ifndef I32T +#define I32T long +#define U32C(v) (v##UL) +#endif + +#if (ULONG_MAX / 0xFFFFFFFFUL > 0xFFFFFFFFUL) +#ifndef I64T +#define I64T long +#define U64C(v) (v##UL) +#define ECRYPT_NATIVE64 +#endif + +#endif +#endif +#endif +#endif + +/* --- check long long --- */ + +#ifdef ULLONG_MAX + +#if (ULLONG_MAX / 0xFULL > 0xFULL) +#ifndef I8T +#define I8T long long +#define U8C(v) (v##ULL) + +#if (ULLONG_MAX == 0xFFULL) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (ULLONG_MAX / 0xFFULL > 0xFFULL) +#ifndef I16T +#define I16T long long +#define U16C(v) (v##ULL) +#endif + +#if (ULLONG_MAX / 0xFFFFULL > 0xFFFFULL) +#ifndef I32T +#define I32T long long +#define U32C(v) (v##ULL) +#endif + +#if (ULLONG_MAX / 0xFFFFFFFFULL > 0xFFFFFFFFULL) +#ifndef I64T +#define I64T long long +#define U64C(v) (v##ULL) +#endif + +#endif +#endif +#endif +#endif + +#endif + +/* --- check __int64 --- */ + +#ifdef _UI64_MAX + +#if (_UI64_MAX / 0xFFFFFFFFui64 > 0xFFFFFFFFui64) +#ifndef I64T +#define I64T __int64 +#define U64C(v) (v##ui64) +#endif + +#endif + +#endif + +/* ------------------------------------------------------------------------- */ + +#endif diff --git a/src/crypto/astrobwt/salsa20_ref/ecrypt-machine.h b/src/crypto/astrobwt/salsa20_ref/ecrypt-machine.h new file mode 100644 index 000000000..3e550d024 --- /dev/null +++ b/src/crypto/astrobwt/salsa20_ref/ecrypt-machine.h @@ -0,0 +1,46 @@ +/* ecrypt-machine.h */ + +/* + * This file is included by 'ecrypt-portable.h'. It allows to override + * the default macros for specific platforms. Please carefully check + * the machine code generated by your compiler (with optimisations + * turned on) before deciding to edit this file. 
+ */ + +/* ------------------------------------------------------------------------- */ + +#if (defined(ECRYPT_DEFAULT_ROT) && !defined(ECRYPT_MACHINE_ROT)) + +#define ECRYPT_MACHINE_ROT + +#if (defined(WIN32) && defined(_MSC_VER)) + +#undef ROTL32 +#undef ROTR32 +#undef ROTL64 +#undef ROTR64 + +#include <stdlib.h> + +#define ROTL32(v, n) _lrotl(v, n) +#define ROTR32(v, n) _lrotr(v, n) +#define ROTL64(v, n) _rotl64(v, n) +#define ROTR64(v, n) _rotr64(v, n) + +#endif + +#endif + +/* ------------------------------------------------------------------------- */ + +#if (defined(ECRYPT_DEFAULT_SWAP) && !defined(ECRYPT_MACHINE_SWAP)) + +#define ECRYPT_MACHINE_SWAP + +/* + * If you want to override the default swap macros, put them here. And so on. + */ + +#endif + +/* ------------------------------------------------------------------------- */ diff --git a/src/crypto/astrobwt/salsa20_ref/ecrypt-portable.h b/src/crypto/astrobwt/salsa20_ref/ecrypt-portable.h new file mode 100644 index 000000000..72a92833c --- /dev/null +++ b/src/crypto/astrobwt/salsa20_ref/ecrypt-portable.h @@ -0,0 +1,303 @@ +/* ecrypt-portable.h */ + +/* + * WARNING: the conversions defined below are implemented as macros, + * and should be used carefully. They should NOT be used with + * parameters which perform some action. E.g., the following two lines + * are not equivalent: + * + * 1) ++x; y = ROTL32(x, n); + * 2) y = ROTL32(++x, n); + */ + +/* + * *** Please do not edit this file. *** + * + * The default macros can be overridden for specific architectures by + * editing 'ecrypt-machine.h'. 
+ */ + +#ifndef ECRYPT_PORTABLE +#define ECRYPT_PORTABLE + +#include "ecrypt-config.h" + +/* ------------------------------------------------------------------------- */ + +/* + * The following types are defined (if available): + * + * u8: unsigned integer type, at least 8 bits + * u16: unsigned integer type, at least 16 bits + * u32: unsigned integer type, at least 32 bits + * u64: unsigned integer type, at least 64 bits + * + * s8, s16, s32, s64 -> signed counterparts of u8, u16, u32, u64 + * + * The selection of minimum-width integer types is taken care of by + * 'ecrypt-config.h'. Note: to enable 64-bit types on 32-bit + * compilers, it might be necessary to switch from ISO C90 mode to ISO + * C99 mode (e.g., gcc -std=c99). + */ + +#ifdef I8T +typedef signed I8T s8; +typedef unsigned I8T u8; +#endif + +#ifdef I16T +typedef signed I16T s16; +typedef unsigned I16T u16; +#endif + +#ifdef I32T +typedef signed I32T s32; +typedef unsigned I32T u32; +#endif + +#ifdef I64T +typedef signed I64T s64; +typedef unsigned I64T u64; +#endif + +/* + * The following macros are used to obtain exact-width results. + */ + +#define U8V(v) ((u8)(v) & U8C(0xFF)) +#define U16V(v) ((u16)(v) & U16C(0xFFFF)) +#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) +#define U64V(v) ((u64)(v) & U64C(0xFFFFFFFFFFFFFFFF)) + +/* ------------------------------------------------------------------------- */ + +/* + * The following macros return words with their bits rotated over n + * positions to the left/right. 
+ */ + +#define ECRYPT_DEFAULT_ROT + +#define ROTL8(v, n) \ + (U8V((v) << (n)) | ((v) >> (8 - (n)))) + +#define ROTL16(v, n) \ + (U16V((v) << (n)) | ((v) >> (16 - (n)))) + +#define ROTL32(v, n) \ + (U32V((v) << (n)) | ((v) >> (32 - (n)))) + +#define ROTL64(v, n) \ + (U64V((v) << (n)) | ((v) >> (64 - (n)))) + +#define ROTR8(v, n) ROTL8(v, 8 - (n)) +#define ROTR16(v, n) ROTL16(v, 16 - (n)) +#define ROTR32(v, n) ROTL32(v, 32 - (n)) +#define ROTR64(v, n) ROTL64(v, 64 - (n)) + +#include "ecrypt-machine.h" + +/* ------------------------------------------------------------------------- */ + +/* + * The following macros return a word with bytes in reverse order. + */ + +#define ECRYPT_DEFAULT_SWAP + +#define SWAP16(v) \ + ROTL16(v, 8) + +#define SWAP32(v) \ + ((ROTL32(v, 8) & U32C(0x00FF00FF)) | \ + (ROTL32(v, 24) & U32C(0xFF00FF00))) + +#ifdef ECRYPT_NATIVE64 +#define SWAP64(v) \ + ((ROTL64(v, 8) & U64C(0x000000FF000000FF)) | \ + (ROTL64(v, 24) & U64C(0x0000FF000000FF00)) | \ + (ROTL64(v, 40) & U64C(0x00FF000000FF0000)) | \ + (ROTL64(v, 56) & U64C(0xFF000000FF000000))) +#else +#define SWAP64(v) \ + (((u64)SWAP32(U32V(v)) << 32) | (u64)SWAP32(U32V(v >> 32))) +#endif + +#include "ecrypt-machine.h" + +#define ECRYPT_DEFAULT_WTOW + +#ifdef ECRYPT_LITTLE_ENDIAN +#define U16TO16_LITTLE(v) (v) +#define U32TO32_LITTLE(v) (v) +#define U64TO64_LITTLE(v) (v) + +#define U16TO16_BIG(v) SWAP16(v) +#define U32TO32_BIG(v) SWAP32(v) +#define U64TO64_BIG(v) SWAP64(v) +#endif + +#ifdef ECRYPT_BIG_ENDIAN +#define U16TO16_LITTLE(v) SWAP16(v) +#define U32TO32_LITTLE(v) SWAP32(v) +#define U64TO64_LITTLE(v) SWAP64(v) + +#define U16TO16_BIG(v) (v) +#define U32TO32_BIG(v) (v) +#define U64TO64_BIG(v) (v) +#endif + +#include "ecrypt-machine.h" + +/* + * The following macros load words from an array of bytes with + * different types of endianness, and vice versa. 
+ */ + +#define ECRYPT_DEFAULT_BTOW + +#if (!defined(ECRYPT_UNKNOWN) && defined(ECRYPT_I8T_IS_BYTE)) + +#define U8TO16_LITTLE(p) U16TO16_LITTLE(((u16*)(p))[0]) +#define U8TO32_LITTLE(p) U32TO32_LITTLE(((u32*)(p))[0]) +#define U8TO64_LITTLE(p) U64TO64_LITTLE(((u64*)(p))[0]) + +#define U8TO16_BIG(p) U16TO16_BIG(((u16*)(p))[0]) +#define U8TO32_BIG(p) U32TO32_BIG(((u32*)(p))[0]) +#define U8TO64_BIG(p) U64TO64_BIG(((u64*)(p))[0]) + +#define U16TO8_LITTLE(p, v) (((u16*)(p))[0] = U16TO16_LITTLE(v)) +#define U32TO8_LITTLE(p, v) (((u32*)(p))[0] = U32TO32_LITTLE(v)) +#define U64TO8_LITTLE(p, v) (((u64*)(p))[0] = U64TO64_LITTLE(v)) + +#define U16TO8_BIG(p, v) (((u16*)(p))[0] = U16TO16_BIG(v)) +#define U32TO8_BIG(p, v) (((u32*)(p))[0] = U32TO32_BIG(v)) +#define U64TO8_BIG(p, v) (((u64*)(p))[0] = U64TO64_BIG(v)) + +#else + +#define U8TO16_LITTLE(p) \ + (((u16)((p)[0]) ) | \ + ((u16)((p)[1]) << 8)) + +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0]) ) | \ + ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | \ + ((u32)((p)[3]) << 24)) + +#ifdef ECRYPT_NATIVE64 +#define U8TO64_LITTLE(p) \ + (((u64)((p)[0]) ) | \ + ((u64)((p)[1]) << 8) | \ + ((u64)((p)[2]) << 16) | \ + ((u64)((p)[3]) << 24) | \ + ((u64)((p)[4]) << 32) | \ + ((u64)((p)[5]) << 40) | \ + ((u64)((p)[6]) << 48) | \ + ((u64)((p)[7]) << 56)) +#else +#define U8TO64_LITTLE(p) \ + ((u64)U8TO32_LITTLE(p) | ((u64)U8TO32_LITTLE((p) + 4) << 32)) +#endif + +#define U8TO16_BIG(p) \ + (((u16)((p)[0]) << 8) | \ + ((u16)((p)[1]) )) + +#define U8TO32_BIG(p) \ + (((u32)((p)[0]) << 24) | \ + ((u32)((p)[1]) << 16) | \ + ((u32)((p)[2]) << 8) | \ + ((u32)((p)[3]) )) + +#ifdef ECRYPT_NATIVE64 +#define U8TO64_BIG(p) \ + (((u64)((p)[0]) << 56) | \ + ((u64)((p)[1]) << 48) | \ + ((u64)((p)[2]) << 40) | \ + ((u64)((p)[3]) << 32) | \ + ((u64)((p)[4]) << 24) | \ + ((u64)((p)[5]) << 16) | \ + ((u64)((p)[6]) << 8) | \ + ((u64)((p)[7]) )) +#else +#define U8TO64_BIG(p) \ + (((u64)U8TO32_BIG(p) << 32) | (u64)U8TO32_BIG((p) + 4)) +#endif + +#define 
U16TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v) ); \ + (p)[1] = U8V((v) >> 8); \ + } while (0) + +#define U32TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v) ); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + } while (0) + +#ifdef ECRYPT_NATIVE64 +#define U64TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v) ); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + (p)[4] = U8V((v) >> 32); \ + (p)[5] = U8V((v) >> 40); \ + (p)[6] = U8V((v) >> 48); \ + (p)[7] = U8V((v) >> 56); \ + } while (0) +#else +#define U64TO8_LITTLE(p, v) \ + do { \ + U32TO8_LITTLE((p), U32V((v) )); \ + U32TO8_LITTLE((p) + 4, U32V((v) >> 32)); \ + } while (0) +#endif + +#define U16TO8_BIG(p, v) \ + do { \ + (p)[0] = U8V((v) ); \ + (p)[1] = U8V((v) >> 8); \ + } while (0) + +#define U32TO8_BIG(p, v) \ + do { \ + (p)[0] = U8V((v) >> 24); \ + (p)[1] = U8V((v) >> 16); \ + (p)[2] = U8V((v) >> 8); \ + (p)[3] = U8V((v) ); \ + } while (0) + +#ifdef ECRYPT_NATIVE64 +#define U64TO8_BIG(p, v) \ + do { \ + (p)[0] = U8V((v) >> 56); \ + (p)[1] = U8V((v) >> 48); \ + (p)[2] = U8V((v) >> 40); \ + (p)[3] = U8V((v) >> 32); \ + (p)[4] = U8V((v) >> 24); \ + (p)[5] = U8V((v) >> 16); \ + (p)[6] = U8V((v) >> 8); \ + (p)[7] = U8V((v) ); \ + } while (0) +#else +#define U64TO8_BIG(p, v) \ + do { \ + U32TO8_BIG((p), U32V((v) >> 32)); \ + U32TO8_BIG((p) + 4, U32V((v) )); \ + } while (0) +#endif + +#endif + +#include "ecrypt-machine.h" + +/* ------------------------------------------------------------------------- */ + +#endif diff --git a/src/crypto/astrobwt/salsa20_ref/ecrypt-sync.h b/src/crypto/astrobwt/salsa20_ref/ecrypt-sync.h new file mode 100644 index 000000000..7a837cbab --- /dev/null +++ b/src/crypto/astrobwt/salsa20_ref/ecrypt-sync.h @@ -0,0 +1,279 @@ +/* ecrypt-sync.h */ + +/* + * Header file for synchronous stream ciphers without authentication + * mechanism. + * + * *** Please only edit parts marked with "[edit]". 
*** + */ + +#ifndef ECRYPT_SYNC +#define ECRYPT_SYNC + +#include "ecrypt-portable.h" + +/* ------------------------------------------------------------------------- */ + +/* Cipher parameters */ + +/* + * The name of your cipher. + */ +#define ECRYPT_NAME "Salsa20" /* [edit] */ +#define ECRYPT_PROFILE "S!_H." + +/* + * Specify which key and IV sizes are supported by your cipher. A user + * should be able to enumerate the supported sizes by running the + * following code: + * + * for (i = 0; ECRYPT_KEYSIZE(i) <= ECRYPT_MAXKEYSIZE; ++i) + * { + * keysize = ECRYPT_KEYSIZE(i); + * + * ... + * } + * + * All sizes are in bits. + */ + +#define ECRYPT_MAXKEYSIZE 256 /* [edit] */ +#define ECRYPT_KEYSIZE(i) (128 + (i)*128) /* [edit] */ + +#define ECRYPT_MAXIVSIZE 64 /* [edit] */ +#define ECRYPT_IVSIZE(i) (64 + (i)*64) /* [edit] */ + +/* ------------------------------------------------------------------------- */ + +/* Data structures */ + +/* + * ECRYPT_ctx is the structure containing the representation of the + * internal state of your cipher. + */ + +typedef struct +{ + u32 input[16]; /* could be compressed */ + /* + * [edit] + * + * Put here all state variables needed during the encryption process. + */ +} ECRYPT_ctx; + +/* ------------------------------------------------------------------------- */ + +/* Mandatory functions */ + +/* + * Key and message independent initialization. This function will be + * called once when the program starts (e.g., to build expanded S-box + * tables). + */ +void ECRYPT_init(); + +/* + * Key setup. It is the user's responsibility to select the values of + * keysize and ivsize from the set of supported values specified + * above. + */ +void ECRYPT_keysetup( + ECRYPT_ctx* ctx, + const u8* key, + u32 keysize, /* Key size in bits. */ + u32 ivsize); /* IV size in bits. */ + +/* + * IV setup. 
After having called ECRYPT_keysetup(), the user is + * allowed to call ECRYPT_ivsetup() different times in order to + * encrypt/decrypt different messages with the same key but different + * IV's. + */ +void ECRYPT_ivsetup( + ECRYPT_ctx* ctx, + const u8* iv); + +/* + * Encryption/decryption of arbitrary length messages. + * + * For efficiency reasons, the API provides two types of + * encrypt/decrypt functions. The ECRYPT_encrypt_bytes() function + * (declared here) encrypts byte strings of arbitrary length, while + * the ECRYPT_encrypt_blocks() function (defined later) only accepts + * lengths which are multiples of ECRYPT_BLOCKLENGTH. + * + * The user is allowed to make multiple calls to + * ECRYPT_encrypt_blocks() to incrementally encrypt a long message, + * but he is NOT allowed to make additional encryption calls once he + * has called ECRYPT_encrypt_bytes() (unless he starts a new message + * of course). For example, this sequence of calls is acceptable: + * + * ECRYPT_keysetup(); + * + * ECRYPT_ivsetup(); + * ECRYPT_encrypt_blocks(); + * ECRYPT_encrypt_blocks(); + * ECRYPT_encrypt_bytes(); + * + * ECRYPT_ivsetup(); + * ECRYPT_encrypt_blocks(); + * ECRYPT_encrypt_blocks(); + * + * ECRYPT_ivsetup(); + * ECRYPT_encrypt_bytes(); + * + * The following sequence is not: + * + * ECRYPT_keysetup(); + * ECRYPT_ivsetup(); + * ECRYPT_encrypt_blocks(); + * ECRYPT_encrypt_bytes(); + * ECRYPT_encrypt_blocks(); + */ + +void ECRYPT_encrypt_bytes( + ECRYPT_ctx* ctx, + const u8* plaintext, + u8* ciphertext, + u32 msglen); /* Message length in bytes. */ + +void ECRYPT_decrypt_bytes( + ECRYPT_ctx* ctx, + const u8* ciphertext, + u8* plaintext, + u32 msglen); /* Message length in bytes. */ + +/* ------------------------------------------------------------------------- */ + +/* Optional features */ + +/* + * For testing purposes it can sometimes be useful to have a function + * which immediately generates keystream without having to provide it + * with a zero plaintext. 
If your cipher cannot provide this function + * (e.g., because it is not strictly a synchronous cipher), please + * reset the ECRYPT_GENERATES_KEYSTREAM flag. + */ + +#define ECRYPT_GENERATES_KEYSTREAM +#ifdef ECRYPT_GENERATES_KEYSTREAM + +void ECRYPT_keystream_bytes( + ECRYPT_ctx* ctx, + u8* keystream, + u32 length); /* Length of keystream in bytes. */ + +#endif + +/* ------------------------------------------------------------------------- */ + +/* Optional optimizations */ + +/* + * By default, the functions in this section are implemented using + * calls to functions declared above. However, you might want to + * implement them differently for performance reasons. + */ + +/* + * All-in-one encryption/decryption of (short) packets. + * + * The default definitions of these functions can be found in + * "ecrypt-sync.c". If you want to implement them differently, please + * undef the ECRYPT_USES_DEFAULT_ALL_IN_ONE flag. + */ +#define ECRYPT_USES_DEFAULT_ALL_IN_ONE /* [edit] */ + +void ECRYPT_encrypt_packet( + ECRYPT_ctx* ctx, + const u8* iv, + const u8* plaintext, + u8* ciphertext, + u32 msglen); + +void ECRYPT_decrypt_packet( + ECRYPT_ctx* ctx, + const u8* iv, + const u8* ciphertext, + u8* plaintext, + u32 msglen); + +/* + * Encryption/decryption of blocks. + * + * By default, these functions are defined as macros. If you want to + * provide a different implementation, please undef the + * ECRYPT_USES_DEFAULT_BLOCK_MACROS flag and implement the functions + * declared below. 
+ */ + +#define ECRYPT_BLOCKLENGTH 64 /* [edit] */ + +#define ECRYPT_USES_DEFAULT_BLOCK_MACROS /* [edit] */ +#ifdef ECRYPT_USES_DEFAULT_BLOCK_MACROS + +#define ECRYPT_encrypt_blocks(ctx, plaintext, ciphertext, blocks) \ + ECRYPT_encrypt_bytes(ctx, plaintext, ciphertext, \ + (blocks) * ECRYPT_BLOCKLENGTH) + +#define ECRYPT_decrypt_blocks(ctx, ciphertext, plaintext, blocks) \ + ECRYPT_decrypt_bytes(ctx, ciphertext, plaintext, \ + (blocks) * ECRYPT_BLOCKLENGTH) + +#ifdef ECRYPT_GENERATES_KEYSTREAM + +#define ECRYPT_keystream_blocks(ctx, keystream, blocks) \ + ECRYPT_keystream_bytes(ctx, keystream, \ + (blocks) * ECRYPT_BLOCKLENGTH) + +#endif + +#else + +void ECRYPT_encrypt_blocks( + ECRYPT_ctx* ctx, + const u8* plaintext, + u8* ciphertext, + u32 blocks); /* Message length in blocks. */ + +void ECRYPT_decrypt_blocks( + ECRYPT_ctx* ctx, + const u8* ciphertext, + u8* plaintext, + u32 blocks); /* Message length in blocks. */ + +#ifdef ECRYPT_GENERATES_KEYSTREAM + +void ECRYPT_keystream_blocks( + ECRYPT_ctx* ctx, + const u8* keystream, + u32 blocks); /* Keystream length in blocks. */ + +#endif + +#endif + +/* + * If your cipher can be implemented in different ways, you can use + * the ECRYPT_VARIANT parameter to allow the user to choose between + * them at compile time (e.g., gcc -DECRYPT_VARIANT=3 ...). Please + * only use this possibility if you really think it could make a + * significant difference and keep the number of variants + * (ECRYPT_MAXVARIANT) as small as possible (definitely not more than + * 10). Note also that all variants should have exactly the same + * external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.). 
+ */ +#define ECRYPT_MAXVARIANT 1 /* [edit] */ + +#ifndef ECRYPT_VARIANT +#define ECRYPT_VARIANT 1 +#endif + +#if (ECRYPT_VARIANT > ECRYPT_MAXVARIANT) +#error this variant does not exist +#endif + +/* ------------------------------------------------------------------------- */ + +#endif diff --git a/src/crypto/astrobwt/salsa20_ref/salsa20.c b/src/crypto/astrobwt/salsa20_ref/salsa20.c new file mode 100644 index 000000000..bfced2223 --- /dev/null +++ b/src/crypto/astrobwt/salsa20_ref/salsa20.c @@ -0,0 +1,219 @@ +/* +salsa20-merged.c version 20051118 +D. J. Bernstein +Public domain. +*/ + +#include "ecrypt-sync.h" + +#define ROTATE(v,c) (ROTL32(v,c)) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +void ECRYPT_init(void) +{ + return; +} + +static const char sigma[16] = "expand 32-byte k"; +static const char tau[16] = "expand 16-byte k"; + +void ECRYPT_keysetup(ECRYPT_ctx *x,const u8 *k,u32 kbits,u32 ivbits) +{ + const char *constants; + + x->input[1] = U8TO32_LITTLE(k + 0); + x->input[2] = U8TO32_LITTLE(k + 4); + x->input[3] = U8TO32_LITTLE(k + 8); + x->input[4] = U8TO32_LITTLE(k + 12); + if (kbits == 256) { /* recommended */ + k += 16; + constants = sigma; + } else { /* kbits == 128 */ + constants = tau; + } + x->input[11] = U8TO32_LITTLE(k + 0); + x->input[12] = U8TO32_LITTLE(k + 4); + x->input[13] = U8TO32_LITTLE(k + 8); + x->input[14] = U8TO32_LITTLE(k + 12); + x->input[0] = U8TO32_LITTLE(constants + 0); + x->input[5] = U8TO32_LITTLE(constants + 4); + x->input[10] = U8TO32_LITTLE(constants + 8); + x->input[15] = U8TO32_LITTLE(constants + 12); +} + +void ECRYPT_ivsetup(ECRYPT_ctx *x,const u8 *iv) +{ + x->input[6] = U8TO32_LITTLE(iv + 0); + x->input[7] = U8TO32_LITTLE(iv + 4); + x->input[8] = 0; + x->input[9] = 0; +} + +void ECRYPT_encrypt_bytes(ECRYPT_ctx *x,const u8 *m,u8 *c,u32 bytes) +{ + u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, 
j10, j11, j12, j13, j14, j15; + u8 *ctarget = 0; + u8 tmp[64]; + int i; + + if (!bytes) return; + + j0 = x->input[0]; + j1 = x->input[1]; + j2 = x->input[2]; + j3 = x->input[3]; + j4 = x->input[4]; + j5 = x->input[5]; + j6 = x->input[6]; + j7 = x->input[7]; + j8 = x->input[8]; + j9 = x->input[9]; + j10 = x->input[10]; + j11 = x->input[11]; + j12 = x->input[12]; + j13 = x->input[13]; + j14 = x->input[14]; + j15 = x->input[15]; + + for (;;) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) tmp[i] = m[i]; + m = tmp; + ctarget = c; + c = tmp; + } + x0 = j0; + x1 = j1; + x2 = j2; + x3 = j3; + x4 = j4; + x5 = j5; + x6 = j6; + x7 = j7; + x8 = j8; + x9 = j9; + x10 = j10; + x11 = j11; + x12 = j12; + x13 = j13; + x14 = j14; + x15 = j15; + for (i = 20;i > 0;i -= 2) { + x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7)); + x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9)); + x12 = XOR(x12,ROTATE(PLUS( x8, x4),13)); + x0 = XOR( x0,ROTATE(PLUS(x12, x8),18)); + x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7)); + x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9)); + x1 = XOR( x1,ROTATE(PLUS(x13, x9),13)); + x5 = XOR( x5,ROTATE(PLUS( x1,x13),18)); + x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7)); + x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9)); + x6 = XOR( x6,ROTATE(PLUS( x2,x14),13)); + x10 = XOR(x10,ROTATE(PLUS( x6, x2),18)); + x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7)); + x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9)); + x11 = XOR(x11,ROTATE(PLUS( x7, x3),13)); + x15 = XOR(x15,ROTATE(PLUS(x11, x7),18)); + x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7)); + x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9)); + x3 = XOR( x3,ROTATE(PLUS( x2, x1),13)); + x0 = XOR( x0,ROTATE(PLUS( x3, x2),18)); + x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7)); + x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9)); + x4 = XOR( x4,ROTATE(PLUS( x7, x6),13)); + x5 = XOR( x5,ROTATE(PLUS( x4, x7),18)); + x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7)); + x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9)); + x9 = XOR( x9,ROTATE(PLUS( x8,x11),13)); + x10 = XOR(x10,ROTATE(PLUS( x9, x8),18)); + x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7)); + x13 
= XOR(x13,ROTATE(PLUS(x12,x15), 9)); + x14 = XOR(x14,ROTATE(PLUS(x13,x12),13)); + x15 = XOR(x15,ROTATE(PLUS(x14,x13),18)); + } + x0 = PLUS(x0,j0); + x1 = PLUS(x1,j1); + x2 = PLUS(x2,j2); + x3 = PLUS(x3,j3); + x4 = PLUS(x4,j4); + x5 = PLUS(x5,j5); + x6 = PLUS(x6,j6); + x7 = PLUS(x7,j7); + x8 = PLUS(x8,j8); + x9 = PLUS(x9,j9); + x10 = PLUS(x10,j10); + x11 = PLUS(x11,j11); + x12 = PLUS(x12,j12); + x13 = PLUS(x13,j13); + x14 = PLUS(x14,j14); + x15 = PLUS(x15,j15); + + x0 = XOR(x0,U8TO32_LITTLE(m + 0)); + x1 = XOR(x1,U8TO32_LITTLE(m + 4)); + x2 = XOR(x2,U8TO32_LITTLE(m + 8)); + x3 = XOR(x3,U8TO32_LITTLE(m + 12)); + x4 = XOR(x4,U8TO32_LITTLE(m + 16)); + x5 = XOR(x5,U8TO32_LITTLE(m + 20)); + x6 = XOR(x6,U8TO32_LITTLE(m + 24)); + x7 = XOR(x7,U8TO32_LITTLE(m + 28)); + x8 = XOR(x8,U8TO32_LITTLE(m + 32)); + x9 = XOR(x9,U8TO32_LITTLE(m + 36)); + x10 = XOR(x10,U8TO32_LITTLE(m + 40)); + x11 = XOR(x11,U8TO32_LITTLE(m + 44)); + x12 = XOR(x12,U8TO32_LITTLE(m + 48)); + x13 = XOR(x13,U8TO32_LITTLE(m + 52)); + x14 = XOR(x14,U8TO32_LITTLE(m + 56)); + x15 = XOR(x15,U8TO32_LITTLE(m + 60)); + + j8 = PLUSONE(j8); + if (!j8) { + j9 = PLUSONE(j9); + /* stopping at 2^70 bytes per nonce is user's responsibility */ + } + + U32TO8_LITTLE(c + 0,x0); + U32TO8_LITTLE(c + 4,x1); + U32TO8_LITTLE(c + 8,x2); + U32TO8_LITTLE(c + 12,x3); + U32TO8_LITTLE(c + 16,x4); + U32TO8_LITTLE(c + 20,x5); + U32TO8_LITTLE(c + 24,x6); + U32TO8_LITTLE(c + 28,x7); + U32TO8_LITTLE(c + 32,x8); + U32TO8_LITTLE(c + 36,x9); + U32TO8_LITTLE(c + 40,x10); + U32TO8_LITTLE(c + 44,x11); + U32TO8_LITTLE(c + 48,x12); + U32TO8_LITTLE(c + 52,x13); + U32TO8_LITTLE(c + 56,x14); + U32TO8_LITTLE(c + 60,x15); + + if (bytes <= 64) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) ctarget[i] = c[i]; + } + x->input[8] = j8; + x->input[9] = j9; + return; + } + bytes -= 64; + c += 64; + m += 64; + } +} + +void ECRYPT_decrypt_bytes(ECRYPT_ctx *x,const u8 *c,u8 *m,u32 bytes) +{ + ECRYPT_encrypt_bytes(x,c,m,bytes); +} + +void 
ECRYPT_keystream_bytes(ECRYPT_ctx *x,u8 *stream,u32 bytes) +{ + u32 i; + for (i = 0; i < bytes; ++i) stream[i] = 0; + ECRYPT_encrypt_bytes(x,stream,stream,bytes); +} diff --git a/src/crypto/astrobwt/sha3.cpp b/src/crypto/astrobwt/sha3.cpp new file mode 100644 index 000000000..bc7a8dddb --- /dev/null +++ b/src/crypto/astrobwt/sha3.cpp @@ -0,0 +1,258 @@ +/* ------------------------------------------------------------------------- + * Works when compiled for either 32-bit or 64-bit targets, optimized for + * 64 bit. + * + * Canonical implementation of Init/Update/Finalize for SHA-3 byte input. + * + * SHA3-256, SHA3-384, SHA3-512 are implemented. SHA3-224 can easily be added. + * + * Based on code from http://keccak.noekeon.org/ . + * + * I place the code that I wrote into public domain, free to use. + * + * I would appreciate if you give credits to this work if you used it to + * write or test your code. + * + * Aug 2015. Andrey Jivsov. crypto@brainhub.org + * ---------------------------------------------------------------------- */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> + +#include "sha3.h" +#include "crypto/common/keccak.h" + +#define SHA3_ASSERT( x ) +#if defined(_MSC_VER) +#define SHA3_TRACE( format, ...) +#define SHA3_TRACE_BUF( format, buf, l, ...) +#else +#define SHA3_TRACE(format, args...) +#define SHA3_TRACE_BUF(format, buf, l, args...) +#endif + +/* + * This flag is used to configure "pure" Keccak, as opposed to NIST SHA3. 
+ */ +#define SHA3_USE_KECCAK_FLAG 0x80000000 +#define SHA3_CW(x) ((x) & (~SHA3_USE_KECCAK_FLAG)) + + +#if defined(_MSC_VER) +#define SHA3_CONST(x) x +#else +#define SHA3_CONST(x) x##L +#endif + +#define KECCAK_ROUNDS 24 + + +/* *************************** Public Interface ************************ */ + +/* For Init or Reset call these: */ +sha3_return_t +sha3_Init(void *priv, unsigned bitSize) { + sha3_context *ctx = (sha3_context *) priv; + if( bitSize != 256 && bitSize != 384 && bitSize != 512 ) + return SHA3_RETURN_BAD_PARAMS; + memset(ctx, 0, sizeof(*ctx)); + ctx->capacityWords = 2 * bitSize / (8 * sizeof(uint64_t)); + return SHA3_RETURN_OK; +} + +void +sha3_Init256(void *priv) +{ + sha3_Init(priv, 256); +} + +void +sha3_Init384(void *priv) +{ + sha3_Init(priv, 384); +} + +void +sha3_Init512(void *priv) +{ + sha3_Init(priv, 512); +} + +SHA3_FLAGS +sha3_SetFlags(void *priv, SHA3_FLAGS flags) +{ + sha3_context *ctx = (sha3_context *) priv; + flags = static_cast<SHA3_FLAGS>(static_cast<int>(flags) & SHA3_FLAGS_KECCAK); + ctx->capacityWords |= (flags == SHA3_FLAGS_KECCAK ?
SHA3_USE_KECCAK_FLAG : 0); + return flags; +} + + +void +sha3_Update(void *priv, void const *bufIn, size_t len) +{ + sha3_context *ctx = (sha3_context *) priv; + + /* 0...7 -- how much is needed to have a word */ + unsigned old_tail = (8 - ctx->byteIndex) & 7; + + size_t words; + unsigned tail; + size_t i; + + const uint8_t *buf = reinterpret_cast<const uint8_t*>(bufIn); + + SHA3_TRACE_BUF("called to update with:", buf, len); + + SHA3_ASSERT(ctx->byteIndex < 8); + SHA3_ASSERT(ctx->wordIndex < sizeof(ctx->s) / sizeof(ctx->s[0])); + + if(len < old_tail) { /* have no complete word or haven't started + * the word yet */ + SHA3_TRACE("because %d<%d, store it and return", (unsigned)len, + (unsigned)old_tail); + /* endian-independent code follows: */ + while (len--) + ctx->saved |= (uint64_t) (*(buf++)) << ((ctx->byteIndex++) * 8); + SHA3_ASSERT(ctx->byteIndex < 8); + return; + } + + if(old_tail) { /* will have one word to process */ + SHA3_TRACE("completing one word with %d bytes", (unsigned)old_tail); + /* endian-independent code follows: */ + len -= old_tail; + while (old_tail--) + ctx->saved |= (uint64_t) (*(buf++)) << ((ctx->byteIndex++) * 8); + + /* now ready to add saved to the sponge */ + ctx->s[ctx->wordIndex] ^= ctx->saved; + SHA3_ASSERT(ctx->byteIndex == 8); + ctx->byteIndex = 0; + ctx->saved = 0; + if(++ctx->wordIndex == + (SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords))) { + xmrig::keccakf(ctx->s, KECCAK_ROUNDS); + ctx->wordIndex = 0; + } + } + + /* now work in full words directly from input */ + + SHA3_ASSERT(ctx->byteIndex == 0); + + words = len / sizeof(uint64_t); + tail = len - words * sizeof(uint64_t); + + SHA3_TRACE("have %d full words to process", (unsigned)words); + + for(i = 0; i < words; i++, buf += sizeof(uint64_t)) { + const uint64_t t = (uint64_t) (buf[0]) | + ((uint64_t) (buf[1]) << 8 * 1) | + ((uint64_t) (buf[2]) << 8 * 2) | + ((uint64_t) (buf[3]) << 8 * 3) | + ((uint64_t) (buf[4]) << 8 * 4) | + ((uint64_t) (buf[5]) << 8 * 5) | + 
((uint64_t) (buf[6]) << 8 * 6) | + ((uint64_t) (buf[7]) << 8 * 7); +#if defined(__x86_64__ ) || defined(__i386__) + SHA3_ASSERT(memcmp(&t, buf, 8) == 0); +#endif + ctx->s[ctx->wordIndex] ^= t; + if(++ctx->wordIndex == + (SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords))) { + xmrig::keccakf(ctx->s, KECCAK_ROUNDS); + ctx->wordIndex = 0; + } + } + + SHA3_TRACE("have %d bytes left to process, save them", (unsigned)tail); + + /* finally, save the partial word */ + SHA3_ASSERT(ctx->byteIndex == 0 && tail < 8); + while (tail--) { + SHA3_TRACE("Store byte %02x '%c'", *buf, *buf); + ctx->saved |= (uint64_t) (*(buf++)) << ((ctx->byteIndex++) * 8); + } + SHA3_ASSERT(ctx->byteIndex < 8); + SHA3_TRACE("Have saved=0x%016" PRIx64 " at the end", ctx->saved); +} + +/* This is simply the 'update' with the padding block. + * The padding block is 0x01 || 0x00* || 0x80. First 0x01 and last 0x80 + * bytes are always present, but they can be the same byte. + */ +void const * +sha3_Finalize(void *priv) +{ + sha3_context *ctx = (sha3_context *) priv; + + SHA3_TRACE("called with %d bytes in the buffer", ctx->byteIndex); + + /* Append 2-bit suffix 01, per SHA-3 spec. Instead of 1 for padding we + * use 1<<2 below. The 0x02 below corresponds to the suffix 01. + * Overall, we feed 0, then 1, and finally 1 to start padding. Without + * M || 01, we would simply use 1 to start padding. */ + + uint64_t t; + + if( ctx->capacityWords & SHA3_USE_KECCAK_FLAG ) { + /* Keccak version */ + t = (uint64_t)(((uint64_t) 1) << (ctx->byteIndex * 8)); + } + else { + /* SHA3 version */ + t = (uint64_t)(((uint64_t)(0x02 | (1 << 2))) << ((ctx->byteIndex) * 8)); + } + + ctx->s[ctx->wordIndex] ^= ctx->saved ^ t; + + ctx->s[SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords) - 1] ^= + SHA3_CONST(0x8000000000000000UL); + xmrig::keccakf(ctx->s, KECCAK_ROUNDS); + + /* Return first bytes of the ctx->s. This conversion is not needed for + * little-endian platforms e.g. 
wrap with #if !defined(__BYTE_ORDER__) + * || !defined(__ORDER_LITTLE_ENDIAN__) || __BYTE_ORDER__!=__ORDER_LITTLE_ENDIAN__ + * ... the conversion below ... + * #endif */ + { + unsigned i; + for(i = 0; i < SHA3_KECCAK_SPONGE_WORDS; i++) { + const unsigned t1 = (uint32_t) ctx->s[i]; + const unsigned t2 = (uint32_t) ((ctx->s[i] >> 16) >> 16); + ctx->sb[i * 8 + 0] = (uint8_t) (t1); + ctx->sb[i * 8 + 1] = (uint8_t) (t1 >> 8); + ctx->sb[i * 8 + 2] = (uint8_t) (t1 >> 16); + ctx->sb[i * 8 + 3] = (uint8_t) (t1 >> 24); + ctx->sb[i * 8 + 4] = (uint8_t) (t2); + ctx->sb[i * 8 + 5] = (uint8_t) (t2 >> 8); + ctx->sb[i * 8 + 6] = (uint8_t) (t2 >> 16); + ctx->sb[i * 8 + 7] = (uint8_t) (t2 >> 24); + } + } + + SHA3_TRACE_BUF("Hash: (first 32 bytes)", ctx->sb, 256 / 8); + + return (ctx->sb); +} + +sha3_return_t sha3_HashBuffer( unsigned bitSize, enum SHA3_FLAGS flags, const void *in, unsigned inBytes, void *out, unsigned outBytes ) { + sha3_return_t err; + sha3_context c; + + err = sha3_Init(&c, bitSize); + if( err != SHA3_RETURN_OK ) + return err; + if( sha3_SetFlags(&c, flags) != flags ) { + return SHA3_RETURN_BAD_PARAMS; + } + sha3_Update(&c, in, inBytes); + const void *h = sha3_Finalize(&c); + + if(outBytes > bitSize/8) + outBytes = bitSize/8; + memcpy(out, h, outBytes); + return SHA3_RETURN_OK; +} diff --git a/src/crypto/astrobwt/sha3.h b/src/crypto/astrobwt/sha3.h new file mode 100644 index 000000000..491de05b2 --- /dev/null +++ b/src/crypto/astrobwt/sha3.h @@ -0,0 +1,71 @@ +#ifndef SHA3_H +#define SHA3_H + +/* ------------------------------------------------------------------------- + * Works when compiled for either 32-bit or 64-bit targets, optimized for + * 64 bit. + * + * Canonical implementation of Init/Update/Finalize for SHA-3 byte input. + * + * SHA3-256, SHA3-384, SHA-512 are implemented. SHA-224 can easily be added. + * + * Based on code from http://keccak.noekeon.org/ . + * + * I place the code that I wrote into public domain, free to use. 
+ * + * I would appreciate if you give credits to this work if you used it to + * write or test * your code. + * + * Aug 2015. Andrey Jivsov. crypto@brainhub.org + * ---------------------------------------------------------------------- */ + +/* 'Words' here refers to uint64_t */ +#define SHA3_KECCAK_SPONGE_WORDS \ + (((1600)/8/*bits to byte*/)/sizeof(uint64_t)) +typedef struct sha3_context_ { + uint64_t saved; /* the portion of the input message that we + * didn't consume yet */ + union { /* Keccak's state */ + uint64_t s[SHA3_KECCAK_SPONGE_WORDS]; + uint8_t sb[SHA3_KECCAK_SPONGE_WORDS * 8]; + }; + unsigned byteIndex; /* 0..7--the next byte after the set one + * (starts from 0; 0--none are buffered) */ + unsigned wordIndex; /* 0..24--the next word to integrate input + * (starts from 0) */ + unsigned capacityWords; /* the double size of the hash output in + * words (e.g. 16 for Keccak 512) */ +} sha3_context; + +enum SHA3_FLAGS { + SHA3_FLAGS_NONE=0, + SHA3_FLAGS_KECCAK=1 +}; + +enum SHA3_RETURN { + SHA3_RETURN_OK=0, + SHA3_RETURN_BAD_PARAMS=1 +}; +typedef enum SHA3_RETURN sha3_return_t; + +/* For Init or Reset call these: */ +sha3_return_t sha3_Init(void *priv, unsigned bitSize); + +void sha3_Init256(void *priv); +void sha3_Init384(void *priv); +void sha3_Init512(void *priv); + +SHA3_FLAGS sha3_SetFlags(void *priv, SHA3_FLAGS); + +void sha3_Update(void *priv, void const *bufIn, size_t len); + +void const *sha3_Finalize(void *priv); + +/* Single-call hashing */ +sha3_return_t sha3_HashBuffer( + unsigned bitSize, /* 256, 384, 512 */ + SHA3_FLAGS flags, /* SHA3_FLAGS_NONE or SHA3_FLAGS_KECCAK */ + const void *in, unsigned inBytes, + void *out, unsigned outBytes ); /* up to bitSize/8; truncation OK */ + +#endif diff --git a/src/crypto/cn/CnHash.cpp b/src/crypto/cn/CnHash.cpp index 162c0c72c..309dbe3ec 100644 --- a/src/crypto/cn/CnHash.cpp +++ b/src/crypto/cn/CnHash.cpp @@ -43,6 +43,11 @@ #endif +#ifdef XMRIG_ALGO_ASTROBWT +# include "crypto/astrobwt/AstroBWT.h" 
+#endif + + #define ADD_FN(algo) \ m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false>; \ m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true>; \ @@ -277,6 +282,11 @@ xmrig::CnHash::CnHash() m_map[Algorithm::AR2_WRKZ][AV_SINGLE_SOFT][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_WRKZ>; # endif +# ifdef XMRIG_ALGO_ASTROBWT + m_map[Algorithm::ASTROBWT_DERO][AV_SINGLE][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>; + m_map[Algorithm::ASTROBWT_DERO][AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>; +# endif + # ifdef XMRIG_FEATURE_ASM patchAsmVariants(); # endif diff --git a/src/crypto/cn/CryptoNight_test.h b/src/crypto/cn/CryptoNight_test.h index 6b5e8c04e..3e055b981 100644 --- a/src/crypto/cn/CryptoNight_test.h +++ b/src/crypto/cn/CryptoNight_test.h @@ -404,6 +404,24 @@ const static uint8_t argon2_wrkz_test_out[160] = { #endif +#ifdef XMRIG_ALGO_ASTROBWT +// "astrobwt/dero" +const static uint8_t astrobwt_dero_test_out[160] = { + 0x7E, 0x88, 0x44, 0xF2, 0xD6, 0xB7, 0xA4, 0x34, 0x98, 0xFE, 0x6D, 0x22, 0x65, 0x27, 0x68, 0x90, + 0x23, 0xDA, 0x8A, 0x52, 0xF9, 0xFC, 0x4E, 0xC6, 0x9E, 0x5A, 0xAA, 0xA6, 0x3E, 0xDC, 0xE1, 0xC1, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +#endif + + } // namespace xmrig diff --git a/src/crypto/common/Algorithm.cpp b/src/crypto/common/Algorithm.cpp index 2a5f7d5c2..5eaf7905e 100644 --- a/src/crypto/common/Algorithm.cpp +++ b/src/crypto/common/Algorithm.cpp @@ -124,6 +124,9 @@ static AlgoName const algorithm_names[] = { { "chukwa", nullptr, Algorithm::AR2_CHUKWA }, { "argon2/wrkz", nullptr, Algorithm::AR2_WRKZ }, # endif +# ifdef XMRIG_ALGO_ASTROBWT + { "astrobwt/dero", nullptr, Algorithm::ASTROBWT_DERO }, +# endif }; @@ -210,6 +213,18 @@ size_t xmrig::Algorithm::l3() const } # endif +# ifdef XMRIG_ALGO_ASTROBWT + if (f == ASTROBWT) { + switch (m_id) { + case ASTROBWT_DERO: + return oneMiB * 20; + + default: + break; + } + } +# endif + return 0; } @@ -228,6 +243,12 @@ uint32_t xmrig::Algorithm::maxIntensity() const } # endif +# ifdef XMRIG_ALGO_ASTROBWT + if (family() == ASTROBWT) { + return 1; + } +# endif + # ifdef XMRIG_ALGO_CN_GPU if (m_id == CN_GPU) { return 1; @@ -291,6 +312,11 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id) return ARGON2; # endif +# ifdef XMRIG_ALGO_ASTROBWT + case ASTROBWT_DERO: + return ASTROBWT; +# endif + default: break; } diff --git a/src/crypto/common/Algorithm.h b/src/crypto/common/Algorithm.h index 7f87283a8..5973a343f 100644 --- a/src/crypto/common/Algorithm.h +++ b/src/crypto/common/Algorithm.h @@ -71,6 +71,7 @@ public: RX_SFX, // "rx/sfx" RandomSFX (Safex Cash). AR2_CHUKWA, // "argon2/chukwa" Argon2id (Chukwa). 
AR2_WRKZ, // "argon2/wrkz" Argon2id (WRKZ) + ASTROBWT_DERO, // "astrobwt/dero" AstroBWT (Dero) MAX }; @@ -81,7 +82,8 @@ public: CN_HEAVY, CN_PICO, RANDOM_X, - ARGON2 + ARGON2, + ASTROBWT }; inline Algorithm() = default; diff --git a/src/crypto/common/Coin.cpp b/src/crypto/common/Coin.cpp index 32a1ff568..1588c4f0c 100644 --- a/src/crypto/common/Coin.cpp +++ b/src/crypto/common/Coin.cpp @@ -50,7 +50,8 @@ static CoinName const coin_names[] = { { "monero", Coin::MONERO }, { "xmr", Coin::MONERO }, { "arqma", Coin::ARQMA }, - { "arq", Coin::ARQMA } + { "arq", Coin::ARQMA }, + { "dero", Coin::DERO }, }; @@ -67,6 +68,9 @@ xmrig::Algorithm::Id xmrig::Coin::algorithm(uint8_t blobVersion) const case ARQMA: return (blobVersion >= 15) ? Algorithm::RX_ARQ : Algorithm::CN_PICO_0; + case DERO: + return (blobVersion >= 4) ? Algorithm::ASTROBWT_DERO : Algorithm::CN_0; + case INVALID: break; } diff --git a/src/crypto/common/Coin.h b/src/crypto/common/Coin.h index 3df3784bb..ed9c7ce55 100644 --- a/src/crypto/common/Coin.h +++ b/src/crypto/common/Coin.h @@ -40,7 +40,8 @@ public: enum Id : int { INVALID = -1, MONERO, - ARQMA + ARQMA, + DERO };