AstroBWT algorithm (DERO) support

To test:

- Download https://github.com/deroproject/derosuite/releases/tag/AstroBWT
- Run daemon with `--testnet` in command line

In config.json:
- "coin":"dero"
- "url":"127.0.0.1:30306"
- "daemon:"true"
This commit is contained in:
SChernykh 2020-02-29 22:41:24 +01:00
parent 2cd45a9e38
commit 14ef99ca67
29 changed files with 2316 additions and 11 deletions

View file

@ -9,6 +9,7 @@ option(WITH_CN_PICO "Enable CryptoNight-Pico algorithm" ON)
option(WITH_CN_GPU "Enable CryptoNight-GPU algorithm" ON) option(WITH_CN_GPU "Enable CryptoNight-GPU algorithm" ON)
option(WITH_RANDOMX "Enable RandomX algorithms family" ON) option(WITH_RANDOMX "Enable RandomX algorithms family" ON)
option(WITH_ARGON2 "Enable Argon2 algorithms family" ON) option(WITH_ARGON2 "Enable Argon2 algorithms family" ON)
option(WITH_ASTROBWT "Enable AstroBWT algorithms family" ON)
option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON) option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_DEBUG_LOG "Enable debug log output" OFF)
option(WITH_TLS "Enable OpenSSL support" ON) option(WITH_TLS "Enable OpenSSL support" ON)
@ -176,6 +177,7 @@ find_package(UV REQUIRED)
include(cmake/flags.cmake) include(cmake/flags.cmake)
include(cmake/randomx.cmake) include(cmake/randomx.cmake)
include(cmake/argon2.cmake) include(cmake/argon2.cmake)
include(cmake/astrobwt.cmake)
include(cmake/OpenSSL.cmake) include(cmake/OpenSSL.cmake)
include(cmake/asm.cmake) include(cmake/asm.cmake)
include(cmake/cn-gpu.cmake) include(cmake/cn-gpu.cmake)

36
cmake/astrobwt.cmake Normal file
View file

@ -0,0 +1,36 @@
if (WITH_ASTROBWT)
add_definitions(/DXMRIG_ALGO_ASTROBWT)
list(APPEND HEADERS_CRYPTO
src/crypto/astrobwt/AstroBWT.h
src/crypto/astrobwt/sha3.h
)
list(APPEND SOURCES_CRYPTO
src/crypto/astrobwt/AstroBWT.cpp
src/crypto/astrobwt/sha3.cpp
)
if (XMRIG_ARM)
list(APPEND HEADERS_CRYPTO
src/crypto/astrobwt/salsa20_ref/ecrypt-config.h
src/crypto/astrobwt/salsa20_ref/ecrypt-machine.h
src/crypto/astrobwt/salsa20_ref/ecrypt-portable.h
src/crypto/astrobwt/salsa20_ref/ecrypt-sync.h
)
list(APPEND SOURCES_CRYPTO
src/crypto/astrobwt/salsa20_ref/salsa20.c
)
else()
list(APPEND HEADERS_CRYPTO
src/crypto/astrobwt/Salsa20.hpp
)
list(APPEND SOURCES_CRYPTO
src/crypto/astrobwt/Salsa20.cpp
)
endif()
else()
remove_definitions(/DXMRIG_ALGO_ASTROBWT)
endif()

View file

@ -165,6 +165,7 @@ void xmrig::CpuConfig::generate()
count += xmrig::generate<Algorithm::CN_PICO>(m_threads, m_limit); count += xmrig::generate<Algorithm::CN_PICO>(m_threads, m_limit);
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, m_limit); count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, m_limit);
count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit); count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit);
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, m_limit);
m_shouldSave = count > 0; m_shouldSave = count > 0;
} }

View file

@ -143,6 +143,14 @@ size_t inline generate<Algorithm::ARGON2>(Threads<CpuThreads> &threads, uint32_t
#endif #endif
#ifdef XMRIG_ALGO_ASTROBWT
template<>
size_t inline generate<Algorithm::ASTROBWT>(Threads<CpuThreads>& threads, uint32_t limit)
{
return generate("astrobwt", threads, Algorithm::ASTROBWT_DERO, limit);
}
#endif
} /* namespace xmrig */ } /* namespace xmrig */

View file

@ -44,6 +44,11 @@
#endif #endif
#ifdef XMRIG_ALGO_ASTROBWT
# include "crypto/astrobwt/AstroBWT.h"
#endif
namespace xmrig { namespace xmrig {
static constexpr uint32_t kReserveCount = 32768; static constexpr uint32_t kReserveCount = 32768;
@ -180,6 +185,12 @@ bool xmrig::CpuWorker<N>::selfTest()
} }
# endif # endif
# ifdef XMRIG_ALGO_ASTROBWT
if (m_algorithm.family() == Algorithm::ASTROBWT) {
return verify(Algorithm::ASTROBWT_DERO, astrobwt_dero_test_out);
}
# endif
return false; return false;
} }

View file

@ -172,6 +172,17 @@ xmrig::CpuThreads xmrig::AdvancedCpuInfo::threads(const Algorithm &algorithm, ui
size_t cache = 0; size_t cache = 0;
size_t count = 0; size_t count = 0;
# ifdef XMRIG_ALGO_ASTROBWT
if (algorithm == Algorithm::ASTROBWT_DERO) {
CpuThreads t;
count = threads();
for (size_t i = 0; i < count; ++i) {
t.add(i, 0);
}
return t;
}
# endif
if (m_L3) { if (m_L3) {
cache = m_L2_exclusive ? (m_L2 + m_L3) : m_L3; cache = m_L2_exclusive ? (m_L2 + m_L3) : m_L3;
} }

View file

@ -258,5 +258,15 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3
} }
# endif # endif
# ifdef XMRIG_ALGO_ASTROBWT
if (algorithm.family() == Algorithm::ASTROBWT) {
CpuThreads threads;
for (size_t i = 0; i < count; ++i) {
threads.add(i, 0);
}
return threads;
}
# endif
return CpuThreads(std::max<size_t>(count / 2, 1), 1); return CpuThreads(std::max<size_t>(count / 2, 1), 1);
} }

View file

@ -216,6 +216,12 @@ bool xmrig::HwlocCpuInfo::membind(hwloc_const_bitmap_t nodeset)
xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const xmrig::CpuThreads xmrig::HwlocCpuInfo::threads(const Algorithm &algorithm, uint32_t limit) const
{ {
# ifdef XMRIG_ALGO_ASTROBWT
if (algorithm == Algorithm::ASTROBWT_DERO) {
return BasicCpuInfo::threads(algorithm, limit);
}
# endif
if (L2() == 0 && L3() == 0) { if (L2() == 0 && L3() == 0) {
return BasicCpuInfo::threads(algorithm, limit); return BasicCpuInfo::threads(algorithm, limit);
} }

View file

@ -24,6 +24,7 @@
#define ALGO_RX_SFX 23 #define ALGO_RX_SFX 23
#define ALGO_AR2_CHUKWA 24 #define ALGO_AR2_CHUKWA 24
#define ALGO_AR2_WRKZ 25 #define ALGO_AR2_WRKZ 25
#define ALGO_ASTROBWT_DERO 26
#define FAMILY_UNKNOWN 0 #define FAMILY_UNKNOWN 0
#define FAMILY_CN 1 #define FAMILY_CN 1
@ -32,3 +33,4 @@
#define FAMILY_CN_PICO 4 #define FAMILY_CN_PICO 4
#define FAMILY_RANDOM_X 5 #define FAMILY_RANDOM_X 5
#define FAMILY_ARGON2 6 #define FAMILY_ARGON2 6
#define FAMILY_ASTROBWT 7

View file

@ -140,6 +140,7 @@ void xmrig::HttpClient::handshake()
if (!body.empty()) { if (!body.empty()) {
headers.insert({ "Content-Length", std::to_string(body.size()) }); headers.insert({ "Content-Length", std::to_string(body.size()) });
headers.insert({ "Content-Type", "application/json" });
} }
std::stringstream ss; std::stringstream ss;

View file

@ -59,12 +59,14 @@ static const char *kHash = "hash";
static const char *kHeight = "height"; static const char *kHeight = "height";
static const char *kJsonRPC = "/json_rpc"; static const char *kJsonRPC = "/json_rpc";
static const size_t BlobReserveSize = 8;
} }
xmrig::DaemonClient::DaemonClient(int id, IClientListener *listener) : xmrig::DaemonClient::DaemonClient(int id, IClientListener *listener) :
BaseClient(id, listener), BaseClient(id, listener),
m_monero(true) m_apiVersion(API_MONERO)
{ {
m_httpListener = std::make_shared<HttpListener>(this); m_httpListener = std::make_shared<HttpListener>(this);
m_timer = new Timer(this); m_timer = new Timer(this);
@ -106,14 +108,21 @@ int64_t xmrig::DaemonClient::submit(const JobResult &result)
# ifdef XMRIG_PROXY_PROJECT # ifdef XMRIG_PROXY_PROJECT
memcpy(m_blocktemplate.data() + 78, result.nonce, 8); memcpy(m_blocktemplate.data() + 78, result.nonce, 8);
# else # else
Buffer::toHex(reinterpret_cast<const uint8_t *>(&result.nonce), 4, m_blocktemplate.data() + 78); char* data = (m_apiVersion == API_DERO) ? m_blockhashingblob.data() : m_blocktemplate.data();
Buffer::toHex(reinterpret_cast<const uint8_t *>(&result.nonce), 4, data + 78);
# endif # endif
using namespace rapidjson; using namespace rapidjson;
Document doc(kObjectType); Document doc(kObjectType);
Value params(kArrayType); Value params(kArrayType);
params.PushBack(m_blocktemplate.toJSON(), doc.GetAllocator()); if (m_apiVersion == API_DERO) {
params.PushBack(m_blocktemplate.toJSON(), doc.GetAllocator());
params.PushBack(m_blockhashingblob.toJSON(), doc.GetAllocator());
}
else {
params.PushBack(m_blocktemplate.toJSON(), doc.GetAllocator());
}
JsonRequest::create(doc, m_sequence, "submitblock", params); JsonRequest::create(doc, m_sequence, "submitblock", params);
@ -131,6 +140,10 @@ int64_t xmrig::DaemonClient::submit(const JobResult &result)
void xmrig::DaemonClient::connect() void xmrig::DaemonClient::connect()
{ {
if ((m_pool.algorithm() == Algorithm::ASTROBWT_DERO) || (m_pool.coin() == Coin::DERO)) {
m_apiVersion = API_DERO;
}
setState(ConnectingState); setState(ConnectingState);
getBlockTemplate(); getBlockTemplate();
} }
@ -172,7 +185,7 @@ void xmrig::DaemonClient::onHttpData(const HttpData &data)
if (data.method == HTTP_GET) { if (data.method == HTTP_GET) {
if (data.url == kGetHeight) { if (data.url == kGetHeight) {
if (!doc.HasMember(kHash)) { if (!doc.HasMember(kHash)) {
m_monero = false; m_apiVersion = API_CRYPTONOTE_DEFAULT;
return send(HTTP_GET, kGetInfo); return send(HTTP_GET, kGetInfo);
} }
@ -200,7 +213,21 @@ void xmrig::DaemonClient::onTimer(const Timer *)
getBlockTemplate(); getBlockTemplate();
} }
else if (m_state == ConnectedState) { else if (m_state == ConnectedState) {
send(HTTP_GET, m_monero ? kGetHeight : kGetInfo); if (m_apiVersion == API_DERO) {
using namespace rapidjson;
Document doc(kObjectType);
auto& allocator = doc.GetAllocator();
doc.AddMember("id", m_sequence, allocator);
doc.AddMember("jsonrpc", "2.0", allocator);
doc.AddMember("method", "get_info", allocator);
send(HTTP_POST, kJsonRPC, doc);
++m_sequence;
}
else {
send(HTTP_GET, (m_apiVersion == API_MONERO) ? kGetHeight : kGetInfo);
}
} }
} }
@ -216,7 +243,14 @@ bool xmrig::DaemonClient::parseJob(const rapidjson::Value &params, int *code)
Job job(false, m_pool.algorithm(), String()); Job job(false, m_pool.algorithm(), String());
String blocktemplate = Json::getString(params, kBlocktemplateBlob); String blocktemplate = Json::getString(params, kBlocktemplateBlob);
if (blocktemplate.isNull() || !job.setBlob(Json::getString(params, "blockhashing_blob"))) {
m_blockhashingblob = Json::getString(params, "blockhashing_blob");
if (m_apiVersion == API_DERO) {
const uint64_t offset = Json::getUint64(params, "reserved_offset");
Buffer::toHex(Buffer::randomBytes(BlobReserveSize).data(), BlobReserveSize, m_blockhashingblob.data() + offset * 2);
}
if (blocktemplate.isNull() || !job.setBlob(m_blockhashingblob)) {
*code = 4; *code = 4;
return false; return false;
} }
@ -263,6 +297,13 @@ bool xmrig::DaemonClient::parseResponse(int64_t id, const rapidjson::Value &resu
return false; return false;
} }
if (result.HasMember("top_block_hash")) {
if (m_prevHash != Json::getString(result, "top_block_hash")) {
getBlockTemplate();
}
return true;
}
int code = -1; int code = -1;
if (result.HasMember(kBlocktemplateBlob) && parseJob(result, &code)) { if (result.HasMember(kBlocktemplateBlob) && parseJob(result, &code)) {
return true; return true;
@ -286,7 +327,12 @@ int64_t xmrig::DaemonClient::getBlockTemplate()
Value params(kObjectType); Value params(kObjectType);
params.AddMember("wallet_address", m_user.toJSON(), allocator); params.AddMember("wallet_address", m_user.toJSON(), allocator);
params.AddMember("extra_nonce", Buffer::randomBytes(8).toHex().toJSON(doc), allocator); if (m_apiVersion == API_DERO) {
params.AddMember("reserve_size", BlobReserveSize, allocator);
}
else {
params.AddMember("extra_nonce", Buffer::randomBytes(BlobReserveSize).toHex().toJSON(doc), allocator);
}
JsonRequest::create(doc, m_sequence, "getblocktemplate", params); JsonRequest::create(doc, m_sequence, "getblocktemplate", params);

View file

@ -76,9 +76,15 @@ private:
void send(int method, const char *url, const rapidjson::Document &doc); void send(int method, const char *url, const rapidjson::Document &doc);
void setState(SocketState state); void setState(SocketState state);
bool m_monero; enum {
API_CRYPTONOTE_DEFAULT,
API_MONERO,
API_DERO,
} m_apiVersion;
std::shared_ptr<IHttpListener> m_httpListener; std::shared_ptr<IHttpListener> m_httpListener;
String m_blocktemplate; String m_blocktemplate;
String m_blockhashingblob;
String m_prevHash; String m_prevHash;
String m_tlsFingerprint; String m_tlsFingerprint;
String m_tlsVersion; String m_tlsVersion;

View file

@ -0,0 +1,207 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 tevador <tevador@gmail.com>
* Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
* Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "AstroBWT.h"
#include "sha3.h"
#include "crypto/cn/CryptoNight.h"
constexpr int STAGE1_SIZE = 147253;
constexpr int ALLOCATION_SIZE = (STAGE1_SIZE + 1048576) + (128 - (STAGE1_SIZE & 63));
constexpr int COUNTING_SORT_BITS = 10;
constexpr int COUNTING_SORT_SIZE = 1 << COUNTING_SORT_BITS;
#ifdef _MSC_VER
#include <stdlib.h>
#define bswap_64(x) _byteswap_uint64(x)
#elif defined __GNUC__
#define bswap_64(x) __builtin_bswap64(x)
#else
#include <byteswap.h>
#endif
#ifdef XMRIG_ARM
extern "C" {
#include "salsa20_ref/ecrypt-sync.h"
}
static void Salsa20_XORKeyStream(const void* key, void* output, size_t size)
{
uint8_t iv[8] = {};
ECRYPT_ctx ctx;
ECRYPT_keysetup(&ctx, static_cast<const uint8_t*>(key), 256, 64);
ECRYPT_ivsetup(&ctx, iv);
ECRYPT_keystream_bytes(&ctx, static_cast<uint8_t*>(output), size);
memset(static_cast<uint8_t*>(output) + size, 0, 16);
}
#else
#include "Salsa20.hpp"
static void Salsa20_XORKeyStream(const void* key, void* output, size_t size)
{
const uint64_t iv = 0;
ZeroTier::Salsa20 s(key, &iv);
s.XORKeyStream(output, size);
memset(static_cast<uint8_t*>(output) + size, 0, 16);
}
#endif
void sort_indices(int N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indices)
{
uint32_t counters[2][COUNTING_SORT_SIZE] = {};
for (int i = 0; i < N; ++i)
{
const uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + i));
++counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)];
++counters[1][k >> (64 - COUNTING_SORT_BITS)];
}
uint32_t prev[2] = { counters[0][0], counters[1][0] };
counters[0][0] = prev[0] - 1;
counters[1][0] = prev[1] - 1;
for (int i = 1; i < COUNTING_SORT_SIZE; ++i)
{
const uint32_t cur[2] = { counters[0][i] + prev[0], counters[1][i] + prev[1] };
counters[0][i] = cur[0] - 1;
counters[1][i] = cur[1] - 1;
prev[0] = cur[0];
prev[1] = cur[1];
}
for (int i = N - 1; i >= 0; --i)
{
const uint64_t k = bswap_64(*reinterpret_cast<const uint64_t*>(v + i));
tmp_indices[counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]--] = (k & (static_cast<uint64_t>(-1) << 21)) | i;
}
for (int i = N - 1; i >= 0; --i)
{
const uint64_t data = tmp_indices[i];
indices[counters[1][data >> (64 - COUNTING_SORT_BITS)]--] = data;
}
auto smaller = [v](uint64_t a, uint64_t b)
{
const uint64_t value_a = a >> 21;
const uint64_t value_b = b >> 21;
if (value_a < value_b)
return true;
if (value_a > value_b)
return false;
const uint64_t data_a = bswap_64(*reinterpret_cast<const uint64_t*>(v + (a % (1 << 21)) + 5));
const uint64_t data_b = bswap_64(*reinterpret_cast<const uint64_t*>(v + (b % (1 << 21)) + 5));
return (data_a < data_b);
};
uint64_t prev_t = indices[0];
for (int i = 1; i < N; ++i)
{
uint64_t t = indices[i];
if (smaller(t, prev_t))
{
const uint64_t t2 = prev_t;
int j = i - 1;
do
{
indices[j + 1] = prev_t;
--j;
if (j < 0)
break;
prev_t = indices[j];
} while (smaller(t, prev_t));
indices[j + 1] = t;
t = t2;
}
prev_t = t;
}
}
void astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash)
{
uint8_t key[32];
uint8_t* scratchpad_ptr = (uint8_t*)(scratchpad) + 64;
uint8_t* stage1_output = scratchpad_ptr;
uint8_t* stage2_output = scratchpad_ptr;
uint64_t* indices = (uint64_t*)(scratchpad_ptr + ALLOCATION_SIZE);
uint64_t* tmp_indices = (uint64_t*)(scratchpad_ptr + ALLOCATION_SIZE * 9);
uint8_t* stage1_result = (uint8_t*)(tmp_indices);
uint8_t* stage2_result = (uint8_t*)(tmp_indices);
sha3_HashBuffer(256, SHA3_FLAGS_NONE, input_data, input_size, key, sizeof(key));
Salsa20_XORKeyStream(key, stage1_output, STAGE1_SIZE);
sort_indices(STAGE1_SIZE + 1, stage1_output, indices, tmp_indices);
{
const uint8_t* tmp = stage1_output - 1;
for (int i = 0; i <= STAGE1_SIZE; ++i)
stage1_result[i] = tmp[indices[i] & ((1 << 21) - 1)];
}
sha3_HashBuffer(256, SHA3_FLAGS_NONE, stage1_result, STAGE1_SIZE + 1, key, sizeof(key));
const int stage2_size = STAGE1_SIZE + (*(uint32_t*)(key) & 0xfffff);
Salsa20_XORKeyStream(key, stage2_output, stage2_size);
sort_indices(stage2_size + 1, stage2_output, indices, tmp_indices);
{
const uint8_t* tmp = stage2_output - 1;
int i = 0;
const int n = ((stage2_size + 1) / 4) * 4;
for (; i < n; i += 4)
{
stage2_result[i + 0] = tmp[indices[i + 0] & ((1 << 21) - 1)];
stage2_result[i + 1] = tmp[indices[i + 1] & ((1 << 21) - 1)];
stage2_result[i + 2] = tmp[indices[i + 2] & ((1 << 21) - 1)];
stage2_result[i + 3] = tmp[indices[i + 3] & ((1 << 21) - 1)];
}
for (; i <= stage2_size; ++i)
stage2_result[i] = tmp[indices[i] & ((1 << 21) - 1)];
}
sha3_HashBuffer(256, SHA3_FLAGS_NONE, stage2_result, stage2_size + 1, output_hash, 32);
}
template<>
void xmrig::astrobwt::single_hash<xmrig::Algorithm::ASTROBWT_DERO>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t)
{
astrobwt_dero(input, static_cast<uint32_t>(size), ctx[0]->memory, output);
}

View file

@ -0,0 +1,45 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 tevador <tevador@gmail.com>
* Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
* Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "crypto/common/Algorithm.h"
struct cryptonight_ctx;
namespace xmrig { namespace astrobwt {
template<Algorithm::Id ALGO>
void single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t);
template<>
void single_hash<Algorithm::ASTROBWT_DERO>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t);
}} // namespace xmrig::argon2

View file

@ -0,0 +1,352 @@
/*
* Based on public domain code available at: http://cr.yp.to/snuffle.html
*
* Modifications and C-native SSE macro based SSE implementation by
* Adam Ierymenko <adam.ierymenko@zerotier.com>.
*
* Additional modifications and code cleanup for AstroBWT by
* SChernykh <https://github.com/SChernykh>
*
* Since the original was public domain, this is too.
*/
#include "Salsa20.hpp"
// Statically compute and define SSE constants
class _s20sseconsts
{
public:
_s20sseconsts()
{
maskLo32 = _mm_shuffle_epi32(_mm_cvtsi32_si128(-1), _MM_SHUFFLE(1, 0, 1, 0));
maskHi32 = _mm_slli_epi64(maskLo32, 32);
}
__m128i maskLo32,maskHi32;
};
static const _s20sseconsts _S20SSECONSTANTS;
namespace ZeroTier {
void Salsa20::init(const void *key,const void *iv)
{
const uint32_t *const k = (const uint32_t *)key;
_state.i[0] = 0x61707865;
_state.i[1] = 0x3320646e;
_state.i[2] = 0x79622d32;
_state.i[3] = 0x6b206574;
_state.i[4] = k[3];
_state.i[5] = 0;
_state.i[6] = k[7];
_state.i[7] = k[2];
_state.i[8] = 0;
_state.i[9] = k[6];
_state.i[10] = k[1];
_state.i[11] = ((const uint32_t *)iv)[1];
_state.i[12] = k[5];
_state.i[13] = k[0];
_state.i[14] = ((const uint32_t *)iv)[0];
_state.i[15] = k[4];
}
void Salsa20::XORKeyStream(void *out,unsigned int bytes)
{
uint8_t tmp[64];
uint8_t *c = (uint8_t *)out;
uint8_t *ctarget = c;
unsigned int i;
if (!bytes)
return;
for (;;) {
if (bytes < 64) {
for (i = 0;i < bytes;++i)
tmp[i] = 0;
ctarget = c;
c = tmp;
}
__m128i X0 = _mm_loadu_si128((const __m128i *)&(_state.v[0]));
__m128i X1 = _mm_loadu_si128((const __m128i *)&(_state.v[1]));
__m128i X2 = _mm_loadu_si128((const __m128i *)&(_state.v[2]));
__m128i X3 = _mm_loadu_si128((const __m128i *)&(_state.v[3]));
__m128i T;
__m128i X0s = X0;
__m128i X1s = X1;
__m128i X2s = X2;
__m128i X3s = X3;
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
X0 = _mm_add_epi32(X0s,X0);
X1 = _mm_add_epi32(X1s,X1);
X2 = _mm_add_epi32(X2s,X2);
X3 = _mm_add_epi32(X3s,X3);
__m128i k02 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X0, 32), _mm_srli_epi64(X3, 32)), _MM_SHUFFLE(0, 1, 2, 3));
__m128i k13 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X1, 32), _mm_srli_epi64(X0, 32)), _MM_SHUFFLE(0, 1, 2, 3));
__m128i k20 = _mm_or_si128(_mm_and_si128(X2, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X1, _S20SSECONSTANTS.maskHi32));
__m128i k31 = _mm_or_si128(_mm_and_si128(X3, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X2, _S20SSECONSTANTS.maskHi32));
_mm_storeu_ps(reinterpret_cast<float *>(c),_mm_castsi128_ps(_mm_unpackhi_epi64(k02,k20)));
_mm_storeu_ps(reinterpret_cast<float *>(c) + 4,_mm_castsi128_ps(_mm_unpackhi_epi64(k13,k31)));
_mm_storeu_ps(reinterpret_cast<float *>(c) + 8,_mm_castsi128_ps(_mm_unpacklo_epi64(k20,k02)));
_mm_storeu_ps(reinterpret_cast<float *>(c) + 12,_mm_castsi128_ps(_mm_unpacklo_epi64(k31,k13)));
if (!(++_state.i[8])) {
++_state.i[5]; // state reordered for SSE
/* stopping at 2^70 bytes per nonce is user's responsibility */
}
if (bytes <= 64) {
if (bytes < 64) {
for (i = 0;i < bytes;++i)
ctarget[i] = c[i];
}
return;
}
bytes -= 64;
c += 64;
}
}
} // namespace ZeroTier

View file

@ -0,0 +1,52 @@
/*
* Based on public domain code available at: http://cr.yp.to/snuffle.html
*
* This therefore is public domain.
*/
#ifndef ZT_SALSA20_HPP
#define ZT_SALSA20_HPP
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <emmintrin.h>
namespace ZeroTier {
/**
* Salsa20 stream cipher
*/
class Salsa20
{
public:
/**
* @param key 256-bit (32 byte) key
* @param iv 64-bit initialization vector
*/
Salsa20(const void *key,const void *iv)
{
init(key,iv);
}
/**
* Initialize cipher
*
* @param key Key bits
* @param iv 64-bit initialization vector
*/
void init(const void *key,const void *iv);
void XORKeyStream(void *out,unsigned int bytes);
private:
union {
__m128i v[4];
uint32_t i[16];
} _state;
};
} // namespace ZeroTier
#endif

View file

@ -0,0 +1,272 @@
/* ecrypt-config.h */
/* *** Normally, it should not be necessary to edit this file. *** */
#ifndef ECRYPT_CONFIG
#define ECRYPT_CONFIG
/* ------------------------------------------------------------------------- */
/* Guess the endianness of the target architecture. */
/*
* The LITTLE endian machines:
*/
#if defined(__ultrix) /* Older MIPS */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__alpha) /* Alpha */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(i386) /* x86 (gcc) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__i386) /* x86 (gcc) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(_M_IX86) /* x86 (MSC, Borland) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(_MSC_VER) /* x86 (surely MSC) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */
#define ECRYPT_LITTLE_ENDIAN
/*
* The BIG endian machines:
*/
#elif defined(sun) /* Newer Sparc's */
#define ECRYPT_BIG_ENDIAN
#elif defined(__ppc__) /* PowerPC */
#define ECRYPT_BIG_ENDIAN
/*
* Finally machines with UNKNOWN endianness:
*/
#elif defined (_AIX) /* RS6000 */
#define ECRYPT_UNKNOWN
#elif defined(__hpux) /* HP-PA */
#define ECRYPT_UNKNOWN
#elif defined(__aux) /* 68K */
#define ECRYPT_UNKNOWN
#elif defined(__dgux) /* 88K (but P6 in latest boxes) */
#define ECRYPT_UNKNOWN
#elif defined(__sgi) /* Newer MIPS */
#define ECRYPT_UNKNOWN
#else /* Any other processor */
#define ECRYPT_UNKNOWN
#endif
/* ------------------------------------------------------------------------- */
/*
* Find minimal-width types to store 8-bit, 16-bit, 32-bit, and 64-bit
* integers.
*
* Note: to enable 64-bit types on 32-bit compilers, it might be
* necessary to switch from ISO C90 mode to ISO C99 mode (e.g., gcc
* -std=c99).
*/
#include <limits.h>
/* --- check char --- */
#if (UCHAR_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T char
#define U8C(v) (v##U)
#if (UCHAR_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (UCHAR_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T char
#define U16C(v) (v##U)
#endif
#if (UCHAR_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T char
#define U32C(v) (v##U)
#endif
#if (UCHAR_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T char
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check short --- */
#if (USHRT_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T short
#define U8C(v) (v##U)
#if (USHRT_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (USHRT_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T short
#define U16C(v) (v##U)
#endif
#if (USHRT_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T short
#define U32C(v) (v##U)
#endif
#if (USHRT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T short
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check int --- */
#if (UINT_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T int
#define U8C(v) (v##U)
#if (ULONG_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (UINT_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T int
#define U16C(v) (v##U)
#endif
#if (UINT_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T int
#define U32C(v) (v##U)
#endif
#if (UINT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T int
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check long --- */
#if (ULONG_MAX / 0xFUL > 0xFUL)
#ifndef I8T
#define I8T long
#define U8C(v) (v##UL)
#if (ULONG_MAX == 0xFFUL)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (ULONG_MAX / 0xFFUL > 0xFFUL)
#ifndef I16T
#define I16T long
#define U16C(v) (v##UL)
#endif
#if (ULONG_MAX / 0xFFFFUL > 0xFFFFUL)
#ifndef I32T
#define I32T long
#define U32C(v) (v##UL)
#endif
#if (ULONG_MAX / 0xFFFFFFFFUL > 0xFFFFFFFFUL)
#ifndef I64T
#define I64T long
#define U64C(v) (v##UL)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check long long --- */
#ifdef ULLONG_MAX
#if (ULLONG_MAX / 0xFULL > 0xFULL)
#ifndef I8T
#define I8T long long
#define U8C(v) (v##ULL)
#if (ULLONG_MAX == 0xFFULL)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (ULLONG_MAX / 0xFFULL > 0xFFULL)
#ifndef I16T
#define I16T long long
#define U16C(v) (v##ULL)
#endif
#if (ULLONG_MAX / 0xFFFFULL > 0xFFFFULL)
#ifndef I32T
#define I32T long long
#define U32C(v) (v##ULL)
#endif
#if (ULLONG_MAX / 0xFFFFFFFFULL > 0xFFFFFFFFULL)
#ifndef I64T
#define I64T long long
#define U64C(v) (v##ULL)
#endif
#endif
#endif
#endif
#endif
#endif
/* --- check __int64 --- */
#ifdef _UI64_MAX
#if (_UI64_MAX / 0xFFFFFFFFui64 > 0xFFFFFFFFui64)
#ifndef I64T
#define I64T __int64
#define U64C(v) (v##ui64)
#endif
#endif
#endif
/* ------------------------------------------------------------------------- */
#endif

View file

@ -0,0 +1,46 @@
/* ecrypt-machine.h */
/*
* This file is included by 'ecrypt-portable.h'. It allows to override
* the default macros for specific platforms. Please carefully check
* the machine code generated by your compiler (with optimisations
* turned on) before deciding to edit this file.
*/
/* ------------------------------------------------------------------------- */
#if (defined(ECRYPT_DEFAULT_ROT) && !defined(ECRYPT_MACHINE_ROT))
#define ECRYPT_MACHINE_ROT
#if (defined(WIN32) && defined(_MSC_VER))
#undef ROTL32
#undef ROTR32
#undef ROTL64
#undef ROTR64
#include <stdlib.h>
#define ROTL32(v, n) _lrotl(v, n)
#define ROTR32(v, n) _lrotr(v, n)
#define ROTL64(v, n) _rotl64(v, n)
#define ROTR64(v, n) _rotr64(v, n)
#endif
#endif
/* ------------------------------------------------------------------------- */
#if (defined(ECRYPT_DEFAULT_SWAP) && !defined(ECRYPT_MACHINE_SWAP))
#define ECRYPT_MACHINE_SWAP
/*
* If you want to overwrite the default swap macros, put it here. And so on.
*/
#endif
/* ------------------------------------------------------------------------- */

View file

@ -0,0 +1,303 @@
/* ecrypt-portable.h */
/*
* WARNING: the conversions defined below are implemented as macros,
* and should be used carefully. They should NOT be used with
* parameters which perform some action. E.g., the following two lines
* are not equivalent:
*
* 1) ++x; y = ROTL32(x, n);
* 2) y = ROTL32(++x, n);
*/
/*
* *** Please do not edit this file. ***
*
* The default macros can be overridden for specific architectures by
* editing 'ecrypt-machine.h'.
*/
#ifndef ECRYPT_PORTABLE
#define ECRYPT_PORTABLE
#include "ecrypt-config.h"
/* ------------------------------------------------------------------------- */
/*
* The following types are defined (if available):
*
* u8: unsigned integer type, at least 8 bits
* u16: unsigned integer type, at least 16 bits
* u32: unsigned integer type, at least 32 bits
* u64: unsigned integer type, at least 64 bits
*
* s8, s16, s32, s64 -> signed counterparts of u8, u16, u32, u64
*
* The selection of minimum-width integer types is taken care of by
* 'ecrypt-config.h'. Note: to enable 64-bit types on 32-bit
* compilers, it might be necessary to switch from ISO C90 mode to ISO
* C99 mode (e.g., gcc -std=c99).
*/
#ifdef I8T
typedef signed I8T s8;
typedef unsigned I8T u8;
#endif
#ifdef I16T
typedef signed I16T s16;
typedef unsigned I16T u16;
#endif
#ifdef I32T
typedef signed I32T s32;
typedef unsigned I32T u32;
#endif
#ifdef I64T
typedef signed I64T s64;
typedef unsigned I64T u64;
#endif
/*
* The following macros are used to obtain exact-width results.
*/
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U16V(v) ((u16)(v) & U16C(0xFFFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
#define U64V(v) ((u64)(v) & U64C(0xFFFFFFFFFFFFFFFF))
/* ------------------------------------------------------------------------- */
/*
* The following macros return words with their bits rotated over n
* positions to the left/right.
*/
#define ECRYPT_DEFAULT_ROT
#define ROTL8(v, n) \
(U8V((v) << (n)) | ((v) >> (8 - (n))))
#define ROTL16(v, n) \
(U16V((v) << (n)) | ((v) >> (16 - (n))))
#define ROTL32(v, n) \
(U32V((v) << (n)) | ((v) >> (32 - (n))))
#define ROTL64(v, n) \
(U64V((v) << (n)) | ((v) >> (64 - (n))))
#define ROTR8(v, n) ROTL8(v, 8 - (n))
#define ROTR16(v, n) ROTL16(v, 16 - (n))
#define ROTR32(v, n) ROTL32(v, 32 - (n))
#define ROTR64(v, n) ROTL64(v, 64 - (n))
#include "ecrypt-machine.h"
/* ------------------------------------------------------------------------- */
/*
* The following macros return a word with bytes in reverse order.
*/
#define ECRYPT_DEFAULT_SWAP
#define SWAP16(v) \
ROTL16(v, 8)
#define SWAP32(v) \
((ROTL32(v, 8) & U32C(0x00FF00FF)) | \
(ROTL32(v, 24) & U32C(0xFF00FF00)))
#ifdef ECRYPT_NATIVE64
#define SWAP64(v) \
((ROTL64(v, 8) & U64C(0x000000FF000000FF)) | \
(ROTL64(v, 24) & U64C(0x0000FF000000FF00)) | \
(ROTL64(v, 40) & U64C(0x00FF000000FF0000)) | \
(ROTL64(v, 56) & U64C(0xFF000000FF000000)))
#else
#define SWAP64(v) \
(((u64)SWAP32(U32V(v)) << 32) | (u64)SWAP32(U32V(v >> 32)))
#endif
#include "ecrypt-machine.h"
#define ECRYPT_DEFAULT_WTOW
#ifdef ECRYPT_LITTLE_ENDIAN
#define U16TO16_LITTLE(v) (v)
#define U32TO32_LITTLE(v) (v)
#define U64TO64_LITTLE(v) (v)
#define U16TO16_BIG(v) SWAP16(v)
#define U32TO32_BIG(v) SWAP32(v)
#define U64TO64_BIG(v) SWAP64(v)
#endif
#ifdef ECRYPT_BIG_ENDIAN
#define U16TO16_LITTLE(v) SWAP16(v)
#define U32TO32_LITTLE(v) SWAP32(v)
#define U64TO64_LITTLE(v) SWAP64(v)
#define U16TO16_BIG(v) (v)
#define U32TO32_BIG(v) (v)
#define U64TO64_BIG(v) (v)
#endif
#include "ecrypt-machine.h"
/*
* The following macros load words from an array of bytes with
* different types of endianness, and vice versa.
*/
#define ECRYPT_DEFAULT_BTOW
#if (!defined(ECRYPT_UNKNOWN) && defined(ECRYPT_I8T_IS_BYTE))
#define U8TO16_LITTLE(p) U16TO16_LITTLE(((u16*)(p))[0])
#define U8TO32_LITTLE(p) U32TO32_LITTLE(((u32*)(p))[0])
#define U8TO64_LITTLE(p) U64TO64_LITTLE(((u64*)(p))[0])
#define U8TO16_BIG(p) U16TO16_BIG(((u16*)(p))[0])
#define U8TO32_BIG(p) U32TO32_BIG(((u32*)(p))[0])
#define U8TO64_BIG(p) U64TO64_BIG(((u64*)(p))[0])
#define U16TO8_LITTLE(p, v) (((u16*)(p))[0] = U16TO16_LITTLE(v))
#define U32TO8_LITTLE(p, v) (((u32*)(p))[0] = U32TO32_LITTLE(v))
#define U64TO8_LITTLE(p, v) (((u64*)(p))[0] = U64TO64_LITTLE(v))
#define U16TO8_BIG(p, v) (((u16*)(p))[0] = U16TO16_BIG(v))
#define U32TO8_BIG(p, v) (((u32*)(p))[0] = U32TO32_BIG(v))
#define U64TO8_BIG(p, v) (((u64*)(p))[0] = U64TO64_BIG(v))
#else
#define U8TO16_LITTLE(p) \
(((u16)((p)[0]) ) | \
((u16)((p)[1]) << 8))
#define U8TO32_LITTLE(p) \
(((u32)((p)[0]) ) | \
((u32)((p)[1]) << 8) | \
((u32)((p)[2]) << 16) | \
((u32)((p)[3]) << 24))
#ifdef ECRYPT_NATIVE64
#define U8TO64_LITTLE(p) \
(((u64)((p)[0]) ) | \
((u64)((p)[1]) << 8) | \
((u64)((p)[2]) << 16) | \
((u64)((p)[3]) << 24) | \
((u64)((p)[4]) << 32) | \
((u64)((p)[5]) << 40) | \
((u64)((p)[6]) << 48) | \
((u64)((p)[7]) << 56))
#else
#define U8TO64_LITTLE(p) \
((u64)U8TO32_LITTLE(p) | ((u64)U8TO32_LITTLE((p) + 4) << 32))
#endif
#define U8TO16_BIG(p) \
(((u16)((p)[0]) << 8) | \
((u16)((p)[1]) ))
#define U8TO32_BIG(p) \
(((u32)((p)[0]) << 24) | \
((u32)((p)[1]) << 16) | \
((u32)((p)[2]) << 8) | \
((u32)((p)[3]) ))
#ifdef ECRYPT_NATIVE64
#define U8TO64_BIG(p) \
(((u64)((p)[0]) << 56) | \
((u64)((p)[1]) << 48) | \
((u64)((p)[2]) << 40) | \
((u64)((p)[3]) << 32) | \
((u64)((p)[4]) << 24) | \
((u64)((p)[5]) << 16) | \
((u64)((p)[6]) << 8) | \
((u64)((p)[7]) ))
#else
#define U8TO64_BIG(p) \
(((u64)U8TO32_BIG(p) << 32) | (u64)U8TO32_BIG((p) + 4))
#endif
#define U16TO8_LITTLE(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
} while (0)
#define U32TO8_LITTLE(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
(p)[2] = U8V((v) >> 16); \
(p)[3] = U8V((v) >> 24); \
} while (0)
#ifdef ECRYPT_NATIVE64
#define U64TO8_LITTLE(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
(p)[2] = U8V((v) >> 16); \
(p)[3] = U8V((v) >> 24); \
(p)[4] = U8V((v) >> 32); \
(p)[5] = U8V((v) >> 40); \
(p)[6] = U8V((v) >> 48); \
(p)[7] = U8V((v) >> 56); \
} while (0)
#else
#define U64TO8_LITTLE(p, v) \
do { \
U32TO8_LITTLE((p), U32V((v) )); \
U32TO8_LITTLE((p) + 4, U32V((v) >> 32)); \
} while (0)
#endif
#define U16TO8_BIG(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
} while (0)
#define U32TO8_BIG(p, v) \
do { \
(p)[0] = U8V((v) >> 24); \
(p)[1] = U8V((v) >> 16); \
(p)[2] = U8V((v) >> 8); \
(p)[3] = U8V((v) ); \
} while (0)
#ifdef ECRYPT_NATIVE64
#define U64TO8_BIG(p, v) \
do { \
(p)[0] = U8V((v) >> 56); \
(p)[1] = U8V((v) >> 48); \
(p)[2] = U8V((v) >> 40); \
(p)[3] = U8V((v) >> 32); \
(p)[4] = U8V((v) >> 24); \
(p)[5] = U8V((v) >> 16); \
(p)[6] = U8V((v) >> 8); \
(p)[7] = U8V((v) ); \
} while (0)
#else
#define U64TO8_BIG(p, v) \
do { \
U32TO8_BIG((p), U32V((v) >> 32)); \
U32TO8_BIG((p) + 4, U32V((v) )); \
} while (0)
#endif
#endif
#include "ecrypt-machine.h"
/* ------------------------------------------------------------------------- */
#endif

View file

@ -0,0 +1,279 @@
/* ecrypt-sync.h */
/*
* Header file for synchronous stream ciphers without authentication
* mechanism.
*
* *** Please only edit parts marked with "[edit]". ***
*/
#ifndef ECRYPT_SYNC
#define ECRYPT_SYNC
#include "ecrypt-portable.h"
/* ------------------------------------------------------------------------- */
/* Cipher parameters */
/*
* The name of your cipher.
*/
#define ECRYPT_NAME "Salsa20" /* [edit] */
#define ECRYPT_PROFILE "S!_H."
/*
* Specify which key and IV sizes are supported by your cipher. A user
* should be able to enumerate the supported sizes by running the
* following code:
*
* for (i = 0; ECRYPT_KEYSIZE(i) <= ECRYPT_MAXKEYSIZE; ++i)
* {
* keysize = ECRYPT_KEYSIZE(i);
*
* ...
* }
*
* All sizes are in bits.
*/
#define ECRYPT_MAXKEYSIZE 256 /* [edit] */
#define ECRYPT_KEYSIZE(i) (128 + (i)*128) /* [edit] */
#define ECRYPT_MAXIVSIZE 64 /* [edit] */
#define ECRYPT_IVSIZE(i) (64 + (i)*64) /* [edit] */
/* ------------------------------------------------------------------------- */
/* Data structures */
/*
* ECRYPT_ctx is the structure containing the representation of the
* internal state of your cipher.
*/
typedef struct
{
u32 input[16]; /* could be compressed */
/*
* [edit]
*
* Put here all state variable needed during the encryption process.
*/
} ECRYPT_ctx;
/* ------------------------------------------------------------------------- */
/* Mandatory functions */
/*
* Key and message independent initialization. This function will be
* called once when the program starts (e.g., to build expanded S-box
* tables).
*/
void ECRYPT_init();
/*
* Key setup. It is the user's responsibility to select the values of
* keysize and ivsize from the set of supported values specified
* above.
*/
void ECRYPT_keysetup(
ECRYPT_ctx* ctx,
const u8* key,
u32 keysize, /* Key size in bits. */
u32 ivsize); /* IV size in bits. */
/*
* IV setup. After having called ECRYPT_keysetup(), the user is
* allowed to call ECRYPT_ivsetup() different times in order to
* encrypt/decrypt different messages with the same key but different
* IV's.
*/
void ECRYPT_ivsetup(
ECRYPT_ctx* ctx,
const u8* iv);
/*
* Encryption/decryption of arbitrary length messages.
*
* For efficiency reasons, the API provides two types of
* encrypt/decrypt functions. The ECRYPT_encrypt_bytes() function
* (declared here) encrypts byte strings of arbitrary length, while
* the ECRYPT_encrypt_blocks() function (defined later) only accepts
* lengths which are multiples of ECRYPT_BLOCKLENGTH.
*
* The user is allowed to make multiple calls to
* ECRYPT_encrypt_blocks() to incrementally encrypt a long message,
* but he is NOT allowed to make additional encryption calls once he
* has called ECRYPT_encrypt_bytes() (unless he starts a new message
* of course). For example, this sequence of calls is acceptable:
*
* ECRYPT_keysetup();
*
* ECRYPT_ivsetup();
* ECRYPT_encrypt_blocks();
* ECRYPT_encrypt_blocks();
* ECRYPT_encrypt_bytes();
*
* ECRYPT_ivsetup();
* ECRYPT_encrypt_blocks();
* ECRYPT_encrypt_blocks();
*
* ECRYPT_ivsetup();
* ECRYPT_encrypt_bytes();
*
* The following sequence is not:
*
* ECRYPT_keysetup();
* ECRYPT_ivsetup();
* ECRYPT_encrypt_blocks();
* ECRYPT_encrypt_bytes();
* ECRYPT_encrypt_blocks();
*/
void ECRYPT_encrypt_bytes(
ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 msglen); /* Message length in bytes. */
void ECRYPT_decrypt_bytes(
ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 msglen); /* Message length in bytes. */
/* ------------------------------------------------------------------------- */
/* Optional features */
/*
* For testing purposes it can sometimes be useful to have a function
* which immediately generates keystream without having to provide it
* with a zero plaintext. If your cipher cannot provide this function
* (e.g., because it is not strictly a synchronous cipher), please
* reset the ECRYPT_GENERATES_KEYSTREAM flag.
*/
#define ECRYPT_GENERATES_KEYSTREAM
#ifdef ECRYPT_GENERATES_KEYSTREAM
void ECRYPT_keystream_bytes(
ECRYPT_ctx* ctx,
u8* keystream,
u32 length); /* Length of keystream in bytes. */
#endif
/* ------------------------------------------------------------------------- */
/* Optional optimizations */
/*
* By default, the functions in this section are implemented using
* calls to functions declared above. However, you might want to
* implement them differently for performance reasons.
*/
/*
* All-in-one encryption/decryption of (short) packets.
*
* The default definitions of these functions can be found in
* "ecrypt-sync.c". If you want to implement them differently, please
* undef the ECRYPT_USES_DEFAULT_ALL_IN_ONE flag.
*/
#define ECRYPT_USES_DEFAULT_ALL_IN_ONE /* [edit] */
void ECRYPT_encrypt_packet(
ECRYPT_ctx* ctx,
const u8* iv,
const u8* plaintext,
u8* ciphertext,
u32 msglen);
void ECRYPT_decrypt_packet(
ECRYPT_ctx* ctx,
const u8* iv,
const u8* ciphertext,
u8* plaintext,
u32 msglen);
/*
* Encryption/decryption of blocks.
*
* By default, these functions are defined as macros. If you want to
* provide a different implementation, please undef the
* ECRYPT_USES_DEFAULT_BLOCK_MACROS flag and implement the functions
* declared below.
*/
#define ECRYPT_BLOCKLENGTH 64 /* [edit] */
#define ECRYPT_USES_DEFAULT_BLOCK_MACROS /* [edit] */
#ifdef ECRYPT_USES_DEFAULT_BLOCK_MACROS
#define ECRYPT_encrypt_blocks(ctx, plaintext, ciphertext, blocks) \
ECRYPT_encrypt_bytes(ctx, plaintext, ciphertext, \
(blocks) * ECRYPT_BLOCKLENGTH)
#define ECRYPT_decrypt_blocks(ctx, ciphertext, plaintext, blocks) \
ECRYPT_decrypt_bytes(ctx, ciphertext, plaintext, \
(blocks) * ECRYPT_BLOCKLENGTH)
#ifdef ECRYPT_GENERATES_KEYSTREAM
#define ECRYPT_keystream_blocks(ctx, keystream, blocks) \
ECRYPT_keystream_bytes(ctx, keystream, \
(blocks) * ECRYPT_BLOCKLENGTH)
#endif
#else
void ECRYPT_encrypt_blocks(
ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 blocks); /* Message length in blocks. */
void ECRYPT_decrypt_blocks(
ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 blocks); /* Message length in blocks. */
#ifdef ECRYPT_GENERATES_KEYSTREAM
void ECRYPT_keystream_blocks(
ECRYPT_ctx* ctx,
const u8* keystream,
u32 blocks); /* Keystream length in blocks. */
#endif
#endif
/*
* If your cipher can be implemented in different ways, you can use
* the ECRYPT_VARIANT parameter to allow the user to choose between
* them at compile time (e.g., gcc -DECRYPT_VARIANT=3 ...). Please
* only use this possibility if you really think it could make a
* significant difference and keep the number of variants
* (ECRYPT_MAXVARIANT) as small as possible (definitely not more than
* 10). Note also that all variants should have exactly the same
* external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.).
*/
#define ECRYPT_MAXVARIANT 1 /* [edit] */
#ifndef ECRYPT_VARIANT
#define ECRYPT_VARIANT 1
#endif
#if (ECRYPT_VARIANT > ECRYPT_MAXVARIANT)
#error this variant does not exist
#endif
/* ------------------------------------------------------------------------- */
#endif

View file

@ -0,0 +1,219 @@
/*
salsa20-merged.c version 20051118
D. J. Bernstein
Public domain.
*/
#include "ecrypt-sync.h"
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))
void ECRYPT_init(void)
{
return;
}
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
void ECRYPT_keysetup(ECRYPT_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
{
const char *constants;
x->input[1] = U8TO32_LITTLE(k + 0);
x->input[2] = U8TO32_LITTLE(k + 4);
x->input[3] = U8TO32_LITTLE(k + 8);
x->input[4] = U8TO32_LITTLE(k + 12);
if (kbits == 256) { /* recommended */
k += 16;
constants = sigma;
} else { /* kbits == 128 */
constants = tau;
}
x->input[11] = U8TO32_LITTLE(k + 0);
x->input[12] = U8TO32_LITTLE(k + 4);
x->input[13] = U8TO32_LITTLE(k + 8);
x->input[14] = U8TO32_LITTLE(k + 12);
x->input[0] = U8TO32_LITTLE(constants + 0);
x->input[5] = U8TO32_LITTLE(constants + 4);
x->input[10] = U8TO32_LITTLE(constants + 8);
x->input[15] = U8TO32_LITTLE(constants + 12);
}
void ECRYPT_ivsetup(ECRYPT_ctx *x,const u8 *iv)
{
x->input[6] = U8TO32_LITTLE(iv + 0);
x->input[7] = U8TO32_LITTLE(iv + 4);
x->input[8] = 0;
x->input[9] = 0;
}
void ECRYPT_encrypt_bytes(ECRYPT_ctx *x,const u8 *m,u8 *c,u32 bytes)
{
u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
u8 *ctarget = 0;
u8 tmp[64];
int i;
if (!bytes) return;
j0 = x->input[0];
j1 = x->input[1];
j2 = x->input[2];
j3 = x->input[3];
j4 = x->input[4];
j5 = x->input[5];
j6 = x->input[6];
j7 = x->input[7];
j8 = x->input[8];
j9 = x->input[9];
j10 = x->input[10];
j11 = x->input[11];
j12 = x->input[12];
j13 = x->input[13];
j14 = x->input[14];
j15 = x->input[15];
for (;;) {
if (bytes < 64) {
for (i = 0;i < bytes;++i) tmp[i] = m[i];
m = tmp;
ctarget = c;
c = tmp;
}
x0 = j0;
x1 = j1;
x2 = j2;
x3 = j3;
x4 = j4;
x5 = j5;
x6 = j6;
x7 = j7;
x8 = j8;
x9 = j9;
x10 = j10;
x11 = j11;
x12 = j12;
x13 = j13;
x14 = j14;
x15 = j15;
for (i = 20;i > 0;i -= 2) {
x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
}
x0 = PLUS(x0,j0);
x1 = PLUS(x1,j1);
x2 = PLUS(x2,j2);
x3 = PLUS(x3,j3);
x4 = PLUS(x4,j4);
x5 = PLUS(x5,j5);
x6 = PLUS(x6,j6);
x7 = PLUS(x7,j7);
x8 = PLUS(x8,j8);
x9 = PLUS(x9,j9);
x10 = PLUS(x10,j10);
x11 = PLUS(x11,j11);
x12 = PLUS(x12,j12);
x13 = PLUS(x13,j13);
x14 = PLUS(x14,j14);
x15 = PLUS(x15,j15);
x0 = XOR(x0,U8TO32_LITTLE(m + 0));
x1 = XOR(x1,U8TO32_LITTLE(m + 4));
x2 = XOR(x2,U8TO32_LITTLE(m + 8));
x3 = XOR(x3,U8TO32_LITTLE(m + 12));
x4 = XOR(x4,U8TO32_LITTLE(m + 16));
x5 = XOR(x5,U8TO32_LITTLE(m + 20));
x6 = XOR(x6,U8TO32_LITTLE(m + 24));
x7 = XOR(x7,U8TO32_LITTLE(m + 28));
x8 = XOR(x8,U8TO32_LITTLE(m + 32));
x9 = XOR(x9,U8TO32_LITTLE(m + 36));
x10 = XOR(x10,U8TO32_LITTLE(m + 40));
x11 = XOR(x11,U8TO32_LITTLE(m + 44));
x12 = XOR(x12,U8TO32_LITTLE(m + 48));
x13 = XOR(x13,U8TO32_LITTLE(m + 52));
x14 = XOR(x14,U8TO32_LITTLE(m + 56));
x15 = XOR(x15,U8TO32_LITTLE(m + 60));
j8 = PLUSONE(j8);
if (!j8) {
j9 = PLUSONE(j9);
/* stopping at 2^70 bytes per nonce is user's responsibility */
}
U32TO8_LITTLE(c + 0,x0);
U32TO8_LITTLE(c + 4,x1);
U32TO8_LITTLE(c + 8,x2);
U32TO8_LITTLE(c + 12,x3);
U32TO8_LITTLE(c + 16,x4);
U32TO8_LITTLE(c + 20,x5);
U32TO8_LITTLE(c + 24,x6);
U32TO8_LITTLE(c + 28,x7);
U32TO8_LITTLE(c + 32,x8);
U32TO8_LITTLE(c + 36,x9);
U32TO8_LITTLE(c + 40,x10);
U32TO8_LITTLE(c + 44,x11);
U32TO8_LITTLE(c + 48,x12);
U32TO8_LITTLE(c + 52,x13);
U32TO8_LITTLE(c + 56,x14);
U32TO8_LITTLE(c + 60,x15);
if (bytes <= 64) {
if (bytes < 64) {
for (i = 0;i < bytes;++i) ctarget[i] = c[i];
}
x->input[8] = j8;
x->input[9] = j9;
return;
}
bytes -= 64;
c += 64;
m += 64;
}
}
void ECRYPT_decrypt_bytes(ECRYPT_ctx *x,const u8 *c,u8 *m,u32 bytes)
{
ECRYPT_encrypt_bytes(x,c,m,bytes);
}
void ECRYPT_keystream_bytes(ECRYPT_ctx *x,u8 *stream,u32 bytes)
{
u32 i;
for (i = 0; i < bytes; ++i) stream[i] = 0;
ECRYPT_encrypt_bytes(x,stream,stream,bytes);
}

View file

@ -0,0 +1,258 @@
/* -------------------------------------------------------------------------
* Works when compiled for either 32-bit or 64-bit targets, optimized for
* 64 bit.
*
* Canonical implementation of Init/Update/Finalize for SHA-3 byte input.
*
* SHA3-256, SHA3-384, SHA-512 are implemented. SHA-224 can easily be added.
*
* Based on code from http://keccak.noekeon.org/ .
*
* I place the code that I wrote into public domain, free to use.
*
* I would appreciate if you give credits to this work if you used it to
* write or test * your code.
*
* Aug 2015. Andrey Jivsov. crypto@brainhub.org
* ---------------------------------------------------------------------- */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "sha3.h"
#include "crypto/common/keccak.h"
#define SHA3_ASSERT( x )
#if defined(_MSC_VER)
#define SHA3_TRACE( format, ...)
#define SHA3_TRACE_BUF( format, buf, l, ...)
#else
#define SHA3_TRACE(format, args...)
#define SHA3_TRACE_BUF(format, buf, l, args...)
#endif
/*
* This flag is used to configure "pure" Keccak, as opposed to NIST SHA3.
*/
#define SHA3_USE_KECCAK_FLAG 0x80000000
#define SHA3_CW(x) ((x) & (~SHA3_USE_KECCAK_FLAG))
#if defined(_MSC_VER)
#define SHA3_CONST(x) x
#else
#define SHA3_CONST(x) x##L
#endif
#define KECCAK_ROUNDS 24
/* *************************** Public Inteface ************************ */
/* For Init or Reset call these: */
sha3_return_t
sha3_Init(void *priv, unsigned bitSize) {
sha3_context *ctx = (sha3_context *) priv;
if( bitSize != 256 && bitSize != 384 && bitSize != 512 )
return SHA3_RETURN_BAD_PARAMS;
memset(ctx, 0, sizeof(*ctx));
ctx->capacityWords = 2 * bitSize / (8 * sizeof(uint64_t));
return SHA3_RETURN_OK;
}
void
sha3_Init256(void *priv)
{
sha3_Init(priv, 256);
}
void
sha3_Init384(void *priv)
{
sha3_Init(priv, 384);
}
void
sha3_Init512(void *priv)
{
sha3_Init(priv, 512);
}
SHA3_FLAGS
sha3_SetFlags(void *priv, SHA3_FLAGS flags)
{
sha3_context *ctx = (sha3_context *) priv;
flags = static_cast<SHA3_FLAGS>(static_cast<int>(flags) & SHA3_FLAGS_KECCAK);
ctx->capacityWords |= (flags == SHA3_FLAGS_KECCAK ? SHA3_USE_KECCAK_FLAG : 0);
return flags;
}
void
sha3_Update(void *priv, void const *bufIn, size_t len)
{
sha3_context *ctx = (sha3_context *) priv;
/* 0...7 -- how much is needed to have a word */
unsigned old_tail = (8 - ctx->byteIndex) & 7;
size_t words;
unsigned tail;
size_t i;
const uint8_t *buf = reinterpret_cast<const uint8_t*>(bufIn);
SHA3_TRACE_BUF("called to update with:", buf, len);
SHA3_ASSERT(ctx->byteIndex < 8);
SHA3_ASSERT(ctx->wordIndex < sizeof(ctx->s) / sizeof(ctx->s[0]));
if(len < old_tail) { /* have no complete word or haven't started
* the word yet */
SHA3_TRACE("because %d<%d, store it and return", (unsigned)len,
(unsigned)old_tail);
/* endian-independent code follows: */
while (len--)
ctx->saved |= (uint64_t) (*(buf++)) << ((ctx->byteIndex++) * 8);
SHA3_ASSERT(ctx->byteIndex < 8);
return;
}
if(old_tail) { /* will have one word to process */
SHA3_TRACE("completing one word with %d bytes", (unsigned)old_tail);
/* endian-independent code follows: */
len -= old_tail;
while (old_tail--)
ctx->saved |= (uint64_t) (*(buf++)) << ((ctx->byteIndex++) * 8);
/* now ready to add saved to the sponge */
ctx->s[ctx->wordIndex] ^= ctx->saved;
SHA3_ASSERT(ctx->byteIndex == 8);
ctx->byteIndex = 0;
ctx->saved = 0;
if(++ctx->wordIndex ==
(SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords))) {
xmrig::keccakf(ctx->s, KECCAK_ROUNDS);
ctx->wordIndex = 0;
}
}
/* now work in full words directly from input */
SHA3_ASSERT(ctx->byteIndex == 0);
words = len / sizeof(uint64_t);
tail = len - words * sizeof(uint64_t);
SHA3_TRACE("have %d full words to process", (unsigned)words);
for(i = 0; i < words; i++, buf += sizeof(uint64_t)) {
const uint64_t t = (uint64_t) (buf[0]) |
((uint64_t) (buf[1]) << 8 * 1) |
((uint64_t) (buf[2]) << 8 * 2) |
((uint64_t) (buf[3]) << 8 * 3) |
((uint64_t) (buf[4]) << 8 * 4) |
((uint64_t) (buf[5]) << 8 * 5) |
((uint64_t) (buf[6]) << 8 * 6) |
((uint64_t) (buf[7]) << 8 * 7);
#if defined(__x86_64__ ) || defined(__i386__)
SHA3_ASSERT(memcmp(&t, buf, 8) == 0);
#endif
ctx->s[ctx->wordIndex] ^= t;
if(++ctx->wordIndex ==
(SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords))) {
xmrig::keccakf(ctx->s, KECCAK_ROUNDS);
ctx->wordIndex = 0;
}
}
SHA3_TRACE("have %d bytes left to process, save them", (unsigned)tail);
/* finally, save the partial word */
SHA3_ASSERT(ctx->byteIndex == 0 && tail < 8);
while (tail--) {
SHA3_TRACE("Store byte %02x '%c'", *buf, *buf);
ctx->saved |= (uint64_t) (*(buf++)) << ((ctx->byteIndex++) * 8);
}
SHA3_ASSERT(ctx->byteIndex < 8);
SHA3_TRACE("Have saved=0x%016" PRIx64 " at the end", ctx->saved);
}
/* This is simply the 'update' with the padding block.
* The padding block is 0x01 || 0x00* || 0x80. First 0x01 and last 0x80
* bytes are always present, but they can be the same byte.
*/
void const *
sha3_Finalize(void *priv)
{
sha3_context *ctx = (sha3_context *) priv;
SHA3_TRACE("called with %d bytes in the buffer", ctx->byteIndex);
/* Append 2-bit suffix 01, per SHA-3 spec. Instead of 1 for padding we
* use 1<<2 below. The 0x02 below corresponds to the suffix 01.
* Overall, we feed 0, then 1, and finally 1 to start padding. Without
* M || 01, we would simply use 1 to start padding. */
uint64_t t;
if( ctx->capacityWords & SHA3_USE_KECCAK_FLAG ) {
/* Keccak version */
t = (uint64_t)(((uint64_t) 1) << (ctx->byteIndex * 8));
}
else {
/* SHA3 version */
t = (uint64_t)(((uint64_t)(0x02 | (1 << 2))) << ((ctx->byteIndex) * 8));
}
ctx->s[ctx->wordIndex] ^= ctx->saved ^ t;
ctx->s[SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords) - 1] ^=
SHA3_CONST(0x8000000000000000UL);
xmrig::keccakf(ctx->s, KECCAK_ROUNDS);
/* Return first bytes of the ctx->s. This conversion is not needed for
* little-endian platforms e.g. wrap with #if !defined(__BYTE_ORDER__)
* || !defined(__ORDER_LITTLE_ENDIAN__) || __BYTE_ORDER__!=__ORDER_LITTLE_ENDIAN__
* ... the conversion below ...
* #endif */
{
unsigned i;
for(i = 0; i < SHA3_KECCAK_SPONGE_WORDS; i++) {
const unsigned t1 = (uint32_t) ctx->s[i];
const unsigned t2 = (uint32_t) ((ctx->s[i] >> 16) >> 16);
ctx->sb[i * 8 + 0] = (uint8_t) (t1);
ctx->sb[i * 8 + 1] = (uint8_t) (t1 >> 8);
ctx->sb[i * 8 + 2] = (uint8_t) (t1 >> 16);
ctx->sb[i * 8 + 3] = (uint8_t) (t1 >> 24);
ctx->sb[i * 8 + 4] = (uint8_t) (t2);
ctx->sb[i * 8 + 5] = (uint8_t) (t2 >> 8);
ctx->sb[i * 8 + 6] = (uint8_t) (t2 >> 16);
ctx->sb[i * 8 + 7] = (uint8_t) (t2 >> 24);
}
}
SHA3_TRACE_BUF("Hash: (first 32 bytes)", ctx->sb, 256 / 8);
return (ctx->sb);
}
sha3_return_t sha3_HashBuffer( unsigned bitSize, enum SHA3_FLAGS flags, const void *in, unsigned inBytes, void *out, unsigned outBytes ) {
sha3_return_t err;
sha3_context c;
err = sha3_Init(&c, bitSize);
if( err != SHA3_RETURN_OK )
return err;
if( sha3_SetFlags(&c, flags) != flags ) {
return SHA3_RETURN_BAD_PARAMS;
}
sha3_Update(&c, in, inBytes);
const void *h = sha3_Finalize(&c);
if(outBytes > bitSize/8)
outBytes = bitSize/8;
memcpy(out, h, outBytes);
return SHA3_RETURN_OK;
}

View file

@ -0,0 +1,71 @@
#ifndef SHA3_H
#define SHA3_H
/* -------------------------------------------------------------------------
* Works when compiled for either 32-bit or 64-bit targets, optimized for
* 64 bit.
*
* Canonical implementation of Init/Update/Finalize for SHA-3 byte input.
*
* SHA3-256, SHA3-384, SHA-512 are implemented. SHA-224 can easily be added.
*
* Based on code from http://keccak.noekeon.org/ .
*
* I place the code that I wrote into public domain, free to use.
*
* I would appreciate if you give credits to this work if you used it to
* write or test * your code.
*
* Aug 2015. Andrey Jivsov. crypto@brainhub.org
* ---------------------------------------------------------------------- */
/* 'Words' here refers to uint64_t */
#define SHA3_KECCAK_SPONGE_WORDS \
(((1600)/8/*bits to byte*/)/sizeof(uint64_t))
typedef struct sha3_context_ {
uint64_t saved; /* the portion of the input message that we
* didn't consume yet */
union { /* Keccak's state */
uint64_t s[SHA3_KECCAK_SPONGE_WORDS];
uint8_t sb[SHA3_KECCAK_SPONGE_WORDS * 8];
};
unsigned byteIndex; /* 0..7--the next byte after the set one
* (starts from 0; 0--none are buffered) */
unsigned wordIndex; /* 0..24--the next word to integrate input
* (starts from 0) */
unsigned capacityWords; /* the double size of the hash output in
* words (e.g. 16 for Keccak 512) */
} sha3_context;
enum SHA3_FLAGS {
SHA3_FLAGS_NONE=0,
SHA3_FLAGS_KECCAK=1
};
enum SHA3_RETURN {
SHA3_RETURN_OK=0,
SHA3_RETURN_BAD_PARAMS=1
};
typedef enum SHA3_RETURN sha3_return_t;
/* For Init or Reset call these: */
sha3_return_t sha3_Init(void *priv, unsigned bitSize);
void sha3_Init256(void *priv);
void sha3_Init384(void *priv);
void sha3_Init512(void *priv);
SHA3_FLAGS sha3_SetFlags(void *priv, SHA3_FLAGS);
void sha3_Update(void *priv, void const *bufIn, size_t len);
void const *sha3_Finalize(void *priv);
/* Single-call hashing */
sha3_return_t sha3_HashBuffer(
unsigned bitSize, /* 256, 384, 512 */
SHA3_FLAGS flags, /* SHA3_FLAGS_NONE or SHA3_FLAGS_KECCAK */
const void *in, unsigned inBytes,
void *out, unsigned outBytes ); /* up to bitSize/8; truncation OK */
#endif

View file

@ -43,6 +43,11 @@
#endif #endif
#ifdef XMRIG_ALGO_ASTROBWT
# include "crypto/astrobwt/AstroBWT.h"
#endif
#define ADD_FN(algo) \ #define ADD_FN(algo) \
m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false>; \ m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false>; \
m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true>; \ m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true>; \
@ -277,6 +282,11 @@ xmrig::CnHash::CnHash()
m_map[Algorithm::AR2_WRKZ][AV_SINGLE_SOFT][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_WRKZ>; m_map[Algorithm::AR2_WRKZ][AV_SINGLE_SOFT][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_WRKZ>;
# endif # endif
# ifdef XMRIG_ALGO_ASTROBWT
m_map[Algorithm::ASTROBWT_DERO][AV_SINGLE][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
m_map[Algorithm::ASTROBWT_DERO][AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
# endif
# ifdef XMRIG_FEATURE_ASM # ifdef XMRIG_FEATURE_ASM
patchAsmVariants(); patchAsmVariants();
# endif # endif

View file

@ -404,6 +404,24 @@ const static uint8_t argon2_wrkz_test_out[160] = {
#endif #endif
#ifdef XMRIG_ALGO_ASTROBWT
// "astrobwt/dero"
const static uint8_t astrobwt_dero_test_out[160] = {
0x7E, 0x88, 0x44, 0xF2, 0xD6, 0xB7, 0xA4, 0x34, 0x98, 0xFE, 0x6D, 0x22, 0x65, 0x27, 0x68, 0x90,
0x23, 0xDA, 0x8A, 0x52, 0xF9, 0xFC, 0x4E, 0xC6, 0x9E, 0x5A, 0xAA, 0xA6, 0x3E, 0xDC, 0xE1, 0xC1,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
#endif
} // namespace xmrig } // namespace xmrig

View file

@ -124,6 +124,9 @@ static AlgoName const algorithm_names[] = {
{ "chukwa", nullptr, Algorithm::AR2_CHUKWA }, { "chukwa", nullptr, Algorithm::AR2_CHUKWA },
{ "argon2/wrkz", nullptr, Algorithm::AR2_WRKZ }, { "argon2/wrkz", nullptr, Algorithm::AR2_WRKZ },
# endif # endif
# ifdef XMRIG_ALGO_ASTROBWT
{ "astrobwt/dero", nullptr, Algorithm::ASTROBWT_DERO },
# endif
}; };
@ -210,6 +213,18 @@ size_t xmrig::Algorithm::l3() const
} }
# endif # endif
# ifdef XMRIG_ALGO_ASTROBWT
if (f == ASTROBWT) {
switch (m_id) {
case ASTROBWT_DERO:
return oneMiB * 20;
default:
break;
}
}
# endif
return 0; return 0;
} }
@ -228,6 +243,12 @@ uint32_t xmrig::Algorithm::maxIntensity() const
} }
# endif # endif
# ifdef XMRIG_ALGO_ASTROBWT
if (family() == ASTROBWT) {
return 1;
}
# endif
# ifdef XMRIG_ALGO_CN_GPU # ifdef XMRIG_ALGO_CN_GPU
if (m_id == CN_GPU) { if (m_id == CN_GPU) {
return 1; return 1;
@ -291,6 +312,11 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
return ARGON2; return ARGON2;
# endif # endif
# ifdef XMRIG_ALGO_ASTROBWT
case ASTROBWT_DERO:
return ASTROBWT;
# endif
default: default:
break; break;
} }

View file

@ -71,6 +71,7 @@ public:
RX_SFX, // "rx/sfx" RandomSFX (Safex Cash). RX_SFX, // "rx/sfx" RandomSFX (Safex Cash).
AR2_CHUKWA, // "argon2/chukwa" Argon2id (Chukwa). AR2_CHUKWA, // "argon2/chukwa" Argon2id (Chukwa).
AR2_WRKZ, // "argon2/wrkz" Argon2id (WRKZ) AR2_WRKZ, // "argon2/wrkz" Argon2id (WRKZ)
ASTROBWT_DERO, // "astrobwt/dero" AstroBWT (Dero)
MAX MAX
}; };
@ -81,7 +82,8 @@ public:
CN_HEAVY, CN_HEAVY,
CN_PICO, CN_PICO,
RANDOM_X, RANDOM_X,
ARGON2 ARGON2,
ASTROBWT
}; };
inline Algorithm() = default; inline Algorithm() = default;

View file

@ -50,7 +50,8 @@ static CoinName const coin_names[] = {
{ "monero", Coin::MONERO }, { "monero", Coin::MONERO },
{ "xmr", Coin::MONERO }, { "xmr", Coin::MONERO },
{ "arqma", Coin::ARQMA }, { "arqma", Coin::ARQMA },
{ "arq", Coin::ARQMA } { "arq", Coin::ARQMA },
{ "dero", Coin::DERO },
}; };
@ -67,6 +68,9 @@ xmrig::Algorithm::Id xmrig::Coin::algorithm(uint8_t blobVersion) const
case ARQMA: case ARQMA:
return (blobVersion >= 15) ? Algorithm::RX_ARQ : Algorithm::CN_PICO_0; return (blobVersion >= 15) ? Algorithm::RX_ARQ : Algorithm::CN_PICO_0;
case DERO:
return (blobVersion >= 4) ? Algorithm::ASTROBWT_DERO : Algorithm::CN_0;
case INVALID: case INVALID:
break; break;
} }

View file

@ -40,7 +40,8 @@ public:
enum Id : int { enum Id : int {
INVALID = -1, INVALID = -1,
MONERO, MONERO,
ARQMA ARQMA,
DERO
}; };