mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-11 05:14:40 +00:00
Merge pull request #2712 from SChernykh/gh3
GhostRider algorithm (Raptoreum) support
This commit is contained in:
commit
4ab0ad928d
68 changed files with 72217 additions and 178 deletions
CMakeLists.txt
cmake
src
backend
common
cpu
base
core
crypto
cn
ghostrider
CMakeLists.txt aes_helper.c ghostrider.cpp ghostrider.h md_helper.c sph_blake.c sph_blake.h sph_bmw.c sph_bmw.h sph_cubehash.c sph_cubehash.h sph_echo.c sph_echo.h sph_fugue.c sph_fugue.h sph_groestl.c sph_groestl.h sph_hamsi.c sph_hamsi.h sph_hamsi_helper.c sph_jh.c sph_jh.h sph_keccak.c sph_keccak.h sph_luffa.c sph_luffa.h sph_sha2.c sph_sha2.h sph_shabal.c sph_shabal.h sph_shavite.c sph_shavite.h sph_simd.c sph_simd.h sph_skein.c sph_skein.h sph_types.h sph_whirlpool.c sph_whirlpool.h
rx
net
|
@ -10,6 +10,7 @@ option(WITH_RANDOMX "Enable RandomX algorithms family" ON)
|
|||
option(WITH_ARGON2 "Enable Argon2 algorithms family" ON)
|
||||
option(WITH_ASTROBWT "Enable AstroBWT algorithms family" ON)
|
||||
option(WITH_KAWPOW "Enable KawPow algorithms family" ON)
|
||||
option(WITH_GHOSTRIDER "Enable GhostRider algorithm" ON)
|
||||
option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
|
||||
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
|
||||
option(WITH_TLS "Enable OpenSSL support" ON)
|
||||
|
@ -128,6 +129,10 @@ set(SOURCES_CRYPTO
|
|||
src/crypto/common/VirtualMemory.cpp
|
||||
)
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||
set_source_files_properties(src/crypto/cn/CnHash.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize")
|
||||
endif()
|
||||
|
||||
if (WITH_HWLOC)
|
||||
list(APPEND HEADERS_CRYPTO
|
||||
src/crypto/common/NUMAMemoryPool.h
|
||||
|
@ -186,6 +191,7 @@ include(cmake/randomx.cmake)
|
|||
include(cmake/argon2.cmake)
|
||||
include(cmake/astrobwt.cmake)
|
||||
include(cmake/kawpow.cmake)
|
||||
include(cmake/ghostrider.cmake)
|
||||
include(cmake/OpenSSL.cmake)
|
||||
include(cmake/asm.cmake)
|
||||
|
||||
|
@ -221,7 +227,7 @@ if (WITH_DEBUG_LOG)
|
|||
endif()
|
||||
|
||||
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY})
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY} ${GHOSTRIDER_LIBRARY})
|
||||
|
||||
if (WIN32)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
|
|
8
cmake/ghostrider.cmake
Normal file
8
cmake/ghostrider.cmake
Normal file
|
@ -0,0 +1,8 @@
|
|||
# GhostRider build switch.
# When WITH_GHOSTRIDER is ON: defines XMRIG_ALGO_GHOSTRIDER, adds the
# src/crypto/ghostrider subdirectory to the build and sets GHOSTRIDER_LIBRARY
# to "ghostrider".
# When OFF: removes the definition and sets GHOSTRIDER_LIBRARY to an empty string.
if (WITH_GHOSTRIDER)
|
||||
add_definitions(/DXMRIG_ALGO_GHOSTRIDER)
|
||||
add_subdirectory(src/crypto/ghostrider)
|
||||
set(GHOSTRIDER_LIBRARY ghostrider)
|
||||
else()
|
||||
remove_definitions(/DXMRIG_ALGO_GHOSTRIDER)
|
||||
set(GHOSTRIDER_LIBRARY "")
|
||||
endif()
|
|
@ -239,6 +239,9 @@ xmrig::IWorker *xmrig::Workers<CpuLaunchData>::create(Thread<CpuLaunchData> *han
|
|||
|
||||
case 5:
|
||||
return new CpuWorker<5>(handle->id(), handle->config());
|
||||
|
||||
case 8:
|
||||
return new CpuWorker<8>(handle->id(), handle->config());
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
|
|
|
@ -122,8 +122,15 @@ std::vector<xmrig::CpuLaunchData> xmrig::CpuConfig::get(const Miner *miner, cons
|
|||
const size_t count = threads.count();
|
||||
out.reserve(count);
|
||||
|
||||
std::vector<int64_t> affinities;
|
||||
affinities.reserve(count);
|
||||
|
||||
for (const auto& thread : threads.data()) {
|
||||
affinities.emplace_back(thread.affinity());
|
||||
}
|
||||
|
||||
for (const auto &thread : threads.data()) {
|
||||
out.emplace_back(miner, algorithm, *this, thread, count);
|
||||
out.emplace_back(miner, algorithm, *this, thread, count, affinities);
|
||||
}
|
||||
|
||||
return out;
|
||||
|
@ -200,6 +207,7 @@ void xmrig::CpuConfig::generate()
|
|||
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::GHOSTRIDER>(m_threads, m_limit);
|
||||
|
||||
m_shouldSave |= count > 0;
|
||||
}
|
||||
|
|
|
@ -161,6 +161,15 @@ size_t inline generate<Algorithm::ASTROBWT>(Threads<CpuThreads>& threads, uint32
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
// Specialization of generate<>() for the GhostRider family.
// Forwards to the non-template generate() overload, passing the family name
// kGHOSTRIDER, the thread collection, the GHOSTRIDER_RTM algorithm and the limit,
// and returns that overload's result unchanged.
template<>
|
||||
size_t inline generate<Algorithm::GHOSTRIDER>(Threads<CpuThreads>& threads, uint32_t limit)
|
||||
{
|
||||
return generate(Algorithm::kGHOSTRIDER, threads, Algorithm::GHOSTRIDER_RTM, limit);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
#include <algorithm>
|
||||
|
||||
|
||||
xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads) :
|
||||
xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads, const std::vector<int64_t>& affinities) :
|
||||
algorithm(algorithm),
|
||||
assembly(config.assembly()),
|
||||
astrobwtAVX2(config.astrobwtAVX2()),
|
||||
|
@ -44,7 +44,8 @@ xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorit
|
|||
affinity(thread.affinity()),
|
||||
miner(miner),
|
||||
threads(threads),
|
||||
intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity()))
|
||||
intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity())),
|
||||
affinities(affinities)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ class Miner;
|
|||
class CpuLaunchData
|
||||
{
|
||||
public:
|
||||
CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads);
|
||||
CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads, const std::vector<int64_t>& affinities);
|
||||
|
||||
bool isEqual(const CpuLaunchData &other) const;
|
||||
CnHash::AlgoVariant av() const;
|
||||
|
@ -68,6 +68,7 @@ public:
|
|||
const Miner *miner;
|
||||
const size_t threads;
|
||||
const uint32_t intensity;
|
||||
const std::vector<int64_t> affinities;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ public:
|
|||
CpuThread(const rapidjson::Value &value);
|
||||
|
||||
inline bool isEqual(const CpuThread &other) const { return other.m_affinity == m_affinity && other.m_intensity == m_intensity; }
|
||||
inline bool isValid() const { return m_intensity <= 5; }
|
||||
inline bool isValid() const { return m_intensity <= 8; }
|
||||
inline int64_t affinity() const { return m_affinity; }
|
||||
inline uint32_t intensity() const { return m_intensity == 0 ? 1 : m_intensity; }
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "crypto/rx/Rx.h"
|
||||
#include "crypto/rx/RxDataset.h"
|
||||
#include "crypto/rx/RxVm.h"
|
||||
#include "crypto/ghostrider/ghostrider.h"
|
||||
#include "net/JobResults.h"
|
||||
|
||||
|
||||
|
@ -97,6 +98,10 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
|
|||
{
|
||||
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node());
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
m_ghHelper = ghostrider::create_helper_thread(affinity(), data.affinities);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -115,6 +120,10 @@ xmrig::CpuWorker<N>::~CpuWorker()
|
|||
{
|
||||
delete m_memory;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ghostrider::destroy_helper_thread(m_ghHelper);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -152,6 +161,12 @@ bool xmrig::CpuWorker<N>::selfTest()
|
|||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (m_algorithm.family() == Algorithm::GHOSTRIDER) {
|
||||
return N == 8;
|
||||
}
|
||||
# endif
|
||||
|
||||
allocateCnCtx();
|
||||
|
||||
if (m_algorithm.family() == Algorithm::CN) {
|
||||
|
@ -300,16 +315,30 @@ void xmrig::CpuWorker<N>::start()
|
|||
else
|
||||
# endif
|
||||
{
|
||||
switch (job.algorithm().family()) {
|
||||
|
||||
# ifdef XMRIG_ALGO_ASTROBWT
|
||||
if (job.algorithm().family() == Algorithm::ASTROBWT) {
|
||||
case Algorithm::ASTROBWT:
|
||||
if (!astrobwt::astrobwt_dero(m_job.blob(), job.size(), m_ctx[0]->memory, m_hash, m_astrobwtMaxSize, m_astrobwtAVX2)) {
|
||||
valid = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
# endif
|
||||
{
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
case Algorithm::GHOSTRIDER:
|
||||
if (N == 8) {
|
||||
ghostrider::hash_octa(m_job.blob(), job.size(), m_hash, m_ctx, m_ghHelper);
|
||||
}
|
||||
else {
|
||||
valid = false;
|
||||
}
|
||||
break;
|
||||
# endif
|
||||
|
||||
default:
|
||||
fn(job.algorithm())(m_job.blob(), job.size(), m_hash, m_ctx, job.height());
|
||||
break;
|
||||
}
|
||||
|
||||
if (!nextRound()) {
|
||||
|
@ -484,6 +513,7 @@ template class CpuWorker<2>;
|
|||
template class CpuWorker<3>;
|
||||
template class CpuWorker<4>;
|
||||
template class CpuWorker<5>;
|
||||
template class CpuWorker<8>;
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
|
|
@ -38,6 +38,11 @@ namespace xmrig {
|
|||
class RxVm;
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
namespace ghostrider { struct HelperThread; }
|
||||
#endif
|
||||
|
||||
|
||||
template<size_t N>
|
||||
class CpuWorker : public Worker
|
||||
{
|
||||
|
@ -87,6 +92,10 @@ private:
|
|||
randomx_vm *m_vm = nullptr;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ghostrider::HelperThread* m_ghHelper = nullptr;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_BENCHMARK
|
||||
uint32_t m_benchSize = 0;
|
||||
# endif
|
||||
|
@ -102,6 +111,7 @@ extern template class CpuWorker<2>;
|
|||
extern template class CpuWorker<3>;
|
||||
extern template class CpuWorker<4>;
|
||||
extern template class CpuWorker<5>;
|
||||
extern template class CpuWorker<8>;
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
|
|
@ -361,6 +361,12 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3
|
|||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (f == Algorithm::GHOSTRIDER) {
|
||||
return CpuThreads(std::max<size_t>(count / 2, 1), 8);
|
||||
}
|
||||
# endif
|
||||
|
||||
return CpuThreads(std::max<size_t>(count / 2, 1), 1);
|
||||
}
|
||||
|
||||
|
|
|
@ -99,8 +99,14 @@ const char *xmrig::BasicCpuInfo::backend() const
|
|||
}
|
||||
|
||||
|
||||
xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &, uint32_t) const
|
||||
xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const
|
||||
{
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algorithm.family() == Algorithm::GHOSTRIDER) {
|
||||
return CpuThreads(threads(), 8);
|
||||
}
|
||||
# endif
|
||||
|
||||
return CpuThreads(threads());
|
||||
}
|
||||
|
||||
|
|
|
@ -269,8 +269,10 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::allThreads(const Algorithm &algorithm, ui
|
|||
CpuThreads threads;
|
||||
threads.reserve(m_threads);
|
||||
|
||||
const uint32_t intensity = (algorithm.family() == Algorithm::GHOSTRIDER) ? 8 : 0;
|
||||
|
||||
for (const int32_t pu : m_units) {
|
||||
threads.add(pu, 0);
|
||||
threads.add(pu, intensity);
|
||||
}
|
||||
|
||||
if (threads.isEmpty()) {
|
||||
|
@ -296,6 +298,18 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||
cores.reserve(m_cores);
|
||||
findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if ((algorithm == Algorithm::GHOSTRIDER_RTM) && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
|
||||
// Don't use E-cores on Alder Lake
|
||||
cores.erase(std::remove_if(cores.begin(), cores.end(), [](hwloc_obj_t c) { return hwloc_bitmap_weight(c->cpuset) == 1; }), cores.end());
|
||||
|
||||
// This shouldn't happen, but check it anyway
|
||||
if (cores.empty()) {
|
||||
findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
size_t L3 = cache->attr->cache.size;
|
||||
const bool L3_exclusive = isCacheExclusive(cache);
|
||||
size_t L2 = 0;
|
||||
|
@ -351,6 +365,15 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||
cacheHashes = std::min(cacheHashes, limit);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algorithm == Algorithm::GHOSTRIDER_RTM) {
|
||||
// GhostRider implementation runs 8 hashes at a time
|
||||
intensity = 8;
|
||||
// Always 1 thread per core (it uses additional helper thread when possible)
|
||||
cacheHashes = std::min(cacheHashes, cores.size());
|
||||
}
|
||||
# endif
|
||||
|
||||
if (cacheHashes >= PUs) {
|
||||
for (hwloc_obj_t core : cores) {
|
||||
const std::vector<hwloc_obj_t> units = findByType(core, HWLOC_OBJ_PU);
|
||||
|
|
|
@ -101,6 +101,11 @@ const char *Algorithm::kKAWPOW = "kawpow";
|
|||
const char *Algorithm::kKAWPOW_RVN = "kawpow";
|
||||
#endif
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
const char* Algorithm::kGHOSTRIDER = "ghostrider";
|
||||
const char* Algorithm::kGHOSTRIDER_RTM = "ghostrider";
|
||||
#endif
|
||||
|
||||
|
||||
#define ALGO_NAME(ALGO) { Algorithm::ALGO, Algorithm::k##ALGO }
|
||||
#define ALGO_ALIAS(ALGO, NAME) { NAME, Algorithm::ALGO }
|
||||
|
@ -163,6 +168,10 @@ static const std::map<uint32_t, const char *> kAlgorithmNames = {
|
|||
# ifdef XMRIG_ALGO_KAWPOW
|
||||
ALGO_NAME(KAWPOW_RVN),
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ALGO_NAME(GHOSTRIDER_RTM),
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
|
@ -278,6 +287,11 @@ static const std::map<const char *, Algorithm::Id, aliasCompare> kAlgorithmAlias
|
|||
# ifdef XMRIG_ALGO_KAWPOW
|
||||
ALGO_ALIAS_AUTO(KAWPOW_RVN), ALGO_ALIAS(KAWPOW_RVN, "kawpow/rvn"),
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ALGO_ALIAS_AUTO(GHOSTRIDER_RTM), ALGO_ALIAS(GHOSTRIDER_RTM, "ghostrider/rtm"),
|
||||
ALGO_ALIAS(GHOSTRIDER_RTM, "gr"),
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
|
@ -352,7 +366,8 @@ std::vector<xmrig::Algorithm> xmrig::Algorithm::all(const std::function<bool(con
|
|||
RX_0, RX_WOW, RX_ARQ, RX_GRAFT, RX_SFX, RX_KEVA,
|
||||
AR2_CHUKWA, AR2_CHUKWA_V2, AR2_WRKZ,
|
||||
ASTROBWT_DERO,
|
||||
KAWPOW_RVN
|
||||
KAWPOW_RVN,
|
||||
GHOSTRIDER_RTM
|
||||
};
|
||||
|
||||
Algorithms out;
|
||||
|
|
|
@ -65,6 +65,13 @@ public:
|
|||
CN_PICO_0 = 0x63120200, // "cn-pico" CryptoNight-Pico
|
||||
CN_PICO_TLO = 0x63120274, // "cn-pico/tlo" CryptoNight-Pico (TLO)
|
||||
CN_UPX2 = 0x63110200, // "cn/upx2" Uplexa (UPX2)
|
||||
CN_GR_0 = 0x63130100, // "cn/dark" GhostRider
|
||||
CN_GR_1 = 0x63130101, // "cn/dark-lite" GhostRider
|
||||
CN_GR_2 = 0x63150102, // "cn/fast" GhostRider
|
||||
CN_GR_3 = 0x63140103, // "cn/lite" GhostRider
|
||||
CN_GR_4 = 0x63120104, // "cn/turtle" GhostRider
|
||||
CN_GR_5 = 0x63120105, // "cn/turtle-lite" GhostRider
|
||||
GHOSTRIDER_RTM = 0x6c150000, // "ghostrider" GhostRider
|
||||
RX_0 = 0x72151200, // "rx/0" RandomX (reference configuration).
|
||||
RX_WOW = 0x72141177, // "rx/wow" RandomWOW (Wownero).
|
||||
RX_ARQ = 0x72121061, // "rx/arq" RandomARQ (Arqma).
|
||||
|
@ -89,7 +96,8 @@ public:
|
|||
RANDOM_X = 0x72000000,
|
||||
ARGON2 = 0x61000000,
|
||||
ASTROBWT = 0x41000000,
|
||||
KAWPOW = 0x6b000000
|
||||
KAWPOW = 0x6b000000,
|
||||
GHOSTRIDER = 0x6c000000
|
||||
};
|
||||
|
||||
static const char *kINVALID;
|
||||
|
@ -157,6 +165,11 @@ public:
|
|||
static const char *kKAWPOW_RVN;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
static const char* kGHOSTRIDER;
|
||||
static const char* kGHOSTRIDER_RTM;
|
||||
# endif
|
||||
|
||||
inline Algorithm() = default;
|
||||
inline Algorithm(const char *algo) : m_id(parse(algo)) {}
|
||||
inline Algorithm(Id id) : m_id(id) {}
|
||||
|
@ -176,7 +189,7 @@ public:
|
|||
inline Id id() const { return m_id; }
|
||||
inline size_t l2() const { return l2(m_id); }
|
||||
inline uint32_t family() const { return family(m_id); }
|
||||
inline uint32_t maxIntensity() const { return isCN() ? 5 : 1; };
|
||||
inline uint32_t maxIntensity() const { return isCN() ? 5 : ((m_id == GHOSTRIDER_RTM) ? 8 : 1); };
|
||||
|
||||
inline size_t l3() const
|
||||
{
|
||||
|
|
|
@ -34,6 +34,16 @@
|
|||
#include "base/kernel/interfaces/IClientListener.h"
|
||||
#include "net/JobResult.h"
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
#include <cmath>
|
||||
|
||||
extern "C" {
|
||||
#include "crypto/ghostrider/sph_sha2.h"
|
||||
}
|
||||
|
||||
#include "base/tools/Cvt.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
xmrig::EthStratumClient::EthStratumClient(int id, const char *agent, IClientListener *listener) :
|
||||
|
@ -66,29 +76,53 @@ int64_t xmrig::EthStratumClient::submit(const JobResult& result)
|
|||
params.PushBack(m_pool.user().toJSON(), allocator);
|
||||
params.PushBack(result.jobId.toJSON(), allocator);
|
||||
|
||||
std::stringstream s;
|
||||
s << "0x" << std::hex << std::setw(16) << std::setfill('0') << result.nonce;
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (m_pool.algorithm().id() == Algorithm::GHOSTRIDER_RTM) {
|
||||
params.PushBack(Value("00000000000000000000000000000000", static_cast<uint32_t>(m_extraNonce2Size * 2)), allocator);
|
||||
params.PushBack(Value(m_ntime.data(), allocator), allocator);
|
||||
|
||||
s.str(std::string());
|
||||
s << "0x";
|
||||
for (size_t i = 0; i < 32; ++i) {
|
||||
const uint32_t k = result.headerHash()[i];
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << k;
|
||||
std::stringstream s;
|
||||
s << std::hex << std::setw(8) << std::setfill('0') << result.nonce;
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
}
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
else
|
||||
# endif
|
||||
{
|
||||
std::stringstream s;
|
||||
s << "0x" << std::hex << std::setw(16) << std::setfill('0') << result.nonce;
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
|
||||
s.str(std::string());
|
||||
s << "0x";
|
||||
for (size_t i = 0; i < 32; ++i) {
|
||||
const uint32_t k = result.mixHash()[i];
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << k;
|
||||
s.str(std::string());
|
||||
s << "0x";
|
||||
for (size_t i = 0; i < 32; ++i) {
|
||||
const uint32_t k = result.headerHash()[i];
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << k;
|
||||
}
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
|
||||
s.str(std::string());
|
||||
s << "0x";
|
||||
for (size_t i = 0; i < 32; ++i) {
|
||||
const uint32_t k = result.mixHash()[i];
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << k;
|
||||
}
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
}
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
|
||||
JsonRequest::create(doc, m_sequence, "mining.submit", params);
|
||||
|
||||
uint64_t actual_diff = ethash_swap_u64(*((uint64_t*)result.result()));
|
||||
uint64_t actual_diff;
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (result.algorithm == Algorithm::GHOSTRIDER_RTM) {
|
||||
actual_diff = reinterpret_cast<const uint64_t*>(result.result())[3];
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
actual_diff = ethash_swap_u64(*((uint64_t*)result.result()));
|
||||
}
|
||||
|
||||
actual_diff = actual_diff ? (uint64_t(-1) / actual_diff) : 0;
|
||||
|
||||
# ifdef XMRIG_PROXY_PROJECT
|
||||
|
@ -161,6 +195,33 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj
|
|||
setExtraNonce(arr[0]);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
// Handle "mining.set_difficulty".
// For GHOSTRIDER_RTM pools the first element of params is taken as the difficulty,
// multiplied by 65536, rounded up and stored in m_nextDifficulty. For any other
// algorithm the notification is ignored (after the params-is-array check).
// Malformed notifications are logged and dropped without touching m_nextDifficulty.
if (strcmp(method, "mining.set_difficulty") == 0) {
    if (!params.IsArray()) {
        LOG_ERR("%s " RED("invalid mining.set_difficulty notification: params is not an array"), tag());
        return;
    }

    if (m_pool.algorithm().id() != Algorithm::GHOSTRIDER_RTM) {
        return;
    }

    auto arr = params.GetArray();

    if (arr.Empty()) {
        LOG_ERR("%s " RED("invalid mining.set_difficulty notification: params array is empty"), tag());
        return;
    }

    // Accept every JSON number: a pool may send a whole-number difficulty as an
    // integer literal (e.g. 1 instead of 1.0), which IsDouble() alone rejects.
    if (!arr[0].IsNumber()) {
        LOG_ERR("%s " RED("invalid mining.set_difficulty notification: difficulty is not a number"), tag());
        return;
    }

    const double scaled = ceil(arr[0].GetDouble() * 65536.0);

    // Casting a negative, NaN or >= 2^64 double to uint64_t is undefined behavior,
    // and the value comes straight from the pool. The negated comparison also
    // catches NaN.
    if (!(scaled >= 0.0) || (scaled >= 18446744073709551616.0)) {
        LOG_ERR("%s " RED("invalid mining.set_difficulty notification: difficulty is out of range"), tag());
        return;
    }

    m_nextDifficulty = static_cast<uint64_t>(scaled);
}
|
||||
# endif
|
||||
|
||||
if (strcmp(method, "mining.notify") == 0) {
|
||||
if (!params.IsArray()) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: params is not an array"), tag());
|
||||
|
@ -169,44 +230,152 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj
|
|||
|
||||
auto arr = params.GetArray();
|
||||
|
||||
if (arr.Size() < 6) {
|
||||
auto algo = m_pool.algorithm();
|
||||
if (!algo.isValid()) {
|
||||
algo = m_pool.coin().algorithm();
|
||||
}
|
||||
|
||||
const size_t min_arr_size = (algo.id() == Algorithm::GHOSTRIDER_RTM) ? 8 : 6;
|
||||
|
||||
if (arr.Size() < min_arr_size) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: params array has wrong size"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
if (!arr[0].IsString()) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: invalid job id"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
Job job;
|
||||
job.setId(arr[0].GetString());
|
||||
|
||||
auto algo = m_pool.algorithm();
|
||||
if (!algo.isValid()) {
|
||||
algo = m_pool.coin().algorithm();
|
||||
}
|
||||
|
||||
job.setAlgorithm(algo);
|
||||
job.setExtraNonce(m_extraNonce.second);
|
||||
|
||||
std::stringstream s;
|
||||
|
||||
// header hash (32 bytes)
|
||||
s << arr[1].GetString();
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algo.id() == Algorithm::GHOSTRIDER_RTM) {
|
||||
// Raptoreum uses Bitcoin's Stratum protocol
|
||||
// https://en.bitcoinwiki.org/wiki/Stratum_mining_protocol#mining.notify
|
||||
|
||||
// nonce template (8 bytes)
|
||||
for (uint64_t i = 0, k = m_extraNonce.first; i < sizeof(m_extraNonce.first); ++i, k >>= 8) {
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << (k & 0xFF);
|
||||
if (!arr[1].IsString() || !arr[2].IsString() || !arr[3].IsString() || !arr[4].IsArray() || !arr[5].IsString() || !arr[6].IsString() || !arr[7].IsString()) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: invalid param array"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
// Version
|
||||
s << arr[5].GetString();
|
||||
|
||||
// Previous block hash
|
||||
s << arr[1].GetString();
|
||||
|
||||
// Merkle tree root
|
||||
std::string blob = arr[2].GetString();
|
||||
blob += m_extraNonce.second;
|
||||
blob.append(m_extraNonce2Size * 2, '0');
|
||||
blob += arr[3].GetString();
|
||||
|
||||
uint8_t merkle_root[64];
|
||||
|
||||
Buffer buf = Cvt::fromHex(blob.c_str(), blob.length());
|
||||
|
||||
// Get height from coinbase
|
||||
{
|
||||
uint8_t* p = buf.data() + 32;
|
||||
uint8_t* m = p + 128;
|
||||
|
||||
while ((p < m) && (*p != 0xff)) ++p;
|
||||
while ((p < m) && (*p == 0xff)) ++p;
|
||||
|
||||
if ((p < m) && (*(p - 1) == 0xff) && (*(p - 2) == 0xff)) {
|
||||
uint32_t height = *reinterpret_cast<uint16_t*>(p + 2);
|
||||
switch (*(p + 1)) {
|
||||
case 4:
|
||||
height += *reinterpret_cast<uint16_t*>(p + 4) * 0x10000UL;
|
||||
break;
|
||||
case 3:
|
||||
height += *(p + 4) * 0x10000UL;
|
||||
break;
|
||||
}
|
||||
job.setHeight(height);
|
||||
}
|
||||
else {
|
||||
job.setHeight(0);
|
||||
}
|
||||
}
|
||||
|
||||
sha256d(merkle_root, buf.data(), static_cast<int>(buf.size()));
|
||||
|
||||
auto merkle_branches = arr[4].GetArray();
|
||||
for (int i = 0, n = merkle_branches.Size(); i < n; ++i) {
|
||||
auto& b = merkle_branches[i];
|
||||
buf = b.IsString() ? Cvt::fromHex(b.GetString(), b.GetStringLength()) : Buffer();
|
||||
if (buf.size() != 32) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: param 4 is invalid"), tag());
|
||||
return;
|
||||
}
|
||||
memcpy(merkle_root + 32, buf.data(), 32);
|
||||
sha256d(merkle_root, merkle_root, 64);
|
||||
}
|
||||
|
||||
s << Cvt::toHex(merkle_root, 32);
|
||||
|
||||
// ntime
|
||||
m_ntime = arr[7].GetString();
|
||||
s << m_ntime;
|
||||
|
||||
// nbits
|
||||
s << arr[6].GetString();
|
||||
|
||||
blob = s.str();
|
||||
|
||||
if (blob.size() != 76 * 2) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: invalid blob size"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
// zeros up to 80 bytes
|
||||
blob.resize(80 * 2, '0');
|
||||
|
||||
// Invert byte order (no idea why, but it's done in Bitcoin's Stratum)
|
||||
buf = Cvt::fromHex(blob.c_str(), blob.length());
|
||||
for (size_t i = 0; i < 80; i += sizeof(uint32_t)) {
|
||||
uint32_t& k = *reinterpret_cast<uint32_t*>(buf.data() + i);
|
||||
if ((i < 36) || (i >= 68)) {
|
||||
k = ethash_swap_u32(k);
|
||||
}
|
||||
}
|
||||
blob = Cvt::toHex(buf.data(), buf.size());
|
||||
|
||||
job.setBlob(blob.c_str());
|
||||
job.setDiff(m_nextDifficulty);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
// header hash (32 bytes)
|
||||
s << arr[1].GetString();
|
||||
|
||||
std::string blob = s.str();
|
||||
// nonce template (8 bytes)
|
||||
for (uint64_t i = 0, k = m_extraNonce.first; i < sizeof(m_extraNonce.first); ++i, k >>= 8) {
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << (k & 0xFF);
|
||||
}
|
||||
|
||||
// zeros up to 76 bytes
|
||||
blob.resize(76 * 2, '0');
|
||||
job.setBlob(blob.c_str());
|
||||
std::string blob = s.str();
|
||||
|
||||
std::string target_str = arr[3].GetString();
|
||||
target_str.resize(16, '0');
|
||||
const uint64_t target = strtoull(target_str.c_str(), nullptr, 16);
|
||||
job.setDiff(Job::toDiff(target));
|
||||
// zeros up to 76 bytes
|
||||
blob.resize(76 * 2, '0');
|
||||
job.setBlob(blob.c_str());
|
||||
|
||||
job.setHeight(arr[5].GetUint64());
|
||||
std::string target_str = arr[3].GetString();
|
||||
target_str.resize(16, '0');
|
||||
const uint64_t target = strtoull(target_str.c_str(), nullptr, 16);
|
||||
job.setDiff(Job::toDiff(target));
|
||||
|
||||
job.setHeight(arr[5].GetUint64());
|
||||
}
|
||||
|
||||
bool ok = true;
|
||||
m_listener->onVerifyAlgorithm(this, algo, &ok);
|
||||
|
@ -356,11 +525,19 @@ void xmrig::EthStratumClient::onSubscribeResponse(const rapidjson::Value &result
|
|||
throw std::runtime_error("invalid mining.subscribe response: result is not an array");
|
||||
}
|
||||
|
||||
if (result.GetArray().Size() <= 1) {
|
||||
auto arr = result.GetArray();
|
||||
|
||||
if (arr.Size() <= 1) {
|
||||
throw std::runtime_error("invalid mining.subscribe response: result array is too short");
|
||||
}
|
||||
|
||||
setExtraNonce(result.GetArray()[1]);
|
||||
setExtraNonce(arr[1]);
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if ((arr.Size() > 2) && (arr[2].IsUint())) {
|
||||
m_extraNonce2Size = arr[2].GetUint();
|
||||
}
|
||||
# endif
|
||||
|
||||
if (m_pool.isNicehash()) {
|
||||
using namespace rapidjson;
|
||||
|
|
|
@ -57,6 +57,12 @@ private:
|
|||
|
||||
bool m_authorized = false;
|
||||
std::pair<uint64_t, String> m_extraNonce{};
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
uint64_t m_extraNonce2Size = 0;
|
||||
uint64_t m_nextDifficulty = 0;
|
||||
String m_ntime;
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ xmrig::Job::Job(bool nicehash, const Algorithm &algorithm, const String &clientI
|
|||
|
||||
bool xmrig::Job::isEqual(const Job &other) const
|
||||
{
|
||||
return m_id == other.m_id && m_clientId == other.m_clientId && memcmp(m_blob, other.m_blob, sizeof(m_blob)) == 0;
|
||||
return m_id == other.m_id && m_clientId == other.m_clientId && memcmp(m_blob, other.m_blob, sizeof(m_blob)) == 0 && m_target == other.m_target;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -76,7 +76,7 @@ public:
|
|||
inline const String &poolWallet() const { return m_poolWallet; }
|
||||
inline const uint32_t *nonce() const { return reinterpret_cast<const uint32_t*>(m_blob + nonceOffset()); }
|
||||
inline const uint8_t *blob() const { return m_blob; }
|
||||
inline int32_t nonceOffset() const { return (algorithm().family() == Algorithm::KAWPOW) ? 32 : 39; }
|
||||
inline int32_t nonceOffset() const { auto f = algorithm().family(); return (f == Algorithm::KAWPOW) ? 32 : ((f == Algorithm::GHOSTRIDER) ? 76 : 39); }
|
||||
inline size_t nonceSize() const { return (algorithm().family() == Algorithm::KAWPOW) ? 8 : 4; }
|
||||
inline size_t size() const { return m_size; }
|
||||
inline uint32_t *nonce() { return reinterpret_cast<uint32_t*>(m_blob + nonceOffset()); }
|
||||
|
|
|
@ -219,7 +219,8 @@ xmrig::IClient *xmrig::Pool::createClient(int id, IClientListener *listener) con
|
|||
|
||||
if (m_mode == MODE_POOL) {
|
||||
# ifdef XMRIG_ALGO_KAWPOW
|
||||
if ((m_algorithm.family() == Algorithm::KAWPOW) || (m_coin == Coin::RAVEN)) {
|
||||
const uint32_t f = m_algorithm.family();
|
||||
if ((f == Algorithm::KAWPOW) || (f == Algorithm::GHOSTRIDER) || (m_coin == Coin::RAVEN)) {
|
||||
client = new EthStratumClient(id, Platform::userAgent(), listener);
|
||||
}
|
||||
else
|
||||
|
|
|
@ -67,6 +67,11 @@
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
# include "crypto/ghostrider/ghostrider.h"
|
||||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
|
@ -334,6 +339,11 @@ public:
|
|||
# endif
|
||||
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
inline void initGhostRider() const { ghostrider::benchmark(); }
|
||||
# endif
|
||||
|
||||
|
||||
Algorithm algorithm;
|
||||
Algorithms algorithms;
|
||||
bool active = false;
|
||||
|
@ -553,6 +563,10 @@ void xmrig::Miner::setJob(const Job &job, bool donate)
|
|||
constexpr const bool ready = true;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
d_ptr->initGhostRider();
|
||||
# endif
|
||||
|
||||
mutex.unlock();
|
||||
|
||||
d_ptr->active = true;
|
||||
|
|
|
@ -43,6 +43,7 @@ public:
|
|||
constexpr inline size_t memory() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return Algorithm::l3(ALGO); }
|
||||
constexpr inline uint32_t iterations() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return CN_ITER; }
|
||||
constexpr inline uint32_t mask() const { return static_cast<uint32_t>(((memory() - 1) / 16) * 16); }
|
||||
constexpr inline uint32_t half_mem() const { return mask() < memory() / 2; }
|
||||
|
||||
inline static uint32_t iterations(Algorithm::Id algo)
|
||||
{
|
||||
|
@ -108,6 +109,16 @@ public:
|
|||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algo == Algorithm::CN_GR_1) {
|
||||
return 0x3FFF0;
|
||||
}
|
||||
|
||||
if (algo == Algorithm::CN_GR_5) {
|
||||
return 0x1FFF0;
|
||||
}
|
||||
# endif
|
||||
|
||||
return ((Algorithm::l3(algo) - 1) / 16) * 16;
|
||||
}
|
||||
|
||||
|
@ -136,6 +147,18 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::iterations() co
|
|||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::mask() const { return 0x1FFF0; }
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
// Iteration counts for the CN_GR_0 .. CN_GR_5 variants, each a fixed fraction of
// CN_ITER: 1/4 for CN_GR_0 and CN_GR_1, 1/2 for CN_GR_2 and CN_GR_3, 1/8 for
// CN_GR_4 and CN_GR_5.
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_0>::iterations() const { return CN_ITER / 4; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_1>::iterations() const { return CN_ITER / 4; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_2>::iterations() const { return CN_ITER / 2; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_3>::iterations() const { return CN_ITER / 2; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_4>::iterations() const { return CN_ITER / 8; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_5>::iterations() const { return CN_ITER / 8; }
|
||||
|
||||
// Fixed mask() values for CN_GR_1 and CN_GR_5; no mask() override is declared in
// this block for the other CN_GR variants.
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_1>::mask() const { return 0x3FFF0; }
|
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_5>::mask() const { return 0x1FFF0; }
|
||||
#endif
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
|
|
@ -310,6 +310,15 @@ xmrig::CnHash::CnHash()
|
|||
m_map[Algorithm::ASTROBWT_DERO]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ADD_FN(Algorithm::CN_GR_0);
|
||||
ADD_FN(Algorithm::CN_GR_1);
|
||||
ADD_FN(Algorithm::CN_GR_2);
|
||||
ADD_FN(Algorithm::CN_GR_3);
|
||||
ADD_FN(Algorithm::CN_GR_4);
|
||||
ADD_FN(Algorithm::CN_GR_5);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
patchAsmVariants();
|
||||
# endif
|
||||
|
|
|
@ -58,6 +58,9 @@ struct cryptonight_ctx {
|
|||
|
||||
cn_mainloop_fun_ms_abi generated_code;
|
||||
cryptonight_r_data generated_code_data;
|
||||
|
||||
alignas(16) uint8_t save_state[128];
|
||||
bool first_half;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -349,6 +349,9 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
|
|||
}
|
||||
|
||||
|
||||
// Precomputed XOR values for the CryptoNight variant 1 tweak, indexed by a
// single byte x. Each entry equals
//     ((0x7531 >> ((((x >> 3) & 6) | (x & 1)) << 1)) & 0x3) << 28
// so the result depends only on bits 0, 4 and 5 of x and the table repeats
// every 64 entries. One row below covers 16 consecutive values of x.
alignas(64) static const uint32_t tweak1_table[256] = {
    // x = 0x00..0x3F
    268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0,
    805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0,
    268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456,
    805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456,
    // x = 0x40..0x7F
    268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0,
    805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0,
    268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456,
    805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456,
    // x = 0x80..0xBF
    268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0,
    805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0,
    268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456,
    805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456,
    // x = 0xC0..0xFF
    268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0,
    805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0,
    268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456,
    805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456
};
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
|
@ -368,12 +371,7 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
|
|||
|
||||
uint64_t vh = vgetq_lane_u64(tmp, 1);
|
||||
|
||||
uint8_t x = vh >> 24;
|
||||
static const uint16_t table = 0x7531;
|
||||
const uint8_t index = (((x >> (3)) & 6) | (x & 1)) << 1;
|
||||
vh ^= ((table >> index) & 0x3) << 28;
|
||||
|
||||
mem_out[1] = vh;
|
||||
mem_out[1] = vh ^ tweak1_table[static_cast<uint8_t>(vh >> 24)];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ const static uint8_t test_output_r[] = {
|
|||
|
||||
|
||||
// "cn/0"
|
||||
const static uint8_t test_output_v0[160] = {
|
||||
const static uint8_t test_output_v0[256] = {
|
||||
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
|
||||
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
|
||||
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
|
||||
|
@ -115,7 +115,7 @@ const static uint8_t test_output_v0[160] = {
|
|||
|
||||
|
||||
// "cn/1" Cryptonight variant 1 (Monero v7)
|
||||
const static uint8_t test_output_v1[160] = {
|
||||
const static uint8_t test_output_v1[256] = {
|
||||
0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
|
||||
0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
|
||||
0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
|
||||
|
@ -130,7 +130,7 @@ const static uint8_t test_output_v1[160] = {
|
|||
|
||||
|
||||
// "cn/2" Cryptonight variant 2 (Monero v8)
|
||||
const static uint8_t test_output_v2[160] = {
|
||||
const static uint8_t test_output_v2[256] = {
|
||||
0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
|
||||
0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
|
||||
0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
|
||||
|
@ -145,7 +145,7 @@ const static uint8_t test_output_v2[160] = {
|
|||
|
||||
|
||||
// "cn/half"
|
||||
const static uint8_t test_output_half[160] = {
|
||||
const static uint8_t test_output_half[256] = {
|
||||
0x5D, 0x4F, 0xBC, 0x35, 0x60, 0x97, 0xEA, 0x64, 0x40, 0xB0, 0x88, 0x8E, 0xDE, 0xB6, 0x35, 0xDD,
|
||||
0xC8, 0x4A, 0x0E, 0x39, 0x7C, 0x86, 0x84, 0x56, 0x89, 0x5C, 0x3F, 0x29, 0xBE, 0x73, 0x12, 0xA7,
|
||||
0x02, 0xE6, 0x1D, 0x2B, 0xBC, 0x84, 0xB6, 0x71, 0x96, 0x71, 0xD5, 0x0C, 0xAC, 0x76, 0x0E, 0x6B,
|
||||
|
@ -160,7 +160,7 @@ const static uint8_t test_output_half[160] = {
|
|||
|
||||
|
||||
// "cn/msr" Masari (MSR)
|
||||
const static uint8_t test_output_msr[160] = {
|
||||
const static uint8_t test_output_msr[256] = {
|
||||
0x3C, 0x7A, 0x61, 0x08, 0x4C, 0x5E, 0xB8, 0x65, 0xB4, 0x98, 0xAB, 0x2F, 0x5A, 0x1A, 0xC5, 0x2C,
|
||||
0x49, 0xC1, 0x77, 0xC2, 0xD0, 0x13, 0x34, 0x42, 0xD6, 0x5E, 0xD5, 0x14, 0x33, 0x5C, 0x82, 0xC5,
|
||||
0x69, 0xDF, 0x38, 0x51, 0x1B, 0xB3, 0xEB, 0x7D, 0xE7, 0x6B, 0x08, 0x8E, 0xB6, 0x7E, 0xB7, 0x1C,
|
||||
|
@ -175,7 +175,7 @@ const static uint8_t test_output_msr[160] = {
|
|||
|
||||
|
||||
// "cn/xao" Alloy (XAO)
|
||||
const static uint8_t test_output_xao[160] = {
|
||||
const static uint8_t test_output_xao[256] = {
|
||||
0x9A, 0x29, 0xD0, 0xC4, 0xAF, 0xDC, 0x63, 0x9B, 0x65, 0x53, 0xB1, 0xC8, 0x37, 0x35, 0x11, 0x4C,
|
||||
0x5D, 0x77, 0x16, 0x21, 0x42, 0x97, 0x5C, 0xB8, 0x50, 0xC0, 0xA5, 0x1F, 0x64, 0x07, 0xBD, 0x33,
|
||||
0xF1, 0xC9, 0x98, 0x40, 0x42, 0xDE, 0x39, 0xD1, 0xBA, 0x2D, 0xAD, 0xEC, 0xFE, 0xEA, 0xD8, 0x46,
|
||||
|
@ -190,7 +190,7 @@ const static uint8_t test_output_xao[160] = {
|
|||
|
||||
|
||||
// "cn/rto" Arto (RTO)
|
||||
const static uint8_t test_output_rto[160] = {
|
||||
const static uint8_t test_output_rto[256] = {
|
||||
0x82, 0x66, 0x1E, 0x1C, 0x6E, 0x64, 0x36, 0x66, 0x84, 0x06, 0x32, 0x7A, 0x9B, 0xB1, 0x13, 0x19,
|
||||
0xA5, 0x56, 0x16, 0x15, 0xDF, 0xEC, 0x1C, 0x9E, 0xE3, 0x88, 0x4A, 0x6C, 0x1C, 0xEB, 0x76, 0xA5,
|
||||
0xB3, 0xFB, 0xF4, 0x3F, 0x2B, 0x6A, 0x3A, 0x39, 0xA3, 0x6E, 0x08, 0x33, 0x67, 0x90, 0x31, 0xB9,
|
||||
|
@ -204,7 +204,7 @@ const static uint8_t test_output_rto[160] = {
|
|||
};
|
||||
|
||||
// "cn/rwz"
|
||||
const static uint8_t test_output_rwz[160] = {
|
||||
const static uint8_t test_output_rwz[256] = {
|
||||
0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85,
|
||||
0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8,
|
||||
0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52,
|
||||
|
@ -218,7 +218,7 @@ const static uint8_t test_output_rwz[160] = {
|
|||
};
|
||||
|
||||
// "cn/zls"
|
||||
const static uint8_t test_output_zls[160] = {
|
||||
const static uint8_t test_output_zls[256] = {
|
||||
0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E,
|
||||
0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2,
|
||||
0x0D, 0x62, 0x5B, 0x42, 0x18, 0xE2, 0x76, 0xAD, 0xD0, 0x74, 0x90, 0x60, 0x8D, 0xC4, 0xC7, 0x80,
|
||||
|
@ -232,7 +232,7 @@ const static uint8_t test_output_zls[160] = {
|
|||
};
|
||||
|
||||
// "cn/ccx"
|
||||
const static uint8_t test_output_ccx[160] = {
|
||||
const static uint8_t test_output_ccx[256] = {
|
||||
0xB3, 0xA1, 0x67, 0x86, 0xD2, 0xC9, 0x85, 0xEC, 0xAD, 0xC4, 0x5F, 0x91, 0x05, 0x27, 0xC7, 0xA1,
|
||||
0x96, 0xF0, 0xE1, 0xE9, 0x7C, 0x87, 0x09, 0x38, 0x1D, 0x7D, 0x41, 0x93, 0x35, 0xF8, 0x16, 0x72,
|
||||
0xC3, 0xBD, 0x8D, 0xE8, 0xD5, 0xAE, 0xB8, 0x59, 0x0A, 0x6C, 0xCB, 0x7B, 0x41, 0x30, 0xF7, 0x04,
|
||||
|
@ -246,7 +246,7 @@ const static uint8_t test_output_ccx[160] = {
|
|||
};
|
||||
|
||||
// "cn/double"
|
||||
const static uint8_t test_output_double[160] = {
|
||||
const static uint8_t test_output_double[256] = {
|
||||
0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,
|
||||
0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21,
|
||||
0x49, 0xCE, 0x8E, 0xF3, 0xBC, 0x8A, 0x36, 0xBF, 0x86, 0x37, 0x89, 0x55, 0x09, 0xBA, 0x22, 0xF8,
|
||||
|
@ -261,7 +261,7 @@ const static uint8_t test_output_double[160] = {
|
|||
|
||||
#ifdef XMRIG_ALGO_CN_LITE
|
||||
// "cn-lite/0"
|
||||
const static uint8_t test_output_v0_lite[160] = {
|
||||
const static uint8_t test_output_v0_lite[256] = {
|
||||
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
|
||||
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
|
||||
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
|
||||
|
@ -276,7 +276,7 @@ const static uint8_t test_output_v0_lite[160] = {
|
|||
|
||||
|
||||
// "cn-lite/1" AEON v7
|
||||
const static uint8_t test_output_v1_lite[160] = {
|
||||
const static uint8_t test_output_v1_lite[256] = {
|
||||
0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
|
||||
0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
|
||||
0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
|
||||
|
@ -293,7 +293,7 @@ const static uint8_t test_output_v1_lite[160] = {
|
|||
|
||||
#ifdef XMRIG_ALGO_CN_HEAVY
|
||||
// "cn-heavy/0"
|
||||
const static uint8_t test_output_v0_heavy[160] = {
|
||||
const static uint8_t test_output_v0_heavy[256] = {
|
||||
0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
|
||||
0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2,
|
||||
0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A,
|
||||
|
@ -308,7 +308,7 @@ const static uint8_t test_output_v0_heavy[160] = {
|
|||
|
||||
|
||||
// "cn-heavy/xhv"
|
||||
const static uint8_t test_output_xhv_heavy[160] = {
|
||||
const static uint8_t test_output_xhv_heavy[256] = {
|
||||
0x5A, 0xC3, 0xF7, 0x85, 0xC4, 0x90, 0xC5, 0x85, 0x50, 0xEC, 0x95, 0xD2, 0x72, 0x65, 0x63, 0x57,
|
||||
0x7E, 0x7C, 0x1C, 0x21, 0x2D, 0x0C, 0xDE, 0x59, 0x12, 0x73, 0x20, 0x1E, 0x44, 0xFD, 0xD5, 0xB6,
|
||||
0x1F, 0x4E, 0xB2, 0x0A, 0x36, 0x51, 0x4B, 0xF5, 0x4D, 0xC9, 0xE0, 0x90, 0x2C, 0x16, 0x47, 0x3F,
|
||||
|
@ -323,7 +323,7 @@ const static uint8_t test_output_xhv_heavy[160] = {
|
|||
|
||||
|
||||
// "cn-heavy/tube"
|
||||
const static uint8_t test_output_tube_heavy[160] = {
|
||||
const static uint8_t test_output_tube_heavy[256] = {
|
||||
0xFE, 0x53, 0x35, 0x20, 0x76, 0xEA, 0xE6, 0x89, 0xFA, 0x3B, 0x4F, 0xDA, 0x61, 0x46, 0x34, 0xCF,
|
||||
0xC3, 0x12, 0xEE, 0x0C, 0x38, 0x7D, 0xF2, 0xB8, 0xB7, 0x4D, 0xA2, 0xA1, 0x59, 0x74, 0x12, 0x35,
|
||||
0xCD, 0x3F, 0x29, 0xDF, 0x07, 0x4A, 0x14, 0xAD, 0x0B, 0x98, 0x99, 0x37, 0xCA, 0x14, 0x68, 0xA3,
|
||||
|
@ -340,7 +340,7 @@ const static uint8_t test_output_tube_heavy[160] = {
|
|||
|
||||
#ifdef XMRIG_ALGO_CN_PICO
|
||||
// "cn-pico/trtl"
|
||||
const static uint8_t test_output_pico_trtl[160] = {
|
||||
const static uint8_t test_output_pico_trtl[256] = {
|
||||
0x08, 0xF4, 0x21, 0xD7, 0x83, 0x31, 0x17, 0x30, 0x0E, 0xDA, 0x66, 0xE9, 0x8F, 0x4A, 0x25, 0x69,
|
||||
0x09, 0x3D, 0xF3, 0x00, 0x50, 0x01, 0x73, 0x94, 0x4E, 0xFC, 0x40, 0x1E, 0x9A, 0x4A, 0x17, 0xAF,
|
||||
0xB2, 0x17, 0x2E, 0xC9, 0x46, 0x6E, 0x1A, 0xEE, 0x70, 0xEC, 0x85, 0x72, 0xA1, 0x4C, 0x23, 0x3E,
|
||||
|
@ -355,7 +355,7 @@ const static uint8_t test_output_pico_trtl[160] = {
|
|||
|
||||
|
||||
// "cn-pico/tlo"
|
||||
const static uint8_t test_output_pico_tlo[160] = {
|
||||
const static uint8_t test_output_pico_tlo[256] = {
|
||||
0x99, 0x75, 0xF2, 0xC1, 0xB3, 0xB4, 0x54, 0x34, 0xA4, 0x93, 0x86, 0x21, 0x30, 0x97, 0xF3, 0x1B,
|
||||
0xB4, 0xB9, 0xA6, 0x58, 0x6A, 0x7E, 0x81, 0xF4, 0x42, 0x9F, 0x6D, 0x5F, 0x65, 0xC3, 0x8D, 0x1A,
|
||||
0xFC, 0x67, 0xDF, 0xCC, 0xB5, 0xFC, 0x90, 0xD7, 0x85, 0x5A, 0xE9, 0x03, 0x36, 0x1E, 0xAB, 0xD7,
|
||||
|
@ -372,7 +372,7 @@ const static uint8_t test_output_pico_tlo[160] = {
|
|||
|
||||
#ifdef XMRIG_ALGO_CN_FEMTO
|
||||
// "cn/upx2"
|
||||
const static uint8_t test_output_femto_upx2[160] = {
|
||||
const static uint8_t test_output_femto_upx2[256] = {
|
||||
0xAA, 0xBB, 0xB8, 0xED, 0x14, 0xA8, 0x35, 0xFA, 0x22, 0xCF, 0xB1, 0xB5, 0xDE, 0xA8, 0x72, 0xB0,
|
||||
0xA1, 0xD6, 0xCB, 0xD8, 0x46, 0xF4, 0x39, 0x1C, 0x0F, 0x01, 0xF3, 0x87, 0x5E, 0x3A, 0x37, 0x61,
|
||||
0x38, 0x59, 0x15, 0x72, 0xF8, 0x20, 0xD4, 0xDE, 0x25, 0x3C, 0xF5, 0x5A, 0x21, 0x92, 0xB6, 0x22,
|
||||
|
@ -389,7 +389,7 @@ const static uint8_t test_output_femto_upx2[160] = {
|
|||
|
||||
#ifdef XMRIG_ALGO_ARGON2
|
||||
// "argon2/chukwa"
|
||||
const static uint8_t argon2_chukwa_test_out[160] = {
|
||||
const static uint8_t argon2_chukwa_test_out[256] = {
|
||||
0xC1, 0x58, 0xA1, 0x05, 0xAE, 0x75, 0xC7, 0x56, 0x1C, 0xFD, 0x02, 0x90, 0x83, 0xA4, 0x7A, 0x87,
|
||||
0x65, 0x3D, 0x51, 0xF9, 0x14, 0x12, 0x8E, 0x21, 0xC1, 0x97, 0x1D, 0x8B, 0x10, 0xC4, 0x90, 0x34,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
@ -403,7 +403,7 @@ const static uint8_t argon2_chukwa_test_out[160] = {
|
|||
};
|
||||
|
||||
// "argon2/chukwav2"
|
||||
const static uint8_t argon2_chukwa_v2_test_out[160] = {
|
||||
const static uint8_t argon2_chukwa_v2_test_out[256] = {
|
||||
0x77, 0xCF, 0x69, 0x58, 0xB3, 0x53, 0x6E, 0x1F, 0x9F, 0x0D, 0x1E, 0xA1, 0x65, 0xF2, 0x28, 0x11,
|
||||
0xCA, 0x7B, 0xC4, 0x87, 0xEA, 0x9F, 0x52, 0x03, 0x0B, 0x50, 0x50, 0xC1, 0x7F, 0xCD, 0xD8, 0xF5,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
@ -417,7 +417,7 @@ const static uint8_t argon2_chukwa_v2_test_out[160] = {
|
|||
};
|
||||
|
||||
// "argon2/wrkz"
|
||||
const static uint8_t argon2_wrkz_test_out[160] = {
|
||||
const static uint8_t argon2_wrkz_test_out[256] = {
|
||||
0x35, 0xE0, 0x83, 0xD4, 0xB9, 0xC6, 0x4C, 0x2A, 0x68, 0x82, 0x0A, 0x43, 0x1F, 0x61, 0x31, 0x19,
|
||||
0x98, 0xA8, 0xCD, 0x18, 0x64, 0xDB, 0xA4, 0x07, 0x7E, 0x25, 0xB7, 0xF1, 0x21, 0xD5, 0x4B, 0xD1,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
@ -434,7 +434,7 @@ const static uint8_t argon2_wrkz_test_out[160] = {
|
|||
|
||||
#ifdef XMRIG_ALGO_ASTROBWT
|
||||
// "astrobwt"
|
||||
const static uint8_t astrobwt_dero_test_out[160] = {
|
||||
const static uint8_t astrobwt_dero_test_out[256] = {
|
||||
0x7E, 0x88, 0x44, 0xF2, 0xD6, 0xB7, 0xA4, 0x34, 0x98, 0xFE, 0x6D, 0x22, 0x65, 0x27, 0x68, 0x90,
|
||||
0x23, 0xDA, 0x8A, 0x52, 0xF9, 0xFC, 0x4E, 0xC6, 0x9E, 0x5A, 0xAA, 0xA6, 0x3E, 0xDC, 0xE1, 0xC1,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
|
|
@ -285,23 +285,41 @@ inline constexpr uint64_t interleaved_index<0>(uint64_t k)
|
|||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||
static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
const __m128i* input = reinterpret_cast<const __m128i*>(ctx->state);
|
||||
__m128i* output = reinterpret_cast<__m128i*>(ctx->memory);
|
||||
|
||||
aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
if (props.half_mem() && !ctx->first_half) {
|
||||
const __m128i* p = reinterpret_cast<const __m128i*>(ctx->save_state);
|
||||
xin0 = _mm_load_si128(p + 0);
|
||||
xin1 = _mm_load_si128(p + 1);
|
||||
xin2 = _mm_load_si128(p + 2);
|
||||
xin3 = _mm_load_si128(p + 3);
|
||||
xin4 = _mm_load_si128(p + 4);
|
||||
xin5 = _mm_load_si128(p + 5);
|
||||
xin6 = _mm_load_si128(p + 6);
|
||||
xin7 = _mm_load_si128(p + 7);
|
||||
}
|
||||
else {
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
}
|
||||
|
||||
if (props.isHeavy()) {
|
||||
for (size_t i = 0; i < 16; i++) {
|
||||
|
@ -320,50 +338,73 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
|||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||
if (interleave > 0) {
|
||||
_mm_prefetch((const char*)(output), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(output + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
||||
}
|
||||
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
|
||||
constexpr int prefetch_dist = 2048 / sizeof(__m128i);
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
__m128i* e = output + N - prefetch_dist;
|
||||
__m128i* prefetch_ptr = output + prefetch_dist;
|
||||
|
||||
_mm_store_si128(output + 0, xin0);
|
||||
_mm_store_si128(output + 1, xin1);
|
||||
_mm_store_si128(output + 2, xin2);
|
||||
_mm_store_si128(output + 3, xin3);
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
do {
|
||||
_mm_prefetch((const char*)(prefetch_ptr), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(prefetch_ptr + output_increment), _MM_HINT_T0);
|
||||
|
||||
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
|
||||
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + output_increment + 0, xin4);
|
||||
_mm_store_si128(output + output_increment + 1, xin5);
|
||||
_mm_store_si128(output + output_increment + 2, xin6);
|
||||
_mm_store_si128(output + output_increment + 3, xin7);
|
||||
_mm_store_si128(output + 0, xin0);
|
||||
_mm_store_si128(output + 1, xin1);
|
||||
_mm_store_si128(output + 2, xin2);
|
||||
_mm_store_si128(output + 3, xin3);
|
||||
|
||||
output += output_increment * 2;
|
||||
_mm_store_si128(output + output_increment + 0, xin4);
|
||||
_mm_store_si128(output + output_increment + 1, xin5);
|
||||
_mm_store_si128(output + output_increment + 2, xin6);
|
||||
_mm_store_si128(output + output_increment + 3, xin7);
|
||||
|
||||
output += output_increment * 2;
|
||||
prefetch_ptr += output_increment * 2;
|
||||
} while (output < e);
|
||||
e += prefetch_dist;
|
||||
prefetch_ptr = output;
|
||||
}
|
||||
|
||||
if (props.half_mem() && ctx->first_half) {
|
||||
__m128i* p = reinterpret_cast<__m128i*>(ctx->save_state);
|
||||
_mm_store_si128(p + 0, xin0);
|
||||
_mm_store_si128(p + 1, xin1);
|
||||
_mm_store_si128(p + 2, xin2);
|
||||
_mm_store_si128(p + 3, xin3);
|
||||
_mm_store_si128(p + 4, xin4);
|
||||
_mm_store_si128(p + 5, xin5);
|
||||
_mm_store_si128(p + 6, xin6);
|
||||
_mm_store_si128(p + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||
static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
constexpr bool IS_HEAVY = props.isHeavy();
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
const __m128i *input = reinterpret_cast<const __m128i*>(ctx->memory);
|
||||
__m128i *output = reinterpret_cast<__m128i*>(ctx->state);
|
||||
|
||||
aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm_load_si128(output + 4);
|
||||
|
@ -376,46 +417,54 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
|||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
const __m128i* input_begin = input;
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
||||
|
||||
constexpr int input_increment = (64 << interleave) / sizeof(__m128i);
|
||||
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + input_increment + 0), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + input_increment + 1), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + input_increment + 2), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + input_increment + 3), xout7);
|
||||
|
||||
input += input_increment * 2;
|
||||
i += 8;
|
||||
|
||||
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
|
||||
_mm_prefetch((const char*)(input), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
||||
for (size_t part = 0; part < (props.half_mem() ? 2 : 1); ++part) {
|
||||
if (props.half_mem() && (part == 1)) {
|
||||
input = input_begin;
|
||||
ctx->first_half = false;
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(ctx);
|
||||
}
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
for (size_t i = 0; i < N;) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
||||
|
||||
if (IS_HEAVY) {
|
||||
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
constexpr int input_increment = (64 << interleave) / sizeof(__m128i);
|
||||
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + input_increment + 0), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + input_increment + 1), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + input_increment + 2), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + input_increment + 3), xout7);
|
||||
|
||||
input += input_increment * 2;
|
||||
i += 8;
|
||||
|
||||
if (i < N) {
|
||||
_mm_prefetch((const char*)(input), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input + input_increment), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
|
||||
if (IS_HEAVY) {
|
||||
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_HEAVY) {
|
||||
input = input_begin;
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
|
||||
for (size_t i = 0; i < N;) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
|
@ -523,6 +572,9 @@ static inline __m128i int_sqrt_v2(const uint64_t n0)
|
|||
void v4_soft_aes_compile_code(const V4_Instruction *code, int code_size, void *machine_code, xmrig::Assembly ASM);
|
||||
|
||||
|
||||
// Precomputed XOR values for the CryptoNight variant 1 tweak, indexed by a
// single byte x. Each entry equals
//     ((0x7531 >> ((((x >> 3) & 6) | (x & 1)) << 1)) & 0x3) << 28
// so the result depends only on bits 0, 4 and 5 of x and the table repeats
// every 64 entries. One row below covers 16 consecutive values of x.
alignas(64) static const uint32_t tweak1_table[256] = {
    // x = 0x00..0x3F
    268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0,
    805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0,
    268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456,
    805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456,
    // x = 0x40..0x7F
    268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0,
    805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0,
    268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456,
    805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456,
    // x = 0x80..0xBF
    268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0,
    805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0,
    268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456,
    805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456,
    // x = 0xC0..0xFF
    268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0, 268435456, 0,
    805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0, 805306368, 0,
    268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456, 268435456,
    805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456, 805306368, 268435456
};
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
|
@ -541,12 +593,7 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
|
|||
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
|
||||
uint64_t vh = _mm_cvtsi128_si64(tmp);
|
||||
|
||||
uint8_t x = static_cast<uint8_t>(vh >> 24);
|
||||
static const uint16_t table = 0x7531;
|
||||
const uint8_t index = (((x >> (3)) & 6) | (x & 1)) << 1;
|
||||
vh ^= ((table >> index) & 0x3) << 28;
|
||||
|
||||
mem_out[1] = vh;
|
||||
mem_out[1] = vh ^ tweak1_table[static_cast<uint32_t>(vh) >> 24];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -587,7 +634,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
|
||||
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(ctx[0]);
|
||||
|
||||
uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||
uint8_t *l0 = ctx[0]->memory;
|
||||
|
@ -742,7 +793,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
# endif
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(ctx[0]);
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
@ -833,7 +884,11 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
|
@ -915,7 +970,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
@ -937,8 +992,12 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
keccak(input, size, ctx[0]->state);
|
||||
keccak(input + size, size, ctx[1]->state);
|
||||
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
cnv2_double_mainloop_sandybridge_asm(ctx);
|
||||
|
@ -977,8 +1036,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
|
@ -1029,8 +1088,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
VARIANT4_RANDOM_MATH_INIT(1);
|
||||
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
|
@ -1225,8 +1288,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
@ -1236,6 +1299,188 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
|
||||
static inline void cryptonight_monero_tweak_gr(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i cx)
|
||||
{
|
||||
__m128i tmp = _mm_xor_si128(bx0, cx);
|
||||
mem_out[0] = _mm_cvtsi128_si64(tmp);
|
||||
|
||||
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
|
||||
uint64_t vh = _mm_cvtsi128_si64(tmp);
|
||||
|
||||
mem_out[1] = vh ^ tweak1_table[static_cast<uint32_t>(vh) >> 24];
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input + size * 0, size, ctx[0]->state);
|
||||
keccak(input + size * 1, size, ctx[1]->state);
|
||||
keccak(input + size * 2, size, ctx[2]->state);
|
||||
keccak(input + size * 3, size, ctx[3]->state);
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
uint8_t* l1 = ctx[1]->memory;
|
||||
uint8_t* l2 = ctx[2]->memory;
|
||||
uint8_t* l3 = ctx[3]->memory;
|
||||
|
||||
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
|
||||
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
|
||||
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
VARIANT1_INIT(2);
|
||||
VARIANT1_INIT(3);
|
||||
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
ctx[2]->first_half = true;
|
||||
ctx[3]->first_half = true;
|
||||
}
|
||||
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t al2 = h2[0] ^ h2[4];
|
||||
uint64_t al3 = h3[0] ^ h3[4];
|
||||
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
uint64_t ah2 = h2[1] ^ h2[5];
|
||||
uint64_t ah3 = h3[1] ^ h3[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx20 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
|
||||
__m128i bx30 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
uint64_t idx2 = al2;
|
||||
uint64_t idx3 = al3;
|
||||
|
||||
for (size_t i = 0; i < props.iterations(); i++) {
|
||||
__m128i cx0, cx1, cx2, cx3;
|
||||
if (!SOFT_AES) {
|
||||
cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
|
||||
cx2 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l2[idx2 & MASK]));
|
||||
cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
|
||||
}
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
const __m128i ax2 = _mm_set_epi64x(ah2, al2);
|
||||
const __m128i ax3 = _mm_set_epi64x(ah3, al3);
|
||||
|
||||
if (SOFT_AES) {
|
||||
cx0 = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx1 = soft_aesenc(&l1[idx1 & MASK], ax1, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx2 = soft_aesenc(&l2[idx2 & MASK], ax2, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx3 = soft_aesenc(&l3[idx3 & MASK], ax3, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
else {
|
||||
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||
cx2 = _mm_aesenc_si128(cx2, ax2);
|
||||
cx3 = _mm_aesenc_si128(cx3, ax3);
|
||||
}
|
||||
|
||||
cryptonight_monero_tweak_gr((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, cx0);
|
||||
cryptonight_monero_tweak_gr((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, cx1);
|
||||
cryptonight_monero_tweak_gr((uint64_t*)&l2[idx2 & MASK], l2, idx2 & MASK, ax2, bx20, cx2);
|
||||
cryptonight_monero_tweak_gr((uint64_t*)&l3[idx3 & MASK], l3, idx3 & MASK, ax3, bx30, cx3);
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
idx2 = _mm_cvtsi128_si64(cx2);
|
||||
idx3 = _mm_cvtsi128_si64(cx3);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
|
||||
cl = ((uint64_t*)&l0[idx0 & MASK])[0];
|
||||
ch = ((uint64_t*)&l0[idx0 & MASK])[1];
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*)&l1[idx1 & MASK])[0];
|
||||
ch = ((uint64_t*)&l1[idx1 & MASK])[1];
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
((uint64_t*)&l1[idx1 & MASK])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
|
||||
cl = ((uint64_t*)&l2[idx2 & MASK])[0];
|
||||
ch = ((uint64_t*)&l2[idx2 & MASK])[1];
|
||||
lo = __umul128(idx2, cl, &hi);
|
||||
al2 += hi;
|
||||
ah2 += lo;
|
||||
((uint64_t*)&l2[idx2 & MASK])[0] = al2;
|
||||
((uint64_t*)&l2[idx2 & MASK])[1] = ah2 ^ tweak1_2_2;
|
||||
al2 ^= cl;
|
||||
ah2 ^= ch;
|
||||
idx2 = al2;
|
||||
|
||||
cl = ((uint64_t*)&l3[idx3 & MASK])[0];
|
||||
ch = ((uint64_t*)&l3[idx3 & MASK])[1];
|
||||
lo = __umul128(idx3, cl, &hi);
|
||||
al3 += hi;
|
||||
ah3 += lo;
|
||||
((uint64_t*)&l3[idx3 & MASK])[0] = al3;
|
||||
((uint64_t*)&l3[idx3 & MASK])[1] = ah3 ^ tweak1_2_3;
|
||||
al3 ^= cl;
|
||||
ah3 ^= ch;
|
||||
idx3 = al3;
|
||||
|
||||
bx00 = cx0;
|
||||
bx10 = cx1;
|
||||
bx20 = cx2;
|
||||
bx30 = cx3;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
keccakf(h2, 24);
|
||||
keccakf(h3, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
extra_hashes[ctx[2]->state[0] & 3](ctx[2]->state, 200, output + 64);
|
||||
extra_hashes[ctx[3]->state[0] & 3](ctx[3]->state, 200, output + 96);
|
||||
}
|
||||
|
||||
|
||||
#define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
|
||||
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
|
||||
c = _mm_load_si128(ptr); \
|
||||
|
@ -1371,7 +1616,10 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
if (props.half_mem()) {
|
||||
ctx[i]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1416,7 +1664,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
@ -1426,6 +1674,14 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
const auto arch = Cpu::info()->arch();
|
||||
if ((arch >= ICpuInfo::ARCH_ZEN) && (arch <= ICpuInfo::ARCH_ZEN3)) {
|
||||
if ((ALGO == Algorithm::CN_GR_0) || (ALGO == Algorithm::CN_GR_1) || (ALGO == Algorithm::CN_GR_2) || (ALGO == Algorithm::CN_GR_3) || (ALGO == Algorithm::CN_GR_4) || (ALGO == Algorithm::CN_GR_5)) {
|
||||
cryptonight_quad_hash_zen<ALGO, SOFT_AES>(input, size, output, ctx, height);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
@ -1445,7 +1701,10 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
if (props.half_mem()) {
|
||||
ctx[i]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1498,7 +1757,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
@ -1527,7 +1786,10 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|||
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
if (props.half_mem()) {
|
||||
ctx[i]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1588,7 +1850,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
|
59
src/crypto/ghostrider/CMakeLists.txt
Normal file
59
src/crypto/ghostrider/CMakeLists.txt
Normal file
|
@ -0,0 +1,59 @@
|
|||
# Build script for the GhostRider static library: the sph_* hash
# implementations plus ghostrider.cpp.
cmake_minimum_required(VERSION 2.8.12)
project(GhostRider)

# Headers that belong to the library target.
set(HEADERS
    sph_types.h
    sph_blake.h
    sph_bmw.h
    sph_cubehash.h
    sph_echo.h
    sph_fugue.h
    sph_groestl.h
    sph_hamsi.h
    sph_jh.h
    sph_keccak.h
    sph_luffa.h
    sph_sha2.h
    sph_shabal.h
    sph_shavite.h
    sph_simd.h
    sph_skein.h
    sph_whirlpool.h
    ghostrider.h
)

# Translation units compiled into the library.
set(SOURCES
    sph_blake.c
    sph_bmw.c
    sph_cubehash.c
    sph_echo.c
    sph_fugue.c
    sph_groestl.c
    sph_hamsi.c
    sph_jh.c
    sph_keccak.c
    sph_luffa.c
    sph_shabal.c
    sph_shavite.c
    sph_simd.c
    sph_sha2.c
    sph_skein.c
    sph_whirlpool.c
    ghostrider.cpp
)

# Per-file optimisation overrides, applied only when the C compiler is GCC.
if (CMAKE_C_COMPILER_ID MATCHES GNU)
    # gcc 11.2.0 crashes with -ftree-vrp
    set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vrp")

    # gcc 11.2.0 creates incorrect code with -O3
    set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "-O2")

    set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "-Ofast -Wno-unused-const-variable")
endif()

# Include paths: this directory, the directory two levels up, and libuv.
include_directories(.)
include_directories(../..)
include_directories(${UV_INCLUDE_DIR})

add_library(ghostrider STATIC ${HEADERS} ${SOURCES})
|
392
src/crypto/ghostrider/aes_helper.c
Normal file
392
src/crypto/ghostrider/aes_helper.c
Normal file
|
@ -0,0 +1,392 @@
|
|||
/* $Id: aes_helper.c 220 2010-06-09 09:21:50Z tp $ */
|
||||
/*
|
||||
* AES tables. This file is not meant to be compiled by itself; it
|
||||
* is included by some hash function implementations. It contains
|
||||
* the precomputed tables and helper macros for evaluating an AES
|
||||
* round, optionally with a final XOR with a subkey.
|
||||
*
|
||||
* By default, this file defines the tables and macros for little-endian
|
||||
* processing (i.e. it is assumed that the input bytes have been read
|
||||
* from memory and assembled with the little-endian convention). If
|
||||
* the 'AES_BIG_ENDIAN' macro is defined (to a non-zero integer value)
|
||||
* when this file is included, then the tables and macros for big-endian
|
||||
* processing are defined instead. The big-endian tables and macros have
|
||||
* names distinct from the little-endian tables and macros, hence it is
|
||||
* possible to have both simultaneously, by including this file twice
|
||||
* (with and without the AES_BIG_ENDIAN macro).
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include "sph_types.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
#if AES_BIG_ENDIAN

/*
 * Big-endian variant: AESx() reverses the byte order of each 32-bit table
 * constant, the tables get the _BE suffix, and the round macro takes row 0
 * from the most significant byte of each input word.
 */
#define AESx(x)   ( ((SPH_C32(x) >> 24) & SPH_C32(0x000000FF)) \
                  | ((SPH_C32(x) >>  8) & SPH_C32(0x0000FF00)) \
                  | ((SPH_C32(x) <<  8) & SPH_C32(0x00FF0000)) \
                  | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))

#define AES0      AES0_BE
#define AES1      AES1_BE
#define AES2      AES2_BE
#define AES3      AES3_BE

/*
 * One table-driven AES round on the four words X0..X3, XOR-ed with the
 * subkey words K0..K3; the result is assigned to Y0..Y3.
 */
#define AES_ROUND_BE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3)   do { \
		(Y0) = AES0[((X0) >> 24) & 0xFF] \
			^ AES1[((X1) >> 16) & 0xFF] \
			^ AES2[((X2) >> 8) & 0xFF] \
			^ AES3[(X3) & 0xFF] ^ (K0); \
		(Y1) = AES0[((X1) >> 24) & 0xFF] \
			^ AES1[((X2) >> 16) & 0xFF] \
			^ AES2[((X3) >> 8) & 0xFF] \
			^ AES3[(X0) & 0xFF] ^ (K1); \
		(Y2) = AES0[((X2) >> 24) & 0xFF] \
			^ AES1[((X3) >> 16) & 0xFF] \
			^ AES2[((X0) >> 8) & 0xFF] \
			^ AES3[(X1) & 0xFF] ^ (K2); \
		(Y3) = AES0[((X3) >> 24) & 0xFF] \
			^ AES1[((X0) >> 16) & 0xFF] \
			^ AES2[((X1) >> 8) & 0xFF] \
			^ AES3[(X2) & 0xFF] ^ (K3); \
	} while (0)

/* Same round with an all-zero subkey. */
#define AES_ROUND_NOKEY_BE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
	AES_ROUND_BE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)

#else

/*
 * Little-endian variant (the default): table constants are used as written,
 * the tables get the _LE suffix, and the round macro takes row 0 from the
 * least significant byte of each input word.
 */
#define AESx(x)   SPH_C32(x)
#define AES0      AES0_LE
#define AES1      AES1_LE
#define AES2      AES2_LE
#define AES3      AES3_LE

/*
 * One table-driven AES round on the four words X0..X3, XOR-ed with the
 * subkey words K0..K3; the result is assigned to Y0..Y3.
 */
#define AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3)   do { \
		(Y0) = AES0[(X0) & 0xFF] \
			^ AES1[((X1) >> 8) & 0xFF] \
			^ AES2[((X2) >> 16) & 0xFF] \
			^ AES3[((X3) >> 24) & 0xFF] ^ (K0); \
		(Y1) = AES0[(X1) & 0xFF] \
			^ AES1[((X2) >> 8) & 0xFF] \
			^ AES2[((X3) >> 16) & 0xFF] \
			^ AES3[((X0) >> 24) & 0xFF] ^ (K1); \
		(Y2) = AES0[(X2) & 0xFF] \
			^ AES1[((X3) >> 8) & 0xFF] \
			^ AES2[((X0) >> 16) & 0xFF] \
			^ AES3[((X1) >> 24) & 0xFF] ^ (K2); \
		(Y3) = AES0[(X3) & 0xFF] \
			^ AES1[((X0) >> 8) & 0xFF] \
			^ AES2[((X1) >> 16) & 0xFF] \
			^ AES3[((X2) >> 24) & 0xFF] ^ (K3); \
	} while (0)

/* Same round with an all-zero subkey. */
#define AES_ROUND_NOKEY_LE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
	AES_ROUND_LE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)

#endif
|
||||
|
||||
/*
|
||||
* The AES*[] tables allow us to perform a fast evaluation of an AES
|
||||
* round; table AESi[] combines SubBytes for a byte at row i, and
|
||||
* MixColumns for the column where that byte goes after ShiftRows.
|
||||
*/
|
||||
|
||||
static const sph_u32 AES0[256] = {
|
||||
AESx(0xA56363C6), AESx(0x847C7CF8), AESx(0x997777EE), AESx(0x8D7B7BF6),
|
||||
AESx(0x0DF2F2FF), AESx(0xBD6B6BD6), AESx(0xB16F6FDE), AESx(0x54C5C591),
|
||||
AESx(0x50303060), AESx(0x03010102), AESx(0xA96767CE), AESx(0x7D2B2B56),
|
||||
AESx(0x19FEFEE7), AESx(0x62D7D7B5), AESx(0xE6ABAB4D), AESx(0x9A7676EC),
|
||||
AESx(0x45CACA8F), AESx(0x9D82821F), AESx(0x40C9C989), AESx(0x877D7DFA),
|
||||
AESx(0x15FAFAEF), AESx(0xEB5959B2), AESx(0xC947478E), AESx(0x0BF0F0FB),
|
||||
AESx(0xECADAD41), AESx(0x67D4D4B3), AESx(0xFDA2A25F), AESx(0xEAAFAF45),
|
||||
AESx(0xBF9C9C23), AESx(0xF7A4A453), AESx(0x967272E4), AESx(0x5BC0C09B),
|
||||
AESx(0xC2B7B775), AESx(0x1CFDFDE1), AESx(0xAE93933D), AESx(0x6A26264C),
|
||||
AESx(0x5A36366C), AESx(0x413F3F7E), AESx(0x02F7F7F5), AESx(0x4FCCCC83),
|
||||
AESx(0x5C343468), AESx(0xF4A5A551), AESx(0x34E5E5D1), AESx(0x08F1F1F9),
|
||||
AESx(0x937171E2), AESx(0x73D8D8AB), AESx(0x53313162), AESx(0x3F15152A),
|
||||
AESx(0x0C040408), AESx(0x52C7C795), AESx(0x65232346), AESx(0x5EC3C39D),
|
||||
AESx(0x28181830), AESx(0xA1969637), AESx(0x0F05050A), AESx(0xB59A9A2F),
|
||||
AESx(0x0907070E), AESx(0x36121224), AESx(0x9B80801B), AESx(0x3DE2E2DF),
|
||||
AESx(0x26EBEBCD), AESx(0x6927274E), AESx(0xCDB2B27F), AESx(0x9F7575EA),
|
||||
AESx(0x1B090912), AESx(0x9E83831D), AESx(0x742C2C58), AESx(0x2E1A1A34),
|
||||
AESx(0x2D1B1B36), AESx(0xB26E6EDC), AESx(0xEE5A5AB4), AESx(0xFBA0A05B),
|
||||
AESx(0xF65252A4), AESx(0x4D3B3B76), AESx(0x61D6D6B7), AESx(0xCEB3B37D),
|
||||
AESx(0x7B292952), AESx(0x3EE3E3DD), AESx(0x712F2F5E), AESx(0x97848413),
|
||||
AESx(0xF55353A6), AESx(0x68D1D1B9), AESx(0x00000000), AESx(0x2CEDEDC1),
|
||||
AESx(0x60202040), AESx(0x1FFCFCE3), AESx(0xC8B1B179), AESx(0xED5B5BB6),
|
||||
AESx(0xBE6A6AD4), AESx(0x46CBCB8D), AESx(0xD9BEBE67), AESx(0x4B393972),
|
||||
AESx(0xDE4A4A94), AESx(0xD44C4C98), AESx(0xE85858B0), AESx(0x4ACFCF85),
|
||||
AESx(0x6BD0D0BB), AESx(0x2AEFEFC5), AESx(0xE5AAAA4F), AESx(0x16FBFBED),
|
||||
AESx(0xC5434386), AESx(0xD74D4D9A), AESx(0x55333366), AESx(0x94858511),
|
||||
AESx(0xCF45458A), AESx(0x10F9F9E9), AESx(0x06020204), AESx(0x817F7FFE),
|
||||
AESx(0xF05050A0), AESx(0x443C3C78), AESx(0xBA9F9F25), AESx(0xE3A8A84B),
|
||||
AESx(0xF35151A2), AESx(0xFEA3A35D), AESx(0xC0404080), AESx(0x8A8F8F05),
|
||||
AESx(0xAD92923F), AESx(0xBC9D9D21), AESx(0x48383870), AESx(0x04F5F5F1),
|
||||
AESx(0xDFBCBC63), AESx(0xC1B6B677), AESx(0x75DADAAF), AESx(0x63212142),
|
||||
AESx(0x30101020), AESx(0x1AFFFFE5), AESx(0x0EF3F3FD), AESx(0x6DD2D2BF),
|
||||
AESx(0x4CCDCD81), AESx(0x140C0C18), AESx(0x35131326), AESx(0x2FECECC3),
|
||||
AESx(0xE15F5FBE), AESx(0xA2979735), AESx(0xCC444488), AESx(0x3917172E),
|
||||
AESx(0x57C4C493), AESx(0xF2A7A755), AESx(0x827E7EFC), AESx(0x473D3D7A),
|
||||
AESx(0xAC6464C8), AESx(0xE75D5DBA), AESx(0x2B191932), AESx(0x957373E6),
|
||||
AESx(0xA06060C0), AESx(0x98818119), AESx(0xD14F4F9E), AESx(0x7FDCDCA3),
|
||||
AESx(0x66222244), AESx(0x7E2A2A54), AESx(0xAB90903B), AESx(0x8388880B),
|
||||
AESx(0xCA46468C), AESx(0x29EEEEC7), AESx(0xD3B8B86B), AESx(0x3C141428),
|
||||
AESx(0x79DEDEA7), AESx(0xE25E5EBC), AESx(0x1D0B0B16), AESx(0x76DBDBAD),
|
||||
AESx(0x3BE0E0DB), AESx(0x56323264), AESx(0x4E3A3A74), AESx(0x1E0A0A14),
|
||||
AESx(0xDB494992), AESx(0x0A06060C), AESx(0x6C242448), AESx(0xE45C5CB8),
|
||||
AESx(0x5DC2C29F), AESx(0x6ED3D3BD), AESx(0xEFACAC43), AESx(0xA66262C4),
|
||||
AESx(0xA8919139), AESx(0xA4959531), AESx(0x37E4E4D3), AESx(0x8B7979F2),
|
||||
AESx(0x32E7E7D5), AESx(0x43C8C88B), AESx(0x5937376E), AESx(0xB76D6DDA),
|
||||
AESx(0x8C8D8D01), AESx(0x64D5D5B1), AESx(0xD24E4E9C), AESx(0xE0A9A949),
|
||||
AESx(0xB46C6CD8), AESx(0xFA5656AC), AESx(0x07F4F4F3), AESx(0x25EAEACF),
|
||||
AESx(0xAF6565CA), AESx(0x8E7A7AF4), AESx(0xE9AEAE47), AESx(0x18080810),
|
||||
AESx(0xD5BABA6F), AESx(0x887878F0), AESx(0x6F25254A), AESx(0x722E2E5C),
|
||||
AESx(0x241C1C38), AESx(0xF1A6A657), AESx(0xC7B4B473), AESx(0x51C6C697),
|
||||
AESx(0x23E8E8CB), AESx(0x7CDDDDA1), AESx(0x9C7474E8), AESx(0x211F1F3E),
|
||||
AESx(0xDD4B4B96), AESx(0xDCBDBD61), AESx(0x868B8B0D), AESx(0x858A8A0F),
|
||||
AESx(0x907070E0), AESx(0x423E3E7C), AESx(0xC4B5B571), AESx(0xAA6666CC),
|
||||
AESx(0xD8484890), AESx(0x05030306), AESx(0x01F6F6F7), AESx(0x120E0E1C),
|
||||
AESx(0xA36161C2), AESx(0x5F35356A), AESx(0xF95757AE), AESx(0xD0B9B969),
|
||||
AESx(0x91868617), AESx(0x58C1C199), AESx(0x271D1D3A), AESx(0xB99E9E27),
|
||||
AESx(0x38E1E1D9), AESx(0x13F8F8EB), AESx(0xB398982B), AESx(0x33111122),
|
||||
AESx(0xBB6969D2), AESx(0x70D9D9A9), AESx(0x898E8E07), AESx(0xA7949433),
|
||||
AESx(0xB69B9B2D), AESx(0x221E1E3C), AESx(0x92878715), AESx(0x20E9E9C9),
|
||||
AESx(0x49CECE87), AESx(0xFF5555AA), AESx(0x78282850), AESx(0x7ADFDFA5),
|
||||
AESx(0x8F8C8C03), AESx(0xF8A1A159), AESx(0x80898909), AESx(0x170D0D1A),
|
||||
AESx(0xDABFBF65), AESx(0x31E6E6D7), AESx(0xC6424284), AESx(0xB86868D0),
|
||||
AESx(0xC3414182), AESx(0xB0999929), AESx(0x772D2D5A), AESx(0x110F0F1E),
|
||||
AESx(0xCBB0B07B), AESx(0xFC5454A8), AESx(0xD6BBBB6D), AESx(0x3A16162C)
|
||||
};
|
||||
|
||||
static const sph_u32 AES1[256] = {
|
||||
AESx(0x6363C6A5), AESx(0x7C7CF884), AESx(0x7777EE99), AESx(0x7B7BF68D),
|
||||
AESx(0xF2F2FF0D), AESx(0x6B6BD6BD), AESx(0x6F6FDEB1), AESx(0xC5C59154),
|
||||
AESx(0x30306050), AESx(0x01010203), AESx(0x6767CEA9), AESx(0x2B2B567D),
|
||||
AESx(0xFEFEE719), AESx(0xD7D7B562), AESx(0xABAB4DE6), AESx(0x7676EC9A),
|
||||
AESx(0xCACA8F45), AESx(0x82821F9D), AESx(0xC9C98940), AESx(0x7D7DFA87),
|
||||
AESx(0xFAFAEF15), AESx(0x5959B2EB), AESx(0x47478EC9), AESx(0xF0F0FB0B),
|
||||
AESx(0xADAD41EC), AESx(0xD4D4B367), AESx(0xA2A25FFD), AESx(0xAFAF45EA),
|
||||
AESx(0x9C9C23BF), AESx(0xA4A453F7), AESx(0x7272E496), AESx(0xC0C09B5B),
|
||||
AESx(0xB7B775C2), AESx(0xFDFDE11C), AESx(0x93933DAE), AESx(0x26264C6A),
|
||||
AESx(0x36366C5A), AESx(0x3F3F7E41), AESx(0xF7F7F502), AESx(0xCCCC834F),
|
||||
AESx(0x3434685C), AESx(0xA5A551F4), AESx(0xE5E5D134), AESx(0xF1F1F908),
|
||||
AESx(0x7171E293), AESx(0xD8D8AB73), AESx(0x31316253), AESx(0x15152A3F),
|
||||
AESx(0x0404080C), AESx(0xC7C79552), AESx(0x23234665), AESx(0xC3C39D5E),
|
||||
AESx(0x18183028), AESx(0x969637A1), AESx(0x05050A0F), AESx(0x9A9A2FB5),
|
||||
AESx(0x07070E09), AESx(0x12122436), AESx(0x80801B9B), AESx(0xE2E2DF3D),
|
||||
AESx(0xEBEBCD26), AESx(0x27274E69), AESx(0xB2B27FCD), AESx(0x7575EA9F),
|
||||
AESx(0x0909121B), AESx(0x83831D9E), AESx(0x2C2C5874), AESx(0x1A1A342E),
|
||||
AESx(0x1B1B362D), AESx(0x6E6EDCB2), AESx(0x5A5AB4EE), AESx(0xA0A05BFB),
|
||||
AESx(0x5252A4F6), AESx(0x3B3B764D), AESx(0xD6D6B761), AESx(0xB3B37DCE),
|
||||
AESx(0x2929527B), AESx(0xE3E3DD3E), AESx(0x2F2F5E71), AESx(0x84841397),
|
||||
AESx(0x5353A6F5), AESx(0xD1D1B968), AESx(0x00000000), AESx(0xEDEDC12C),
|
||||
AESx(0x20204060), AESx(0xFCFCE31F), AESx(0xB1B179C8), AESx(0x5B5BB6ED),
|
||||
AESx(0x6A6AD4BE), AESx(0xCBCB8D46), AESx(0xBEBE67D9), AESx(0x3939724B),
|
||||
AESx(0x4A4A94DE), AESx(0x4C4C98D4), AESx(0x5858B0E8), AESx(0xCFCF854A),
|
||||
AESx(0xD0D0BB6B), AESx(0xEFEFC52A), AESx(0xAAAA4FE5), AESx(0xFBFBED16),
|
||||
AESx(0x434386C5), AESx(0x4D4D9AD7), AESx(0x33336655), AESx(0x85851194),
|
||||
AESx(0x45458ACF), AESx(0xF9F9E910), AESx(0x02020406), AESx(0x7F7FFE81),
|
||||
AESx(0x5050A0F0), AESx(0x3C3C7844), AESx(0x9F9F25BA), AESx(0xA8A84BE3),
|
||||
AESx(0x5151A2F3), AESx(0xA3A35DFE), AESx(0x404080C0), AESx(0x8F8F058A),
|
||||
AESx(0x92923FAD), AESx(0x9D9D21BC), AESx(0x38387048), AESx(0xF5F5F104),
|
||||
AESx(0xBCBC63DF), AESx(0xB6B677C1), AESx(0xDADAAF75), AESx(0x21214263),
|
||||
AESx(0x10102030), AESx(0xFFFFE51A), AESx(0xF3F3FD0E), AESx(0xD2D2BF6D),
|
||||
AESx(0xCDCD814C), AESx(0x0C0C1814), AESx(0x13132635), AESx(0xECECC32F),
|
||||
AESx(0x5F5FBEE1), AESx(0x979735A2), AESx(0x444488CC), AESx(0x17172E39),
|
||||
AESx(0xC4C49357), AESx(0xA7A755F2), AESx(0x7E7EFC82), AESx(0x3D3D7A47),
|
||||
AESx(0x6464C8AC), AESx(0x5D5DBAE7), AESx(0x1919322B), AESx(0x7373E695),
|
||||
AESx(0x6060C0A0), AESx(0x81811998), AESx(0x4F4F9ED1), AESx(0xDCDCA37F),
|
||||
AESx(0x22224466), AESx(0x2A2A547E), AESx(0x90903BAB), AESx(0x88880B83),
|
||||
AESx(0x46468CCA), AESx(0xEEEEC729), AESx(0xB8B86BD3), AESx(0x1414283C),
|
||||
AESx(0xDEDEA779), AESx(0x5E5EBCE2), AESx(0x0B0B161D), AESx(0xDBDBAD76),
|
||||
AESx(0xE0E0DB3B), AESx(0x32326456), AESx(0x3A3A744E), AESx(0x0A0A141E),
|
||||
AESx(0x494992DB), AESx(0x06060C0A), AESx(0x2424486C), AESx(0x5C5CB8E4),
|
||||
AESx(0xC2C29F5D), AESx(0xD3D3BD6E), AESx(0xACAC43EF), AESx(0x6262C4A6),
|
||||
AESx(0x919139A8), AESx(0x959531A4), AESx(0xE4E4D337), AESx(0x7979F28B),
|
||||
AESx(0xE7E7D532), AESx(0xC8C88B43), AESx(0x37376E59), AESx(0x6D6DDAB7),
|
||||
AESx(0x8D8D018C), AESx(0xD5D5B164), AESx(0x4E4E9CD2), AESx(0xA9A949E0),
|
||||
AESx(0x6C6CD8B4), AESx(0x5656ACFA), AESx(0xF4F4F307), AESx(0xEAEACF25),
|
||||
AESx(0x6565CAAF), AESx(0x7A7AF48E), AESx(0xAEAE47E9), AESx(0x08081018),
|
||||
AESx(0xBABA6FD5), AESx(0x7878F088), AESx(0x25254A6F), AESx(0x2E2E5C72),
|
||||
AESx(0x1C1C3824), AESx(0xA6A657F1), AESx(0xB4B473C7), AESx(0xC6C69751),
|
||||
AESx(0xE8E8CB23), AESx(0xDDDDA17C), AESx(0x7474E89C), AESx(0x1F1F3E21),
|
||||
AESx(0x4B4B96DD), AESx(0xBDBD61DC), AESx(0x8B8B0D86), AESx(0x8A8A0F85),
|
||||
AESx(0x7070E090), AESx(0x3E3E7C42), AESx(0xB5B571C4), AESx(0x6666CCAA),
|
||||
AESx(0x484890D8), AESx(0x03030605), AESx(0xF6F6F701), AESx(0x0E0E1C12),
|
||||
AESx(0x6161C2A3), AESx(0x35356A5F), AESx(0x5757AEF9), AESx(0xB9B969D0),
|
||||
AESx(0x86861791), AESx(0xC1C19958), AESx(0x1D1D3A27), AESx(0x9E9E27B9),
|
||||
AESx(0xE1E1D938), AESx(0xF8F8EB13), AESx(0x98982BB3), AESx(0x11112233),
|
||||
AESx(0x6969D2BB), AESx(0xD9D9A970), AESx(0x8E8E0789), AESx(0x949433A7),
|
||||
AESx(0x9B9B2DB6), AESx(0x1E1E3C22), AESx(0x87871592), AESx(0xE9E9C920),
|
||||
AESx(0xCECE8749), AESx(0x5555AAFF), AESx(0x28285078), AESx(0xDFDFA57A),
|
||||
AESx(0x8C8C038F), AESx(0xA1A159F8), AESx(0x89890980), AESx(0x0D0D1A17),
|
||||
AESx(0xBFBF65DA), AESx(0xE6E6D731), AESx(0x424284C6), AESx(0x6868D0B8),
|
||||
AESx(0x414182C3), AESx(0x999929B0), AESx(0x2D2D5A77), AESx(0x0F0F1E11),
|
||||
AESx(0xB0B07BCB), AESx(0x5454A8FC), AESx(0xBBBB6DD6), AESx(0x16162C3A)
|
||||
};
|
||||
|
||||
static const sph_u32 AES2[256] = {
|
||||
AESx(0x63C6A563), AESx(0x7CF8847C), AESx(0x77EE9977), AESx(0x7BF68D7B),
|
||||
AESx(0xF2FF0DF2), AESx(0x6BD6BD6B), AESx(0x6FDEB16F), AESx(0xC59154C5),
|
||||
AESx(0x30605030), AESx(0x01020301), AESx(0x67CEA967), AESx(0x2B567D2B),
|
||||
AESx(0xFEE719FE), AESx(0xD7B562D7), AESx(0xAB4DE6AB), AESx(0x76EC9A76),
|
||||
AESx(0xCA8F45CA), AESx(0x821F9D82), AESx(0xC98940C9), AESx(0x7DFA877D),
|
||||
AESx(0xFAEF15FA), AESx(0x59B2EB59), AESx(0x478EC947), AESx(0xF0FB0BF0),
|
||||
AESx(0xAD41ECAD), AESx(0xD4B367D4), AESx(0xA25FFDA2), AESx(0xAF45EAAF),
|
||||
AESx(0x9C23BF9C), AESx(0xA453F7A4), AESx(0x72E49672), AESx(0xC09B5BC0),
|
||||
AESx(0xB775C2B7), AESx(0xFDE11CFD), AESx(0x933DAE93), AESx(0x264C6A26),
|
||||
AESx(0x366C5A36), AESx(0x3F7E413F), AESx(0xF7F502F7), AESx(0xCC834FCC),
|
||||
AESx(0x34685C34), AESx(0xA551F4A5), AESx(0xE5D134E5), AESx(0xF1F908F1),
|
||||
AESx(0x71E29371), AESx(0xD8AB73D8), AESx(0x31625331), AESx(0x152A3F15),
|
||||
AESx(0x04080C04), AESx(0xC79552C7), AESx(0x23466523), AESx(0xC39D5EC3),
|
||||
AESx(0x18302818), AESx(0x9637A196), AESx(0x050A0F05), AESx(0x9A2FB59A),
|
||||
AESx(0x070E0907), AESx(0x12243612), AESx(0x801B9B80), AESx(0xE2DF3DE2),
|
||||
AESx(0xEBCD26EB), AESx(0x274E6927), AESx(0xB27FCDB2), AESx(0x75EA9F75),
|
||||
AESx(0x09121B09), AESx(0x831D9E83), AESx(0x2C58742C), AESx(0x1A342E1A),
|
||||
AESx(0x1B362D1B), AESx(0x6EDCB26E), AESx(0x5AB4EE5A), AESx(0xA05BFBA0),
|
||||
AESx(0x52A4F652), AESx(0x3B764D3B), AESx(0xD6B761D6), AESx(0xB37DCEB3),
|
||||
AESx(0x29527B29), AESx(0xE3DD3EE3), AESx(0x2F5E712F), AESx(0x84139784),
|
||||
AESx(0x53A6F553), AESx(0xD1B968D1), AESx(0x00000000), AESx(0xEDC12CED),
|
||||
AESx(0x20406020), AESx(0xFCE31FFC), AESx(0xB179C8B1), AESx(0x5BB6ED5B),
|
||||
AESx(0x6AD4BE6A), AESx(0xCB8D46CB), AESx(0xBE67D9BE), AESx(0x39724B39),
|
||||
AESx(0x4A94DE4A), AESx(0x4C98D44C), AESx(0x58B0E858), AESx(0xCF854ACF),
|
||||
AESx(0xD0BB6BD0), AESx(0xEFC52AEF), AESx(0xAA4FE5AA), AESx(0xFBED16FB),
|
||||
AESx(0x4386C543), AESx(0x4D9AD74D), AESx(0x33665533), AESx(0x85119485),
|
||||
AESx(0x458ACF45), AESx(0xF9E910F9), AESx(0x02040602), AESx(0x7FFE817F),
|
||||
AESx(0x50A0F050), AESx(0x3C78443C), AESx(0x9F25BA9F), AESx(0xA84BE3A8),
|
||||
AESx(0x51A2F351), AESx(0xA35DFEA3), AESx(0x4080C040), AESx(0x8F058A8F),
|
||||
AESx(0x923FAD92), AESx(0x9D21BC9D), AESx(0x38704838), AESx(0xF5F104F5),
|
||||
AESx(0xBC63DFBC), AESx(0xB677C1B6), AESx(0xDAAF75DA), AESx(0x21426321),
|
||||
AESx(0x10203010), AESx(0xFFE51AFF), AESx(0xF3FD0EF3), AESx(0xD2BF6DD2),
|
||||
AESx(0xCD814CCD), AESx(0x0C18140C), AESx(0x13263513), AESx(0xECC32FEC),
|
||||
AESx(0x5FBEE15F), AESx(0x9735A297), AESx(0x4488CC44), AESx(0x172E3917),
|
||||
AESx(0xC49357C4), AESx(0xA755F2A7), AESx(0x7EFC827E), AESx(0x3D7A473D),
|
||||
AESx(0x64C8AC64), AESx(0x5DBAE75D), AESx(0x19322B19), AESx(0x73E69573),
|
||||
AESx(0x60C0A060), AESx(0x81199881), AESx(0x4F9ED14F), AESx(0xDCA37FDC),
|
||||
AESx(0x22446622), AESx(0x2A547E2A), AESx(0x903BAB90), AESx(0x880B8388),
|
||||
AESx(0x468CCA46), AESx(0xEEC729EE), AESx(0xB86BD3B8), AESx(0x14283C14),
|
||||
AESx(0xDEA779DE), AESx(0x5EBCE25E), AESx(0x0B161D0B), AESx(0xDBAD76DB),
|
||||
AESx(0xE0DB3BE0), AESx(0x32645632), AESx(0x3A744E3A), AESx(0x0A141E0A),
|
||||
AESx(0x4992DB49), AESx(0x060C0A06), AESx(0x24486C24), AESx(0x5CB8E45C),
|
||||
AESx(0xC29F5DC2), AESx(0xD3BD6ED3), AESx(0xAC43EFAC), AESx(0x62C4A662),
|
||||
AESx(0x9139A891), AESx(0x9531A495), AESx(0xE4D337E4), AESx(0x79F28B79),
|
||||
AESx(0xE7D532E7), AESx(0xC88B43C8), AESx(0x376E5937), AESx(0x6DDAB76D),
|
||||
AESx(0x8D018C8D), AESx(0xD5B164D5), AESx(0x4E9CD24E), AESx(0xA949E0A9),
|
||||
AESx(0x6CD8B46C), AESx(0x56ACFA56), AESx(0xF4F307F4), AESx(0xEACF25EA),
|
||||
AESx(0x65CAAF65), AESx(0x7AF48E7A), AESx(0xAE47E9AE), AESx(0x08101808),
|
||||
AESx(0xBA6FD5BA), AESx(0x78F08878), AESx(0x254A6F25), AESx(0x2E5C722E),
|
||||
AESx(0x1C38241C), AESx(0xA657F1A6), AESx(0xB473C7B4), AESx(0xC69751C6),
|
||||
AESx(0xE8CB23E8), AESx(0xDDA17CDD), AESx(0x74E89C74), AESx(0x1F3E211F),
|
||||
AESx(0x4B96DD4B), AESx(0xBD61DCBD), AESx(0x8B0D868B), AESx(0x8A0F858A),
|
||||
AESx(0x70E09070), AESx(0x3E7C423E), AESx(0xB571C4B5), AESx(0x66CCAA66),
|
||||
AESx(0x4890D848), AESx(0x03060503), AESx(0xF6F701F6), AESx(0x0E1C120E),
|
||||
AESx(0x61C2A361), AESx(0x356A5F35), AESx(0x57AEF957), AESx(0xB969D0B9),
|
||||
AESx(0x86179186), AESx(0xC19958C1), AESx(0x1D3A271D), AESx(0x9E27B99E),
|
||||
AESx(0xE1D938E1), AESx(0xF8EB13F8), AESx(0x982BB398), AESx(0x11223311),
|
||||
AESx(0x69D2BB69), AESx(0xD9A970D9), AESx(0x8E07898E), AESx(0x9433A794),
|
||||
AESx(0x9B2DB69B), AESx(0x1E3C221E), AESx(0x87159287), AESx(0xE9C920E9),
|
||||
AESx(0xCE8749CE), AESx(0x55AAFF55), AESx(0x28507828), AESx(0xDFA57ADF),
|
||||
AESx(0x8C038F8C), AESx(0xA159F8A1), AESx(0x89098089), AESx(0x0D1A170D),
|
||||
AESx(0xBF65DABF), AESx(0xE6D731E6), AESx(0x4284C642), AESx(0x68D0B868),
|
||||
AESx(0x4182C341), AESx(0x9929B099), AESx(0x2D5A772D), AESx(0x0F1E110F),
|
||||
AESx(0xB07BCBB0), AESx(0x54A8FC54), AESx(0xBB6DD6BB), AESx(0x162C3A16)
|
||||
};
|
||||
|
||||
static const sph_u32 AES3[256] = {
|
||||
AESx(0xC6A56363), AESx(0xF8847C7C), AESx(0xEE997777), AESx(0xF68D7B7B),
|
||||
AESx(0xFF0DF2F2), AESx(0xD6BD6B6B), AESx(0xDEB16F6F), AESx(0x9154C5C5),
|
||||
AESx(0x60503030), AESx(0x02030101), AESx(0xCEA96767), AESx(0x567D2B2B),
|
||||
AESx(0xE719FEFE), AESx(0xB562D7D7), AESx(0x4DE6ABAB), AESx(0xEC9A7676),
|
||||
AESx(0x8F45CACA), AESx(0x1F9D8282), AESx(0x8940C9C9), AESx(0xFA877D7D),
|
||||
AESx(0xEF15FAFA), AESx(0xB2EB5959), AESx(0x8EC94747), AESx(0xFB0BF0F0),
|
||||
AESx(0x41ECADAD), AESx(0xB367D4D4), AESx(0x5FFDA2A2), AESx(0x45EAAFAF),
|
||||
AESx(0x23BF9C9C), AESx(0x53F7A4A4), AESx(0xE4967272), AESx(0x9B5BC0C0),
|
||||
AESx(0x75C2B7B7), AESx(0xE11CFDFD), AESx(0x3DAE9393), AESx(0x4C6A2626),
|
||||
AESx(0x6C5A3636), AESx(0x7E413F3F), AESx(0xF502F7F7), AESx(0x834FCCCC),
|
||||
AESx(0x685C3434), AESx(0x51F4A5A5), AESx(0xD134E5E5), AESx(0xF908F1F1),
|
||||
AESx(0xE2937171), AESx(0xAB73D8D8), AESx(0x62533131), AESx(0x2A3F1515),
|
||||
AESx(0x080C0404), AESx(0x9552C7C7), AESx(0x46652323), AESx(0x9D5EC3C3),
|
||||
AESx(0x30281818), AESx(0x37A19696), AESx(0x0A0F0505), AESx(0x2FB59A9A),
|
||||
AESx(0x0E090707), AESx(0x24361212), AESx(0x1B9B8080), AESx(0xDF3DE2E2),
|
||||
AESx(0xCD26EBEB), AESx(0x4E692727), AESx(0x7FCDB2B2), AESx(0xEA9F7575),
|
||||
AESx(0x121B0909), AESx(0x1D9E8383), AESx(0x58742C2C), AESx(0x342E1A1A),
|
||||
AESx(0x362D1B1B), AESx(0xDCB26E6E), AESx(0xB4EE5A5A), AESx(0x5BFBA0A0),
|
||||
AESx(0xA4F65252), AESx(0x764D3B3B), AESx(0xB761D6D6), AESx(0x7DCEB3B3),
|
||||
AESx(0x527B2929), AESx(0xDD3EE3E3), AESx(0x5E712F2F), AESx(0x13978484),
|
||||
AESx(0xA6F55353), AESx(0xB968D1D1), AESx(0x00000000), AESx(0xC12CEDED),
|
||||
AESx(0x40602020), AESx(0xE31FFCFC), AESx(0x79C8B1B1), AESx(0xB6ED5B5B),
|
||||
AESx(0xD4BE6A6A), AESx(0x8D46CBCB), AESx(0x67D9BEBE), AESx(0x724B3939),
|
||||
AESx(0x94DE4A4A), AESx(0x98D44C4C), AESx(0xB0E85858), AESx(0x854ACFCF),
|
||||
AESx(0xBB6BD0D0), AESx(0xC52AEFEF), AESx(0x4FE5AAAA), AESx(0xED16FBFB),
|
||||
AESx(0x86C54343), AESx(0x9AD74D4D), AESx(0x66553333), AESx(0x11948585),
|
||||
AESx(0x8ACF4545), AESx(0xE910F9F9), AESx(0x04060202), AESx(0xFE817F7F),
|
||||
AESx(0xA0F05050), AESx(0x78443C3C), AESx(0x25BA9F9F), AESx(0x4BE3A8A8),
|
||||
AESx(0xA2F35151), AESx(0x5DFEA3A3), AESx(0x80C04040), AESx(0x058A8F8F),
|
||||
AESx(0x3FAD9292), AESx(0x21BC9D9D), AESx(0x70483838), AESx(0xF104F5F5),
|
||||
AESx(0x63DFBCBC), AESx(0x77C1B6B6), AESx(0xAF75DADA), AESx(0x42632121),
|
||||
AESx(0x20301010), AESx(0xE51AFFFF), AESx(0xFD0EF3F3), AESx(0xBF6DD2D2),
|
||||
AESx(0x814CCDCD), AESx(0x18140C0C), AESx(0x26351313), AESx(0xC32FECEC),
|
||||
AESx(0xBEE15F5F), AESx(0x35A29797), AESx(0x88CC4444), AESx(0x2E391717),
|
||||
AESx(0x9357C4C4), AESx(0x55F2A7A7), AESx(0xFC827E7E), AESx(0x7A473D3D),
|
||||
AESx(0xC8AC6464), AESx(0xBAE75D5D), AESx(0x322B1919), AESx(0xE6957373),
|
||||
AESx(0xC0A06060), AESx(0x19988181), AESx(0x9ED14F4F), AESx(0xA37FDCDC),
|
||||
AESx(0x44662222), AESx(0x547E2A2A), AESx(0x3BAB9090), AESx(0x0B838888),
|
||||
AESx(0x8CCA4646), AESx(0xC729EEEE), AESx(0x6BD3B8B8), AESx(0x283C1414),
|
||||
AESx(0xA779DEDE), AESx(0xBCE25E5E), AESx(0x161D0B0B), AESx(0xAD76DBDB),
|
||||
AESx(0xDB3BE0E0), AESx(0x64563232), AESx(0x744E3A3A), AESx(0x141E0A0A),
|
||||
AESx(0x92DB4949), AESx(0x0C0A0606), AESx(0x486C2424), AESx(0xB8E45C5C),
|
||||
AESx(0x9F5DC2C2), AESx(0xBD6ED3D3), AESx(0x43EFACAC), AESx(0xC4A66262),
|
||||
AESx(0x39A89191), AESx(0x31A49595), AESx(0xD337E4E4), AESx(0xF28B7979),
|
||||
AESx(0xD532E7E7), AESx(0x8B43C8C8), AESx(0x6E593737), AESx(0xDAB76D6D),
|
||||
AESx(0x018C8D8D), AESx(0xB164D5D5), AESx(0x9CD24E4E), AESx(0x49E0A9A9),
|
||||
AESx(0xD8B46C6C), AESx(0xACFA5656), AESx(0xF307F4F4), AESx(0xCF25EAEA),
|
||||
AESx(0xCAAF6565), AESx(0xF48E7A7A), AESx(0x47E9AEAE), AESx(0x10180808),
|
||||
AESx(0x6FD5BABA), AESx(0xF0887878), AESx(0x4A6F2525), AESx(0x5C722E2E),
|
||||
AESx(0x38241C1C), AESx(0x57F1A6A6), AESx(0x73C7B4B4), AESx(0x9751C6C6),
|
||||
AESx(0xCB23E8E8), AESx(0xA17CDDDD), AESx(0xE89C7474), AESx(0x3E211F1F),
|
||||
AESx(0x96DD4B4B), AESx(0x61DCBDBD), AESx(0x0D868B8B), AESx(0x0F858A8A),
|
||||
AESx(0xE0907070), AESx(0x7C423E3E), AESx(0x71C4B5B5), AESx(0xCCAA6666),
|
||||
AESx(0x90D84848), AESx(0x06050303), AESx(0xF701F6F6), AESx(0x1C120E0E),
|
||||
AESx(0xC2A36161), AESx(0x6A5F3535), AESx(0xAEF95757), AESx(0x69D0B9B9),
|
||||
AESx(0x17918686), AESx(0x9958C1C1), AESx(0x3A271D1D), AESx(0x27B99E9E),
|
||||
AESx(0xD938E1E1), AESx(0xEB13F8F8), AESx(0x2BB39898), AESx(0x22331111),
|
||||
AESx(0xD2BB6969), AESx(0xA970D9D9), AESx(0x07898E8E), AESx(0x33A79494),
|
||||
AESx(0x2DB69B9B), AESx(0x3C221E1E), AESx(0x15928787), AESx(0xC920E9E9),
|
||||
AESx(0x8749CECE), AESx(0xAAFF5555), AESx(0x50782828), AESx(0xA57ADFDF),
|
||||
AESx(0x038F8C8C), AESx(0x59F8A1A1), AESx(0x09808989), AESx(0x1A170D0D),
|
||||
AESx(0x65DABFBF), AESx(0xD731E6E6), AESx(0x84C64242), AESx(0xD0B86868),
|
||||
AESx(0x82C34141), AESx(0x29B09999), AESx(0x5A772D2D), AESx(0x1E110F0F),
|
||||
AESx(0x7BCBB0B0), AESx(0xA8FC5454), AESx(0x6DD6BBBB), AESx(0x2C3A1616)
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
840
src/crypto/ghostrider/ghostrider.cpp
Normal file
840
src/crypto/ghostrider/ghostrider.cpp
Normal file
|
@ -0,0 +1,840 @@
|
|||
/* XMRig
|
||||
* Copyright 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "ghostrider.h"
|
||||
#include "sph_blake.h"
|
||||
#include "sph_bmw.h"
|
||||
#include "sph_groestl.h"
|
||||
#include "sph_jh.h"
|
||||
#include "sph_keccak.h"
|
||||
#include "sph_skein.h"
|
||||
#include "sph_luffa.h"
|
||||
#include "sph_cubehash.h"
|
||||
#include "sph_shavite.h"
|
||||
#include "sph_simd.h"
|
||||
#include "sph_echo.h"
|
||||
#include "sph_hamsi.h"
|
||||
#include "sph_fugue.h"
|
||||
#include "sph_shabal.h"
|
||||
#include "sph_whirlpool.h"
|
||||
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/io/log/Tags.h"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
#include "crypto/cn/CnHash.h"
|
||||
#include "crypto/cn/CnCtx.h"
|
||||
#include "crypto/cn/CryptoNight.h"
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <uv.h>
|
||||
|
||||
#ifdef XMRIG_FEATURE_HWLOC
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "backend/cpu/platform/HwlocCpuInfo.h"
|
||||
#include <hwloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(XMRIG_ARM)
|
||||
# include "crypto/cn/sse2neon.h"
|
||||
#elif defined(__GNUC__)
|
||||
# include <x86intrin.h>
|
||||
#else
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
#define CORE_HASH(i, x) static void h##i(const uint8_t* data, size_t size, uint8_t* output) \
|
||||
{ \
|
||||
sph_##x##_context ctx; \
|
||||
sph_##x##_init(&ctx); \
|
||||
sph_##x(&ctx, data, size); \
|
||||
sph_##x##_close(&ctx, output); \
|
||||
}
|
||||
|
||||
CORE_HASH( 0, blake512 );
|
||||
CORE_HASH( 1, bmw512 );
|
||||
CORE_HASH( 2, groestl512 );
|
||||
CORE_HASH( 3, jh512 );
|
||||
CORE_HASH( 4, keccak512 );
|
||||
CORE_HASH( 5, skein512 );
|
||||
CORE_HASH( 6, luffa512 );
|
||||
CORE_HASH( 7, cubehash512);
|
||||
CORE_HASH( 8, shavite512 );
|
||||
CORE_HASH( 9, simd512 );
|
||||
CORE_HASH(10, echo512 );
|
||||
CORE_HASH(11, hamsi512 );
|
||||
CORE_HASH(12, fugue512 );
|
||||
CORE_HASH(13, shabal512 );
|
||||
CORE_HASH(14, whirlpool );
|
||||
|
||||
#undef CORE_HASH
|
||||
|
||||
typedef void (*core_hash_func)(const uint8_t* data, size_t size, uint8_t* output);
|
||||
static const core_hash_func core_hash[15] = { h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, h14 };
|
||||
|
||||
namespace xmrig
|
||||
{
|
||||
|
||||
|
||||
// Algorithm ids of the 6 CryptoNight variants used by GhostRider.
// Entry k pairs with cn_names[k] and cn_sizes[k].
static constexpr Algorithm::Id cn_hash[6] = {
    Algorithm::CN_GR_0, Algorithm::CN_GR_1, Algorithm::CN_GR_2,
    Algorithm::CN_GR_3, Algorithm::CN_GR_4, Algorithm::CN_GR_5,
};
|
||||
|
||||
// Human-readable names (with scratchpad size) of the 6 CryptoNight variants,
// in the same order as cn_hash[]; used only for log output.
static constexpr const char* cn_names[6] = {
    "cn/dark (512 KB)",          // CN_GR_0
    "cn/dark-lite (256 KB)",     // CN_GR_1
    "cn/fast (2 MB)",            // CN_GR_2
    "cn/lite (1 MB)",            // CN_GR_3
    "cn/turtle (256 KB)",        // CN_GR_4
    "cn/turtle-lite (128 KB)",   // CN_GR_5
};
|
||||
|
||||
// Scratchpad size in bytes of each CryptoNight variant, same order as cn_hash[].
// The two "-lite" variants (CN_GR_1, CN_GR_5) use half of the size that
// Algorithm::l3() reports for their id.
static constexpr size_t cn_sizes[6] = {
    Algorithm::l3(Algorithm::CN_GR_0),      // cn/dark:        512 KB
    Algorithm::l3(Algorithm::CN_GR_1) / 2,  // cn/dark-lite:   256 KB
    Algorithm::l3(Algorithm::CN_GR_2),      // cn/fast:        2 MB
    Algorithm::l3(Algorithm::CN_GR_3),      // cn/lite:        1 MB
    Algorithm::l3(Algorithm::CN_GR_4),      // cn/turtle:      256 KB
    Algorithm::l3(Algorithm::CN_GR_5) / 2,  // cn/turtle-lite: 128 KB
};
|
||||
|
||||
// CnHash variant to use for a given number of interleaved hashes ("step").
// The tables are indexed directly by step (1..4); slot 0 is a filler equal to slot 1.
static constexpr CnHash::AlgoVariant av_hw_aes[5] = {
    CnHash::AV_SINGLE,      // [0] filler
    CnHash::AV_SINGLE,      // step 1
    CnHash::AV_DOUBLE,      // step 2
    CnHash::AV_TRIPLE,      // step 3
    CnHash::AV_QUAD,        // step 4
};

// Same mapping, software-AES variants
static constexpr CnHash::AlgoVariant av_soft_aes[5] = {
    CnHash::AV_SINGLE_SOFT, // [0] filler
    CnHash::AV_SINGLE_SOFT, // step 1
    CnHash::AV_DOUBLE_SOFT, // step 2
    CnHash::AV_TRIPLE_SOFT, // step 3
    CnHash::AV_QUAD_SOFT,   // step 4
};
|
||||
|
||||
// Builds a permutation of 0..N-1 from a 32-byte seed.
//
// The seed is consumed as 64 nibbles (low nibble of each byte first). Each
// nibble, reduced modulo N, is appended to `indices` the first time it is
// seen. Values the seed never produced are appended afterwards in ascending
// order, so `indices` always ends up holding every value exactly once.
template<size_t N>
static inline void select_indices(uint32_t (&indices)[N], const uint8_t* seed)
{
    bool used[N] = {};
    uint32_t filled = 0;

    for (uint32_t pos = 0; (pos < 64) && (filled < N); ++pos) {
        const uint32_t shift = (pos % 2) ? 4 : 0;
        const uint32_t candidate = ((seed[pos >> 1] >> shift) & 0xF) % N;

        if (used[candidate]) {
            continue;
        }

        used[candidate] = true;
        indices[filled++] = candidate;
    }

    // Seed exhausted before all slots were taken: add the leftovers in order
    for (uint32_t value = 0; (value < N) && (filled < N); ++value) {
        if (!used[value]) {
            indices[filled++] = value;
        }
    }
}
|
||||
|
||||
|
||||
namespace ghostrider
|
||||
{
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_HWLOC
|
||||
|
||||
|
||||
// Fastest configuration measured by benchmark() for one CryptoNight variant
struct AlgoTune
{
    double hashrate = 0.0;  // best measured hashrate, h/s
    uint32_t step = 1;      // number of hashes computed per CnHash call
    uint32_t threads = 1;   // 1 = calling thread only, 2 = calling thread + helper thread
};

// One entry per CryptoNight variant. tuneDefault only accepts configurations
// whose scratchpads take less than 8 MB; tune8MB accepts any benchmarked
// configuration.
static AlgoTune tuneDefault[6];
static AlgoTune tune8MB[6];
|
||||
|
||||
|
||||
struct HelperThread
|
||||
{
|
||||
HelperThread(hwloc_bitmap_t cpu_set, bool is8MB) : m_cpuSet(cpu_set), m_is8MB(is8MB)
|
||||
{
|
||||
uv_mutex_init(&m_mutex);
|
||||
uv_cond_init(&m_cond);
|
||||
|
||||
m_thread = new std::thread(&HelperThread::run, this);
|
||||
do {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
} while (!m_ready);
|
||||
}
|
||||
|
||||
~HelperThread()
|
||||
{
|
||||
uv_mutex_lock(&m_mutex);
|
||||
m_finished = true;
|
||||
uv_cond_signal(&m_cond);
|
||||
uv_mutex_unlock(&m_mutex);
|
||||
|
||||
m_thread->join();
|
||||
delete m_thread;
|
||||
|
||||
uv_mutex_destroy(&m_mutex);
|
||||
uv_cond_destroy(&m_cond);
|
||||
|
||||
hwloc_bitmap_free(m_cpuSet);
|
||||
}
|
||||
|
||||
struct TaskBase
|
||||
{
|
||||
virtual ~TaskBase() {}
|
||||
virtual void run() = 0;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct Task : TaskBase
|
||||
{
|
||||
inline Task(T&& task) : m_task(std::move(task))
|
||||
{
|
||||
static_assert(sizeof(Task) <= 128, "Task struct is too large");
|
||||
}
|
||||
|
||||
void run() override
|
||||
{
|
||||
m_task();
|
||||
this->~Task();
|
||||
}
|
||||
|
||||
T m_task;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
inline void launch_task(T&& task)
|
||||
{
|
||||
uv_mutex_lock(&m_mutex);
|
||||
new (&m_tasks[m_numTasks++]) Task<T>(std::move(task));
|
||||
uv_cond_signal(&m_cond);
|
||||
uv_mutex_unlock(&m_mutex);
|
||||
}
|
||||
|
||||
inline void wait() const
|
||||
{
|
||||
while (m_numTasks) {
|
||||
_mm_pause();
|
||||
}
|
||||
}
|
||||
|
||||
void run()
|
||||
{
|
||||
if (hwloc_bitmap_weight(m_cpuSet) > 0) {
|
||||
hwloc_topology_t topology = reinterpret_cast<HwlocCpuInfo*>(Cpu::info())->topology();
|
||||
if (hwloc_set_cpubind(topology, m_cpuSet, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT) < 0) {
|
||||
hwloc_set_cpubind(topology, m_cpuSet, HWLOC_CPUBIND_THREAD);
|
||||
}
|
||||
}
|
||||
|
||||
uv_mutex_lock(&m_mutex);
|
||||
m_ready = true;
|
||||
|
||||
do {
|
||||
uv_cond_wait(&m_cond, &m_mutex);
|
||||
|
||||
const uint32_t n = m_numTasks;
|
||||
if (n > 0) {
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
reinterpret_cast<TaskBase*>(&m_tasks[i])->run();
|
||||
}
|
||||
std::atomic_thread_fence(std::memory_order_seq_cst);
|
||||
m_numTasks = 0;
|
||||
}
|
||||
} while (!m_finished);
|
||||
|
||||
uv_mutex_unlock(&m_mutex);
|
||||
}
|
||||
|
||||
uv_mutex_t m_mutex;
|
||||
uv_cond_t m_cond;
|
||||
|
||||
alignas(16) uint8_t m_tasks[4][128] = {};
|
||||
volatile uint32_t m_numTasks = 0;
|
||||
volatile bool m_ready = false;
|
||||
volatile bool m_finished = false;
|
||||
hwloc_bitmap_t m_cpuSet = {};
|
||||
bool m_is8MB = false;
|
||||
|
||||
std::thread* m_thread = nullptr;
|
||||
};
|
||||
|
||||
|
||||
void benchmark()
|
||||
{
|
||||
static std::atomic<int> done{ 0 };
|
||||
if (done.exchange(1)) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::thread t([]() {
|
||||
// Try to avoid CPU core 0 because many system threads use it and can interfere
|
||||
uint32_t thread_index1 = (Cpu::info()->threads() > 2) ? 2 : 0;
|
||||
|
||||
hwloc_topology_t topology = reinterpret_cast<HwlocCpuInfo*>(Cpu::info())->topology();
|
||||
hwloc_obj_t pu = hwloc_get_pu_obj_by_os_index(topology, thread_index1);
|
||||
hwloc_obj_t pu2;
|
||||
hwloc_get_closest_objs(topology, pu, &pu2, 1);
|
||||
uint32_t thread_index2 = pu2->os_index;
|
||||
|
||||
if (thread_index2 < thread_index1) {
|
||||
std::swap(thread_index1, thread_index2);
|
||||
}
|
||||
|
||||
Platform::setThreadAffinity(thread_index1);
|
||||
|
||||
constexpr uint32_t N = 1U << 21;
|
||||
|
||||
VirtualMemory::init(0, N);
|
||||
VirtualMemory* memory = new VirtualMemory(N * 8, true, false, false);
|
||||
|
||||
// 2 MB cache per core by default
|
||||
size_t max_scratchpad_size = 1U << 21;
|
||||
|
||||
if ((Cpu::info()->L3() >> 22) > Cpu::info()->cores()) {
|
||||
// At least 1 core can run with 8 MB cache
|
||||
max_scratchpad_size = 1U << 23;
|
||||
}
|
||||
else if ((Cpu::info()->L3() >> 22) >= Cpu::info()->cores()) {
|
||||
// All cores can run with 4 MB cache
|
||||
max_scratchpad_size = 1U << 22;
|
||||
}
|
||||
|
||||
LOG_VERBOSE("Running GhostRider benchmark on logical CPUs %u and %u (max scratchpad size %zu MB, huge pages %s)", thread_index1, thread_index2, max_scratchpad_size >> 20, memory->isHugePages() ? "on" : "off");
|
||||
|
||||
cryptonight_ctx* ctx[8];
|
||||
CnCtx::create(ctx, memory->scratchpad(), N, 8);
|
||||
|
||||
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
|
||||
|
||||
uint8_t buf[80];
|
||||
uint8_t hash[32 * 8];
|
||||
|
||||
LOG_VERBOSE("%24s | N | Hashrate", "Algorithm");
|
||||
LOG_VERBOSE("-------------------------|-----|-------------");
|
||||
|
||||
using namespace std::chrono;
|
||||
|
||||
for (uint32_t algo = 0; algo < 6; ++algo) {
|
||||
for (uint64_t step : { 1, 2, 4}) {
|
||||
# ifdef XMRIG_ARM
|
||||
if (step == 4) {
|
||||
continue;
|
||||
}
|
||||
# endif
|
||||
|
||||
const size_t cur_scratchpad_size = cn_sizes[algo] * step;
|
||||
if (cur_scratchpad_size > max_scratchpad_size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
|
||||
|
||||
const high_resolution_clock::time_point start_time = high_resolution_clock::now();
|
||||
|
||||
double min_dt = 1e10;
|
||||
for (uint32_t iter = 0;; ++iter) {
|
||||
const high_resolution_clock::time_point t1 = high_resolution_clock::now();
|
||||
|
||||
// Stop after 15 milliseconds, but only if at least 10 iterations were done
|
||||
if ((iter >= 10) && (duration_cast<milliseconds>(t1 - start_time).count() >= 15)) {
|
||||
break;
|
||||
}
|
||||
|
||||
f(buf, sizeof(buf), hash, ctx, 0);
|
||||
|
||||
const double dt = duration_cast<nanoseconds>(high_resolution_clock::now() - t1).count() / 1e9;
|
||||
if (dt < min_dt) {
|
||||
min_dt = dt;
|
||||
}
|
||||
}
|
||||
|
||||
const double hashrate = step / min_dt;
|
||||
LOG_VERBOSE("%24s | %" PRIu64 "x1 | %.2f h/s", cn_names[algo], step, hashrate);
|
||||
|
||||
if (hashrate > tune8MB[algo].hashrate) {
|
||||
tune8MB[algo].hashrate = hashrate;
|
||||
tune8MB[algo].step = static_cast<uint32_t>(step);
|
||||
tune8MB[algo].threads = 1;
|
||||
}
|
||||
|
||||
if ((cur_scratchpad_size < (1U << 23)) && (hashrate > tuneDefault[algo].hashrate)) {
|
||||
tuneDefault[algo].hashrate = hashrate;
|
||||
tuneDefault[algo].step = static_cast<uint32_t>(step);
|
||||
tuneDefault[algo].threads = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
hwloc_bitmap_t helper_set = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_set(helper_set, thread_index2);
|
||||
HelperThread* helper = new HelperThread(helper_set, false);
|
||||
|
||||
for (uint32_t algo = 0; algo < 6; ++algo) {
|
||||
for (uint64_t step : { 1, 2, 4}) {
|
||||
# ifdef XMRIG_ARM
|
||||
if (step == 4) {
|
||||
continue;
|
||||
}
|
||||
# endif
|
||||
|
||||
const size_t cur_scratchpad_size = cn_sizes[algo] * step * 2;
|
||||
if (cur_scratchpad_size > max_scratchpad_size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
|
||||
|
||||
const high_resolution_clock::time_point start_time = high_resolution_clock::now();
|
||||
|
||||
double min_dt = 1e10;
|
||||
for (uint32_t iter = 0;; ++iter) {
|
||||
const high_resolution_clock::time_point t1 = high_resolution_clock::now();
|
||||
|
||||
// Stop after 30 milliseconds, but only if at least 10 iterations were done
|
||||
if ((iter >= 10) && (duration_cast<milliseconds>(t1 - start_time).count() >= 30)) {
|
||||
break;
|
||||
}
|
||||
|
||||
helper->launch_task([&f, &buf, &hash, &ctx, &step]() { f(buf, sizeof(buf), hash + step * 32, ctx + step, 0); });
|
||||
f(buf, sizeof(buf), hash, ctx, 0);
|
||||
helper->wait();
|
||||
|
||||
const double dt = duration_cast<nanoseconds>(high_resolution_clock::now() - t1).count() / 1e9;
|
||||
if (dt < min_dt) {
|
||||
min_dt = dt;
|
||||
}
|
||||
}
|
||||
|
||||
const double hashrate = step * 2.0 / min_dt * 1.0075;
|
||||
LOG_VERBOSE("%24s | %" PRIu64 "x2 | %.2f h/s", cn_names[algo], step, hashrate);
|
||||
|
||||
if (hashrate > tune8MB[algo].hashrate) {
|
||||
tune8MB[algo].hashrate = hashrate;
|
||||
tune8MB[algo].step = static_cast<uint32_t>(step);
|
||||
tune8MB[algo].threads = 2;
|
||||
}
|
||||
|
||||
if ((cur_scratchpad_size < (1U << 23)) && (hashrate > tuneDefault[algo].hashrate)) {
|
||||
tuneDefault[algo].hashrate = hashrate;
|
||||
tuneDefault[algo].step = static_cast<uint32_t>(step);
|
||||
tuneDefault[algo].threads = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete helper;
|
||||
|
||||
CnCtx::release(ctx, 8);
|
||||
delete memory;
|
||||
});
|
||||
|
||||
t.join();
|
||||
|
||||
LOG_VERBOSE("---------------------------------------------");
|
||||
LOG_VERBOSE("| GhostRider tuning results |");
|
||||
LOG_VERBOSE("---------------------------------------------");
|
||||
|
||||
for (int algo = 0; algo < 6; ++algo) {
|
||||
LOG_VERBOSE("%24s | %ux%u | %.2f h/s", cn_names[algo], tuneDefault[algo].step, tuneDefault[algo].threads, tuneDefault[algo].hashrate);
|
||||
if ((tune8MB[algo].step != tuneDefault[algo].step) || (tune8MB[algo].threads != tuneDefault[algo].threads)) {
|
||||
LOG_VERBOSE("%24s | %ux%u | %.2f h/s", cn_names[algo], tune8MB[algo].step, tune8MB[algo].threads, tune8MB[algo].hashrate);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename func>
|
||||
static inline bool findByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambda)
|
||||
{
|
||||
for (size_t i = 0; i < obj->arity; i++) {
|
||||
if (obj->children[i]->type == type) {
|
||||
if (lambda(obj->children[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (findByType(obj->children[i], type, lambda)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Creates a helper thread for the mining thread that runs on logical CPU `cpu_index`.
//
// The helper is bound to the CPUs that share the smallest possible topology object
// (core, then L1, L2, L3 cache) with `cpu_index` and are not listed in `affinities`,
// i.e. are not used by any mining thread.
//
// Returns nullptr when `cpu_index` is negative, when no free CPU is found, and always
// on ARM. The returned object must be released with destroy_helper_thread().
HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities)
{
#ifndef XMRIG_ARM
    hwloc_bitmap_t helper_cpu_set = hwloc_bitmap_alloc();
    hwloc_bitmap_t main_threads_set = hwloc_bitmap_alloc();

    for (int64_t i : affinities) {
        if (i >= 0) {
            hwloc_bitmap_set(main_threads_set, i);
        }
    }

    if (cpu_index >= 0) {
        hwloc_topology_t topology = reinterpret_cast<HwlocCpuInfo*>(Cpu::info())->topology();
        hwloc_obj_t root = hwloc_get_root_obj(topology);

        bool is8MB = false;

        // Look at the L3 cache that contains cpu_index
        findByType(root, HWLOC_OBJ_L3CACHE, [cpu_index, &is8MB](hwloc_obj_t obj) {
            if (!hwloc_bitmap_isset(obj->cpuset, cpu_index)) {
                return false;
            }

            uint32_t num_cores = 0;
            findByType(obj, HWLOC_OBJ_CORE, [&num_cores](hwloc_obj_t) { ++num_cores; return false; });

            // cache.size >> 22 is the cache size in 4 MB units. Whatever exceeds one
            // unit per core is given, one extra unit each, to the first cores of this
            // cache; is8MB is set if cpu_index belongs to one of them.
            if ((obj->attr->cache.size >> 22) > num_cores) {
                uint32_t num_8MB_cores = (obj->attr->cache.size >> 22) - num_cores;

                is8MB = findByType(obj, HWLOC_OBJ_CORE, [cpu_index, &num_8MB_cores](hwloc_obj_t obj2) {
                    if (num_8MB_cores > 0) {
                        --num_8MB_cores;
                        if (hwloc_bitmap_isset(obj2->cpuset, cpu_index)) {
                            return true;
                        }
                    }
                    return false;
                });
            }
            return true;
        });

        // Widen the search step by step until a CPU not used by mining threads is found
        for (auto obj_type : { HWLOC_OBJ_CORE, HWLOC_OBJ_L1CACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L3CACHE }) {
            findByType(root, obj_type, [cpu_index, helper_cpu_set, main_threads_set](hwloc_obj_t obj) {
                const hwloc_cpuset_t& s = obj->cpuset;
                if (hwloc_bitmap_isset(s, cpu_index)) {
                    hwloc_bitmap_andnot(helper_cpu_set, s, main_threads_set);
                    if (hwloc_bitmap_weight(helper_cpu_set) > 0) {
                        return true;
                    }
                }
                return false;
            });

            if (hwloc_bitmap_weight(helper_cpu_set) > 0) {
                // helper_cpu_set is handed over to the helper, main_threads_set is not
                hwloc_bitmap_free(main_threads_set);
                return new HelperThread(helper_cpu_set, is8MB);
            }
        }
    }

    // No helper was created: both bitmaps are still owned by this function
    hwloc_bitmap_free(main_threads_set);
    hwloc_bitmap_free(helper_cpu_set);
#endif

    return nullptr;
}
|
||||
|
||||
|
||||
// Stops and releases a helper thread returned by create_helper_thread().
// Passing nullptr is allowed and does nothing.
void destroy_helper_thread(HelperThread* t)
{
    delete t;
}
|
||||
|
||||
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper)
|
||||
{
|
||||
enum { N = 8 };
|
||||
|
||||
uint8_t* ctx_memory[N];
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
ctx_memory[i] = ctx[i]->memory;
|
||||
}
|
||||
|
||||
// PrevBlockHash (GhostRider's seed) is stored in bytes [4; 36)
|
||||
uint32_t core_indices[15];
|
||||
select_indices(core_indices, data + 4);
|
||||
|
||||
uint32_t cn_indices[6];
|
||||
select_indices(cn_indices, data + 4);
|
||||
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
}
|
||||
}
|
||||
|
||||
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
|
||||
const AlgoTune* tune = (helper && helper->m_is8MB) ? tune8MB : tuneDefault;
|
||||
|
||||
uint8_t tmp[64 * N];
|
||||
|
||||
if (helper && (tune[cn_indices[0]].threads == 2) && (tune[cn_indices[1]].threads == 2) && (tune[cn_indices[2]].threads == 2)) {
|
||||
const size_t n = N / 2;
|
||||
|
||||
helper->launch_task([n, av, data, size, &ctx_memory, ctx, &cn_indices, &core_indices, &tmp, output, tune]() {
|
||||
const uint8_t* input = data;
|
||||
size_t input_size = size;
|
||||
|
||||
for (size_t part = 0; part < 3; ++part) {
|
||||
const AlgoTune& t = tune[cn_indices[part]];
|
||||
|
||||
// Allocate scratchpads
|
||||
{
|
||||
uint8_t* p = ctx_memory[4];
|
||||
|
||||
for (size_t i = n, k = 4; i < N; ++i) {
|
||||
if ((i % t.step) == 0) {
|
||||
k = 4;
|
||||
p = ctx_memory[4];
|
||||
}
|
||||
else if (p - ctx_memory[k] >= (1 << 21)) {
|
||||
++k;
|
||||
p = ctx_memory[k];
|
||||
}
|
||||
ctx[i]->memory = p;
|
||||
p += cn_sizes[cn_indices[part]];
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
for (size_t j = n; j < N; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](input + j * input_size, input_size, tmp + j * 64);
|
||||
}
|
||||
input = tmp;
|
||||
input_size = 64;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
|
||||
for (size_t j = n; j < N; j += t.step) {
|
||||
f(tmp + j * 64, 64, output + j * 32, ctx + n, 0);
|
||||
}
|
||||
|
||||
for (size_t j = n; j < N; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const uint8_t* input = data;
|
||||
size_t input_size = size;
|
||||
|
||||
for (size_t part = 0; part < 3; ++part) {
|
||||
const AlgoTune& t = tune[cn_indices[part]];
|
||||
|
||||
// Allocate scratchpads
|
||||
{
|
||||
uint8_t* p = ctx_memory[0];
|
||||
|
||||
for (size_t i = 0, k = 0; i < n; ++i) {
|
||||
if ((i % t.step) == 0) {
|
||||
k = 0;
|
||||
p = ctx_memory[0];
|
||||
}
|
||||
else if (p - ctx_memory[k] >= (1 << 21)) {
|
||||
++k;
|
||||
p = ctx_memory[k];
|
||||
}
|
||||
ctx[i]->memory = p;
|
||||
p += cn_sizes[cn_indices[part]];
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
for (size_t j = 0; j < n; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](input + j * input_size, input_size, tmp + j * 64);
|
||||
}
|
||||
input = tmp;
|
||||
input_size = 64;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
|
||||
for (size_t j = 0; j < n; j += t.step) {
|
||||
f(tmp + j * 64, 64, output + j * 32, ctx, 0);
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < n; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
}
|
||||
|
||||
helper->wait();
|
||||
}
|
||||
else {
|
||||
for (size_t part = 0; part < 3; ++part) {
|
||||
const AlgoTune& t = tune[cn_indices[part]];
|
||||
|
||||
// Allocate scratchpads
|
||||
{
|
||||
uint8_t* p = ctx_memory[0];
|
||||
const size_t n = N / t.threads;
|
||||
|
||||
// Thread 1
|
||||
for (size_t i = 0, k = 0; i < n; ++i) {
|
||||
if ((i % t.step) == 0) {
|
||||
k = 0;
|
||||
p = ctx_memory[0];
|
||||
}
|
||||
else if (p - ctx_memory[k] >= (1 << 21)) {
|
||||
++k;
|
||||
p = ctx_memory[k];
|
||||
}
|
||||
ctx[i]->memory = p;
|
||||
p += cn_sizes[cn_indices[part]];
|
||||
}
|
||||
|
||||
// Thread 2
|
||||
for (size_t i = n, k = 4; i < N; ++i) {
|
||||
if ((i % t.step) == 0) {
|
||||
k = 4;
|
||||
p = ctx_memory[4];
|
||||
}
|
||||
else if (p - ctx_memory[k] >= (1 << 21)) {
|
||||
++k;
|
||||
p = ctx_memory[k];
|
||||
}
|
||||
ctx[i]->memory = p;
|
||||
p += cn_sizes[cn_indices[part]];
|
||||
}
|
||||
}
|
||||
|
||||
size_t n = N;
|
||||
|
||||
if (helper && (t.threads == 2)) {
|
||||
n = N / 2;
|
||||
|
||||
helper->launch_task([data, size, n, &cn_indices, &core_indices, part, &tmp, av, &t, output, ctx]() {
|
||||
const uint8_t* input = data;
|
||||
size_t input_size = size;
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
for (size_t j = n; j < N; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](input + j * input_size, input_size, tmp + j * 64);
|
||||
}
|
||||
input = tmp;
|
||||
input_size = 64;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
|
||||
for (size_t j = n; j < N; j += t.step) {
|
||||
f(tmp + j * 64, 64, output + j * 32, ctx + n, 0);
|
||||
}
|
||||
|
||||
for (size_t j = n; j < N; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
for (size_t j = 0; j < n; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](data + j * size, size, tmp + j * 64);
|
||||
}
|
||||
data = tmp;
|
||||
size = 64;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
|
||||
for (size_t j = 0; j < n; j += t.step) {
|
||||
f(tmp + j * 64, 64, output + j * 32, ctx, 0);
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < n; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
|
||||
if (helper && (t.threads == 2)) {
|
||||
helper->wait();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
ctx[i]->memory = ctx_memory[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else // XMRIG_FEATURE_HWLOC
|
||||
|
||||
|
||||
// Variant compiled in the "#else // XMRIG_FEATURE_HWLOC" branch: intentionally does nothing.
void benchmark()
{
}
|
||||
// Variant compiled in the "#else // XMRIG_FEATURE_HWLOC" branch: both arguments are ignored
// and no helper thread is ever created, so the result is always nullptr.
HelperThread* create_helper_thread(int64_t, const std::vector<int64_t>&)
{
    return nullptr;
}
|
||||
// Variant compiled in the "#else // XMRIG_FEATURE_HWLOC" branch: the pointer is ignored,
// there is nothing to release because create_helper_thread() always returns nullptr here.
void destroy_helper_thread(HelperThread*)
{
}
|
||||
|
||||
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*)
|
||||
{
|
||||
constexpr uint32_t N = 8;
|
||||
|
||||
// PrevBlockHash (GhostRider's seed) is stored in bytes [4; 36)
|
||||
const uint8_t* seed = data + 4;
|
||||
|
||||
uint32_t core_indices[15];
|
||||
select_indices(core_indices, seed);
|
||||
|
||||
uint32_t cn_indices[6];
|
||||
select_indices(cn_indices, seed);
|
||||
|
||||
#ifdef XMRIG_ARM
|
||||
uint32_t step[6] = { 1, 1, 1, 1, 1, 2 };
|
||||
#else
|
||||
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
|
||||
#endif
|
||||
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
}
|
||||
}
|
||||
|
||||
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
|
||||
|
||||
const cn_hash_fun f[3] = {
|
||||
CnHash::fn(cn_hash[cn_indices[0]], av[step[cn_indices[0]]], Assembly::AUTO),
|
||||
CnHash::fn(cn_hash[cn_indices[1]], av[step[cn_indices[1]]], Assembly::AUTO),
|
||||
CnHash::fn(cn_hash[cn_indices[2]], av[step[cn_indices[2]]], Assembly::AUTO),
|
||||
};
|
||||
|
||||
uint8_t tmp[64 * N];
|
||||
|
||||
for (uint64_t part = 0; part < 3; ++part) {
|
||||
for (uint64_t i = 0; i < 5; ++i) {
|
||||
for (uint64_t j = 0; j < N; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](data + j * size, size, tmp + j * 64);
|
||||
data = tmp;
|
||||
size = 64;
|
||||
}
|
||||
}
|
||||
for (uint64_t j = 0, k = step[cn_indices[part]]; j < N; j += k) {
|
||||
f[part](tmp + j * 64, 64, output + j * 32, ctx, 0);
|
||||
}
|
||||
for (uint64_t j = 0; j < N; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif // XMRIG_FEATURE_HWLOC
|
||||
|
||||
|
||||
} // namespace ghostrider
|
||||
|
||||
|
||||
} // namespace xmrig
|
52
src/crypto/ghostrider/ghostrider.h
Normal file
52
src/crypto/ghostrider/ghostrider.h
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* XMRig
|
||||
* Copyright 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_GR_HASH_H
|
||||
#define XMRIG_GR_HASH_H
|
||||
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
|
||||
struct cryptonight_ctx;
|
||||
|
||||
|
||||
namespace xmrig
|
||||
{
|
||||
|
||||
|
||||
namespace ghostrider
|
||||
{
|
||||
|
||||
|
||||
struct HelperThread;
|
||||
|
||||
// NOTE(review): only the definition used without XMRIG_FEATURE_HWLOC is known here, and it is an
// empty function; confirm what the hwloc-enabled definition measures before relying on it.
void benchmark();
|
||||
// Returns a helper thread handle to pass to hash_octa(), or nullptr when none is available
// (the definition used without XMRIG_FEATURE_HWLOC always returns nullptr).
// NOTE(review): the exact meaning of cpu_index and affinities is defined by the hwloc-enabled
// implementation - confirm there.
HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities);
|
||||
// Counterpart of create_helper_thread(). The definition used without XMRIG_FEATURE_HWLOC
// ignores its argument and does nothing.
void destroy_helper_thread(HelperThread* t);
|
||||
// Computes 8 GhostRider hashes in one call.
//   data   - 8 input blobs stored back to back, `size` bytes each
//   size   - length of one input blob, in bytes
//   output - receives 8 results of 32 bytes each
//   ctx    - CryptoNight contexts used by the CryptoNight stages
//   helper - optional helper thread from create_helper_thread(); may be nullptr
//            (it is ignored entirely when built without XMRIG_FEATURE_HWLOC)
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper);
|
||||
|
||||
|
||||
} // namespace ghostrider
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
#endif // XMRIG_GR_HASH_H
|
346
src/crypto/ghostrider/md_helper.c
Normal file
346
src/crypto/ghostrider/md_helper.c
Normal file
|
@ -0,0 +1,346 @@
|
|||
/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */
|
||||
/*
|
||||
* This file contains some functions which implement the external data
|
||||
* handling and padding for Merkle-Damgard hash functions which follow
|
||||
* the conventions set out by MD4 (little-endian) or SHA-1 (big-endian).
|
||||
*
|
||||
* API: this file is meant to be included, not compiled as a stand-alone
|
||||
* file. Some macros must be defined:
|
||||
* RFUN name for the round function
|
||||
* HASH "short name" for the hash function
|
||||
* BE32 defined for big-endian, 32-bit based (e.g. SHA-1)
|
||||
* LE32 defined for little-endian, 32-bit based (e.g. MD5)
|
||||
* BE64 defined for big-endian, 64-bit based (e.g. SHA-512)
|
||||
* LE64 defined for little-endian, 64-bit based (no example yet)
|
||||
* PW01 if defined, append 0x01 instead of 0x80 (for Tiger)
|
||||
* BLEN if defined, length of a message block (in bytes)
|
||||
* PLW1 if defined, length is defined on one 64-bit word only (for Tiger)
|
||||
* PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL)
|
||||
* SVAL if defined, reference to the context state information
|
||||
*
|
||||
* BLEN is used when a message block is not 16 (32-bit or 64-bit) words:
|
||||
* this is used for instance for Tiger, which works on 64-bit words but
|
||||
* uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are
|
||||
* ignored if 32-bit words are used; if 64-bit words are used and PLW1 is
|
||||
* set, then only one word (64 bits) will be used to encode the input
|
||||
* message length (in bits), otherwise two words will be used (as in
|
||||
* SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but
|
||||
* not PLW1), four 64-bit words will be used to encode the message length
|
||||
* (in bits). Note that regardless of those settings, only 64-bit message
|
||||
* lengths are supported (in bits): messages longer than 2 Exabytes will be
|
||||
* improperly hashed (this is unlikely to happen soon: 2 Exabytes is about
|
||||
* 2 million terabytes, which is huge).
|
||||
*
|
||||
* If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close()
|
||||
* function. This is used for Tiger2, which is identical to Tiger except
|
||||
* when it comes to the padding (Tiger2 uses the standard 0x80 byte instead
|
||||
* of the 0x01 from original Tiger).
|
||||
*
|
||||
* The RFUN function is invoked with two arguments, the first pointing to
|
||||
* aligned data (as a "const void *"), the second being state information
|
||||
* from the context structure. By default, this state information is the
|
||||
* "val" field from the context, and this field is assumed to be an array
|
||||
* of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64).
|
||||
* The "val" field can have any type, except
|
||||
* for the output encoding which assumes that it is an array of "sph_u32"
|
||||
* values. By defining SVAL, this last step is deactivated; the
|
||||
* includer code is then responsible for writing out the hash result. When
|
||||
* SVAL is defined, the "dst" and "rnum" parameters to the "close()" function
|
||||
* are ignored.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#undef SPH_XCAT
|
||||
#define SPH_XCAT(a, b) SPH_XCAT_(a, b)
|
||||
#undef SPH_XCAT_
|
||||
#define SPH_XCAT_(a, b) a ## b
|
||||
|
||||
#undef SPH_BLEN
|
||||
#undef SPH_WLEN
|
||||
#if defined BE64 || defined LE64
|
||||
#define SPH_BLEN 128U
|
||||
#define SPH_WLEN 8U
|
||||
#else
|
||||
#define SPH_BLEN 64U
|
||||
#define SPH_WLEN 4U
|
||||
#endif
|
||||
|
||||
#ifdef BLEN
|
||||
#undef SPH_BLEN
|
||||
#define SPH_BLEN BLEN
|
||||
#endif
|
||||
|
||||
#undef SPH_MAXPAD
|
||||
#if defined PLW1
|
||||
#define SPH_MAXPAD (SPH_BLEN - SPH_WLEN)
|
||||
#elif defined PLW4
|
||||
#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 2))
|
||||
#else
|
||||
#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 1))
|
||||
#endif
|
||||
|
||||
#undef SPH_VAL
|
||||
#undef SPH_NO_OUTPUT
|
||||
#ifdef SVAL
|
||||
#define SPH_VAL SVAL
|
||||
#define SPH_NO_OUTPUT 1
|
||||
#else
|
||||
#define SPH_VAL sc->val
|
||||
#endif
|
||||
|
||||
#ifndef CLOSE_ONLY
|
||||
|
||||
#ifdef SPH_UPTR
|
||||
static void
|
||||
SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len)
|
||||
#else
|
||||
void
|
||||
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
|
||||
#endif
|
||||
{
|
||||
SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
|
||||
size_t current;
|
||||
|
||||
sc = cc;
|
||||
#if SPH_64
|
||||
current = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
#else
|
||||
current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
|
||||
#endif
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
#if !SPH_64
|
||||
sph_u32 clow, clow2;
|
||||
#endif
|
||||
|
||||
clen = SPH_BLEN - current;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(sc->buf + current, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
current += clen;
|
||||
len -= clen;
|
||||
if (current == SPH_BLEN) {
|
||||
RFUN(sc->buf, SPH_VAL);
|
||||
current = 0;
|
||||
}
|
||||
#if SPH_64
|
||||
sc->count += clen;
|
||||
#else
|
||||
clow = sc->count_low;
|
||||
clow2 = SPH_T32(clow + clen);
|
||||
sc->count_low = clow2;
|
||||
if (clow2 < clow)
|
||||
sc->count_high ++;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SPH_UPTR
/*
 * Public update function with a fast path: whole blocks are processed
 * directly from the caller's memory instead of being copied into the
 * context buffer first. Short inputs, and (unless SPH_UNALIGNED is set)
 * inputs that are not word-aligned, are delegated to the "_short" helper.
 *
 * cc     hash context
 * data   input bytes
 * len    number of input bytes
 */
void
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
{
	/* "sc" must keep this name: the default SPH_VAL expands to sc->val. */
	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc = cc;
	const unsigned char *src = data;
	unsigned pending;
	size_t total;

	if (len < (2 * SPH_BLEN)) {
		SPH_XCAT(HASH, _short)(cc, src, len);
		return;
	}
#if SPH_64
	pending = (unsigned)sc->count & (SPH_BLEN - 1U);
#else
	pending = (unsigned)sc->count_low & (SPH_BLEN - 1U);
#endif
	if (pending != 0) {
		/* Complete the partially filled buffer first. */
		unsigned head = SPH_BLEN - pending;

		SPH_XCAT(HASH, _short)(cc, src, head);
		src += head;
		len -= head;
	}
#if !SPH_UNALIGNED
	if (((SPH_UPTR)src & (SPH_WLEN - 1U)) != 0) {
		SPH_XCAT(HASH, _short)(cc, src, len);
		return;
	}
#endif
	total = len;
	for (; len >= SPH_BLEN; len -= SPH_BLEN, src += SPH_BLEN)
		RFUN(src, SPH_VAL);
	/* Keep the trailing partial block for the next call. */
	if (len != 0)
		memcpy(sc->buf, src, len);
#if SPH_64
	sc->count += (sph_u64)total;
#else
	{
		sph_u32 before = sc->count_low;
		sph_u32 after = SPH_T32(before + total);

		sc->count_low = after;
		if (after < before)
			sc->count_high ++;
	}
	/*
	 * This code handles the improbable situation where "size_t" is
	 * greater than 32 bits, and yet we do not have a 64-bit type.
	 * The shift by 32 is done in three steps, each smaller than 32.
	 */
	total >>= 12;
	total >>= 10;
	total >>= 10;
	sc->count_high += total;
#endif
}
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Perform padding and produce result. The context is NOT reinitialized
|
||||
* by this function.
|
||||
*/
|
||||
static void
|
||||
SPH_XCAT(HASH, _addbits_and_close)(void *cc,
|
||||
unsigned ub, unsigned n, void *dst, unsigned rnum)
|
||||
{
|
||||
SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
|
||||
unsigned current, u;
|
||||
#if !SPH_64
|
||||
sph_u32 low, high;
|
||||
#endif
|
||||
|
||||
sc = cc;
|
||||
#if SPH_64
|
||||
current = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
#else
|
||||
current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
|
||||
#endif
|
||||
#ifdef PW01
|
||||
sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n);
|
||||
#else
|
||||
{
|
||||
unsigned z;
|
||||
|
||||
z = 0x80 >> n;
|
||||
sc->buf[current ++] = ((ub & -z) | z) & 0xFF;
|
||||
}
|
||||
#endif
|
||||
if (current > SPH_MAXPAD) {
|
||||
memset(sc->buf + current, 0, SPH_BLEN - current);
|
||||
RFUN(sc->buf, SPH_VAL);
|
||||
memset(sc->buf, 0, SPH_MAXPAD);
|
||||
} else {
|
||||
memset(sc->buf + current, 0, SPH_MAXPAD - current);
|
||||
}
|
||||
#if defined BE64
|
||||
#if defined PLW1
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#elif defined PLW4
|
||||
memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN);
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN,
|
||||
sc->count >> 61);
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#else
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61);
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#endif
|
||||
#elif defined LE64
|
||||
#if defined PLW1
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#elif defined PLW1
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
|
||||
memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN);
|
||||
#else
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
|
||||
#endif
|
||||
#else
|
||||
#if SPH_64
|
||||
#ifdef BE32
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#else
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#endif
|
||||
#else
|
||||
low = sc->count_low;
|
||||
high = SPH_T32((sc->count_high << 3) | (low >> 29));
|
||||
low = SPH_T32(low << 3) + (sph_u32)n;
|
||||
#ifdef BE32
|
||||
sph_enc32be(sc->buf + SPH_MAXPAD, high);
|
||||
sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low);
|
||||
#else
|
||||
sph_enc32le(sc->buf + SPH_MAXPAD, low);
|
||||
sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
RFUN(sc->buf, SPH_VAL);
|
||||
#ifdef SPH_NO_OUTPUT
|
||||
(void)dst;
|
||||
(void)rnum;
|
||||
(void)u;
|
||||
#else
|
||||
for (u = 0; u < rnum; u ++) {
|
||||
#if defined BE64
|
||||
sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]);
|
||||
#elif defined LE64
|
||||
sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]);
|
||||
#elif defined BE32
|
||||
sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]);
|
||||
#else
|
||||
sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum)
|
||||
{
|
||||
SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum);
|
||||
}
|
1132
src/crypto/ghostrider/sph_blake.c
Normal file
1132
src/crypto/ghostrider/sph_blake.c
Normal file
File diff suppressed because it is too large
Load diff
327
src/crypto/ghostrider/sph_blake.h
Normal file
327
src/crypto/ghostrider/sph_blake.h
Normal file
|
@ -0,0 +1,327 @@
|
|||
/* $Id: sph_blake.h 252 2011-06-07 17:55:14Z tp $ */
|
||||
/**
|
||||
* BLAKE interface. BLAKE is a family of functions which differ by their
|
||||
* output size; this implementation defines BLAKE for output sizes 224,
|
||||
* 256, 384 and 512 bits. This implementation conforms to the "third
|
||||
* round" specification.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_blake.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_BLAKE_H__
|
||||
#define SPH_BLAKE_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-224.
|
||||
*/
|
||||
#define SPH_SIZE_blake224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-256.
|
||||
*/
|
||||
#define SPH_SIZE_blake256 256
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-384.
|
||||
*/
|
||||
#define SPH_SIZE_blake384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-512.
|
||||
*/
|
||||
#define SPH_SIZE_blake512 512
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-224 and BLAKE-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BLAKE computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BLAKE
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
/* NOTE(review): presumably the number of bytes currently held in buf - confirm in sph_blake.c */
size_t ptr;
|
||||
/* NOTE(review): H, S and T0/T1 look like BLAKE's chaining value, salt and bit counter - confirm in sph_blake.c */
sph_u32 H[8];
|
||||
sph_u32 S[4];
|
||||
sph_u32 T0, T1;
|
||||
#endif
|
||||
} sph_blake_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-224 computations. It is
|
||||
* identical to the common <code>sph_blake_small_context</code>.
|
||||
*/
|
||||
typedef sph_blake_small_context sph_blake224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-256 computations. It is
|
||||
* identical to the common <code>sph_blake_small_context</code>.
|
||||
*/
|
||||
typedef sph_blake_small_context sph_blake256_context;
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-384 and BLAKE-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BLAKE computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BLAKE
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
/* NOTE(review): presumably the number of bytes currently held in buf - confirm in sph_blake.c */
size_t ptr;
|
||||
/* NOTE(review): H, S and T0/T1 look like BLAKE's chaining value, salt and bit counter - confirm in sph_blake.c */
sph_u64 H[8];
|
||||
sph_u64 S[4];
|
||||
sph_u64 T0, T1;
|
||||
#endif
|
||||
} sph_blake_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-384 computations. It is
|
||||
* identical to the common <code>sph_blake_big_context</code>.
|
||||
*/
|
||||
typedef sph_blake_big_context sph_blake384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-512 computations. It is
|
||||
* identical to the common <code>sph_blake_big_context</code>.
|
||||
*/
|
||||
typedef sph_blake_big_context sph_blake512_context;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-224 context (pointer to a
|
||||
* <code>sph_blake224_context</code>)
|
||||
*/
|
||||
void sph_blake224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-256 context (pointer to a
|
||||
* <code>sph_blake256_context</code>)
|
||||
*/
|
||||
void sph_blake256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-384 context (pointer to a
|
||||
* <code>sph_blake384_context</code>)
|
||||
*/
|
||||
void sph_blake384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-512 context (pointer to a
|
||||
* <code>sph_blake512_context</code>)
|
||||
*/
|
||||
void sph_blake512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
986
src/crypto/ghostrider/sph_bmw.c
Normal file
986
src/crypto/ghostrider/sph_bmw.c
Normal file
|
@ -0,0 +1,986 @@
|
|||
/* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* BMW implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include "sph_bmw.h"
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BMW
|
||||
#define SPH_SMALL_FOOTPRINT_BMW 1
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0x00010203), SPH_C32(0x04050607),
|
||||
SPH_C32(0x08090A0B), SPH_C32(0x0C0D0E0F),
|
||||
SPH_C32(0x10111213), SPH_C32(0x14151617),
|
||||
SPH_C32(0x18191A1B), SPH_C32(0x1C1D1E1F),
|
||||
SPH_C32(0x20212223), SPH_C32(0x24252627),
|
||||
SPH_C32(0x28292A2B), SPH_C32(0x2C2D2E2F),
|
||||
SPH_C32(0x30313233), SPH_C32(0x34353637),
|
||||
SPH_C32(0x38393A3B), SPH_C32(0x3C3D3E3F)
|
||||
};
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x40414243), SPH_C32(0x44454647),
|
||||
SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F),
|
||||
SPH_C32(0x50515253), SPH_C32(0x54555657),
|
||||
SPH_C32(0x58595A5B), SPH_C32(0x5C5D5E5F),
|
||||
SPH_C32(0x60616263), SPH_C32(0x64656667),
|
||||
SPH_C32(0x68696A6B), SPH_C32(0x6C6D6E6F),
|
||||
SPH_C32(0x70717273), SPH_C32(0x74757677),
|
||||
SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F)
|
||||
};
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
static const sph_u64 IV384[] = {
|
||||
SPH_C64(0x0001020304050607), SPH_C64(0x08090A0B0C0D0E0F),
|
||||
SPH_C64(0x1011121314151617), SPH_C64(0x18191A1B1C1D1E1F),
|
||||
SPH_C64(0x2021222324252627), SPH_C64(0x28292A2B2C2D2E2F),
|
||||
SPH_C64(0x3031323334353637), SPH_C64(0x38393A3B3C3D3E3F),
|
||||
SPH_C64(0x4041424344454647), SPH_C64(0x48494A4B4C4D4E4F),
|
||||
SPH_C64(0x5051525354555657), SPH_C64(0x58595A5B5C5D5E5F),
|
||||
SPH_C64(0x6061626364656667), SPH_C64(0x68696A6B6C6D6E6F),
|
||||
SPH_C64(0x7071727374757677), SPH_C64(0x78797A7B7C7D7E7F)
|
||||
};
|
||||
|
||||
static const sph_u64 IV512[] = {
|
||||
SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
|
||||
SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
|
||||
SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
|
||||
SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
|
||||
SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
|
||||
SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
|
||||
SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
|
||||
SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* Two-level token pasting: XCAT expands its arguments, XCAT_ pastes them. */
#define XCAT(x, y)     XCAT_(x, y)
#define XCAT_(x, y)    x ## y

/*
 * A bare opening parenthesis. The expand*_ helpers write
 * "name_inner LPAR args)" so that the I16_n / M16_n index lists are expanded
 * into individual arguments before the inner macro is invoked.
 */
#define LPAR   (
|
||||
|
||||
/*
 * I16_n (n = 16..31): the sixteen consecutive indices n-16 .. n-1.
 * They become the i0..i15 arguments of the expand*_inner macros.
 */
#define I16_16    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15
#define I16_17    1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16
#define I16_18    2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17
#define I16_19    3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18
#define I16_20    4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
#define I16_21    5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
#define I16_22    6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
#define I16_23    7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
#define I16_24    8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
#define I16_25    9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
#define I16_26   10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
#define I16_27   11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
#define I16_28   12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
#define I16_29   13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
#define I16_30   14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
#define I16_31   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
|
||||
|
||||
/*
 * M16_n (n = 16..31, j = n - 16): the seven values consumed by add_elt_s /
 * add_elt_b as (j0m, j1m, j3m, j4m, j7m, j10m, j11m):
 *   j, j+1, (j+3)%16, (j+3)%16 + 1, (j+7)%16, (j+10)%16, (j+10)%16 + 1
 * The 1st, 3rd and 6th are message-word indices, the 2nd, 4th and 7th are
 * the matching rotation counts, and the 5th is the index given to hf().
 */
#define M16_16    0,  1,  3,  4,  7, 10, 11
#define M16_17    1,  2,  4,  5,  8, 11, 12
#define M16_18    2,  3,  5,  6,  9, 12, 13
#define M16_19    3,  4,  6,  7, 10, 13, 14
#define M16_20    4,  5,  7,  8, 11, 14, 15
#define M16_21    5,  6,  8,  9, 12, 15, 16
#define M16_22    6,  7,  9, 10, 13,  0,  1
#define M16_23    7,  8, 10, 11, 14,  1,  2
#define M16_24    8,  9, 11, 12, 15,  2,  3
#define M16_25    9, 10, 12, 13,  0,  3,  4
#define M16_26   10, 11, 13, 14,  1,  4,  5
#define M16_27   11, 12, 14, 15,  2,  5,  6
#define M16_28   12, 13, 15, 16,  3,  6,  7
#define M16_29   13, 14,  0,  1,  4,  7,  8
#define M16_30   14, 15,  1,  2,  5,  8,  9
#define M16_31   15, 16,  2,  3,  6,  9, 10
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/*
 * 32-bit word transforms.
 *   ss0..ss3: right shift ^ left shift ^ two left rotations
 *   ss4, ss5: the word XORed with itself shifted right by 1 / 2
 *   rs1..rs7: plain left rotations
 * Note that the argument is evaluated several times.
 */
#define ss0(x)    (((x) >> 1) ^ SPH_T32((x) << 3) \
                  ^ SPH_ROTL32(x,  4) ^ SPH_ROTL32(x, 19))
#define ss1(x)    (((x) >> 1) ^ SPH_T32((x) << 2) \
                  ^ SPH_ROTL32(x,  8) ^ SPH_ROTL32(x, 23))
#define ss2(x)    (((x) >> 2) ^ SPH_T32((x) << 1) \
                  ^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
#define ss3(x)    (((x) >> 2) ^ SPH_T32((x) << 2) \
                  ^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
#define ss4(x)    ((x) ^ ((x) >> 1))
#define ss5(x)    ((x) ^ ((x) >> 2))
#define rs1(x)    SPH_ROTL32(x,  3)
#define rs2(x)    SPH_ROTL32(x,  7)
#define rs3(x)    SPH_ROTL32(x, 13)
#define rs4(x)    SPH_ROTL32(x, 16)
#define rs5(x)    SPH_ROTL32(x, 19)
#define rs6(x)    SPH_ROTL32(x, 23)
#define rs7(x)    SPH_ROTL32(x, 27)

/* Per-index additive constant: j * 0x05555555, truncated to 32 bits. */
#define Ks(j)   SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))

/*
 * Combines three rotated message words (two added, one subtracted) with
 * Ks(j16), truncates to 32 bits, then XORs in hf(j7m).
 */
#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
	(SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
		- SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))
|
||||
|
||||
/*
 * expand1s(qf, mf, hf, n): 32-bit expansion value for index n, built from
 * the sixteen previous qf() values (each through ss1/ss2/ss3/ss0 in turn)
 * plus the add_elt_s term. The index lists come from I16_n and M16_n.
 */
#define expand1s_inner(qf, mf, hf, i16, \
		i0, i1, i2, i3, i4, i5, i6, i7, i8, \
		i9, i10, i11, i12, i13, i14, i15, \
		i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
	SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
		+ ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
		+ ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
		+ ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
		+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand1s(qf, mf, hf, i16) \
	expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand1s_(qf, mf, hf, i16, ix, iy) \
	expand1s_inner LPAR qf, mf, hf, i16, ix, iy)

/*
 * expand2s(qf, mf, hf, n): second expansion form. Even-position inputs are
 * added unchanged, odd-position inputs go through rs1..rs7, and the last
 * two inputs go through ss4 and ss5; the add_elt_s term is then added.
 */
#define expand2s_inner(qf, mf, hf, i16, \
		i0, i1, i2, i3, i4, i5, i6, i7, i8, \
		i9, i10, i11, i12, i13, i14, i15, \
		i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
	SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
		+ qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
		+ qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
		+ qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
		+ add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand2s(qf, mf, hf, i16) \
	expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand2s_(qf, mf, hf, i16, ix, iy) \
	expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/*
 * 64-bit word transforms, mirroring the 32-bit ss/rs family.
 *   sb0..sb3: right shift ^ left shift ^ two left rotations
 *   sb4, sb5: the word XORed with itself shifted right by 1 / 2
 *   rb1..rb7: plain left rotations
 * Note that the argument is evaluated several times.
 */
#define sb0(x)    (((x) >> 1) ^ SPH_T64((x) << 3) \
                  ^ SPH_ROTL64(x,  4) ^ SPH_ROTL64(x, 37))
#define sb1(x)    (((x) >> 1) ^ SPH_T64((x) << 2) \
                  ^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43))
#define sb2(x)    (((x) >> 2) ^ SPH_T64((x) << 1) \
                  ^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53))
#define sb3(x)    (((x) >> 2) ^ SPH_T64((x) << 2) \
                  ^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59))
#define sb4(x)    ((x) ^ ((x) >> 1))
#define sb5(x)    ((x) ^ ((x) >> 2))
#define rb1(x)    SPH_ROTL64(x,  5)
#define rb2(x)    SPH_ROTL64(x, 11)
#define rb3(x)    SPH_ROTL64(x, 27)
#define rb4(x)    SPH_ROTL64(x, 32)
#define rb5(x)    SPH_ROTL64(x, 37)
#define rb6(x)    SPH_ROTL64(x, 43)
#define rb7(x)    SPH_ROTL64(x, 53)

/* Per-index additive constant: j * 0x0555555555555555, truncated to 64 bits. */
#define Kb(j)   SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555))
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
|
||||
static const sph_u64 Kb_tab[] = {
|
||||
Kb(16), Kb(17), Kb(18), Kb(19), Kb(20), Kb(21), Kb(22), Kb(23),
|
||||
Kb(24), Kb(25), Kb(26), Kb(27), Kb(28), Kb(29), Kb(30), Kb(31)
|
||||
};
|
||||
|
||||
#define rol_off(mf, j, off) \
|
||||
SPH_ROTL64(mf(((j) + (off)) & 15), (((j) + (off)) & 15) + 1)
|
||||
|
||||
#define add_elt_b(mf, hf, j) \
|
||||
(SPH_T64(rol_off(mf, j, 0) + rol_off(mf, j, 3) \
|
||||
- rol_off(mf, j, 10) + Kb_tab[j]) ^ hf(((j) + 7) & 15))
|
||||
|
||||
#define expand1b(qf, mf, hf, i) \
|
||||
SPH_T64(sb1(qf((i) - 16)) + sb2(qf((i) - 15)) \
|
||||
+ sb3(qf((i) - 14)) + sb0(qf((i) - 13)) \
|
||||
+ sb1(qf((i) - 12)) + sb2(qf((i) - 11)) \
|
||||
+ sb3(qf((i) - 10)) + sb0(qf((i) - 9)) \
|
||||
+ sb1(qf((i) - 8)) + sb2(qf((i) - 7)) \
|
||||
+ sb3(qf((i) - 6)) + sb0(qf((i) - 5)) \
|
||||
+ sb1(qf((i) - 4)) + sb2(qf((i) - 3)) \
|
||||
+ sb3(qf((i) - 2)) + sb0(qf((i) - 1)) \
|
||||
+ add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
#define expand2b(qf, mf, hf, i) \
|
||||
SPH_T64(qf((i) - 16) + rb1(qf((i) - 15)) \
|
||||
+ qf((i) - 14) + rb2(qf((i) - 13)) \
|
||||
+ qf((i) - 12) + rb3(qf((i) - 11)) \
|
||||
+ qf((i) - 10) + rb4(qf((i) - 9)) \
|
||||
+ qf((i) - 8) + rb5(qf((i) - 7)) \
|
||||
+ qf((i) - 6) + rb6(qf((i) - 5)) \
|
||||
+ qf((i) - 4) + rb7(qf((i) - 3)) \
|
||||
+ sb4(qf((i) - 2)) + sb5(qf((i) - 1)) \
|
||||
+ add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Combines three rotated 64-bit message words (two added, one subtracted)
 * with Kb(j16), truncates to 64 bits, then XORs in hf(j7m).
 */
#define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
	(SPH_T64(SPH_ROTL64(mf(j0m), j1m) + SPH_ROTL64(mf(j3m), j4m) \
		- SPH_ROTL64(mf(j10m), j11m) + Kb(j16)) ^ hf(j7m))

/*
 * expand1b(qf, mf, hf, n): 64-bit expansion value for index n, built from
 * the sixteen previous qf() values (each through sb1/sb2/sb3/sb0 in turn)
 * plus the add_elt_b term. The index lists come from I16_n and M16_n.
 */
#define expand1b_inner(qf, mf, hf, i16, \
		i0, i1, i2, i3, i4, i5, i6, i7, i8, \
		i9, i10, i11, i12, i13, i14, i15, \
		i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
	SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) + sb3(qf(i2)) + sb0(qf(i3)) \
		+ sb1(qf(i4)) + sb2(qf(i5)) + sb3(qf(i6)) + sb0(qf(i7)) \
		+ sb1(qf(i8)) + sb2(qf(i9)) + sb3(qf(i10)) + sb0(qf(i11)) \
		+ sb1(qf(i12)) + sb2(qf(i13)) + sb3(qf(i14)) + sb0(qf(i15)) \
		+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand1b(qf, mf, hf, i16) \
	expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand1b_(qf, mf, hf, i16, ix, iy) \
	expand1b_inner LPAR qf, mf, hf, i16, ix, iy)

/*
 * expand2b(qf, mf, hf, n): second expansion form. Even-position inputs are
 * added unchanged, odd-position inputs go through rb1..rb7, and the last
 * two inputs go through sb4 and sb5; the add_elt_b term is then added.
 */
#define expand2b_inner(qf, mf, hf, i16, \
		i0, i1, i2, i3, i4, i5, i6, i7, i8, \
		i9, i10, i11, i12, i13, i14, i15, \
		i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
	SPH_T64(qf(i0) + rb1(qf(i1)) + qf(i2) + rb2(qf(i3)) \
		+ qf(i4) + rb3(qf(i5)) + qf(i6) + rb4(qf(i7)) \
		+ qf(i8) + rb5(qf(i9)) + qf(i10) + rb6(qf(i11)) \
		+ qf(i12) + rb7(qf(i13)) + sb4(qf(i14)) + sb5(qf(i15)) \
		+ add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand2b(qf, mf, hf, i16) \
	expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
#define expand2b_(qf, mf, hf, i16, ix, iy) \
	expand2b_inner LPAR qf, mf, hf, i16, ix, iy)
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Signed combination of five (M(i) ^ H(i)) terms, joined by the operator
 * tokens op01..op34 (each '+' or '-') and truncated with tt(). It relies on
 * the M() and H() macros that the compress functions define locally.
 */
#define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \
	tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
		op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/* The sixteen 32-bit W values: index/sign patterns fed to MAKE_W. */
#define Ws0     MAKE_W(SPH_T32,  5, -,  7, +, 10, +, 13, +, 14)
#define Ws1     MAKE_W(SPH_T32,  6, -,  8, +, 11, +, 14, -, 15)
#define Ws2     MAKE_W(SPH_T32,  0, +,  7, +,  9, -, 12, +, 15)
#define Ws3     MAKE_W(SPH_T32,  0, -,  1, +,  8, -, 10, +, 13)
#define Ws4     MAKE_W(SPH_T32,  1, +,  2, +,  9, -, 11, -, 14)
#define Ws5     MAKE_W(SPH_T32,  3, -,  2, +, 10, -, 12, +, 15)
#define Ws6     MAKE_W(SPH_T32,  4, -,  0, -,  3, -, 11, +, 13)
#define Ws7     MAKE_W(SPH_T32,  1, -,  4, -,  5, -, 12, -, 14)
#define Ws8     MAKE_W(SPH_T32,  2, -,  5, -,  6, +, 13, -, 15)
#define Ws9     MAKE_W(SPH_T32,  0, -,  3, +,  6, -,  7, +, 14)
#define Ws10    MAKE_W(SPH_T32,  8, -,  1, -,  4, -,  7, +, 15)
#define Ws11    MAKE_W(SPH_T32,  8, -,  0, -,  2, -,  5, +,  9)
#define Ws12    MAKE_W(SPH_T32,  1, +,  3, -,  6, -,  9, +, 10)
#define Ws13    MAKE_W(SPH_T32,  2, +,  4, +,  7, +, 10, +, 11)
#define Ws14    MAKE_W(SPH_T32,  3, -,  5, +,  8, -, 11, -, 12)
#define Ws15    MAKE_W(SPH_T32, 12, -,  4, -,  6, -,  9, +, 13)
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
|
||||
/*
 * Small-footprint variant: fills qt[0..15]. The W values are stored in a
 * local array, then entries 0..14 are produced by a loop that cycles
 * through ss0..ss4 in groups of five; entry 15 uses ss0 and H(0).
 */
#define MAKE_Qas   do { \
		unsigned u; \
		sph_u32 Ws[16]; \
		Ws[ 0] = Ws0; \
		Ws[ 1] = Ws1; \
		Ws[ 2] = Ws2; \
		Ws[ 3] = Ws3; \
		Ws[ 4] = Ws4; \
		Ws[ 5] = Ws5; \
		Ws[ 6] = Ws6; \
		Ws[ 7] = Ws7; \
		Ws[ 8] = Ws8; \
		Ws[ 9] = Ws9; \
		Ws[10] = Ws10; \
		Ws[11] = Ws11; \
		Ws[12] = Ws12; \
		Ws[13] = Ws13; \
		Ws[14] = Ws14; \
		Ws[15] = Ws15; \
		for (u = 0; u < 15; u += 5) { \
			qt[u + 0] = SPH_T32(ss0(Ws[u + 0]) + H(u + 1)); \
			qt[u + 1] = SPH_T32(ss1(Ws[u + 1]) + H(u + 2)); \
			qt[u + 2] = SPH_T32(ss2(Ws[u + 2]) + H(u + 3)); \
			qt[u + 3] = SPH_T32(ss3(Ws[u + 3]) + H(u + 4)); \
			qt[u + 4] = SPH_T32(ss4(Ws[u + 4]) + H(u + 5)); \
		} \
		qt[15] = SPH_T32(ss0(Ws[15]) + H(0)); \
	} while (0)

/* Fills qt[16..31]: expand1s for 16 and 17, expand2s for 18..31. */
#define MAKE_Qbs   do { \
		qt[16] = expand1s(Qs, M, H, 16); \
		qt[17] = expand1s(Qs, M, H, 17); \
		qt[18] = expand2s(Qs, M, H, 18); \
		qt[19] = expand2s(Qs, M, H, 19); \
		qt[20] = expand2s(Qs, M, H, 20); \
		qt[21] = expand2s(Qs, M, H, 21); \
		qt[22] = expand2s(Qs, M, H, 22); \
		qt[23] = expand2s(Qs, M, H, 23); \
		qt[24] = expand2s(Qs, M, H, 24); \
		qt[25] = expand2s(Qs, M, H, 25); \
		qt[26] = expand2s(Qs, M, H, 26); \
		qt[27] = expand2s(Qs, M, H, 27); \
		qt[28] = expand2s(Qs, M, H, 28); \
		qt[29] = expand2s(Qs, M, H, 29); \
		qt[30] = expand2s(Qs, M, H, 30); \
		qt[31] = expand2s(Qs, M, H, 31); \
	} while (0)
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Fully unrolled variant: fills qt[0..15]. Entry i applies ss(i mod 5) to
 * Ws<i> and adds H((i + 1) mod 16).
 */
#define MAKE_Qas   do { \
		qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
		qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
		qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
		qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
		qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
		qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
		qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
		qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
		qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
		qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
		qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
		qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
		qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
		qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
		qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
		qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
	} while (0)

/* Fills qt[16..31]: expand1s for 16 and 17, expand2s for 18..31. */
#define MAKE_Qbs   do { \
		qt[16] = expand1s(Qs, M, H, 16); \
		qt[17] = expand1s(Qs, M, H, 17); \
		qt[18] = expand2s(Qs, M, H, 18); \
		qt[19] = expand2s(Qs, M, H, 19); \
		qt[20] = expand2s(Qs, M, H, 20); \
		qt[21] = expand2s(Qs, M, H, 21); \
		qt[22] = expand2s(Qs, M, H, 22); \
		qt[23] = expand2s(Qs, M, H, 23); \
		qt[24] = expand2s(Qs, M, H, 24); \
		qt[25] = expand2s(Qs, M, H, 25); \
		qt[26] = expand2s(Qs, M, H, 26); \
		qt[27] = expand2s(Qs, M, H, 27); \
		qt[28] = expand2s(Qs, M, H, 28); \
		qt[29] = expand2s(Qs, M, H, 29); \
		qt[30] = expand2s(Qs, M, H, 30); \
		qt[31] = expand2s(Qs, M, H, 31); \
	} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
/* Fills the whole 32-entry qt[] array: first half, then second half. */
#define MAKE_Qs   do { \
		MAKE_Qas; \
		MAKE_Qbs; \
	} while (0)

/* Accessor for the local qt[] array declared inside FOLD. */
#define Qs(j)   (qt[j])
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/* The sixteen 64-bit W values: same index/sign patterns as Ws0..Ws15. */
#define Wb0     MAKE_W(SPH_T64,  5, -,  7, +, 10, +, 13, +, 14)
#define Wb1     MAKE_W(SPH_T64,  6, -,  8, +, 11, +, 14, -, 15)
#define Wb2     MAKE_W(SPH_T64,  0, +,  7, +,  9, -, 12, +, 15)
#define Wb3     MAKE_W(SPH_T64,  0, -,  1, +,  8, -, 10, +, 13)
#define Wb4     MAKE_W(SPH_T64,  1, +,  2, +,  9, -, 11, -, 14)
#define Wb5     MAKE_W(SPH_T64,  3, -,  2, +, 10, -, 12, +, 15)
#define Wb6     MAKE_W(SPH_T64,  4, -,  0, -,  3, -, 11, +, 13)
#define Wb7     MAKE_W(SPH_T64,  1, -,  4, -,  5, -, 12, -, 14)
#define Wb8     MAKE_W(SPH_T64,  2, -,  5, -,  6, +, 13, -, 15)
#define Wb9     MAKE_W(SPH_T64,  0, -,  3, +,  6, -,  7, +, 14)
#define Wb10    MAKE_W(SPH_T64,  8, -,  1, -,  4, -,  7, +, 15)
#define Wb11    MAKE_W(SPH_T64,  8, -,  0, -,  2, -,  5, +,  9)
#define Wb12    MAKE_W(SPH_T64,  1, +,  3, -,  6, -,  9, +, 10)
#define Wb13    MAKE_W(SPH_T64,  2, +,  4, +,  7, +, 10, +, 11)
#define Wb14    MAKE_W(SPH_T64,  3, -,  5, +,  8, -, 11, -, 12)
#define Wb15    MAKE_W(SPH_T64, 12, -,  4, -,  6, -,  9, +, 13)
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
|
||||
/*
 * Small-footprint variant: fills qt[0..15]. The W values are stored in a
 * local array, then entries 0..14 are produced by a loop that cycles
 * through sb0..sb4 in groups of five; entry 15 uses sb0 and H(0).
 */
#define MAKE_Qab   do { \
		unsigned u; \
		sph_u64 Wb[16]; \
		Wb[ 0] = Wb0; \
		Wb[ 1] = Wb1; \
		Wb[ 2] = Wb2; \
		Wb[ 3] = Wb3; \
		Wb[ 4] = Wb4; \
		Wb[ 5] = Wb5; \
		Wb[ 6] = Wb6; \
		Wb[ 7] = Wb7; \
		Wb[ 8] = Wb8; \
		Wb[ 9] = Wb9; \
		Wb[10] = Wb10; \
		Wb[11] = Wb11; \
		Wb[12] = Wb12; \
		Wb[13] = Wb13; \
		Wb[14] = Wb14; \
		Wb[15] = Wb15; \
		for (u = 0; u < 15; u += 5) { \
			qt[u + 0] = SPH_T64(sb0(Wb[u + 0]) + H(u + 1)); \
			qt[u + 1] = SPH_T64(sb1(Wb[u + 1]) + H(u + 2)); \
			qt[u + 2] = SPH_T64(sb2(Wb[u + 2]) + H(u + 3)); \
			qt[u + 3] = SPH_T64(sb3(Wb[u + 3]) + H(u + 4)); \
			qt[u + 4] = SPH_T64(sb4(Wb[u + 4]) + H(u + 5)); \
		} \
		qt[15] = SPH_T64(sb0(Wb[15]) + H(0)); \
	} while (0)

/* Fills qt[16..31] with loops: expand1b for 16..17, expand2b for 18..31. */
#define MAKE_Qbb   do { \
		unsigned u; \
		for (u = 16; u < 18; u ++) \
			qt[u] = expand1b(Qb, M, H, u); \
		for (u = 18; u < 32; u ++) \
			qt[u] = expand2b(Qb, M, H, u); \
	} while (0)
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Fully unrolled variant: fills qt[0..15]. Entry i applies sb(i mod 5) to
 * Wb<i> and adds H((i + 1) mod 16).
 */
#define MAKE_Qab   do { \
		qt[ 0] = SPH_T64(sb0(Wb0 ) + H( 1)); \
		qt[ 1] = SPH_T64(sb1(Wb1 ) + H( 2)); \
		qt[ 2] = SPH_T64(sb2(Wb2 ) + H( 3)); \
		qt[ 3] = SPH_T64(sb3(Wb3 ) + H( 4)); \
		qt[ 4] = SPH_T64(sb4(Wb4 ) + H( 5)); \
		qt[ 5] = SPH_T64(sb0(Wb5 ) + H( 6)); \
		qt[ 6] = SPH_T64(sb1(Wb6 ) + H( 7)); \
		qt[ 7] = SPH_T64(sb2(Wb7 ) + H( 8)); \
		qt[ 8] = SPH_T64(sb3(Wb8 ) + H( 9)); \
		qt[ 9] = SPH_T64(sb4(Wb9 ) + H(10)); \
		qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
		qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
		qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
		qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
		qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
		qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
	} while (0)

/* Fills qt[16..31]: expand1b for 16 and 17, expand2b for 18..31. */
#define MAKE_Qbb   do { \
		qt[16] = expand1b(Qb, M, H, 16); \
		qt[17] = expand1b(Qb, M, H, 17); \
		qt[18] = expand2b(Qb, M, H, 18); \
		qt[19] = expand2b(Qb, M, H, 19); \
		qt[20] = expand2b(Qb, M, H, 20); \
		qt[21] = expand2b(Qb, M, H, 21); \
		qt[22] = expand2b(Qb, M, H, 22); \
		qt[23] = expand2b(Qb, M, H, 23); \
		qt[24] = expand2b(Qb, M, H, 24); \
		qt[25] = expand2b(Qb, M, H, 25); \
		qt[26] = expand2b(Qb, M, H, 26); \
		qt[27] = expand2b(Qb, M, H, 27); \
		qt[28] = expand2b(Qb, M, H, 28); \
		qt[29] = expand2b(Qb, M, H, 29); \
		qt[30] = expand2b(Qb, M, H, 30); \
		qt[31] = expand2b(Qb, M, H, 31); \
	} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
/* Fills the whole 32-entry qt[] array: first half, then second half. */
#define MAKE_Qb   do { \
		MAKE_Qab; \
		MAKE_Qbb; \
	} while (0)

/* Accessor for the local qt[] array declared inside FOLD. */
#define Qb(j)   (qt[j])
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Body of one compression step, shared by the 32-bit and 64-bit variants.
 *
 *   type  word type used for the local qt[32] array and for xl / xh
 *   mkQ   statement that fills qt[0..31]
 *   tt    truncation macro applied to every output word
 *   rol   left-rotation macro
 *   mf    message word accessor
 *   qf    accessor for qt[]
 *   dhf   output word accessor (assigned to)
 *
 * xl is the XOR of qt[16..23] and xh additionally folds in qt[24..31].
 * Outputs 0..7 are computed first; outputs 8..15 each reuse a rotated
 * copy of one of outputs 0..7, so the statement order matters.
 */
#define FOLD(type, mkQ, tt, rol, mf, qf, dhf)   do { \
		type qt[32], xl, xh; \
		mkQ; \
		xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
			^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
		xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
			^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
		dhf( 0) = tt(((xh <<  5) ^ (qf(16) >>  5) ^ mf( 0)) \
			+ (xl ^ qf(24) ^ qf( 0))); \
		dhf( 1) = tt(((xh >>  7) ^ (qf(17) <<  8) ^ mf( 1)) \
			+ (xl ^ qf(25) ^ qf( 1))); \
		dhf( 2) = tt(((xh >>  5) ^ (qf(18) <<  5) ^ mf( 2)) \
			+ (xl ^ qf(26) ^ qf( 2))); \
		dhf( 3) = tt(((xh >>  1) ^ (qf(19) <<  5) ^ mf( 3)) \
			+ (xl ^ qf(27) ^ qf( 3))); \
		dhf( 4) = tt(((xh >>  3) ^ (qf(20) <<  0) ^ mf( 4)) \
			+ (xl ^ qf(28) ^ qf( 4))); \
		dhf( 5) = tt(((xh <<  6) ^ (qf(21) >>  6) ^ mf( 5)) \
			+ (xl ^ qf(29) ^ qf( 5))); \
		dhf( 6) = tt(((xh >>  4) ^ (qf(22) <<  6) ^ mf( 6)) \
			+ (xl ^ qf(30) ^ qf( 6))); \
		dhf( 7) = tt(((xh >> 11) ^ (qf(23) <<  2) ^ mf( 7)) \
			+ (xl ^ qf(31) ^ qf( 7))); \
		dhf( 8) = tt(rol(dhf(4),  9) + (xh ^ qf(24) ^ mf( 8)) \
			+ ((xl << 8) ^ qf(23) ^ qf( 8))); \
		dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) \
			+ ((xl >> 6) ^ qf(16) ^ qf( 9))); \
		dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) \
			+ ((xl << 6) ^ qf(17) ^ qf(10))); \
		dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) \
			+ ((xl << 4) ^ qf(18) ^ qf(11))); \
		dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) \
			+ ((xl >> 3) ^ qf(19) ^ qf(12))); \
		dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) \
			+ ((xl >> 4) ^ qf(20) ^ qf(13))); \
		dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) \
			+ ((xl >> 7) ^ qf(21) ^ qf(14))); \
		dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) \
			+ ((xl >> 2) ^ qf(22) ^ qf(15))); \
	} while (0)
|
||||
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/* 64-bit instantiation of FOLD; expects M() and dH() to be defined at the use site. */
#define FOLDb   FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/* 32-bit instantiation of FOLD; expects M() and dH() to be defined at the use site. */
#define FOLDs   FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
|
||||
|
||||
static void
|
||||
compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16])
|
||||
{
|
||||
#if SPH_LITTLE_FAST
|
||||
#define M(x) sph_dec32le_aligned(data + 4 * (x))
|
||||
#else
|
||||
sph_u32 mv[16];
|
||||
|
||||
mv[ 0] = sph_dec32le_aligned(data + 0);
|
||||
mv[ 1] = sph_dec32le_aligned(data + 4);
|
||||
mv[ 2] = sph_dec32le_aligned(data + 8);
|
||||
mv[ 3] = sph_dec32le_aligned(data + 12);
|
||||
mv[ 4] = sph_dec32le_aligned(data + 16);
|
||||
mv[ 5] = sph_dec32le_aligned(data + 20);
|
||||
mv[ 6] = sph_dec32le_aligned(data + 24);
|
||||
mv[ 7] = sph_dec32le_aligned(data + 28);
|
||||
mv[ 8] = sph_dec32le_aligned(data + 32);
|
||||
mv[ 9] = sph_dec32le_aligned(data + 36);
|
||||
mv[10] = sph_dec32le_aligned(data + 40);
|
||||
mv[11] = sph_dec32le_aligned(data + 44);
|
||||
mv[12] = sph_dec32le_aligned(data + 48);
|
||||
mv[13] = sph_dec32le_aligned(data + 52);
|
||||
mv[14] = sph_dec32le_aligned(data + 56);
|
||||
mv[15] = sph_dec32le_aligned(data + 60);
|
||||
#define M(x) (mv[x])
|
||||
#endif
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
FOLDs;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
}
|
||||
|
||||
static const sph_u32 final_s[16] = {
|
||||
SPH_C32(0xaaaaaaa0), SPH_C32(0xaaaaaaa1), SPH_C32(0xaaaaaaa2),
|
||||
SPH_C32(0xaaaaaaa3), SPH_C32(0xaaaaaaa4), SPH_C32(0xaaaaaaa5),
|
||||
SPH_C32(0xaaaaaaa6), SPH_C32(0xaaaaaaa7), SPH_C32(0xaaaaaaa8),
|
||||
SPH_C32(0xaaaaaaa9), SPH_C32(0xaaaaaaaa), SPH_C32(0xaaaaaaab),
|
||||
SPH_C32(0xaaaaaaac), SPH_C32(0xaaaaaaad), SPH_C32(0xaaaaaaae),
|
||||
SPH_C32(0xaaaaaaaf)
|
||||
};
|
||||
|
||||
static void
|
||||
bmw32_init(sph_bmw_small_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
memcpy(sc->H, iv, sizeof sc->H);
|
||||
sc->ptr = 0;
|
||||
#if SPH_64
|
||||
sc->bit_count = 0;
|
||||
#else
|
||||
sc->bit_count_high = 0;
|
||||
sc->bit_count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
bmw32(sph_bmw_small_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
sph_u32 htmp[16];
|
||||
sph_u32 *h1, *h2;
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
sc->bit_count += (sph_u64)len << 3;
|
||||
#else
|
||||
tmp = sc->bit_count_low;
|
||||
sc->bit_count_low = SPH_T32(tmp + ((sph_u32)len << 3));
|
||||
if (sc->bit_count_low < tmp)
|
||||
sc->bit_count_high ++;
|
||||
sc->bit_count_high += len >> 29;
|
||||
#endif
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
h1 = sc->H;
|
||||
h2 = htmp;
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
sph_u32 *ht;
|
||||
|
||||
compress_small(buf, h1, h2);
|
||||
ht = h1;
|
||||
h1 = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
sc->ptr = ptr;
|
||||
if (h1 != sc->H)
|
||||
memcpy(sc->H, h1, sizeof sc->H);
|
||||
}
|
||||
|
||||
static void
|
||||
bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr, u, v;
|
||||
unsigned z;
|
||||
sph_u32 h1[16], h2[16], *h;
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
h = sc->H;
|
||||
if (ptr > (sizeof sc->buf) - 8) {
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
compress_small(buf, h, h1);
|
||||
ptr = 0;
|
||||
h = h1;
|
||||
}
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
|
||||
#if SPH_64
|
||||
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
SPH_T64(sc->bit_count + n));
|
||||
#else
|
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
sc->bit_count_low + n);
|
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 4,
|
||||
SPH_T32(sc->bit_count_high));
|
||||
#endif
|
||||
compress_small(buf, h, h2);
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc32le_aligned(buf + 4 * u, h2[u]);
|
||||
compress_small(buf, final_s, h1);
|
||||
out = dst;
|
||||
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
|
||||
sph_enc32le(out + 4 * u, h1[v]);
|
||||
}
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
static void
|
||||
compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
|
||||
{
|
||||
#if SPH_LITTLE_FAST
|
||||
#define M(x) sph_dec64le_aligned(data + 8 * (x))
|
||||
#else
|
||||
sph_u64 mv[16];
|
||||
|
||||
mv[ 0] = sph_dec64le_aligned(data + 0);
|
||||
mv[ 1] = sph_dec64le_aligned(data + 8);
|
||||
mv[ 2] = sph_dec64le_aligned(data + 16);
|
||||
mv[ 3] = sph_dec64le_aligned(data + 24);
|
||||
mv[ 4] = sph_dec64le_aligned(data + 32);
|
||||
mv[ 5] = sph_dec64le_aligned(data + 40);
|
||||
mv[ 6] = sph_dec64le_aligned(data + 48);
|
||||
mv[ 7] = sph_dec64le_aligned(data + 56);
|
||||
mv[ 8] = sph_dec64le_aligned(data + 64);
|
||||
mv[ 9] = sph_dec64le_aligned(data + 72);
|
||||
mv[10] = sph_dec64le_aligned(data + 80);
|
||||
mv[11] = sph_dec64le_aligned(data + 88);
|
||||
mv[12] = sph_dec64le_aligned(data + 96);
|
||||
mv[13] = sph_dec64le_aligned(data + 104);
|
||||
mv[14] = sph_dec64le_aligned(data + 112);
|
||||
mv[15] = sph_dec64le_aligned(data + 120);
|
||||
#define M(x) (mv[x])
|
||||
#endif
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
FOLDb;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
}
|
||||
|
||||
static const sph_u64 final_b[16] = {
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
|
||||
};
|
||||
|
||||
static void
|
||||
bmw64_init(sph_bmw_big_context *sc, const sph_u64 *iv)
|
||||
{
|
||||
memcpy(sc->H, iv, sizeof sc->H);
|
||||
sc->ptr = 0;
|
||||
sc->bit_count = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
bmw64(sph_bmw_big_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
sph_u64 htmp[16];
|
||||
sph_u64 *h1, *h2;
|
||||
|
||||
sc->bit_count += (sph_u64)len << 3;
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
h1 = sc->H;
|
||||
h2 = htmp;
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
sph_u64 *ht;
|
||||
|
||||
compress_big(buf, h1, h2);
|
||||
ht = h1;
|
||||
h1 = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
sc->ptr = ptr;
|
||||
if (h1 != sc->H)
|
||||
memcpy(sc->H, h1, sizeof sc->H);
|
||||
}
|
||||
|
||||
static void
|
||||
bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w64)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr, u, v;
|
||||
unsigned z;
|
||||
sph_u64 h1[16], h2[16], *h;
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
h = sc->H;
|
||||
if (ptr > (sizeof sc->buf) - 8) {
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
compress_big(buf, h, h1);
|
||||
ptr = 0;
|
||||
h = h1;
|
||||
}
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
|
||||
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
SPH_T64(sc->bit_count + n));
|
||||
compress_big(buf, h, h2);
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc64le_aligned(buf + 8 * u, h2[u]);
|
||||
compress_big(buf, final_b, h1);
|
||||
out = dst;
|
||||
for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++)
|
||||
sph_enc64le(out + 8 * u, h1[v]);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw224_init(void *cc)
|
||||
{
|
||||
bmw32_init(cc, IV224);
|
||||
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): feed len bytes from data into the
 * BMW-224 context cc. Thin forwarder to bmw32().
 */
void sph_bmw224(void *cc, const void *data, size_t len)
{
	bmw32(cc, data, len);
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): finish the BMW-224 computation and
 * write the digest to dst. Same as sph_bmw224_addbits_and_close() with no
 * extra bits (ub = 0, n = 0).
 */
void sph_bmw224_close(void *cc, void *dst)
{
	sph_bmw224_addbits_and_close(cc, 0, 0, dst);
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): append the n extra bits held in ub,
 * finish the BMW-224 computation and write the digest to dst.
 *
 * Forwards to bmw32_close() with an output size argument of 7
 * (presumably 32-bit words; sph_bmw.h documents a 28-byte result).
 * The context is NOT reset afterwards: the sph_bmw224_init(cc) call that
 * would do so is deliberately left disabled in this file.
 */
void sph_bmw224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	bmw32_close(cc, ub, n, dst, 7);
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw256_init(void *cc)
|
||||
{
|
||||
bmw32_init(cc, IV256);
|
||||
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): feed len bytes from data into the
 * BMW-256 context cc. Thin forwarder to bmw32().
 */
void sph_bmw256(void *cc, const void *data, size_t len)
{
	bmw32(cc, data, len);
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): finish the BMW-256 computation and
 * write the digest to dst. Same as sph_bmw256_addbits_and_close() with no
 * extra bits (ub = 0, n = 0).
 */
void sph_bmw256_close(void *cc, void *dst)
{
	sph_bmw256_addbits_and_close(cc, 0, 0, dst);
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): append the n extra bits held in ub,
 * finish the BMW-256 computation and write the digest to dst.
 *
 * Forwards to bmw32_close() with an output size argument of 8
 * (presumably 32-bit words; sph_bmw.h documents a 32-byte result).
 * The context is NOT reset afterwards: the sph_bmw256_init(cc) call that
 * would do so is deliberately left disabled in this file.
 */
void sph_bmw256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	bmw32_close(cc, ub, n, dst, 8);
}
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw384_init(void *cc)
|
||||
{
|
||||
bmw64_init(cc, IV384);
|
||||
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): feed len bytes from data into the
 * BMW-384 context cc. Thin forwarder to bmw64().
 */
void sph_bmw384(void *cc, const void *data, size_t len)
{
	bmw64(cc, data, len);
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): finish the BMW-384 computation and
 * write the digest to dst. Same as sph_bmw384_addbits_and_close() with no
 * extra bits (ub = 0, n = 0).
 */
void sph_bmw384_close(void *cc, void *dst)
{
	sph_bmw384_addbits_and_close(cc, 0, 0, dst);
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): append the n extra bits held in ub,
 * finish the BMW-384 computation and write the digest to dst.
 *
 * Forwards to bmw64_close() asking for 6 64-bit output words (48 bytes).
 * The context is NOT reset afterwards: the sph_bmw384_init(cc) call that
 * would do so is deliberately left disabled in this file.
 */
void sph_bmw384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	bmw64_close(cc, ub, n, dst, 6);
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw512_init(void *cc)
|
||||
{
|
||||
bmw64_init(cc, IV512);
|
||||
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): feed len bytes from data into the
 * BMW-512 context cc. Thin forwarder to bmw64().
 */
void sph_bmw512(void *cc, const void *data, size_t len)
{
	bmw64(cc, data, len);
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): finish the BMW-512 computation and
 * write the digest to dst. Same as sph_bmw512_addbits_and_close() with no
 * extra bits (ub = 0, n = 0).
 */
void sph_bmw512_close(void *cc, void *dst)
{
	sph_bmw512_addbits_and_close(cc, 0, 0, dst);
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): append the n extra bits held in ub,
 * finish the BMW-512 computation and write the digest to dst.
 *
 * Forwards to bmw64_close() asking for 8 64-bit output words (64 bytes).
 * The context is NOT reset afterwards: the sph_bmw512_init(cc) call that
 * would do so is deliberately left disabled in this file.
 */
void sph_bmw512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	bmw64_close(cc, ub, n, dst, 8);
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
337
src/crypto/ghostrider/sph_bmw.h
Normal file
337
src/crypto/ghostrider/sph_bmw.h
Normal file
|
@ -0,0 +1,337 @@
|
|||
/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* BMW interface. BMW (aka "Blue Midnight Wish") is a family of
|
||||
* functions which differ by their output size; this implementation
|
||||
* defines BMW for output sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_bmw.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_BMW_H__
|
||||
#define SPH_BMW_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-224.
|
||||
*/
|
||||
#define SPH_SIZE_bmw224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-256.
|
||||
*/
|
||||
#define SPH_SIZE_bmw256 256
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-384.
|
||||
*/
|
||||
#define SPH_SIZE_bmw384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-512.
|
||||
*/
|
||||
#define SPH_SIZE_bmw512 512
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-224 and BMW-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BMW computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BMW
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/*
 * State for the 32-bit-word BMW variants (aliased below as
 * sph_bmw224_context and sph_bmw256_context). Fields are hidden from
 * Doxygen via DOXYGEN_IGNORE.
 * NOTE(review): field roles below are presumed to mirror
 * sph_bmw_big_context; the code that uses this struct is in sph_bmw.c.
 */
typedef struct {
#ifndef DOXYGEN_IGNORE
	unsigned char buf[64];    /* first field, for alignment */
	size_t ptr;               /* presumably the number of bytes pending in buf */
	sph_u32 H[16];            /* sixteen 32-bit state words */
#if SPH_64
	sph_u64 bit_count;        /* message length counter, single 64-bit value */
#else
	sph_u32 bit_count_high, bit_count_low;  /* same counter split in two 32-bit halves */
#endif
#endif
} sph_bmw_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-224 computations. It is
|
||||
* identical to the common <code>sph_bmw_small_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_small_context sph_bmw224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-256 computations. It is
|
||||
* identical to the common <code>sph_bmw_small_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_small_context sph_bmw256_context;
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-384 and BMW-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BMW computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BMW
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
/*
 * State for the 64-bit-word BMW variants (aliased below as
 * sph_bmw384_context and sph_bmw512_context). Fields are hidden from
 * Doxygen via DOXYGEN_IGNORE.
 */
typedef struct {
#ifndef DOXYGEN_IGNORE
	unsigned char buf[128];    /* first field, for alignment */
	size_t ptr;                /* number of input bytes currently staged in buf */
	sph_u64 H[16];             /* current chaining value, sixteen 64-bit words */
	sph_u64 bit_count;         /* total message length so far, in bits */
#endif
} sph_bmw_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-384 computations. It is
|
||||
* identical to the common <code>sph_bmw_big_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_big_context sph_bmw384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-512 computations. It is
|
||||
* identical to the common <code>sph_bmw_big_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_big_context sph_bmw512_context;
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/**
|
||||
* Initialize a BMW-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-224 context (pointer to a
|
||||
* <code>sph_bmw224_context</code>)
|
||||
*/
|
||||
void sph_bmw224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
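* accommodate the result (28 bytes). The context is not reinitialized
* by this call (the sph_bmw224_init() call in sph_bmw.c is commented
* out); call sph_bmw224_init() again before reusing the context.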
|
||||
*
|
||||
* @param cc the BMW-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is not reinitialized by this call (the
* sph_bmw224_init() call in sph_bmw.c is commented out); call
* sph_bmw224_init() again before reusing the context.
|
||||
*
|
||||
* @param cc the BMW-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BMW-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-256 context (pointer to a
|
||||
* <code>sph_bmw256_context</code>)
|
||||
*/
|
||||
void sph_bmw256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
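* accommodate the result (32 bytes). The context is not reinitialized
* by this call (the sph_bmw256_init() call in sph_bmw.c is commented
* out); call sph_bmw256_init() again before reusing the context.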
|
||||
*
|
||||
* @param cc the BMW-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is not reinitialized by this call (the
* sph_bmw256_init() call in sph_bmw.c is commented out); call
* sph_bmw256_init() again before reusing the context.
|
||||
*
|
||||
* @param cc the BMW-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Initialize a BMW-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-384 context (pointer to a
|
||||
* <code>sph_bmw384_context</code>)
|
||||
*/
|
||||
void sph_bmw384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
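* accommodate the result (48 bytes). The context is not reinitialized
* by this call (the sph_bmw384_init() call in sph_bmw.c is commented
* out); call sph_bmw384_init() again before reusing the context.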
|
||||
*
|
||||
* @param cc the BMW-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is not reinitialized by this call (the
* sph_bmw384_init() call in sph_bmw.c is commented out); call
* sph_bmw384_init() again before reusing the context.
|
||||
*
|
||||
* @param cc the BMW-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BMW-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-512 context (pointer to a
|
||||
* <code>sph_bmw512_context</code>)
|
||||
*/
|
||||
void sph_bmw512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
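* accommodate the result (64 bytes). The context is not reinitialized
* by this call (the sph_bmw512_init() call in sph_bmw.c is commented
* out); call sph_bmw512_init() again before reusing the context.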
|
||||
*
|
||||
* @param cc the BMW-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is not reinitialized by this call (the
* sph_bmw512_init() call in sph_bmw.c is commented out); call
* sph_bmw512_init() again before reusing the context.
|
||||
*
|
||||
* @param cc the BMW-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
723
src/crypto/ghostrider/sph_cubehash.c
Normal file
723
src/crypto/ghostrider/sph_cubehash.c
Normal file
|
@ -0,0 +1,723 @@
|
|||
/* $Id: cubehash.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* CubeHash implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "sph_cubehash.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_CUBEHASH
|
||||
#define SPH_SMALL_FOOTPRINT_CUBEHASH 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Some tests were conducted on an Intel Core2 Q6600 (32-bit and 64-bit
|
||||
* mode), a PowerPC G3, and a MIPS-compatible CPU (Broadcom BCM3302).
|
||||
* It appears that the optimal settings are:
|
||||
* -- full unroll, no state copy on the "big" systems (x86, PowerPC)
|
||||
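* -- unroll to 4 or 8, state copy on the "small" system (MIPS)
*
* NOTE(review): the defaults defined just below do the opposite for the
* copy setting (SPH_CUBEHASH_NOCOPY is 1 for the small-footprint build
* and 0 otherwise, i.e. the state is copied into locals on "big"
* systems) -- confirm whether the comment or the defaults is intended.
*/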
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_CUBEHASH
|
||||
|
||||
#if !defined SPH_CUBEHASH_UNROLL
|
||||
#define SPH_CUBEHASH_UNROLL 4
|
||||
#endif
|
||||
#if !defined SPH_CUBEHASH_NOCOPY
|
||||
#define SPH_CUBEHASH_NOCOPY 1
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if !defined SPH_CUBEHASH_UNROLL
|
||||
#define SPH_CUBEHASH_UNROLL 0
|
||||
#endif
|
||||
#if !defined SPH_CUBEHASH_NOCOPY
|
||||
#define SPH_CUBEHASH_NOCOPY 0
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0xB0FC8217), SPH_C32(0x1BEE1A90), SPH_C32(0x829E1A22),
|
||||
SPH_C32(0x6362C342), SPH_C32(0x24D91C30), SPH_C32(0x03A7AA24),
|
||||
SPH_C32(0xA63721C8), SPH_C32(0x85B0E2EF), SPH_C32(0xF35D13F3),
|
||||
SPH_C32(0x41DA807D), SPH_C32(0x21A70CA6), SPH_C32(0x1F4E9774),
|
||||
SPH_C32(0xB3E1C932), SPH_C32(0xEB0A79A8), SPH_C32(0xCDDAAA66),
|
||||
SPH_C32(0xE2F6ECAA), SPH_C32(0x0A713362), SPH_C32(0xAA3080E0),
|
||||
SPH_C32(0xD8F23A32), SPH_C32(0xCEF15E28), SPH_C32(0xDB086314),
|
||||
SPH_C32(0x7F709DF7), SPH_C32(0xACD228A4), SPH_C32(0x704D6ECE),
|
||||
SPH_C32(0xAA3EC95F), SPH_C32(0xE387C214), SPH_C32(0x3A6445FF),
|
||||
SPH_C32(0x9CAB81C3), SPH_C32(0xC73D4B98), SPH_C32(0xD277AEBE),
|
||||
SPH_C32(0xFD20151C), SPH_C32(0x00CB573E)
|
||||
};
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0xEA2BD4B4), SPH_C32(0xCCD6F29F), SPH_C32(0x63117E71),
|
||||
SPH_C32(0x35481EAE), SPH_C32(0x22512D5B), SPH_C32(0xE5D94E63),
|
||||
SPH_C32(0x7E624131), SPH_C32(0xF4CC12BE), SPH_C32(0xC2D0B696),
|
||||
SPH_C32(0x42AF2070), SPH_C32(0xD0720C35), SPH_C32(0x3361DA8C),
|
||||
SPH_C32(0x28CCECA4), SPH_C32(0x8EF8AD83), SPH_C32(0x4680AC00),
|
||||
SPH_C32(0x40E5FBAB), SPH_C32(0xD89041C3), SPH_C32(0x6107FBD5),
|
||||
SPH_C32(0x6C859D41), SPH_C32(0xF0B26679), SPH_C32(0x09392549),
|
||||
SPH_C32(0x5FA25603), SPH_C32(0x65C892FD), SPH_C32(0x93CB6285),
|
||||
SPH_C32(0x2AF2B5AE), SPH_C32(0x9E4B4E60), SPH_C32(0x774ABFDD),
|
||||
SPH_C32(0x85254725), SPH_C32(0x15815AEB), SPH_C32(0x4AB6AAD6),
|
||||
SPH_C32(0x9CDAF8AF), SPH_C32(0xD6032C0A)
|
||||
};
|
||||
|
||||
static const sph_u32 IV384[] = {
|
||||
SPH_C32(0xE623087E), SPH_C32(0x04C00C87), SPH_C32(0x5EF46453),
|
||||
SPH_C32(0x69524B13), SPH_C32(0x1A05C7A9), SPH_C32(0x3528DF88),
|
||||
SPH_C32(0x6BDD01B5), SPH_C32(0x5057B792), SPH_C32(0x6AA7A922),
|
||||
SPH_C32(0x649C7EEE), SPH_C32(0xF426309F), SPH_C32(0xCB629052),
|
||||
SPH_C32(0xFC8E20ED), SPH_C32(0xB3482BAB), SPH_C32(0xF89E5E7E),
|
||||
SPH_C32(0xD83D4DE4), SPH_C32(0x44BFC10D), SPH_C32(0x5FC1E63D),
|
||||
SPH_C32(0x2104E6CB), SPH_C32(0x17958F7F), SPH_C32(0xDBEAEF70),
|
||||
SPH_C32(0xB4B97E1E), SPH_C32(0x32C195F6), SPH_C32(0x6184A8E4),
|
||||
SPH_C32(0x796C2543), SPH_C32(0x23DE176D), SPH_C32(0xD33BBAEC),
|
||||
SPH_C32(0x0C12E5D2), SPH_C32(0x4EB95A7B), SPH_C32(0x2D18BA01),
|
||||
SPH_C32(0x04EE475F), SPH_C32(0x1FC5F22E)
|
||||
};
|
||||
|
||||
static const sph_u32 IV512[] = {
|
||||
SPH_C32(0x2AEA2A61), SPH_C32(0x50F494D4), SPH_C32(0x2D538B8B),
|
||||
SPH_C32(0x4167D83E), SPH_C32(0x3FEE2313), SPH_C32(0xC701CF8C),
|
||||
SPH_C32(0xCC39968E), SPH_C32(0x50AC5695), SPH_C32(0x4D42C787),
|
||||
SPH_C32(0xA647A8B3), SPH_C32(0x97CF0BEF), SPH_C32(0x825B4537),
|
||||
SPH_C32(0xEEF864D2), SPH_C32(0xF22090C4), SPH_C32(0xD0E5CD33),
|
||||
SPH_C32(0xA23911AE), SPH_C32(0xFCD398D9), SPH_C32(0x148FE485),
|
||||
SPH_C32(0x1B017BEF), SPH_C32(0xB6444532), SPH_C32(0x6A536159),
|
||||
SPH_C32(0x2FF5781C), SPH_C32(0x91FA7934), SPH_C32(0x0DBADEA9),
|
||||
SPH_C32(0xD65C8A2B), SPH_C32(0xA5A70E75), SPH_C32(0xB1C62456),
|
||||
SPH_C32(0xBC796576), SPH_C32(0x1921C8F7), SPH_C32(0xE7989AF1),
|
||||
SPH_C32(0x7795D246), SPH_C32(0xD43E3B44)
|
||||
};
|
||||
|
||||
#define T32 SPH_T32
|
||||
#define ROTL32 SPH_ROTL32
|
||||
|
||||
#if SPH_CUBEHASH_NOCOPY
|
||||
|
||||
#define DECL_STATE
|
||||
#define READ_STATE(cc)
|
||||
#define WRITE_STATE(cc)
|
||||
|
||||
#define x0 ((sc)->state[ 0])
|
||||
#define x1 ((sc)->state[ 1])
|
||||
#define x2 ((sc)->state[ 2])
|
||||
#define x3 ((sc)->state[ 3])
|
||||
#define x4 ((sc)->state[ 4])
|
||||
#define x5 ((sc)->state[ 5])
|
||||
#define x6 ((sc)->state[ 6])
|
||||
#define x7 ((sc)->state[ 7])
|
||||
#define x8 ((sc)->state[ 8])
|
||||
#define x9 ((sc)->state[ 9])
|
||||
#define xa ((sc)->state[10])
|
||||
#define xb ((sc)->state[11])
|
||||
#define xc ((sc)->state[12])
|
||||
#define xd ((sc)->state[13])
|
||||
#define xe ((sc)->state[14])
|
||||
#define xf ((sc)->state[15])
|
||||
#define xg ((sc)->state[16])
|
||||
#define xh ((sc)->state[17])
|
||||
#define xi ((sc)->state[18])
|
||||
#define xj ((sc)->state[19])
|
||||
#define xk ((sc)->state[20])
|
||||
#define xl ((sc)->state[21])
|
||||
#define xm ((sc)->state[22])
|
||||
#define xn ((sc)->state[23])
|
||||
#define xo ((sc)->state[24])
|
||||
#define xp ((sc)->state[25])
|
||||
#define xq ((sc)->state[26])
|
||||
#define xr ((sc)->state[27])
|
||||
#define xs ((sc)->state[28])
|
||||
#define xt ((sc)->state[29])
|
||||
#define xu ((sc)->state[30])
|
||||
#define xv ((sc)->state[31])
|
||||
|
||||
#else
|
||||
|
||||
#define DECL_STATE \
|
||||
sph_u32 x0, x1, x2, x3, x4, x5, x6, x7; \
|
||||
sph_u32 x8, x9, xa, xb, xc, xd, xe, xf; \
|
||||
sph_u32 xg, xh, xi, xj, xk, xl, xm, xn; \
|
||||
sph_u32 xo, xp, xq, xr, xs, xt, xu, xv;
|
||||
|
||||
#define READ_STATE(cc) do { \
|
||||
x0 = (cc)->state[ 0]; \
|
||||
x1 = (cc)->state[ 1]; \
|
||||
x2 = (cc)->state[ 2]; \
|
||||
x3 = (cc)->state[ 3]; \
|
||||
x4 = (cc)->state[ 4]; \
|
||||
x5 = (cc)->state[ 5]; \
|
||||
x6 = (cc)->state[ 6]; \
|
||||
x7 = (cc)->state[ 7]; \
|
||||
x8 = (cc)->state[ 8]; \
|
||||
x9 = (cc)->state[ 9]; \
|
||||
xa = (cc)->state[10]; \
|
||||
xb = (cc)->state[11]; \
|
||||
xc = (cc)->state[12]; \
|
||||
xd = (cc)->state[13]; \
|
||||
xe = (cc)->state[14]; \
|
||||
xf = (cc)->state[15]; \
|
||||
xg = (cc)->state[16]; \
|
||||
xh = (cc)->state[17]; \
|
||||
xi = (cc)->state[18]; \
|
||||
xj = (cc)->state[19]; \
|
||||
xk = (cc)->state[20]; \
|
||||
xl = (cc)->state[21]; \
|
||||
xm = (cc)->state[22]; \
|
||||
xn = (cc)->state[23]; \
|
||||
xo = (cc)->state[24]; \
|
||||
xp = (cc)->state[25]; \
|
||||
xq = (cc)->state[26]; \
|
||||
xr = (cc)->state[27]; \
|
||||
xs = (cc)->state[28]; \
|
||||
xt = (cc)->state[29]; \
|
||||
xu = (cc)->state[30]; \
|
||||
xv = (cc)->state[31]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE(cc) do { \
|
||||
(cc)->state[ 0] = x0; \
|
||||
(cc)->state[ 1] = x1; \
|
||||
(cc)->state[ 2] = x2; \
|
||||
(cc)->state[ 3] = x3; \
|
||||
(cc)->state[ 4] = x4; \
|
||||
(cc)->state[ 5] = x5; \
|
||||
(cc)->state[ 6] = x6; \
|
||||
(cc)->state[ 7] = x7; \
|
||||
(cc)->state[ 8] = x8; \
|
||||
(cc)->state[ 9] = x9; \
|
||||
(cc)->state[10] = xa; \
|
||||
(cc)->state[11] = xb; \
|
||||
(cc)->state[12] = xc; \
|
||||
(cc)->state[13] = xd; \
|
||||
(cc)->state[14] = xe; \
|
||||
(cc)->state[15] = xf; \
|
||||
(cc)->state[16] = xg; \
|
||||
(cc)->state[17] = xh; \
|
||||
(cc)->state[18] = xi; \
|
||||
(cc)->state[19] = xj; \
|
||||
(cc)->state[20] = xk; \
|
||||
(cc)->state[21] = xl; \
|
||||
(cc)->state[22] = xm; \
|
||||
(cc)->state[23] = xn; \
|
||||
(cc)->state[24] = xo; \
|
||||
(cc)->state[25] = xp; \
|
||||
(cc)->state[26] = xq; \
|
||||
(cc)->state[27] = xr; \
|
||||
(cc)->state[28] = xs; \
|
||||
(cc)->state[29] = xt; \
|
||||
(cc)->state[30] = xu; \
|
||||
(cc)->state[31] = xv; \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
#define INPUT_BLOCK do { \
|
||||
x0 ^= sph_dec32le_aligned(buf + 0); \
|
||||
x1 ^= sph_dec32le_aligned(buf + 4); \
|
||||
x2 ^= sph_dec32le_aligned(buf + 8); \
|
||||
x3 ^= sph_dec32le_aligned(buf + 12); \
|
||||
x4 ^= sph_dec32le_aligned(buf + 16); \
|
||||
x5 ^= sph_dec32le_aligned(buf + 20); \
|
||||
x6 ^= sph_dec32le_aligned(buf + 24); \
|
||||
x7 ^= sph_dec32le_aligned(buf + 28); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_EVEN do { \
|
||||
xg = T32(x0 + xg); \
|
||||
x0 = ROTL32(x0, 7); \
|
||||
xh = T32(x1 + xh); \
|
||||
x1 = ROTL32(x1, 7); \
|
||||
xi = T32(x2 + xi); \
|
||||
x2 = ROTL32(x2, 7); \
|
||||
xj = T32(x3 + xj); \
|
||||
x3 = ROTL32(x3, 7); \
|
||||
xk = T32(x4 + xk); \
|
||||
x4 = ROTL32(x4, 7); \
|
||||
xl = T32(x5 + xl); \
|
||||
x5 = ROTL32(x5, 7); \
|
||||
xm = T32(x6 + xm); \
|
||||
x6 = ROTL32(x6, 7); \
|
||||
xn = T32(x7 + xn); \
|
||||
x7 = ROTL32(x7, 7); \
|
||||
xo = T32(x8 + xo); \
|
||||
x8 = ROTL32(x8, 7); \
|
||||
xp = T32(x9 + xp); \
|
||||
x9 = ROTL32(x9, 7); \
|
||||
xq = T32(xa + xq); \
|
||||
xa = ROTL32(xa, 7); \
|
||||
xr = T32(xb + xr); \
|
||||
xb = ROTL32(xb, 7); \
|
||||
xs = T32(xc + xs); \
|
||||
xc = ROTL32(xc, 7); \
|
||||
xt = T32(xd + xt); \
|
||||
xd = ROTL32(xd, 7); \
|
||||
xu = T32(xe + xu); \
|
||||
xe = ROTL32(xe, 7); \
|
||||
xv = T32(xf + xv); \
|
||||
xf = ROTL32(xf, 7); \
|
||||
x8 ^= xg; \
|
||||
x9 ^= xh; \
|
||||
xa ^= xi; \
|
||||
xb ^= xj; \
|
||||
xc ^= xk; \
|
||||
xd ^= xl; \
|
||||
xe ^= xm; \
|
||||
xf ^= xn; \
|
||||
x0 ^= xo; \
|
||||
x1 ^= xp; \
|
||||
x2 ^= xq; \
|
||||
x3 ^= xr; \
|
||||
x4 ^= xs; \
|
||||
x5 ^= xt; \
|
||||
x6 ^= xu; \
|
||||
x7 ^= xv; \
|
||||
xi = T32(x8 + xi); \
|
||||
x8 = ROTL32(x8, 11); \
|
||||
xj = T32(x9 + xj); \
|
||||
x9 = ROTL32(x9, 11); \
|
||||
xg = T32(xa + xg); \
|
||||
xa = ROTL32(xa, 11); \
|
||||
xh = T32(xb + xh); \
|
||||
xb = ROTL32(xb, 11); \
|
||||
xm = T32(xc + xm); \
|
||||
xc = ROTL32(xc, 11); \
|
||||
xn = T32(xd + xn); \
|
||||
xd = ROTL32(xd, 11); \
|
||||
xk = T32(xe + xk); \
|
||||
xe = ROTL32(xe, 11); \
|
||||
xl = T32(xf + xl); \
|
||||
xf = ROTL32(xf, 11); \
|
||||
xq = T32(x0 + xq); \
|
||||
x0 = ROTL32(x0, 11); \
|
||||
xr = T32(x1 + xr); \
|
||||
x1 = ROTL32(x1, 11); \
|
||||
xo = T32(x2 + xo); \
|
||||
x2 = ROTL32(x2, 11); \
|
||||
xp = T32(x3 + xp); \
|
||||
x3 = ROTL32(x3, 11); \
|
||||
xu = T32(x4 + xu); \
|
||||
x4 = ROTL32(x4, 11); \
|
||||
xv = T32(x5 + xv); \
|
||||
x5 = ROTL32(x5, 11); \
|
||||
xs = T32(x6 + xs); \
|
||||
x6 = ROTL32(x6, 11); \
|
||||
xt = T32(x7 + xt); \
|
||||
x7 = ROTL32(x7, 11); \
|
||||
xc ^= xi; \
|
||||
xd ^= xj; \
|
||||
xe ^= xg; \
|
||||
xf ^= xh; \
|
||||
x8 ^= xm; \
|
||||
x9 ^= xn; \
|
||||
xa ^= xk; \
|
||||
xb ^= xl; \
|
||||
x4 ^= xq; \
|
||||
x5 ^= xr; \
|
||||
x6 ^= xo; \
|
||||
x7 ^= xp; \
|
||||
x0 ^= xu; \
|
||||
x1 ^= xv; \
|
||||
x2 ^= xs; \
|
||||
x3 ^= xt; \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_ODD do { \
|
||||
xj = T32(xc + xj); \
|
||||
xc = ROTL32(xc, 7); \
|
||||
xi = T32(xd + xi); \
|
||||
xd = ROTL32(xd, 7); \
|
||||
xh = T32(xe + xh); \
|
||||
xe = ROTL32(xe, 7); \
|
||||
xg = T32(xf + xg); \
|
||||
xf = ROTL32(xf, 7); \
|
||||
xn = T32(x8 + xn); \
|
||||
x8 = ROTL32(x8, 7); \
|
||||
xm = T32(x9 + xm); \
|
||||
x9 = ROTL32(x9, 7); \
|
||||
xl = T32(xa + xl); \
|
||||
xa = ROTL32(xa, 7); \
|
||||
xk = T32(xb + xk); \
|
||||
xb = ROTL32(xb, 7); \
|
||||
xr = T32(x4 + xr); \
|
||||
x4 = ROTL32(x4, 7); \
|
||||
xq = T32(x5 + xq); \
|
||||
x5 = ROTL32(x5, 7); \
|
||||
xp = T32(x6 + xp); \
|
||||
x6 = ROTL32(x6, 7); \
|
||||
xo = T32(x7 + xo); \
|
||||
x7 = ROTL32(x7, 7); \
|
||||
xv = T32(x0 + xv); \
|
||||
x0 = ROTL32(x0, 7); \
|
||||
xu = T32(x1 + xu); \
|
||||
x1 = ROTL32(x1, 7); \
|
||||
xt = T32(x2 + xt); \
|
||||
x2 = ROTL32(x2, 7); \
|
||||
xs = T32(x3 + xs); \
|
||||
x3 = ROTL32(x3, 7); \
|
||||
x4 ^= xj; \
|
||||
x5 ^= xi; \
|
||||
x6 ^= xh; \
|
||||
x7 ^= xg; \
|
||||
x0 ^= xn; \
|
||||
x1 ^= xm; \
|
||||
x2 ^= xl; \
|
||||
x3 ^= xk; \
|
||||
xc ^= xr; \
|
||||
xd ^= xq; \
|
||||
xe ^= xp; \
|
||||
xf ^= xo; \
|
||||
x8 ^= xv; \
|
||||
x9 ^= xu; \
|
||||
xa ^= xt; \
|
||||
xb ^= xs; \
|
||||
xh = T32(x4 + xh); \
|
||||
x4 = ROTL32(x4, 11); \
|
||||
xg = T32(x5 + xg); \
|
||||
x5 = ROTL32(x5, 11); \
|
||||
xj = T32(x6 + xj); \
|
||||
x6 = ROTL32(x6, 11); \
|
||||
xi = T32(x7 + xi); \
|
||||
x7 = ROTL32(x7, 11); \
|
||||
xl = T32(x0 + xl); \
|
||||
x0 = ROTL32(x0, 11); \
|
||||
xk = T32(x1 + xk); \
|
||||
x1 = ROTL32(x1, 11); \
|
||||
xn = T32(x2 + xn); \
|
||||
x2 = ROTL32(x2, 11); \
|
||||
xm = T32(x3 + xm); \
|
||||
x3 = ROTL32(x3, 11); \
|
||||
xp = T32(xc + xp); \
|
||||
xc = ROTL32(xc, 11); \
|
||||
xo = T32(xd + xo); \
|
||||
xd = ROTL32(xd, 11); \
|
||||
xr = T32(xe + xr); \
|
||||
xe = ROTL32(xe, 11); \
|
||||
xq = T32(xf + xq); \
|
||||
xf = ROTL32(xf, 11); \
|
||||
xt = T32(x8 + xt); \
|
||||
x8 = ROTL32(x8, 11); \
|
||||
xs = T32(x9 + xs); \
|
||||
x9 = ROTL32(x9, 11); \
|
||||
xv = T32(xa + xv); \
|
||||
xa = ROTL32(xa, 11); \
|
||||
xu = T32(xb + xu); \
|
||||
xb = ROTL32(xb, 11); \
|
||||
x0 ^= xh; \
|
||||
x1 ^= xg; \
|
||||
x2 ^= xj; \
|
||||
x3 ^= xi; \
|
||||
x4 ^= xl; \
|
||||
x5 ^= xk; \
|
||||
x6 ^= xn; \
|
||||
x7 ^= xm; \
|
||||
x8 ^= xp; \
|
||||
x9 ^= xo; \
|
||||
xa ^= xr; \
|
||||
xb ^= xq; \
|
||||
xc ^= xt; \
|
||||
xd ^= xs; \
|
||||
xe ^= xv; \
|
||||
xf ^= xu; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* There is no need to unroll all 16 rounds. The word-swapping permutation
|
||||
* is an involution, so we need to unroll an even number of rounds. On
|
||||
* "big" systems, unrolling 4 rounds yields about 97% of the speed
|
||||
* achieved with full unrolling; and it keeps the code more compact
|
||||
* for small architectures.
|
||||
*/
|
||||
|
||||
/*
 * SIXTEEN_ROUNDS runs sixteen rounds as eight ROUND_EVEN/ROUND_ODD pairs,
 * always starting with ROUND_EVEN. SPH_CUBEHASH_UNROLL selects how many
 * rounds are written out per loop iteration (2, 4 or 8); any other value
 * expands all sixteen rounds inline, without a loop.
 */
#define CUBEHASH_ROUND_PAIR   do { \
		ROUND_EVEN; \
		ROUND_ODD; \
	} while (0)

#if SPH_CUBEHASH_UNROLL == 2

#define SIXTEEN_ROUNDS   do { \
		int pair; \
		for (pair = 0; pair < 8; pair ++) { \
			CUBEHASH_ROUND_PAIR; \
		} \
	} while (0)

#elif SPH_CUBEHASH_UNROLL == 4

#define SIXTEEN_ROUNDS   do { \
		int pair; \
		for (pair = 0; pair < 4; pair ++) { \
			CUBEHASH_ROUND_PAIR; \
			CUBEHASH_ROUND_PAIR; \
		} \
	} while (0)

#elif SPH_CUBEHASH_UNROLL == 8

#define SIXTEEN_ROUNDS   do { \
		int pair; \
		for (pair = 0; pair < 2; pair ++) { \
			CUBEHASH_ROUND_PAIR; \
			CUBEHASH_ROUND_PAIR; \
			CUBEHASH_ROUND_PAIR; \
			CUBEHASH_ROUND_PAIR; \
		} \
	} while (0)

#else

#define SIXTEEN_ROUNDS   do { \
		CUBEHASH_ROUND_PAIR; \
		CUBEHASH_ROUND_PAIR; \
		CUBEHASH_ROUND_PAIR; \
		CUBEHASH_ROUND_PAIR; \
		CUBEHASH_ROUND_PAIR; \
		CUBEHASH_ROUND_PAIR; \
		CUBEHASH_ROUND_PAIR; \
		CUBEHASH_ROUND_PAIR; \
	} while (0)

#endif
|
||||
|
||||
static void
|
||||
cubehash_init(sph_cubehash_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
memcpy(sc->state, iv, sizeof sc->state);
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
cubehash_core(sph_cubehash_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if (len < (sizeof sc->buf) - ptr) {
|
||||
memcpy(buf + ptr, data, len);
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE(sc);
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
ptr += clen;
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
INPUT_BLOCK;
|
||||
SIXTEEN_ROUNDS;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
cubehash_close(sph_cubehash_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr;
|
||||
unsigned z;
|
||||
int i;
|
||||
DECL_STATE
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
READ_STATE(sc);
|
||||
INPUT_BLOCK;
|
||||
for (i = 0; i < 11; i ++) {
|
||||
SIXTEEN_ROUNDS;
|
||||
if (i == 0)
|
||||
xv ^= SPH_C32(1);
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
out = dst;
|
||||
for (z = 0; z < out_size_w32; z ++)
|
||||
sph_enc32le(out + (z << 2), sc->state[z]);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash224_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash224(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Finish a CubeHash-224 computation without extra bits; this is
 * sph_cubehash224_addbits_and_close() with ub = 0 and n = 0.
 * See sph_cubehash.h.
 */
void
sph_cubehash224_close(void *cc, void *dst)
{
	const unsigned none = 0;

	sph_cubehash224_addbits_and_close(cc, none, none, dst);
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 7);
|
||||
sph_cubehash224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash256_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash256(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Finish a CubeHash-256 computation without extra bits; this is
 * sph_cubehash256_addbits_and_close() with ub = 0 and n = 0.
 * See sph_cubehash.h.
 */
void
sph_cubehash256_close(void *cc, void *dst)
{
	const unsigned none = 0;

	sph_cubehash256_addbits_and_close(cc, none, none, dst);
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 8);
|
||||
sph_cubehash256_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash384_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash384(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Finish a CubeHash-384 computation without extra bits; this is
 * sph_cubehash384_addbits_and_close() with ub = 0 and n = 0.
 * See sph_cubehash.h.
 */
void
sph_cubehash384_close(void *cc, void *dst)
{
	const unsigned none = 0;

	sph_cubehash384_addbits_and_close(cc, none, none, dst);
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 12);
|
||||
sph_cubehash384_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash512_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV512);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash512(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Finish a CubeHash-512 computation without extra bits; this is
 * sph_cubehash512_addbits_and_close() with ub = 0 and n = 0.
 * See sph_cubehash.h.
 */
void
sph_cubehash512_close(void *cc, void *dst)
{
	const unsigned none = 0;

	sph_cubehash512_addbits_and_close(cc, none, none, dst);
}
|
||||
|
||||
/* see sph_cubehash.h */
|
||||
void
|
||||
sph_cubehash512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 16);
|
||||
sph_cubehash512_init(cc);
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
292
src/crypto/ghostrider/sph_cubehash.h
Normal file
292
src/crypto/ghostrider/sph_cubehash.h
Normal file
|
@ -0,0 +1,292 @@
|
|||
/* $Id: sph_cubehash.h 180 2010-05-08 02:29:25Z tp $ */
|
||||
/**
|
||||
* CubeHash interface. CubeHash is a family of functions which differ by
|
||||
* their output size; this implementation defines CubeHash for output
|
||||
* sizes 224, 256, 384 and 512 bits, with the "standard parameters"
|
||||
* (CubeHash16/32 with the CubeHash specification notations).
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_cubehash.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_CUBEHASH_H__
|
||||
#define SPH_CUBEHASH_H__
|
||||
|
||||
/*
 * Headers are included before the linkage block is opened, so that no
 * header (in particular the standard <stddef.h>) is pulled in from
 * inside extern "C".
 */
#include <stddef.h>
#include "sph_types.h"

#ifdef __cplusplus
extern "C"{
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-224.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-256.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-384.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-512.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for CubeHash computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a CubeHash computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running CubeHash computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[32]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 state[32];
|
||||
#endif
|
||||
} sph_cubehash_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-224 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash224_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-256 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash256_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-384 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash384_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-512 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash512_context;
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-224 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-224 context (pointer to a
|
||||
* <code>sph_cubehash224_context</code>)
|
||||
*/
|
||||
void sph_cubehash224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-256 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-256 context (pointer to a
|
||||
* <code>sph_cubehash256_context</code>)
|
||||
*/
|
||||
void sph_cubehash256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-384 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-384 context (pointer to a
|
||||
* <code>sph_cubehash384_context</code>)
|
||||
*/
|
||||
void sph_cubehash384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-512 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-512 context (pointer to a
|
||||
* <code>sph_cubehash512_context</code>)
|
||||
*/
|
||||
void sph_cubehash512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1032
src/crypto/ghostrider/sph_echo.c
Normal file
1032
src/crypto/ghostrider/sph_echo.c
Normal file
File diff suppressed because it is too large
Load diff
319
src/crypto/ghostrider/sph_echo.h
Normal file
319
src/crypto/ghostrider/sph_echo.h
Normal file
|
@ -0,0 +1,319 @@
|
|||
/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* ECHO interface. ECHO is a family of functions which differ by
|
||||
* their output size; this implementation defines ECHO for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_echo.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_ECHO_H__
|
||||
#define SPH_ECHO_H__
|
||||
|
||||
/*
 * Headers are included before the linkage block is opened, so that no
 * header (in particular the standard <stddef.h>) is pulled in from
 * inside extern "C".
 */
#include <stddef.h>
#include "sph_types.h"

#ifdef __cplusplus
extern "C"{
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-224.
|
||||
*/
|
||||
#define SPH_SIZE_echo224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-256.
|
||||
*/
|
||||
#define SPH_SIZE_echo256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-384.
|
||||
*/
|
||||
#define SPH_SIZE_echo384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-512.
|
||||
*/
|
||||
#define SPH_SIZE_echo512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-224
|
||||
* and ECHO-256.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[192]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u32 Vs[4][4];
|
||||
#if SPH_64
|
||||
sph_u64 Vb[4][2];
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3;
|
||||
#endif
|
||||
} sph_echo_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-384
|
||||
* and ECHO-512.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
sph_u32 Vs[8][4];
|
||||
#if SPH_64
|
||||
sph_u64 Vb[8][2];
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3;
|
||||
#endif
|
||||
} sph_echo_big_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-224 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo224_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-256 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo256_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-384 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo384_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-512 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo512_context;
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-224 context (pointer to a
|
||||
* <code>sph_echo224_context</code>)
|
||||
*/
|
||||
void sph_echo224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-256 context (pointer to a
|
||||
* <code>sph_echo256_context</code>)
|
||||
*/
|
||||
void sph_echo256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-384 context (pointer to a
|
||||
* <code>sph_echo384_context</code>)
|
||||
*/
|
||||
void sph_echo384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-512 context (pointer to a
|
||||
* <code>sph_echo512_context</code>)
|
||||
*/
|
||||
void sph_echo512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
1210
src/crypto/ghostrider/sph_fugue.c
Normal file
1210
src/crypto/ghostrider/sph_fugue.c
Normal file
File diff suppressed because it is too large
Load diff
89
src/crypto/ghostrider/sph_fugue.h
Normal file
89
src/crypto/ghostrider/sph_fugue.h
Normal file
|
@ -0,0 +1,89 @@
|
|||
#ifndef SPH_FUGUE_H__
|
||||
#define SPH_FUGUE_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#define SPH_SIZE_fugue224 224
|
||||
|
||||
#define SPH_SIZE_fugue256 256
|
||||
|
||||
#define SPH_SIZE_fugue384 384
|
||||
|
||||
#define SPH_SIZE_fugue512 512
|
||||
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
sph_u32 partial;
|
||||
unsigned partial_len;
|
||||
unsigned round_shift;
|
||||
sph_u32 S[36];
|
||||
#if SPH_64
|
||||
sph_u64 bit_count;
|
||||
#else
|
||||
sph_u32 bit_count_high, bit_count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_fugue_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue224_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue256_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue384_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue512_context;
|
||||
|
||||
void sph_fugue224_init(void *cc);
|
||||
|
||||
void sph_fugue224(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue224_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_fugue256_init(void *cc);
|
||||
|
||||
void sph_fugue256(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue256_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_fugue384_init(void *cc);
|
||||
|
||||
void sph_fugue384(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue384_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_fugue512_init(void *cc);
|
||||
|
||||
void sph_fugue512(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue512_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#define sph_fugue512_full( cc, dst, data, len ) \
|
||||
do{ \
|
||||
sph_fugue512_init( cc ); \
|
||||
sph_fugue512( cc, data, len ); \
|
||||
sph_fugue512_close( cc, dst ); \
|
||||
}while(0)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
3121
src/crypto/ghostrider/sph_groestl.c
Normal file
3121
src/crypto/ghostrider/sph_groestl.c
Normal file
File diff suppressed because it is too large
Load diff
329
src/crypto/ghostrider/sph_groestl.h
Normal file
329
src/crypto/ghostrider/sph_groestl.h
Normal file
|
@ -0,0 +1,329 @@
|
|||
/* $Id: sph_groestl.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* Groestl interface. This code implements Groestl with the recommended
|
||||
* parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_groestl.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_GROESTL_H__
|
||||
#define SPH_GROESTL_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "sph_types.h"
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Groestl-224.
|
||||
*/
|
||||
#define SPH_SIZE_groestl224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Groestl-256.
|
||||
*/
|
||||
#define SPH_SIZE_groestl256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Groestl-384.
|
||||
*/
|
||||
#define SPH_SIZE_groestl384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Groestl-512.
|
||||
*/
|
||||
#define SPH_SIZE_groestl512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-224 and Groestl-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a Groestl computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Groestl
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
#if SPH_64
|
||||
sph_u64 wide[8];
|
||||
#endif
|
||||
sph_u32 narrow[16];
|
||||
} state;
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_groestl_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-224 computations. It is
|
||||
* identical to the common <code>sph_groestl_small_context</code>.
|
||||
*/
|
||||
typedef sph_groestl_small_context sph_groestl224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-256 computations. It is
|
||||
* identical to the common <code>sph_groestl_small_context</code>.
|
||||
*/
|
||||
typedef sph_groestl_small_context sph_groestl256_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-384 and Groestl-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a Groestl computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Groestl
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
union {
|
||||
#if SPH_64
|
||||
sph_u64 wide[16];
|
||||
#endif
|
||||
sph_u32 narrow[32];
|
||||
} state;
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_groestl_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-384 computations. It is
|
||||
* identical to the common <code>sph_groestl_big_context</code>.
|
||||
*/
|
||||
typedef sph_groestl_big_context sph_groestl384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-512 computations. It is
|
||||
* identical to the common <code>sph_groestl_big_context</code>.
|
||||
*/
|
||||
typedef sph_groestl_big_context sph_groestl512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Groestl-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Groestl-224 context (pointer to a
|
||||
* <code>sph_groestl224_context</code>)
|
||||
*/
|
||||
void sph_groestl224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Groestl-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_groestl224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Groestl-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl224_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Groestl-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Groestl-256 context (pointer to a
|
||||
* <code>sph_groestl256_context</code>)
|
||||
*/
|
||||
void sph_groestl256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Groestl-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_groestl256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Groestl-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl256_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Groestl-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Groestl-384 context (pointer to a
|
||||
* <code>sph_groestl384_context</code>)
|
||||
*/
|
||||
void sph_groestl384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Groestl-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_groestl384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Groestl-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl384_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Groestl-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Groestl-512 context (pointer to a
|
||||
* <code>sph_groestl512_context</code>)
|
||||
*/
|
||||
void sph_groestl512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Groestl-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_groestl512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Groestl-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
867
src/crypto/ghostrider/sph_hamsi.c
Normal file
867
src/crypto/ghostrider/sph_hamsi.c
Normal file
|
@ -0,0 +1,867 @@
|
|||
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
|
||||
/*
|
||||
* Hamsi implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_hamsi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
|
||||
#define SPH_SMALL_FOOTPRINT_HAMSI 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
|
||||
* table lookup during message expansion (1 to 8, inclusive). If we note
|
||||
* w the number of bits per message word (w=32 for Hamsi-224/256, w=64
|
||||
* for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
|
||||
* Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
|
||||
* then we will get t tables (where t=ceil(w/n)) of individual size
|
||||
* 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
|
||||
* n=5, there are 7 tables, but the last one uses only two bits on
|
||||
* input, not five).
|
||||
*
|
||||
* Also, we read t rows of r words from RAM. Words in a given row are
|
||||
* concatenated in RAM in that order, so most of the cost is about
|
||||
* reading the first row word; comparatively, cache misses are thus
|
||||
* less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
|
||||
*
|
||||
* When n=1, tables are "special" in that we omit the first entry of
|
||||
* each table (which always contains 0), so that total table size is
|
||||
* halved.
|
||||
*
|
||||
* We thus have the following (size1 is the cumulative table size of
|
||||
* Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
|
||||
* are for Hamsi-224/256 and Hamsi-384/512, respectively).
|
||||
*
|
||||
* n size1 size2 t1 t2
|
||||
* ---------------------------------------
|
||||
* 1 1024 4096 32 64
|
||||
* 2 2048 8192 16 32
|
||||
* 3 2688 10880 11 22
|
||||
* 4 4096 16384 8 16
|
||||
* 5 6272 25600 7 13
|
||||
* 6 10368 41984 6 11
|
||||
* 7 16896 73856 5 10
|
||||
* 8 32768 131072 4 8
|
||||
*
|
||||
* So there is a trade-off: a lower n makes the tables fit better in
|
||||
* L1 cache, but increases the number of memory accesses. The optimal
|
||||
* value depends on the amount of available L1 cache and the relative
|
||||
* impact of a cache miss.
|
||||
*
|
||||
* Experimentally, in ideal benchmark conditions (which are not necessarily
|
||||
* realistic with regards to L1 cache contention), it seems that n=8 is
|
||||
* the best value on "big" architectures (those with 32 kB or more of L1
|
||||
* cache), while n=4 is better on "small" architectures. This was tested
|
||||
* on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
|
||||
* (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
|
||||
* (8 kB L1 cache).
|
||||
*
|
||||
* Note: with n=1, the 32 tables (actually implemented as one big table)
|
||||
* are read entirely and sequentially, regardless of the input data,
|
||||
* thus avoiding any data-dependent table access pattern.
|
||||
*/
|
||||
|
||||
#if !defined SPH_HAMSI_EXPAND_SMALL
|
||||
#if SPH_SMALL_FOOTPRINT_HAMSI
|
||||
#define SPH_HAMSI_EXPAND_SMALL 4
|
||||
#else
|
||||
#define SPH_HAMSI_EXPAND_SMALL 8
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined SPH_HAMSI_EXPAND_BIG
|
||||
#define SPH_HAMSI_EXPAND_BIG 8
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#include "sph_hamsi_helper.c"
|
||||
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
|
||||
SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
|
||||
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
|
||||
};
|
||||
|
||||
/*
|
||||
* This version is the one used in the Hamsi submission package for
|
||||
* round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
|
||||
* shall soon be corrected in the official Hamsi specification.
|
||||
*
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
|
||||
SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
|
||||
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
|
||||
};
|
||||
*/
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
|
||||
SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
|
||||
SPH_C32(0x656d656e), SPH_C32(0x7420456c)
|
||||
};
|
||||
|
||||
static const sph_u32 IV384[] = {
|
||||
SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
|
||||
SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
|
||||
SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
|
||||
SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
|
||||
SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
|
||||
SPH_C32(0x2c204b61)
|
||||
};
|
||||
|
||||
static const sph_u32 IV512[] = {
|
||||
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
|
||||
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
|
||||
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
|
||||
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
|
||||
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
|
||||
SPH_C32(0x6769756d)
|
||||
};
|
||||
|
||||
static const sph_u32 alpha_n[] = {
|
||||
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
|
||||
};
|
||||
|
||||
static const sph_u32 alpha_f[] = {
|
||||
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
|
||||
};
|
||||
|
||||
#define DECL_STATE_SMALL \
|
||||
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
|
||||
#define READ_STATE_SMALL(sc) do { \
|
||||
c0 = sc->h[0x0]; \
|
||||
c1 = sc->h[0x1]; \
|
||||
c2 = sc->h[0x2]; \
|
||||
c3 = sc->h[0x3]; \
|
||||
c4 = sc->h[0x4]; \
|
||||
c5 = sc->h[0x5]; \
|
||||
c6 = sc->h[0x6]; \
|
||||
c7 = sc->h[0x7]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE_SMALL(sc) do { \
|
||||
sc->h[0x0] = c0; \
|
||||
sc->h[0x1] = c1; \
|
||||
sc->h[0x2] = c2; \
|
||||
sc->h[0x3] = c3; \
|
||||
sc->h[0x4] = c4; \
|
||||
sc->h[0x5] = c5; \
|
||||
sc->h[0x6] = c6; \
|
||||
sc->h[0x7] = c7; \
|
||||
} while (0)
|
||||
|
||||
#define s0 m0
|
||||
#define s1 m1
|
||||
#define s2 c0
|
||||
#define s3 c1
|
||||
#define s4 c2
|
||||
#define s5 c3
|
||||
#define s6 m2
|
||||
#define s7 m3
|
||||
#define s8 m4
|
||||
#define s9 m5
|
||||
#define sA c4
|
||||
#define sB c5
|
||||
#define sC c6
|
||||
#define sD c7
|
||||
#define sE m6
|
||||
#define sF m7
|
||||
|
||||
#define SBOX(a, b, c, d) do { \
|
||||
sph_u32 t; \
|
||||
t = (a); \
|
||||
(a) &= (c); \
|
||||
(a) ^= (d); \
|
||||
(c) ^= (b); \
|
||||
(c) ^= (a); \
|
||||
(d) |= t; \
|
||||
(d) ^= (b); \
|
||||
t ^= (c); \
|
||||
(b) = (d); \
|
||||
(d) |= t; \
|
||||
(d) ^= (a); \
|
||||
(a) &= (b); \
|
||||
t ^= (a); \
|
||||
(b) ^= (d); \
|
||||
(b) ^= t; \
|
||||
(a) = (c); \
|
||||
(c) = (b); \
|
||||
(b) = (d); \
|
||||
(d) = SPH_T32(~t); \
|
||||
} while (0)
|
||||
|
||||
#define L(a, b, c, d) do { \
|
||||
(a) = SPH_ROTL32(a, 13); \
|
||||
(c) = SPH_ROTL32(c, 3); \
|
||||
(b) ^= (a) ^ (c); \
|
||||
(d) ^= (c) ^ SPH_T32((a) << 3); \
|
||||
(b) = SPH_ROTL32(b, 1); \
|
||||
(d) = SPH_ROTL32(d, 7); \
|
||||
(a) ^= (b) ^ (d); \
|
||||
(c) ^= (d) ^ SPH_T32((b) << 7); \
|
||||
(a) = SPH_ROTL32(a, 5); \
|
||||
(c) = SPH_ROTL32(c, 22); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_SMALL(rc, alpha) do { \
|
||||
s0 ^= alpha[0x00]; \
|
||||
s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
|
||||
s2 ^= alpha[0x02]; \
|
||||
s3 ^= alpha[0x03]; \
|
||||
s4 ^= alpha[0x08]; \
|
||||
s5 ^= alpha[0x09]; \
|
||||
s6 ^= alpha[0x0A]; \
|
||||
s7 ^= alpha[0x0B]; \
|
||||
s8 ^= alpha[0x10]; \
|
||||
s9 ^= alpha[0x11]; \
|
||||
sA ^= alpha[0x12]; \
|
||||
sB ^= alpha[0x13]; \
|
||||
sC ^= alpha[0x18]; \
|
||||
sD ^= alpha[0x19]; \
|
||||
sE ^= alpha[0x1A]; \
|
||||
sF ^= alpha[0x1B]; \
|
||||
SBOX(s0, s4, s8, sC); \
|
||||
SBOX(s1, s5, s9, sD); \
|
||||
SBOX(s2, s6, sA, sE); \
|
||||
SBOX(s3, s7, sB, sF); \
|
||||
L(s0, s5, sA, sF); \
|
||||
L(s1, s6, sB, sC); \
|
||||
L(s2, s7, s8, sD); \
|
||||
L(s3, s4, s9, sE); \
|
||||
} while (0)
|
||||
|
||||
#define P_SMALL do { \
|
||||
ROUND_SMALL(0, alpha_n); \
|
||||
ROUND_SMALL(1, alpha_n); \
|
||||
ROUND_SMALL(2, alpha_n); \
|
||||
} while (0)
|
||||
|
||||
#define PF_SMALL do { \
|
||||
ROUND_SMALL(0, alpha_f); \
|
||||
ROUND_SMALL(1, alpha_f); \
|
||||
ROUND_SMALL(2, alpha_f); \
|
||||
ROUND_SMALL(3, alpha_f); \
|
||||
ROUND_SMALL(4, alpha_f); \
|
||||
ROUND_SMALL(5, alpha_f); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Feed-forward step for the small (8-word) state: XORs eight of the s*
 * working words into the chaining value sc->h[] and reloads c0..c7 from
 * the updated words.
 *
 * Four of the source words are aliases for c-variables (sB is c5, sA is
 * c4, s3 is c1 and s2 is c0, per the #defines above), so the assignments
 * run from c7 down to c0: every aliased source is read before the
 * statement that overwrites it. Reordering the lines would feed already
 * updated values into later XORs.
 */
#define T_SMALL do { \
|
||||
/* order is important */ \
|
||||
c7 = (sc->h[7] ^= sB); \
|
||||
c6 = (sc->h[6] ^= sA); \
|
||||
c5 = (sc->h[5] ^= s9); \
|
||||
c4 = (sc->h[4] ^= s8); \
|
||||
c3 = (sc->h[3] ^= s3); \
|
||||
c2 = (sc->h[2] ^= s2); \
|
||||
c1 = (sc->h[1] ^= s1); \
|
||||
c0 = (sc->h[0] ^= s0); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Process num consecutive 4-byte message blocks starting at buf.
 *
 * The bit counter in the context is advanced by 32 * num up front: as a
 * single 64-bit value when SPH_64 is set, otherwise as a low/high pair of
 * 32-bit words with a manual carry. The chaining value is then loaded into
 * the locals c0..c7; each loop iteration runs INPUT_SMALL (defined outside
 * this part of the file; presumably it fills m0..m7 from the block at
 * buf), the three-round P_SMALL permutation and the T_SMALL feed-forward,
 * then steps buf by 4 bytes. The locals are stored back into sc->h at the
 * end.
 *
 * The round and state macros refer to sc, c0..c7 and m0..m7 by name, so
 * those identifiers must keep their names.
 */
static void
|
||||
hamsi_small(sph_hamsi_small_context *sc, const unsigned char *buf, size_t num)
|
||||
{
|
||||
DECL_STATE_SMALL /* declares c0..c7 */
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
sc->count += (sph_u64)num << 5; /* 32 bits per block */
|
||||
#else
|
||||
tmp = SPH_T32((sph_u32)num << 5); /* low 32 bits of the bit count */
|
||||
sc->count_low = SPH_T32(sc->count_low + tmp);
|
||||
sc->count_high += (sph_u32)((num >> 13) >> 14); /* num >> 27: part of num << 5 above bit 31 */
|
||||
if (sc->count_low < tmp) /* low word wrapped around: carry */
|
||||
sc->count_high ++;
|
||||
#endif
|
||||
READ_STATE_SMALL(sc);
|
||||
while (num -- > 0) {
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
|
||||
INPUT_SMALL;
|
||||
P_SMALL; /* rounds 0..2 with alpha_n */
|
||||
T_SMALL; /* fold into sc->h and reload c0..c7 */
|
||||
buf += 4;
|
||||
}
|
||||
WRITE_STATE_SMALL(sc);
|
||||
}
|
||||
|
||||
/*
 * Process one last 4-byte block at buf with the finalization permutation.
 *
 * Same sequence as one iteration of hamsi_small(), except that PF_SMALL
 * (six rounds using alpha_f) replaces P_SMALL and the bit counter in the
 * context is left untouched. The macros refer to sc, c0..c7 and m0..m7 by
 * name, so those identifiers must keep their names.
 */
static void
|
||||
hamsi_small_final(sph_hamsi_small_context *sc, const unsigned char *buf)
|
||||
{
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
DECL_STATE_SMALL /* declares c0..c7 */
|
||||
|
||||
READ_STATE_SMALL(sc);
|
||||
INPUT_SMALL;
|
||||
PF_SMALL; /* rounds 0..5 with alpha_f */
|
||||
T_SMALL; /* fold into sc->h and reload c0..c7 */
|
||||
WRITE_STATE_SMALL(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_init(sph_hamsi_small_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
memcpy(sc->h, iv, sizeof sc->h);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_core(sph_hamsi_small_context *sc, const void *data, size_t len)
|
||||
{
|
||||
if (sc->partial_len != 0) {
|
||||
size_t mlen;
|
||||
|
||||
mlen = 4 - sc->partial_len;
|
||||
if (len < mlen) {
|
||||
memcpy(sc->partial + sc->partial_len, data, len);
|
||||
sc->partial_len += len;
|
||||
return;
|
||||
} else {
|
||||
memcpy(sc->partial + sc->partial_len, data, mlen);
|
||||
len -= mlen;
|
||||
data = (const unsigned char *)data + mlen;
|
||||
hamsi_small(sc, sc->partial, 1);
|
||||
sc->partial_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
hamsi_small(sc, data, (len >> 2));
|
||||
data = (const unsigned char *)data + (len & ~(size_t)3);
|
||||
len &= (size_t)3;
|
||||
memcpy(sc->partial, data, len);
|
||||
sc->partial_len = len;
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_close(sph_hamsi_small_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char pad[12];
|
||||
size_t ptr, u;
|
||||
unsigned z;
|
||||
unsigned char *out;
|
||||
|
||||
ptr = sc->partial_len;
|
||||
memcpy(pad, sc->partial, ptr);
|
||||
#if SPH_64
|
||||
sph_enc64be(pad + 4, sc->count + (ptr << 3) + n);
|
||||
#else
|
||||
sph_enc32be(pad + 4, sc->count_high);
|
||||
sph_enc32be(pad + 8, sc->count_low + (ptr << 3) + n);
|
||||
#endif
|
||||
z = 0x80 >> n;
|
||||
pad[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
while (ptr < 4)
|
||||
pad[ptr ++] = 0;
|
||||
hamsi_small(sc, pad, 2);
|
||||
hamsi_small_final(sc, pad + 8);
|
||||
out = dst;
|
||||
for (u = 0; u < out_size_w32; u ++)
|
||||
sph_enc32be(out + (u << 2), sc->h[u]);
|
||||
}
|
||||
|
||||
#define DECL_STATE_BIG \
|
||||
sph_u32 c0, c1, c2, c3, c4, c5, c6, c7; \
|
||||
sph_u32 c8, c9, cA, cB, cC, cD, cE, cF;
|
||||
|
||||
#define READ_STATE_BIG(sc) do { \
|
||||
c0 = sc->h[0x0]; \
|
||||
c1 = sc->h[0x1]; \
|
||||
c2 = sc->h[0x2]; \
|
||||
c3 = sc->h[0x3]; \
|
||||
c4 = sc->h[0x4]; \
|
||||
c5 = sc->h[0x5]; \
|
||||
c6 = sc->h[0x6]; \
|
||||
c7 = sc->h[0x7]; \
|
||||
c8 = sc->h[0x8]; \
|
||||
c9 = sc->h[0x9]; \
|
||||
cA = sc->h[0xA]; \
|
||||
cB = sc->h[0xB]; \
|
||||
cC = sc->h[0xC]; \
|
||||
cD = sc->h[0xD]; \
|
||||
cE = sc->h[0xE]; \
|
||||
cF = sc->h[0xF]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE_BIG(sc) do { \
|
||||
sc->h[0x0] = c0; \
|
||||
sc->h[0x1] = c1; \
|
||||
sc->h[0x2] = c2; \
|
||||
sc->h[0x3] = c3; \
|
||||
sc->h[0x4] = c4; \
|
||||
sc->h[0x5] = c5; \
|
||||
sc->h[0x6] = c6; \
|
||||
sc->h[0x7] = c7; \
|
||||
sc->h[0x8] = c8; \
|
||||
sc->h[0x9] = c9; \
|
||||
sc->h[0xA] = cA; \
|
||||
sc->h[0xB] = cB; \
|
||||
sc->h[0xC] = cC; \
|
||||
sc->h[0xD] = cD; \
|
||||
sc->h[0xE] = cE; \
|
||||
sc->h[0xF] = cF; \
|
||||
} while (0)
|
||||
|
||||
#define s00 m0
|
||||
#define s01 m1
|
||||
#define s02 c0
|
||||
#define s03 c1
|
||||
#define s04 m2
|
||||
#define s05 m3
|
||||
#define s06 c2
|
||||
#define s07 c3
|
||||
#define s08 c4
|
||||
#define s09 c5
|
||||
#define s0A m4
|
||||
#define s0B m5
|
||||
#define s0C c6
|
||||
#define s0D c7
|
||||
#define s0E m6
|
||||
#define s0F m7
|
||||
#define s10 m8
|
||||
#define s11 m9
|
||||
#define s12 c8
|
||||
#define s13 c9
|
||||
#define s14 mA
|
||||
#define s15 mB
|
||||
#define s16 cA
|
||||
#define s17 cB
|
||||
#define s18 cC
|
||||
#define s19 cD
|
||||
#define s1A mC
|
||||
#define s1B mD
|
||||
#define s1C cE
|
||||
#define s1D cF
|
||||
#define s1E mE
|
||||
#define s1F mF
|
||||
|
||||
#define ROUND_BIG(rc, alpha) do { \
|
||||
s00 ^= alpha[0x00]; \
|
||||
s01 ^= alpha[0x01] ^ (sph_u32)(rc); \
|
||||
s02 ^= alpha[0x02]; \
|
||||
s03 ^= alpha[0x03]; \
|
||||
s04 ^= alpha[0x04]; \
|
||||
s05 ^= alpha[0x05]; \
|
||||
s06 ^= alpha[0x06]; \
|
||||
s07 ^= alpha[0x07]; \
|
||||
s08 ^= alpha[0x08]; \
|
||||
s09 ^= alpha[0x09]; \
|
||||
s0A ^= alpha[0x0A]; \
|
||||
s0B ^= alpha[0x0B]; \
|
||||
s0C ^= alpha[0x0C]; \
|
||||
s0D ^= alpha[0x0D]; \
|
||||
s0E ^= alpha[0x0E]; \
|
||||
s0F ^= alpha[0x0F]; \
|
||||
s10 ^= alpha[0x10]; \
|
||||
s11 ^= alpha[0x11]; \
|
||||
s12 ^= alpha[0x12]; \
|
||||
s13 ^= alpha[0x13]; \
|
||||
s14 ^= alpha[0x14]; \
|
||||
s15 ^= alpha[0x15]; \
|
||||
s16 ^= alpha[0x16]; \
|
||||
s17 ^= alpha[0x17]; \
|
||||
s18 ^= alpha[0x18]; \
|
||||
s19 ^= alpha[0x19]; \
|
||||
s1A ^= alpha[0x1A]; \
|
||||
s1B ^= alpha[0x1B]; \
|
||||
s1C ^= alpha[0x1C]; \
|
||||
s1D ^= alpha[0x1D]; \
|
||||
s1E ^= alpha[0x1E]; \
|
||||
s1F ^= alpha[0x1F]; \
|
||||
SBOX(s00, s08, s10, s18); \
|
||||
SBOX(s01, s09, s11, s19); \
|
||||
SBOX(s02, s0A, s12, s1A); \
|
||||
SBOX(s03, s0B, s13, s1B); \
|
||||
SBOX(s04, s0C, s14, s1C); \
|
||||
SBOX(s05, s0D, s15, s1D); \
|
||||
SBOX(s06, s0E, s16, s1E); \
|
||||
SBOX(s07, s0F, s17, s1F); \
|
||||
L(s00, s09, s12, s1B); \
|
||||
L(s01, s0A, s13, s1C); \
|
||||
L(s02, s0B, s14, s1D); \
|
||||
L(s03, s0C, s15, s1E); \
|
||||
L(s04, s0D, s16, s1F); \
|
||||
L(s05, s0E, s17, s18); \
|
||||
L(s06, s0F, s10, s19); \
|
||||
L(s07, s08, s11, s1A); \
|
||||
L(s00, s02, s05, s07); \
|
||||
L(s10, s13, s15, s16); \
|
||||
L(s09, s0B, s0C, s0E); \
|
||||
L(s19, s1A, s1C, s1F); \
|
||||
} while (0)
|
||||
|
||||
/*
 * P_BIG / PF_BIG: the two permutations of the "big" Hamsi variants, both
 * built from ROUND_BIG.
 *
 *   P_BIG   6 rounds (round numbers 0..5)  using alpha_n
 *   PF_BIG  12 rounds (round numbers 0..11) using alpha_f
 *
 * The rounds are written out one by one unless SPH_SMALL_FOOTPRINT_HAMSI
 * is non-zero, in which case the loop form below is used instead.
 */
#if !SPH_SMALL_FOOTPRINT_HAMSI

#define P_BIG   do { \
		ROUND_BIG(0, alpha_n); \
		ROUND_BIG(1, alpha_n); \
		ROUND_BIG(2, alpha_n); \
		ROUND_BIG(3, alpha_n); \
		ROUND_BIG(4, alpha_n); \
		ROUND_BIG(5, alpha_n); \
	} while (0)

#define PF_BIG   do { \
		ROUND_BIG(0, alpha_f); \
		ROUND_BIG(1, alpha_f); \
		ROUND_BIG(2, alpha_f); \
		ROUND_BIG(3, alpha_f); \
		ROUND_BIG(4, alpha_f); \
		ROUND_BIG(5, alpha_f); \
		ROUND_BIG(6, alpha_f); \
		ROUND_BIG(7, alpha_f); \
		ROUND_BIG(8, alpha_f); \
		ROUND_BIG(9, alpha_f); \
		ROUND_BIG(10, alpha_f); \
		ROUND_BIG(11, alpha_f); \
	} while (0)

#else

/* Compact form: same round sequence, driven by a counter. */
#define P_BIG   do { \
		unsigned r; \
		for (r = 0; r < 6; r ++) { \
			ROUND_BIG(r, alpha_n); \
		} \
	} while (0)

#define PF_BIG   do { \
		unsigned r; \
		for (r = 0; r < 12; r ++) { \
			ROUND_BIG(r, alpha_f); \
		} \
	} while (0)

#endif
|
||||
|
||||
/*
 * T_BIG: feed-forward step. XORs working-state words s00..s07 into
 * sc->h[0..7] and s10..s17 into sc->h[8..15], and stores each updated
 * chaining word back into the matching local c0..cF.
 *
 * Expects `sc`, c0..cF and the s-variables to be in scope where the macro
 * is expanded. The assignments run from cF down to c0; the original
 * comment states that this order matters.
 * NOTE(review): the reason is not visible in this part of the file
 * (presumably some c/s names share storage) -- do not reorder.
 */
#define T_BIG do { \
|
||||
/* order is important */ \
|
||||
cF = (sc->h[0xF] ^= s17); \
|
||||
cE = (sc->h[0xE] ^= s16); \
|
||||
cD = (sc->h[0xD] ^= s15); \
|
||||
cC = (sc->h[0xC] ^= s14); \
|
||||
cB = (sc->h[0xB] ^= s13); \
|
||||
cA = (sc->h[0xA] ^= s12); \
|
||||
c9 = (sc->h[0x9] ^= s11); \
|
||||
c8 = (sc->h[0x8] ^= s10); \
|
||||
c7 = (sc->h[0x7] ^= s07); \
|
||||
c6 = (sc->h[0x6] ^= s06); \
|
||||
c5 = (sc->h[0x5] ^= s05); \
|
||||
c4 = (sc->h[0x4] ^= s04); \
|
||||
c3 = (sc->h[0x3] ^= s03); \
|
||||
c2 = (sc->h[0x2] ^= s02); \
|
||||
c1 = (sc->h[0x1] ^= s01); \
|
||||
c0 = (sc->h[0x0] ^= s00); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
hamsi_big(sph_hamsi_big_context *sc, const unsigned char *buf, size_t num)
|
||||
{
|
||||
DECL_STATE_BIG
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
sc->count += (sph_u64)num << 6;
|
||||
#else
|
||||
tmp = SPH_T32((sph_u32)num << 6);
|
||||
sc->count_low = SPH_T32(sc->count_low + tmp);
|
||||
sc->count_high += (sph_u32)((num >> 13) >> 13);
|
||||
if (sc->count_low < tmp)
|
||||
sc->count_high ++;
|
||||
#endif
|
||||
READ_STATE_BIG(sc);
|
||||
while (num -- > 0) {
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
|
||||
|
||||
INPUT_BIG;
|
||||
P_BIG;
|
||||
T_BIG;
|
||||
buf += 8;
|
||||
}
|
||||
WRITE_STATE_BIG(sc);
|
||||
}
|
||||
|
||||
/*
 * Process one block with the final permutation. Same load / input /
 * permute / feed-forward / store sequence as one iteration of
 * hamsi_big(), but using PF_BIG (12 rounds with alpha_f) instead of
 * P_BIG (6 rounds with alpha_n). The bit counter in `sc` is not touched.
 *
 * sc:  the Hamsi-384/512 context
 * buf: the block consumed by INPUT_BIG (hamsi_big_close() passes its
 *      8-byte encoded-length buffer)
 */
static void
|
||||
hamsi_big_final(sph_hamsi_big_context *sc, const unsigned char *buf)
|
||||
{
|
||||
/* Message words filled in by INPUT_BIG. */
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
|
||||
DECL_STATE_BIG
|
||||
|
||||
READ_STATE_BIG(sc);
|
||||
INPUT_BIG;
|
||||
PF_BIG;
|
||||
T_BIG;
|
||||
WRITE_STATE_BIG(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_init(sph_hamsi_big_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
memcpy(sc->h, iv, sizeof sc->h);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_core(sph_hamsi_big_context *sc, const void *data, size_t len)
|
||||
{
|
||||
if (sc->partial_len != 0) {
|
||||
size_t mlen;
|
||||
|
||||
mlen = 8 - sc->partial_len;
|
||||
if (len < mlen) {
|
||||
memcpy(sc->partial + sc->partial_len, data, len);
|
||||
sc->partial_len += len;
|
||||
return;
|
||||
} else {
|
||||
memcpy(sc->partial + sc->partial_len, data, mlen);
|
||||
len -= mlen;
|
||||
data = (const unsigned char *)data + mlen;
|
||||
hamsi_big(sc, sc->partial, 1);
|
||||
sc->partial_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
hamsi_big(sc, data, (len >> 3));
|
||||
data = (const unsigned char *)data + (len & ~(size_t)7);
|
||||
len &= (size_t)7;
|
||||
memcpy(sc->partial, data, len);
|
||||
sc->partial_len = len;
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_close(sph_hamsi_big_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char pad[8];
|
||||
size_t ptr, u;
|
||||
unsigned z;
|
||||
unsigned char *out;
|
||||
|
||||
ptr = sc->partial_len;
|
||||
#if SPH_64
|
||||
sph_enc64be(pad, sc->count + (ptr << 3) + n);
|
||||
#else
|
||||
sph_enc32be(pad, sc->count_high);
|
||||
sph_enc32be(pad + 4, sc->count_low + (ptr << 3) + n);
|
||||
#endif
|
||||
z = 0x80 >> n;
|
||||
sc->partial[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
while (ptr < 8)
|
||||
sc->partial[ptr ++] = 0;
|
||||
hamsi_big(sc, sc->partial, 1);
|
||||
hamsi_big_final(sc, pad);
|
||||
out = dst;
|
||||
if (out_size_w32 == 12) {
|
||||
sph_enc32be(out + 0, sc->h[ 0]);
|
||||
sph_enc32be(out + 4, sc->h[ 1]);
|
||||
sph_enc32be(out + 8, sc->h[ 3]);
|
||||
sph_enc32be(out + 12, sc->h[ 4]);
|
||||
sph_enc32be(out + 16, sc->h[ 5]);
|
||||
sph_enc32be(out + 20, sc->h[ 6]);
|
||||
sph_enc32be(out + 24, sc->h[ 8]);
|
||||
sph_enc32be(out + 28, sc->h[ 9]);
|
||||
sph_enc32be(out + 32, sc->h[10]);
|
||||
sph_enc32be(out + 36, sc->h[12]);
|
||||
sph_enc32be(out + 40, sc->h[13]);
|
||||
sph_enc32be(out + 44, sc->h[15]);
|
||||
} else {
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc32be(out + (u << 2), sc->h[u]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Hamsi-224 init entry point (see sph_hamsi.h): forwards the context to
 * hamsi_small_init() together with IV224.
 */
|
||||
void
|
||||
sph_hamsi224_init(void *cc)
|
||||
{
|
||||
hamsi_small_init(cc, IV224);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-224 update entry point (see sph_hamsi.h): forwards the context,
 * data pointer and byte length unchanged to hamsi_small_core().
 */
|
||||
void
|
||||
sph_hamsi224(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_small_core(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-224 close entry point (see sph_hamsi.h): calls
 * hamsi_small_close() with no extra bits (ub = 0, n = 0) and 7 as the
 * last argument (presumably the digest length in 32-bit words, 7 * 4 =
 * 28 bytes -- confirm against hamsi_small_close()).
 */
|
||||
void
|
||||
sph_hamsi224_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, 0, 0, dst, 7);
|
||||
/* The re-initialization below is commented out: the context is not reset here. */
// hamsi_small_init(cc, IV224);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-224 close-with-extra-bits entry point (see sph_hamsi.h): passes
 * ub and n through to hamsi_small_close() with 7 as the last argument
 * (presumably the digest length in 32-bit words -- confirm against
 * hamsi_small_close()).
 */
|
||||
void
|
||||
sph_hamsi224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, ub, n, dst, 7);
|
||||
/* The re-initialization below is commented out: the context is not reset here. */
// hamsi_small_init(cc, IV224);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-256 init entry point (see sph_hamsi.h): forwards the context to
 * hamsi_small_init() together with IV256.
 */
|
||||
void
|
||||
sph_hamsi256_init(void *cc)
|
||||
{
|
||||
hamsi_small_init(cc, IV256);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-256 update entry point (see sph_hamsi.h): forwards the context,
 * data pointer and byte length unchanged to hamsi_small_core().
 */
|
||||
void
|
||||
sph_hamsi256(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_small_core(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-256 close entry point (see sph_hamsi.h): calls
 * hamsi_small_close() with no extra bits (ub = 0, n = 0) and 8 as the
 * last argument (presumably the digest length in 32-bit words, 8 * 4 =
 * 32 bytes -- confirm against hamsi_small_close()).
 */
|
||||
void
|
||||
sph_hamsi256_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, 0, 0, dst, 8);
|
||||
/* The re-initialization below is commented out: the context is not reset here. */
// hamsi_small_init(cc, IV256);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-256 close-with-extra-bits entry point (see sph_hamsi.h): passes
 * ub and n through to hamsi_small_close() with 8 as the last argument
 * (presumably the digest length in 32-bit words -- confirm against
 * hamsi_small_close()).
 */
|
||||
void
|
||||
sph_hamsi256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, ub, n, dst, 8);
|
||||
/* The re-initialization below is commented out: the context is not reset here. */
// hamsi_small_init(cc, IV256);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-384 init entry point (see sph_hamsi.h): resets the context via
 * hamsi_big_init() using IV384 as the initial chaining value.
 */
|
||||
void
|
||||
sph_hamsi384_init(void *cc)
|
||||
{
|
||||
hamsi_big_init(cc, IV384);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-384 update entry point (see sph_hamsi.h): forwards the context,
 * data pointer and byte length unchanged to hamsi_big_core().
 */
|
||||
void
|
||||
sph_hamsi384(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_big_core(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-384 close entry point (see sph_hamsi.h): calls hamsi_big_close()
 * with no extra bits (ub = 0, n = 0) and out_size_w32 = 12, which selects
 * the 12-word (48-byte) output in hamsi_big_close().
 */
|
||||
void
|
||||
sph_hamsi384_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, 0, 0, dst, 12);
|
||||
/* The re-initialization below is commented out: the context is not reset here. */
// hamsi_big_init(cc, IV384);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-384 close-with-extra-bits entry point (see sph_hamsi.h): passes
 * ub and n through to hamsi_big_close() with out_size_w32 = 12, which
 * selects the 12-word (48-byte) output.
 */
|
||||
void
|
||||
sph_hamsi384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, ub, n, dst, 12);
|
||||
/* The re-initialization below is commented out: the context is not reset here. */
// hamsi_big_init(cc, IV384);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-512 init entry point (see sph_hamsi.h): resets the context via
 * hamsi_big_init() using IV512 as the initial chaining value.
 */
|
||||
void
|
||||
sph_hamsi512_init(void *cc)
|
||||
{
|
||||
hamsi_big_init(cc, IV512);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-512 update entry point (see sph_hamsi.h): forwards the context,
 * data pointer and byte length unchanged to hamsi_big_core().
 */
|
||||
void
|
||||
sph_hamsi512(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_big_core(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-512 close entry point (see sph_hamsi.h): calls hamsi_big_close()
 * with no extra bits (ub = 0, n = 0) and out_size_w32 = 16, so all 16
 * chaining words (64 bytes) are written to dst.
 */
|
||||
void
|
||||
sph_hamsi512_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, 0, 0, dst, 16);
|
||||
/* The re-initialization below is commented out: the context is not reset here. */
// hamsi_big_init(cc, IV512);
|
||||
}
|
||||
|
||||
/*
 * Hamsi-512 close-with-extra-bits entry point (see sph_hamsi.h): passes
 * ub and n through to hamsi_big_close() with out_size_w32 = 16, so all 16
 * chaining words (64 bytes) are written to dst.
 */
|
||||
void
|
||||
sph_hamsi512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, ub, n, dst, 16);
|
||||
/* The re-initialization below is commented out: the context is not reset here. */
// hamsi_big_init(cc, IV512);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
321
src/crypto/ghostrider/sph_hamsi.h
Normal file
321
src/crypto/ghostrider/sph_hamsi.h
Normal file
|
@ -0,0 +1,321 @@
|
|||
/* $Id: sph_hamsi.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* Hamsi interface. This code implements Hamsi with the recommended
|
||||
* parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_hamsi.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_HAMSI_H__
|
||||
#define SPH_HAMSI_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Hamsi-224.
|
||||
*/
|
||||
#define SPH_SIZE_hamsi224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Hamsi-256.
|
||||
*/
|
||||
#define SPH_SIZE_hamsi256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Hamsi-384.
|
||||
*/
|
||||
#define SPH_SIZE_hamsi384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Hamsi-512.
|
||||
*/
|
||||
#define SPH_SIZE_hamsi512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-224 and Hamsi-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a Hamsi computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Hamsi
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
/*
 * NOTE(review): the code that uses this structure (hamsi_small_*) is not
 * part of this header. The field roles below are assumed to mirror
 * sph_hamsi_big_context with a 4-byte block -- confirm in sph_hamsi.c.
 */
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
/* presumably: input bytes not yet forming a complete block */
unsigned char partial[4];
|
||||
/* presumably: number of valid bytes in partial[] */
size_t partial_len;
|
||||
/* presumably: chaining value (eight 32-bit words) */
sph_u32 h[8];
|
||||
/* Message-length counter: one 64-bit word, or a high/low 32-bit pair. */
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_hamsi_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-224 computations. It is
|
||||
* identical to the common <code>sph_hamsi_small_context</code>.
|
||||
*/
|
||||
typedef sph_hamsi_small_context sph_hamsi224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-256 computations. It is
|
||||
* identical to the common <code>sph_hamsi_small_context</code>.
|
||||
*/
|
||||
typedef sph_hamsi_small_context sph_hamsi256_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-384 and Hamsi-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a Hamsi computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Hamsi
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
/* Input bytes that do not yet form a complete 8-byte block. */
unsigned char partial[8];
|
||||
/* Number of valid bytes in partial[] (0 to 7 between update calls). */
size_t partial_len;
|
||||
/* Chaining value: copied from the IV at init, serialized at close. */
sph_u32 h[16];
|
||||
/*
 * Number of message bits processed so far (advanced by 64 per 8-byte
 * block): one 64-bit word, or a high/low pair of 32-bit words.
 */
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_hamsi_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-384 computations. It is
|
||||
* identical to the common <code>sph_hamsi_big_context</code>.
|
||||
*/
|
||||
typedef sph_hamsi_big_context sph_hamsi384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-512 computations. It is
|
||||
* identical to the common <code>sph_hamsi_big_context</code>.
|
||||
*/
|
||||
typedef sph_hamsi_big_context sph_hamsi512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Hamsi-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Hamsi-224 context (pointer to a
|
||||
* <code>sph_hamsi224_context</code>)
|
||||
*/
|
||||
void sph_hamsi224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Hamsi-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_hamsi224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Hamsi-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is NOT reinitialized by
|
||||
* this call (the re-init in sph_hamsi.c is commented out); call sph_hamsi224_init() before reusing it.
|
||||
*
|
||||
* @param cc the Hamsi-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is NOT reinitialized by this call; call sph_hamsi224_init() before reusing it.
|
||||
*
|
||||
* @param cc the Hamsi-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Hamsi-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Hamsi-256 context (pointer to a
|
||||
* <code>sph_hamsi256_context</code>)
|
||||
*/
|
||||
void sph_hamsi256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Hamsi-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_hamsi256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Hamsi-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is NOT reinitialized by
|
||||
* this call (the re-init in sph_hamsi.c is commented out); call sph_hamsi256_init() before reusing it.
|
||||
*
|
||||
* @param cc the Hamsi-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is NOT reinitialized by this call; call sph_hamsi256_init() before reusing it.
|
||||
*
|
||||
* @param cc the Hamsi-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Hamsi-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Hamsi-384 context (pointer to a
|
||||
* <code>sph_hamsi384_context</code>)
|
||||
*/
|
||||
void sph_hamsi384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Hamsi-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_hamsi384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Hamsi-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is NOT reinitialized by
|
||||
* this call (the re-init in sph_hamsi.c is commented out); call sph_hamsi384_init() before reusing it.
|
||||
*
|
||||
* @param cc the Hamsi-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is NOT reinitialized by this call; call sph_hamsi384_init() before reusing it.
|
||||
*
|
||||
* @param cc the Hamsi-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Hamsi-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Hamsi-512 context (pointer to a
|
||||
* <code>sph_hamsi512_context</code>)
|
||||
*/
|
||||
void sph_hamsi512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Hamsi-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_hamsi512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Hamsi-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is NOT reinitialized by
|
||||
* this call (the re-init in sph_hamsi.c is commented out); call sph_hamsi512_init() before reusing it.
|
||||
*
|
||||
* @param cc the Hamsi-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is NOT reinitialized by this call; call sph_hamsi512_init() before reusing it.
|
||||
*
|
||||
* @param cc the Hamsi-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
39648
src/crypto/ghostrider/sph_hamsi_helper.c
Normal file
39648
src/crypto/ghostrider/sph_hamsi_helper.c
Normal file
File diff suppressed because it is too large
Load diff
1040
src/crypto/ghostrider/sph_jh.c
Normal file
1040
src/crypto/ghostrider/sph_jh.c
Normal file
File diff suppressed because it is too large
Load diff
298
src/crypto/ghostrider/sph_jh.h
Normal file
298
src/crypto/ghostrider/sph_jh.h
Normal file
|
@ -0,0 +1,298 @@
|
|||
/* $Id: sph_jh.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* JH interface. JH is a family of functions which differ by
|
||||
* their output size; this implementation defines JH for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_jh.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_JH_H__
|
||||
#define SPH_JH_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-224.
|
||||
*/
|
||||
#define SPH_SIZE_jh224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-256.
|
||||
*/
|
||||
#define SPH_SIZE_jh256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-384.
|
||||
*/
|
||||
#define SPH_SIZE_jh384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-512.
|
||||
*/
|
||||
#define SPH_SIZE_jh512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for JH computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a JH computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running JH computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
/*
 * NOTE(review): the JH implementation (sph_jh.c) is not visible from this
 * header, so the field notes below are assumptions based on the
 * declarations only -- confirm against the implementation.
 */
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
/* presumably: current fill position inside buf[] */
size_t ptr;
|
||||
/*
 * Two views of the same storage: sixteen 64-bit words (only when SPH_64
 * is set) or thirty-two 32-bit words.
 */
union {
|
||||
#if SPH_64
|
||||
sph_u64 wide[16];
|
||||
#endif
|
||||
sph_u32 narrow[32];
|
||||
} H;
|
||||
/* presumably: count of processed blocks, as one 64-bit word or a 32-bit pair */
#if SPH_64
|
||||
sph_u64 block_count;
|
||||
#else
|
||||
sph_u32 block_count_high, block_count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_jh_context;
|
||||
|
||||
/**
|
||||
* Type for a JH-224 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_jh_context sph_jh224_context;
|
||||
|
||||
/**
|
||||
* Type for a JH-256 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_jh_context sph_jh256_context;
|
||||
|
||||
/**
|
||||
* Type for a JH-384 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_jh_context sph_jh384_context;
|
||||
|
||||
/**
|
||||
* Type for a JH-512 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_jh_context sph_jh512_context;
|
||||
|
||||
/**
|
||||
* Initialize a JH-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the JH-224 context (pointer to a
|
||||
* <code>sph_jh224_context</code>)
|
||||
*/
|
||||
void sph_jh224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the JH-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_jh224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current JH-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the JH-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the JH-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a JH-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the JH-256 context (pointer to a
|
||||
* <code>sph_jh256_context</code>)
|
||||
*/
|
||||
void sph_jh256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the JH-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_jh256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current JH-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the JH-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the JH-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a JH-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the JH-384 context (pointer to a
|
||||
* <code>sph_jh384_context</code>)
|
||||
*/
|
||||
void sph_jh384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the JH-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_jh384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current JH-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the JH-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the JH-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a JH-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the JH-512 context (pointer to a
|
||||
* <code>sph_jh512_context</code>)
|
||||
*/
|
||||
void sph_jh512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the JH-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_jh512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current JH-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the JH-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the JH-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1868
src/crypto/ghostrider/sph_keccak.c
Normal file
1868
src/crypto/ghostrider/sph_keccak.c
Normal file
File diff suppressed because it is too large
Load diff
296
src/crypto/ghostrider/sph_keccak.h
Normal file
296
src/crypto/ghostrider/sph_keccak.h
Normal file
|
@ -0,0 +1,296 @@
|
|||
/* $Id: sph_keccak.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* Keccak interface. This is the interface for Keccak with the
|
||||
* recommended parameters for SHA-3, with output lengths 224, 256,
|
||||
* 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_keccak.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_KECCAK_H__
|
||||
#define SPH_KECCAK_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Taken from keccak-gate.h
|
||||
/*
 * Declaration only: the variable itself must be defined in some other
 * translation unit. NOTE(review): this header does not show what the value
 * controls or which values are valid -- confirm against the Keccak
 * implementation before reading or writing it.
 */
extern int hard_coded_eb;
|
||||
|
||||
#include "sph_types.h"
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-224.
|
||||
*/
|
||||
#define SPH_SIZE_keccak224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-256.
|
||||
*/
|
||||
#define SPH_SIZE_keccak256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-384.
|
||||
*/
|
||||
#define SPH_SIZE_keccak384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-512.
|
||||
*/
|
||||
#define SPH_SIZE_keccak512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Keccak computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once a
|
||||
* Keccak computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Keccak computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
/* 144-byte byte buffer. NOTE(review): presumably the partially filled
 * input block ("data from the last entered block") -- confirm in
 * sph_keccak.c. */
unsigned char buf[144]; /* first field, for alignment */
|
||||
/* NOTE(review): ptr and lim look like the current fill offset into buf
 * and a per-variant limit on it -- confirm against the implementation. */
size_t ptr, lim;
|
||||
/* Two views of the same storage: 25 sph_u64 words (only compiled in
 * when SPH_64 is set) or 50 sph_u32 words. */
union {
|
||||
#if SPH_64
|
||||
sph_u64 wide[25];
|
||||
#endif
|
||||
sph_u32 narrow[50];
|
||||
} u;
|
||||
#endif
|
||||
} sph_keccak_context;
|
||||
|
||||
/**
|
||||
* Type for a Keccak-224 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_keccak_context sph_keccak224_context;
|
||||
|
||||
/**
|
||||
* Type for a Keccak-256 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_keccak_context sph_keccak256_context;
|
||||
|
||||
/**
|
||||
* Type for a Keccak-384 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_keccak_context sph_keccak384_context;
|
||||
|
||||
/**
|
||||
* Type for a Keccak-512 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_keccak_context sph_keccak512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Keccak-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Keccak-224 context (pointer to a
|
||||
* <code>sph_keccak224_context</code>)
|
||||
*/
|
||||
void sph_keccak224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Keccak-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_keccak224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Keccak-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Keccak-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Keccak-256 context (pointer to a
|
||||
* <code>sph_keccak256_context</code>)
|
||||
*/
|
||||
void sph_keccak256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Keccak-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_keccak256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Keccak-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Keccak-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Keccak-384 context (pointer to a
|
||||
* <code>sph_keccak384_context</code>)
|
||||
*/
|
||||
void sph_keccak384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Keccak-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_keccak384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Keccak-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Keccak-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Keccak-512 context (pointer to a
|
||||
* <code>sph_keccak512_context</code>)
|
||||
*/
|
||||
void sph_keccak512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Keccak-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_keccak512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Keccak-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1426
src/crypto/ghostrider/sph_luffa.c
Normal file
1426
src/crypto/ghostrider/sph_luffa.c
Normal file
File diff suppressed because it is too large
Load diff
296
src/crypto/ghostrider/sph_luffa.h
Normal file
296
src/crypto/ghostrider/sph_luffa.h
Normal file
|
@ -0,0 +1,296 @@
|
|||
/* $Id: sph_luffa.h 154 2010-04-26 17:00:24Z tp $ */
|
||||
/**
|
||||
* Luffa interface. Luffa is a family of functions which differ by
|
||||
* their output size; this implementation defines Luffa for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_luffa.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_LUFFA_H__
|
||||
#define SPH_LUFFA_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-224.
|
||||
*/
|
||||
#define SPH_SIZE_luffa224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-256.
|
||||
*/
|
||||
#define SPH_SIZE_luffa256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-384.
|
||||
*/
|
||||
#define SPH_SIZE_luffa384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-512.
|
||||
*/
|
||||
#define SPH_SIZE_luffa512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-224 computations: it contains
|
||||
* the intermediate values and some data from the last entered block.
|
||||
* Once a Luffa computation has been performed, the context can be
|
||||
* reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Luffa
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
/* 32-byte byte buffer. NOTE(review): presumably the partially filled
 * input block -- confirm in sph_luffa.c. */
unsigned char buf[32]; /* first field, for alignment */
|
||||
/* NOTE(review): looks like the current fill offset into buf -- confirm. */
size_t ptr;
|
||||
/* 3 x 8 array of sph_u32 words holding the intermediate values. */
sph_u32 V[3][8];
|
||||
#endif
|
||||
} sph_luffa224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-256 computations. It is
|
||||
* identical to <code>sph_luffa224_context</code>.
|
||||
*/
|
||||
typedef sph_luffa224_context sph_luffa256_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-384 computations.
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
/* 32-byte byte buffer. NOTE(review): presumably the partially filled
 * input block -- confirm in sph_luffa.c. */
unsigned char buf[32]; /* first field, for alignment */
|
||||
/* NOTE(review): looks like the current fill offset into buf -- confirm. */
size_t ptr;
|
||||
/* 4 x 8 array of sph_u32 words holding the intermediate values (one row
 * more than the Luffa-224/256 context). */
sph_u32 V[4][8];
|
||||
#endif
|
||||
} sph_luffa384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-512 computations.
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
/* 32-byte byte buffer. NOTE(review): presumably the partially filled
 * input block -- confirm in sph_luffa.c. */
unsigned char buf[32]; /* first field, for alignment */
|
||||
/* NOTE(review): looks like the current fill offset into buf -- confirm. */
size_t ptr;
|
||||
/* 5 x 8 array of sph_u32 words holding the intermediate values (one row
 * more than the Luffa-384 context). */
sph_u32 V[5][8];
|
||||
#endif
|
||||
} sph_luffa512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Luffa-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Luffa-224 context (pointer to a
|
||||
* <code>sph_luffa224_context</code>)
|
||||
*/
|
||||
void sph_luffa224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Luffa-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_luffa224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Luffa-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Luffa-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Luffa-256 context (pointer to a
|
||||
* <code>sph_luffa256_context</code>)
|
||||
*/
|
||||
void sph_luffa256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Luffa-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_luffa256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Luffa-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Luffa-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Luffa-384 context (pointer to a
|
||||
* <code>sph_luffa384_context</code>)
|
||||
*/
|
||||
void sph_luffa384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Luffa-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_luffa384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Luffa-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Luffa-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Luffa-512 context (pointer to a
|
||||
* <code>sph_luffa512_context</code>)
|
||||
*/
|
||||
void sph_luffa512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Luffa-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_luffa512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Luffa-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
793
src/crypto/ghostrider/sph_sha2.c
Normal file
793
src/crypto/ghostrider/sph_sha2.c
Normal file
|
@ -0,0 +1,793 @@
|
|||
/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* SHA-224 / SHA-256 implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_sha2.h"
|
||||
|
||||
/*
 * Default the SHA-2-specific small-footprint switch from the global
 * SPH_SMALL_FOOTPRINT setting, unless the build has already defined
 * SPH_SMALL_FOOTPRINT_SHA2 itself. The switch selects which
 * SHA2_ROUND_BODY variant is compiled further down.
 */
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHA2
|
||||
#define SPH_SMALL_FOOTPRINT_SHA2 1
|
||||
#endif
|
||||
|
||||
/*
 * Bitwise "choose": every result bit is taken from Y where the matching
 * bit of X is 1, and from Z where it is 0. Z is evaluated twice.
 */
#define CH(X, Y, Z) ((Z) ^ ((X) & ((Y) ^ (Z))))
|
||||
//#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X)))
|
||||
/*
 * Bitwise majority of X, Y and Z, computed as Y ^ ((X ^ Y) & (Y ^ Z)).
 *
 * This macro is NOT self-contained. It reads Y_xor_Z and assigns X_xor_Y,
 * two variables that must exist in the enclosing scope (SHA2_ROUND_BODY
 * declares them as sph_u32). The caller must have stored (Y ^ Z) in
 * Y_xor_Z before the expansion; the Z argument itself is never referenced.
 * After the expansion X_xor_Y holds (X ^ Y), which the round code copies
 * into Y_xor_Z so the next round can reuse it.
 *
 * X and Y are parenthesized so that compound argument expressions keep
 * their intended precedence. X is evaluated once, Y twice.
 */
#define MAJ( X, Y, Z ) ( (Y) ^ ( ( X_xor_Y = (X) ^ (Y) ) & ( Y_xor_Z ) ) )
|
||||
/* Short local alias for SPH_ROTR32, used by the BSG2_x / SSG2_x macros below. */
#define ROTR SPH_ROTR32
|
||||
|
||||
/* XOR of x rotated right by 2, 13 and 22 bits (applied to "a" in each round). */
#define BSG2_0(x) (ROTR((x), 22) ^ ROTR((x), 13) ^ ROTR((x), 2))
|
||||
/* XOR of x rotated right by 6, 11 and 25 bits (applied to "e" in each round). */
#define BSG2_1(x) (ROTR((x), 25) ^ ROTR((x), 11) ^ ROTR((x), 6))
|
||||
/*
 * XOR of x rotated right by 7 and 18 bits and x shifted right by 3 bits
 * (used by the message-schedule expansion).
 */
#define SSG2_0(x) (SPH_T32((x) >> 3) ^ ROTR((x), 18) ^ ROTR((x), 7))
|
||||
/*
 * XOR of x rotated right by 17 and 19 bits and x shifted right by 10 bits
 * (used by the message-schedule expansion).
 */
#define SSG2_1(x) (SPH_T32((x) >> 10) ^ ROTR((x), 19) ^ ROTR((x), 17))
|
||||
|
||||
/* The eight 32-bit initial hash values for SHA-224. */
static const sph_u32 H224[8] = {
|
||||
SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17),
|
||||
SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511),
|
||||
SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4)
|
||||
};
|
||||
|
||||
/* The eight 32-bit initial hash values for SHA-256. */
static const sph_u32 H256[8] = {
|
||||
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
|
||||
SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
|
||||
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
|
||||
};
|
||||
|
||||
/*
|
||||
* The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256
|
||||
* compression function implementation. The "in" parameter should
|
||||
* evaluate, when applied to a numerical input parameter from 0 to 15,
|
||||
* to an expression which yields the corresponding input block. The "r"
|
||||
* parameter should evaluate to an array or pointer expression
|
||||
* designating the array of 8 words which contains the input and output
|
||||
* of the compression function.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
static const sph_u32 K[64] = {
|
||||
SPH_C32(0x428A2F98), SPH_C32(0x71374491),
|
||||
SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
|
||||
SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
|
||||
SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
|
||||
SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
|
||||
SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
|
||||
SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
|
||||
SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
|
||||
SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
|
||||
SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
|
||||
SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
|
||||
SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
|
||||
SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
|
||||
SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
|
||||
SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
|
||||
SPH_C32(0x06CA6351), SPH_C32(0x14292967),
|
||||
SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
|
||||
SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
|
||||
SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
|
||||
SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
|
||||
SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
|
||||
SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
|
||||
SPH_C32(0xD192E819), SPH_C32(0xD6990624),
|
||||
SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
|
||||
SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
|
||||
SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
|
||||
SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
|
||||
SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
|
||||
SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
|
||||
SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
|
||||
SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
|
||||
SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
|
||||
};
|
||||
*/
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_SHA2
|
||||
|
||||
/*
 * Message schedule, first 16 steps: store input word number pc, obtained
 * as in(pc), into W[pc]. W must be an array in the enclosing scope.
 */
#define SHA2_MEXP1(in, pc) do { \
|
||||
W[pc] = in(pc); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Message schedule, later steps: update slot (pc & 0x0F) of the 16-word
 * rolling window W in place, from the words 2, 7 and 15 positions back
 * plus the slot's previous content (all indices taken modulo 16). The
 * "in" parameter is not used; it is accepted so that both SHA2_MEXP
 * variants can be invoked the same way from SHA2_STEPn.
 */
#define SHA2_MEXP2(in, pc) do { \
|
||||
W[(pc) & 0x0F] = SPH_T32(SSG2_1(W[((pc) - 2) & 0x0F]) \
|
||||
+ W[((pc) - 7) & 0x0F] \
|
||||
+ SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]); \
|
||||
} while (0)
|
||||
|
||||
/*
 * One compression step. "n" (1 or 2) selects SHA2_MEXP1 or SHA2_MEXP2 by
 * token pasting; a..h name the eight working variables in their current
 * rotation. Only d and h are written: d += t1 and h = t1 + t2.
 *
 * Relies on names from the enclosing scope: W, pcount, K, X_xor_Y and
 * Y_xor_Z. MAJ() leaves (a ^ b) in X_xor_Y; copying it into Y_xor_Z
 * prepares the value the next step's MAJ() expects, because that step is
 * invoked with the variables rotated by one position.
 *
 * NOTE(review): K[] is indexed here, but the only K table visible in this
 * part of the file is inside a comment -- confirm that K is defined
 * before building with SPH_SMALL_FOOTPRINT_SHA2 enabled.
 */
#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc) do { \
|
||||
sph_u32 t1, t2; \
|
||||
SHA2_MEXP ## n(in, pc); \
|
||||
t1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \
|
||||
+ K[pcount + (pc)] + W[(pc) & 0x0F]); \
|
||||
t2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
d = SPH_T32(d + t1); \
|
||||
h = SPH_T32(t1 + t2); \
|
||||
} while (0)
|
||||
|
||||
/* Compression step that loads its message word from the input (SHA2_MEXP1). */
#define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) \
|
||||
SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc)
|
||||
/* Compression step that derives its message word from the rolling window (SHA2_MEXP2). */
#define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) \
|
||||
SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc)
|
||||
|
||||
/*
 * Small-footprint compression function body. Copies the eight words of
 * (r) into A..H, runs 16 steps that read the input through in(0..15),
 * then three passes of 16 steps (pcount = 16, 32, 48) that expand the
 * message in the 16-word window W, and finally adds A..H back into (r).
 *
 * Instead of shuffling values between variables, each successive step is
 * invoked with the variable names rotated by one position. Y_xor_Z is
 * seeded with B ^ C for the first MAJ() and maintained by SHA2_STEPn
 * afterwards.
 */
#define SHA2_ROUND_BODY(in, r) do { \
|
||||
sph_u32 A, B, C, D, E, F, G, H, X_xor_Y, Y_xor_Z; \
|
||||
sph_u32 W[16]; \
|
||||
unsigned pcount; \
|
||||
\
|
||||
A = (r)[0]; \
|
||||
B = (r)[1]; \
|
||||
C = (r)[2]; \
|
||||
D = (r)[3]; \
|
||||
E = (r)[4]; \
|
||||
F = (r)[5]; \
|
||||
G = (r)[6]; \
|
||||
H = (r)[7]; \
|
||||
pcount = 0; \
|
||||
Y_xor_Z = B ^ C; \
|
||||
SHA2_STEP1(A, B, C, D, E, F, G, H, in, 0); \
|
||||
SHA2_STEP1(H, A, B, C, D, E, F, G, in, 1); \
|
||||
SHA2_STEP1(G, H, A, B, C, D, E, F, in, 2); \
|
||||
SHA2_STEP1(F, G, H, A, B, C, D, E, in, 3); \
|
||||
SHA2_STEP1(E, F, G, H, A, B, C, D, in, 4); \
|
||||
SHA2_STEP1(D, E, F, G, H, A, B, C, in, 5); \
|
||||
SHA2_STEP1(C, D, E, F, G, H, A, B, in, 6); \
|
||||
SHA2_STEP1(B, C, D, E, F, G, H, A, in, 7); \
|
||||
SHA2_STEP1(A, B, C, D, E, F, G, H, in, 8); \
|
||||
SHA2_STEP1(H, A, B, C, D, E, F, G, in, 9); \
|
||||
SHA2_STEP1(G, H, A, B, C, D, E, F, in, 10); \
|
||||
SHA2_STEP1(F, G, H, A, B, C, D, E, in, 11); \
|
||||
SHA2_STEP1(E, F, G, H, A, B, C, D, in, 12); \
|
||||
SHA2_STEP1(D, E, F, G, H, A, B, C, in, 13); \
|
||||
SHA2_STEP1(C, D, E, F, G, H, A, B, in, 14); \
|
||||
SHA2_STEP1(B, C, D, E, F, G, H, A, in, 15); \
|
||||
for (pcount = 16; pcount < 64; pcount += 16) { \
|
||||
SHA2_STEP2(A, B, C, D, E, F, G, H, in, 0); \
|
||||
SHA2_STEP2(H, A, B, C, D, E, F, G, in, 1); \
|
||||
SHA2_STEP2(G, H, A, B, C, D, E, F, in, 2); \
|
||||
SHA2_STEP2(F, G, H, A, B, C, D, E, in, 3); \
|
||||
SHA2_STEP2(E, F, G, H, A, B, C, D, in, 4); \
|
||||
SHA2_STEP2(D, E, F, G, H, A, B, C, in, 5); \
|
||||
SHA2_STEP2(C, D, E, F, G, H, A, B, in, 6); \
|
||||
SHA2_STEP2(B, C, D, E, F, G, H, A, in, 7); \
|
||||
SHA2_STEP2(A, B, C, D, E, F, G, H, in, 8); \
|
||||
SHA2_STEP2(H, A, B, C, D, E, F, G, in, 9); \
|
||||
SHA2_STEP2(G, H, A, B, C, D, E, F, in, 10); \
|
||||
SHA2_STEP2(F, G, H, A, B, C, D, E, in, 11); \
|
||||
SHA2_STEP2(E, F, G, H, A, B, C, D, in, 12); \
|
||||
SHA2_STEP2(D, E, F, G, H, A, B, C, in, 13); \
|
||||
SHA2_STEP2(C, D, E, F, G, H, A, B, in, 14); \
|
||||
SHA2_STEP2(B, C, D, E, F, G, H, A, in, 15); \
|
||||
} \
|
||||
(r)[0] = SPH_T32((r)[0] + A); \
|
||||
(r)[1] = SPH_T32((r)[1] + B); \
|
||||
(r)[2] = SPH_T32((r)[2] + C); \
|
||||
(r)[3] = SPH_T32((r)[3] + D); \
|
||||
(r)[4] = SPH_T32((r)[4] + E); \
|
||||
(r)[5] = SPH_T32((r)[5] + F); \
|
||||
(r)[6] = SPH_T32((r)[6] + G); \
|
||||
(r)[7] = SPH_T32((r)[7] + H); \
|
||||
} while (0)
|
||||
|
||||
#else // large footprint (default)
|
||||
|
||||
/*
 * Large-footprint (fully unrolled) SHA-224 / SHA-256 compression function.
 *
 * All 64 steps are still expanded inline, but each step is now written
 * through the small helper macros below instead of six hand-copied
 * statements. The helpers are deliberately not hygienic: they expand inside
 * SHA2_ROUND_BODY and use its locals (A..H, T1, T2, X_xor_Y, Y_xor_Z and
 * the schedule words W00..W15) directly.
 */

/*
 * One compression step on the working variables (a..h), using round
 * constant k and schedule word w. Only d and h are written; callers rotate
 * the argument list by one position per step instead of shuffling the
 * eight variables.
 *
 * Y_xor_Z is refreshed from X_xor_Y after every MAJ() evaluation, exactly
 * as the previous hand-unrolled code did.
 * NOTE(review): this presumes the MAJ() macro defined earlier in this file
 * stores its (X ^ Y) term in X_xor_Y and reads Y_xor_Z -- confirm against
 * that definition.
 */
#define SHA2_LF_STEP(a, b, c, d, e, f, g, h, k, w)   do { \
		T1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \
			+ SPH_C32(k) + w); \
		T2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \
		Y_xor_Z = X_xor_Y; \
		d = SPH_T32(d + T1); \
		h = SPH_T32(T1 + T2); \
	} while (0)

/*
 * Message-schedule update for one slot of the 16-word rolling window:
 * w is the slot being replaced, w_2 / w_7 / w_15 are the words produced
 * 2, 7 and 15 steps earlier.
 */
#define SHA2_LF_SCHED(w, w_2, w_7, w_15) \
	w = SPH_T32(SSG2_1(w_2) + w_7 + SSG2_0(w_15) + w)

/*
 * Sixteen consecutive schedule-update + compression steps (used for steps
 * 16..31, 32..47 and 48..63); k00..k15 are the round constants in order.
 */
#define SHA2_LF_EXPAND16(k00, k01, k02, k03, k04, k05, k06, k07, \
                         k08, k09, k10, k11, k12, k13, k14, k15)   do { \
		SHA2_LF_SCHED(W00, W14, W09, W01); \
		SHA2_LF_STEP(A, B, C, D, E, F, G, H, k00, W00); \
		SHA2_LF_SCHED(W01, W15, W10, W02); \
		SHA2_LF_STEP(H, A, B, C, D, E, F, G, k01, W01); \
		SHA2_LF_SCHED(W02, W00, W11, W03); \
		SHA2_LF_STEP(G, H, A, B, C, D, E, F, k02, W02); \
		SHA2_LF_SCHED(W03, W01, W12, W04); \
		SHA2_LF_STEP(F, G, H, A, B, C, D, E, k03, W03); \
		SHA2_LF_SCHED(W04, W02, W13, W05); \
		SHA2_LF_STEP(E, F, G, H, A, B, C, D, k04, W04); \
		SHA2_LF_SCHED(W05, W03, W14, W06); \
		SHA2_LF_STEP(D, E, F, G, H, A, B, C, k05, W05); \
		SHA2_LF_SCHED(W06, W04, W15, W07); \
		SHA2_LF_STEP(C, D, E, F, G, H, A, B, k06, W06); \
		SHA2_LF_SCHED(W07, W05, W00, W08); \
		SHA2_LF_STEP(B, C, D, E, F, G, H, A, k07, W07); \
		SHA2_LF_SCHED(W08, W06, W01, W09); \
		SHA2_LF_STEP(A, B, C, D, E, F, G, H, k08, W08); \
		SHA2_LF_SCHED(W09, W07, W02, W10); \
		SHA2_LF_STEP(H, A, B, C, D, E, F, G, k09, W09); \
		SHA2_LF_SCHED(W10, W08, W03, W11); \
		SHA2_LF_STEP(G, H, A, B, C, D, E, F, k10, W10); \
		SHA2_LF_SCHED(W11, W09, W04, W12); \
		SHA2_LF_STEP(F, G, H, A, B, C, D, E, k11, W11); \
		SHA2_LF_SCHED(W12, W10, W05, W13); \
		SHA2_LF_STEP(E, F, G, H, A, B, C, D, k12, W12); \
		SHA2_LF_SCHED(W13, W11, W06, W14); \
		SHA2_LF_STEP(D, E, F, G, H, A, B, C, k13, W13); \
		SHA2_LF_SCHED(W14, W12, W07, W15); \
		SHA2_LF_STEP(C, D, E, F, G, H, A, B, k14, W14); \
		SHA2_LF_SCHED(W15, W13, W08, W00); \
		SHA2_LF_STEP(B, C, D, E, F, G, H, A, k15, W15); \
	} while (0)

/*
 * SHA2_ROUND_BODY(in, r)
 *   in  function-like macro: in(i) yields message word i (0..15)
 *   r   array of eight 32-bit chaining words; read at the start and
 *       updated in place (old value + working variable) at the end
 */
#define SHA2_ROUND_BODY(in, r)   do { \
		sph_u32 A, B, C, D, E, F, G, H, T1, T2, X_xor_Y, Y_xor_Z; \
		sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \
		sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \
 \
		A = (r)[0]; \
		B = (r)[1]; \
		C = (r)[2]; \
		D = (r)[3]; \
		E = (r)[4]; \
		F = (r)[5]; \
		G = (r)[6]; \
		H = (r)[7]; \
		Y_xor_Z = B ^ C; \
 \
		W00 = in(0); \
		SHA2_LF_STEP(A, B, C, D, E, F, G, H, 0x428A2F98, W00); \
		W01 = in(1); \
		SHA2_LF_STEP(H, A, B, C, D, E, F, G, 0x71374491, W01); \
		W02 = in(2); \
		SHA2_LF_STEP(G, H, A, B, C, D, E, F, 0xB5C0FBCF, W02); \
		W03 = in(3); \
		SHA2_LF_STEP(F, G, H, A, B, C, D, E, 0xE9B5DBA5, W03); \
		W04 = in(4); \
		SHA2_LF_STEP(E, F, G, H, A, B, C, D, 0x3956C25B, W04); \
		W05 = in(5); \
		SHA2_LF_STEP(D, E, F, G, H, A, B, C, 0x59F111F1, W05); \
		W06 = in(6); \
		SHA2_LF_STEP(C, D, E, F, G, H, A, B, 0x923F82A4, W06); \
		W07 = in(7); \
		SHA2_LF_STEP(B, C, D, E, F, G, H, A, 0xAB1C5ED5, W07); \
		W08 = in(8); \
		SHA2_LF_STEP(A, B, C, D, E, F, G, H, 0xD807AA98, W08); \
		W09 = in(9); \
		SHA2_LF_STEP(H, A, B, C, D, E, F, G, 0x12835B01, W09); \
		W10 = in(10); \
		SHA2_LF_STEP(G, H, A, B, C, D, E, F, 0x243185BE, W10); \
		W11 = in(11); \
		SHA2_LF_STEP(F, G, H, A, B, C, D, E, 0x550C7DC3, W11); \
		W12 = in(12); \
		SHA2_LF_STEP(E, F, G, H, A, B, C, D, 0x72BE5D74, W12); \
		W13 = in(13); \
		SHA2_LF_STEP(D, E, F, G, H, A, B, C, 0x80DEB1FE, W13); \
		W14 = in(14); \
		SHA2_LF_STEP(C, D, E, F, G, H, A, B, 0x9BDC06A7, W14); \
		W15 = in(15); \
		SHA2_LF_STEP(B, C, D, E, F, G, H, A, 0xC19BF174, W15); \
 \
		SHA2_LF_EXPAND16( \
			0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, \
			0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, \
			0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, \
			0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967); \
		SHA2_LF_EXPAND16( \
			0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, \
			0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, \
			0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, \
			0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070); \
		SHA2_LF_EXPAND16( \
			0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, \
			0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, \
			0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, \
			0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2); \
 \
		(r)[0] = SPH_T32((r)[0] + A); \
		(r)[1] = SPH_T32((r)[1] + B); \
		(r)[2] = SPH_T32((r)[2] + C); \
		(r)[3] = SPH_T32((r)[3] + D); \
		(r)[4] = SPH_T32((r)[4] + E); \
		(r)[5] = SPH_T32((r)[5] + F); \
		(r)[6] = SPH_T32((r)[6] + G); \
		(r)[7] = SPH_T32((r)[7] + H); \
	} while (0)
|
||||
|
||||
#endif // small footprint else
|
||||
|
||||
/*
|
||||
* One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access.
|
||||
*/
|
||||
static void
|
||||
sha2_round(const unsigned char *data, sph_u32 r[8])
|
||||
{
|
||||
#define SHA2_IN(x) sph_dec32be_aligned(data + (4 * (x)))
|
||||
SHA2_ROUND_BODY(SHA2_IN, r);
|
||||
#undef SHA2_IN
|
||||
}
|
||||
|
||||
void sph_sha256_transform_le( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in )
|
||||
{
|
||||
memcpy( state_out, state_in, 32 );
|
||||
#define SHA2_IN(x) (data[x])
|
||||
SHA2_ROUND_BODY( SHA2_IN, state_out );
|
||||
#undef SHA2_IN
|
||||
}
|
||||
|
||||
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in )
|
||||
{
|
||||
memcpy( state_out, state_in, 32 );
|
||||
#define SHA2_IN(x) sph_dec32be_aligned( data+(x) )
|
||||
SHA2_ROUND_BODY( SHA2_IN, state_out );
|
||||
#undef SHA2_IN
|
||||
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_init(void *cc)
|
||||
{
|
||||
sph_sha224_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, H224, sizeof H224);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha256_init(void *cc)
|
||||
{
|
||||
sph_sha256_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, H256, sizeof H256);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Parameters for the md_helper.c template, which is textually included
 * below. RFUN names the compression function defined above. HASH is
 * apparently the name stem of the functions the template generates:
 * sha224_close() and sha224_addbits_and_close(), called further down, are
 * not defined anywhere else in this file.
 * NOTE(review): BE32 = 1 presumably selects big-endian 32-bit encoding --
 * confirm in md_helper.c.
 */
#define RFUN sha2_round
|
||||
#define HASH sha224
|
||||
#define BE32 1
|
||||
#include "md_helper.c"
|
||||
|
||||
/*
 * Finish a SHA-224 computation and write the digest to dst (declared in
 * sph_sha2.h, which documents a 28-byte result).
 */
void
sph_sha224_close(void *cc, void *dst)
{
	/* 7 x 32-bit words = 224 bits; passed as the helper's last argument. */
	enum { SHA224_DIGEST_WORDS = 7 };

	sha224_close(cc, dst, SHA224_DIGEST_WORDS);
	/* Re-initialization is left disabled here: sph_sha224_init(cc); */
}
|
||||
|
||||
/*
 * Append the n extra bits given in ub, then finish the SHA-224 computation
 * and write the digest to dst (declared in sph_sha2.h, which documents a
 * 28-byte result).
 */
void
sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	/* 7 x 32-bit words = 224 bits; passed as the helper's last argument. */
	enum { SHA224_DIGEST_WORDS = 7 };

	sha224_addbits_and_close(cc, ub, n, dst, SHA224_DIGEST_WORDS);
	/* Re-initialization is left disabled here: sph_sha224_init(cc); */
}
|
||||
|
||||
/*
 * Finish a SHA-256 computation and write the digest to dst (declared in
 * sph_sha2.h, which documents a 32-byte result). Uses the same generated
 * helper as SHA-224, with eight output words instead of seven.
 */
void
sph_sha256_close(void *cc, void *dst)
{
	/* 8 x 32-bit words = 256 bits; passed as the helper's last argument. */
	enum { SHA256_DIGEST_WORDS = 8 };

	sha224_close(cc, dst, SHA256_DIGEST_WORDS);
	/* Re-initialization is left disabled here: sph_sha256_init(cc); */
}
|
||||
|
||||
/*
 * Append the n extra bits given in ub, then finish the SHA-256 computation
 * and write the digest to dst (declared in sph_sha2.h, which documents a
 * 32-byte result). Uses the same generated helper as SHA-224, with eight
 * output words instead of seven.
 */
void
sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	/* 8 x 32-bit words = 256 bits; passed as the helper's last argument. */
	enum { SHA256_DIGEST_WORDS = 8 };

	sha224_addbits_and_close(cc, ub, n, dst, SHA256_DIGEST_WORDS);
	/* Re-initialization is left disabled here: sph_sha256_init(cc); */
}
|
||||
|
||||
void sph_sha256_full( void *dst, const void *data, size_t len )
|
||||
{
|
||||
sph_sha256_context cc;
|
||||
sph_sha256_init( &cc );
|
||||
sph_sha256( &cc, data, len );
|
||||
sph_sha256_close( &cc, dst );
|
||||
}
|
||||
|
||||
/*
 * Double SHA-256: hash the len bytes at data, then hash the first 32 bytes
 * of that result again, leaving the final digest in hash.
 *
 * len is converted to size_t for the first pass, so it must not be
 * negative.
 */
void sha256d(void* hash, const void* data, int len)
{
	const size_t first_pass_len  = (size_t)len;
	const size_t second_pass_len = 32;

	sph_sha256_full(hash, data, first_pass_len);
	/* The second pass reads and overwrites the same buffer. */
	sph_sha256_full(hash, hash, second_pass_len);
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
//void
|
||||
//sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
|
||||
//{
|
||||
//#define SHA2_IN(x) msg[x]
|
||||
// SHA2_ROUND_BODY(SHA2_IN, val);
|
||||
//#undef SHA2_IN
|
||||
//}
|
383
src/crypto/ghostrider/sph_sha2.h
Normal file
383
src/crypto/ghostrider/sph_sha2.h
Normal file
|
@ -0,0 +1,383 @@
|
|||
/* $Id: sph_sha2.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* SHA-224, SHA-256, SHA-384 and SHA-512 interface.
|
||||
*
|
||||
* SHA-256 has been published in FIPS 180-2, now amended with a change
|
||||
* notice to include SHA-224 as well (which is a simple variation on
|
||||
* SHA-256). SHA-384 and SHA-512 are also defined in FIPS 180-2. FIPS
|
||||
* standards can be found at:
|
||||
* http://csrc.nist.gov/publications/fips/
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_sha2.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SHA2_H__
|
||||
#define SPH_SHA2_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHA-224.
|
||||
*/
|
||||
#define SPH_SIZE_sha224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHA-256.
|
||||
*/
|
||||
#define SPH_SIZE_sha256 256
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-224 computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a SHA-224 computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running SHA-224 computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
/* Shared SHA-224 / SHA-256 state; sph_sha256_context is a typedef of it. */
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* data from the last entered (64-byte) block; first field, for alignment */
|
||||
sph_u32 val[8]; /* eight 32-bit chaining words, seeded from H224 or H256 by the init functions */
|
||||
#if SPH_64
|
||||
sph_u64 count; /* zeroed by the init functions; presumably the input length so far -- maintained outside this file, confirm in md_helper.c */
|
||||
#else
|
||||
sph_u32 count_high, count_low; /* same counter, split in two 32-bit halves when no 64-bit type is available */
|
||||
#endif
|
||||
#endif
|
||||
} sph_sha224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-256 computations. It is identical
|
||||
* to the SHA-224 context. However, a context is initialized for SHA-224
|
||||
* <strong>or</strong> SHA-256, but not both (the internal IV is not the
|
||||
* same).
|
||||
*/
|
||||
typedef sph_sha224_context sph_sha256_context;
|
||||
|
||||
/**
|
||||
* Initialize a SHA-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHA-224 context (pointer to
|
||||
* a <code>sph_sha224_context</code>)
|
||||
*/
|
||||
void sph_sha224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHA-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_sha224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHA-224 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). NOTE: the re-initialization call in
|
||||
* sph_sha2.c is commented out, so call sph_sha224_init() before reusing the context.
|
||||
*
|
||||
* @param cc the SHA-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). NOTE: the re-initialization call in sph_sha2.c is commented out, so call sph_sha224_init() before reusing the context.
|
||||
*
|
||||
* @param cc the SHA-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Apply the SHA-224 compression function on the provided data. The
|
||||
* <code>msg</code> parameter contains the 16 32-bit input blocks,
|
||||
* as numerical values (hence after the big-endian decoding). The
|
||||
* <code>val</code> parameter contains the 8 32-bit input blocks for
|
||||
* the compression function; the output is written in place in this
|
||||
* array.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 256-bit input and output
|
||||
*/
|
||||
void sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]);
|
||||
|
||||
/**
|
||||
* Initialize a SHA-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHA-256 context (pointer to
|
||||
* a <code>sph_sha256_context</code>)
|
||||
*/
|
||||
void sph_sha256_init(void *cc);
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Process some data bytes, for SHA-256. This function is identical to
|
||||
* <code>sph_sha224()</code>.
|
||||
*
|
||||
* @param cc the SHA-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_sha256(void *cc, const void *data, size_t len);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_sha256 sph_sha224
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Terminate the current SHA-256 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). NOTE: the re-initialization call in
|
||||
* sph_sha2.c is commented out, so call sph_sha256_init() before reusing the context.
|
||||
*
|
||||
* @param cc the SHA-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). NOTE: the re-initialization call in sph_sha2.c is commented out, so call sph_sha256_init() before reusing the context.
|
||||
*
|
||||
* @param cc the SHA-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Apply the SHA-256 compression function on the provided data. This
|
||||
* function is identical to <code>sph_sha224_comp()</code>.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 256-bit input and output
|
||||
*/
|
||||
void sph_sha256_comp(const sph_u32 msg[16], sph_u32 val[8]);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_sha256_comp sph_sha224_comp
|
||||
#endif
|
||||
|
||||
void sph_sha256_full( void *dst, const void *data, size_t len );
|
||||
void sha256d(void* hash, const void* data, int len);
|
||||
|
||||
// These shouldn't be called directly, use sha256-hash.h generic functions
|
||||
// sha256_transform_le & sha256_transform_be instead.
|
||||
void sph_sha256_transform_le( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in );
|
||||
|
||||
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in );
|
||||
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHA-384.
|
||||
*/
|
||||
#define SPH_SIZE_sha384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHA-512.
|
||||
*/
|
||||
#define SPH_SIZE_sha512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-384 computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a SHA-384 computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running SHA-384 computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
/* SHA-384 state; only declared when SPH_64 is set, since it needs sph_u64. */
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* data from the last entered block (128 bytes here); first field, for alignment */
|
||||
sph_u64 val[8]; /* eight 64-bit state words (the compression-function docs below describe val as 8 64-bit values) */
|
||||
sph_u64 count; /* NOTE(review): presumably the input length so far, as in the SHA-224 context -- the code that maintains it is not in this file */
|
||||
#endif
|
||||
} sph_sha384_context;
|
||||
|
||||
/**
|
||||
* Initialize a SHA-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHA-384 context (pointer to
|
||||
* a <code>sph_sha384_context</code>)
|
||||
*/
|
||||
void sph_sha384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHA-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_sha384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHA-384 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHA-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHA-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Apply the SHA-384 compression function on the provided data. The
|
||||
* <code>msg</code> parameter contains the 16 64-bit input blocks,
|
||||
* as numerical values (hence after the big-endian decoding). The
|
||||
* <code>val</code> parameter contains the 8 64-bit input blocks for
|
||||
* the compression function; the output is written in place in this
|
||||
* array.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 512-bit input and output
|
||||
*/
|
||||
void sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8]);
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-512 computations. It is identical
|
||||
* to the SHA-384 context. However, a context is initialized for SHA-384
|
||||
* <strong>or</strong> SHA-512, but not both (the internal IV is not the
|
||||
* same).
|
||||
*/
|
||||
typedef sph_sha384_context sph_sha512_context;
|
||||
|
||||
/**
|
||||
* Initialize a SHA-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHA-512 context (pointer to
|
||||
* a <code>sph_sha512_context</code>)
|
||||
*/
|
||||
void sph_sha512_init(void *cc);
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Process some data bytes, for SHA-512. This function is identical to
|
||||
* <code>sph_sha384()</code>.
|
||||
*
|
||||
* @param cc the SHA-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_sha512(void *cc, const void *data, size_t len);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_sha512 sph_sha384
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Terminate the current SHA-512 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHA-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHA-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Apply the SHA-512 compression function. This function is identical to
|
||||
* <code>sph_sha384_comp()</code>.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 512-bit input and output
|
||||
*/
|
||||
void sph_sha512_comp(const sph_u64 msg[16], sph_u64 val[8]);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_sha512_comp sph_sha384_comp
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
808
src/crypto/ghostrider/sph_shabal.c
Normal file
808
src/crypto/ghostrider/sph_shabal.c
Normal file
|
@ -0,0 +1,808 @@
|
|||
/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
|
||||
/*
|
||||
* Shabal implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_shabal.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Part of this code was automatically generated (the part between
|
||||
* the "BEGIN" and "END" markers).
|
||||
*/
|
||||
|
||||
#define sM 16
|
||||
|
||||
#define C32 SPH_C32
|
||||
#define T32 SPH_T32
|
||||
|
||||
#define O1 13
|
||||
#define O2 9
|
||||
#define O3 6
|
||||
|
||||
/*
|
||||
* We copy the state into local variables, so that the compiler knows
|
||||
* that it can optimize them at will.
|
||||
*/
|
||||
|
||||
/* BEGIN -- automatically generated code. */
|
||||
|
||||
#define DECL_STATE \
|
||||
sph_u32 A00, A01, A02, A03, A04, A05, A06, A07, \
|
||||
A08, A09, A0A, A0B; \
|
||||
sph_u32 B0, B1, B2, B3, B4, B5, B6, B7, \
|
||||
B8, B9, BA, BB, BC, BD, BE, BF; \
|
||||
sph_u32 C0, C1, C2, C3, C4, C5, C6, C7, \
|
||||
C8, C9, CA, CB, CC, CD, CE, CF; \
|
||||
sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, \
|
||||
M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
sph_u32 Wlow, Whigh;
|
||||
|
||||
#define READ_STATE(state) do { \
|
||||
A00 = (state)->A[0]; \
|
||||
A01 = (state)->A[1]; \
|
||||
A02 = (state)->A[2]; \
|
||||
A03 = (state)->A[3]; \
|
||||
A04 = (state)->A[4]; \
|
||||
A05 = (state)->A[5]; \
|
||||
A06 = (state)->A[6]; \
|
||||
A07 = (state)->A[7]; \
|
||||
A08 = (state)->A[8]; \
|
||||
A09 = (state)->A[9]; \
|
||||
A0A = (state)->A[10]; \
|
||||
A0B = (state)->A[11]; \
|
||||
B0 = (state)->B[0]; \
|
||||
B1 = (state)->B[1]; \
|
||||
B2 = (state)->B[2]; \
|
||||
B3 = (state)->B[3]; \
|
||||
B4 = (state)->B[4]; \
|
||||
B5 = (state)->B[5]; \
|
||||
B6 = (state)->B[6]; \
|
||||
B7 = (state)->B[7]; \
|
||||
B8 = (state)->B[8]; \
|
||||
B9 = (state)->B[9]; \
|
||||
BA = (state)->B[10]; \
|
||||
BB = (state)->B[11]; \
|
||||
BC = (state)->B[12]; \
|
||||
BD = (state)->B[13]; \
|
||||
BE = (state)->B[14]; \
|
||||
BF = (state)->B[15]; \
|
||||
C0 = (state)->C[0]; \
|
||||
C1 = (state)->C[1]; \
|
||||
C2 = (state)->C[2]; \
|
||||
C3 = (state)->C[3]; \
|
||||
C4 = (state)->C[4]; \
|
||||
C5 = (state)->C[5]; \
|
||||
C6 = (state)->C[6]; \
|
||||
C7 = (state)->C[7]; \
|
||||
C8 = (state)->C[8]; \
|
||||
C9 = (state)->C[9]; \
|
||||
CA = (state)->C[10]; \
|
||||
CB = (state)->C[11]; \
|
||||
CC = (state)->C[12]; \
|
||||
CD = (state)->C[13]; \
|
||||
CE = (state)->C[14]; \
|
||||
CF = (state)->C[15]; \
|
||||
Wlow = (state)->Wlow; \
|
||||
Whigh = (state)->Whigh; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE(state) do { \
|
||||
(state)->A[0] = A00; \
|
||||
(state)->A[1] = A01; \
|
||||
(state)->A[2] = A02; \
|
||||
(state)->A[3] = A03; \
|
||||
(state)->A[4] = A04; \
|
||||
(state)->A[5] = A05; \
|
||||
(state)->A[6] = A06; \
|
||||
(state)->A[7] = A07; \
|
||||
(state)->A[8] = A08; \
|
||||
(state)->A[9] = A09; \
|
||||
(state)->A[10] = A0A; \
|
||||
(state)->A[11] = A0B; \
|
||||
(state)->B[0] = B0; \
|
||||
(state)->B[1] = B1; \
|
||||
(state)->B[2] = B2; \
|
||||
(state)->B[3] = B3; \
|
||||
(state)->B[4] = B4; \
|
||||
(state)->B[5] = B5; \
|
||||
(state)->B[6] = B6; \
|
||||
(state)->B[7] = B7; \
|
||||
(state)->B[8] = B8; \
|
||||
(state)->B[9] = B9; \
|
||||
(state)->B[10] = BA; \
|
||||
(state)->B[11] = BB; \
|
||||
(state)->B[12] = BC; \
|
||||
(state)->B[13] = BD; \
|
||||
(state)->B[14] = BE; \
|
||||
(state)->B[15] = BF; \
|
||||
(state)->C[0] = C0; \
|
||||
(state)->C[1] = C1; \
|
||||
(state)->C[2] = C2; \
|
||||
(state)->C[3] = C3; \
|
||||
(state)->C[4] = C4; \
|
||||
(state)->C[5] = C5; \
|
||||
(state)->C[6] = C6; \
|
||||
(state)->C[7] = C7; \
|
||||
(state)->C[8] = C8; \
|
||||
(state)->C[9] = C9; \
|
||||
(state)->C[10] = CA; \
|
||||
(state)->C[11] = CB; \
|
||||
(state)->C[12] = CC; \
|
||||
(state)->C[13] = CD; \
|
||||
(state)->C[14] = CE; \
|
||||
(state)->C[15] = CF; \
|
||||
(state)->Wlow = Wlow; \
|
||||
(state)->Whigh = Whigh; \
|
||||
} while (0)
|
||||
|
||||
#define DECODE_BLOCK do { \
|
||||
M0 = sph_dec32le_aligned(buf + 0); \
|
||||
M1 = sph_dec32le_aligned(buf + 4); \
|
||||
M2 = sph_dec32le_aligned(buf + 8); \
|
||||
M3 = sph_dec32le_aligned(buf + 12); \
|
||||
M4 = sph_dec32le_aligned(buf + 16); \
|
||||
M5 = sph_dec32le_aligned(buf + 20); \
|
||||
M6 = sph_dec32le_aligned(buf + 24); \
|
||||
M7 = sph_dec32le_aligned(buf + 28); \
|
||||
M8 = sph_dec32le_aligned(buf + 32); \
|
||||
M9 = sph_dec32le_aligned(buf + 36); \
|
||||
MA = sph_dec32le_aligned(buf + 40); \
|
||||
MB = sph_dec32le_aligned(buf + 44); \
|
||||
MC = sph_dec32le_aligned(buf + 48); \
|
||||
MD = sph_dec32le_aligned(buf + 52); \
|
||||
ME = sph_dec32le_aligned(buf + 56); \
|
||||
MF = sph_dec32le_aligned(buf + 60); \
|
||||
} while (0)
|
||||
|
||||
#define INPUT_BLOCK_ADD do { \
|
||||
B0 = T32(B0 + M0); \
|
||||
B1 = T32(B1 + M1); \
|
||||
B2 = T32(B2 + M2); \
|
||||
B3 = T32(B3 + M3); \
|
||||
B4 = T32(B4 + M4); \
|
||||
B5 = T32(B5 + M5); \
|
||||
B6 = T32(B6 + M6); \
|
||||
B7 = T32(B7 + M7); \
|
||||
B8 = T32(B8 + M8); \
|
||||
B9 = T32(B9 + M9); \
|
||||
BA = T32(BA + MA); \
|
||||
BB = T32(BB + MB); \
|
||||
BC = T32(BC + MC); \
|
||||
BD = T32(BD + MD); \
|
||||
BE = T32(BE + ME); \
|
||||
BF = T32(BF + MF); \
|
||||
} while (0)
|
||||
|
||||
#define INPUT_BLOCK_SUB do { \
|
||||
C0 = T32(C0 - M0); \
|
||||
C1 = T32(C1 - M1); \
|
||||
C2 = T32(C2 - M2); \
|
||||
C3 = T32(C3 - M3); \
|
||||
C4 = T32(C4 - M4); \
|
||||
C5 = T32(C5 - M5); \
|
||||
C6 = T32(C6 - M6); \
|
||||
C7 = T32(C7 - M7); \
|
||||
C8 = T32(C8 - M8); \
|
||||
C9 = T32(C9 - M9); \
|
||||
CA = T32(CA - MA); \
|
||||
CB = T32(CB - MB); \
|
||||
CC = T32(CC - MC); \
|
||||
CD = T32(CD - MD); \
|
||||
CE = T32(CE - ME); \
|
||||
CF = T32(CF - MF); \
|
||||
} while (0)
|
||||
|
||||
#define XOR_W do { \
|
||||
A00 ^= Wlow; \
|
||||
A01 ^= Whigh; \
|
||||
} while (0)
|
||||
|
||||
#define SWAP(v1, v2) do { \
|
||||
sph_u32 tmp = (v1); \
|
||||
(v1) = (v2); \
|
||||
(v2) = tmp; \
|
||||
} while (0)
|
||||
|
||||
#define SWAP_BC do { \
|
||||
SWAP(B0, C0); \
|
||||
SWAP(B1, C1); \
|
||||
SWAP(B2, C2); \
|
||||
SWAP(B3, C3); \
|
||||
SWAP(B4, C4); \
|
||||
SWAP(B5, C5); \
|
||||
SWAP(B6, C6); \
|
||||
SWAP(B7, C7); \
|
||||
SWAP(B8, C8); \
|
||||
SWAP(B9, C9); \
|
||||
SWAP(BA, CA); \
|
||||
SWAP(BB, CB); \
|
||||
SWAP(BC, CC); \
|
||||
SWAP(BD, CD); \
|
||||
SWAP(BE, CE); \
|
||||
SWAP(BF, CF); \
|
||||
} while (0)
|
||||
|
||||
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \
|
||||
xa0 = T32((xa0 \
|
||||
^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
|
||||
^ xc) * 3U) \
|
||||
^ xb1 ^ (xb2 & ~xb3) ^ xm; \
|
||||
xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
|
||||
} while (0)
|
||||
|
||||
#define PERM_STEP_0 do { \
|
||||
PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
|
||||
PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
|
||||
PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
|
||||
PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
|
||||
PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
|
||||
PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
|
||||
PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
|
||||
PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
|
||||
PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
|
||||
PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
|
||||
PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
|
||||
PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
|
||||
PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
|
||||
PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
|
||||
PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
|
||||
PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
|
||||
} while (0)
|
||||
|
||||
#define PERM_STEP_1 do { \
|
||||
PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
|
||||
PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
|
||||
PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
|
||||
PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
|
||||
PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
|
||||
PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
|
||||
PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
|
||||
PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
|
||||
PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
|
||||
PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
|
||||
PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
|
||||
PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
|
||||
PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
|
||||
PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
|
||||
PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
|
||||
PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
|
||||
} while (0)
|
||||
|
||||
#define PERM_STEP_2 do { \
|
||||
PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
|
||||
PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
|
||||
PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
|
||||
PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
|
||||
PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
|
||||
PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
|
||||
PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
|
||||
PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
|
||||
PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
|
||||
PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
|
||||
PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
|
||||
PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
|
||||
PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
|
||||
PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
|
||||
PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
|
||||
PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
|
||||
} while (0)
|
||||
|
||||
#define APPLY_P do { \
|
||||
B0 = T32(B0 << 17) | (B0 >> 15); \
|
||||
B1 = T32(B1 << 17) | (B1 >> 15); \
|
||||
B2 = T32(B2 << 17) | (B2 >> 15); \
|
||||
B3 = T32(B3 << 17) | (B3 >> 15); \
|
||||
B4 = T32(B4 << 17) | (B4 >> 15); \
|
||||
B5 = T32(B5 << 17) | (B5 >> 15); \
|
||||
B6 = T32(B6 << 17) | (B6 >> 15); \
|
||||
B7 = T32(B7 << 17) | (B7 >> 15); \
|
||||
B8 = T32(B8 << 17) | (B8 >> 15); \
|
||||
B9 = T32(B9 << 17) | (B9 >> 15); \
|
||||
BA = T32(BA << 17) | (BA >> 15); \
|
||||
BB = T32(BB << 17) | (BB >> 15); \
|
||||
BC = T32(BC << 17) | (BC >> 15); \
|
||||
BD = T32(BD << 17) | (BD >> 15); \
|
||||
BE = T32(BE << 17) | (BE >> 15); \
|
||||
BF = T32(BF << 17) | (BF >> 15); \
|
||||
PERM_STEP_0; \
|
||||
PERM_STEP_1; \
|
||||
PERM_STEP_2; \
|
||||
A0B = T32(A0B + C6); \
|
||||
A0A = T32(A0A + C5); \
|
||||
A09 = T32(A09 + C4); \
|
||||
A08 = T32(A08 + C3); \
|
||||
A07 = T32(A07 + C2); \
|
||||
A06 = T32(A06 + C1); \
|
||||
A05 = T32(A05 + C0); \
|
||||
A04 = T32(A04 + CF); \
|
||||
A03 = T32(A03 + CE); \
|
||||
A02 = T32(A02 + CD); \
|
||||
A01 = T32(A01 + CC); \
|
||||
A00 = T32(A00 + CB); \
|
||||
A0B = T32(A0B + CA); \
|
||||
A0A = T32(A0A + C9); \
|
||||
A09 = T32(A09 + C8); \
|
||||
A08 = T32(A08 + C7); \
|
||||
A07 = T32(A07 + C6); \
|
||||
A06 = T32(A06 + C5); \
|
||||
A05 = T32(A05 + C4); \
|
||||
A04 = T32(A04 + C3); \
|
||||
A03 = T32(A03 + C2); \
|
||||
A02 = T32(A02 + C1); \
|
||||
A01 = T32(A01 + C0); \
|
||||
A00 = T32(A00 + CF); \
|
||||
A0B = T32(A0B + CE); \
|
||||
A0A = T32(A0A + CD); \
|
||||
A09 = T32(A09 + CC); \
|
||||
A08 = T32(A08 + CB); \
|
||||
A07 = T32(A07 + CA); \
|
||||
A06 = T32(A06 + C9); \
|
||||
A05 = T32(A05 + C8); \
|
||||
A04 = T32(A04 + C7); \
|
||||
A03 = T32(A03 + C6); \
|
||||
A02 = T32(A02 + C5); \
|
||||
A01 = T32(A01 + C4); \
|
||||
A00 = T32(A00 + C3); \
|
||||
} while (0)
|
||||
|
||||
#define INCR_W do { \
|
||||
if ((Wlow = T32(Wlow + 1)) == 0) \
|
||||
Whigh = T32(Whigh + 1); \
|
||||
} while (0)
|
||||
|
||||
static const sph_u32 A_init_192[] = {
|
||||
C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
|
||||
C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
|
||||
C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_192[] = {
|
||||
C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
|
||||
C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
|
||||
C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
|
||||
C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_192[] = {
|
||||
C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
|
||||
C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
|
||||
C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
|
||||
C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
|
||||
};
|
||||
|
||||
static const sph_u32 A_init_224[] = {
|
||||
C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
|
||||
C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
|
||||
C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_224[] = {
|
||||
C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
|
||||
C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
|
||||
C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
|
||||
C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_224[] = {
|
||||
C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
|
||||
C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
|
||||
C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
|
||||
C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
|
||||
};
|
||||
|
||||
static const sph_u32 A_init_256[] = {
|
||||
C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
|
||||
C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
|
||||
C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_256[] = {
|
||||
C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
|
||||
C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
|
||||
C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
|
||||
C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_256[] = {
|
||||
C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
|
||||
C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
|
||||
C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
|
||||
C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
|
||||
};
|
||||
|
||||
static const sph_u32 A_init_384[] = {
|
||||
C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
|
||||
C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
|
||||
C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_384[] = {
|
||||
C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
|
||||
C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
|
||||
C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
|
||||
C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_384[] = {
|
||||
C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
|
||||
C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
|
||||
C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
|
||||
C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
|
||||
};
|
||||
|
||||
static const sph_u32 A_init_512[] = {
|
||||
C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
|
||||
C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
|
||||
C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_512[] = {
|
||||
C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
|
||||
C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
|
||||
C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
|
||||
C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_512[] = {
|
||||
C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
|
||||
C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
|
||||
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
|
||||
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
|
||||
};
|
||||
|
||||
/* END -- automatically generated code. */
|
||||
|
||||
static void
|
||||
shabal_init(void *cc, unsigned size)
|
||||
{
|
||||
/*
|
||||
* We have precomputed initial states for all the supported
|
||||
* output bit lengths.
|
||||
*/
|
||||
const sph_u32 *A_init, *B_init, *C_init;
|
||||
sph_shabal_context *sc;
|
||||
|
||||
switch (size) {
|
||||
case 192:
|
||||
A_init = A_init_192;
|
||||
B_init = B_init_192;
|
||||
C_init = C_init_192;
|
||||
break;
|
||||
case 224:
|
||||
A_init = A_init_224;
|
||||
B_init = B_init_224;
|
||||
C_init = C_init_224;
|
||||
break;
|
||||
case 256:
|
||||
A_init = A_init_256;
|
||||
B_init = B_init_256;
|
||||
C_init = C_init_256;
|
||||
break;
|
||||
case 384:
|
||||
A_init = A_init_384;
|
||||
B_init = B_init_384;
|
||||
C_init = C_init_384;
|
||||
break;
|
||||
case 512:
|
||||
A_init = A_init_512;
|
||||
B_init = B_init_512;
|
||||
C_init = C_init_512;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
sc = cc;
|
||||
memcpy(sc->A, A_init, sizeof sc->A);
|
||||
memcpy(sc->B, B_init, sizeof sc->B);
|
||||
memcpy(sc->C, C_init, sizeof sc->C);
|
||||
sc->Wlow = 1;
|
||||
sc->Whigh = 0;
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_core(void *cc, const unsigned char *data, size_t len)
|
||||
{
|
||||
sph_shabal_context *sc;
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
sc = cc;
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
|
||||
/*
|
||||
* We do not want to copy the state to local variables if the
|
||||
* amount of data is less than what is needed to complete the
|
||||
* current block. Note that it is anyway suboptimal to call
|
||||
* this method many times for small chunks of data.
|
||||
*/
|
||||
if (len < (sizeof sc->buf) - ptr) {
|
||||
memcpy(buf + ptr, data, len);
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE(sc);
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
ptr += clen;
|
||||
data += clen;
|
||||
len -= clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
DECODE_BLOCK;
|
||||
INPUT_BLOCK_ADD;
|
||||
XOR_W;
|
||||
APPLY_P;
|
||||
INPUT_BLOCK_SUB;
|
||||
SWAP_BC;
|
||||
INCR_W;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words)
|
||||
{
|
||||
sph_shabal_context *sc;
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
int i;
|
||||
unsigned z;
|
||||
union {
|
||||
unsigned char tmp_out[64];
|
||||
sph_u32 dummy;
|
||||
} u;
|
||||
size_t out_len;
|
||||
DECL_STATE
|
||||
|
||||
sc = cc;
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr] = ((ub & -z) | z) & 0xFF;
|
||||
memset(buf + ptr + 1, 0, (sizeof sc->buf) - (ptr + 1));
|
||||
READ_STATE(sc);
|
||||
DECODE_BLOCK;
|
||||
INPUT_BLOCK_ADD;
|
||||
XOR_W;
|
||||
APPLY_P;
|
||||
for (i = 0; i < 3; i ++) {
|
||||
SWAP_BC;
|
||||
XOR_W;
|
||||
APPLY_P;
|
||||
}
|
||||
|
||||
/*
|
||||
* We just use our local variables; no need to go through
|
||||
* the state structure. In order to share some code, we
|
||||
* emit the relevant words into a temporary buffer, which
|
||||
* we finally copy into the destination array.
|
||||
*/
|
||||
switch (size_words) {
|
||||
case 16:
|
||||
sph_enc32le_aligned(u.tmp_out + 0, B0);
|
||||
sph_enc32le_aligned(u.tmp_out + 4, B1);
|
||||
sph_enc32le_aligned(u.tmp_out + 8, B2);
|
||||
sph_enc32le_aligned(u.tmp_out + 12, B3);
|
||||
/* fall through */
|
||||
case 12:
|
||||
sph_enc32le_aligned(u.tmp_out + 16, B4);
|
||||
sph_enc32le_aligned(u.tmp_out + 20, B5);
|
||||
sph_enc32le_aligned(u.tmp_out + 24, B6);
|
||||
sph_enc32le_aligned(u.tmp_out + 28, B7);
|
||||
/* fall through */
|
||||
case 8:
|
||||
sph_enc32le_aligned(u.tmp_out + 32, B8);
|
||||
/* fall through */
|
||||
case 7:
|
||||
sph_enc32le_aligned(u.tmp_out + 36, B9);
|
||||
/* fall through */
|
||||
case 6:
|
||||
sph_enc32le_aligned(u.tmp_out + 40, BA);
|
||||
sph_enc32le_aligned(u.tmp_out + 44, BB);
|
||||
sph_enc32le_aligned(u.tmp_out + 48, BC);
|
||||
sph_enc32le_aligned(u.tmp_out + 52, BD);
|
||||
sph_enc32le_aligned(u.tmp_out + 56, BE);
|
||||
sph_enc32le_aligned(u.tmp_out + 60, BF);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
out_len = size_words << 2;
|
||||
memcpy(dst, u.tmp_out + (sizeof u.tmp_out) - out_len, out_len);
|
||||
// shabal_init(sc, size_words << 5);
|
||||
}
|
||||
#if 0
/*
 * Shabal-192 and Shabal-224 entry points (see sph_shabal.h). This whole
 * region is compiled out.
 */

/* Set up cc for a Shabal-192 computation. */
void sph_shabal192_init(void *cc)
{
	shabal_init(cc, 192);
}

/* Feed len bytes of data into a Shabal-192 computation. */
void sph_shabal192(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}

/* Finish Shabal-192 with no extra bits; writes 6 words to dst. */
void sph_shabal192_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 6);
}

/* Finish Shabal-192 after appending n extra bits from ub. */
void sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 6);
}

/* Set up cc for a Shabal-224 computation. */
void sph_shabal224_init(void *cc)
{
	shabal_init(cc, 224);
}

/* Feed len bytes of data into a Shabal-224 computation. */
void sph_shabal224(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}

/* Finish Shabal-224 with no extra bits; writes 7 words to dst. */
void sph_shabal224_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 7);
}

/* Finish Shabal-224 after appending n extra bits from ub. */
void sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 7);
}

#endif
|
||||
/*
 * Set up cc for a Shabal-256 computation (see sph_shabal.h).
 * cc must point to a sph_shabal_context.
 */
void sph_shabal256_init(void *cc)
{
	shabal_init(cc, 256);
}
|
||||
|
||||
/*
 * Feed len bytes of data into a Shabal-256 computation
 * (see sph_shabal.h).
 */
void sph_shabal256(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}
|
||||
|
||||
/*
 * Finish a Shabal-256 computation with no extra bits and write the
 * 8-word (32-byte) digest to dst (see sph_shabal.h).
 */
void sph_shabal256_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 8);
}
|
||||
|
||||
/*
 * Append n extra bits taken from ub, then finish the Shabal-256
 * computation and write the 8-word (32-byte) digest to dst
 * (see sph_shabal.h).
 */
void sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 8);
}
|
||||
|
||||
#if 0
/*
 * Shabal-384 entry points (see sph_shabal.h). This whole region is
 * compiled out.
 */

/* Set up cc for a Shabal-384 computation. */
void sph_shabal384_init(void *cc)
{
	shabal_init(cc, 384);
}

/* Feed len bytes of data into a Shabal-384 computation. */
void sph_shabal384(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}

/* Finish Shabal-384 with no extra bits; writes 12 words to dst. */
void sph_shabal384_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 12);
}

/* Finish Shabal-384 after appending n extra bits from ub. */
void sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 12);
}

#endif
|
||||
|
||||
/*
 * Set up cc for a Shabal-512 computation (see sph_shabal.h).
 * cc must point to a sph_shabal_context.
 */
void sph_shabal512_init(void *cc)
{
	shabal_init(cc, 512);
}
|
||||
|
||||
/*
 * Feed len bytes of data into a Shabal-512 computation
 * (see sph_shabal.h).
 */
void sph_shabal512(void *cc, const void *data, size_t len)
{
	shabal_core(cc, (const unsigned char *)data, len);
}
|
||||
|
||||
/*
 * Finish a Shabal-512 computation with no extra bits and write the
 * 16-word (64-byte) digest to dst (see sph_shabal.h).
 */
void sph_shabal512_close(void *cc, void *dst)
{
	shabal_close(cc, 0, 0, dst, 16);
}
|
||||
|
||||
/*
 * Append n extra bits taken from ub, then finish the Shabal-512
 * computation and write the 16-word (64-byte) digest to dst
 * (see sph_shabal.h).
 */
void sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 16);
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
344
src/crypto/ghostrider/sph_shabal.h
Normal file
344
src/crypto/ghostrider/sph_shabal.h
Normal file
|
@ -0,0 +1,344 @@
|
|||
/* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */
|
||||
/**
|
||||
* Shabal interface. Shabal is a family of functions which differ by
|
||||
* their output size; this implementation defines Shabal for output
|
||||
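* sizes 192, 224, 256, 384 and 512 bits.
*
* NOTE(review): in the accompanying sph_shabal.c the Shabal-384
* functions are wrapped in "#if 0", so they are declared in this header
* but not compiled; using them would presumably fail at link time
* unless that block is re-enabled.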
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_shabal.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SHABAL_H__
|
||||
#define SPH_SHABAL_H__
|
||||
|
||||
#include "sph_types.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-192.
|
||||
*/
|
||||
#define SPH_SIZE_shabal192 192
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-224.
|
||||
*/
|
||||
#define SPH_SIZE_shabal224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-256.
|
||||
*/
|
||||
#define SPH_SIZE_shabal256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-384.
|
||||
*/
|
||||
#define SPH_SIZE_shabal384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-512.
|
||||
*/
|
||||
#define SPH_SIZE_shabal512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Shabal computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a Shabal computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Shabal computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 A[12], B[16], C[16];
|
||||
sph_u32 Whigh, Wlow;
|
||||
#endif
|
||||
} sph_shabal_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-192 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal192_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-224 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal224_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-256 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal256_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-384 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal384_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-512 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-192 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-192 context (pointer to a
|
||||
* <code>sph_shabal192_context</code>)
|
||||
*/
|
||||
void sph_shabal192_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-192 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal192(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-192 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (24 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-192 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal192_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (24 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-192 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-224 context (pointer to a
|
||||
* <code>sph_shabal224_context</code>)
|
||||
*/
|
||||
void sph_shabal224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-256 context (pointer to a
|
||||
* <code>sph_shabal256_context</code>)
|
||||
*/
|
||||
void sph_shabal256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-384 context (pointer to a
|
||||
* <code>sph_shabal384_context</code>)
|
||||
*/
|
||||
void sph_shabal384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-512 context (pointer to a
|
||||
* <code>sph_shabal512_context</code>)
|
||||
*/
|
||||
void sph_shabal512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1764
src/crypto/ghostrider/sph_shavite.c
Normal file
1764
src/crypto/ghostrider/sph_shavite.c
Normal file
File diff suppressed because it is too large
Load diff
314
src/crypto/ghostrider/sph_shavite.h
Normal file
314
src/crypto/ghostrider/sph_shavite.h
Normal file
|
@ -0,0 +1,314 @@
|
|||
/* $Id: sph_shavite.h 208 2010-06-02 20:33:00Z tp $ */
|
||||
/**
|
||||
* SHAvite-3 interface. This code implements SHAvite-3 with the
|
||||
* recommended parameters for SHA-3, with outputs of 224, 256, 384 and
|
||||
* 512 bits. In the following, we call the function "SHAvite" (without
|
||||
* the "-3" suffix), thus "SHAvite-224" is "SHAvite-3 with a 224-bit
|
||||
* output".
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_shavite.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SHAVITE_H__
|
||||
#define SPH_SHAVITE_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-224.
|
||||
*/
|
||||
#define SPH_SIZE_shavite224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-256.
|
||||
*/
|
||||
#define SPH_SIZE_shavite256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-384.
|
||||
*/
|
||||
#define SPH_SIZE_shavite384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-512.
|
||||
*/
|
||||
#define SPH_SIZE_shavite512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-224 and SHAvite-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a SHAvite computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running SHAvite
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 h[8];
|
||||
sph_u32 count0, count1;
|
||||
#endif
|
||||
} sph_shavite_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-224 computations. It is
|
||||
* identical to the common <code>sph_shavite_small_context</code>.
|
||||
*/
|
||||
typedef sph_shavite_small_context sph_shavite224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-256 computations. It is
|
||||
* identical to the common <code>sph_shavite_small_context</code>.
|
||||
*/
|
||||
typedef sph_shavite_small_context sph_shavite256_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-384 and SHAvite-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a SHAvite computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running SHAvite
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 h[16];
|
||||
sph_u32 count0, count1, count2, count3;
|
||||
#endif
|
||||
} sph_shavite_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-384 computations. It is
|
||||
* identical to the common <code>sph_shavite_big_context</code>.
|
||||
*/
|
||||
typedef sph_shavite_big_context sph_shavite384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-512 computations. It is
|
||||
* identical to the common <code>sph_shavite_big_context</code>.
|
||||
*/
|
||||
typedef sph_shavite_big_context sph_shavite512_context;
|
||||
|
||||
/**
|
||||
* Initialize a SHAvite-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHAvite-224 context (pointer to a
|
||||
* <code>sph_shavite224_context</code>)
|
||||
*/
|
||||
void sph_shavite224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHAvite-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shavite224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a SHAvite-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHAvite-256 context (pointer to a
|
||||
* <code>sph_shavite256_context</code>)
|
||||
*/
|
||||
void sph_shavite256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHAvite-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shavite256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a SHAvite-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHAvite-384 context (pointer to a
|
||||
* <code>sph_shavite384_context</code>)
|
||||
*/
|
||||
void sph_shavite384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHAvite-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shavite384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a SHAvite-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHAvite-512 context (pointer to a
|
||||
* <code>sph_shavite512_context</code>)
|
||||
*/
|
||||
void sph_shavite512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHAvite-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shavite512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1799
src/crypto/ghostrider/sph_simd.c
Normal file
1799
src/crypto/ghostrider/sph_simd.c
Normal file
File diff suppressed because it is too large
Load diff
309
src/crypto/ghostrider/sph_simd.h
Normal file
309
src/crypto/ghostrider/sph_simd.h
Normal file
|
@ -0,0 +1,309 @@
|
|||
/* $Id: sph_simd.h 154 2010-04-26 17:00:24Z tp $ */
|
||||
/**
|
||||
* SIMD interface. SIMD is a family of functions which differ by
|
||||
* their output size; this implementation defines SIMD for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_simd.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SIMD_H__
|
||||
#define SPH_SIMD_H__
|
||||
|
||||
/*
 * Pull in dependencies before opening the extern "C" region, so that the
 * included headers are not given C language linkage as a side effect
 * when this header is compiled as C++. This matches the include order
 * used by sph_shabal.h and sph_shavite.h.
 */
#include <stddef.h>
#include "sph_types.h"

#ifdef __cplusplus
extern "C"{
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-224.
|
||||
*/
|
||||
#define SPH_SIZE_simd224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-256.
|
||||
*/
|
||||
#define SPH_SIZE_simd256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-384.
|
||||
*/
|
||||
#define SPH_SIZE_simd384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-512.
|
||||
*/
|
||||
#define SPH_SIZE_simd512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for SIMD computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an SIMD computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for SIMD-224
|
||||
* and SIMD-256.
|
||||
*
|
||||
* The contents of this structure are private. A running SIMD computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 state[16];
|
||||
sph_u32 count_low, count_high;
|
||||
#endif
|
||||
} sph_simd_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SIMD computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an SIMD computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for SIMD-384
|
||||
* and SIMD-512.
|
||||
*
|
||||
* The contents of this structure are private. A running SIMD computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 state[32];
|
||||
sph_u32 count_low, count_high;
|
||||
#endif
|
||||
} sph_simd_big_context;
|
||||
|
||||
/**
|
||||
* Type for a SIMD-224 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_simd_small_context sph_simd224_context;
|
||||
|
||||
/**
|
||||
* Type for a SIMD-256 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_simd_small_context sph_simd256_context;
|
||||
|
||||
/**
|
||||
* Type for a SIMD-384 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_simd_big_context sph_simd384_context;
|
||||
|
||||
/**
|
||||
* Type for a SIMD-512 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_simd_big_context sph_simd512_context;
|
||||
|
||||
/**
|
||||
* Initialize an SIMD-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SIMD-224 context (pointer to a
|
||||
* <code>sph_simd224_context</code>)
|
||||
*/
|
||||
void sph_simd224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SIMD-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_simd224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SIMD-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an SIMD-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SIMD-256 context (pointer to a
|
||||
* <code>sph_simd256_context</code>)
|
||||
*/
|
||||
void sph_simd256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SIMD-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_simd256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SIMD-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an SIMD-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SIMD-384 context (pointer to a
|
||||
* <code>sph_simd384_context</code>)
|
||||
*/
|
||||
void sph_simd384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SIMD-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_simd384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SIMD-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an SIMD-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SIMD-512 context (pointer to a
|
||||
* <code>sph_simd512_context</code>)
|
||||
*/
|
||||
void sph_simd512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SIMD-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_simd512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SIMD-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1254
src/crypto/ghostrider/sph_skein.c
Normal file
1254
src/crypto/ghostrider/sph_skein.c
Normal file
File diff suppressed because it is too large
Load diff
298
src/crypto/ghostrider/sph_skein.h
Normal file
298
src/crypto/ghostrider/sph_skein.h
Normal file
|
@ -0,0 +1,298 @@
|
|||
/* $Id: sph_skein.h 253 2011-06-07 18:33:10Z tp $ */
|
||||
/**
|
||||
* Skein interface. The Skein specification defines three main
|
||||
* functions, called Skein-256, Skein-512 and Skein-1024, which can be
|
||||
* further parameterized with an output length. For the SHA-3
|
||||
* competition, Skein-512 is used for output sizes of 224, 256, 384 and
|
||||
* 512 bits; this is what this code implements. Thus, we hereafter call
|
||||
* Skein-224, Skein-256, Skein-384 and Skein-512 what the Skein
|
||||
* specification defines as Skein-512-224, Skein-512-256, Skein-512-384
|
||||
* and Skein-512-512, respectively.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_skein.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SKEIN_H__
|
||||
#define SPH_SKEIN_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-224.
|
||||
*/
|
||||
#define SPH_SIZE_skein224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-256.
|
||||
*/
|
||||
#define SPH_SIZE_skein256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-384.
|
||||
*/
|
||||
#define SPH_SIZE_skein384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-512.
|
||||
*/
|
||||
#define SPH_SIZE_skein512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Skein computations (with 224-, 256-, 384- or
|
||||
* 512-bit output): it contains the intermediate values and some data
|
||||
* from the last entered block. Once a Skein computation has been
|
||||
* performed, the context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Skein computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE /* fields below are skipped when DOXYGEN_IGNORE is defined */
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr; /* NOTE(review): presumably the number of bytes currently held in buf -- confirm in sph_skein.c */
|
||||
sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; /* eight 64-bit state words (512 bits in total) */
|
||||
sph_u64 bcount; /* NOTE(review): presumably a count of blocks processed so far -- confirm in sph_skein.c */
|
||||
#endif
|
||||
} sph_skein_big_context;
|
||||
|
||||
/**
|
||||
* Type for a Skein-224 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_skein_big_context sph_skein224_context;
|
||||
|
||||
/**
|
||||
* Type for a Skein-256 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_skein_big_context sph_skein256_context;
|
||||
|
||||
/**
|
||||
* Type for a Skein-384 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_skein_big_context sph_skein384_context;
|
||||
|
||||
/**
|
||||
* Type for a Skein-512 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_skein_big_context sph_skein512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Skein-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Skein-224 context (pointer to a
|
||||
* <code>sph_skein224_context</code>)
|
||||
*/
|
||||
void sph_skein224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Skein-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_skein224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Skein-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Skein-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Skein-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Skein-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Skein-256 context (pointer to a
|
||||
* <code>sph_skein256_context</code>)
|
||||
*/
|
||||
void sph_skein256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Skein-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_skein256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Skein-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Skein-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Skein-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Skein-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Skein-384 context (pointer to a
|
||||
* <code>sph_skein384_context</code>)
|
||||
*/
|
||||
void sph_skein384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Skein-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_skein384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Skein-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Skein-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Skein-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Skein-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Skein-512 context (pointer to a
|
||||
* <code>sph_skein512_context</code>)
|
||||
*/
|
||||
void sph_skein512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Skein-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_skein512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Skein-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Skein-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Skein-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1976
src/crypto/ghostrider/sph_types.h
Normal file
1976
src/crypto/ghostrider/sph_types.h
Normal file
File diff suppressed because it is too large
Load diff
3481
src/crypto/ghostrider/sph_whirlpool.c
Normal file
3481
src/crypto/ghostrider/sph_whirlpool.c
Normal file
File diff suppressed because it is too large
Load diff
224
src/crypto/ghostrider/sph_whirlpool.h
Normal file
224
src/crypto/ghostrider/sph_whirlpool.h
Normal file
|
@ -0,0 +1,224 @@
|
|||
/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* WHIRLPOOL interface.
|
||||
*
|
||||
* WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original
|
||||
* version, published in 2000, studied by NESSIE), "WHIRLPOOL-1"
|
||||
* (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current
|
||||
* version, 2003, with a new diffusion matrix, also described as "plain
|
||||
* WHIRLPOOL"). All three variants are implemented here.
|
||||
*
|
||||
* The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L.
|
||||
* M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open
|
||||
* NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
|
||||
*
|
||||
* The current WHIRLPOOL specification and a reference implementation
|
||||
* can be found on the WHIRLPOOL web page:
|
||||
* http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_whirlpool.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_WHIRLPOOL_H__
|
||||
#define SPH_WHIRLPOOL_H__
|
||||
|
||||
#include "sph_types.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-0.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool0 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-1.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool1 512
|
||||
|
||||
/**
|
||||
* This structure is a context for WHIRLPOOL computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a WHIRLPOOL computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running WHIRLPOOL computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE /* fields below are skipped when DOXYGEN_IGNORE is defined */
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
sph_u64 state[8]; /* eight 64-bit state words (512 bits, matching SPH_SIZE_whirlpool) */
|
||||
#if SPH_64
|
||||
sph_u64 count; /* NOTE(review): presumably the running input length counter -- confirm units in sph_whirlpool.c */
|
||||
#else /* cannot be selected as written: this whole declaration already sits inside an enclosing "#if SPH_64" */
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_whirlpool_context;
|
||||
|
||||
/**
|
||||
* Initialize a WHIRLPOOL context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool_context</code>)
|
||||
*/
|
||||
void sph_whirlpool_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* plain WHIRLPOOL algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool_close(void *cc, void *dst);
|
||||
|
||||
/**
* One-shot convenience macro: initializes the context, feeds it
* <code>len</code> bytes from <code>data</code>, then closes the
* computation and writes the result into <code>dst</code>, by calling
* sph_whirlpool_init(), sph_whirlpool() and sph_whirlpool_close() in
* that order.
*
* The <code>cc</code> argument is expanded three times, so it must not
* be an expression with side effects. The body is wrapped in
* <code>do { } while (0)</code> so that the macro can be used like a
* single statement.
*
* @param cc the WHIRLPOOL context
* @param dst the destination buffer
* @param data the input data
* @param len the input data length (in bytes)
*/
#define sph_whirlpool512_full(cc, dst, data, len) \
|
||||
do { \
|
||||
sph_whirlpool_init(cc); \
|
||||
sph_whirlpool(cc, data, len); \
|
||||
sph_whirlpool_close(cc, dst); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-0 uses the same structure as plain WHIRLPOOL.
|
||||
*/
|
||||
typedef sph_whirlpool_context sph_whirlpool0_context;
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Initialize a WHIRLPOOL-0 context. This function is identical to
|
||||
* <code>sph_whirlpool_init()</code>.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool0_context</code>)
|
||||
*/
|
||||
void sph_whirlpool0_init(void *cc);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_whirlpool0_init sph_whirlpool_init
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* WHIRLPOOL-0 algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool0(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL-0 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL-0 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool0_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-1 uses the same structure as plain WHIRLPOOL.
|
||||
*/
|
||||
typedef sph_whirlpool_context sph_whirlpool1_context;
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Initialize a WHIRLPOOL-1 context. This function is identical to
|
||||
* <code>sph_whirlpool_init()</code>.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool1_context</code>)
|
||||
*/
|
||||
void sph_whirlpool1_init(void *cc);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_whirlpool1_init sph_whirlpool_init
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* WHIRLPOOL-1 algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool1(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL-1 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL-1 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool1_close(void *cc, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -91,6 +91,9 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
|
|||
if ((f != Algorithm::RANDOM_X)
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
&& (f != Algorithm::CN_HEAVY)
|
||||
# endif
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
&& (f != Algorithm::GHOSTRIDER)
|
||||
# endif
|
||||
) {
|
||||
# ifdef XMRIG_FEATURE_MSR
|
||||
|
@ -112,6 +115,12 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
|
|||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (f == Algorithm::GHOSTRIDER) {
|
||||
return true;
|
||||
}
|
||||
# endif
|
||||
|
||||
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
|
||||
randomx_set_huge_pages_jit(cpu.isHugePagesJit());
|
||||
randomx_set_optimized_dataset_init(config.initDatasetAVX2());
|
||||
|
|
|
@ -282,8 +282,13 @@ void xmrig::Network::setJob(IClient *client, const Job &job, bool donate)
|
|||
snprintf(tx_buf, sizeof(tx_buf), " (%u tx)", num_transactions);
|
||||
}
|
||||
|
||||
LOG_INFO("%s " MAGENTA_BOLD("new job") " from " WHITE_BOLD("%s:%d%s") " diff " WHITE_BOLD("%" PRIu64 "%s") " algo " WHITE_BOLD("%s") " height " WHITE_BOLD("%" PRIu64) "%s",
|
||||
Tags::network(), client->pool().host().data(), client->pool().port(), zmq_buf, diff, scale, job.algorithm().name(), job.height(), tx_buf);
|
||||
char height_buf[64] = {};
|
||||
if (job.height() > 0) {
|
||||
snprintf(height_buf, sizeof(height_buf), " height " WHITE_BOLD("%" PRIu64), job.height());
|
||||
}
|
||||
|
||||
LOG_INFO("%s " MAGENTA_BOLD("new job") " from " WHITE_BOLD("%s:%d%s") " diff " WHITE_BOLD("%" PRIu64 "%s") " algo " WHITE_BOLD("%s") "%s%s",
|
||||
Tags::network(), client->pool().host().data(), client->pool().port(), zmq_buf, diff, scale, job.algorithm().name(), height_buf, tx_buf);
|
||||
}
|
||||
|
||||
if (!donate && m_donate) {
|
||||
|
|
Loading…
Reference in a new issue