diff --git a/CMakeLists.txt b/CMakeLists.txt index 152883198..b3a4b5dc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,14 +1,15 @@ cmake_minimum_required(VERSION 2.8) project(xmrig) -option(WITH_LIBCPUID "Use Libcpuid" ON) -option(WITH_HWLOC "Use hwloc" ON) -option(WITH_CN_LITE "CryptoNight-Lite support" ON) -option(WITH_CN_HEAVY "CryptoNight-Heavy support" ON) -option(WITH_CN_PICO "CryptoNight-Pico support" ON) -option(WITH_CN_GPU "CryptoNight-GPU support" ON) -option(WITH_RANDOMX "RandomX support" ON) -option(WITH_HTTP "HTTP protocol support (client/server)" ON) +option(WITH_LIBCPUID "Enable libcpuid support" ON) +option(WITH_HWLOC "Enable hwloc support" ON) +option(WITH_CN_LITE "Enable CryptoNight-Lite algorithms family" ON) +option(WITH_CN_HEAVY "Enable CryptoNight-Heavy algorithms family" ON) +option(WITH_CN_PICO "Enable CryptoNight-Pico algorithm" ON) +option(WITH_CN_GPU "Enable CryptoNight-GPU algorithm" ON) +option(WITH_RANDOMX "Enable RandomX algorithms family" ON) +option(WITH_ARGON2 "Enable Argon2 algorithms family" ON) +option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON) option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_TLS "Enable OpenSSL support" ON) option(WITH_ASM "Enable ASM PoW implementations" ON) @@ -151,66 +152,9 @@ add_definitions(/DUNICODE) find_package(UV REQUIRED) -if (WITH_RANDOMX) - include_directories(src/crypto/randomx) - add_definitions(/DXMRIG_ALGO_RANDOMX) - set(SOURCES_CRYPTO - "${SOURCES_CRYPTO}" - src/crypto/randomx/aes_hash.cpp - src/crypto/randomx/allocator.cpp - src/crypto/randomx/argon2_core.c - src/crypto/randomx/argon2_ref.c - src/crypto/randomx/blake2_generator.cpp - src/crypto/randomx/blake2/blake2b.c - src/crypto/randomx/bytecode_machine.cpp - src/crypto/randomx/dataset.cpp - src/crypto/randomx/instructions_portable.cpp - src/crypto/randomx/randomx.cpp - src/crypto/randomx/reciprocal.c - src/crypto/randomx/soft_aes.cpp - src/crypto/randomx/superscalar.cpp - src/crypto/randomx/virtual_machine.cpp - src/crypto/randomx/virtual_memory.cpp - src/crypto/randomx/vm_compiled_light.cpp - src/crypto/randomx/vm_compiled.cpp - src/crypto/randomx/vm_interpreted_light.cpp - src/crypto/randomx/vm_interpreted.cpp - src/crypto/rx/Rx.cpp - src/crypto/rx/Rx.h - src/crypto/rx/RxAlgo.cpp - src/crypto/rx/RxAlgo.h - src/crypto/rx/RxCache.cpp - src/crypto/rx/RxCache.h - src/crypto/rx/RxConfig.cpp - src/crypto/rx/RxConfig.h - src/crypto/rx/RxDataset.cpp - src/crypto/rx/RxDataset.h - src/crypto/rx/RxVm.cpp - src/crypto/rx/RxVm.h - ) - if (NOT ARCH_ID) - set(ARCH_ID ${CMAKE_HOST_SYSTEM_PROCESSOR}) - endif() - if (CMAKE_C_COMPILER_ID MATCHES MSVC) - enable_language(ASM_MASM) - list(APPEND SOURCES_CRYPTO - src/crypto/randomx/jit_compiler_x86_static.asm - src/crypto/randomx/jit_compiler_x86.cpp - ) - elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) - list(APPEND SOURCES_CRYPTO - src/crypto/randomx/jit_compiler_x86_static.S - src/crypto/randomx/jit_compiler_x86.cpp - ) - # cheat because cmake and ccache hate each other - set_property(SOURCE src/crypto/randomx/jit_compiler_x86_static.S PROPERTY LANGUAGE C) - endif() -else() - remove_definitions(/DXMRIG_ALGO_RANDOMX) -endif() - include(cmake/flags.cmake) - +include(cmake/randomx.cmake) +include(cmake/argon2.cmake) include(cmake/OpenSSL.cmake) include(cmake/asm.cmake) include(cmake/cn-gpu.cmake) @@ -244,4 +188,4 @@ if (WITH_DEBUG_LOG) endif() add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES}) -target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB}) +target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY}) diff --git a/cmake/argon2.cmake b/cmake/argon2.cmake new file mode 100644 index 000000000..29ac9094f --- /dev/null +++ b/cmake/argon2.cmake @@ -0,0 +1,13 @@ +if (WITH_ARGON2) + add_definitions(/DXMRIG_ALGO_ARGON2) + + list(APPEND HEADERS_CRYPTO + src/crypto/argon2/Argon2.h + ) + + add_subdirectory(src/3rdparty/argon2) + set(ARGON2_LIBRARY argon2) +else() + remove_definitions(/DXMRIG_ALGO_ARGON2) + set(ARGON2_LIBRARY "") +endif() diff --git a/cmake/randomx.cmake b/cmake/randomx.cmake new file mode 100644 index 000000000..49f9f498a --- /dev/null +++ b/cmake/randomx.cmake @@ -0,0 +1,58 @@ +if (WITH_RANDOMX) + include_directories(src/crypto/randomx) + add_definitions(/DXMRIG_ALGO_RANDOMX) + + list(APPEND HEADERS_CRYPTO + src/crypto/rx/Rx.h + src/crypto/rx/RxAlgo.h + src/crypto/rx/RxCache.h + src/crypto/rx/RxConfig.h + src/crypto/rx/RxDataset.h + src/crypto/rx/RxVm.h + ) + + list(APPEND SOURCES_CRYPTO + src/crypto/randomx/aes_hash.cpp + src/crypto/randomx/allocator.cpp + src/crypto/randomx/argon2_core.c + src/crypto/randomx/argon2_ref.c + src/crypto/randomx/blake2_generator.cpp + src/crypto/randomx/blake2/blake2b.c + src/crypto/randomx/bytecode_machine.cpp + src/crypto/randomx/dataset.cpp + src/crypto/randomx/instructions_portable.cpp + src/crypto/randomx/randomx.cpp + src/crypto/randomx/reciprocal.c + src/crypto/randomx/soft_aes.cpp + src/crypto/randomx/superscalar.cpp + src/crypto/randomx/virtual_machine.cpp + src/crypto/randomx/virtual_memory.cpp + src/crypto/randomx/vm_compiled_light.cpp + src/crypto/randomx/vm_compiled.cpp + src/crypto/randomx/vm_interpreted_light.cpp + src/crypto/randomx/vm_interpreted.cpp + src/crypto/rx/Rx.cpp + src/crypto/rx/RxAlgo.cpp + src/crypto/rx/RxCache.cpp + src/crypto/rx/RxConfig.cpp + src/crypto/rx/RxDataset.cpp + src/crypto/rx/RxVm.cpp + ) + + if (CMAKE_C_COMPILER_ID MATCHES MSVC) + enable_language(ASM_MASM) + list(APPEND SOURCES_CRYPTO + src/crypto/randomx/jit_compiler_x86_static.asm + src/crypto/randomx/jit_compiler_x86.cpp + ) + elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) + list(APPEND SOURCES_CRYPTO + src/crypto/randomx/jit_compiler_x86_static.S + src/crypto/randomx/jit_compiler_x86.cpp + ) + # cheat because cmake and ccache hate each other + set_property(SOURCE src/crypto/randomx/jit_compiler_x86_static.S PROPERTY LANGUAGE C) + endif() +else() + remove_definitions(/DXMRIG_ALGO_RANDOMX) +endif() diff --git a/src/3rdparty/argon2.h b/src/3rdparty/argon2.h new file mode 100644 index 000000000..6d5f2e694 --- /dev/null +++ b/src/3rdparty/argon2.h @@ -0,0 +1,33 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef XMRIG_3RDPARTY_ARGON2_H +#define XMRIG_3RDPARTY_ARGON2_H + + +#include "3rdparty/argon2/include/argon2.h" + + +#endif /* XMRIG_3RDPARTY_ARGON2_H */ diff --git a/src/3rdparty/argon2/CMakeLists.txt b/src/3rdparty/argon2/CMakeLists.txt new file mode 100644 index 000000000..2dd9ecd4a --- /dev/null +++ b/src/3rdparty/argon2/CMakeLists.txt @@ -0,0 +1,95 @@ +cmake_minimum_required(VERSION 2.6) + +project(Argon2 C) +set(ARGON2_VERSION 1.0) +set(CMAKE_C_STANDARD 90) +set(CMAKE_C_STANDARD_REQUIRED ON) + +include(CheckCSourceCompiles) +find_package(Threads REQUIRED) + +add_library(argon2-interface INTERFACE) +target_include_directories(argon2-interface INTERFACE + $ + $ +) + +add_library(argon2-internal INTERFACE) +target_include_directories(argon2-internal INTERFACE lib lib/blake2) +target_link_libraries(argon2-internal INTERFACE argon2-interface) + +add_library(argon2 STATIC + lib/argon2.c + lib/core.c + lib/encoding.c + lib/genkat.c + lib/impl-select.c + lib/thread.c + lib/blake2/blake2.c +) +target_link_libraries(argon2 + PUBLIC argon2-interface ${CMAKE_THREAD_LIBS_INIT} + PRIVATE argon2-internal +) + +set_property(TARGET argon2 PROPERTY VERSION ${Upstream_VERSION}) +set_property(TARGET argon2 PROPERTY SOVERSION 1) +set_property(TARGET argon2 PROPERTY INTERFACE_ARGON2_MAJOR_VERSION 1) +set_property(TARGET argon2 APPEND PROPERTY + COMPATIBLE_INTERFACE_STRING ARGON2_MAJOR_VERSION +) + +#if (CMAKE_C_COMPILER_ID MATCHES MSVC) + +#elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) +# function(add_feature_impl FEATURE GCC_FLAG DEF) +# add_library(argon2-${FEATURE} STATIC +# arch/x86_64/lib/argon2-${FEATURE}.c +# ) +# target_link_libraries(argon2-${FEATURE} PRIVATE argon2-internal) +# set_target_properties(argon2-${FEATURE} +# PROPERTIES POSITION_INDEPENDENT_CODE True +# ) + +# message("-- Detecting feature '${FEATURE}'...") +# file(READ arch/x86_64/src/test-feature-${FEATURE}.c SOURCE_CODE) + +# # try without flag: +# check_c_source_compiles("${SOURCE_CODE}" FEATURE_${FEATURE}_NOFLAG) +# set(HAS_FEATURE ${FEATURE_${FEATURE}_NOFLAG}) +# if(NOT "${HAS_FEATURE}") +# # try with -m flag: +# set(CMAKE_REQUIRED_FLAGS ${GCC_FLAG}) +# check_c_source_compiles("${SOURCE_CODE}" FEATURE_${FEATURE}_FLAG) +# set(CMAKE_REQUIRED_FLAGS "") + +# set(HAS_FEATURE ${FEATURE_${FEATURE}_FLAG}) +# if(${HAS_FEATURE}) +# target_compile_options(argon2-${FEATURE} PRIVATE ${GCC_FLAG}) +# endif() +# endif() + +# if(${HAS_FEATURE}) +# message("-- Feature '${FEATURE}' detected!") +# target_compile_definitions(argon2-${FEATURE} PRIVATE ${DEF}) +# endif() +# target_link_libraries(argon2 PUBLIC argon2-${FEATURE}) +# endfunction() + +# target_include_directories(argon2-internal INTERFACE arch/x86_64/lib) + +# add_feature_impl(sse2 -msse2 HAVE_SSE2) +# add_feature_impl(ssse3 -mssse3 HAVE_SSSE3) +# add_feature_impl(xop -mxop HAVE_XOP) +# add_feature_impl(avx2 -mavx2 HAVE_AVX2) +# add_feature_impl(avx512f -mavx512f HAVE_AVX512F) + +# target_sources(argon2 PRIVATE +# arch/x86_64/lib/argon2-arch.c +# arch/x86_64/lib/cpu-flags.c +# ) +#else() + target_sources(argon2 PRIVATE + arch/generic/lib/argon2-arch.c + ) +#endif() diff --git a/src/3rdparty/argon2/LICENSE b/src/3rdparty/argon2/LICENSE new file mode 100644 index 000000000..f9b00035c --- /dev/null +++ b/src/3rdparty/argon2/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 Ondrej Mosnáček + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/3rdparty/argon2/README.md b/src/3rdparty/argon2/README.md new file mode 100644 index 000000000..254e26dad --- /dev/null +++ b/src/3rdparty/argon2/README.md @@ -0,0 +1,58 @@ +# Argon2 [![Build Status](https://travis-ci.org/WOnder93/argon2.svg?branch=master)](https://travis-ci.org/WOnder93/argon2) +A multi-arch library implementing the Argon2 password hashing algorithm. + +This project is based on the [original source code](https://github.com/P-H-C/phc-winner-argon2) by the Argon2 authors. The goal of this project is to provide efficient Argon2 implementations for various HW architectures (x86, SSE, ARM, PowerPC, ...). + +For the x86_64 architecture, the library implements a simple CPU dispatch which automatically selects the best implementation based on CPU flags and quick benchmarks. + +# Building +## Using GNU autotools + +To prepare the build environment, run: +```bash +autoreconf -i +./configure +``` + +After that, just run `make` to build the library. + +### Running tests +After configuring the build environment, run `make check` to run the tests. + +### Architecture options +You can specify the target architecture by passing the `--host=...` flag to `./configure`. + +Supported architectures: + * `x86_64` – 64-bit x86 architecture + * `generic` – use generic C impementation + +## Using CMake + +To prepare the build environment, run: +```bash +cmake -DCMAKE_BUILD_TYPE=Release . +``` + +Then you can run `make` to build the library. + +## Using QMake/Qt Creator +A [QMake](http://doc.qt.io/qt-4.8/qmake-manual.html) project is also available in the `qmake` directory. You can open it in the [Qt Creator IDE](http://wiki.qt.io/Category:Tools::QtCreator) or build it from terminal: +```bash +cd qmake +# see table below for the list of possible ARCH and CONFIG values +qmake ARCH=... CONFIG+=... +make +``` + +### Architecture options +For QMake builds you can configure support for different architectures. Use the `ARCH` variable to choose the architecture and the `CONFIG` variable to set additional options. + +Supported architectures: + * `x86_64` – 64-bit x86 architecture + * QMake config flags: + * `USE_SSE2` – use SSE2 instructions + * `USE_SSSE3` – use SSSE3 instructions + * `USE_XOP` – use XOP instructions + * `USE_AVX2` – use AVX2 instructions + * `USE_AVX512F` – use AVX-512F instructions + * `generic` – use generic C impementation diff --git a/src/3rdparty/argon2/arch/generic/lib/argon2-arch.c b/src/3rdparty/argon2/arch/generic/lib/argon2-arch.c new file mode 100644 index 000000000..39abadeeb --- /dev/null +++ b/src/3rdparty/argon2/arch/generic/lib/argon2-arch.c @@ -0,0 +1,20 @@ +#include +#include +#include + +#include "impl-select.h" + +#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + +#include "argon2-template-64.h" + +void fill_segment_default(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_64(instance, position); +} + +void argon2_get_impl_list(argon2_impl_list *list) +{ + list->count = 0; +} diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c new file mode 100644 index 000000000..4486e3951 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c @@ -0,0 +1,41 @@ +#include +#include +#include + +#include "impl-select.h" + +#include "cpu-flags.h" +#include "argon2-sse2.h" +#include "argon2-ssse3.h" +#include "argon2-xop.h" +#include "argon2-avx2.h" +#include "argon2-avx512f.h" + +/* NOTE: there is no portable intrinsic for 64-bit rotate, but any + * sane compiler should be able to compile this into a ROR instruction: */ +#define rotr64(x, n) ((x) >> (n)) | ((x) << (64 - (n))) + +#include "argon2-template-64.h" + +void fill_segment_default(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_64(instance, position); +} + +void argon2_get_impl_list(argon2_impl_list *list) +{ + static const argon2_impl IMPLS[] = { + { "x86_64", NULL, fill_segment_default }, + { "SSE2", check_sse2, fill_segment_sse2 }, + { "SSSE3", check_ssse3, fill_segment_ssse3 }, + { "XOP", check_xop, fill_segment_xop }, + { "AVX2", check_avx2, fill_segment_avx2 }, + { "AVX-512F", check_avx512f, fill_segment_avx512f }, + }; + + cpu_flags_get(); + + list->count = sizeof(IMPLS) / sizeof(IMPLS[0]); + list->entries = IMPLS; +} diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c new file mode 100644 index 000000000..01cb9078d --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c @@ -0,0 +1,343 @@ +#include "argon2-avx2.h" + +#ifdef HAVE_AVX2 +#include + +#include + +#include "cpu-flags.h" + +#define r16 (_mm256_setr_epi8( \ + 2, 3, 4, 5, 6, 7, 0, 1, \ + 10, 11, 12, 13, 14, 15, 8, 9, \ + 18, 19, 20, 21, 22, 23, 16, 17, \ + 26, 27, 28, 29, 30, 31, 24, 25)) + +#define r24 (_mm256_setr_epi8( \ + 3, 4, 5, 6, 7, 0, 1, 2, \ + 11, 12, 13, 14, 15, 8, 9, 10, \ + 19, 20, 21, 22, 23, 16, 17, 18, \ + 27, 28, 29, 30, 31, 24, 25, 26)) + +#define ror64_16(x) _mm256_shuffle_epi8((x), r16) +#define ror64_24(x) _mm256_shuffle_epi8((x), r24) +#define ror64_32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) +#define ror64_63(x) \ + _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x))) + +static __m256i f(__m256i x, __m256i y) +{ + __m256i z = _mm256_mul_epu32(x, y); + return _mm256_add_epi64(_mm256_add_epi64(x, y), _mm256_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm256_xor_si256(D0, A0); \ + D1 = _mm256_xor_si256(D1, A1); \ +\ + D0 = ror64_32(D0); \ + D1 = ror64_32(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm256_xor_si256(B0, C0); \ + B1 = _mm256_xor_si256(B1, C1); \ +\ + B0 = ror64_24(B0); \ + B1 = ror64_24(B1); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm256_xor_si256(D0, A0); \ + D1 = _mm256_xor_si256(D1, A1); \ +\ + D0 = ror64_16(D0); \ + D1 = ror64_16(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm256_xor_si256(B0, C0); \ + B1 = _mm256_xor_si256(B1, C1); \ +\ + B0 = ror64_63(B0); \ + B1 = ror64_63(B1); \ + } while ((void)0, 0) + +#define DIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ + B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ +\ + C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ + D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ + B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ +\ + C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ + D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ + } while ((void)0, 0) + +#define DIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m256i tmp1, tmp2; \ + tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ + tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ + B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ +\ + tmp1 = C0; \ + C0 = C1; \ + C1 = tmp1; \ +\ + tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ + tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ + D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m256i tmp1, tmp2; \ + tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ + tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ + B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ +\ + tmp1 = C0; \ + C0 = C1; \ + C1 = tmp1; \ +\ + tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ + tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ + D1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + D0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + UNDIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + UNDIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +enum { + ARGON2_HWORDS_IN_BLOCK = ARGON2_OWORDS_IN_BLOCK / 2, +}; + +static void fill_block(__m256i *s, const block *ref_block, block *next_block, + int with_xor) +{ + __m256i block_XY[ARGON2_HWORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + s[i] =_mm256_xor_si256( + s[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); + block_XY[i] = _mm256_xor_si256( + s[i], _mm256_loadu_si256((const __m256i *)next_block->v + i)); + } + + } else { + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + block_XY[i] = s[i] =_mm256_xor_si256( + s[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); + } + } + + for (i = 0; i < 4; ++i) { + BLAKE2_ROUND1( + s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3], + s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]); + } + + for (i = 0; i < 4; ++i) { + BLAKE2_ROUND2( + s[4 * 0 + i], s[4 * 1 + i], s[4 * 2 + i], s[4 * 3 + i], + s[4 * 4 + i], s[4 * 5 + i], s[4 * 6 + i], s[4 * 7 + i]); + } + + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + s[i] = _mm256_xor_si256(s[i], block_XY[i]); + _mm256_storeu_si256((__m256i *)next_block->v + i, s[i]); + } +} + +static void next_addresses(block *address_block, block *input_block) +{ + /*Temporary zero-initialized blocks*/ + __m256i zero_block[ARGON2_HWORDS_IN_BLOCK]; + __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK]; + + memset(zero_block, 0, sizeof(zero_block)); + memset(zero2_block, 0, sizeof(zero2_block)); + + /*Increasing index counter*/ + input_block->v[6]++; + + /*First iteration of G*/ + fill_block(zero_block, input_block, address_block, 0); + + /*Second iteration of G*/ + fill_block(zero2_block, address_block, address_block, 0); +} + +void fill_segment_avx2(const argon2_instance_t *instance, + argon2_position_t position) +{ + block *ref_block = NULL, *curr_block = NULL; + block address_block, input_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index, i; + __m256i state[ARGON2_HWORDS_IN_BLOCK]; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. + */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + + /* version 1.2.1 and earlier: overwrite, not XOR */ + if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { + fill_block(state, ref_block, curr_block, 0); + } else { + fill_block(state, ref_block, curr_block, 1); + } + } +} + +int check_avx2(void) +{ + return cpu_flags_have_avx2(); +} + +#else + +void fill_segment_avx2(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + +int check_avx2(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h new file mode 100644 index 000000000..8abdb8a53 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_AVX2_H +#define ARGON2_AVX2_H + +#include "core.h" + +void fill_segment_avx2(const argon2_instance_t *instance, + argon2_position_t position); + +int check_avx2(void); + +#endif // ARGON2_AVX2_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c new file mode 100644 index 000000000..0d329ea23 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c @@ -0,0 +1,328 @@ +#include "argon2-avx512f.h" + +#ifdef HAVE_AVX512F +#include +#include + +#include + +#include "cpu-flags.h" + +#define ror64(x, n) _mm512_ror_epi64((x), (n)) + +static __m512i f(__m512i x, __m512i y) +{ + __m512i z = _mm512_mul_epu32(x, y); + return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm512_xor_si512(D0, A0); \ + D1 = _mm512_xor_si512(D1, A1); \ +\ + D0 = ror64(D0, 32); \ + D1 = ror64(D1, 32); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm512_xor_si512(B0, C0); \ + B1 = _mm512_xor_si512(B1, C1); \ +\ + B0 = ror64(B0, 24); \ + B1 = ror64(B1, 24); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm512_xor_si512(D0, A0); \ + D1 = _mm512_xor_si512(D1, A1); \ +\ + D0 = ror64(D0, 16); \ + D1 = ror64(D1, 16); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm512_xor_si512(B0, C0); \ + B1 = _mm512_xor_si512(B1, C1); \ +\ + B0 = ror64(B0, 63); \ + B1 = ror64(B1, 63); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ + B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ +\ + C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ + D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ + B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ +\ + C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ + D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#define SWAP_HALVES(A0, A1) \ + do { \ + __m512i t0, t1; \ + t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \ + t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \ + A0 = t0; \ + A1 = t1; \ + } while((void)0, 0) + +#define SWAP_QUARTERS(A0, A1) \ + do { \ + SWAP_HALVES(A0, A1); \ + A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ + A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ + } while((void)0, 0) + +#define UNSWAP_QUARTERS(A0, A1) \ + do { \ + A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ + A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ + SWAP_HALVES(A0, A1); \ + } while((void)0, 0) + +#define BLAKE2_ROUND1(A0, C0, B0, D0, A1, C1, B1, D1) \ + do { \ + SWAP_HALVES(A0, B0); \ + SWAP_HALVES(C0, D0); \ + SWAP_HALVES(A1, B1); \ + SWAP_HALVES(C1, D1); \ + BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ + SWAP_HALVES(A0, B0); \ + SWAP_HALVES(C0, D0); \ + SWAP_HALVES(A1, B1); \ + SWAP_HALVES(C1, D1); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + SWAP_QUARTERS(A0, A1); \ + SWAP_QUARTERS(B0, B1); \ + SWAP_QUARTERS(C0, C1); \ + SWAP_QUARTERS(D0, D1); \ + BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ + UNSWAP_QUARTERS(A0, A1); \ + UNSWAP_QUARTERS(B0, B1); \ + UNSWAP_QUARTERS(C0, C1); \ + UNSWAP_QUARTERS(D0, D1); \ + } while ((void)0, 0) + +enum { + ARGON2_VECS_IN_BLOCK = ARGON2_OWORDS_IN_BLOCK / 4, +}; + +static void fill_block(__m512i *s, const block *ref_block, block *next_block, + int with_xor) +{ + __m512i block_XY[ARGON2_VECS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) { + s[i] =_mm512_xor_si512( + s[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); + block_XY[i] = _mm512_xor_si512( + s[i], _mm512_loadu_si512((const __m512i *)next_block->v + i)); + } + + } else { + for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) { + block_XY[i] = s[i] =_mm512_xor_si512( + s[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); + } + } + + for (i = 0; i < 2; ++i) { + BLAKE2_ROUND1( + s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3], + s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]); + } + + for (i = 0; i < 2; ++i) { + BLAKE2_ROUND2( + s[2 * 0 + i], s[2 * 1 + i], s[2 * 2 + i], s[2 * 3 + i], + s[2 * 4 + i], s[2 * 5 + i], s[2 * 6 + i], s[2 * 7 + i]); + } + + for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) { + s[i] = _mm512_xor_si512(s[i], block_XY[i]); + _mm512_storeu_si512((__m512i *)next_block->v + i, s[i]); + } +} + +static void next_addresses(block *address_block, block *input_block) +{ + /*Temporary zero-initialized blocks*/ + __m512i zero_block[ARGON2_VECS_IN_BLOCK]; + __m512i zero2_block[ARGON2_VECS_IN_BLOCK]; + + memset(zero_block, 0, sizeof(zero_block)); + memset(zero2_block, 0, sizeof(zero2_block)); + + /*Increasing index counter*/ + input_block->v[6]++; + + /*First iteration of G*/ + fill_block(zero_block, input_block, address_block, 0); + + /*Second iteration of G*/ + fill_block(zero2_block, address_block, address_block, 0); +} + +void fill_segment_avx512f(const argon2_instance_t *instance, + argon2_position_t position) +{ + block *ref_block = NULL, *curr_block = NULL; + block address_block, input_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index, i; + __m512i state[ARGON2_VECS_IN_BLOCK]; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. + */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + + /* version 1.2.1 and earlier: overwrite, not XOR */ + if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { + fill_block(state, ref_block, curr_block, 0); + } else { + fill_block(state, ref_block, curr_block, 1); + } + } +} + +int check_avx512f(void) +{ + return cpu_flags_have_avx512f(); +} + +#else + +void fill_segment_avx512f(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + +int check_avx512f(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h new file mode 100644 index 000000000..ba4311142 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_AVX512F_H +#define ARGON2_AVX512F_H + +#include "core.h" + +void fill_segment_avx512f(const argon2_instance_t *instance, + argon2_position_t position); + +int check_avx512f(void); + +#endif // ARGON2_AVX512F_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c new file mode 100644 index 000000000..915884190 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c @@ -0,0 +1,124 @@ +#include "argon2-sse2.h" + +#ifdef HAVE_SSE2 +#include + +#include "cpu-flags.h" + +#define ror64_16(x) \ + _mm_shufflehi_epi16( \ + _mm_shufflelo_epi16((x), _MM_SHUFFLE(0, 3, 2, 1)), \ + _MM_SHUFFLE(0, 3, 2, 1)) +#define ror64_24(x) \ + _mm_xor_si128(_mm_srli_epi64((x), 24), _mm_slli_epi64((x), 40)) +#define ror64_32(x) _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) +#define ror64_63(x) \ + _mm_xor_si128(_mm_srli_epi64((x), 63), _mm_add_epi64((x), (x))) + +static __m128i f(__m128i x, __m128i y) +{ + __m128i z = _mm_mul_epu32(x, y); + return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64_32(D0); \ + D1 = ror64_32(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64_24(B0); \ + B1 = ror64_24(B1); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64_16(D0); \ + D1 = ror64_16(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64_63(B0); \ + B1 = ror64_63(B1); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = D0; \ + __m128i t1 = B0; \ + D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \ + D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \ + B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \ + B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = B0; \ + __m128i t1 = D0; \ + B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \ + B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \ + D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \ + D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C1, D0, A1, B1, C0, D1); \ + G2(A0, B0, C1, D0, A1, B1, C0, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#include "argon2-template-128.h" + +void fill_segment_sse2(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_128(instance, position); +} + +int check_sse2(void) +{ + return cpu_flags_have_sse2(); +} + +#else + +void fill_segment_sse2(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + +int check_sse2(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h new file mode 100644 index 000000000..024d503d3 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_SSE2_H +#define ARGON2_SSE2_H + +#include "core.h" + +void fill_segment_sse2(const argon2_instance_t *instance, + argon2_position_t position); + +int check_sse2(void); + +#endif // ARGON2_SSE2_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c new file mode 100644 index 000000000..dead71f23 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c @@ -0,0 +1,136 @@ +#include "argon2-ssse3.h" + +#ifdef HAVE_SSSE3 +#include + +#include + +#include "cpu-flags.h" + +#define r16 (_mm_setr_epi8( \ + 2, 3, 4, 5, 6, 7, 0, 1, \ + 10, 11, 12, 13, 14, 15, 8, 9)) + +#define r24 (_mm_setr_epi8( \ + 3, 4, 5, 6, 7, 0, 1, 2, \ + 11, 12, 13, 14, 15, 8, 9, 10)) + +#define ror64_16(x) _mm_shuffle_epi8((x), r16) +#define ror64_24(x) _mm_shuffle_epi8((x), r24) +#define ror64_32(x) _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) +#define ror64_63(x) \ + _mm_xor_si128(_mm_srli_epi64((x), 63), _mm_add_epi64((x), (x))) + +static __m128i f(__m128i x, __m128i y) +{ + __m128i z = _mm_mul_epu32(x, y); + return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64_32(D0); \ + D1 = ror64_32(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64_24(B0); \ + B1 = ror64_24(B1); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64_16(D0); \ + D1 = ror64_16(D1); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64_63(B0); \ + B1 = ror64_63(B1); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ + __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ + B0 = t0; \ + B1 = t1; \ +\ + t0 = _mm_alignr_epi8(D1, D0, 8); \ + t1 = _mm_alignr_epi8(D0, D1, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ + __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ + B0 = t0; \ + B1 = t1; \ +\ + t0 = _mm_alignr_epi8(D0, D1, 8); \ + t1 = _mm_alignr_epi8(D1, D0, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C1, D0, A1, B1, C0, D1); \ + G2(A0, B0, C1, D0, A1, B1, C0, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#include "argon2-template-128.h" + +void fill_segment_ssse3(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_128(instance, position); +} + +int check_ssse3(void) +{ + return cpu_flags_have_ssse3(); +} + +#else + +void fill_segment_ssse3(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + +int check_ssse3(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h new file mode 100644 index 000000000..139fdacca --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_SSSE3_H +#define ARGON2_SSSE3_H + +#include "core.h" + +void fill_segment_ssse3(const argon2_instance_t *instance, + argon2_position_t position); + +int check_ssse3(void); + +#endif // ARGON2_SSSE3_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h new file mode 100644 index 000000000..4eeec2e0d --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h @@ -0,0 +1,164 @@ +#include + +#include + +#include "core.h" + +static void fill_block(__m128i *s, const block *ref_block, block *next_block, + int with_xor) +{ + __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + s[i] = _mm_xor_si128( + s[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + block_XY[i] = _mm_xor_si128( + s[i], _mm_loadu_si128((const __m128i *)next_block->v + i)); + } + } else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + block_XY[i] = s[i] = _mm_xor_si128( + s[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + } + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND( + s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3], + s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]); + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND( + s[8 * 0 + i], s[8 * 1 + i], s[8 * 2 + i], s[8 * 3 + i], + s[8 * 4 + i], s[8 * 5 + i], s[8 * 6 + i], s[8 * 7 + i]); + } + + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + s[i] = _mm_xor_si128(s[i], block_XY[i]); + _mm_storeu_si128((__m128i *)next_block->v + i, s[i]); + } +} + +static void next_addresses(block *address_block, block *input_block) +{ + /*Temporary zero-initialized blocks*/ + __m128i zero_block[ARGON2_OWORDS_IN_BLOCK]; + __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK]; + + memset(zero_block, 0, sizeof(zero_block)); + memset(zero2_block, 0, sizeof(zero2_block)); + + /*Increasing index counter*/ + input_block->v[6]++; + + /*First iteration of G*/ + fill_block(zero_block, input_block, address_block, 0); + + /*Second iteration of G*/ + fill_block(zero2_block, address_block, address_block, 0); +} + +static void fill_segment_128(const argon2_instance_t *instance, + argon2_position_t position) +{ + block *ref_block = NULL, *curr_block = NULL; + block address_block, input_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index, i; + __m128i state[ARGON2_OWORDS_IN_BLOCK]; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. + */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + + /* version 1.2.1 and earlier: overwrite, not XOR */ + if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { + fill_block(state, ref_block, curr_block, 0); + } else { + fill_block(state, ref_block, curr_block, 1); + } + } +} diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c new file mode 100644 index 000000000..87178bdb8 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c @@ -0,0 +1,124 @@ +#include "argon2-xop.h" + +#ifdef HAVE_XOP +#include + +#include + +#include "cpu-flags.h" + +#define ror64(x, c) _mm_roti_epi64((x), -(c)) + +static __m128i f(__m128i x, __m128i y) +{ + __m128i z = _mm_mul_epu32(x, y); + return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64(D0, 32); \ + D1 = ror64(D1, 32); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64(B0, 24); \ + B1 = ror64(B1, 24); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = f(A0, B0); \ + A1 = f(A1, B1); \ +\ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ +\ + D0 = ror64(D0, 16); \ + D1 = ror64(D1, 16); \ +\ + C0 = f(C0, D0); \ + C1 = f(C1, D1); \ +\ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ +\ + B0 = ror64(B0, 63); \ + B1 = ror64(B1, 63); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ + __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ + B0 = t0; \ + B1 = t1; \ +\ + t0 = _mm_alignr_epi8(D1, D0, 8); \ + t1 = _mm_alignr_epi8(D0, D1, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ + __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ + B0 = t0; \ + B1 = t1; \ +\ + t0 = _mm_alignr_epi8(D0, D1, 8); \ + t1 = _mm_alignr_epi8(D1, D0, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C1, D0, A1, B1, C0, D1); \ + G2(A0, B0, C1, D0, A1, B1, C0, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#include "argon2-template-128.h" + +void fill_segment_xop(const argon2_instance_t *instance, + argon2_position_t position) +{ + fill_segment_128(instance, position); +} + +int check_xop(void) +{ + return cpu_flags_have_xop(); +} + +#else + +void fill_segment_xop(const argon2_instance_t *instance, + argon2_position_t position) +{ +} + +int check_xop(void) +{ + return 0; +} + +#endif diff --git a/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h new file mode 100644 index 000000000..1474a11c0 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h @@ -0,0 +1,11 @@ +#ifndef ARGON2_XOP_H +#define ARGON2_XOP_H + +#include "core.h" + +void fill_segment_xop(const argon2_instance_t *instance, + argon2_position_t position); + +int check_xop(void); + +#endif // ARGON2_XOP_H diff --git a/src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.c b/src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.c new file mode 100644 index 000000000..ec8b7c44b --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.c @@ -0,0 +1,91 @@ +#include "cpu-flags.h" + +#include + +enum { + X86_64_FEATURE_SSE2 = (1 << 0), + X86_64_FEATURE_SSSE3 = (1 << 1), + X86_64_FEATURE_XOP = (1 << 2), + X86_64_FEATURE_AVX2 = (1 << 3), + X86_64_FEATURE_AVX512F = (1 << 4), +}; + +static unsigned int cpu_flags; + +static unsigned int get_cpuid(int ext, unsigned int level, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + unsigned int eax; + __cpuid(ext ? (0x80000000 | level) : level, + eax, *ebx, *ecx, *edx); + return eax; +} + +static unsigned int get_cpuid_count(int ext, unsigned int level, + unsigned int count, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + unsigned int eax; + __cpuid_count(ext ? (0x80000000 | level) : level, + count, eax, *ebx, *ecx, *edx); + return 1; +} + +void cpu_flags_get(void) +{ + unsigned int ebx, ecx, edx; + unsigned int level, level_ext; + + cpu_flags = 0; + level = get_cpuid(0, 0, &ebx, &ecx, &edx); + level_ext = get_cpuid(1, 0, &ebx, &ecx, &edx); + + if (level >= 1 && get_cpuid(0, 1, &ebx, &ecx, &edx)) { + if (edx & (1 << 26)) { + cpu_flags |= X86_64_FEATURE_SSE2; + } + if (ecx & (1 << 9)) { + cpu_flags |= X86_64_FEATURE_SSSE3; + } + } + if (level >= 7 && get_cpuid_count(0, 7, 0, &ebx, &ecx, &edx)) { + if (ebx & (1 << 5)) { + cpu_flags |= X86_64_FEATURE_AVX2; + } + if (ebx & (1 << 16)) { + cpu_flags |= X86_64_FEATURE_AVX512F; + } + } + if (level_ext >= 1 && get_cpuid(1, 1, &ebx, &ecx, &edx)) { + if (ecx & (1 << 11)) { + cpu_flags |= X86_64_FEATURE_XOP; + } + } + /* FIXME: check also OS support! */ +} + +int cpu_flags_have_sse2(void) +{ + return cpu_flags & X86_64_FEATURE_SSE2; +} + +int cpu_flags_have_ssse3(void) +{ + return cpu_flags & X86_64_FEATURE_SSSE3; +} + +int cpu_flags_have_xop(void) +{ + return cpu_flags & X86_64_FEATURE_XOP; +} + +int cpu_flags_have_avx2(void) +{ + return cpu_flags & X86_64_FEATURE_AVX2; +} + +int cpu_flags_have_avx512f(void) +{ + return cpu_flags & X86_64_FEATURE_AVX512F; +} + diff --git a/src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.h b/src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.h new file mode 100644 index 000000000..b546a8ba1 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.h @@ -0,0 +1,12 @@ +#ifndef ARGON2_CPU_FLAGS_H +#define ARGON2_CPU_FLAGS_H + +void cpu_flags_get(void); + +int cpu_flags_have_sse2(void); +int cpu_flags_have_ssse3(void); +int cpu_flags_have_xop(void); +int cpu_flags_have_avx2(void); +int cpu_flags_have_avx512f(void); + +#endif // ARGON2_CPU_FLAGS_H diff --git a/src/3rdparty/argon2/arch/x86_64/src/test-feature-avx2.c b/src/3rdparty/argon2/arch/x86_64/src/test-feature-avx2.c new file mode 100644 index 000000000..01e5b156d --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/src/test-feature-avx2.c @@ -0,0 +1,8 @@ +#include + +void function_avx2(__m256i *dst, const __m256i *a, const __m256i *b) +{ + *dst = _mm256_xor_si256(*a, *b); +} + +int main(void) { return 0; } diff --git a/src/3rdparty/argon2/arch/x86_64/src/test-feature-avx512f.c b/src/3rdparty/argon2/arch/x86_64/src/test-feature-avx512f.c new file mode 100644 index 000000000..372eb2561 --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/src/test-feature-avx512f.c @@ -0,0 +1,8 @@ +#include + +void function_avx512f(__m512i *dst, const __m512i *a) +{ + *dst = _mm512_ror_epi64(*a, 57); +} + +int main(void) { return 0; } diff --git a/src/3rdparty/argon2/arch/x86_64/src/test-feature-sse2.c b/src/3rdparty/argon2/arch/x86_64/src/test-feature-sse2.c new file mode 100644 index 000000000..4a3b70fda --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/src/test-feature-sse2.c @@ -0,0 +1,8 @@ +#include + +void function_sse2(__m128i *dst, const __m128i *a, const __m128i *b) +{ + *dst = _mm_xor_si128(*a, *b); +} + +int main(void) { return 0; } diff --git a/src/3rdparty/argon2/arch/x86_64/src/test-feature-ssse3.c b/src/3rdparty/argon2/arch/x86_64/src/test-feature-ssse3.c new file mode 100644 index 000000000..db2b3551a --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/src/test-feature-ssse3.c @@ -0,0 +1,8 @@ +#include + +void function_ssse3(__m128i *dst, const __m128i *a, const __m128i *b) +{ + *dst = _mm_shuffle_epi8(*a, *b); +} + +int main(void) { return 0; } diff --git a/src/3rdparty/argon2/arch/x86_64/src/test-feature-xop.c b/src/3rdparty/argon2/arch/x86_64/src/test-feature-xop.c new file mode 100644 index 000000000..5897fe14d --- /dev/null +++ b/src/3rdparty/argon2/arch/x86_64/src/test-feature-xop.c @@ -0,0 +1,8 @@ +#include + +void function_xop(__m128i *dst, const __m128i *a, int b) +{ + *dst = _mm_roti_epi64(*a, b); +} + +int main(void) { return 0; } diff --git a/src/3rdparty/argon2/include/argon2.h b/src/3rdparty/argon2/include/argon2.h new file mode 100644 index 000000000..ab4bf2f05 --- /dev/null +++ b/src/3rdparty/argon2/include/argon2.h @@ -0,0 +1,455 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication + * along with this software. If not, see + * . + */ + +#ifndef ARGON2_H +#define ARGON2_H + +#include +#include +#include +#include + +/* Symbols visibility control */ +#define ARGON2_PUBLIC + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Argon2 input parameter restrictions + */ + +/* Minimum and maximum number of lanes (degree of parallelism) */ +#define ARGON2_MIN_LANES UINT32_C(1) +#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF) + +/* Minimum and maximum number of threads */ +#define ARGON2_MIN_THREADS UINT32_C(1) +#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF) + +/* Number of synchronization points between lanes per pass */ +#define ARGON2_SYNC_POINTS UINT32_C(4) + +/* Minimum and maximum digest size in bytes */ +#define ARGON2_MIN_OUTLEN UINT32_C(4) +#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */ +#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */ + +#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b)) +/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */ +#define ARGON2_MAX_MEMORY_BITS \ + ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1)) +#define ARGON2_MAX_MEMORY \ + ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS) + +/* Minimum and maximum number of passes */ +#define ARGON2_MIN_TIME UINT32_C(1) +#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum password length in bytes */ +#define ARGON2_MIN_PWD_LENGTH UINT32_C(0) +#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum associated data length in bytes */ +#define ARGON2_MIN_AD_LENGTH UINT32_C(0) +#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum salt length in bytes */ +#define ARGON2_MIN_SALT_LENGTH UINT32_C(8) +#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum key length in bytes */ +#define ARGON2_MIN_SECRET UINT32_C(0) +#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF) + +/* Flags to determine which fields are securely wiped (default = no wipe). */ +#define ARGON2_DEFAULT_FLAGS UINT32_C(0) +#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0) +#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1) +#define ARGON2_FLAG_GENKAT (UINT32_C(1) << 3) + +/* Global flag to determine if we are wiping internal memory buffers. This flag + * is defined in core.c and deafults to 1 (wipe internal memory). */ +extern int FLAG_clear_internal_memory; + +/* Error codes */ +typedef enum Argon2_ErrorCodes { + ARGON2_OK = 0, + + ARGON2_OUTPUT_PTR_NULL = -1, + + ARGON2_OUTPUT_TOO_SHORT = -2, + ARGON2_OUTPUT_TOO_LONG = -3, + + ARGON2_PWD_TOO_SHORT = -4, + ARGON2_PWD_TOO_LONG = -5, + + ARGON2_SALT_TOO_SHORT = -6, + ARGON2_SALT_TOO_LONG = -7, + + ARGON2_AD_TOO_SHORT = -8, + ARGON2_AD_TOO_LONG = -9, + + ARGON2_SECRET_TOO_SHORT = -10, + ARGON2_SECRET_TOO_LONG = -11, + + ARGON2_TIME_TOO_SMALL = -12, + ARGON2_TIME_TOO_LARGE = -13, + + ARGON2_MEMORY_TOO_LITTLE = -14, + ARGON2_MEMORY_TOO_MUCH = -15, + + ARGON2_LANES_TOO_FEW = -16, + ARGON2_LANES_TOO_MANY = -17, + + ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */ + ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */ + ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */ + ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */ + + ARGON2_MEMORY_ALLOCATION_ERROR = -22, + + ARGON2_FREE_MEMORY_CBK_NULL = -23, + ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24, + + ARGON2_INCORRECT_PARAMETER = -25, + ARGON2_INCORRECT_TYPE = -26, + + ARGON2_OUT_PTR_MISMATCH = -27, + + ARGON2_THREADS_TOO_FEW = -28, + ARGON2_THREADS_TOO_MANY = -29, + + ARGON2_MISSING_ARGS = -30, + + ARGON2_ENCODING_FAIL = -31, + + ARGON2_DECODING_FAIL = -32, + + ARGON2_THREAD_FAIL = -33, + + ARGON2_DECODING_LENGTH_FAIL = -34, + + ARGON2_VERIFY_MISMATCH = -35 +} argon2_error_codes; + +/* Memory allocator types --- for external allocation */ +typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate); +typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate); + +/* Argon2 external data structures */ + +/* + ***** + * Context: structure to hold Argon2 inputs: + * output array and its length, + * password and its length, + * salt and its length, + * secret and its length, + * associated data and its length, + * number of passes, amount of used memory (in KBytes, can be rounded up a bit) + * number of parallel threads that will be run. + * All the parameters above affect the output hash value. + * Additionally, two function pointers can be provided to allocate and + * deallocate the memory (if NULL, memory will be allocated internally). + * Also, three flags indicate whether to erase password, secret as soon as they + * are pre-hashed (and thus not needed anymore), and the entire memory + ***** + * Simplest situation: you have output array out[8], password is stored in + * pwd[32], salt is stored in salt[16], you do not have keys nor associated + * data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with + * 4 parallel lanes. + * You want to erase the password, but you're OK with last pass not being + * erased. You want to use the default memory allocator. + * Then you initialize: + Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false) + */ +typedef struct Argon2_Context { + uint8_t *out; /* output array */ + uint32_t outlen; /* digest length */ + + uint8_t *pwd; /* password array */ + uint32_t pwdlen; /* password length */ + + uint8_t *salt; /* salt array */ + uint32_t saltlen; /* salt length */ + + uint8_t *secret; /* key array */ + uint32_t secretlen; /* key length */ + + uint8_t *ad; /* associated data array */ + uint32_t adlen; /* associated data length */ + + uint32_t t_cost; /* number of passes */ + uint32_t m_cost; /* amount of memory requested (KB) */ + uint32_t lanes; /* number of lanes */ + uint32_t threads; /* maximum number of threads */ + + uint32_t version; /* version number */ + + allocate_fptr allocate_cbk; /* pointer to memory allocator */ + deallocate_fptr free_cbk; /* pointer to memory deallocator */ + + uint32_t flags; /* array of bool options */ +} argon2_context; + +/* Argon2 primitive type */ +typedef enum Argon2_type { + Argon2_d = 0, + Argon2_i = 1, + Argon2_id = 2 +} argon2_type; + +/* Version of the algorithm */ +typedef enum Argon2_version { + ARGON2_VERSION_10 = 0x10, + ARGON2_VERSION_13 = 0x13, + ARGON2_VERSION_NUMBER = ARGON2_VERSION_13 +} argon2_version; + +/* + * Function that gives the string representation of an argon2_type. + * @param type The argon2_type that we want the string for + * @param uppercase Whether the string should have the first letter uppercase + * @return NULL if invalid type, otherwise the string representation. + */ +ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase); + +/* + * Function that performs memory-hard hashing with certain degree of parallelism + * @param context Pointer to the Argon2 internal structure + * @return Error code if smth is wrong, ARGON2_OK otherwise + */ +ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type); + +/** + * Hashes a password with Argon2i, producing an encoded hash + * @param t_cost Number of iterations + * @param m_cost Sets memory usage to m_cost kibibytes + * @param parallelism Number of threads and compute lanes + * @param pwd Pointer to password + * @param pwdlen Password size in bytes + * @param salt Pointer to salt + * @param saltlen Salt size in bytes + * @param hashlen Desired length of the hash in bytes + * @param encoded Buffer where to write the encoded hash + * @param encodedlen Size of the buffer (thus max size of the encoded hash) + * @pre Different parallelism levels will give different results + * @pre Returns ARGON2_OK if successful + */ +ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +/** + * Hashes a password with Argon2i, producing a raw hash by allocating memory at + * @hash + * @param t_cost Number of iterations + * @param m_cost Sets memory usage to m_cost kibibytes + * @param parallelism Number of threads and compute lanes + * @param pwd Pointer to password + * @param pwdlen Password size in bytes + * @param salt Pointer to salt + * @param saltlen Salt size in bytes + * @param hash Buffer where to write the raw hash - updated by the function + * @param hashlen Desired length of the hash in bytes + * @pre Different parallelism levels will give different results + * @pre Returns ARGON2_OK if successful + */ +ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen, char *encoded, + const size_t encodedlen, argon2_type type, + const uint32_t version); + +/** + * Verifies a password against an encoded string + * Encoded string is restricted as in validate_inputs() + * @param encoded String encoding parameters, salt, hash + * @param pwd Pointer to password + * @pre Returns ARGON2_OK if successful + */ +ARGON2_PUBLIC int argon2i_verify(const char *encoded, const void *pwd, + const size_t pwdlen); + +ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd, + const size_t pwdlen); + +ARGON2_PUBLIC int argon2id_verify(const char *encoded, const void *pwd, + const size_t pwdlen); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd, + const size_t pwdlen, argon2_type type); + +/** + * Argon2d: Version of Argon2 that picks memory blocks depending + * on the password and salt. Only for side-channel-free + * environment!! + ***** + * @param context Pointer to current Argon2 context + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2d_ctx(argon2_context *context); + +/** + * Argon2i: Version of Argon2 that picks memory blocks + * independent on the password and salt. Good for side-channels, + * but worse w.r.t. tradeoff attacks if only one pass is used. + ***** + * @param context Pointer to current Argon2 context + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2i_ctx(argon2_context *context); + +/** + * Argon2id: Version of Argon2 where the first half-pass over memory is + * password-independent, the rest are password-dependent (on the password and + * salt). OK against side channels (they reduce to 1/2-pass Argon2i), and + * better with w.r.t. tradeoff attacks (similar to Argon2d). + ***** + * @param context Pointer to current Argon2 context + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2id_ctx(argon2_context *context); + +/** + * Verify if a given password is correct for Argon2d hashing + * @param context Pointer to current Argon2 context + * @param hash The password hash to verify. The length of the hash is + * specified by the context outlen member + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash); + +/** + * Verify if a given password is correct for Argon2i hashing + * @param context Pointer to current Argon2 context + * @param hash The password hash to verify. The length of the hash is + * specified by the context outlen member + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2i_verify_ctx(argon2_context *context, const char *hash); + +/** + * Verify if a given password is correct for Argon2id hashing + * @param context Pointer to current Argon2 context + * @param hash The password hash to verify. The length of the hash is + * specified by the context outlen member + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int argon2id_verify_ctx(argon2_context *context, + const char *hash); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash, + argon2_type type); + +/** + * Get the associated error message for given error code + * @return The error message associated with the given error code + */ +ARGON2_PUBLIC const char *argon2_error_message(int error_code); + +/** + * Returns the encoded hash length for the given input parameters + * @param t_cost Number of iterations + * @param m_cost Memory usage in kibibytes + * @param parallelism Number of threads; used to compute lanes + * @param saltlen Salt size in bytes + * @param hashlen Hash size in bytes + * @param type The argon2_type that we want the encoded length for + * @return The encoded hash length in bytes + */ +ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, + uint32_t parallelism, uint32_t saltlen, + uint32_t hashlen, argon2_type type); + +/* signals availability of argon2_select_impl: */ +#define ARGON2_SELECTABLE_IMPL + +/** + * Selects the fastest available optimized implementation. + * @param out The file for debug output (e. g. stderr; pass NULL for no + * debug output) + * @param prefix What to print before each line; NULL is equivalent to empty + * string + */ +ARGON2_PUBLIC void argon2_select_impl(FILE *out, const char *prefix); + +/* signals support for passing preallocated memory: */ +#define ARGON2_PREALLOCATED_MEMORY + +ARGON2_PUBLIC size_t argon2_memory_size(uint32_t m_cost, uint32_t parallelism); + +/** + * Function that performs memory-hard hashing with certain degree of parallelism + * @param context Pointer to the Argon2 internal structure + * @param type The Argon2 type + * @param memory Preallocated memory for blocks (or NULL) + * @param memory_size The size of preallocated memory + * @return Error code if smth is wrong, ARGON2_OK otherwise + */ +ARGON2_PUBLIC int argon2_ctx_mem(argon2_context *context, argon2_type type, + void *memory, size_t memory_size); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/3rdparty/argon2/lib/argon2-template-64.h b/src/3rdparty/argon2/lib/argon2-template-64.h new file mode 100644 index 000000000..16ddbd35e --- /dev/null +++ b/src/3rdparty/argon2/lib/argon2-template-64.h @@ -0,0 +1,193 @@ +#include + +#include "core.h" + +#define MASK_32 UINT64_C(0xFFFFFFFF) + +#define F(x, y) ((x) + (y) + 2 * ((x) & MASK_32) * ((y) & MASK_32)) + +#define G(a, b, c, d) \ + do { \ + a = F(a, b); \ + d = rotr64(d ^ a, 32); \ + c = F(c, d); \ + b = rotr64(b ^ c, 24); \ + a = F(a, b); \ + d = rotr64(d ^ a, 16); \ + c = F(c, d); \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, \ + v8, v9, v10, v11, v12, v13, v14, v15) \ + do { \ + G(v0, v4, v8, v12); \ + G(v1, v5, v9, v13); \ + G(v2, v6, v10, v14); \ + G(v3, v7, v11, v15); \ + G(v0, v5, v10, v15); \ + G(v1, v6, v11, v12); \ + G(v2, v7, v8, v13); \ + G(v3, v4, v9, v14); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND_NOMSG1(v) \ + BLAKE2_ROUND_NOMSG( \ + (v)[ 0], (v)[ 1], (v)[ 2], (v)[ 3], \ + (v)[ 4], (v)[ 5], (v)[ 6], (v)[ 7], \ + (v)[ 8], (v)[ 9], (v)[10], (v)[11], \ + (v)[12], (v)[13], (v)[14], (v)[15]) + +#define BLAKE2_ROUND_NOMSG2(v) \ + BLAKE2_ROUND_NOMSG( \ + (v)[ 0], (v)[ 1], (v)[ 16], (v)[ 17], \ + (v)[ 32], (v)[ 33], (v)[ 48], (v)[ 49], \ + (v)[ 64], (v)[ 65], (v)[ 80], (v)[ 81], \ + (v)[ 96], (v)[ 97], (v)[112], (v)[113]) + +static void fill_block(const block *prev_block, const block *ref_block, + block *next_block, int with_xor) +{ + block blockR, block_tmp; + + copy_block(&blockR, ref_block); + xor_block(&blockR, prev_block); + copy_block(&block_tmp, &blockR); + if (with_xor) { + xor_block(&block_tmp, next_block); + } + + /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then + (16,17,..31)... finally (112,113,...127) */ + BLAKE2_ROUND_NOMSG1(blockR.v + 0 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 1 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 2 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 3 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 4 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 5 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 6 * 16); + BLAKE2_ROUND_NOMSG1(blockR.v + 7 * 16); + + /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then + (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */ + BLAKE2_ROUND_NOMSG2(blockR.v + 0 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 1 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 2 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 3 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 4 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 5 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 6 * 2); + BLAKE2_ROUND_NOMSG2(blockR.v + 7 * 2); + + copy_block(next_block, &block_tmp); + xor_block(next_block, &blockR); +} + +static void next_addresses(block *address_block, block *input_block, + const block *zero_block) +{ + input_block->v[6]++; + fill_block(zero_block, input_block, address_block, 0); + fill_block(zero_block, address_block, address_block, 0); +} + +static void fill_segment_64(const argon2_instance_t *instance, + argon2_position_t position) +{ + block *ref_block, *curr_block, *prev_block; + block address_block, input_block, zero_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index, i; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&zero_block, 0); + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block, &zero_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block, &zero_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. + */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + prev_block = instance->memory + prev_offset; + + /* version 1.2.1 and earlier: overwrite, not XOR */ + if (0 == position.pass || ARGON2_VERSION_10 == instance->version) { + fill_block(prev_block, ref_block, curr_block, 0); + } else { + fill_block(prev_block, ref_block, curr_block, 1); + } + } +} diff --git a/src/3rdparty/argon2/lib/argon2.c b/src/3rdparty/argon2/lib/argon2.c new file mode 100644 index 000000000..28d3d4029 --- /dev/null +++ b/src/3rdparty/argon2/lib/argon2.c @@ -0,0 +1,476 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . + */ + +#include +#include +#include + +#include "argon2.h" +#include "encoding.h" +#include "core.h" + +const char *argon2_type2string(argon2_type type, int uppercase) { + switch (type) { + case Argon2_d: + return uppercase ? "Argon2d" : "argon2d"; + case Argon2_i: + return uppercase ? "Argon2i" : "argon2i"; + case Argon2_id: + return uppercase ? "Argon2id" : "argon2id"; + } + + return NULL; +} + +static void argon2_compute_memory_blocks(uint32_t *memory_blocks, + uint32_t *segment_length, + uint32_t m_cost, uint32_t lanes) +{ + /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ + *memory_blocks = m_cost; + if (*memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) { + *memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes; + } + + *segment_length = *memory_blocks / (lanes * ARGON2_SYNC_POINTS); + /* Ensure that all segments have equal length */ + *memory_blocks = *segment_length * (lanes * ARGON2_SYNC_POINTS); +} + +size_t argon2_memory_size(uint32_t m_cost, uint32_t parallelism) { + uint32_t memory_blocks, segment_length; + argon2_compute_memory_blocks(&memory_blocks, &segment_length, m_cost, + parallelism); + return memory_blocks * ARGON2_BLOCK_SIZE; +} + +int argon2_ctx_mem(argon2_context *context, argon2_type type, void *memory, + size_t memory_size) { + /* 1. Validate all inputs */ + int result = validate_inputs(context); + uint32_t memory_blocks, segment_length; + argon2_instance_t instance; + + if (ARGON2_OK != result) { + return result; + } + + if (Argon2_d != type && Argon2_i != type && Argon2_id != type) { + return ARGON2_INCORRECT_TYPE; + } + + /* 2. Align memory size */ + argon2_compute_memory_blocks(&memory_blocks, &segment_length, + context->m_cost, context->lanes); + + /* check for sufficient memory size: */ + if (memory != NULL && (memory_size % ARGON2_BLOCK_SIZE != 0 || + memory_size / ARGON2_BLOCK_SIZE < memory_blocks)) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + instance.version = context->version; + instance.memory = (block *)memory; + instance.passes = context->t_cost; + instance.memory_blocks = memory_blocks; + instance.segment_length = segment_length; + instance.lane_length = segment_length * ARGON2_SYNC_POINTS; + instance.lanes = context->lanes; + instance.threads = context->threads; + instance.type = type; + instance.print_internals = !!(context->flags & ARGON2_FLAG_GENKAT); + instance.keep_memory = memory != NULL; + + if (instance.threads > instance.lanes) { + instance.threads = instance.lanes; + } + + /* 3. Initialization: Hashing inputs, allocating memory, filling first + * blocks + */ + result = initialize(&instance, context); + + if (ARGON2_OK != result) { + return result; + } + + /* 4. Filling memory */ + result = fill_memory_blocks(&instance); + + if (ARGON2_OK != result) { + return result; + } + /* 5. Finalization */ + finalize(context, &instance); + + return ARGON2_OK; +} + +int argon2_ctx(argon2_context *context, argon2_type type) { + return argon2_ctx_mem(context, type, NULL, 0); +} + +int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, const size_t saltlen, + void *hash, const size_t hashlen, char *encoded, + const size_t encodedlen, argon2_type type, + const uint32_t version){ + + argon2_context context; + int result; + uint8_t *out; + + if (pwdlen > ARGON2_MAX_PWD_LENGTH) { + return ARGON2_PWD_TOO_LONG; + } + + if (saltlen > ARGON2_MAX_SALT_LENGTH) { + return ARGON2_SALT_TOO_LONG; + } + + if (hashlen > ARGON2_MAX_OUTLEN) { + return ARGON2_OUTPUT_TOO_LONG; + } + + if (hashlen < ARGON2_MIN_OUTLEN) { + return ARGON2_OUTPUT_TOO_SHORT; + } + + out = malloc(hashlen); + if (!out) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + context.out = (uint8_t *)out; + context.outlen = (uint32_t)hashlen; + context.pwd = CONST_CAST(uint8_t *)pwd; + context.pwdlen = (uint32_t)pwdlen; + context.salt = CONST_CAST(uint8_t *)salt; + context.saltlen = (uint32_t)saltlen; + context.secret = NULL; + context.secretlen = 0; + context.ad = NULL; + context.adlen = 0; + context.t_cost = t_cost; + context.m_cost = m_cost; + context.lanes = parallelism; + context.threads = parallelism; + context.allocate_cbk = NULL; + context.free_cbk = NULL; + context.flags = ARGON2_DEFAULT_FLAGS; + context.version = version; + + result = argon2_ctx(&context, type); + + if (result != ARGON2_OK) { + clear_internal_memory(out, hashlen); + free(out); + return result; + } + + /* if raw hash requested, write it */ + if (hash) { + memcpy(hash, out, hashlen); + } + + /* if encoding requested, write it */ + if (encoded && encodedlen) { + if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) { + clear_internal_memory(out, hashlen); /* wipe buffers if error */ + clear_internal_memory(encoded, encodedlen); + free(out); + return ARGON2_ENCODING_FAIL; + } + } + clear_internal_memory(out, hashlen); + free(out); + + return ARGON2_OK; +} + +int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_i, + ARGON2_VERSION_NUMBER); +} + +int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_i, ARGON2_VERSION_NUMBER); +} + +int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_d, + ARGON2_VERSION_NUMBER); +} + +int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_d, ARGON2_VERSION_NUMBER); +} + +int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { + + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_id, + ARGON2_VERSION_NUMBER); +} + +int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_id, + ARGON2_VERSION_NUMBER); +} + +static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) { + size_t i; + uint8_t d = 0U; + + for (i = 0U; i < len; i++) { + d |= b1[i] ^ b2[i]; + } + return (int)((1 & ((d - 1) >> 8)) - 1); +} + +int argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen, + argon2_type type) { + + argon2_context ctx; + uint8_t *desired_result = NULL; + + int ret = ARGON2_OK; + + size_t encoded_len; + uint32_t max_field_len; + + if (pwdlen > ARGON2_MAX_PWD_LENGTH) { + return ARGON2_PWD_TOO_LONG; + } + + if (encoded == NULL) { + return ARGON2_DECODING_FAIL; + } + + encoded_len = strlen(encoded); + if (encoded_len > UINT32_MAX) { + return ARGON2_DECODING_FAIL; + } + + /* No field can be longer than the encoded length */ + max_field_len = (uint32_t)encoded_len; + + ctx.saltlen = max_field_len; + ctx.outlen = max_field_len; + + ctx.salt = malloc(ctx.saltlen); + ctx.out = malloc(ctx.outlen); + if (!ctx.salt || !ctx.out) { + ret = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + ctx.pwd = (uint8_t *)pwd; + ctx.pwdlen = (uint32_t)pwdlen; + + ret = decode_string(&ctx, encoded, type); + if (ret != ARGON2_OK) { + goto fail; + } + + /* Set aside the desired result, and get a new buffer. */ + desired_result = ctx.out; + ctx.out = malloc(ctx.outlen); + if (!ctx.out) { + ret = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + ret = argon2_verify_ctx(&ctx, (char *)desired_result, type); + if (ret != ARGON2_OK) { + goto fail; + } + +fail: + free(ctx.salt); + free(ctx.out); + free(desired_result); + + return ret; +} + +int argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen) { + + return argon2_verify(encoded, pwd, pwdlen, Argon2_i); +} + +int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) { + + return argon2_verify(encoded, pwd, pwdlen, Argon2_d); +} + +int argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen) { + + return argon2_verify(encoded, pwd, pwdlen, Argon2_id); +} + +int argon2d_ctx(argon2_context *context) { + return argon2_ctx(context, Argon2_d); +} + +int argon2i_ctx(argon2_context *context) { + return argon2_ctx(context, Argon2_i); +} + +int argon2id_ctx(argon2_context *context) { + return argon2_ctx(context, Argon2_id); +} + +int argon2_verify_ctx(argon2_context *context, const char *hash, + argon2_type type) { + int ret = argon2_ctx(context, type); + if (ret != ARGON2_OK) { + return ret; + } + + if (argon2_compare((uint8_t *)hash, context->out, context->outlen)) { + return ARGON2_VERIFY_MISMATCH; + } + + return ARGON2_OK; +} + +int argon2d_verify_ctx(argon2_context *context, const char *hash) { + return argon2_verify_ctx(context, hash, Argon2_d); +} + +int argon2i_verify_ctx(argon2_context *context, const char *hash) { + return argon2_verify_ctx(context, hash, Argon2_i); +} + +int argon2id_verify_ctx(argon2_context *context, const char *hash) { + return argon2_verify_ctx(context, hash, Argon2_id); +} + +const char *argon2_error_message(int error_code) { + switch (error_code) { + case ARGON2_OK: + return "OK"; + case ARGON2_OUTPUT_PTR_NULL: + return "Output pointer is NULL"; + case ARGON2_OUTPUT_TOO_SHORT: + return "Output is too short"; + case ARGON2_OUTPUT_TOO_LONG: + return "Output is too long"; + case ARGON2_PWD_TOO_SHORT: + return "Password is too short"; + case ARGON2_PWD_TOO_LONG: + return "Password is too long"; + case ARGON2_SALT_TOO_SHORT: + return "Salt is too short"; + case ARGON2_SALT_TOO_LONG: + return "Salt is too long"; + case ARGON2_AD_TOO_SHORT: + return "Associated data is too short"; + case ARGON2_AD_TOO_LONG: + return "Associated data is too long"; + case ARGON2_SECRET_TOO_SHORT: + return "Secret is too short"; + case ARGON2_SECRET_TOO_LONG: + return "Secret is too long"; + case ARGON2_TIME_TOO_SMALL: + return "Time cost is too small"; + case ARGON2_TIME_TOO_LARGE: + return "Time cost is too large"; + case ARGON2_MEMORY_TOO_LITTLE: + return "Memory cost is too small"; + case ARGON2_MEMORY_TOO_MUCH: + return "Memory cost is too large"; + case ARGON2_LANES_TOO_FEW: + return "Too few lanes"; + case ARGON2_LANES_TOO_MANY: + return "Too many lanes"; + case ARGON2_PWD_PTR_MISMATCH: + return "Password pointer is NULL, but password length is not 0"; + case ARGON2_SALT_PTR_MISMATCH: + return "Salt pointer is NULL, but salt length is not 0"; + case ARGON2_SECRET_PTR_MISMATCH: + return "Secret pointer is NULL, but secret length is not 0"; + case ARGON2_AD_PTR_MISMATCH: + return "Associated data pointer is NULL, but ad length is not 0"; + case ARGON2_MEMORY_ALLOCATION_ERROR: + return "Memory allocation error"; + case ARGON2_FREE_MEMORY_CBK_NULL: + return "The free memory callback is NULL"; + case ARGON2_ALLOCATE_MEMORY_CBK_NULL: + return "The allocate memory callback is NULL"; + case ARGON2_INCORRECT_PARAMETER: + return "Argon2_Context context is NULL"; + case ARGON2_INCORRECT_TYPE: + return "There is no such version of Argon2"; + case ARGON2_OUT_PTR_MISMATCH: + return "Output pointer mismatch"; + case ARGON2_THREADS_TOO_FEW: + return "Not enough threads"; + case ARGON2_THREADS_TOO_MANY: + return "Too many threads"; + case ARGON2_MISSING_ARGS: + return "Missing arguments"; + case ARGON2_ENCODING_FAIL: + return "Encoding failed"; + case ARGON2_DECODING_FAIL: + return "Decoding failed"; + case ARGON2_THREAD_FAIL: + return "Threading failure"; + case ARGON2_DECODING_LENGTH_FAIL: + return "Some of encoded parameters are too long or too short"; + case ARGON2_VERIFY_MISMATCH: + return "The password does not match the supplied hash"; + default: + return "Unknown error code"; + } +} + +size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism, + uint32_t saltlen, uint32_t hashlen, argon2_type type) { + return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) + + numlen(t_cost) + numlen(m_cost) + numlen(parallelism) + + b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) + + 1; +} diff --git a/src/3rdparty/argon2/lib/blake2/blake2-impl.h b/src/3rdparty/argon2/lib/blake2/blake2-impl.h new file mode 100644 index 000000000..e6cdf7c4e --- /dev/null +++ b/src/3rdparty/argon2/lib/blake2/blake2-impl.h @@ -0,0 +1,90 @@ +#ifndef ARGON2_BLAKE2_IMPL_H +#define ARGON2_BLAKE2_IMPL_H + +#include + +/* Argon2 Team - Begin Code */ +/* + Not an exhaustive list, but should cover the majority of modern platforms + Additionally, the code will always be correct---this is only a performance + tweak. +*/ +#if (defined(__BYTE_ORDER__) && \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \ + defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \ + defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \ + defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \ + defined(_M_ARM) +#define NATIVE_LITTLE_ENDIAN +#endif +/* Argon2 Team - End Code */ + +static inline uint32_t load32(const void *src) { +#if defined(NATIVE_LITTLE_ENDIAN) + return *(const uint32_t *)src; +#else + const uint8_t *p = (const uint8_t *)src; + uint32_t w = *p++; + w |= (uint32_t)(*p++) << 8; + w |= (uint32_t)(*p++) << 16; + w |= (uint32_t)(*p++) << 24; + return w; +#endif +} + +static inline uint64_t load64(const void *src) { +#if defined(NATIVE_LITTLE_ENDIAN) + return *(const uint64_t *)src; +#else + const uint8_t *p = (const uint8_t *)src; + uint64_t w = *p++; + w |= (uint64_t)(*p++) << 8; + w |= (uint64_t)(*p++) << 16; + w |= (uint64_t)(*p++) << 24; + w |= (uint64_t)(*p++) << 32; + w |= (uint64_t)(*p++) << 40; + w |= (uint64_t)(*p++) << 48; + w |= (uint64_t)(*p++) << 56; + return w; +#endif +} + +static inline void store32(void *dst, uint32_t w) { +#if defined(NATIVE_LITTLE_ENDIAN) + *(uint32_t *)dst = w; +#else + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +#endif +} + +static inline void store64(void *dst, uint64_t w) { +#if defined(NATIVE_LITTLE_ENDIAN) + *(uint64_t *)dst = w; +#else + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +#endif +} + +#endif // ARGON2_BLAKE2_IMPL_H diff --git a/src/3rdparty/argon2/lib/blake2/blake2.c b/src/3rdparty/argon2/lib/blake2/blake2.c new file mode 100644 index 000000000..d32028ed5 --- /dev/null +++ b/src/3rdparty/argon2/lib/blake2/blake2.c @@ -0,0 +1,225 @@ +#include + +#include "blake2/blake2.h" +#include "blake2/blake2-impl.h" + +#include "core.h" + +static const uint64_t blake2b_IV[8] = { + UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), + UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), + UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), + UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) +}; + +#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) + +static const unsigned int blake2b_sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, +}; + +#define G(m, r, i, a, b, c, d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define ROUND(m, v, r) \ + do { \ + G(m, r, 0, v[0], v[4], v[ 8], v[12]); \ + G(m, r, 1, v[1], v[5], v[ 9], v[13]); \ + G(m, r, 2, v[2], v[6], v[10], v[14]); \ + G(m, r, 3, v[3], v[7], v[11], v[15]); \ + G(m, r, 4, v[0], v[5], v[10], v[15]); \ + G(m, r, 5, v[1], v[6], v[11], v[12]); \ + G(m, r, 6, v[2], v[7], v[ 8], v[13]); \ + G(m, r, 7, v[3], v[4], v[ 9], v[14]); \ + } while ((void)0, 0) + +void blake2b_compress(blake2b_state *S, const void *block, uint64_t f0) +{ + uint64_t m[16]; + uint64_t v[16]; + + m[ 0] = load64((const uint64_t *)block + 0); + m[ 1] = load64((const uint64_t *)block + 1); + m[ 2] = load64((const uint64_t *)block + 2); + m[ 3] = load64((const uint64_t *)block + 3); + m[ 4] = load64((const uint64_t *)block + 4); + m[ 5] = load64((const uint64_t *)block + 5); + m[ 6] = load64((const uint64_t *)block + 6); + m[ 7] = load64((const uint64_t *)block + 7); + m[ 8] = load64((const uint64_t *)block + 8); + m[ 9] = load64((const uint64_t *)block + 9); + m[10] = load64((const uint64_t *)block + 10); + m[11] = load64((const uint64_t *)block + 11); + m[12] = load64((const uint64_t *)block + 12); + m[13] = load64((const uint64_t *)block + 13); + m[14] = load64((const uint64_t *)block + 14); + m[15] = load64((const uint64_t *)block + 15); + + v[ 0] = S->h[0]; + v[ 1] = S->h[1]; + v[ 2] = S->h[2]; + v[ 3] = S->h[3]; + v[ 4] = S->h[4]; + v[ 5] = S->h[5]; + v[ 6] = S->h[6]; + v[ 7] = S->h[7]; + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ f0; + v[15] = blake2b_IV[7]; + + ROUND(m, v, 0); + ROUND(m, v, 1); + ROUND(m, v, 2); + ROUND(m, v, 3); + ROUND(m, v, 4); + ROUND(m, v, 5); + ROUND(m, v, 6); + ROUND(m, v, 7); + ROUND(m, v, 8); + ROUND(m, v, 9); + ROUND(m, v, 10); + ROUND(m, v, 11); + + S->h[0] ^= v[0] ^ v[ 8]; + S->h[1] ^= v[1] ^ v[ 9]; + S->h[2] ^= v[2] ^ v[10]; + S->h[3] ^= v[3] ^ v[11]; + S->h[4] ^= v[4] ^ v[12]; + S->h[5] ^= v[5] ^ v[13]; + S->h[6] ^= v[6] ^ v[14]; + S->h[7] ^= v[7] ^ v[15]; +} + +static void blake2b_increment_counter(blake2b_state *S, uint64_t inc) +{ + S->t[0] += inc; + S->t[1] += (S->t[0] < inc); +} + +static void blake2b_init_state(blake2b_state *S) +{ + memcpy(S->h, blake2b_IV, sizeof(S->h)); + S->t[1] = S->t[0] = 0; + S->buflen = 0; +} + +void blake2b_init(blake2b_state *S, size_t outlen) +{ + blake2b_init_state(S); + /* XOR initial state with param block: */ + S->h[0] ^= (uint64_t)outlen | (UINT64_C(1) << 16) | (UINT64_C(1) << 24); +} + +void blake2b_update(blake2b_state *S, const void *in, size_t inlen) +{ + const uint8_t *pin = (const uint8_t *)in; + + if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + memcpy(&S->buf[left], pin, fill); + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, S->buf, 0); + S->buflen = 0; + inlen -= fill; + pin += fill; + /* Avoid buffer copies when possible */ + while (inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, pin, 0); + inlen -= BLAKE2B_BLOCKBYTES; + pin += BLAKE2B_BLOCKBYTES; + } + } + memcpy(&S->buf[S->buflen], pin, inlen); + S->buflen += inlen; +} + +void blake2b_final(blake2b_state *S, void *out, size_t outlen) +{ + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; + unsigned int i; + + blake2b_increment_counter(S, S->buflen); + memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ + blake2b_compress(S, S->buf, UINT64_C(0xFFFFFFFFFFFFFFFF)); + + for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ + store64(buffer + i * sizeof(uint64_t), S->h[i]); + } + + memcpy(out, buffer, outlen); + clear_internal_memory(buffer, sizeof(buffer)); + clear_internal_memory(S->buf, sizeof(S->buf)); + clear_internal_memory(S->h, sizeof(S->h)); +} + +void blake2b_long(void *out, size_t outlen, const void *in, size_t inlen) +{ + uint8_t *pout = (uint8_t *)out; + blake2b_state blake_state; + uint8_t outlen_bytes[sizeof(uint32_t)] = {0}; + + store32(outlen_bytes, (uint32_t)outlen); + if (outlen <= BLAKE2B_OUTBYTES) { + blake2b_init(&blake_state, outlen); + blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)); + blake2b_update(&blake_state, in, inlen); + blake2b_final(&blake_state, pout, outlen); + } else { + uint32_t toproduce; + uint8_t out_buffer[BLAKE2B_OUTBYTES]; + + blake2b_init(&blake_state, BLAKE2B_OUTBYTES); + blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)); + blake2b_update(&blake_state, in, inlen); + blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + + memcpy(pout, out_buffer, BLAKE2B_OUTBYTES / 2); + pout += BLAKE2B_OUTBYTES / 2; + toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2; + + while (toproduce > BLAKE2B_OUTBYTES) { + blake2b_init(&blake_state, BLAKE2B_OUTBYTES); + blake2b_update(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + + memcpy(pout, out_buffer, BLAKE2B_OUTBYTES / 2); + pout += BLAKE2B_OUTBYTES / 2; + toproduce -= BLAKE2B_OUTBYTES / 2; + } + + blake2b_init(&blake_state, toproduce); + blake2b_update(&blake_state, out_buffer, BLAKE2B_OUTBYTES); + blake2b_final(&blake_state, out_buffer, toproduce); + + memcpy(pout, out_buffer, toproduce); + + clear_internal_memory(out_buffer, sizeof(out_buffer)); + } +} diff --git a/src/3rdparty/argon2/lib/blake2/blake2.h b/src/3rdparty/argon2/lib/blake2/blake2.h new file mode 100644 index 000000000..7deeaa1fa --- /dev/null +++ b/src/3rdparty/argon2/lib/blake2/blake2.h @@ -0,0 +1,30 @@ +#ifndef ARGON2_BLAKE2_H +#define ARGON2_BLAKE2_H + +#include +#include + +enum blake2b_constant { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 +}; + +typedef struct __blake2b_state { + uint64_t h[8]; + uint64_t t[2]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + size_t buflen; +} blake2b_state; + +/* Streaming API */ +void blake2b_init(blake2b_state *S, size_t outlen); +void blake2b_update(blake2b_state *S, const void *in, size_t inlen); +void blake2b_final(blake2b_state *S, void *out, size_t outlen); + +void blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); + +#endif // ARGON2_BLAKE2_H + diff --git a/src/3rdparty/argon2/lib/core.c b/src/3rdparty/argon2/lib/core.c new file mode 100644 index 000000000..d6592a6aa --- /dev/null +++ b/src/3rdparty/argon2/lib/core.c @@ -0,0 +1,633 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . + */ + +/*For memory wiping*/ +#ifdef _MSC_VER +#include +#include /* For SecureZeroMemory */ +#endif +#if defined __STDC_LIB_EXT1__ +#define __STDC_WANT_LIB_EXT1__ 1 +#endif +#define VC_GE_2005(version) (version >= 1400) + +#include +#include +#include +#include + +#include "core.h" +#include "thread.h" +#include "blake2/blake2.h" +#include "blake2/blake2-impl.h" + +#include "genkat.h" + +#if defined(__clang__) +#if __has_attribute(optnone) +#define NOT_OPTIMIZED __attribute__((optnone)) +#endif +#elif defined(__GNUC__) +#define GCC_VERSION \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#if GCC_VERSION >= 40400 +#define NOT_OPTIMIZED __attribute__((optimize("O0"))) +#endif +#endif +#ifndef NOT_OPTIMIZED +#define NOT_OPTIMIZED +#endif + +/***************Instance and Position constructors**********/ +void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } + +void copy_block(block *dst, const block *src) { + memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); +} + +void xor_block(block *dst, const block *src) { + int i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] ^= src->v[i]; + } +} + +static void load_block(block *dst, const void *input) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); + } +} + +static void store_block(void *output, const block *src) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); + } +} + +/***************Memory functions*****************/ + +int allocate_memory(const argon2_context *context, + argon2_instance_t *instance) { + size_t blocks = instance->memory_blocks; + size_t memory_size = blocks * ARGON2_BLOCK_SIZE; + + /* 0. Check for memory supplied by user: */ + /* NOTE: Sufficient memory size is already checked in argon2_ctx_mem() */ + if (instance->memory != NULL) { + return ARGON2_OK; + } + + /* 1. Check for multiplication overflow */ + if (blocks != 0 && memory_size / ARGON2_BLOCK_SIZE != blocks) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + /* 2. Try to allocate with appropriate allocator */ + if (context->allocate_cbk) { + (context->allocate_cbk)((uint8_t **)&instance->memory, memory_size); + } else { + instance->memory = malloc(memory_size); + } + + if (instance->memory == NULL) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + return ARGON2_OK; +} + +void free_memory(const argon2_context *context, + const argon2_instance_t *instance) { + size_t memory_size = instance->memory_blocks * ARGON2_BLOCK_SIZE; + + clear_internal_memory(instance->memory, memory_size); + + if (instance->keep_memory) { + /* user-supplied memory -- do not free */ + return; + } + + if (context->free_cbk) { + (context->free_cbk)((uint8_t *)instance->memory, memory_size); + } else { + free(instance->memory); + } +} + +void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) { +#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER) + SecureZeroMemory(v, n); +#elif defined memset_s + memset_s(v, n, 0, n); +#elif defined(__OpenBSD__) + explicit_bzero(v, n); +#else + static void *(*const volatile memset_sec)(void *, int, size_t) = &memset; + memset_sec(v, 0, n); +#endif +} + +/* Memory clear flag defaults to true. */ +int FLAG_clear_internal_memory = 1; +void clear_internal_memory(void *v, size_t n) { + if (FLAG_clear_internal_memory && v) { + secure_wipe_memory(v, n); + } +} + +void finalize(const argon2_context *context, argon2_instance_t *instance) { + if (context != NULL && instance != NULL) { + block blockhash; + uint32_t l; + + copy_block(&blockhash, instance->memory + instance->lane_length - 1); + + /* XOR the last blocks */ + for (l = 1; l < instance->lanes; ++l) { + uint32_t last_block_in_lane = + l * instance->lane_length + (instance->lane_length - 1); + xor_block(&blockhash, instance->memory + last_block_in_lane); + } + + /* Hash the result */ + { + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + store_block(blockhash_bytes, &blockhash); + blake2b_long(context->out, context->outlen, blockhash_bytes, + ARGON2_BLOCK_SIZE); + /* clear blockhash and blockhash_bytes */ + clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE); + clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); + } + + if (instance->print_internals) { + print_tag(context->out, context->outlen); + } + + free_memory(context, instance); + } +} + +uint32_t index_alpha(const argon2_instance_t *instance, + const argon2_position_t *position, uint32_t pseudo_rand, + int same_lane) { + /* + * Pass 0: + * This lane : all already finished segments plus already constructed + * blocks in this segment + * Other lanes : all already finished segments + * Pass 1+: + * This lane : (SYNC_POINTS - 1) last segments plus already constructed + * blocks in this segment + * Other lanes : (SYNC_POINTS - 1) last segments + */ + uint32_t reference_area_size; + uint64_t relative_position; + uint32_t start_position, absolute_position; + + if (0 == position->pass) { + /* First pass */ + if (0 == position->slice) { + /* First slice */ + reference_area_size = + position->index - 1; /* all but the previous */ + } else { + if (same_lane) { + /* The same lane => add current segment */ + reference_area_size = + position->slice * instance->segment_length + + position->index - 1; + } else { + reference_area_size = + position->slice * instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + } else { + /* Second pass */ + if (same_lane) { + reference_area_size = instance->lane_length - + instance->segment_length + position->index - + 1; + } else { + reference_area_size = instance->lane_length - + instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + + /* 1.2.4. Mapping pseudo_rand to 0.. and produce + * relative position */ + relative_position = pseudo_rand; + relative_position = relative_position * relative_position >> 32; + relative_position = reference_area_size - 1 - + (reference_area_size * relative_position >> 32); + + /* 1.2.5 Computing starting position */ + start_position = 0; + + if (0 != position->pass) { + start_position = (position->slice == ARGON2_SYNC_POINTS - 1) + ? 0 + : (position->slice + 1) * instance->segment_length; + } + + /* 1.2.6. Computing absolute position */ + absolute_position = (start_position + relative_position) % + instance->lane_length; /* absolute position */ + return absolute_position; +} + +#ifdef _WIN32 +static unsigned __stdcall fill_segment_thr(void *thread_data) +#else +static void *fill_segment_thr(void *thread_data) +#endif +{ + argon2_thread_data *my_data = thread_data; + fill_segment(my_data->instance_ptr, my_data->pos); + argon2_thread_exit(); + return 0; +} + +/* Single-threaded version for p=1 case */ +static int fill_memory_blocks_st(argon2_instance_t *instance) { + uint32_t r, s, l; + + for (r = 0; r < instance->passes; ++r) { + for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { + for (l = 0; l < instance->lanes; ++l) { + argon2_position_t position = { r, l, (uint8_t)s, 0 }; + fill_segment(instance, position); + } + } + + if (instance->print_internals) { + internal_kat(instance, r); /* Print all memory blocks */ + } + } + return ARGON2_OK; +} + +/* Multi-threaded version for p > 1 case */ +static int fill_memory_blocks_mt(argon2_instance_t *instance) { + uint32_t r, s; + argon2_thread_handle_t *thread = NULL; + argon2_thread_data *thr_data = NULL; + int rc = ARGON2_OK; + + /* 1. Allocating space for threads */ + thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t)); + if (thread == NULL) { + rc = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + thr_data = calloc(instance->lanes, sizeof(argon2_thread_data)); + if (thr_data == NULL) { + rc = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + for (r = 0; r < instance->passes; ++r) { + for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { + uint32_t l; + + /* 2. Calling threads */ + for (l = 0; l < instance->lanes; ++l) { + argon2_position_t position; + + /* 2.1 Join a thread if limit is exceeded */ + if (l >= instance->threads) { + if (argon2_thread_join(thread[l - instance->threads])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + } + + /* 2.2 Create thread */ + position.pass = r; + position.lane = l; + position.slice = (uint8_t)s; + position.index = 0; + thr_data[l].instance_ptr = + instance; /* preparing the thread input */ + memcpy(&(thr_data[l].pos), &position, + sizeof(argon2_position_t)); + if (argon2_thread_create(&thread[l], &fill_segment_thr, + (void *)&thr_data[l])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + + /* fill_segment(instance, position); */ + /*Non-thread equivalent of the lines above */ + } + + /* 3. Joining remaining threads */ + for (l = instance->lanes - instance->threads; l < instance->lanes; + ++l) { + if (argon2_thread_join(thread[l])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + } + } + + if (instance->print_internals) { + internal_kat(instance, r); /* Print all memory blocks */ + } + } + +fail: + if (thread != NULL) { + free(thread); + } + if (thr_data != NULL) { + free(thr_data); + } + return rc; +} + +int fill_memory_blocks(argon2_instance_t *instance) { + if (instance == NULL || instance->lanes == 0) { + return ARGON2_INCORRECT_PARAMETER; + } + + return instance->threads == 1 ? + fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance); +} + +int validate_inputs(const argon2_context *context) { + if (NULL == context) { + return ARGON2_INCORRECT_PARAMETER; + } + + if (NULL == context->out) { + return ARGON2_OUTPUT_PTR_NULL; + } + + /* Validate output length */ + if (ARGON2_MIN_OUTLEN > context->outlen) { + return ARGON2_OUTPUT_TOO_SHORT; + } + + if (ARGON2_MAX_OUTLEN < context->outlen) { + return ARGON2_OUTPUT_TOO_LONG; + } + + /* Validate password (required param) */ + if (NULL == context->pwd) { + if (0 != context->pwdlen) { + return ARGON2_PWD_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) { + return ARGON2_PWD_TOO_SHORT; + } + + if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) { + return ARGON2_PWD_TOO_LONG; + } + + /* Validate salt (required param) */ + if (NULL == context->salt) { + if (0 != context->saltlen) { + return ARGON2_SALT_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_SALT_LENGTH > context->saltlen) { + return ARGON2_SALT_TOO_SHORT; + } + + if (ARGON2_MAX_SALT_LENGTH < context->saltlen) { + return ARGON2_SALT_TOO_LONG; + } + + /* Validate secret (optional param) */ + if (NULL == context->secret) { + if (0 != context->secretlen) { + return ARGON2_SECRET_PTR_MISMATCH; + } + } else { + if (ARGON2_MIN_SECRET > context->secretlen) { + return ARGON2_SECRET_TOO_SHORT; + } + if (ARGON2_MAX_SECRET < context->secretlen) { + return ARGON2_SECRET_TOO_LONG; + } + } + + /* Validate associated data (optional param) */ + if (NULL == context->ad) { + if (0 != context->adlen) { + return ARGON2_AD_PTR_MISMATCH; + } + } else { + if (ARGON2_MIN_AD_LENGTH > context->adlen) { + return ARGON2_AD_TOO_SHORT; + } + if (ARGON2_MAX_AD_LENGTH < context->adlen) { + return ARGON2_AD_TOO_LONG; + } + } + + /* Validate memory cost */ + if (ARGON2_MIN_MEMORY > context->m_cost) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + if (ARGON2_MAX_MEMORY < context->m_cost) { + return ARGON2_MEMORY_TOO_MUCH; + } + + if (context->m_cost < 8 * context->lanes) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + /* Validate time cost */ + if (ARGON2_MIN_TIME > context->t_cost) { + return ARGON2_TIME_TOO_SMALL; + } + + if (ARGON2_MAX_TIME < context->t_cost) { + return ARGON2_TIME_TOO_LARGE; + } + + /* Validate lanes */ + if (ARGON2_MIN_LANES > context->lanes) { + return ARGON2_LANES_TOO_FEW; + } + + if (ARGON2_MAX_LANES < context->lanes) { + return ARGON2_LANES_TOO_MANY; + } + + /* Validate threads */ + if (ARGON2_MIN_THREADS > context->threads) { + return ARGON2_THREADS_TOO_FEW; + } + + if (ARGON2_MAX_THREADS < context->threads) { + return ARGON2_THREADS_TOO_MANY; + } + + if (NULL != context->allocate_cbk && NULL == context->free_cbk) { + return ARGON2_FREE_MEMORY_CBK_NULL; + } + + if (NULL == context->allocate_cbk && NULL != context->free_cbk) { + return ARGON2_ALLOCATE_MEMORY_CBK_NULL; + } + + return ARGON2_OK; +} + +void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { + uint32_t l; + /* Make the first and second block in each lane as G(H0||0||i) or + G(H0||1||i) */ + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + for (l = 0; l < instance->lanes; ++l) { + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); + blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * instance->lane_length + 0], + blockhash_bytes); + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); + blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * instance->lane_length + 1], + blockhash_bytes); + } + clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); +} + +void initial_hash(uint8_t *blockhash, argon2_context *context, + argon2_type type) { + blake2b_state BlakeHash; + uint8_t value[sizeof(uint32_t)]; + + if (NULL == context || NULL == blockhash) { + return; + } + + blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); + + store32(&value, context->lanes); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->outlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->m_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->t_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->version); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, (uint32_t)type); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->pwdlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->pwd != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, + context->pwdlen); + + if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { + secure_wipe_memory(context->pwd, context->pwdlen); + context->pwdlen = 0; + } + } + + store32(&value, context->saltlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->salt != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->salt, + context->saltlen); + } + + store32(&value, context->secretlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->secret != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->secret, + context->secretlen); + + if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { + secure_wipe_memory(context->secret, context->secretlen); + context->secretlen = 0; + } + } + + store32(&value, context->adlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->ad != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->ad, + context->adlen); + } + + blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); +} + +int initialize(argon2_instance_t *instance, argon2_context *context) { + uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; + int result = ARGON2_OK; + + if (instance == NULL || context == NULL) + return ARGON2_INCORRECT_PARAMETER; + instance->context_ptr = context; + + /* 1. Memory allocation */ + + result = allocate_memory(context, instance); + if (result != ARGON2_OK) { + return result; + } + + /* 2. Initial hashing */ + /* H_0 + 8 extra bytes to produce the first blocks */ + /* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */ + /* Hashing all inputs */ + initial_hash(blockhash, context, instance->type); + /* Zeroing 8 extra bytes */ + clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, + ARGON2_PREHASH_SEED_LENGTH - + ARGON2_PREHASH_DIGEST_LENGTH); + + if (instance->print_internals) { + initial_kat(blockhash, context, instance->type); + } + + /* 3. Creating first blocks, we always have at least two blocks in a slice + */ + fill_first_blocks(blockhash, instance); + /* Clearing the hash */ + clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH); + + return ARGON2_OK; +} diff --git a/src/3rdparty/argon2/lib/core.h b/src/3rdparty/argon2/lib/core.h new file mode 100644 index 000000000..5c67fa364 --- /dev/null +++ b/src/3rdparty/argon2/lib/core.h @@ -0,0 +1,226 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . + */ + +#ifndef ARGON2_CORE_H +#define ARGON2_CORE_H + +#include "argon2.h" + +#if defined(_MSC_VER) +#define ALIGN(n) __declspec(align(16)) +#elif defined(__GNUC__) || defined(__clang) +#define ALIGN(x) __attribute__((__aligned__(x))) +#else +#define ALIGN(x) +#endif + +#define CONST_CAST(x) (x)(uintptr_t) + +/**********************Argon2 internal constants*******************************/ + +enum argon2_core_constants { + /* Memory block size in bytes */ + ARGON2_BLOCK_SIZE = 1024, + ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, + ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16, + + /* Number of pseudo-random values generated by one call to Blake in Argon2i + to + generate reference block positions */ + ARGON2_ADDRESSES_IN_BLOCK = 128, + + /* Pre-hashing digest length and its extension*/ + ARGON2_PREHASH_DIGEST_LENGTH = 64, + ARGON2_PREHASH_SEED_LENGTH = 72 +}; + +/*************************Argon2 internal data types***********************/ + +/* + * Structure for the (1KB) memory block implemented as 128 64-bit words. + * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no + * bounds checking). + */ +typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; + +/*****************Functions that work with the block******************/ + +/* Initialize each byte of the block with @in */ +void init_block_value(block *b, uint8_t in); + +/* Copy block @src to block @dst */ +void copy_block(block *dst, const block *src); + +/* XOR @src onto @dst bytewise */ +void xor_block(block *dst, const block *src); + +/* + * Argon2 instance: memory pointer, number of passes, amount of memory, type, + * and derived values. + * Used to evaluate the number and location of blocks to construct in each + * thread + */ +typedef struct Argon2_instance_t { + block *memory; /* Memory pointer */ + uint32_t version; + uint32_t passes; /* Number of passes */ + uint32_t memory_blocks; /* Number of blocks in memory */ + uint32_t segment_length; + uint32_t lane_length; + uint32_t lanes; + uint32_t threads; + argon2_type type; + int print_internals; /* whether to print the memory blocks */ + int keep_memory; + argon2_context *context_ptr; /* points back to original context */ +} argon2_instance_t; + +/* + * Argon2 position: where we construct the block right now. Used to distribute + * work between threads. + */ +typedef struct Argon2_position_t { + uint32_t pass; + uint32_t lane; + uint8_t slice; + uint32_t index; +} argon2_position_t; + +/*Struct that holds the inputs for thread handling FillSegment*/ +typedef struct Argon2_thread_data { + argon2_instance_t *instance_ptr; + argon2_position_t pos; +} argon2_thread_data; + +/*************************Argon2 core functions********************************/ + +/* Allocates memory to the given pointer, uses the appropriate allocator as + * specified in the context. Total allocated memory is num*size. + * @param context argon2_context which specifies the allocator + * @param instance the Argon2 instance + * @return ARGON2_OK if memory is allocated successfully + */ +int allocate_memory(const argon2_context *context, + argon2_instance_t *instance); + +/* + * Frees memory at the given pointer, uses the appropriate deallocator as + * specified in the context. Also cleans the memory using clear_internal_memory. + * @param context argon2_context which specifies the deallocator + * @param instance the Argon2 instance + */ +void free_memory(const argon2_context *context, + const argon2_instance_t *instance); + +/* Function that securely cleans the memory. This ignores any flags set + * regarding clearing memory. Usually one just calls clear_internal_memory. + * @param mem Pointer to the memory + * @param s Memory size in bytes + */ +void secure_wipe_memory(void *v, size_t n); + +/* Function that securely clears the memory if FLAG_clear_internal_memory is + * set. If the flag isn't set, this function does nothing. + * @param mem Pointer to the memory + * @param s Memory size in bytes + */ +ARGON2_PUBLIC void clear_internal_memory(void *v, size_t n); + +/* + * Computes absolute position of reference block in the lane following a skewed + * distribution and using a pseudo-random value as input + * @param instance Pointer to the current instance + * @param position Pointer to the current position + * @param pseudo_rand 32-bit pseudo-random value used to determine the position + * @param same_lane Indicates if the block will be taken from the current lane. + * If so we can reference the current segment + * @pre All pointers must be valid + */ +uint32_t index_alpha(const argon2_instance_t *instance, + const argon2_position_t *position, uint32_t pseudo_rand, + int same_lane); + +/* + * Function that validates all inputs against predefined restrictions and return + * an error code + * @param context Pointer to current Argon2 context + * @return ARGON2_OK if everything is all right, otherwise one of error codes + * (all defined in + */ +int validate_inputs(const argon2_context *context); + +/* + * Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears + * password and secret if needed + * @param context Pointer to the Argon2 internal structure containing memory + * pointer, and parameters for time and space requirements. + * @param blockhash Buffer for pre-hashing digest + * @param type Argon2 type + * @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes + * allocated + */ +void initial_hash(uint8_t *blockhash, argon2_context *context, + argon2_type type); + +/* + * Function creates first 2 blocks per lane + * @param instance Pointer to the current instance + * @param blockhash Pointer to the pre-hashing digest + * @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values + */ +void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance); + +/* + * Function allocates memory, hashes the inputs with Blake, and creates first + * two blocks. Returns the pointer to the main memory with 2 blocks per lane + * initialized + * @param context Pointer to the Argon2 internal structure containing memory + * pointer, and parameters for time and space requirements. + * @param instance Current Argon2 instance + * @return Zero if successful, -1 if memory failed to allocate. @context->state + * will be modified if successful. + */ +int initialize(argon2_instance_t *instance, argon2_context *context); + +/* + * XORing the last block of each lane, hashing it, making the tag. Deallocates + * the memory. + * @param context Pointer to current Argon2 context (use only the out parameters + * from it) + * @param instance Pointer to current instance of Argon2 + * @pre instance->state must point to necessary amount of memory + * @pre context->out must point to outlen bytes of memory + * @pre if context->free_cbk is not NULL, it should point to a function that + * deallocates memory + */ +void finalize(const argon2_context *context, argon2_instance_t *instance); + +/* + * Function that fills the segment using previous segments also from other + * threads + * @param instance Pointer to the current instance + * @param position Current position + * @pre all block pointers must be valid + */ +void fill_segment(const argon2_instance_t *instance, + argon2_position_t position); + +/* + * Function that fills the entire memory t_cost times based on the first two + * blocks in each lane + * @param instance Pointer to the current instance + * @return ARGON2_OK if successful, @context->state + */ +int fill_memory_blocks(argon2_instance_t *instance); + +#endif diff --git a/src/3rdparty/argon2/lib/encoding.c b/src/3rdparty/argon2/lib/encoding.c new file mode 100644 index 000000000..af56e4477 --- /dev/null +++ b/src/3rdparty/argon2/lib/encoding.c @@ -0,0 +1,432 @@ +#include +#include +#include +#include +#include "encoding.h" +#include "core.h" + +/* + * Example code for a decoder and encoder of "hash strings", with Argon2 + * parameters. + * + * This code comprises three sections: + * + * -- The first section contains generic Base64 encoding and decoding + * functions. It is conceptually applicable to any hash function + * implementation that uses Base64 to encode and decode parameters, + * salts and outputs. It could be made into a library, provided that + * the relevant functions are made public (non-static) and be given + * reasonable names to avoid collisions with other functions. + * + * -- The second section is specific to Argon2. It encodes and decodes + * the parameters, salts and outputs. It does not compute the hash + * itself. + * + * The code was originally written by Thomas Pornin , + * to whom comments and remarks may be sent. It is released under what + * should amount to Public Domain or its closest equivalent; the + * following mantra is supposed to incarnate that fact with all the + * proper legal rituals: + * + * --------------------------------------------------------------------- + * This file is provided under the terms of Creative Commons CC0 1.0 + * Public Domain Dedication. To the extent possible under law, the + * author (Thomas Pornin) has waived all copyright and related or + * neighboring rights to this file. This work is published from: Canada. + * --------------------------------------------------------------------- + * + * Copyright (c) 2015 Thomas Pornin + */ + +/* ==================================================================== */ +/* + * Common code; could be shared between different hash functions. + * + * Note: the Base64 functions below assume that uppercase letters (resp. + * lowercase letters) have consecutive numerical codes, that fit on 8 + * bits. All modern systems use ASCII-compatible charsets, where these + * properties are true. If you are stuck with a dinosaur of a system + * that still defaults to EBCDIC then you already have much bigger + * interoperability issues to deal with. + */ + +/* + * Some macros for constant-time comparisons. These work over values in + * the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true". + */ +#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF) +#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF) +#define GE(x, y) (GT(y, x) ^ 0xFF) +#define LT(x, y) GT(y, x) +#define LE(x, y) GE(y, x) + +/* + * Convert value x (0..63) to corresponding Base64 character. + */ +static int b64_byte_to_char(unsigned x) { + return (LT(x, 26) & (x + 'A')) | + (GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) | + (GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') | + (EQ(x, 63) & '/'); +} + +/* + * Convert character c to the corresponding 6-bit value. If character c + * is not a Base64 character, then 0xFF (255) is returned. + */ +static unsigned b64_char_to_byte(int c) { + unsigned x; + + x = (GE(c, 'A') & LE(c, 'Z') & (c - 'A')) | + (GE(c, 'a') & LE(c, 'z') & (c - ('a' - 26))) | + (GE(c, '0') & LE(c, '9') & (c - ('0' - 52))) | (EQ(c, '+') & 62) | + (EQ(c, '/') & 63); + return x | (EQ(x, 0) & (EQ(c, 'A') ^ 0xFF)); +} + +/* + * Convert some bytes to Base64. 'dst_len' is the length (in characters) + * of the output buffer 'dst'; if that buffer is not large enough to + * receive the result (including the terminating 0), then (size_t)-1 + * is returned. Otherwise, the zero-terminated Base64 string is written + * in the buffer, and the output length (counted WITHOUT the terminating + * zero) is returned. + */ +static size_t to_base64(char *dst, size_t dst_len, const void *src, + size_t src_len) { + size_t olen; + const unsigned char *buf; + unsigned acc, acc_len; + + olen = (src_len / 3) << 2; + switch (src_len % 3) { + case 2: + olen++; + /* fall through */ + case 1: + olen += 2; + break; + } + if (dst_len <= olen) { + return (size_t)-1; + } + acc = 0; + acc_len = 0; + buf = (const unsigned char *)src; + while (src_len-- > 0) { + acc = (acc << 8) + (*buf++); + acc_len += 8; + while (acc_len >= 6) { + acc_len -= 6; + *dst++ = (char)b64_byte_to_char((acc >> acc_len) & 0x3F); + } + } + if (acc_len > 0) { + *dst++ = (char)b64_byte_to_char((acc << (6 - acc_len)) & 0x3F); + } + *dst++ = 0; + return olen; +} + +/* + * Decode Base64 chars into bytes. The '*dst_len' value must initially + * contain the length of the output buffer '*dst'; when the decoding + * ends, the actual number of decoded bytes is written back in + * '*dst_len'. + * + * Decoding stops when a non-Base64 character is encountered, or when + * the output buffer capacity is exceeded. If an error occurred (output + * buffer is too small, invalid last characters leading to unprocessed + * buffered bits), then NULL is returned; otherwise, the returned value + * points to the first non-Base64 character in the source stream, which + * may be the terminating zero. + */ +static const char *from_base64(void *dst, size_t *dst_len, const char *src) { + size_t len; + unsigned char *buf; + unsigned acc, acc_len; + + buf = (unsigned char *)dst; + len = 0; + acc = 0; + acc_len = 0; + for (;;) { + unsigned d; + + d = b64_char_to_byte(*src); + if (d == 0xFF) { + break; + } + src++; + acc = (acc << 6) + d; + acc_len += 6; + if (acc_len >= 8) { + acc_len -= 8; + if ((len++) >= *dst_len) { + return NULL; + } + *buf++ = (acc >> acc_len) & 0xFF; + } + } + + /* + * If the input length is equal to 1 modulo 4 (which is + * invalid), then there will remain 6 unprocessed bits; + * otherwise, only 0, 2 or 4 bits are buffered. The buffered + * bits must also all be zero. + */ + if (acc_len > 4 || (acc & (((unsigned)1 << acc_len) - 1)) != 0) { + return NULL; + } + *dst_len = len; + return src; +} + +/* + * Decode decimal integer from 'str'; the value is written in '*v'. + * Returned value is a pointer to the next non-decimal character in the + * string. If there is no digit at all, or the value encoding is not + * minimal (extra leading zeros), or the value does not fit in an + * 'unsigned long', then NULL is returned. + */ +static const char *decode_decimal(const char *str, unsigned long *v) { + const char *orig; + unsigned long acc; + + acc = 0; + for (orig = str;; str++) { + int c; + + c = *str; + if (c < '0' || c > '9') { + break; + } + c -= '0'; + if (acc > (ULONG_MAX / 10)) { + return NULL; + } + acc *= 10; + if ((unsigned long)c > (ULONG_MAX - acc)) { + return NULL; + } + acc += (unsigned long)c; + } + if (str == orig || (*orig == '0' && str != (orig + 1))) { + return NULL; + } + *v = acc; + return str; +} + +/* ==================================================================== */ +/* + * Code specific to Argon2. + * + * The code below applies the following format: + * + * $argon2[$v=]$m=,t=,p=$$ + * + * where is either 'd', 'id', or 'i', is a decimal integer (positive, + * fits in an 'unsigned long'), and is Base64-encoded data (no '=' padding + * characters, no newline or whitespace). + * + * The last two binary chunks (encoded in Base64) are, in that order, + * the salt and the output. Both are required. The binary salt length and the + * output length must be in the allowed ranges defined in argon2.h. + * + * The ctx struct must contain buffers large enough to hold the salt and pwd + * when it is fed into decode_string. + */ + +int decode_string(argon2_context *ctx, const char *str, argon2_type type) { + +/* check for prefix */ +#define CC(prefix) \ + do { \ + size_t cc_len = strlen(prefix); \ + if (strncmp(str, prefix, cc_len) != 0) { \ + return ARGON2_DECODING_FAIL; \ + } \ + str += cc_len; \ + } while ((void)0, 0) + +/* optional prefix checking with supplied code */ +#define CC_opt(prefix, code) \ + do { \ + size_t cc_len = strlen(prefix); \ + if (strncmp(str, prefix, cc_len) == 0) { \ + str += cc_len; \ + { code; } \ + } \ + } while ((void)0, 0) + +/* Decoding prefix into uint32_t decimal */ +#define DECIMAL_U32(x) \ + do { \ + unsigned long dec_x; \ + str = decode_decimal(str, &dec_x); \ + if (str == NULL || dec_x > UINT32_MAX) { \ + return ARGON2_DECODING_FAIL; \ + } \ + (x) = (uint32_t)dec_x; \ + } while ((void)0, 0) + +/* Decoding base64 into a binary buffer */ +#define BIN(buf, max_len, len) \ + do { \ + size_t bin_len = (max_len); \ + str = from_base64(buf, &bin_len, str); \ + if (str == NULL || bin_len > UINT32_MAX) { \ + return ARGON2_DECODING_FAIL; \ + } \ + (len) = (uint32_t)bin_len; \ + } while ((void)0, 0) + + size_t maxsaltlen = ctx->saltlen; + size_t maxoutlen = ctx->outlen; + int validation_result; + const char* type_string; + + /* We should start with the argon2_type we are using */ + type_string = argon2_type2string(type, 0); + if (!type_string) { + return ARGON2_INCORRECT_TYPE; + } + + CC("$"); + CC(type_string); + + /* Reading the version number if the default is suppressed */ + ctx->version = ARGON2_VERSION_10; + CC_opt("$v=", DECIMAL_U32(ctx->version)); + + CC("$m="); + DECIMAL_U32(ctx->m_cost); + CC(",t="); + DECIMAL_U32(ctx->t_cost); + CC(",p="); + DECIMAL_U32(ctx->lanes); + ctx->threads = ctx->lanes; + + CC("$"); + BIN(ctx->salt, maxsaltlen, ctx->saltlen); + CC("$"); + BIN(ctx->out, maxoutlen, ctx->outlen); + + /* The rest of the fields get the default values */ + ctx->secret = NULL; + ctx->secretlen = 0; + ctx->ad = NULL; + ctx->adlen = 0; + ctx->allocate_cbk = NULL; + ctx->free_cbk = NULL; + ctx->flags = ARGON2_DEFAULT_FLAGS; + + /* On return, must have valid context */ + validation_result = validate_inputs(ctx); + if (validation_result != ARGON2_OK) { + return validation_result; + } + + /* Can't have any additional characters */ + if (*str == 0) { + return ARGON2_OK; + } else { + return ARGON2_DECODING_FAIL; + } +#undef CC +#undef CC_opt +#undef DECIMAL_U32 +#undef BIN +} + +int encode_string(char *dst, size_t dst_len, argon2_context *ctx, + argon2_type type) { +#define SS(str) \ + do { \ + size_t pp_len = strlen(str); \ + if (pp_len >= dst_len) { \ + return ARGON2_ENCODING_FAIL; \ + } \ + memcpy(dst, str, pp_len + 1); \ + dst += pp_len; \ + dst_len -= pp_len; \ + } while ((void)0, 0) + +#define SX(x) \ + do { \ + char tmp[30]; \ + sprintf(tmp, "%lu", (unsigned long)(x)); \ + SS(tmp); \ + } while ((void)0, 0) + +#define SB(buf, len) \ + do { \ + size_t sb_len = to_base64(dst, dst_len, buf, len); \ + if (sb_len == (size_t)-1) { \ + return ARGON2_ENCODING_FAIL; \ + } \ + dst += sb_len; \ + dst_len -= sb_len; \ + } while ((void)0, 0) + + const char* type_string = argon2_type2string(type, 0); + int validation_result = validate_inputs(ctx); + + if (!type_string) { + return ARGON2_ENCODING_FAIL; + } + + if (validation_result != ARGON2_OK) { + return validation_result; + } + + SS("$"); + SS(type_string); + + SS("$v="); + SX(ctx->version); + + SS("$m="); + SX(ctx->m_cost); + SS(",t="); + SX(ctx->t_cost); + SS(",p="); + SX(ctx->lanes); + + SS("$"); + SB(ctx->salt, ctx->saltlen); + + SS("$"); + SB(ctx->out, ctx->outlen); + return ARGON2_OK; + +#undef SS +#undef SX +#undef SB +} + +size_t b64len(uint32_t len) { + size_t olen = ((size_t)len / 3) << 2; + + switch (len % 3) { + case 2: + olen++; + /* fall through */ + case 1: + olen += 2; + break; + } + + return olen; +} + +size_t numlen(uint32_t num) { + size_t len = 1; + while (num >= 10) { + ++len; + num = num / 10; + } + return len; +} + diff --git a/src/3rdparty/argon2/lib/encoding.h b/src/3rdparty/argon2/lib/encoding.h new file mode 100644 index 000000000..e7834e4f5 --- /dev/null +++ b/src/3rdparty/argon2/lib/encoding.h @@ -0,0 +1,40 @@ +#ifndef ENCODING_H +#define ENCODING_H +#include "argon2.h" + +#define ARGON2_MAX_DECODED_LANES UINT32_C(255) +#define ARGON2_MIN_DECODED_SALT_LEN UINT32_C(8) +#define ARGON2_MIN_DECODED_OUT_LEN UINT32_C(12) + +/* +* encode an Argon2 hash string into the provided buffer. 'dst_len' +* contains the size, in characters, of the 'dst' buffer; if 'dst_len' +* is less than the number of required characters (including the +* terminating 0), then this function returns ARGON2_ENCODING_ERROR. +* +* on success, ARGON2_OK is returned. +*/ +int encode_string(char *dst, size_t dst_len, argon2_context *ctx, + argon2_type type); + +/* +* Decodes an Argon2 hash string into the provided structure 'ctx'. +* The only fields that must be set prior to this call are ctx.saltlen and +* ctx.outlen (which must be the maximal salt and out length values that are +* allowed), ctx.salt and ctx.out (which must be buffers of the specified +* length), and ctx.pwd and ctx.pwdlen which must hold a valid password. +* +* Invalid input string causes an error. On success, the ctx is valid and all +* fields have been initialized. +* +* Returned value is ARGON2_OK on success, other ARGON2_ codes on error. +*/ +int decode_string(argon2_context *ctx, const char *str, argon2_type type); + +/* Returns the length of the encoded byte stream with length len */ +size_t b64len(uint32_t len); + +/* Returns the length of the encoded number num */ +size_t numlen(uint32_t num); + +#endif diff --git a/src/3rdparty/argon2/lib/genkat.c b/src/3rdparty/argon2/lib/genkat.c new file mode 100644 index 000000000..fd5663bfb --- /dev/null +++ b/src/3rdparty/argon2/lib/genkat.c @@ -0,0 +1,117 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . + */ + +#include +#include + +#include "genkat.h" + +void initial_kat(const uint8_t *blockhash, const argon2_context *context, + argon2_type type) { + unsigned i; + + if (blockhash != NULL && context != NULL) { + printf("=======================================\n"); + + printf("%s version number %d\n", argon2_type2string(type, 1), + context->version); + + printf("=======================================\n"); + + + printf("Memory: %u KiB, Iterations: %u, Parallelism: %u lanes, Tag " + "length: %u bytes\n", + context->m_cost, context->t_cost, context->lanes, + context->outlen); + + printf("Password[%u]: ", context->pwdlen); + + if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { + printf("CLEARED\n"); + } else { + for (i = 0; i < context->pwdlen; ++i) { + printf("%2.2x ", ((unsigned char *)context->pwd)[i]); + } + + printf("\n"); + } + + printf("Salt[%u]: ", context->saltlen); + + for (i = 0; i < context->saltlen; ++i) { + printf("%2.2x ", ((unsigned char *)context->salt)[i]); + } + + printf("\n"); + + printf("Secret[%u]: ", context->secretlen); + + if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { + printf("CLEARED\n"); + } else { + for (i = 0; i < context->secretlen; ++i) { + printf("%2.2x ", ((unsigned char *)context->secret)[i]); + } + + printf("\n"); + } + + printf("Associated data[%u]: ", context->adlen); + + for (i = 0; i < context->adlen; ++i) { + printf("%2.2x ", ((unsigned char *)context->ad)[i]); + } + + printf("\n"); + + printf("Pre-hashing digest: "); + + for (i = 0; i < ARGON2_PREHASH_DIGEST_LENGTH; ++i) { + printf("%2.2x ", ((unsigned char *)blockhash)[i]); + } + + printf("\n"); + } +} + +void print_tag(const void *out, uint32_t outlen) { + unsigned i; + if (out != NULL) { + printf("Tag: "); + + for (i = 0; i < outlen; ++i) { + printf("%2.2x ", ((uint8_t *)out)[i]); + } + + printf("\n"); + } +} + +void internal_kat(const argon2_instance_t *instance, uint32_t pass) { + + if (instance != NULL) { + uint32_t i, j; + printf("\n After pass %u:\n", pass); + + for (i = 0; i < instance->memory_blocks; ++i) { + uint32_t how_many_words = + (instance->memory_blocks > ARGON2_QWORDS_IN_BLOCK) + ? 1 + : ARGON2_QWORDS_IN_BLOCK; + + for (j = 0; j < how_many_words; ++j) + printf("Block %.4u [%3u]: %016" PRIx64 "\n", i, j, + instance->memory[i].v[j]); + } + } +} diff --git a/src/3rdparty/argon2/lib/genkat.h b/src/3rdparty/argon2/lib/genkat.h new file mode 100644 index 000000000..815c09b50 --- /dev/null +++ b/src/3rdparty/argon2/lib/genkat.h @@ -0,0 +1,47 @@ +/* + * Argon2 source code package + * + * Written by Daniel Dinu and Dmitry Khovratovich, 2015 + * + * This work is licensed under a Creative Commons CC0 1.0 License/Waiver. + * + * You should have received a copy of the CC0 Public Domain Dedication along + * with + * this software. If not, see + * . + */ + +#ifndef ARGON2_KAT_H +#define ARGON2_KAT_H + +#include "core.h" + +/* + * Initial KAT function that prints the inputs to the file + * @param blockhash Array that contains pre-hashing digest + * @param context Holds inputs + * @param type Argon2 type + * @pre blockhash must point to INPUT_INITIAL_HASH_LENGTH bytes + * @pre context member pointers must point to allocated memory of size according + * to the length values + */ +void initial_kat(const uint8_t *blockhash, const argon2_context *context, + argon2_type type); + +/* + * Function that prints the output tag + * @param out output array pointer + * @param outlen digest length + * @pre out must point to @a outlen bytes + **/ +void print_tag(const void *out, uint32_t outlen); + +/* + * Function that prints the internal state at given moment + * @param instance pointer to the current instance + * @param pass current pass number + * @pre instance must have necessary memory allocated + **/ +void internal_kat(const argon2_instance_t *instance, uint32_t pass); + +#endif diff --git a/src/3rdparty/argon2/lib/impl-select.c b/src/3rdparty/argon2/lib/impl-select.c new file mode 100644 index 000000000..84c62aec3 --- /dev/null +++ b/src/3rdparty/argon2/lib/impl-select.c @@ -0,0 +1,120 @@ +#include +#include + +#include "impl-select.h" + +#include "argon2.h" + +#define log_maybe(file, ...) \ + do { \ + if (file) { \ + fprintf(file, __VA_ARGS__); \ + } \ + } while((void)0, 0) + +#define BENCH_SAMPLES 512 +#define BENCH_MEM_BLOCKS 512 + +static argon2_impl selected_argon_impl = { + "(default)", NULL, fill_segment_default +}; + +/* the benchmark routine is not thread-safe, so we can use a global var here: */ +static block memory[BENCH_MEM_BLOCKS]; + +static uint64_t benchmark_impl(const argon2_impl *impl) { + clock_t time; + unsigned int i; + uint64_t bench; + argon2_instance_t instance; + argon2_position_t pos; + + memset(memory, 0, sizeof(memory)); + + instance.version = ARGON2_VERSION_NUMBER; + instance.memory = memory; + instance.passes = 1; + instance.memory_blocks = BENCH_MEM_BLOCKS; + instance.segment_length = BENCH_MEM_BLOCKS / ARGON2_SYNC_POINTS; + instance.lane_length = instance.segment_length * ARGON2_SYNC_POINTS; + instance.lanes = 1; + instance.threads = 1; + instance.type = Argon2_i; + + pos.lane = 0; + pos.pass = 0; + pos.slice = 0; + pos.index = 0; + + /* warm-up cache: */ + impl->fill_segment(&instance, pos); + + /* OK, now measure: */ + bench = 0; + time = clock(); + for (i = 0; i < BENCH_SAMPLES; i++) { + impl->fill_segment(&instance, pos); + } + time = clock() - time; + bench = (uint64_t)time; + return bench; +} + +static void select_impl(FILE *out, const char *prefix) +{ + argon2_impl_list impls; + unsigned int i; + const argon2_impl *best_impl = NULL; + uint64_t best_bench = UINT_MAX; + + log_maybe(out, "%sSelecting best fill_segment implementation...\n", prefix); + + argon2_get_impl_list(&impls); + + for (i = 0; i < impls.count; i++) { + const argon2_impl *impl = &impls.entries[i]; + uint64_t bench; + + log_maybe(out, "%s%s: Checking availability... ", prefix, impl->name); + if (impl->check != NULL && !impl->check()) { + log_maybe(out, "FAILED!\n"); + continue; + } + log_maybe(out, "OK!\n"); + + log_maybe(out, "%s%s: Benchmarking...\n", prefix, impl->name); + bench = benchmark_impl(impl); + log_maybe(out, "%s%s: Benchmark result: %llu\n", prefix, impl->name, + (unsigned long long)bench); + + if (bench < best_bench) { + best_bench = bench; + best_impl = impl; + } + } + + if (best_impl != NULL) { + log_maybe(out, + "%sBest implementation: '%s' (bench %llu)\n", prefix, + best_impl->name, (unsigned long long)best_bench); + + selected_argon_impl = *best_impl; + } else { + log_maybe(out, + "%sNo optimized implementation available, using default!\n", + prefix); + } +} + +void fill_segment(const argon2_instance_t *instance, argon2_position_t position) +{ + selected_argon_impl.fill_segment(instance, position); +} + +void argon2_select_impl(FILE *out, const char *prefix) +{ + if (prefix == NULL) { + prefix = ""; + } + select_impl(out, prefix); +} diff --git a/src/3rdparty/argon2/lib/impl-select.h b/src/3rdparty/argon2/lib/impl-select.h new file mode 100644 index 000000000..e4acbd1fe --- /dev/null +++ b/src/3rdparty/argon2/lib/impl-select.h @@ -0,0 +1,23 @@ +#ifndef ARGON2_IMPL_SELECT_H +#define ARGON2_IMPL_SELECT_H + +#include "core.h" + +typedef struct Argon2_impl { + const char *name; + int (*check)(void); + void (*fill_segment)(const argon2_instance_t *instance, + argon2_position_t position); +} argon2_impl; + +typedef struct Argon2_impl_list { + const argon2_impl *entries; + size_t count; +} argon2_impl_list; + +void argon2_get_impl_list(argon2_impl_list *list); +void fill_segment_default(const argon2_instance_t *instance, + argon2_position_t position); + +#endif // ARGON2_IMPL_SELECT_H + diff --git a/src/3rdparty/argon2/lib/thread.c b/src/3rdparty/argon2/lib/thread.c new file mode 100644 index 000000000..412261f12 --- /dev/null +++ b/src/3rdparty/argon2/lib/thread.c @@ -0,0 +1,36 @@ +#include "thread.h" +#if defined(_WIN32) +#include +#endif + +int argon2_thread_create(argon2_thread_handle_t *handle, + argon2_thread_func_t func, void *args) { + if (NULL == handle || func == NULL) { + return -1; + } +#if defined(_WIN32) + *handle = _beginthreadex(NULL, 0, func, args, 0, NULL); + return *handle != 0 ? 0 : -1; +#else + return pthread_create(handle, NULL, func, args); +#endif +} + +int argon2_thread_join(argon2_thread_handle_t handle) { +#if defined(_WIN32) + if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) { + return CloseHandle((HANDLE)handle) != 0 ? 0 : -1; + } + return -1; +#else + return pthread_join(handle, NULL); +#endif +} + +void argon2_thread_exit(void) { +#if defined(_WIN32) + _endthreadex(0); +#else + pthread_exit(NULL); +#endif +} diff --git a/src/3rdparty/argon2/lib/thread.h b/src/3rdparty/argon2/lib/thread.h new file mode 100644 index 000000000..f1ef5191a --- /dev/null +++ b/src/3rdparty/argon2/lib/thread.h @@ -0,0 +1,47 @@ +#ifndef ARGON2_THREAD_H +#define ARGON2_THREAD_H +/* + Here we implement an abstraction layer for the simpĺe requirements + of the Argon2 code. We only require 3 primitives---thread creation, + joining, and termination---so full emulation of the pthreads API + is unwarranted. Currently we wrap pthreads and Win32 threads. + + The API defines 2 types: the function pointer type, + argon2_thread_func_t, + and the type of the thread handle---argon2_thread_handle_t. +*/ +#if defined(_WIN32) +#include +#include +typedef unsigned(__stdcall *argon2_thread_func_t)(void *); +typedef uintptr_t argon2_thread_handle_t; +#else +#include +typedef void *(*argon2_thread_func_t)(void *); +typedef pthread_t argon2_thread_handle_t; +#endif + +/* Creates a thread + * @param handle pointer to a thread handle, which is the output of this + * function. Must not be NULL. + * @param func A function pointer for the thread's entry point. Must not be + * NULL. + * @param args Pointer that is passed as an argument to @func. May be NULL. + * @return 0 if @handle and @func are valid pointers and a thread is successfuly + * created. + */ +int argon2_thread_create(argon2_thread_handle_t *handle, + argon2_thread_func_t func, void *args); + +/* Waits for a thread to terminate + * @param handle Handle to a thread created with argon2_thread_create. + * @return 0 if @handle is a valid handle, and joining completed successfully. +*/ +int argon2_thread_join(argon2_thread_handle_t handle); + +/* Terminate the current thread. Must be run inside a thread created by + * argon2_thread_create. +*/ +void argon2_thread_exit(void); + +#endif diff --git a/src/backend/common/Workers.cpp b/src/backend/common/Workers.cpp index 036bc8b66..2cb4549b3 100644 --- a/src/backend/common/Workers.cpp +++ b/src/backend/common/Workers.cpp @@ -166,7 +166,7 @@ namespace xmrig { template<> xmrig::IWorker *xmrig::Workers::create(Thread *handle) { - const int intensity = handle->config().intensity; + const uint32_t intensity = static_cast(handle->config().intensity); # if defined(XMRIG_ALGO_RANDOMX) || defined(XMRIG_ALGO_CN_GPU) if (intensity > handle->config().algorithm.maxIntensity()) { diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 14ef1797e..0fc77f8d9 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -156,6 +156,13 @@ bool xmrig::CpuWorker::selfTest() } # endif +# ifdef XMRIG_ALGO_ARGON2 + if (m_algorithm.family() == Algorithm::ARGON2) { + return verify(Algorithm::AR2_CHUKWA, argon2_chukwa_test_out) && + verify(Algorithm::AR2_WRKZ, argon2_wrkz_test_out); + } +# endif + return false; } diff --git a/src/crypto/argon2/Argon2.h b/src/crypto/argon2/Argon2.h new file mode 100644 index 000000000..709be703d --- /dev/null +++ b/src/crypto/argon2/Argon2.h @@ -0,0 +1,67 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#ifndef XMRIG_ARGON2_H +#define XMRIG_ARGON2_H + + +#include "3rdparty/argon2.h" +#include "crypto/common/Algorithm.h" + + +struct cryptonight_ctx; + + +namespace xmrig { + + +template +inline void argon2_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__, uint64_t) +{ +// static bool argon_optimization_selected = false; + +// if (!argon_optimization_selected) { +// argon2_select_impl(stdout, nullptr); + +// argon_optimization_selected = true; +// } + + uint8_t salt[16]; + + memcpy(salt, input, sizeof(salt)); + + if (ALGO == Algorithm::AR2_CHUKWA) { + argon2id_hash_raw(3, 512, 1, input, size, salt, 16, output, 32); + } + else if (ALGO == Algorithm::AR2_WRKZ) { + argon2id_hash_raw(4, 256, 1, input, size, salt, 16, output, 32); + } +} + + +} // namespace xmrig + + +#endif /* XMRIG_ARGON2_H */ diff --git a/src/crypto/cn/CnAlgo.h b/src/crypto/cn/CnAlgo.h index ed64331a9..6564e8d07 100644 --- a/src/crypto/cn/CnAlgo.h +++ b/src/crypto/cn/CnAlgo.h @@ -132,6 +132,10 @@ private: 0, // RX_0 0, // RX_WOW 0, // RX_LOKI +# endif +# ifdef XMRIG_ALGO_ARGON2 + 0, // AR2_CHUKWA + 0, // AR2_WRKZ # endif }; @@ -167,6 +171,10 @@ private: 0, // RX_0 0, // RX_WOW 0, // RX_LOKI +# endif +# ifdef XMRIG_ALGO_ARGON2 + 0, // AR2_CHUKWA + 0, // AR2_WRKZ # endif }; @@ -202,6 +210,10 @@ private: Algorithm::INVALID, // RX_0 Algorithm::INVALID, // RX_WOW Algorithm::INVALID, // RX_LOKI +# endif +# ifdef XMRIG_ALGO_ARGON2 + Algorithm::INVALID, // AR2_CHUKWA + Algorithm::INVALID, // AR2_WRKZ # endif }; }; diff --git a/src/crypto/cn/CnHash.cpp b/src/crypto/cn/CnHash.cpp index a2f8880cf..dcb5a2d0b 100644 --- a/src/crypto/cn/CnHash.cpp +++ b/src/crypto/cn/CnHash.cpp @@ -38,6 +38,11 @@ #endif +#ifdef XMRIG_ALGO_ARGON2 +# include "crypto/argon2/Argon2.h" +#endif + + #define ADD_FN(algo) \ m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash; \ m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash; \ @@ -249,6 +254,13 @@ xmrig::CnHash::CnHash() ADD_FN_ASM(Algorithm::CN_PICO_0); # endif +# ifdef XMRIG_ALGO_ARGON2 + m_map[Algorithm::AR2_CHUKWA][AV_SINGLE][Assembly::NONE] = argon2_single_hash; + m_map[Algorithm::AR2_CHUKWA][AV_SINGLE_SOFT][Assembly::NONE] = argon2_single_hash; + m_map[Algorithm::AR2_WRKZ][AV_SINGLE][Assembly::NONE] = argon2_single_hash; + m_map[Algorithm::AR2_WRKZ][AV_SINGLE_SOFT][Assembly::NONE] = argon2_single_hash; +# endif + # ifdef XMRIG_FEATURE_ASM patchAsmVariants(); # endif diff --git a/src/crypto/cn/CryptoNight_test.h b/src/crypto/cn/CryptoNight_test.h index 77cbbb8e1..cc8130b86 100644 --- a/src/crypto/cn/CryptoNight_test.h +++ b/src/crypto/cn/CryptoNight_test.h @@ -30,6 +30,9 @@ #include +namespace xmrig { + + const static uint8_t test_input[380] = { 0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00, 0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B, @@ -355,19 +358,29 @@ const static uint8_t test_output_pico_trtl[160] = { #ifdef XMRIG_ALGO_CN_GPU // "cn/gpu" -const static uint8_t test_output_gpu[160] = { +const static uint8_t test_output_gpu[32] = { 0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7, - 0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF }; #endif +#ifdef XMRIG_ALGO_ARGON2 +// "argon2/chukwa" +const static uint8_t argon2_chukwa_test_out[32] = { + 0xC1, 0x58, 0xA1, 0x05, 0xAE, 0x75, 0xC7, 0x56, 0x1C, 0xFD, 0x02, 0x90, 0x83, 0xA4, 0x7A, 0x87, + 0x65, 0x3D, 0x51, 0xF9, 0x14, 0x12, 0x8E, 0x21, 0xC1, 0x97, 0x1D, 0x8B, 0x10, 0xC4, 0x90, 0x34 +}; + +// "argon2/wrkz" +const static uint8_t argon2_wrkz_test_out[32] = { + 0x35, 0xE0, 0x83, 0xD4, 0xB9, 0xC6, 0x4C, 0x2A, 0x68, 0x82, 0x0A, 0x43, 0x1F, 0x61, 0x31, 0x19, + 0x98, 0xA8, 0xCD, 0x18, 0x64, 0xDB, 0xA4, 0x07, 0x7E, 0x25, 0xB7, 0xF1, 0x21, 0xD5, 0x4B, 0xD1, +}; +#endif + + +} // namespace xmrig + + #endif /* XMRIG_CRYPTONIGHT_TEST_H */ diff --git a/src/crypto/common/Algorithm.cpp b/src/crypto/common/Algorithm.cpp index db6cb234b..d3e3fa891 100644 --- a/src/crypto/common/Algorithm.cpp +++ b/src/crypto/common/Algorithm.cpp @@ -116,30 +116,17 @@ static AlgoName const algorithm_names[] = { { "randomx/loki", "rx/loki", Algorithm::RX_LOKI }, { "RandomXL", nullptr, Algorithm::RX_LOKI }, # endif +# ifdef XMRIG_ALGO_ARGON2 + { "argon2/chukwa", nullptr, Algorithm::AR2_CHUKWA }, + { "chukwa", nullptr, Algorithm::AR2_CHUKWA }, + { "argon2/wrkz", nullptr, Algorithm::AR2_WRKZ }, +# endif }; } /* namespace xmrig */ -int xmrig::Algorithm::maxIntensity() const -{ -# ifdef XMRIG_ALGO_RANDOMX - if (family() == RANDOM_X) { - return 1; - } -# endif - -# ifdef XMRIG_ALGO_CN_GPU - if (m_id == CN_GPU) { - return 1; - } -# endif - - return 5; -} - - rapidjson::Value xmrig::Algorithm::toJSON() const { using namespace rapidjson; @@ -170,6 +157,8 @@ size_t xmrig::Algorithm::l2() const size_t xmrig::Algorithm::l3() const { + constexpr size_t oneMiB = 0x100000; + const Family f = family(); assert(f != UNKNOWN); @@ -179,8 +168,6 @@ size_t xmrig::Algorithm::l3() const # ifdef XMRIG_ALGO_RANDOMX if (f == RANDOM_X) { - constexpr size_t oneMiB = 0x100000; - switch (m_id) { case RX_0: case RX_LOKI: @@ -195,10 +182,49 @@ size_t xmrig::Algorithm::l3() const } # endif +# ifdef XMRIG_ALGO_ARGON2 + if (f == ARGON2) { + switch (m_id) { + case AR2_CHUKWA: + return oneMiB / 2; + + case AR2_WRKZ: + return oneMiB / 4; + + default: + break; + } + } +# endif + return 0; } +uint32_t xmrig::Algorithm::maxIntensity() const +{ +# ifdef XMRIG_ALGO_RANDOMX + if (family() == RANDOM_X) { + return 1; + } +# endif + +# ifdef XMRIG_ALGO_ARGON2 + if (family() == ARGON2) { + return 1; + } +# endif + +# ifdef XMRIG_ALGO_CN_GPU + if (m_id == CN_GPU) { + return 1; + } +# endif + + return 5; +} + + xmrig::Algorithm::Family xmrig::Algorithm::family(Id id) { switch (id) { @@ -244,6 +270,12 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id) return RANDOM_X; # endif +# ifdef XMRIG_ALGO_ARGON2 + case AR2_CHUKWA: + case AR2_WRKZ: + return ARGON2; +# endif + case INVALID: case MAX: return UNKNOWN; @@ -273,7 +305,7 @@ const char *xmrig::Algorithm::name(bool shortName) const { for (size_t i = 0; i < ARRAY_SIZE(algorithm_names); i++) { if (algorithm_names[i].id == m_id) { - return shortName ? algorithm_names[i].shortName : algorithm_names[i].name; + return (shortName && algorithm_names[i].shortName) ? algorithm_names[i].shortName : algorithm_names[i].name; } } diff --git a/src/crypto/common/Algorithm.h b/src/crypto/common/Algorithm.h index 92fcc61e7..5226133d8 100644 --- a/src/crypto/common/Algorithm.h +++ b/src/crypto/common/Algorithm.h @@ -72,6 +72,10 @@ public: RX_0, // "rx/0" RandomX (reference configuration). RX_WOW, // "rx/wow" RandomWOW (Wownero). RX_LOKI, // "rx/loki" RandomXL (Loki). +# endif +# ifdef XMRIG_ALGO_ARGON2 + AR2_CHUKWA, // "argon2/chukwa" + AR2_WRKZ, // "argon2/wrkz" # endif MAX }; @@ -82,7 +86,8 @@ public: CN_LITE, CN_HEAVY, CN_PICO, - RANDOM_X + RANDOM_X, + ARGON2 }; inline Algorithm() {} @@ -102,10 +107,10 @@ public: inline bool operator==(const Algorithm &other) const { return isEqual(other); } inline operator Algorithm::Id() const { return m_id; } - int maxIntensity() const; rapidjson::Value toJSON() const; size_t l2() const; size_t l3() const; + uint32_t maxIntensity() const; static Family family(Id id); static Id parse(const char *name); diff --git a/src/crypto/rx/RxAlgo.cpp b/src/crypto/rx/RxAlgo.cpp index b0e92e6e0..daf3ec2da 100644 --- a/src/crypto/rx/RxAlgo.cpp +++ b/src/crypto/rx/RxAlgo.cpp @@ -47,23 +47,3 @@ xmrig::Algorithm::Id xmrig::RxAlgo::apply(Algorithm::Id algorithm) return algorithm; } - - -size_t xmrig::RxAlgo::l3(Algorithm::Id algorithm) -{ - switch (algorithm) { - case Algorithm::RX_0: - return RandomX_MoneroConfig.ScratchpadL3_Size; - - case Algorithm::RX_WOW: - return RandomX_WowneroConfig.ScratchpadL3_Size; - - case Algorithm::RX_LOKI: - return RandomX_LokiConfig.ScratchpadL3_Size; - - default: - break; - } - - return 0; -} diff --git a/src/crypto/rx/RxAlgo.h b/src/crypto/rx/RxAlgo.h index dd3f0aa73..95033a9c9 100644 --- a/src/crypto/rx/RxAlgo.h +++ b/src/crypto/rx/RxAlgo.h @@ -35,9 +35,6 @@ #include "crypto/common/Algorithm.h" -struct RandomX_ConfigurationBase; - - namespace xmrig { @@ -46,7 +43,6 @@ class RxAlgo { public: static Algorithm::Id apply(Algorithm::Id algorithm); - static size_t l3(Algorithm::Id algorithm); };