mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-23 12:09:22 +00:00
Merge pull request #1050 from SChernykh/dev
Integrated RandomX, added RandomXL (Loki)
This commit is contained in:
commit
02410fa084
79 changed files with 9877 additions and 37 deletions
|
@ -161,13 +161,48 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
|
|||
find_package(UV REQUIRED)
|
||||
|
||||
if (WITH_RANDOMX)
|
||||
find_package(RandomX REQUIRED)
|
||||
include_directories(${RANDOMX_INCLUDE_DIR})
|
||||
|
||||
include_directories(src/crypto/randomx)
|
||||
add_definitions(/DXMRIG_ALGO_RANDOMX)
|
||||
set(SOURCES_CRYPTO
|
||||
"${SOURCES_CRYPTO}"
|
||||
src/crypto/randomx/aes_hash.cpp
|
||||
src/crypto/randomx/argon2_ref.c
|
||||
src/crypto/randomx/bytecode_machine.cpp
|
||||
src/crypto/randomx/dataset.cpp
|
||||
src/crypto/randomx/soft_aes.cpp
|
||||
src/crypto/randomx/virtual_memory.cpp
|
||||
src/crypto/randomx/vm_interpreted.cpp
|
||||
src/crypto/randomx/allocator.cpp
|
||||
src/crypto/randomx/randomx.cpp
|
||||
src/crypto/randomx/superscalar.cpp
|
||||
src/crypto/randomx/vm_compiled.cpp
|
||||
src/crypto/randomx/vm_interpreted_light.cpp
|
||||
src/crypto/randomx/argon2_core.c
|
||||
src/crypto/randomx/blake2_generator.cpp
|
||||
src/crypto/randomx/instructions_portable.cpp
|
||||
src/crypto/randomx/reciprocal.c
|
||||
src/crypto/randomx/virtual_machine.cpp
|
||||
src/crypto/randomx/vm_compiled_light.cpp
|
||||
src/crypto/randomx/blake2/blake2b.c
|
||||
)
|
||||
if (NOT ARCH_ID)
|
||||
set(ARCH_ID ${CMAKE_HOST_SYSTEM_PROCESSOR})
|
||||
endif()
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
enable_language(ASM_MASM)
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/randomx/jit_compiler_x86_static.asm
|
||||
src/crypto/randomx/jit_compiler_x86.cpp
|
||||
)
|
||||
elseif (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64" OR ARCH_ID STREQUAL "AMD64")
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/randomx/jit_compiler_x86_static.S
|
||||
src/crypto/randomx/jit_compiler_x86.cpp
|
||||
)
|
||||
# cheat because cmake and ccache hate each other
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_x86_static.S PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
else()
|
||||
set(RANDOMX_LIBRARIES "")
|
||||
|
||||
remove_definitions(/DXMRIG_ALGO_RANDOMX)
|
||||
endif()
|
||||
|
||||
|
@ -241,4 +276,4 @@ if (WITH_DEBUG_LOG)
|
|||
endif()
|
||||
|
||||
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTP_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES})
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${RANDOMX_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB})
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB})
|
||||
|
|
|
@ -1,25 +0,0 @@
|
|||
find_path(
|
||||
RANDOMX_INCLUDE_DIR
|
||||
NAMES randomx.h
|
||||
PATHS "${XMRIG_DEPS}" ENV "XMRIG_DEPS"
|
||||
PATH_SUFFIXES "include"
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
|
||||
find_path(RANDOMX_INCLUDE_DIR NAMES randomx.h)
|
||||
|
||||
find_library(
|
||||
RANDOMX_LIBRARY
|
||||
NAMES librandomx.a randomx librandomx
|
||||
PATHS "${XMRIG_DEPS}" ENV "XMRIG_DEPS"
|
||||
PATH_SUFFIXES "lib"
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
|
||||
find_library(RANDOMX_LIBRARY NAMES librandomx.a randomx librandomx)
|
||||
|
||||
set(RANDOMX_LIBRARIES ${RANDOMX_LIBRARY})
|
||||
set(RANDOMX_INCLUDE_DIRS ${RANDOMX_INCLUDE_DIR})
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(RANDOMX DEFAULT_MSG RANDOMX_LIBRARY RANDOMX_INCLUDE_DIR)
|
|
@ -17,7 +17,7 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
|||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-strict-aliasing")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fno-exceptions -fno-rtti -Wno-class-memaccess")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fexceptions -fno-rtti -Wno-class-memaccess")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -s")
|
||||
|
||||
if (XMRIG_ARMv8)
|
||||
|
|
|
@ -518,6 +518,7 @@ void xmrig::Pool::rebuild()
|
|||
addVariant(VARIANT_ZLS);
|
||||
addVariant(VARIANT_DOUBLE);
|
||||
addVariant(VARIANT_RX_WOW);
|
||||
addVariant(VARIANT_RX_LOKI);
|
||||
addVariant(VARIANT_AUTO);
|
||||
# endif
|
||||
}
|
||||
|
|
|
@ -81,6 +81,7 @@ enum Variant {
|
|||
VARIANT_ZLS = 15, // CryptoNight variant 2 with 3/4 iterations (Zelerius)
|
||||
VARIANT_DOUBLE = 16, // CryptoNight variant 2 with double iterations (X-CASH)
|
||||
VARIANT_RX_WOW = 17, // RandomX (Wownero)
|
||||
VARIANT_RX_LOKI = 18, // RandomX (Loki)
|
||||
VARIANT_MAX
|
||||
};
|
||||
|
||||
|
|
|
@ -72,6 +72,7 @@ static AlgoData const algorithms[] = {
|
|||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
{ "randomx/wow", "rx/wow", xmrig::RANDOM_X, xmrig::VARIANT_RX_WOW },
|
||||
{ "randomx/loki", "rx/loki", xmrig::RANDOM_X, xmrig::VARIANT_RX_LOKI },
|
||||
{ "randomx", "rx", xmrig::RANDOM_X, xmrig::VARIANT_RX_WOW },
|
||||
# endif
|
||||
|
||||
|
@ -145,6 +146,7 @@ static const char *variants[] = {
|
|||
"zls",
|
||||
"double",
|
||||
"rx/wow",
|
||||
"rx/loki",
|
||||
};
|
||||
|
||||
|
||||
|
|
214
src/crypto/randomx/aes_hash.cpp
Normal file
214
src/crypto/randomx/aes_hash.cpp
Normal file
|
@ -0,0 +1,214 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "soft_aes.h"
|
||||
#include "randomx.h"
|
||||
|
||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
|
||||
#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017
|
||||
#define AES_HASH_1R_STATE3 0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c
|
||||
|
||||
#define AES_HASH_1R_XKEY0 0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389
|
||||
#define AES_HASH_1R_XKEY1 0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1
|
||||
|
||||
/*
|
||||
Calculate a 512-bit hash of 'input' using 4 lanes of AES.
|
||||
The input is treated as a set of round keys for the encryption
|
||||
of the initial state.
|
||||
|
||||
'inputSize' must be a multiple of 64.
|
||||
|
||||
For a 2 MiB input, this has the same security as 32768-round
|
||||
AES encryption.
|
||||
|
||||
Hashing throughput: >20 GiB/s per CPU core with hardware AES
|
||||
*/
|
||||
template<bool softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
|
||||
const uint8_t* inptr = (uint8_t*)input;
|
||||
const uint8_t* inputEnd = inptr + inputSize;
|
||||
|
||||
rx_vec_i128 state0, state1, state2, state3;
|
||||
rx_vec_i128 in0, in1, in2, in3;
|
||||
|
||||
//intial state
|
||||
state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
|
||||
state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
|
||||
state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
|
||||
state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
|
||||
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (inptr < inputEnd) {
|
||||
in0 = rx_load_vec_i128((rx_vec_i128*)inptr + 0);
|
||||
in1 = rx_load_vec_i128((rx_vec_i128*)inptr + 1);
|
||||
in2 = rx_load_vec_i128((rx_vec_i128*)inptr + 2);
|
||||
in3 = rx_load_vec_i128((rx_vec_i128*)inptr + 3);
|
||||
|
||||
state0 = aesenc<softAes>(state0, in0);
|
||||
state1 = aesdec<softAes>(state1, in1);
|
||||
state2 = aesenc<softAes>(state2, in2);
|
||||
state3 = aesdec<softAes>(state3, in3);
|
||||
|
||||
inptr += 64;
|
||||
}
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
|
||||
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
|
||||
|
||||
state0 = aesenc<softAes>(state0, xkey0);
|
||||
state1 = aesdec<softAes>(state1, xkey0);
|
||||
state2 = aesenc<softAes>(state2, xkey0);
|
||||
state3 = aesdec<softAes>(state3, xkey0);
|
||||
|
||||
state0 = aesenc<softAes>(state0, xkey1);
|
||||
state1 = aesdec<softAes>(state1, xkey1);
|
||||
state2 = aesenc<softAes>(state2, xkey1);
|
||||
state3 = aesdec<softAes>(state3, xkey1);
|
||||
|
||||
//output hash
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 0, state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 1, state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 2, state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 3, state3);
|
||||
}
|
||||
|
||||
template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash);
|
||||
template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash);
|
||||
|
||||
#define AES_GEN_1R_KEY0 0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553
|
||||
#define AES_GEN_1R_KEY1 0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07
|
||||
#define AES_GEN_1R_KEY2 0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1
|
||||
#define AES_GEN_1R_KEY3 0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135
|
||||
|
||||
/*
|
||||
Fill 'buffer' with pseudorandom data based on 512-bit 'state'.
|
||||
The state is encrypted using a single AES round per 16 bytes of output
|
||||
in 4 lanes.
|
||||
|
||||
'outputSize' must be a multiple of 64.
|
||||
|
||||
The modified state is written back to 'state' to allow multiple
|
||||
calls to this function.
|
||||
*/
|
||||
template<bool softAes>
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
rx_vec_i128 state0, state1, state2, state3;
|
||||
rx_vec_i128 key0, key1, key2, key3;
|
||||
|
||||
key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
|
||||
key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
|
||||
key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
|
||||
key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
|
||||
|
||||
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
|
||||
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
|
||||
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
|
||||
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state0 = aesdec<softAes>(state0, key0);
|
||||
state1 = aesenc<softAes>(state1, key1);
|
||||
state2 = aesdec<softAes>(state2, key2);
|
||||
state3 = aesenc<softAes>(state3, key3);
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)state + 0, state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)state + 1, state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)state + 2, state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)state + 3, state3);
|
||||
}
|
||||
|
||||
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
rx_vec_i128 state0, state1, state2, state3;
|
||||
rx_vec_i128 key0, key1, key2, key3, key4, key5, key6, key7;
|
||||
|
||||
key0 = RandomX_CurrentConfig.fillAes4Rx4_Key[0];
|
||||
key1 = RandomX_CurrentConfig.fillAes4Rx4_Key[1];
|
||||
key2 = RandomX_CurrentConfig.fillAes4Rx4_Key[2];
|
||||
key3 = RandomX_CurrentConfig.fillAes4Rx4_Key[3];
|
||||
key4 = RandomX_CurrentConfig.fillAes4Rx4_Key[4];
|
||||
key5 = RandomX_CurrentConfig.fillAes4Rx4_Key[5];
|
||||
key6 = RandomX_CurrentConfig.fillAes4Rx4_Key[6];
|
||||
key7 = RandomX_CurrentConfig.fillAes4Rx4_Key[7];
|
||||
|
||||
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
|
||||
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
|
||||
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
|
||||
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state0 = aesdec<softAes>(state0, key0);
|
||||
state1 = aesenc<softAes>(state1, key0);
|
||||
state2 = aesdec<softAes>(state2, key4);
|
||||
state3 = aesenc<softAes>(state3, key4);
|
||||
|
||||
state0 = aesdec<softAes>(state0, key1);
|
||||
state1 = aesenc<softAes>(state1, key1);
|
||||
state2 = aesdec<softAes>(state2, key5);
|
||||
state3 = aesenc<softAes>(state3, key5);
|
||||
|
||||
state0 = aesdec<softAes>(state0, key2);
|
||||
state1 = aesenc<softAes>(state1, key2);
|
||||
state2 = aesdec<softAes>(state2, key6);
|
||||
state3 = aesenc<softAes>(state3, key6);
|
||||
|
||||
state0 = aesdec<softAes>(state0, key3);
|
||||
state1 = aesenc<softAes>(state1, key3);
|
||||
state2 = aesdec<softAes>(state2, key7);
|
||||
state3 = aesenc<softAes>(state3, key7);
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
}
|
||||
|
||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
40
src/crypto/randomx/aes_hash.hpp
Normal file
40
src/crypto/randomx/aes_hash.hpp
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
template<bool softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
||||
|
||||
template<bool softAes>
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
60
src/crypto/randomx/allocator.cpp
Normal file
60
src/crypto/randomx/allocator.cpp
Normal file
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <new>
|
||||
#include "allocator.hpp"
|
||||
#include "intrin_portable.h"
|
||||
#include "virtual_memory.hpp"
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<size_t alignment>
|
||||
void* AlignedAllocator<alignment>::allocMemory(size_t count) {
|
||||
void *mem = rx_aligned_alloc(count, alignment);
|
||||
if (mem == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return mem;
|
||||
}
|
||||
|
||||
template<size_t alignment>
|
||||
void AlignedAllocator<alignment>::freeMemory(void* ptr, size_t count) {
|
||||
rx_aligned_free(ptr);
|
||||
}
|
||||
|
||||
template class AlignedAllocator<CacheLineSize>;
|
||||
|
||||
void* LargePageAllocator::allocMemory(size_t count) {
|
||||
return allocLargePagesMemory(count);
|
||||
}
|
||||
|
||||
void LargePageAllocator::freeMemory(void* ptr, size_t count) {
|
||||
freePagedMemory(ptr, count);
|
||||
};
|
||||
|
||||
}
|
46
src/crypto/randomx/allocator.hpp
Normal file
46
src/crypto/randomx/allocator.hpp
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<size_t alignment>
|
||||
struct AlignedAllocator {
|
||||
static void* allocMemory(size_t);
|
||||
static void freeMemory(void*, size_t);
|
||||
};
|
||||
|
||||
struct LargePageAllocator {
|
||||
static void* allocMemory(size_t);
|
||||
static void freeMemory(void*, size_t);
|
||||
};
|
||||
|
||||
}
|
229
src/crypto/randomx/argon2.h
Normal file
229
src/crypto/randomx/argon2.h
Normal file
|
@ -0,0 +1,229 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
|
||||
/*
|
||||
* Argon2 input parameter restrictions
|
||||
*/
|
||||
|
||||
/* Minimum and maximum number of lanes (degree of parallelism) */
|
||||
#define ARGON2_MIN_LANES UINT32_C(1)
|
||||
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
|
||||
|
||||
/* Minimum and maximum number of threads */
|
||||
#define ARGON2_MIN_THREADS UINT32_C(1)
|
||||
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
|
||||
|
||||
/* Number of synchronization points between lanes per pass */
|
||||
#define ARGON2_SYNC_POINTS UINT32_C(4)
|
||||
|
||||
/* Minimum and maximum digest size in bytes */
|
||||
#define ARGON2_MIN_OUTLEN UINT32_C(4)
|
||||
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
|
||||
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
|
||||
|
||||
#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */
|
||||
#define ARGON2_MAX_MEMORY_BITS \
|
||||
ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
|
||||
#define ARGON2_MAX_MEMORY \
|
||||
ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
|
||||
|
||||
/* Minimum and maximum number of passes */
|
||||
#define ARGON2_MIN_TIME UINT32_C(1)
|
||||
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum password length in bytes */
|
||||
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
|
||||
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum associated data length in bytes */
|
||||
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
|
||||
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum salt length in bytes */
|
||||
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
|
||||
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum key length in bytes */
|
||||
#define ARGON2_MIN_SECRET UINT32_C(0)
|
||||
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Flags to determine which fields are securely wiped (default = no wipe). */
|
||||
#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
|
||||
#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
|
||||
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
|
||||
|
||||
|
||||
/* Error codes */
|
||||
typedef enum Argon2_ErrorCodes {
|
||||
ARGON2_OK = 0,
|
||||
|
||||
ARGON2_OUTPUT_PTR_NULL = -1,
|
||||
|
||||
ARGON2_OUTPUT_TOO_SHORT = -2,
|
||||
ARGON2_OUTPUT_TOO_LONG = -3,
|
||||
|
||||
ARGON2_PWD_TOO_SHORT = -4,
|
||||
ARGON2_PWD_TOO_LONG = -5,
|
||||
|
||||
ARGON2_SALT_TOO_SHORT = -6,
|
||||
ARGON2_SALT_TOO_LONG = -7,
|
||||
|
||||
ARGON2_AD_TOO_SHORT = -8,
|
||||
ARGON2_AD_TOO_LONG = -9,
|
||||
|
||||
ARGON2_SECRET_TOO_SHORT = -10,
|
||||
ARGON2_SECRET_TOO_LONG = -11,
|
||||
|
||||
ARGON2_TIME_TOO_SMALL = -12,
|
||||
ARGON2_TIME_TOO_LARGE = -13,
|
||||
|
||||
ARGON2_MEMORY_TOO_LITTLE = -14,
|
||||
ARGON2_MEMORY_TOO_MUCH = -15,
|
||||
|
||||
ARGON2_LANES_TOO_FEW = -16,
|
||||
ARGON2_LANES_TOO_MANY = -17,
|
||||
|
||||
ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */
|
||||
ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */
|
||||
ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
|
||||
ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */
|
||||
|
||||
ARGON2_MEMORY_ALLOCATION_ERROR = -22,
|
||||
|
||||
ARGON2_FREE_MEMORY_CBK_NULL = -23,
|
||||
ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
|
||||
|
||||
ARGON2_INCORRECT_PARAMETER = -25,
|
||||
ARGON2_INCORRECT_TYPE = -26,
|
||||
|
||||
ARGON2_OUT_PTR_MISMATCH = -27,
|
||||
|
||||
ARGON2_THREADS_TOO_FEW = -28,
|
||||
ARGON2_THREADS_TOO_MANY = -29,
|
||||
|
||||
ARGON2_MISSING_ARGS = -30,
|
||||
|
||||
ARGON2_ENCODING_FAIL = -31,
|
||||
|
||||
ARGON2_DECODING_FAIL = -32,
|
||||
|
||||
ARGON2_THREAD_FAIL = -33,
|
||||
|
||||
ARGON2_DECODING_LENGTH_FAIL = -34,
|
||||
|
||||
ARGON2_VERIFY_MISMATCH = -35
|
||||
} argon2_error_codes;
|
||||
|
||||
/* Memory allocator types --- for external allocation */
|
||||
typedef int(*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
|
||||
typedef void(*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
|
||||
|
||||
/* Argon2 external data structures */
|
||||
|
||||
/*
|
||||
*****
|
||||
* Context: structure to hold Argon2 inputs:
|
||||
* output array and its length,
|
||||
* password and its length,
|
||||
* salt and its length,
|
||||
* secret and its length,
|
||||
* associated data and its length,
|
||||
* number of passes, amount of used memory (in KBytes, can be rounded up a bit)
|
||||
* number of parallel threads that will be run.
|
||||
* All the parameters above affect the output hash value.
|
||||
* Additionally, two function pointers can be provided to allocate and
|
||||
* deallocate the memory (if NULL, memory will be allocated internally).
|
||||
* Also, three flags indicate whether to erase password, secret as soon as they
|
||||
* are pre-hashed (and thus not needed anymore), and the entire memory
|
||||
*****
|
||||
* Simplest situation: you have output array out[8], password is stored in
|
||||
* pwd[32], salt is stored in salt[16], you do not have keys nor associated
|
||||
* data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with
|
||||
* 4 parallel lanes.
|
||||
* You want to erase the password, but you're OK with last pass not being
|
||||
* erased. You want to use the default memory allocator.
|
||||
* Then you initialize:
|
||||
Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false)
|
||||
*/
|
||||
typedef struct Argon2_Context {
|
||||
uint8_t *out; /* output array */
|
||||
uint32_t outlen; /* digest length */
|
||||
|
||||
uint8_t *pwd; /* password array */
|
||||
uint32_t pwdlen; /* password length */
|
||||
|
||||
uint8_t *salt; /* salt array */
|
||||
uint32_t saltlen; /* salt length */
|
||||
|
||||
uint8_t *secret; /* key array */
|
||||
uint32_t secretlen; /* key length */
|
||||
|
||||
uint8_t *ad; /* associated data array */
|
||||
uint32_t adlen; /* associated data length */
|
||||
|
||||
uint32_t t_cost; /* number of passes */
|
||||
uint32_t m_cost; /* amount of memory requested (KB) */
|
||||
uint32_t lanes; /* number of lanes */
|
||||
uint32_t threads; /* maximum number of threads */
|
||||
|
||||
uint32_t version; /* version number */
|
||||
|
||||
allocate_fptr allocate_cbk; /* pointer to memory allocator */
|
||||
deallocate_fptr free_cbk; /* pointer to memory deallocator */
|
||||
|
||||
uint32_t flags; /* array of bool options */
|
||||
} argon2_context;
|
||||
|
||||
/* Argon2 primitive type */
|
||||
typedef enum Argon2_type {
|
||||
Argon2_d = 0,
|
||||
Argon2_i = 1,
|
||||
Argon2_id = 2
|
||||
} argon2_type;
|
||||
|
||||
/* Version of the algorithm */
|
||||
typedef enum Argon2_version {
|
||||
ARGON2_VERSION_10 = 0x10,
|
||||
ARGON2_VERSION_13 = 0x13,
|
||||
ARGON2_VERSION_NUMBER = ARGON2_VERSION_13
|
||||
} argon2_version;
|
516
src/crypto/randomx/argon2_core.c
Normal file
516
src/crypto/randomx/argon2_core.c
Normal file
|
@ -0,0 +1,516 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
/*For memory wiping*/
|
||||
#ifdef _MSC_VER
|
||||
#include <windows.h>
|
||||
#include <winbase.h> /* For SecureZeroMemory */
|
||||
#endif
|
||||
#if defined __STDC_LIB_EXT1__
|
||||
#define __STDC_WANT_LIB_EXT1__ 1
|
||||
#endif
|
||||
#define VC_GE_2005(version) (version >= 1400)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "argon2_core.h"
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
|
||||
#ifdef GENKAT
|
||||
#include "genkat.h"
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_attribute(optnone)
|
||||
#define NOT_OPTIMIZED __attribute__((optnone))
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#define GCC_VERSION \
|
||||
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
|
||||
#if GCC_VERSION >= 40400
|
||||
#define NOT_OPTIMIZED __attribute__((optimize("O0")))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef NOT_OPTIMIZED
|
||||
#define NOT_OPTIMIZED
|
||||
#endif
|
||||
|
||||
/***************Instance and Position constructors**********/
|
||||
void rxa2_init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
|
||||
|
||||
void rxa2_copy_block(block *dst, const block *src) {
|
||||
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
|
||||
}
|
||||
|
||||
void rxa2_xor_block(block *dst, const block *src) {
|
||||
int i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] ^= src->v[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void load_block(block *dst, const void *input) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static void store_block(void *output, const block *src) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/***************Memory functions*****************/
|
||||
|
||||
int rxa2_allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
size_t num, size_t size) {
|
||||
size_t memory_size = num * size;
|
||||
if (memory == NULL) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
/* 1. Check for multiplication overflow */
|
||||
if (size != 0 && memory_size / size != num) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
/* 2. Try to allocate with appropriate allocator */
|
||||
if (context->allocate_cbk) {
|
||||
(context->allocate_cbk)(memory, memory_size);
|
||||
}
|
||||
else {
|
||||
*memory = (uint8_t*)malloc(memory_size);
|
||||
}
|
||||
|
||||
if (*memory == NULL) {
|
||||
return ARGON2_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
void rxa2_free_memory(const argon2_context *context, uint8_t *memory,
|
||||
size_t num, size_t size) {
|
||||
size_t memory_size = num * size;
|
||||
rxa2_clear_internal_memory(memory, memory_size);
|
||||
if (context->free_cbk) {
|
||||
(context->free_cbk)(memory, memory_size);
|
||||
}
|
||||
else {
|
||||
free(memory);
|
||||
}
|
||||
}
|
||||
|
||||
void NOT_OPTIMIZED rxa2_secure_wipe_memory(void *v, size_t n) {
|
||||
#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER)
|
||||
SecureZeroMemory(v, n);
|
||||
#elif defined memset_s
|
||||
memset_s(v, n, 0, n);
|
||||
#elif defined(__OpenBSD__)
|
||||
explicit_bzero(v, n);
|
||||
#else
|
||||
static void *(*const volatile memset_sec)(void *, int, size_t) = &memset;
|
||||
memset_sec(v, 0, n);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Memory clear flag defaults to true. */
|
||||
#define FLAG_clear_internal_memory 0
|
||||
void rxa2_clear_internal_memory(void *v, size_t n) {
|
||||
if (FLAG_clear_internal_memory && v) {
|
||||
rxa2_secure_wipe_memory(v, n);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t rxa2_index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane) {
|
||||
/*
|
||||
* Pass 0:
|
||||
* This lane : all already finished segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : all already finished segments
|
||||
* Pass 1+:
|
||||
* This lane : (SYNC_POINTS - 1) last segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : (SYNC_POINTS - 1) last segments
|
||||
*/
|
||||
uint32_t reference_area_size;
|
||||
uint64_t relative_position;
|
||||
uint32_t start_position, absolute_position;
|
||||
|
||||
if (0 == position->pass) {
|
||||
/* First pass */
|
||||
if (0 == position->slice) {
|
||||
/* First slice */
|
||||
reference_area_size =
|
||||
position->index - 1; /* all but the previous */
|
||||
}
|
||||
else {
|
||||
if (same_lane) {
|
||||
/* The same lane => add current segment */
|
||||
reference_area_size =
|
||||
position->slice * instance->segment_length +
|
||||
position->index - 1;
|
||||
}
|
||||
else {
|
||||
reference_area_size =
|
||||
position->slice * instance->segment_length +
|
||||
((position->index == 0) ? (-1) : 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Second pass */
|
||||
if (same_lane) {
|
||||
reference_area_size = instance->lane_length -
|
||||
instance->segment_length + position->index -
|
||||
1;
|
||||
}
|
||||
else {
|
||||
reference_area_size = instance->lane_length -
|
||||
instance->segment_length +
|
||||
((position->index == 0) ? (-1) : 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
|
||||
* relative position */
|
||||
relative_position = pseudo_rand;
|
||||
relative_position = relative_position * relative_position >> 32;
|
||||
relative_position = reference_area_size - 1 -
|
||||
(reference_area_size * relative_position >> 32);
|
||||
|
||||
/* 1.2.5 Computing starting position */
|
||||
start_position = 0;
|
||||
|
||||
if (0 != position->pass) {
|
||||
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
|
||||
? 0
|
||||
: (position->slice + 1) * instance->segment_length;
|
||||
}
|
||||
|
||||
/* 1.2.6. Computing absolute position */
|
||||
absolute_position = (start_position + relative_position) %
|
||||
instance->lane_length; /* absolute position */
|
||||
return absolute_position;
|
||||
}
|
||||
|
||||
/* Single-threaded version for p=1 case */
|
||||
static int fill_memory_blocks_st(argon2_instance_t *instance) {
|
||||
uint32_t r, s, l;
|
||||
|
||||
for (r = 0; r < instance->passes; ++r) {
|
||||
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
argon2_position_t position = { r, l, (uint8_t)s, 0 };
|
||||
rxa2_fill_segment(instance, position);
|
||||
}
|
||||
}
|
||||
#ifdef GENKAT
|
||||
internal_kat(instance, r); /* Print all memory blocks */
|
||||
#endif
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int rxa2_fill_memory_blocks(argon2_instance_t *instance) {
|
||||
if (instance == NULL || instance->lanes == 0) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
return fill_memory_blocks_st(instance);
|
||||
}
|
||||
|
||||
int rxa2_validate_inputs(const argon2_context *context) {
|
||||
if (NULL == context) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
|
||||
if (NULL == context->out) {
|
||||
return ARGON2_OUTPUT_PTR_NULL;
|
||||
}
|
||||
|
||||
/* Validate output length */
|
||||
if (ARGON2_MIN_OUTLEN > context->outlen) {
|
||||
return ARGON2_OUTPUT_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_OUTLEN < context->outlen) {
|
||||
return ARGON2_OUTPUT_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate password (required param) */
|
||||
if (NULL == context->pwd) {
|
||||
if (0 != context->pwdlen) {
|
||||
return ARGON2_PWD_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) {
|
||||
return ARGON2_PWD_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) {
|
||||
return ARGON2_PWD_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate salt (required param) */
|
||||
if (NULL == context->salt) {
|
||||
if (0 != context->saltlen) {
|
||||
return ARGON2_SALT_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
if (ARGON2_MIN_SALT_LENGTH > context->saltlen) {
|
||||
return ARGON2_SALT_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_SALT_LENGTH < context->saltlen) {
|
||||
return ARGON2_SALT_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate secret (optional param) */
|
||||
if (NULL == context->secret) {
|
||||
if (0 != context->secretlen) {
|
||||
return ARGON2_SECRET_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (ARGON2_MIN_SECRET > context->secretlen) {
|
||||
return ARGON2_SECRET_TOO_SHORT;
|
||||
}
|
||||
if (ARGON2_MAX_SECRET < context->secretlen) {
|
||||
return ARGON2_SECRET_TOO_LONG;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate associated data (optional param) */
|
||||
if (NULL == context->ad) {
|
||||
if (0 != context->adlen) {
|
||||
return ARGON2_AD_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (ARGON2_MIN_AD_LENGTH > context->adlen) {
|
||||
return ARGON2_AD_TOO_SHORT;
|
||||
}
|
||||
if (ARGON2_MAX_AD_LENGTH < context->adlen) {
|
||||
return ARGON2_AD_TOO_LONG;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate memory cost */
|
||||
if (ARGON2_MIN_MEMORY > context->m_cost) {
|
||||
return ARGON2_MEMORY_TOO_LITTLE;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_MEMORY < context->m_cost) {
|
||||
return ARGON2_MEMORY_TOO_MUCH;
|
||||
}
|
||||
|
||||
if (context->m_cost < 8 * context->lanes) {
|
||||
return ARGON2_MEMORY_TOO_LITTLE;
|
||||
}
|
||||
|
||||
/* Validate time cost */
|
||||
if (ARGON2_MIN_TIME > context->t_cost) {
|
||||
return ARGON2_TIME_TOO_SMALL;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_TIME < context->t_cost) {
|
||||
return ARGON2_TIME_TOO_LARGE;
|
||||
}
|
||||
|
||||
/* Validate lanes */
|
||||
if (ARGON2_MIN_LANES > context->lanes) {
|
||||
return ARGON2_LANES_TOO_FEW;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_LANES < context->lanes) {
|
||||
return ARGON2_LANES_TOO_MANY;
|
||||
}
|
||||
|
||||
/* Validate threads */
|
||||
if (ARGON2_MIN_THREADS > context->threads) {
|
||||
return ARGON2_THREADS_TOO_FEW;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_THREADS < context->threads) {
|
||||
return ARGON2_THREADS_TOO_MANY;
|
||||
}
|
||||
|
||||
if (NULL != context->allocate_cbk && NULL == context->free_cbk) {
|
||||
return ARGON2_FREE_MEMORY_CBK_NULL;
|
||||
}
|
||||
|
||||
if (NULL == context->allocate_cbk && NULL != context->free_cbk) {
|
||||
return ARGON2_ALLOCATE_MEMORY_CBK_NULL;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
|
||||
uint32_t l;
|
||||
/* Make the first and second block in each lane as G(H0||0||i) or
|
||||
G(H0||1||i) */
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
|
||||
rxa2_blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
|
||||
ARGON2_PREHASH_SEED_LENGTH);
|
||||
load_block(&instance->memory[l * instance->lane_length + 0],
|
||||
blockhash_bytes);
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
|
||||
rxa2_blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
|
||||
ARGON2_PREHASH_SEED_LENGTH);
|
||||
load_block(&instance->memory[l * instance->lane_length + 1],
|
||||
blockhash_bytes);
|
||||
}
|
||||
rxa2_clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) {
|
||||
blake2b_state BlakeHash;
|
||||
uint8_t value[sizeof(uint32_t)];
|
||||
|
||||
if (NULL == context || NULL == blockhash) {
|
||||
return;
|
||||
}
|
||||
|
||||
blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
store32(&value, context->lanes);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->outlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->m_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->t_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->version);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, (uint32_t)type);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->pwdlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->pwd != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
|
||||
context->pwdlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
|
||||
rxa2_secure_wipe_memory(context->pwd, context->pwdlen);
|
||||
context->pwdlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
store32(&value, context->saltlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->salt != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->salt, context->saltlen);
|
||||
}
|
||||
|
||||
store32(&value, context->secretlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->secret != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->secret,
|
||||
context->secretlen);
|
||||
|
||||
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
|
||||
rxa2_secure_wipe_memory(context->secret, context->secretlen);
|
||||
context->secretlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
store32(&value, context->adlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->ad != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->ad,
|
||||
context->adlen);
|
||||
}
|
||||
|
||||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
int rxa2_argon_initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
||||
int result = ARGON2_OK;
|
||||
|
||||
if (instance == NULL || context == NULL)
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
instance->context_ptr = context;
|
||||
|
||||
/* 1. Memory allocation */
|
||||
/*result = allocate_memory(context, (uint8_t **)&(instance->memory), instance->memory_blocks, sizeof(block));
|
||||
if (result != ARGON2_OK) {
|
||||
return result;
|
||||
}*/
|
||||
|
||||
/* 2. Initial hashing */
|
||||
/* H_0 + 8 extra bytes to produce the first blocks */
|
||||
/* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */
|
||||
/* Hashing all inputs */
|
||||
rxa2_initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
rxa2_clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
rxa2_fill_first_blocks(blockhash, instance);
|
||||
/* Clearing the hash */
|
||||
rxa2_clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
254
src/crypto/randomx/argon2_core.h
Normal file
254
src/crypto/randomx/argon2_core.h
Normal file
|
@ -0,0 +1,254 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_CORE_H
|
||||
#define ARGON2_CORE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "argon2.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define CONST_CAST(x) (x)(uintptr_t)
|
||||
|
||||
/**********************Argon2 internal constants*******************************/
|
||||
|
||||
enum argon2_core_constants {
|
||||
/* Memory block size in bytes */
|
||||
ARGON2_BLOCK_SIZE = 1024,
|
||||
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
|
||||
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
|
||||
ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32,
|
||||
ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64,
|
||||
|
||||
/* Number of pseudo-random values generated by one call to Blake in Argon2i
|
||||
to
|
||||
generate reference block positions */
|
||||
ARGON2_ADDRESSES_IN_BLOCK = 128,
|
||||
|
||||
/* Pre-hashing digest length and its extension*/
|
||||
ARGON2_PREHASH_DIGEST_LENGTH = 64,
|
||||
ARGON2_PREHASH_SEED_LENGTH = 72
|
||||
};
|
||||
|
||||
/*************************Argon2 internal data types***********************/
|
||||
|
||||
/*
|
||||
* Structure for the (1KB) memory block implemented as 128 64-bit words.
|
||||
* Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
|
||||
* bounds checking).
|
||||
*/
|
||||
typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
|
||||
|
||||
/*****************Functions that work with the block******************/
|
||||
|
||||
/* Initialize each byte of the block with @in */
|
||||
void rxa2_init_block_value(block *b, uint8_t in);
|
||||
|
||||
/* Copy block @src to block @dst */
|
||||
void rxa2_copy_block(block *dst, const block *src);
|
||||
|
||||
/* XOR @src onto @dst bytewise */
|
||||
void rxa2_xor_block(block *dst, const block *src);
|
||||
|
||||
/*
|
||||
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
|
||||
* and derived values.
|
||||
* Used to evaluate the number and location of blocks to construct in each
|
||||
* thread
|
||||
*/
|
||||
typedef struct Argon2_instance_t {
|
||||
block *memory; /* Memory pointer */
|
||||
uint32_t version;
|
||||
uint32_t passes; /* Number of passes */
|
||||
uint32_t memory_blocks; /* Number of blocks in memory */
|
||||
uint32_t segment_length;
|
||||
uint32_t lane_length;
|
||||
uint32_t lanes;
|
||||
uint32_t threads;
|
||||
argon2_type type;
|
||||
int print_internals; /* whether to print the memory blocks */
|
||||
argon2_context *context_ptr; /* points back to original context */
|
||||
} argon2_instance_t;
|
||||
|
||||
/*
|
||||
* Argon2 position: where we construct the block right now. Used to distribute
|
||||
* work between threads.
|
||||
*/
|
||||
typedef struct Argon2_position_t {
|
||||
uint32_t pass;
|
||||
uint32_t lane;
|
||||
uint8_t slice;
|
||||
uint32_t index;
|
||||
} argon2_position_t;
|
||||
|
||||
/*Struct that holds the inputs for thread handling FillSegment*/
|
||||
typedef struct Argon2_thread_data {
|
||||
argon2_instance_t *instance_ptr;
|
||||
argon2_position_t pos;
|
||||
} argon2_thread_data;
|
||||
|
||||
/*************************Argon2 core functions********************************/
|
||||
|
||||
/* Allocates memory to the given pointer, uses the appropriate allocator as
|
||||
* specified in the context. Total allocated memory is num*size.
|
||||
* @param context argon2_context which specifies the allocator
|
||||
* @param memory pointer to the pointer to the memory
|
||||
* @param size the size in bytes for each element to be allocated
|
||||
* @param num the number of elements to be allocated
|
||||
* @return ARGON2_OK if @memory is a valid pointer and memory is allocated
|
||||
*/
|
||||
int rxa2_allocate_memory(const argon2_context *context, uint8_t **memory,
|
||||
size_t num, size_t size);
|
||||
|
||||
/*
|
||||
* Frees memory at the given pointer, uses the appropriate deallocator as
|
||||
* specified in the context. Also cleans the memory using clear_internal_memory.
|
||||
* @param context argon2_context which specifies the deallocator
|
||||
* @param memory pointer to buffer to be freed
|
||||
* @param size the size in bytes for each element to be deallocated
|
||||
* @param num the number of elements to be deallocated
|
||||
*/
|
||||
void rxa2_free_memory(const argon2_context *context, uint8_t *memory,
|
||||
size_t num, size_t size);
|
||||
|
||||
/* Function that securely cleans the memory. This ignores any flags set
|
||||
* regarding clearing memory. Usually one just calls clear_internal_memory.
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void rxa2_secure_wipe_memory(void *v, size_t n);
|
||||
|
||||
/* Function that securely clears the memory if FLAG_clear_internal_memory is
|
||||
* set. If the flag isn't set, this function does nothing.
|
||||
* @param mem Pointer to the memory
|
||||
* @param s Memory size in bytes
|
||||
*/
|
||||
void rxa2_clear_internal_memory(void *v, size_t n);
|
||||
|
||||
/*
|
||||
* Computes absolute position of reference block in the lane following a skewed
|
||||
* distribution and using a pseudo-random value as input
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Pointer to the current position
|
||||
* @param pseudo_rand 32-bit pseudo-random value used to determine the position
|
||||
* @param same_lane Indicates if the block will be taken from the current lane.
|
||||
* If so we can reference the current segment
|
||||
* @pre All pointers must be valid
|
||||
*/
|
||||
uint32_t rxa2_index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane);
|
||||
|
||||
/*
|
||||
* Function that validates all inputs against predefined restrictions and return
|
||||
* an error code
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return ARGON2_OK if everything is all right, otherwise one of error codes
|
||||
* (all defined in <argon2.h>
|
||||
*/
|
||||
int rxa2_validate_inputs(const argon2_context *context);
|
||||
|
||||
/*
|
||||
* Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears
|
||||
* password and secret if needed
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param blockhash Buffer for pre-hashing digest
|
||||
* @param type Argon2 type
|
||||
* @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes
|
||||
* allocated
|
||||
*/
|
||||
void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context,
|
||||
argon2_type type);
|
||||
|
||||
/*
|
||||
* Function creates first 2 blocks per lane
|
||||
* @param instance Pointer to the current instance
|
||||
* @param blockhash Pointer to the pre-hashing digest
|
||||
* @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values
|
||||
*/
|
||||
void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function allocates memory, hashes the inputs with Blake, and creates first
|
||||
* two blocks. Returns the pointer to the main memory with 2 blocks per lane
|
||||
* initialized
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param instance Current Argon2 instance
|
||||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||
* will be modified if successful.
|
||||
*/
|
||||
int rxa2_argon_initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
||||
* the memory.
|
||||
* @param context Pointer to current Argon2 context (use only the out parameters
|
||||
* from it)
|
||||
* @param instance Pointer to current instance of Argon2
|
||||
* @pre instance->state must point to necessary amount of memory
|
||||
* @pre context->out must point to outlen bytes of memory
|
||||
* @pre if context->free_cbk is not NULL, it should point to a function that
|
||||
* deallocates memory
|
||||
*/
|
||||
void rxa2_finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
* threads
|
||||
* @param context current context
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void rxa2_fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
/*
|
||||
* Function that fills the entire memory t_cost times based on the first two
|
||||
* blocks in each lane
|
||||
* @param instance Pointer to the current instance
|
||||
* @return ARGON2_OK if successful, @context->state
|
||||
*/
|
||||
int rxa2_fill_memory_blocks(argon2_instance_t *instance);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
214
src/crypto/randomx/argon2_ref.c
Normal file
214
src/crypto/randomx/argon2_ref.c
Normal file
|
@ -0,0 +1,214 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "argon2.h"
|
||||
#include "argon2_core.h"
|
||||
|
||||
#include "blake2/blamka-round-ref.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
#include "blake2/blake2.h"
|
||||
|
||||
/*
|
||||
* Function fills a new memory block and optionally XORs the old block over the new one.
|
||||
* @next_block must be initialized.
|
||||
* @param prev_block Pointer to the previous block
|
||||
* @param ref_block Pointer to the reference block
|
||||
* @param next_block Pointer to the block to be constructed
|
||||
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
static void fill_block(const block *prev_block, const block *ref_block,
|
||||
block *next_block, int with_xor) {
|
||||
block blockR, block_tmp;
|
||||
unsigned i;
|
||||
|
||||
rxa2_copy_block(&blockR, ref_block);
|
||||
rxa2_xor_block(&blockR, prev_block);
|
||||
rxa2_copy_block(&block_tmp, &blockR);
|
||||
/* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */
|
||||
if (with_xor) {
|
||||
/* Saving the next block contents for XOR over: */
|
||||
rxa2_xor_block(&block_tmp, next_block);
|
||||
/* Now blockR = ref_block + prev_block and
|
||||
block_tmp = ref_block + prev_block + next_block */
|
||||
}
|
||||
|
||||
/* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
|
||||
(16,17,..31)... finally (112,113,...127) */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND_NOMSG(
|
||||
blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2],
|
||||
blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5],
|
||||
blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8],
|
||||
blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11],
|
||||
blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14],
|
||||
blockR.v[16 * i + 15]);
|
||||
}
|
||||
|
||||
/* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
|
||||
(2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
|
||||
for (i = 0; i < 8; i++) {
|
||||
BLAKE2_ROUND_NOMSG(
|
||||
blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16],
|
||||
blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33],
|
||||
blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64],
|
||||
blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81],
|
||||
blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112],
|
||||
blockR.v[2 * i + 113]);
|
||||
}
|
||||
|
||||
rxa2_copy_block(next_block, &block_tmp);
|
||||
rxa2_xor_block(next_block, &blockR);
|
||||
}
|
||||
|
||||
static void next_addresses(block *address_block, block *input_block,
|
||||
const block *zero_block) {
|
||||
input_block->v[6]++;
|
||||
fill_block(zero_block, input_block, address_block, 0);
|
||||
fill_block(zero_block, address_block, address_block, 0);
|
||||
}
|
||||
|
||||
void rxa2_fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block, zero_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index;
|
||||
uint32_t i;
|
||||
int data_independent_addressing;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
data_independent_addressing =
|
||||
(instance->type == Argon2_i) ||
|
||||
(instance->type == Argon2_id && (position.pass == 0) &&
|
||||
(position.slice < ARGON2_SYNC_POINTS / 2));
|
||||
|
||||
if (data_independent_addressing) {
|
||||
rxa2_init_block_value(&zero_block, 0);
|
||||
rxa2_init_block_value(&input_block, 0);
|
||||
|
||||
input_block.v[0] = position.pass;
|
||||
input_block.v[1] = position.lane;
|
||||
input_block.v[2] = position.slice;
|
||||
input_block.v[3] = instance->memory_blocks;
|
||||
input_block.v[4] = instance->passes;
|
||||
input_block.v[5] = instance->type;
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
|
||||
/* Don't forget to generate the first block of addresses: */
|
||||
if (data_independent_addressing) {
|
||||
next_addresses(&address_block, &input_block, &zero_block);
|
||||
}
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.lane * instance->lane_length +
|
||||
position.slice * instance->segment_length + starting_index;
|
||||
|
||||
if (0 == curr_offset % instance->lane_length) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + instance->lane_length - 1;
|
||||
}
|
||||
else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
if (data_independent_addressing) {
|
||||
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
|
||||
next_addresses(&address_block, &input_block, &zero_block);
|
||||
}
|
||||
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
|
||||
}
|
||||
else {
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
}
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
|
||||
if ((position.pass == 0) && (position.slice == 0)) {
|
||||
/* Can not reference other lanes yet */
|
||||
ref_lane = position.lane;
|
||||
}
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = rxa2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
if (ARGON2_VERSION_10 == instance->version) {
|
||||
/* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
if (0 == position.pass) {
|
||||
fill_block(instance->memory + prev_offset, ref_block,
|
||||
curr_block, 0);
|
||||
}
|
||||
else {
|
||||
fill_block(instance->memory + prev_offset, ref_block,
|
||||
curr_block, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
10
src/crypto/randomx/asm/program_epilogue_linux.inc
Normal file
10
src/crypto/randomx/asm/program_epilogue_linux.inc
Normal file
|
@ -0,0 +1,10 @@
|
|||
;# restore callee-saved registers - System V AMD64 ABI
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbp
|
||||
pop rbx
|
||||
|
||||
;# program finished
|
||||
ret 0
|
19
src/crypto/randomx/asm/program_epilogue_store.inc
Normal file
19
src/crypto/randomx/asm/program_epilogue_store.inc
Normal file
|
@ -0,0 +1,19 @@
|
|||
;# save VM register values
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
movdqa xmmword ptr [rcx+64], xmm0
|
||||
movdqa xmmword ptr [rcx+80], xmm1
|
||||
movdqa xmmword ptr [rcx+96], xmm2
|
||||
movdqa xmmword ptr [rcx+112], xmm3
|
||||
lea rcx, [rcx+64]
|
||||
movdqa xmmword ptr [rcx+64], xmm4
|
||||
movdqa xmmword ptr [rcx+80], xmm5
|
||||
movdqa xmmword ptr [rcx+96], xmm6
|
||||
movdqa xmmword ptr [rcx+112], xmm7
|
24
src/crypto/randomx/asm/program_epilogue_win64.inc
Normal file
24
src/crypto/randomx/asm/program_epilogue_win64.inc
Normal file
|
@ -0,0 +1,24 @@
|
|||
;# restore callee-saved registers - Microsoft x64 calling convention
|
||||
movdqu xmm15, xmmword ptr [rsp]
|
||||
movdqu xmm14, xmmword ptr [rsp+16]
|
||||
movdqu xmm13, xmmword ptr [rsp+32]
|
||||
movdqu xmm12, xmmword ptr [rsp+48]
|
||||
movdqu xmm11, xmmword ptr [rsp+64]
|
||||
add rsp, 80
|
||||
movdqu xmm10, xmmword ptr [rsp]
|
||||
movdqu xmm9, xmmword ptr [rsp+16]
|
||||
movdqu xmm8, xmmword ptr [rsp+32]
|
||||
movdqu xmm7, xmmword ptr [rsp+48]
|
||||
movdqu xmm6, xmmword ptr [rsp+64]
|
||||
add rsp, 80
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rbp
|
||||
pop rbx
|
||||
|
||||
;# program finished
|
||||
ret
|
32
src/crypto/randomx/asm/program_loop_load.inc
Normal file
32
src/crypto/randomx/asm/program_loop_load.inc
Normal file
|
@ -0,0 +1,32 @@
|
|||
mov rdx, rax
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
lea rcx, [rsi+rax]
|
||||
push rcx
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
lea rcx, [rsi+rdx]
|
||||
push rcx
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||
cvtdq2pd xmm3, qword ptr [rcx+24]
|
||||
cvtdq2pd xmm4, qword ptr [rcx+32]
|
||||
cvtdq2pd xmm5, qword ptr [rcx+40]
|
||||
cvtdq2pd xmm6, qword ptr [rcx+48]
|
||||
cvtdq2pd xmm7, qword ptr [rcx+56]
|
||||
andps xmm4, xmm13
|
||||
andps xmm5, xmm13
|
||||
andps xmm6, xmm13
|
||||
andps xmm7, xmm13
|
||||
orps xmm4, xmm14
|
||||
orps xmm5, xmm14
|
||||
orps xmm6, xmm14
|
||||
orps xmm7, xmm14
|
19
src/crypto/randomx/asm/program_loop_store.inc
Normal file
19
src/crypto/randomx/asm/program_loop_store.inc
Normal file
|
@ -0,0 +1,19 @@
|
|||
xor eax, eax
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
pop rcx
|
||||
xorpd xmm0, xmm4
|
||||
xorpd xmm1, xmm5
|
||||
xorpd xmm2, xmm6
|
||||
xorpd xmm3, xmm7
|
||||
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
34
src/crypto/randomx/asm/program_prologue_linux.inc
Normal file
34
src/crypto/randomx/asm/program_prologue_linux.inc
Normal file
|
@ -0,0 +1,34 @@
|
|||
;# callee-saved registers - System V AMD64 ABI
|
||||
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
;# function arguments
|
||||
mov rbx, rcx ;# loop counter
|
||||
push rdi ;# RegisterFile& registerFile
|
||||
mov rcx, rdi
|
||||
mov rbp, qword ptr [rsi] ;# "mx", "ma"
|
||||
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
|
||||
mov rsi, rdx ;# uint8_t* scratchpad
|
||||
|
||||
mov rax, rbp
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
xor r9, r9
|
||||
xor r10, r10
|
||||
xor r11, r11
|
||||
xor r12, r12
|
||||
xor r13, r13
|
||||
xor r14, r14
|
||||
xor r15, r15
|
||||
|
||||
;# load constant registers
|
||||
lea rcx, [rcx+120]
|
||||
movapd xmm8, xmmword ptr [rcx+72]
|
||||
movapd xmm9, xmmword ptr [rcx+88]
|
||||
movapd xmm10, xmmword ptr [rcx+104]
|
||||
movapd xmm11, xmmword ptr [rcx+120]
|
47
src/crypto/randomx/asm/program_prologue_win64.inc
Normal file
47
src/crypto/randomx/asm/program_prologue_win64.inc
Normal file
|
@ -0,0 +1,47 @@
|
|||
;# callee-saved registers - Microsoft x64 calling convention
|
||||
push rbx
|
||||
push rbp
|
||||
push rdi
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
movdqu xmmword ptr [rsp+64], xmm6
|
||||
movdqu xmmword ptr [rsp+48], xmm7
|
||||
movdqu xmmword ptr [rsp+32], xmm8
|
||||
movdqu xmmword ptr [rsp+16], xmm9
|
||||
movdqu xmmword ptr [rsp+0], xmm10
|
||||
sub rsp, 80
|
||||
movdqu xmmword ptr [rsp+64], xmm11
|
||||
movdqu xmmword ptr [rsp+48], xmm12
|
||||
movdqu xmmword ptr [rsp+32], xmm13
|
||||
movdqu xmmword ptr [rsp+16], xmm14
|
||||
movdqu xmmword ptr [rsp+0], xmm15
|
||||
|
||||
;# function arguments
|
||||
push rcx ;# RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ;# "mx", "ma"
|
||||
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
|
||||
mov rsi, r8 ;# uint8_t* scratchpad
|
||||
mov rbx, r9 ;# loop counter
|
||||
|
||||
mov rax, rbp
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
xor r9, r9
|
||||
xor r10, r10
|
||||
xor r11, r11
|
||||
xor r12, r12
|
||||
xor r13, r13
|
||||
xor r14, r14
|
||||
xor r15, r15
|
||||
|
||||
;# load constant registers
|
||||
lea rcx, [rcx+120]
|
||||
movapd xmm8, xmmword ptr [rcx+72]
|
||||
movapd xmm9, xmmword ptr [rcx+88]
|
||||
movapd xmm10, xmmword ptr [rcx+104]
|
||||
movapd xmm11, xmmword ptr [rcx+120]
|
17
src/crypto/randomx/asm/program_read_dataset.inc
Normal file
17
src/crypto/randomx/asm/program_read_dataset.inc
Normal file
|
@ -0,0 +1,17 @@
|
|||
xor rbp, rax ;# modify "mx"
|
||||
mov edx, ebp ;# edx = mx
|
||||
and edx, RANDOMX_DATASET_BASE_MASK
|
||||
prefetchnta byte ptr [rdi+rdx]
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov edx, ebp ;# edx = ma
|
||||
and edx, RANDOMX_DATASET_BASE_MASK
|
||||
lea rcx, [rdi+rdx] ;# dataset cache line
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
|
10
src/crypto/randomx/asm/program_read_dataset_sshash_fin.inc
Normal file
10
src/crypto/randomx/asm/program_read_dataset_sshash_fin.inc
Normal file
|
@ -0,0 +1,10 @@
|
|||
mov rbx, qword ptr [rsp+64]
|
||||
xor r8, qword ptr [rsp+56]
|
||||
xor r9, qword ptr [rsp+48]
|
||||
xor r10, qword ptr [rsp+40]
|
||||
xor r11, qword ptr [rsp+32]
|
||||
xor r12, qword ptr [rsp+24]
|
||||
xor r13, qword ptr [rsp+16]
|
||||
xor r14, qword ptr [rsp+8]
|
||||
xor r15, qword ptr [rsp+0]
|
||||
add rsp, 72
|
17
src/crypto/randomx/asm/program_read_dataset_sshash_init.inc
Normal file
17
src/crypto/randomx/asm/program_read_dataset_sshash_init.inc
Normal file
|
@ -0,0 +1,17 @@
|
|||
sub rsp, 72
|
||||
mov qword ptr [rsp+64], rbx
|
||||
mov qword ptr [rsp+56], r8
|
||||
mov qword ptr [rsp+48], r9
|
||||
mov qword ptr [rsp+40], r10
|
||||
mov qword ptr [rsp+32], r11
|
||||
mov qword ptr [rsp+24], r12
|
||||
mov qword ptr [rsp+16], r13
|
||||
mov qword ptr [rsp+8], r14
|
||||
mov qword ptr [rsp+0], r15
|
||||
xor rbp, rax ;# modify "mx"
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov ebx, ebp ;# ecx = ma
|
||||
and ebx, RANDOMX_DATASET_BASE_MASK
|
||||
shr ebx, 6 ;# ebx = Dataset block number
|
||||
;# add ebx, datasetOffset / 64
|
||||
;# call 32768
|
24
src/crypto/randomx/asm/program_sshash_constants.inc
Normal file
24
src/crypto/randomx/asm/program_sshash_constants.inc
Normal file
|
@ -0,0 +1,24 @@
|
|||
r0_mul:
|
||||
;#/ 6364136223846793005
|
||||
db 45, 127, 149, 76, 45, 244, 81, 88
|
||||
r1_add:
|
||||
;#/ 9298411001130361340
|
||||
db 252, 161, 245, 89, 138, 151, 10, 129
|
||||
r2_add:
|
||||
;#/ 12065312585734608966
|
||||
db 70, 216, 194, 56, 223, 153, 112, 167
|
||||
r3_add:
|
||||
;#/ 9306329213124626780
|
||||
db 92, 73, 34, 191, 28, 185, 38, 129
|
||||
r4_add:
|
||||
;#/ 5281919268842080866
|
||||
db 98, 138, 159, 23, 151, 37, 77, 73
|
||||
r5_add:
|
||||
;#/ 10536153434571861004
|
||||
db 12, 236, 170, 206, 185, 239, 55, 146
|
||||
r6_add:
|
||||
;#/ 3398623926847679864
|
||||
db 120, 45, 230, 108, 116, 86, 42, 47
|
||||
r7_add:
|
||||
;#/ 9549104520008361294
|
||||
db 78, 229, 44, 182, 247, 59, 133, 132
|
8
src/crypto/randomx/asm/program_sshash_load.inc
Normal file
8
src/crypto/randomx/asm/program_sshash_load.inc
Normal file
|
@ -0,0 +1,8 @@
|
|||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
4
src/crypto/randomx/asm/program_sshash_prefetch.inc
Normal file
4
src/crypto/randomx/asm/program_sshash_prefetch.inc
Normal file
|
@ -0,0 +1,4 @@
|
|||
and rbx, RANDOMX_CACHE_MASK
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
6
src/crypto/randomx/asm/program_xmm_constants.inc
Normal file
6
src/crypto/randomx/asm/program_xmm_constants.inc
Normal file
|
@ -0,0 +1,6 @@
|
|||
mantissaMask:
|
||||
db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
|
||||
exp240:
|
||||
db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
scaleMask:
|
||||
db 0, 0, 0, 0, 0, 0, 240, 128, 0, 0, 0, 0, 0, 0, 240, 128
|
7
src/crypto/randomx/asm/randomx_reciprocal.inc
Normal file
7
src/crypto/randomx/asm/randomx_reciprocal.inc
Normal file
|
@ -0,0 +1,7 @@
|
|||
mov edx, 1
|
||||
mov r8, rcx
|
||||
xor eax, eax
|
||||
bsr rcx, rcx
|
||||
shl rdx, cl
|
||||
div r8
|
||||
ret
|
76
src/crypto/randomx/blake2/blake2-impl.h
Normal file
76
src/crypto/randomx/blake2/blake2-impl.h
Normal file
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef PORTABLE_BLAKE2_IMPL_H
|
||||
#define PORTABLE_BLAKE2_IMPL_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "endian.h"
|
||||
|
||||
static FORCE_INLINE uint64_t load48(const void *src) {
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint64_t w = *p++;
|
||||
w |= (uint64_t)(*p++) << 8;
|
||||
w |= (uint64_t)(*p++) << 16;
|
||||
w |= (uint64_t)(*p++) << 24;
|
||||
w |= (uint64_t)(*p++) << 32;
|
||||
w |= (uint64_t)(*p++) << 40;
|
||||
return w;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void store48(void *dst, uint64_t w) {
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
}
|
||||
|
||||
static FORCE_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
|
||||
return (w >> c) | (w << (32 - c));
|
||||
}
|
||||
|
||||
static FORCE_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
|
||||
return (w >> c) | (w << (64 - c));
|
||||
}
|
||||
|
||||
#endif
|
107
src/crypto/randomx/blake2/blake2.h
Normal file
107
src/crypto/randomx/blake2/blake2.h
Normal file
|
@ -0,0 +1,107 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef PORTABLE_BLAKE2_H
|
||||
#define PORTABLE_BLAKE2_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum blake2b_constant {
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2b_param {
|
||||
uint8_t digest_length; /* 1 */
|
||||
uint8_t key_length; /* 2 */
|
||||
uint8_t fanout; /* 3 */
|
||||
uint8_t depth; /* 4 */
|
||||
uint32_t leaf_length; /* 8 */
|
||||
uint64_t node_offset; /* 16 */
|
||||
uint8_t node_depth; /* 17 */
|
||||
uint8_t inner_length; /* 18 */
|
||||
uint8_t reserved[14]; /* 32 */
|
||||
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
|
||||
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
|
||||
} blake2b_param;
|
||||
#pragma pack(pop)
|
||||
|
||||
typedef struct __blake2b_state {
|
||||
uint64_t h[8];
|
||||
uint64_t t[2];
|
||||
uint64_t f[2];
|
||||
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||
unsigned buflen;
|
||||
unsigned outlen;
|
||||
uint8_t last_node;
|
||||
} blake2b_state;
|
||||
|
||||
/* Ensure param structs have not been wrongly padded */
|
||||
/* Poor man's static_assert */
|
||||
enum {
|
||||
blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
|
||||
blake2_size_check_2 =
|
||||
1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
|
||||
};
|
||||
|
||||
/* Streaming API */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen);
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
size_t keylen);
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
/* Argon2 Team - End Code */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
409
src/crypto/randomx/blake2/blake2b.c
Normal file
409
src/crypto/randomx/blake2/blake2b.c
Normal file
|
@ -0,0 +1,409 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
static const uint64_t blake2b_IV[8] = {
|
||||
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
||||
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
||||
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
||||
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
|
||||
|
||||
static const unsigned int blake2b_sigma[12][16] = {
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
};
|
||||
|
||||
static FORCE_INLINE void blake2b_set_lastnode(blake2b_state *S) {
|
||||
S->f[1] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void blake2b_set_lastblock(blake2b_state *S) {
|
||||
if (S->last_node) {
|
||||
blake2b_set_lastnode(S);
|
||||
}
|
||||
S->f[0] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void blake2b_increment_counter(blake2b_state *S,
|
||||
uint64_t inc) {
|
||||
S->t[0] += inc;
|
||||
S->t[1] += (S->t[0] < inc);
|
||||
}
|
||||
|
||||
static FORCE_INLINE void blake2b_invalidate_state(blake2b_state *S) {
|
||||
//clear_internal_memory(S, sizeof(*S)); /* wipe */
|
||||
blake2b_set_lastblock(S); /* invalidate for further use */
|
||||
}
|
||||
|
||||
static FORCE_INLINE void blake2b_init0(blake2b_state *S) {
|
||||
memset(S, 0, sizeof(*S));
|
||||
memcpy(S->h, blake2b_IV, sizeof(S->h));
|
||||
}
|
||||
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
|
||||
const unsigned char *p = (const unsigned char *)P;
|
||||
unsigned int i;
|
||||
|
||||
if (NULL == P || NULL == S) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
blake2b_init0(S);
|
||||
/* IV XOR Parameter Block */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
|
||||
}
|
||||
S->outlen = P->digest_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sequential blake2b initialization */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen) {
|
||||
blake2b_param P;
|
||||
|
||||
if (S == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Setup Parameter Block for unkeyed BLAKE2 */
|
||||
P.digest_length = (uint8_t)outlen;
|
||||
P.key_length = 0;
|
||||
P.fanout = 1;
|
||||
P.depth = 1;
|
||||
P.leaf_length = 0;
|
||||
P.node_offset = 0;
|
||||
P.node_depth = 0;
|
||||
P.inner_length = 0;
|
||||
memset(P.reserved, 0, sizeof(P.reserved));
|
||||
memset(P.salt, 0, sizeof(P.salt));
|
||||
memset(P.personal, 0, sizeof(P.personal));
|
||||
|
||||
return blake2b_init_param(S, &P);
|
||||
}
|
||||
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) {
|
||||
blake2b_param P;
|
||||
|
||||
if (S == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Setup Parameter Block for keyed BLAKE2 */
|
||||
P.digest_length = (uint8_t)outlen;
|
||||
P.key_length = (uint8_t)keylen;
|
||||
P.fanout = 1;
|
||||
P.depth = 1;
|
||||
P.leaf_length = 0;
|
||||
P.node_offset = 0;
|
||||
P.node_depth = 0;
|
||||
P.inner_length = 0;
|
||||
memset(P.reserved, 0, sizeof(P.reserved));
|
||||
memset(P.salt, 0, sizeof(P.salt));
|
||||
memset(P.personal, 0, sizeof(P.personal));
|
||||
|
||||
if (blake2b_init_param(S, &P) < 0) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
uint8_t block[BLAKE2B_BLOCKBYTES];
|
||||
memset(block, 0, BLAKE2B_BLOCKBYTES);
|
||||
memcpy(block, key, keylen);
|
||||
blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
|
||||
/* Burn the key from stack */
|
||||
//clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
|
||||
uint64_t m[16];
|
||||
uint64_t v[16];
|
||||
unsigned int i, r;
|
||||
|
||||
for (i = 0; i < 16; ++i) {
|
||||
m[i] = load64(block + i * sizeof(m[i]));
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
v[i] = S->h[i];
|
||||
}
|
||||
|
||||
v[8] = blake2b_IV[0];
|
||||
v[9] = blake2b_IV[1];
|
||||
v[10] = blake2b_IV[2];
|
||||
v[11] = blake2b_IV[3];
|
||||
v[12] = blake2b_IV[4] ^ S->t[0];
|
||||
v[13] = blake2b_IV[5] ^ S->t[1];
|
||||
v[14] = blake2b_IV[6] ^ S->f[0];
|
||||
v[15] = blake2b_IV[7] ^ S->f[1];
|
||||
|
||||
#define G(r, i, a, b, c, d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r, 0, v[0], v[4], v[8], v[12]); \
|
||||
G(r, 1, v[1], v[5], v[9], v[13]); \
|
||||
G(r, 2, v[2], v[6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[7], v[8], v[13]); \
|
||||
G(r, 7, v[3], v[4], v[9], v[14]); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
for (r = 0; r < 12; ++r) {
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
}
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
}
|
||||
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
||||
const uint8_t *pin = (const uint8_t *)in;
|
||||
|
||||
if (inlen == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sanity check */
|
||||
if (S == NULL || in == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Is this a reused state? */
|
||||
if (S->f[0] != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
|
||||
/* Complete current block */
|
||||
size_t left = S->buflen;
|
||||
size_t fill = BLAKE2B_BLOCKBYTES - left;
|
||||
memcpy(&S->buf[left], pin, fill);
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, S->buf);
|
||||
S->buflen = 0;
|
||||
inlen -= fill;
|
||||
pin += fill;
|
||||
/* Avoid buffer copies when possible */
|
||||
while (inlen > BLAKE2B_BLOCKBYTES) {
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, pin);
|
||||
inlen -= BLAKE2B_BLOCKBYTES;
|
||||
pin += BLAKE2B_BLOCKBYTES;
|
||||
}
|
||||
}
|
||||
memcpy(&S->buf[S->buflen], pin, inlen);
|
||||
S->buflen += (unsigned int)inlen;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
||||
uint8_t buffer[BLAKE2B_OUTBYTES] = { 0 };
|
||||
unsigned int i;
|
||||
|
||||
/* Sanity checks */
|
||||
if (S == NULL || out == NULL || outlen < S->outlen) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Is this a reused state? */
|
||||
if (S->f[0] != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
blake2b_increment_counter(S, S->buflen);
|
||||
blake2b_set_lastblock(S);
|
||||
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
|
||||
blake2b_compress(S, S->buf);
|
||||
|
||||
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
|
||||
store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
|
||||
}
|
||||
|
||||
memcpy(out, buffer, S->outlen);
|
||||
//clear_internal_memory(buffer, sizeof(buffer));
|
||||
//clear_internal_memory(S->buf, sizeof(S->buf));
|
||||
//clear_internal_memory(S->h, sizeof(S->h));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen) {
|
||||
blake2b_state S;
|
||||
int ret = -1;
|
||||
|
||||
/* Verify parameters */
|
||||
if (NULL == in && inlen > 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (keylen > 0) {
|
||||
if (blake2b_init_key(&S, outlen, key, keylen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (blake2b_init(&S, outlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
if (blake2b_update(&S, in, inlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
ret = blake2b_final(&S, out, outlen);
|
||||
|
||||
fail:
|
||||
//clear_internal_memory(&S, sizeof(S));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int rxa2_blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
blake2b_state blake_state;
|
||||
uint8_t outlen_bytes[sizeof(uint32_t)] = { 0 };
|
||||
int ret = -1;
|
||||
|
||||
if (outlen > UINT32_MAX) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Ensure little-endian byte order! */
|
||||
store32(outlen_bytes, (uint32_t)outlen);
|
||||
|
||||
#define TRY(statement) \
|
||||
do { \
|
||||
ret = statement; \
|
||||
if (ret < 0) { \
|
||||
goto fail; \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
|
||||
if (outlen <= BLAKE2B_OUTBYTES) {
|
||||
TRY(blake2b_init(&blake_state, outlen));
|
||||
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(blake2b_update(&blake_state, in, inlen));
|
||||
TRY(blake2b_final(&blake_state, out, outlen));
|
||||
}
|
||||
else {
|
||||
uint32_t toproduce;
|
||||
uint8_t out_buffer[BLAKE2B_OUTBYTES];
|
||||
uint8_t in_buffer[BLAKE2B_OUTBYTES];
|
||||
TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
||||
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(blake2b_update(&blake_state, in, inlen));
|
||||
TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
|
||||
|
||||
while (toproduce > BLAKE2B_OUTBYTES) {
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
||||
BLAKE2B_OUTBYTES, NULL, 0));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce -= BLAKE2B_OUTBYTES / 2;
|
||||
}
|
||||
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
|
||||
0));
|
||||
memcpy(out, out_buffer, toproduce);
|
||||
}
|
||||
fail:
|
||||
//clear_internal_memory(&blake_state, sizeof(blake_state));
|
||||
return ret;
|
||||
#undef TRY
|
||||
}
|
||||
/* Argon2 Team - End Code */
|
||||
|
73
src/crypto/randomx/blake2/blamka-round-ref.h
Normal file
73
src/crypto/randomx/blake2/blamka-round-ref.h
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_H
|
||||
#define BLAKE_ROUND_MKA_H
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/* designed by the Lyra PHC team */
|
||||
static FORCE_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
|
||||
const uint64_t m = UINT64_C(0xFFFFFFFF);
|
||||
const uint64_t xy = (x & m) * (y & m);
|
||||
return x + y + 2 * xy;
|
||||
}
|
||||
|
||||
#define G(a, b, c, d) \
|
||||
do { \
|
||||
a = fBlaMka(a, b); \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = fBlaMka(c, d); \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = fBlaMka(a, b); \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = fBlaMka(c, d); \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
|
||||
v12, v13, v14, v15) \
|
||||
do { \
|
||||
G(v0, v4, v8, v12); \
|
||||
G(v1, v5, v9, v13); \
|
||||
G(v2, v6, v10, v14); \
|
||||
G(v3, v7, v11, v15); \
|
||||
G(v0, v5, v10, v15); \
|
||||
G(v1, v6, v11, v12); \
|
||||
G(v2, v7, v8, v13); \
|
||||
G(v3, v4, v9, v14); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif
|
107
src/crypto/randomx/blake2/endian.h
Normal file
107
src/crypto/randomx/blake2/endian.h
Normal file
|
@ -0,0 +1,107 @@
|
|||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define FORCE_INLINE __inline
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#define FORCE_INLINE __inline__
|
||||
#else
|
||||
#define FORCE_INLINE
|
||||
#endif
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
/*
|
||||
Not an exhaustive list, but should cover the majority of modern platforms
|
||||
Additionally, the code will always be correct---this is only a performance
|
||||
tweak.
|
||||
*/
|
||||
#if (defined(__BYTE_ORDER__) && \
|
||||
(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
|
||||
defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
|
||||
defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
|
||||
defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
|
||||
defined(_M_ARM)
|
||||
#define NATIVE_LITTLE_ENDIAN
|
||||
#endif
|
||||
/* Argon2 Team - End Code */
|
||||
|
||||
static FORCE_INLINE uint32_t load32(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
uint32_t w;
|
||||
memcpy(&w, src, sizeof w);
|
||||
return w;
|
||||
#else
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint32_t w = *p++;
|
||||
w |= (uint32_t)(*p++) << 8;
|
||||
w |= (uint32_t)(*p++) << 16;
|
||||
w |= (uint32_t)(*p++) << 24;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE uint64_t load64_native(const void *src) {
|
||||
uint64_t w;
|
||||
memcpy(&w, src, sizeof w);
|
||||
return w;
|
||||
}
|
||||
|
||||
static FORCE_INLINE uint64_t load64(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return load64_native(src);
|
||||
#else
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint64_t w = *p++;
|
||||
w |= (uint64_t)(*p++) << 8;
|
||||
w |= (uint64_t)(*p++) << 16;
|
||||
w |= (uint64_t)(*p++) << 24;
|
||||
w |= (uint64_t)(*p++) << 32;
|
||||
w |= (uint64_t)(*p++) << 40;
|
||||
w |= (uint64_t)(*p++) << 48;
|
||||
w |= (uint64_t)(*p++) << 56;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE void store32(void *dst, uint32_t w) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
memcpy(dst, &w, sizeof w);
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE void store64_native(void *dst, uint64_t w) {
|
||||
memcpy(dst, &w, sizeof w);
|
||||
}
|
||||
|
||||
static FORCE_INLINE void store64(void *dst, uint64_t w) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
store64_native(dst, w);
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
#endif
|
||||
}
|
62
src/crypto/randomx/blake2_generator.cpp
Normal file
62
src/crypto/randomx/blake2_generator.cpp
Normal file
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/endian.h"
|
||||
#include "blake2_generator.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
constexpr int maxSeedSize = 60;
|
||||
|
||||
Blake2Generator::Blake2Generator(const void* seed, size_t seedSize, int nonce) : dataIndex(sizeof(data)) {
|
||||
memset(data, 0, sizeof(data));
|
||||
memcpy(data, seed, seedSize > maxSeedSize ? maxSeedSize : seedSize);
|
||||
store32(&data[maxSeedSize], nonce);
|
||||
}
|
||||
|
||||
uint8_t Blake2Generator::getByte() {
|
||||
checkData(1);
|
||||
return data[dataIndex++];
|
||||
}
|
||||
|
||||
uint32_t Blake2Generator::getInt32() {
|
||||
checkData(4);
|
||||
auto ret = load32(&data[dataIndex]);
|
||||
dataIndex += 4;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Blake2Generator::checkData(const size_t bytesNeeded) {
|
||||
if (dataIndex + bytesNeeded > sizeof(data)) {
|
||||
blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
|
||||
dataIndex = 0;
|
||||
}
|
||||
}
|
||||
}
|
46
src/crypto/randomx/blake2_generator.hpp
Normal file
46
src/crypto/randomx/blake2_generator.hpp
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Blake2Generator {
|
||||
public:
|
||||
Blake2Generator(const void* seed, size_t seedSize, int nonce = 0);
|
||||
uint8_t getByte();
|
||||
uint32_t getInt32();
|
||||
private:
|
||||
void checkData(const size_t);
|
||||
|
||||
uint8_t data[64];
|
||||
size_t dataIndex;
|
||||
};
|
||||
}
|
482
src/crypto/randomx/bytecode_machine.cpp
Normal file
482
src/crypto/randomx/bytecode_machine.cpp
Normal file
|
@ -0,0 +1,482 @@
|
|||
/*
|
||||
Copyright (c) 2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "bytecode_machine.hpp"
|
||||
#include "reciprocal.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
const int_reg_t BytecodeMachine::zero = 0;
|
||||
|
||||
#define INSTR_CASE(x) case InstructionType::x: \
|
||||
exe_ ## x(ibc, pc, scratchpad, config); \
|
||||
break;
|
||||
|
||||
void BytecodeMachine::executeInstruction(RANDOMX_EXE_ARGS) {
|
||||
switch (ibc.type)
|
||||
{
|
||||
INSTR_CASE(IADD_RS)
|
||||
INSTR_CASE(IADD_M)
|
||||
INSTR_CASE(ISUB_R)
|
||||
INSTR_CASE(ISUB_M)
|
||||
INSTR_CASE(IMUL_R)
|
||||
INSTR_CASE(IMUL_M)
|
||||
INSTR_CASE(IMULH_R)
|
||||
INSTR_CASE(IMULH_M)
|
||||
INSTR_CASE(ISMULH_R)
|
||||
INSTR_CASE(ISMULH_M)
|
||||
INSTR_CASE(INEG_R)
|
||||
INSTR_CASE(IXOR_R)
|
||||
INSTR_CASE(IXOR_M)
|
||||
INSTR_CASE(IROR_R)
|
||||
INSTR_CASE(IROL_R)
|
||||
INSTR_CASE(ISWAP_R)
|
||||
INSTR_CASE(FSWAP_R)
|
||||
INSTR_CASE(FADD_R)
|
||||
INSTR_CASE(FADD_M)
|
||||
INSTR_CASE(FSUB_R)
|
||||
INSTR_CASE(FSUB_M)
|
||||
INSTR_CASE(FSCAL_R)
|
||||
INSTR_CASE(FMUL_R)
|
||||
INSTR_CASE(FDIV_M)
|
||||
INSTR_CASE(FSQRT_R)
|
||||
INSTR_CASE(CBRANCH)
|
||||
INSTR_CASE(CFROUND)
|
||||
INSTR_CASE(ISTORE)
|
||||
|
||||
case InstructionType::NOP:
|
||||
break;
|
||||
|
||||
case InstructionType::IMUL_RCP: //executed as IMUL_R
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
|
||||
int opcode = instr.opcode;
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IADD_RS) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IADD_RS;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (dst != RegisterNeedsDisplacement) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.shift = instr.getModShift();
|
||||
ibc.imm = 0;
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.shift = instr.getModShift();
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IADD_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IADD_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISUB_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISUB_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMUL_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMUL_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMULH_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMULH_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISMULH_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISMULH_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_RCP) {
|
||||
uint64_t divisor = instr.getImm32();
|
||||
if (!isPowerOf2(divisor)) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::IMUL_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = randomx_reciprocal(divisor);
|
||||
ibc.isrc = &ibc.imm;
|
||||
registerUsage[dst] = i;
|
||||
}
|
||||
else {
|
||||
ibc.type = InstructionType::NOP;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_INEG_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::INEG_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IXOR_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IXOR_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IROR_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IROR_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = instr.getImm32();
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_IROL_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IROL_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = instr.getImm32();
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISWAP_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
if (src != dst) {
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.type = InstructionType::ISWAP_R;
|
||||
registerUsage[dst] = i;
|
||||
registerUsage[src] = i;
|
||||
}
|
||||
else {
|
||||
ibc.type = InstructionType::NOP;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSWAP_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::FSWAP_R;
|
||||
if (dst < RegisterCountFlt)
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
else
|
||||
ibc.fdst = &nreg->e[dst - RegisterCountFlt];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FADD_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FADD_R;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.fsrc = &nreg->a[src];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FADD_M) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FADD_M;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FSUB_R;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.fsrc = &nreg->a[src];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_M) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FSUB_M;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSCAL_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.type = InstructionType::FSCAL_R;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FMUL_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FMUL_R;
|
||||
ibc.fdst = &nreg->e[dst];
|
||||
ibc.fsrc = &nreg->a[src];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FDIV_M) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FDIV_M;
|
||||
ibc.fdst = &nreg->e[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_FSQRT_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FSQRT_R;
|
||||
ibc.fdst = &nreg->e[dst];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_CBRANCH) {
|
||||
ibc.type = InstructionType::CBRANCH;
|
||||
//jump condition
|
||||
int creg = instr.dst % RegistersCount;
|
||||
ibc.idst = &nreg->r[creg];
|
||||
ibc.target = registerUsage[creg];
|
||||
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
|
||||
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
|
||||
ibc.imm &= ~(1ULL << (shift - 1));
|
||||
ibc.memMask = RandomX_CurrentConfig.ConditionMask_Calculated << shift;
|
||||
//mark all registers as used
|
||||
for (unsigned j = 0; j < RegistersCount; ++j) {
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_CFROUND) {
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.type = InstructionType::CFROUND;
|
||||
ibc.imm = instr.getImm32() & 63;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_ISTORE) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISTORE;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (instr.getModCond() < StoreL3Condition)
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
else
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < RandomX_CurrentConfig.CEIL_NOP) {
|
||||
ibc.type = InstructionType::NOP;
|
||||
return;
|
||||
}
|
||||
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
288
src/crypto/randomx/bytecode_machine.hpp
Normal file
288
src/crypto/randomx/bytecode_machine.hpp
Normal file
|
@ -0,0 +1,288 @@
|
|||
/*
|
||||
Copyright (c) 2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.hpp"
|
||||
#include "intrin_portable.h"
|
||||
#include "instruction.hpp"
|
||||
#include "program.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
//register file in machine byte order
|
||||
struct NativeRegisterFile {
|
||||
int_reg_t r[RegistersCount] = { 0 };
|
||||
rx_vec_f128 f[RegisterCountFlt];
|
||||
rx_vec_f128 e[RegisterCountFlt];
|
||||
rx_vec_f128 a[RegisterCountFlt];
|
||||
};
|
||||
|
||||
struct InstructionByteCode {
|
||||
union {
|
||||
int_reg_t* idst;
|
||||
rx_vec_f128* fdst;
|
||||
};
|
||||
union {
|
||||
const int_reg_t* isrc;
|
||||
const rx_vec_f128* fsrc;
|
||||
};
|
||||
union {
|
||||
uint64_t imm;
|
||||
int64_t simm;
|
||||
};
|
||||
InstructionType type;
|
||||
union {
|
||||
int16_t target;
|
||||
uint16_t shift;
|
||||
};
|
||||
uint32_t memMask;
|
||||
};
|
||||
|
||||
#define RANDOMX_EXE_ARGS InstructionByteCode& ibc, int& pc, uint8_t* scratchpad, ProgramConfiguration& config
|
||||
#define RANDOMX_GEN_ARGS Instruction& instr, int i, InstructionByteCode& ibc
|
||||
|
||||
class BytecodeMachine;
|
||||
|
||||
typedef void(BytecodeMachine::*InstructionGenBytecode)(RANDOMX_GEN_ARGS);
|
||||
|
||||
class BytecodeMachine {
|
||||
public:
|
||||
void beginCompilation(NativeRegisterFile& regFile) {
|
||||
for (unsigned i = 0; i < RegistersCount; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
nreg = ®File;
|
||||
}
|
||||
|
||||
void compileProgram(Program& program, InstructionByteCode* bytecode, NativeRegisterFile& regFile) {
|
||||
beginCompilation(regFile);
|
||||
for (unsigned i = 0; i < RandomX_CurrentConfig.ProgramSize; ++i) {
|
||||
auto& instr = program(i);
|
||||
auto& ibc = bytecode[i];
|
||||
compileInstruction(instr, i, ibc);
|
||||
}
|
||||
}
|
||||
|
||||
static void executeBytecode(InstructionByteCode* bytecode, uint8_t* scratchpad, ProgramConfiguration& config) {
|
||||
for (int pc = 0; pc < RandomX_CurrentConfig.ProgramSize; ++pc) {
|
||||
auto& ibc = bytecode[pc];
|
||||
executeInstruction(ibc, pc, scratchpad, config);
|
||||
}
|
||||
}
|
||||
|
||||
void compileInstruction(RANDOMX_GEN_ARGS)
|
||||
#ifdef RANDOMX_GEN_TABLE
|
||||
{
|
||||
auto generator = genTable[instr.opcode];
|
||||
(this->*generator)(instr, i, ibc);
|
||||
}
|
||||
#else
|
||||
;
|
||||
#endif
|
||||
|
||||
static void executeInstruction(RANDOMX_EXE_ARGS);
|
||||
|
||||
static void exe_IADD_RS(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
|
||||
}
|
||||
|
||||
static void exe_IADD_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst += load64(getScratchpadAddress(ibc, scratchpad));
|
||||
}
|
||||
|
||||
static void exe_ISUB_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst -= *ibc.isrc;
|
||||
}
|
||||
|
||||
static void exe_ISUB_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst -= load64(getScratchpadAddress(ibc, scratchpad));
|
||||
}
|
||||
|
||||
static void exe_IMUL_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst *= *ibc.isrc;
|
||||
}
|
||||
|
||||
static void exe_IMUL_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst *= load64(getScratchpadAddress(ibc, scratchpad));
|
||||
}
|
||||
|
||||
static void exe_IMULH_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = mulh(*ibc.idst, *ibc.isrc);
|
||||
}
|
||||
|
||||
static void exe_IMULH_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc, scratchpad)));
|
||||
}
|
||||
|
||||
static void exe_ISMULH_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc));
|
||||
}
|
||||
|
||||
static void exe_ISMULH_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc, scratchpad))));
|
||||
}
|
||||
|
||||
static void exe_INEG_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = ~(*ibc.idst) + 1; //two's complement negative
|
||||
}
|
||||
|
||||
static void exe_IXOR_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst ^= *ibc.isrc;
|
||||
}
|
||||
|
||||
static void exe_IXOR_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst ^= load64(getScratchpadAddress(ibc, scratchpad));
|
||||
}
|
||||
|
||||
static void exe_IROR_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = rotr64(*ibc.idst, *ibc.isrc & 63);
|
||||
}
|
||||
|
||||
static void exe_IROL_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = rotl64(*ibc.idst, *ibc.isrc & 63);
|
||||
}
|
||||
|
||||
static void exe_ISWAP_R(RANDOMX_EXE_ARGS) {
|
||||
int_reg_t temp = *ibc.isrc;
|
||||
*(int_reg_t*)ibc.isrc = *ibc.idst;
|
||||
*ibc.idst = temp;
|
||||
}
|
||||
|
||||
static void exe_FSWAP_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_swap_vec_f128(*ibc.fdst);
|
||||
}
|
||||
|
||||
static void exe_FADD_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc);
|
||||
}
|
||||
|
||||
static void exe_FADD_M(RANDOMX_EXE_ARGS) {
|
||||
rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
|
||||
*ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc);
|
||||
}
|
||||
|
||||
static void exe_FSUB_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc);
|
||||
}
|
||||
|
||||
static void exe_FSUB_M(RANDOMX_EXE_ARGS) {
|
||||
rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
|
||||
*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc);
|
||||
}
|
||||
|
||||
static void exe_FSCAL_R(RANDOMX_EXE_ARGS) {
|
||||
const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000);
|
||||
*ibc.fdst = rx_xor_vec_f128(*ibc.fdst, mask);
|
||||
}
|
||||
|
||||
static void exe_FMUL_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc);
|
||||
}
|
||||
|
||||
static void exe_FDIV_M(RANDOMX_EXE_ARGS) {
|
||||
rx_vec_f128 fsrc = maskRegisterExponentMantissa(
|
||||
config,
|
||||
rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad))
|
||||
);
|
||||
*ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc);
|
||||
}
|
||||
|
||||
static void exe_FSQRT_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst);
|
||||
}
|
||||
|
||||
static void exe_CBRANCH(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst += ibc.imm;
|
||||
if ((*ibc.idst & ibc.memMask) == 0) {
|
||||
pc = ibc.target;
|
||||
}
|
||||
}
|
||||
|
||||
static void exe_CFROUND(RANDOMX_EXE_ARGS) {
|
||||
rx_set_rounding_mode(rotr64(*ibc.isrc, ibc.imm) % 4);
|
||||
}
|
||||
|
||||
static void exe_ISTORE(RANDOMX_EXE_ARGS) {
|
||||
store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc);
|
||||
}
|
||||
protected:
|
||||
static rx_vec_f128 maskRegisterExponentMantissa(ProgramConfiguration& config, rx_vec_f128 x) {
|
||||
const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask);
|
||||
const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask);
|
||||
x = rx_and_vec_f128(x, xmantissaMask);
|
||||
x = rx_or_vec_f128(x, xexponentMask);
|
||||
return x;
|
||||
}
|
||||
|
||||
private:
|
||||
static const int_reg_t zero;
|
||||
int registerUsage[RegistersCount];
|
||||
NativeRegisterFile* nreg;
|
||||
|
||||
static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) {
|
||||
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
|
||||
return scratchpad + addr;
|
||||
}
|
||||
|
||||
#ifdef RANDOMX_GEN_TABLE
|
||||
static InstructionGenBytecode genTable[256];
|
||||
|
||||
void gen_IADD_RS(RANDOMX_GEN_ARGS);
|
||||
void gen_IADD_M(RANDOMX_GEN_ARGS);
|
||||
void gen_ISUB_R(RANDOMX_GEN_ARGS);
|
||||
void gen_ISUB_M(RANDOMX_GEN_ARGS);
|
||||
void gen_IMUL_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IMUL_M(RANDOMX_GEN_ARGS);
|
||||
void gen_IMULH_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IMULH_M(RANDOMX_GEN_ARGS);
|
||||
void gen_ISMULH_R(RANDOMX_GEN_ARGS);
|
||||
void gen_ISMULH_M(RANDOMX_GEN_ARGS);
|
||||
void gen_IMUL_RCP(RANDOMX_GEN_ARGS);
|
||||
void gen_INEG_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IXOR_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IXOR_M(RANDOMX_GEN_ARGS);
|
||||
void gen_IROR_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IROL_R(RANDOMX_GEN_ARGS);
|
||||
void gen_ISWAP_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FSWAP_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FADD_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FADD_M(RANDOMX_GEN_ARGS);
|
||||
void gen_FSUB_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FSUB_M(RANDOMX_GEN_ARGS);
|
||||
void gen_FSCAL_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FMUL_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FDIV_M(RANDOMX_GEN_ARGS);
|
||||
void gen_FSQRT_R(RANDOMX_GEN_ARGS);
|
||||
void gen_CBRANCH(RANDOMX_GEN_ARGS);
|
||||
void gen_CFROUND(RANDOMX_GEN_ARGS);
|
||||
void gen_ISTORE(RANDOMX_GEN_ARGS);
|
||||
void gen_NOP(RANDOMX_GEN_ARGS);
|
||||
#endif
|
||||
};
|
||||
}
|
173
src/crypto/randomx/common.hpp
Normal file
173
src/crypto/randomx/common.hpp
Normal file
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <climits>
|
||||
#include "blake2/endian.h"
|
||||
#include "configuration.h"
|
||||
#include "randomx.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
//static_assert(RANDOMX_ARGON_MEMORY > 0, "RANDOMX_ARGON_MEMORY must be greater than 0.");
|
||||
//static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
|
||||
//static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64.");
|
||||
//static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2.");
|
||||
//static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296.");
|
||||
//static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
|
||||
//static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB.");
|
||||
//static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0");
|
||||
//static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0");
|
||||
//static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0");
|
||||
//static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2.");
|
||||
//static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2.");
|
||||
//static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2.");
|
||||
//static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
|
||||
//static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64.");
|
||||
//static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
|
||||
//static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
|
||||
//static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0");
|
||||
//static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
|
||||
//static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
|
||||
//static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
|
||||
|
||||
/*constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
|
||||
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
|
||||
RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
|
||||
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
|
||||
RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
|
||||
RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
|
||||
RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;*/
|
||||
|
||||
//static_assert(wtSum == 256, "Sum of instruction frequencies must be 256.");
|
||||
|
||||
|
||||
constexpr uint32_t ArgonBlockSize = 1024;
|
||||
constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_MAX_LATENCY + 2;
|
||||
constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
|
||||
#define ScratchpadSize RandomX_CurrentConfig.ScratchpadL3_Size
|
||||
#define CacheLineAlignMask RandomX_CurrentConfig.CacheLineAlignMask_Calculated
|
||||
#define DatasetExtraItems RandomX_CurrentConfig.DatasetExtraItems_Calculated
|
||||
constexpr int StoreL3Condition = 14;
|
||||
|
||||
//Prevent some unsafe configurations.
|
||||
#ifndef RANDOMX_UNSAFE
|
||||
//static_assert((uint64_t)ArgonBlockSize * RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY + 33554432 >= (uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE, "Unsafe configuration: Memory-time tradeoffs");
|
||||
//static_assert((128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3, "Unsafe configuration: Insufficient Scratchpad writes");
|
||||
//static_assert(RANDOMX_PROGRAM_COUNT > 1, "Unsafe configuration: Program filtering strategies");
|
||||
//static_assert(RANDOMX_PROGRAM_SIZE >= 64, "Unsafe configuration: Low program entropy");
|
||||
//static_assert(RANDOMX_PROGRAM_ITERATIONS >= 400, "Unsafe configuration: High compilation overhead");
|
||||
#endif
|
||||
|
||||
#ifdef TRACE
|
||||
constexpr bool trace = true;
|
||||
#else
|
||||
constexpr bool trace = false;
|
||||
#endif
|
||||
|
||||
#ifndef UNREACHABLE
|
||||
#ifdef __GNUC__
|
||||
#define UNREACHABLE __builtin_unreachable()
|
||||
#elif _MSC_VER
|
||||
#define UNREACHABLE __assume(false)
|
||||
#else
|
||||
#define UNREACHABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#define RANDOMX_HAVE_COMPILER 1
|
||||
class JitCompilerX86;
|
||||
using JitCompiler = JitCompilerX86;
|
||||
#elif defined(__aarch64__)
|
||||
#define RANDOMX_HAVE_COMPILER 0
|
||||
class JitCompilerA64;
|
||||
using JitCompiler = JitCompilerA64;
|
||||
#else
|
||||
#define RANDOMX_HAVE_COMPILER 0
|
||||
class JitCompilerFallback;
|
||||
using JitCompiler = JitCompilerFallback;
|
||||
#endif
|
||||
|
||||
using addr_t = uint32_t;
|
||||
|
||||
using int_reg_t = uint64_t;
|
||||
|
||||
struct fpu_reg_t {
|
||||
double lo;
|
||||
double hi;
|
||||
};
|
||||
|
||||
#define ScratchpadL1Mask RandomX_CurrentConfig.ScratchpadL1Mask_Calculated
|
||||
#define ScratchpadL1Mask16 RandomX_CurrentConfig.ScratchpadL1Mask16_Calculated
|
||||
#define ScratchpadL2Mask RandomX_CurrentConfig.ScratchpadL2Mask_Calculated
|
||||
#define ScratchpadL2Mask16 RandomX_CurrentConfig.ScratchpadL2Mask16_Calculated
|
||||
#define ScratchpadL3Mask RandomX_CurrentConfig.ScratchpadL3Mask_Calculated
|
||||
#define ScratchpadL3Mask64 RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated
|
||||
constexpr int RegistersCount = 8;
|
||||
constexpr int RegisterCountFlt = RegistersCount / 2;
|
||||
constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
|
||||
constexpr int RegisterNeedsSib = 4; //x86 r12 register
|
||||
|
||||
inline bool isPowerOf2(uint64_t x) {
|
||||
return (x & (x - 1)) == 0;
|
||||
}
|
||||
|
||||
constexpr int mantissaSize = 52;
|
||||
constexpr int exponentSize = 11;
|
||||
constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
|
||||
constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
|
||||
constexpr int exponentBias = 1023;
|
||||
constexpr int dynamicExponentBits = 4;
|
||||
constexpr int staticExponentBits = 4;
|
||||
constexpr uint64_t constExponentBits = 0x300;
|
||||
constexpr uint64_t dynamicMantissaMask = (1ULL << (mantissaSize + dynamicExponentBits)) - 1;
|
||||
|
||||
struct MemoryRegisters {
|
||||
addr_t mx, ma;
|
||||
uint8_t* memory = nullptr;
|
||||
};
|
||||
|
||||
//register file in little-endian byte order
|
||||
struct RegisterFile {
|
||||
int_reg_t r[RegistersCount];
|
||||
fpu_reg_t f[RegisterCountFlt];
|
||||
fpu_reg_t e[RegisterCountFlt];
|
||||
fpu_reg_t a[RegisterCountFlt];
|
||||
};
|
||||
|
||||
typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
|
||||
typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
|
||||
|
||||
typedef void(DatasetDeallocFunc)(randomx_dataset*);
|
||||
typedef void(CacheDeallocFunc)(randomx_cache*);
|
||||
typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t);
|
||||
}
|
47
src/crypto/randomx/configuration.h
Normal file
47
src/crypto/randomx/configuration.h
Normal file
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Increase it if some configs use more cache accesses
|
||||
#define RANDOMX_CACHE_MAX_ACCESSES 16
|
||||
|
||||
// Increase it if some configs use larger superscalar latency
|
||||
#define RANDOMX_SUPERSCALAR_MAX_LATENCY 256
|
||||
|
||||
// Increase it if some configs use larger cache
|
||||
#define RANDOMX_CACHE_MAX_SIZE 268435456
|
||||
|
||||
// Increase it if some configs use larger dataset
|
||||
#define RANDOMX_DATASET_MAX_SIZE 2181038080
|
||||
|
||||
// Increase it if some configs use larger programs
|
||||
#define RANDOMX_PROGRAM_MAX_SIZE 512
|
||||
|
||||
// Increase it if some configs use larger scratchpad
|
||||
#define RANDOMX_SCRATCHPAD_L3_MAX_SIZE 2097152
|
189
src/crypto/randomx/dataset.cpp
Normal file
189
src/crypto/randomx/dataset.cpp
Normal file
|
@ -0,0 +1,189 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <new>
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <cstring>
|
||||
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "virtual_memory.hpp"
|
||||
#include "superscalar.hpp"
|
||||
#include "blake2_generator.hpp"
|
||||
#include "reciprocal.h"
|
||||
#include "blake2/endian.h"
|
||||
#include "argon2.h"
|
||||
#include "argon2_core.h"
|
||||
#include "jit_compiler.hpp"
|
||||
#include "intrin_portable.h"
|
||||
|
||||
//static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
|
||||
static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE");
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator>
|
||||
void deallocCache(randomx_cache* cache) {
|
||||
if (cache->memory != nullptr)
|
||||
Allocator::freeMemory(cache->memory, RANDOMX_CACHE_MAX_SIZE);
|
||||
if (cache->jit != nullptr)
|
||||
delete cache->jit;
|
||||
}
|
||||
|
||||
template void deallocCache<DefaultAllocator>(randomx_cache* cache);
|
||||
template void deallocCache<LargePageAllocator>(randomx_cache* cache);
|
||||
|
||||
void initCache(randomx_cache* cache, const void* key, size_t keySize) {
|
||||
uint32_t memory_blocks, segment_length;
|
||||
argon2_instance_t instance;
|
||||
argon2_context context;
|
||||
|
||||
context.out = nullptr;
|
||||
context.outlen = 0;
|
||||
context.pwd = CONST_CAST(uint8_t *)key;
|
||||
context.pwdlen = (uint32_t)keySize;
|
||||
context.salt = CONST_CAST(uint8_t *)RandomX_CurrentConfig.ArgonSalt;
|
||||
context.saltlen = (uint32_t)strlen(RandomX_CurrentConfig.ArgonSalt);
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.t_cost = RandomX_CurrentConfig.ArgonIterations;
|
||||
context.m_cost = RandomX_CurrentConfig.ArgonMemory;
|
||||
context.lanes = RandomX_CurrentConfig.ArgonLanes;
|
||||
context.threads = 1;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = ARGON2_VERSION_NUMBER;
|
||||
|
||||
/* 2. Align memory size */
|
||||
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
|
||||
memory_blocks = context.m_cost;
|
||||
|
||||
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
|
||||
|
||||
instance.version = context.version;
|
||||
instance.memory = NULL;
|
||||
instance.passes = context.t_cost;
|
||||
instance.memory_blocks = memory_blocks;
|
||||
instance.segment_length = segment_length;
|
||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||
instance.lanes = context.lanes;
|
||||
instance.threads = context.threads;
|
||||
instance.type = Argon2_d;
|
||||
instance.memory = (block*)cache->memory;
|
||||
|
||||
if (instance.threads > instance.lanes) {
|
||||
instance.threads = instance.lanes;
|
||||
}
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
* blocks
|
||||
*/
|
||||
rxa2_argon_initialize(&instance, &context);
|
||||
|
||||
rxa2_fill_memory_blocks(&instance);
|
||||
|
||||
cache->reciprocalCache.clear();
|
||||
randomx::Blake2Generator gen(key, keySize);
|
||||
for (int i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) {
|
||||
randomx::generateSuperscalar(cache->programs[i], gen);
|
||||
for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) {
|
||||
auto& instr = cache->programs[i](j);
|
||||
if ((SuperscalarInstructionType)instr.opcode == SuperscalarInstructionType::IMUL_RCP) {
|
||||
auto rcp = randomx_reciprocal(instr.getImm32());
|
||||
instr.setImm32(cache->reciprocalCache.size());
|
||||
cache->reciprocalCache.push_back(rcp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) {
|
||||
initCache(cache, key, keySize);
|
||||
cache->jit->generateSuperscalarHash(cache->programs, cache->reciprocalCache);
|
||||
cache->jit->generateDatasetInitCode();
|
||||
}
|
||||
|
||||
constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
|
||||
constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL;
|
||||
constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
|
||||
constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL;
|
||||
constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
|
||||
constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
|
||||
constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
|
||||
constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
|
||||
|
||||
static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
|
||||
const uint32_t mask = (RandomX_CurrentConfig.ArgonMemory * randomx::ArgonBlockSize) / CacheLineSize - 1;
|
||||
return memory + (registerValue & mask) * CacheLineSize;
|
||||
}
|
||||
|
||||
void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber) {
|
||||
int_reg_t rl[8];
|
||||
uint8_t* mixBlock;
|
||||
uint64_t registerValue = itemNumber;
|
||||
rl[0] = (itemNumber + 1) * superscalarMul0;
|
||||
rl[1] = rl[0] ^ superscalarAdd1;
|
||||
rl[2] = rl[0] ^ superscalarAdd2;
|
||||
rl[3] = rl[0] ^ superscalarAdd3;
|
||||
rl[4] = rl[0] ^ superscalarAdd4;
|
||||
rl[5] = rl[0] ^ superscalarAdd5;
|
||||
rl[6] = rl[0] ^ superscalarAdd6;
|
||||
rl[7] = rl[0] ^ superscalarAdd7;
|
||||
for (unsigned i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) {
|
||||
mixBlock = getMixBlock(registerValue, cache->memory);
|
||||
rx_prefetch_nta(mixBlock);
|
||||
SuperscalarProgram& prog = cache->programs[i];
|
||||
|
||||
executeSuperscalar(rl, prog, &cache->reciprocalCache);
|
||||
|
||||
for (unsigned q = 0; q < 8; ++q)
|
||||
rl[q] ^= load64_native(mixBlock + 8 * q);
|
||||
|
||||
registerValue = rl[prog.getAddressRegister()];
|
||||
}
|
||||
|
||||
memcpy(out, &rl, CacheLineSize);
|
||||
}
|
||||
|
||||
void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem) {
|
||||
for (uint32_t itemNumber = startItem; itemNumber < endItem; ++itemNumber, dataset += CacheLineSize)
|
||||
initDatasetItem(cache, dataset, itemNumber);
|
||||
}
|
||||
}
|
80
src/crypto/randomx/dataset.hpp
Normal file
80
src/crypto/randomx/dataset.hpp
Normal file
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <type_traits>
|
||||
#include "common.hpp"
|
||||
#include "superscalar_program.hpp"
|
||||
#include "allocator.hpp"
|
||||
|
||||
/* Global scope for C binding */
|
||||
struct randomx_dataset {
|
||||
uint8_t* memory = nullptr;
|
||||
randomx::DatasetDeallocFunc* dealloc;
|
||||
};
|
||||
|
||||
/* Global scope for C binding */
|
||||
struct randomx_cache {
|
||||
uint8_t* memory = nullptr;
|
||||
randomx::CacheDeallocFunc* dealloc;
|
||||
randomx::JitCompiler* jit;
|
||||
randomx::CacheInitializeFunc* initialize;
|
||||
randomx::DatasetInitFunc* datasetInit;
|
||||
randomx::SuperscalarProgram programs[RANDOMX_CACHE_MAX_ACCESSES];
|
||||
std::vector<uint64_t> reciprocalCache;
|
||||
|
||||
bool isInitialized() {
|
||||
return programs[0].getSize() != 0;
|
||||
}
|
||||
};
|
||||
|
||||
//A pointer to a standard-layout struct object points to its initial member
|
||||
static_assert(std::is_standard_layout<randomx_dataset>(), "randomx_dataset must be a standard-layout struct");
|
||||
static_assert(std::is_standard_layout<randomx_cache>(), "randomx_cache must be a standard-layout struct");
|
||||
|
||||
namespace randomx {
|
||||
|
||||
using DefaultAllocator = AlignedAllocator<CacheLineSize>;
|
||||
|
||||
template<class Allocator>
|
||||
void deallocDataset(randomx_dataset* dataset) {
|
||||
if (dataset->memory != nullptr)
|
||||
Allocator::freeMemory(dataset->memory, RANDOMX_DATASET_MAX_SIZE);
|
||||
}
|
||||
|
||||
template<class Allocator>
|
||||
void deallocCache(randomx_cache* cache);
|
||||
|
||||
void initCache(randomx_cache*, const void*, size_t);
|
||||
void initCacheCompile(randomx_cache*, const void*, size_t);
|
||||
void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t blockNumber);
|
||||
void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
|
||||
}
|
102
src/crypto/randomx/instruction.hpp
Normal file
102
src/crypto/randomx/instruction.hpp
Normal file
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
#include "blake2/endian.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Instruction;
|
||||
|
||||
enum class InstructionType : uint16_t {
|
||||
IADD_RS = 0,
|
||||
IADD_M = 1,
|
||||
ISUB_R = 2,
|
||||
ISUB_M = 3,
|
||||
IMUL_R = 4,
|
||||
IMUL_M = 5,
|
||||
IMULH_R = 6,
|
||||
IMULH_M = 7,
|
||||
ISMULH_R = 8,
|
||||
ISMULH_M = 9,
|
||||
IMUL_RCP = 10,
|
||||
INEG_R = 11,
|
||||
IXOR_R = 12,
|
||||
IXOR_M = 13,
|
||||
IROR_R = 14,
|
||||
IROL_R = 15,
|
||||
ISWAP_R = 16,
|
||||
FSWAP_R = 17,
|
||||
FADD_R = 18,
|
||||
FADD_M = 19,
|
||||
FSUB_R = 20,
|
||||
FSUB_M = 21,
|
||||
FSCAL_R = 22,
|
||||
FMUL_R = 23,
|
||||
FDIV_M = 24,
|
||||
FSQRT_R = 25,
|
||||
CBRANCH = 26,
|
||||
CFROUND = 27,
|
||||
ISTORE = 28,
|
||||
NOP = 29,
|
||||
};
|
||||
|
||||
class Instruction {
|
||||
public:
|
||||
uint32_t getImm32() const {
|
||||
return load32(&imm32);
|
||||
}
|
||||
void setImm32(uint32_t val) {
|
||||
return store32(&imm32, val);
|
||||
}
|
||||
int getModMem() const {
|
||||
return mod % 4; //bits 0-1
|
||||
}
|
||||
int getModShift() const {
|
||||
return (mod >> 2) % 4; //bits 2-3
|
||||
}
|
||||
int getModCond() const {
|
||||
return mod >> 4; //bits 4-7
|
||||
}
|
||||
void setMod(uint8_t val) {
|
||||
mod = val;
|
||||
}
|
||||
|
||||
uint8_t opcode;
|
||||
uint8_t dst;
|
||||
uint8_t src;
|
||||
uint8_t mod;
|
||||
uint32_t imm32;
|
||||
};
|
||||
|
||||
static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");
|
||||
static_assert(std::is_standard_layout<Instruction>(), "randomx::Instruction must be a standard-layout struct");
|
||||
}
|
193
src/crypto/randomx/instructions_portable.cpp
Normal file
193
src/crypto/randomx/instructions_portable.cpp
Normal file
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cfenv>
|
||||
#include <cmath>
|
||||
#include "common.hpp"
|
||||
#include "intrin_portable.h"
|
||||
#include "blake2/endian.h"
|
||||
|
||||
#if defined(__SIZEOF_INT128__)
|
||||
typedef unsigned __int128 uint128_t;
|
||||
typedef __int128 int128_t;
|
||||
uint64_t mulh(uint64_t a, uint64_t b) {
|
||||
return ((uint128_t)a * b) >> 64;
|
||||
}
|
||||
int64_t smulh(int64_t a, int64_t b) {
|
||||
return ((int128_t)a * b) >> 64;
|
||||
}
|
||||
#define HAVE_MULH
|
||||
#define HAVE_SMULH
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define HAS_VALUE(X) X ## 0
|
||||
#define EVAL_DEFINE(X) HAS_VALUE(X)
|
||||
#include <intrin.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
uint64_t rotl64(uint64_t x, unsigned int c) {
|
||||
return _rotl64(x, c);
|
||||
}
|
||||
uint64_t rotr64(uint64_t x, unsigned int c) {
|
||||
return _rotr64(x, c);
|
||||
}
|
||||
#define HAVE_ROTL64
|
||||
#define HAVE_ROTR64
|
||||
|
||||
#if EVAL_DEFINE(__MACHINEARM64_X64(1))
|
||||
uint64_t mulh(uint64_t a, uint64_t b) {
|
||||
return __umulh(a, b);
|
||||
}
|
||||
#define HAVE_MULH
|
||||
#endif
|
||||
|
||||
#if EVAL_DEFINE(__MACHINEX64(1))
|
||||
int64_t smulh(int64_t a, int64_t b) {
|
||||
int64_t hi;
|
||||
_mul128(a, b, &hi);
|
||||
return hi;
|
||||
}
|
||||
#define HAVE_SMULH
|
||||
#endif
|
||||
|
||||
static void setRoundMode_(uint32_t mode) {
|
||||
_controlfp(mode, _MCW_RC);
|
||||
}
|
||||
#define HAVE_SETROUNDMODE_IMPL
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_SETROUNDMODE_IMPL
|
||||
static void setRoundMode_(uint32_t mode) {
|
||||
fesetround(mode);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_ROTR64
|
||||
uint64_t rotr64(uint64_t a, unsigned int b) {
|
||||
return (a >> b) | (a << (-b & 63));
|
||||
}
|
||||
#define HAVE_ROTR64
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_ROTL64
|
||||
uint64_t rotl64(uint64_t a, unsigned int b) {
|
||||
return (a << b) | (a >> (-b & 63));
|
||||
}
|
||||
#define HAVE_ROTL64
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_MULH
|
||||
#define LO(x) ((x)&0xffffffff)
|
||||
#define HI(x) ((x)>>32)
|
||||
uint64_t mulh(uint64_t a, uint64_t b) {
|
||||
uint64_t ah = HI(a), al = LO(a);
|
||||
uint64_t bh = HI(b), bl = LO(b);
|
||||
uint64_t x00 = al * bl;
|
||||
uint64_t x01 = al * bh;
|
||||
uint64_t x10 = ah * bl;
|
||||
uint64_t x11 = ah * bh;
|
||||
uint64_t m1 = LO(x10) + LO(x01) + HI(x00);
|
||||
uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1);
|
||||
uint64_t m3 = HI(x11) + HI(m2);
|
||||
|
||||
return (m3 << 32) + LO(m2);
|
||||
}
|
||||
#define HAVE_MULH
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_SMULH
|
||||
int64_t smulh(int64_t a, int64_t b) {
|
||||
int64_t hi = mulh(a, b);
|
||||
if (a < 0LL) hi -= b;
|
||||
if (b < 0LL) hi -= a;
|
||||
return hi;
|
||||
}
|
||||
#define HAVE_SMULH
|
||||
#endif
|
||||
|
||||
#ifdef RANDOMX_DEFAULT_FENV
|
||||
|
||||
void rx_reset_float_state() {
|
||||
setRoundMode_(FE_TONEAREST);
|
||||
rx_set_double_precision(); //set precision to 53 bits if needed by the platform
|
||||
}
|
||||
|
||||
void rx_set_rounding_mode(uint32_t mode) {
|
||||
switch (mode & 3) {
|
||||
case RoundDown:
|
||||
setRoundMode_(FE_DOWNWARD);
|
||||
break;
|
||||
case RoundUp:
|
||||
setRoundMode_(FE_UPWARD);
|
||||
break;
|
||||
case RoundToZero:
|
||||
setRoundMode_(FE_TOWARDZERO);
|
||||
break;
|
||||
case RoundToNearest:
|
||||
setRoundMode_(FE_TONEAREST);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef RANDOMX_USE_X87
|
||||
|
||||
#ifdef _M_IX86
|
||||
|
||||
void rx_set_double_precision() {
|
||||
_control87(_PC_53, _MCW_PC);
|
||||
}
|
||||
|
||||
#elif defined(__i386)
|
||||
|
||||
void rx_set_double_precision() {
|
||||
uint16_t volatile x87cw;
|
||||
asm volatile("fstcw %0" : "=m" (x87cw));
|
||||
x87cw &= ~0x300;
|
||||
x87cw |= 0x200;
|
||||
asm volatile("fldcw %0" : : "m" (x87cw));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif //RANDOMX_USE_X87
|
||||
|
||||
union double_ser_t {
|
||||
double f;
|
||||
uint64_t i;
|
||||
};
|
||||
|
||||
double loadDoublePortable(const void* addr) {
|
||||
double_ser_t ds;
|
||||
ds.i = load64(addr);
|
||||
return ds.f;
|
||||
}
|
605
src/crypto/randomx/intrin_portable.h
Normal file
605
src/crypto/randomx/intrin_portable.h
Normal file
|
@ -0,0 +1,605 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include "blake2/endian.h"
|
||||
|
||||
constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x) {
|
||||
return (-1 == ~0) ? (int32_t)x : (x > INT32_MAX ? (-(int32_t)(UINT32_MAX - x) - 1) : (int32_t)x);
|
||||
}
|
||||
|
||||
constexpr int64_t unsigned64ToSigned2sCompl(uint64_t x) {
|
||||
return (-1 == ~0) ? (int64_t)x : (x > INT64_MAX ? (-(int64_t)(UINT64_MAX - x) - 1) : (int64_t)x);
|
||||
}
|
||||
|
||||
constexpr uint64_t signExtend2sCompl(uint32_t x) {
|
||||
return (-1 == ~0) ? (int64_t)(int32_t)(x) : (x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
|
||||
}
|
||||
|
||||
constexpr int RoundToNearest = 0;
|
||||
constexpr int RoundDown = 1;
|
||||
constexpr int RoundUp = 2;
|
||||
constexpr int RoundToZero = 3;
|
||||
|
||||
//MSVC doesn't define __SSE2__, so we have to define it manually if SSE2 is available
|
||||
#if !defined(__SSE2__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))
|
||||
#define __SSE2__ 1
|
||||
#endif
|
||||
|
||||
//MSVC doesn't define __AES__
|
||||
#if defined(_MSC_VER) && defined(__SSE2__)
|
||||
#define __AES__
|
||||
#endif
|
||||
|
||||
//the library "sqrt" function provided by MSVC for x86 targets doesn't give
|
||||
//the correct results, so we have to use inline assembly to call x87 fsqrt directly
|
||||
#if !defined(__SSE2__)
|
||||
#if defined(_M_IX86)
|
||||
inline double __cdecl rx_sqrt(double x) {
|
||||
__asm {
|
||||
fld x
|
||||
fsqrt
|
||||
}
|
||||
}
|
||||
#define rx_sqrt rx_sqrt
|
||||
|
||||
void rx_set_double_precision();
|
||||
#define RANDOMX_USE_X87
|
||||
|
||||
#elif defined(__i386)
|
||||
|
||||
void rx_set_double_precision();
|
||||
#define RANDOMX_USE_X87
|
||||
|
||||
#endif
|
||||
#endif //__SSE2__
|
||||
|
||||
#if !defined(rx_sqrt)
|
||||
#define rx_sqrt sqrt
|
||||
#endif
|
||||
|
||||
#if !defined(RANDOMX_USE_X87)
|
||||
#define rx_set_double_precision(x)
|
||||
#endif
|
||||
|
||||
#ifdef __SSE2__
|
||||
#ifdef __GNUC__
|
||||
#include <x86intrin.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
typedef __m128i rx_vec_i128;
|
||||
typedef __m128d rx_vec_f128;
|
||||
|
||||
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
|
||||
#define rx_aligned_free(a) _mm_free(a)
|
||||
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
|
||||
|
||||
#define rx_load_vec_f128 _mm_load_pd
|
||||
#define rx_store_vec_f128 _mm_store_pd
|
||||
#define rx_add_vec_f128 _mm_add_pd
|
||||
#define rx_sub_vec_f128 _mm_sub_pd
|
||||
#define rx_mul_vec_f128 _mm_mul_pd
|
||||
#define rx_div_vec_f128 _mm_div_pd
|
||||
#define rx_sqrt_vec_f128 _mm_sqrt_pd
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
|
||||
return _mm_shuffle_pd(a, a, 1);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
|
||||
return _mm_castsi128_pd(_mm_set_epi64x(x1, x0));
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||
return _mm_castsi128_pd(_mm_set1_epi64x(x));
|
||||
}
|
||||
|
||||
#define rx_xor_vec_f128 _mm_xor_pd
|
||||
#define rx_and_vec_f128 _mm_and_pd
|
||||
#define rx_or_vec_f128 _mm_or_pd
|
||||
|
||||
#ifdef __AES__
|
||||
|
||||
#define rx_aesenc_vec_i128 _mm_aesenc_si128
|
||||
#define rx_aesdec_vec_i128 _mm_aesdec_si128
|
||||
|
||||
#define HAVE_AES
|
||||
|
||||
#endif //__AES__
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
|
||||
return _mm_cvtsi128_si32(a);
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
|
||||
return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0x55));
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
|
||||
return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xaa));
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
|
||||
return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xff));
|
||||
}
|
||||
|
||||
#define rx_set_int_vec_i128 _mm_set_epi32
|
||||
#define rx_xor_vec_i128 _mm_xor_si128
|
||||
#define rx_load_vec_i128 _mm_load_si128
|
||||
#define rx_store_vec_i128 _mm_store_si128
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||
__m128i ix = _mm_loadl_epi64((const __m128i*)addr);
|
||||
return _mm_cvtepi32_pd(ix);
|
||||
}
|
||||
|
||||
constexpr uint32_t rx_mxcsr_default = 0x9FC0; //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled
|
||||
|
||||
FORCE_INLINE void rx_reset_float_state() {
|
||||
_mm_setcsr(rx_mxcsr_default);
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
|
||||
_mm_setcsr(rx_mxcsr_default | (mode << 13));
|
||||
}
|
||||
|
||||
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <cstdlib>
|
||||
#include <altivec.h>
|
||||
#undef vector
|
||||
#undef pixel
|
||||
#undef bool
|
||||
|
||||
typedef __vector uint8_t __m128i;
|
||||
typedef __vector uint32_t __m128l;
|
||||
typedef __vector int __m128li;
|
||||
typedef __vector uint64_t __m128ll;
|
||||
typedef __vector double __m128d;
|
||||
|
||||
typedef __m128i rx_vec_i128;
|
||||
typedef __m128d rx_vec_f128;
|
||||
typedef union{
|
||||
rx_vec_i128 i;
|
||||
rx_vec_f128 d;
|
||||
uint64_t u64[2];
|
||||
double d64[2];
|
||||
uint32_t u32[4];
|
||||
int i32[4];
|
||||
} vec_u;
|
||||
|
||||
#define rx_aligned_alloc(a, b) malloc(a)
|
||||
#define rx_aligned_free(a) free(a)
|
||||
#define rx_prefetch_nta(x)
|
||||
|
||||
/* Splat 64-bit long long to 2 64-bit long longs */
|
||||
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
|
||||
{ return (__m128i) vec_splats (scalar); }
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return (rx_vec_f128)vec_vsx_ld(0,pd);
|
||||
#else
|
||||
vec_u t;
|
||||
t.u64[0] = load64(pd + 0);
|
||||
t.u64[1] = load64(pd + 1);
|
||||
return (rx_vec_f128)t.d;
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
vec_vsx_st(a,0,(rx_vec_f128*)mem_addr);
|
||||
#else
|
||||
vec_u _a;
|
||||
_a.d = a;
|
||||
store64(mem_addr + 0, _a.u64[0]);
|
||||
store64(mem_addr + 1, _a.u64[1]);
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
|
||||
return (rx_vec_f128)vec_perm((__m128i)a,(__m128i)a,(__m128i){8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7});
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_add(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_sub(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_mul(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_div(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
|
||||
return (rx_vec_f128)vec_sqrt(a);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
|
||||
return (rx_vec_i128)vec_splat2sd(a);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
|
||||
return (rx_vec_f128)a;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
|
||||
return (rx_vec_f128)(__m128ll){x0,x1};
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||
return (rx_vec_f128)vec_splat2sd(x);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_xor(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_and(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_or(a,b);
|
||||
}
|
||||
|
||||
#if defined(__CRYPTO__)
|
||||
|
||||
FORCE_INLINE __m128ll vrev(__m128i v){
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0});
|
||||
#else
|
||||
return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12});
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
|
||||
__m128ll _v = vrev(v);
|
||||
__m128ll _rkey = vrev(rkey);
|
||||
__m128ll result = vrev((__m128i)__builtin_crypto_vcipher(_v,_rkey));
|
||||
return (rx_vec_i128)result;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
|
||||
__m128ll _v = vrev(v);
|
||||
__m128ll zero = (__m128ll){0};
|
||||
__m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero));
|
||||
return (rx_vec_i128)vec_xor((__m128i)out,rkey);
|
||||
}
|
||||
#define HAVE_AES
|
||||
|
||||
#endif //__CRYPTO__
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
|
||||
vec_u _a;
|
||||
_a.i = a;
|
||||
return _a.i32[0];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
|
||||
vec_u _a;
|
||||
_a.i = a;
|
||||
return _a.i32[1];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
|
||||
vec_u _a;
|
||||
_a.i = a;
|
||||
return _a.i32[2];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
|
||||
vec_u _a;
|
||||
_a.i = a;
|
||||
return _a.i32[3];
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
|
||||
return (rx_vec_i128)((__m128li){_I0,_I1,_I2,_I3});
|
||||
};
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) {
|
||||
return (rx_vec_i128)vec_xor(_A,_B);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *_P) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return *_P;
|
||||
#else
|
||||
uint32_t* ptr = (uint32_t*)_P;
|
||||
vec_u c;
|
||||
c.u32[0] = load32(ptr + 0);
|
||||
c.u32[1] = load32(ptr + 1);
|
||||
c.u32[2] = load32(ptr + 2);
|
||||
c.u32[3] = load32(ptr + 3);
|
||||
return (rx_vec_i128)c.i;
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
*_P = _B;
|
||||
#else
|
||||
uint32_t* ptr = (uint32_t*)_P;
|
||||
vec_u B;
|
||||
B.i = _B;
|
||||
store32(ptr + 0, B.u32[0]);
|
||||
store32(ptr + 1, B.u32[1]);
|
||||
store32(ptr + 2, B.u32[2]);
|
||||
store32(ptr + 3, B.u32[3]);
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||
vec_u x;
|
||||
x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
|
||||
x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
|
||||
return (rx_vec_f128)x.d;
|
||||
}
|
||||
|
||||
#define RANDOMX_DEFAULT_FENV
|
||||
|
||||
void rx_reset_float_state();
|
||||
|
||||
void rx_set_rounding_mode(uint32_t mode);
|
||||
|
||||
#else //end altivec
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
typedef union {
|
||||
uint64_t u64[2];
|
||||
uint32_t u32[4];
|
||||
uint16_t u16[8];
|
||||
uint8_t u8[16];
|
||||
} rx_vec_i128;
|
||||
|
||||
typedef union {
|
||||
struct {
|
||||
double lo;
|
||||
double hi;
|
||||
};
|
||||
rx_vec_i128 i;
|
||||
} rx_vec_f128;
|
||||
|
||||
#define rx_aligned_alloc(a, b) malloc(a)
|
||||
#define rx_aligned_free(a) free(a)
|
||||
#define rx_prefetch_nta(x)
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||
rx_vec_f128 x;
|
||||
x.i.u64[0] = load64(pd + 0);
|
||||
x.i.u64[1] = load64(pd + 1);
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
|
||||
store64(mem_addr + 0, a.i.u64[0]);
|
||||
store64(mem_addr + 1, a.i.u64[1]);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
|
||||
double temp = a.hi;
|
||||
a.hi = a.lo;
|
||||
a.lo = temp;
|
||||
return a;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = a.lo + b.lo;
|
||||
x.hi = a.hi + b.hi;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = a.lo - b.lo;
|
||||
x.hi = a.hi - b.hi;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = a.lo * b.lo;
|
||||
x.hi = a.hi * b.hi;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = a.lo / b.lo;
|
||||
x.hi = a.hi / b.hi;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = rx_sqrt(a.lo);
|
||||
x.hi = rx_sqrt(a.hi);
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
|
||||
rx_vec_i128 x;
|
||||
x.u64[0] = a;
|
||||
x.u64[1] = a;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
|
||||
rx_vec_f128 x;
|
||||
x.i = a;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
|
||||
rx_vec_f128 v;
|
||||
v.i.u64[0] = x0;
|
||||
v.i.u64[1] = x1;
|
||||
return v;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||
rx_vec_f128 v;
|
||||
v.i.u64[0] = x;
|
||||
v.i.u64[1] = x;
|
||||
return v;
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
|
||||
x.i.u64[1] = a.i.u64[1] ^ b.i.u64[1];
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.i.u64[0] = a.i.u64[0] & b.i.u64[0];
|
||||
x.i.u64[1] = a.i.u64[1] & b.i.u64[1];
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.i.u64[0] = a.i.u64[0] | b.i.u64[0];
|
||||
x.i.u64[1] = a.i.u64[1] | b.i.u64[1];
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
|
||||
return a.u32[0];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
|
||||
return a.u32[1];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
|
||||
return a.u32[2];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
|
||||
return a.u32[3];
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
|
||||
rx_vec_i128 v;
|
||||
v.u32[0] = _I0;
|
||||
v.u32[1] = _I1;
|
||||
v.u32[2] = _I2;
|
||||
v.u32[3] = _I3;
|
||||
return v;
|
||||
};
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) {
|
||||
rx_vec_i128 c;
|
||||
c.u32[0] = _A.u32[0] ^ _B.u32[0];
|
||||
c.u32[1] = _A.u32[1] ^ _B.u32[1];
|
||||
c.u32[2] = _A.u32[2] ^ _B.u32[2];
|
||||
c.u32[3] = _A.u32[3] ^ _B.u32[3];
|
||||
return c;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const*_P) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return *_P;
|
||||
#else
|
||||
uint32_t* ptr = (uint32_t*)_P;
|
||||
rx_vec_i128 c;
|
||||
c.u32[0] = load32(ptr + 0);
|
||||
c.u32[1] = load32(ptr + 1);
|
||||
c.u32[2] = load32(ptr + 2);
|
||||
c.u32[3] = load32(ptr + 3);
|
||||
return c;
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
*_P = _B;
|
||||
#else
|
||||
uint32_t* ptr = (uint32_t*)_P;
|
||||
store32(ptr + 0, _B.u32[0]);
|
||||
store32(ptr + 1, _B.u32[1]);
|
||||
store32(ptr + 2, _B.u32[2]);
|
||||
store32(ptr + 3, _B.u32[3]);
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
|
||||
x.hi = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define RANDOMX_DEFAULT_FENV
|
||||
|
||||
void rx_reset_float_state();
|
||||
|
||||
void rx_set_rounding_mode(uint32_t mode);
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_AES
|
||||
static const char* platformError = "Platform doesn't support hardware AES";
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
|
||||
throw std::runtime_error(platformError);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
|
||||
throw std::runtime_error(platformError);
|
||||
}
|
||||
#endif
|
||||
|
||||
double loadDoublePortable(const void* addr);
|
||||
uint64_t mulh(uint64_t, uint64_t);
|
||||
int64_t smulh(int64_t, int64_t);
|
||||
uint64_t rotl64(uint64_t, unsigned int);
|
||||
uint64_t rotr64(uint64_t, unsigned int);
|
37
src/crypto/randomx/jit_compiler.hpp
Normal file
37
src/crypto/randomx/jit_compiler.hpp
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#include "jit_compiler_x86.hpp"
|
||||
#elif defined(__aarch64__)
|
||||
#include "jit_compiler_a64.hpp"
|
||||
#else
|
||||
#include "jit_compiler_fallback.hpp"
|
||||
#endif
|
73
src/crypto/randomx/jit_compiler_a64.hpp
Normal file
73
src/crypto/randomx/jit_compiler_a64.hpp
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <stdexcept>
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
|
||||
class JitCompilerA64 {
|
||||
public:
|
||||
JitCompilerA64() {
|
||||
throw std::runtime_error("ARM64 JIT compiler is not implemented yet.");
|
||||
}
|
||||
void generateProgram(Program&, ProgramConfiguration&) {
|
||||
|
||||
}
|
||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
|
||||
|
||||
}
|
||||
template<size_t N>
|
||||
void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &) {
|
||||
|
||||
}
|
||||
void generateDatasetInitCode() {
|
||||
|
||||
}
|
||||
ProgramFunc* getProgramFunc() {
|
||||
return nullptr;
|
||||
}
|
||||
DatasetInitFunc* getDatasetInitFunc() {
|
||||
return nullptr;
|
||||
}
|
||||
uint8_t* getCode() {
|
||||
return nullptr;
|
||||
}
|
||||
size_t getCodeSize() {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
73
src/crypto/randomx/jit_compiler_fallback.hpp
Normal file
73
src/crypto/randomx/jit_compiler_fallback.hpp
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <stdexcept>
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
|
||||
class JitCompilerFallback {
|
||||
public:
|
||||
JitCompilerFallback() {
|
||||
throw std::runtime_error("JIT compilation is not supported on this platform");
|
||||
}
|
||||
void generateProgram(Program&, ProgramConfiguration&) {
|
||||
|
||||
}
|
||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
|
||||
|
||||
}
|
||||
template<size_t N>
|
||||
void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &) {
|
||||
|
||||
}
|
||||
void generateDatasetInitCode() {
|
||||
|
||||
}
|
||||
ProgramFunc* getProgramFunc() {
|
||||
return nullptr;
|
||||
}
|
||||
DatasetInitFunc* getDatasetInitFunc() {
|
||||
return nullptr;
|
||||
}
|
||||
uint8_t* getCode() {
|
||||
return nullptr;
|
||||
}
|
||||
size_t getCodeSize() {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
774
src/crypto/randomx/jit_compiler_x86.cpp
Normal file
774
src/crypto/randomx/jit_compiler_x86.cpp
Normal file
|
@ -0,0 +1,774 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <climits>
|
||||
#include "jit_compiler_x86.hpp"
|
||||
#include "jit_compiler_x86_static.hpp"
|
||||
#include "superscalar.hpp"
|
||||
#include "program.hpp"
|
||||
#include "reciprocal.h"
|
||||
#include "virtual_memory.hpp"
|
||||
|
||||
namespace randomx {
|
||||
/*
|
||||
|
||||
REGISTER ALLOCATION:
|
||||
|
||||
; rax -> temporary
|
||||
; rbx -> iteration counter "ic"
|
||||
; rcx -> temporary
|
||||
; rdx -> temporary
|
||||
; rsi -> scratchpad pointer
|
||||
; rdi -> dataset pointer
|
||||
; rbp -> memory registers "ma" (high 32 bits), "mx" (low 32 bits)
|
||||
; rsp -> stack pointer
|
||||
; r8 -> "r0"
|
||||
; r9 -> "r1"
|
||||
; r10 -> "r2"
|
||||
; r11 -> "r3"
|
||||
; r12 -> "r4"
|
||||
; r13 -> "r5"
|
||||
; r14 -> "r6"
|
||||
; r15 -> "r7"
|
||||
; xmm0 -> "f0"
|
||||
; xmm1 -> "f1"
|
||||
; xmm2 -> "f2"
|
||||
; xmm3 -> "f3"
|
||||
; xmm4 -> "e0"
|
||||
; xmm5 -> "e1"
|
||||
; xmm6 -> "e2"
|
||||
; xmm7 -> "e3"
|
||||
; xmm8 -> "a0"
|
||||
; xmm9 -> "a1"
|
||||
; xmm10 -> "a2"
|
||||
; xmm11 -> "a3"
|
||||
; xmm12 -> temporary
|
||||
; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
|
||||
; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000******
|
||||
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
|
||||
|
||||
*/
|
||||
|
||||
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
|
||||
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
|
||||
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
|
||||
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
||||
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
|
||||
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
|
||||
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
|
||||
const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store;
|
||||
const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
|
||||
const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
|
||||
const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
|
||||
const uint8_t* codeShhLoad = (uint8_t*)&randomx_sshash_load;
|
||||
const uint8_t* codeShhPrefetch = (uint8_t*)&randomx_sshash_prefetch;
|
||||
const uint8_t* codeShhEnd = (uint8_t*)&randomx_sshash_end;
|
||||
const uint8_t* codeShhInit = (uint8_t*)&randomx_sshash_init;
|
||||
|
||||
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
||||
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
|
||||
const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
|
||||
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
|
||||
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
|
||||
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
|
||||
const int32_t datasetInitSize = codeEpilogue - codeDatasetInit;
|
||||
const int32_t epilogueSize = codeShhLoad - codeEpilogue;
|
||||
const int32_t codeSshLoadSize = codeShhPrefetch - codeShhLoad;
|
||||
const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
|
||||
const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
|
||||
|
||||
const int32_t epilogueOffset = CodeSize - epilogueSize;
|
||||
constexpr int32_t superScalarHashOffset = 32768;
|
||||
|
||||
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
|
||||
static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 };
|
||||
static const uint8_t REX_SUB_RR[] = { 0x4d, 0x2b };
|
||||
static const uint8_t REX_SUB_RM[] = { 0x4c, 0x2b };
|
||||
static const uint8_t REX_MOV_RR[] = { 0x41, 0x8b };
|
||||
static const uint8_t REX_MOV_RR64[] = { 0x49, 0x8b };
|
||||
static const uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b };
|
||||
static const uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf };
|
||||
static const uint8_t REX_IMUL_RRI[] = { 0x4d, 0x69 };
|
||||
static const uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf };
|
||||
static const uint8_t REX_MUL_R[] = { 0x49, 0xf7 };
|
||||
static const uint8_t REX_MUL_M[] = { 0x48, 0xf7 };
|
||||
static const uint8_t REX_81[] = { 0x49, 0x81 };
|
||||
static const uint8_t AND_EAX_I = 0x25;
|
||||
static const uint8_t MOV_EAX_I = 0xb8;
|
||||
static const uint8_t MOV_RAX_I[] = { 0x48, 0xb8 };
|
||||
static const uint8_t MOV_RCX_I[] = { 0x48, 0xb9 };
|
||||
static const uint8_t REX_LEA[] = { 0x4f, 0x8d };
|
||||
static const uint8_t REX_MUL_MEM[] = { 0x48, 0xf7, 0x24, 0x0e };
|
||||
static const uint8_t REX_IMUL_MEM[] = { 0x48, 0xf7, 0x2c, 0x0e };
|
||||
static const uint8_t REX_SHR_RAX[] = { 0x48, 0xc1, 0xe8 };
|
||||
static const uint8_t RAX_ADD_SBB_1[] = { 0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00 };
|
||||
static const uint8_t MUL_RCX[] = { 0x48, 0xf7, 0xe1 };
|
||||
static const uint8_t REX_SHR_RDX[] = { 0x48, 0xc1, 0xea };
|
||||
static const uint8_t REX_SH[] = { 0x49, 0xc1 };
|
||||
static const uint8_t MOV_RCX_RAX_SAR_RCX_63[] = { 0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f };
|
||||
static const uint8_t AND_ECX_I[] = { 0x81, 0xe1 };
|
||||
static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 };
|
||||
static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 };
|
||||
static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 };
|
||||
static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 };
|
||||
static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 };
|
||||
static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 };
|
||||
static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 };
|
||||
static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA };
|
||||
static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 };
|
||||
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 };
|
||||
static const uint8_t REX_NEG[] = { 0x49, 0xF7 };
|
||||
static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
|
||||
static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
|
||||
static const uint8_t REX_XOR_RM[] = { 0x4c, 0x33 };
|
||||
static const uint8_t REX_ROT_CL[] = { 0x49, 0xd3 };
|
||||
static const uint8_t REX_ROT_I8[] = { 0x49, 0xc1 };
|
||||
static const uint8_t SHUFPD[] = { 0x66, 0x0f, 0xc6 };
|
||||
static const uint8_t REX_ADDPD[] = { 0x66, 0x41, 0x0f, 0x58 };
|
||||
static const uint8_t REX_CVTDQ2PD_XMM12[] = { 0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06 };
|
||||
static const uint8_t REX_SUBPD[] = { 0x66, 0x41, 0x0f, 0x5c };
|
||||
static const uint8_t REX_XORPS[] = { 0x41, 0x0f, 0x57 };
|
||||
static const uint8_t REX_MULPD[] = { 0x66, 0x41, 0x0f, 0x59 };
|
||||
static const uint8_t REX_MAXPD[] = { 0x66, 0x41, 0x0f, 0x5f };
|
||||
static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e };
|
||||
static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
|
||||
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58 };
|
||||
static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
|
||||
static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
|
||||
static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
|
||||
static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 };
|
||||
static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 };
|
||||
static const uint8_t REX_MOV_MR[] = { 0x4c, 0x89 };
|
||||
static const uint8_t REX_XOR_EAX[] = { 0x41, 0x33 };
|
||||
static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 };
|
||||
static const uint8_t JNZ[] = { 0x0f, 0x85 };
|
||||
static const uint8_t JMP = 0xe9;
|
||||
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
||||
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
|
||||
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
|
||||
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
|
||||
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
|
||||
static const uint8_t CALL = 0xe8;
|
||||
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
|
||||
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
|
||||
static const uint8_t JZ[] = { 0x0f, 0x84 };
|
||||
static const uint8_t RET = 0xc3;
|
||||
static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d };
|
||||
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
|
||||
static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 };
|
||||
|
||||
static const uint8_t NOP1[] = { 0x90 };
|
||||
static const uint8_t NOP2[] = { 0x66, 0x90 };
|
||||
static const uint8_t NOP3[] = { 0x66, 0x66, 0x90 };
|
||||
static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 };
|
||||
static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||
static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
||||
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||
|
||||
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
|
||||
|
||||
size_t JitCompilerX86::getCodeSize() {
|
||||
return codePos - prologueSize;
|
||||
}
|
||||
|
||||
JitCompilerX86::JitCompilerX86() {
|
||||
code = (uint8_t*)allocExecutableMemory(CodeSize);
|
||||
memcpy(code, codePrologue, prologueSize);
|
||||
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
|
||||
}
|
||||
|
||||
JitCompilerX86::~JitCompilerX86() {
|
||||
freePagedMemory(code, CodeSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
memcpy(code + codePos, RandomX_CurrentConfig.codeReadDatasetTweaked, readDatasetSize);
|
||||
codePos += readDatasetSize;
|
||||
generateProgramEpilogue(prog);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
emit(RandomX_CurrentConfig.codeReadDatasetLightSshInitTweaked, readDatasetLightInitSize);
|
||||
emit(ADD_EBX_I);
|
||||
emit32(datasetOffset / CacheLineSize);
|
||||
emitByte(CALL);
|
||||
emit32(superScalarHashOffset - (codePos + 4));
|
||||
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
|
||||
generateProgramEpilogue(prog);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &reciprocalCache) {
|
||||
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
|
||||
codePos = superScalarHashOffset + codeSshInitSize;
|
||||
for (unsigned j = 0; j < RandomX_CurrentConfig.CacheAccesses; ++j) {
|
||||
SuperscalarProgram& prog = programs[j];
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
generateSuperscalarCode(instr, reciprocalCache);
|
||||
}
|
||||
emit(codeShhLoad, codeSshLoadSize);
|
||||
if (j < RandomX_CurrentConfig.CacheAccesses - 1) {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xd8 + prog.getAddressRegister());
|
||||
emit(RandomX_CurrentConfig.codeShhPrefetchTweaked, codeSshPrefetchSize);
|
||||
#ifdef RANDOMX_ALIGN
|
||||
int align = (codePos % 16);
|
||||
while (align != 0) {
|
||||
int nopSize = 16 - align;
|
||||
if (nopSize > 8) nopSize = 8;
|
||||
emit(NOPX[nopSize - 1], nopSize);
|
||||
align = (codePos % 16);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
emitByte(RET);
|
||||
}
|
||||
|
||||
template
|
||||
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES], std::vector<uint64_t> &reciprocalCache);
|
||||
|
||||
void JitCompilerX86::generateDatasetInitCode() {
|
||||
memcpy(code, codeDatasetInit, datasetInitSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
|
||||
instructionOffsets.clear();
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
codePos = prologueSize;
|
||||
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||
emit(REX_XOR_RAX_R64);
|
||||
emitByte(0xc0 + pcfg.readReg0);
|
||||
emit(REX_XOR_RAX_R64);
|
||||
emitByte(0xc0 + pcfg.readReg1);
|
||||
memcpy(code + codePos, RandomX_CurrentConfig.codeLoopLoadTweaked, loopLoadSize);
|
||||
codePos += loopLoadSize;
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode(instr, i);
|
||||
}
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc0 + pcfg.readReg2);
|
||||
emit(REX_XOR_EAX);
|
||||
emitByte(0xc0 + pcfg.readReg3);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramEpilogue(Program& prog) {
|
||||
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||
codePos += loopStoreSize;
|
||||
emit(SUB_EBX);
|
||||
emit(JNZ);
|
||||
emit32(prologueSize - codePos - 4);
|
||||
emitByte(JMP);
|
||||
emit32(epilogueOffset - codePos - 4);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateCode(Instruction& instr, int i) {
|
||||
instructionOffsets.push_back(codePos);
|
||||
auto generator = engine[instr.opcode];
|
||||
(this->*generator)(instr, i);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector<uint64_t> &reciprocalCache) {
|
||||
switch ((SuperscalarInstructionType)instr.opcode)
|
||||
{
|
||||
case randomx::SuperscalarInstructionType::ISUB_R:
|
||||
emit(REX_SUB_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_R:
|
||||
emit(REX_XOR_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_RS:
|
||||
emit(REX_LEA);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
genSIB(instr.getModShift(), instr.src, instr.dst);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IMUL_R:
|
||||
emit(REX_IMUL_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IROR_C:
|
||||
emit(REX_ROT_I8);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
emitByte(instr.getImm32() & 63);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_C7:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_C7:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_C8:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
#ifdef RANDOMX_ALIGN
|
||||
emit(NOP1);
|
||||
#endif
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_C8:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
#ifdef RANDOMX_ALIGN
|
||||
emit(NOP1);
|
||||
#endif
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_C9:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
#ifdef RANDOMX_ALIGN
|
||||
emit(NOP2);
|
||||
#endif
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_C9:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
#ifdef RANDOMX_ALIGN
|
||||
emit(NOP2);
|
||||
#endif
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IMULH_R:
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe0 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::ISMULH_R:
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe8 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IMUL_RCP:
|
||||
emit(MOV_RAX_I);
|
||||
emit64(reciprocalCache[instr.getImm32()]);
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0xc0 + 8 * instr.dst);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
|
||||
emit(LEA_32);
|
||||
emitByte(0x80 + instr.src + (rax ? 0 : 8));
|
||||
if (instr.src == RegisterNeedsSib) {
|
||||
emitByte(0x24);
|
||||
}
|
||||
emit32(instr.getImm32());
|
||||
if (rax)
|
||||
emitByte(AND_EAX_I);
|
||||
else
|
||||
emit(AND_ECX_I);
|
||||
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
|
||||
void JitCompilerX86::genAddressRegDst(Instruction& instr) {
|
||||
emit(LEA_32);
|
||||
emitByte(0x80 + instr.dst);
|
||||
if (instr.dst == RegisterNeedsSib) {
|
||||
emitByte(0x24);
|
||||
}
|
||||
emit32(instr.getImm32());
|
||||
emitByte(AND_EAX_I);
|
||||
if (instr.getModCond() < StoreL3Condition) {
|
||||
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
emit32(ScratchpadL3Mask);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::genAddressImm(Instruction& instr) {
|
||||
emit32(instr.getImm32() & ScratchpadL3Mask);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_LEA);
|
||||
if (instr.dst == RegisterNeedsDisplacement)
|
||||
emitByte(0xac);
|
||||
else
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
genSIB(instr.getModShift(), instr.src, instr.dst);
|
||||
if (instr.dst == RegisterNeedsDisplacement)
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_ADD_RM);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
emitByte(0x06);
|
||||
}
|
||||
else {
|
||||
emit(REX_ADD_RM);
|
||||
emitByte(0x86 + 8 * instr.dst);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::genSIB(int scale, int index, int base) {
|
||||
emitByte((scale << 6) | (index << 3) | base);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_SUB_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
emit(REX_81);
|
||||
emitByte(0xe8 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_SUB_RM);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
emitByte(0x06);
|
||||
}
|
||||
else {
|
||||
emit(REX_SUB_RM);
|
||||
emitByte(0x86 + 8 * instr.dst);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_IMUL_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
emit(REX_IMUL_RRI);
|
||||
emitByte(0xc0 + 9 * instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
emitByte(0x06);
|
||||
}
|
||||
else {
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0x86 + 8 * instr.dst);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe0 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, false);
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_MEM);
|
||||
}
|
||||
else {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_M);
|
||||
emitByte(0xa6);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe8 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, false);
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_IMUL_MEM);
|
||||
}
|
||||
else {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_M);
|
||||
emitByte(0xae);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
|
||||
uint64_t divisor = instr.getImm32();
|
||||
if (!isPowerOf2(divisor)) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(MOV_RAX_I);
|
||||
emit64(randomx_reciprocal_fast(divisor));
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0xc0 + 8 * instr.dst);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_NEG);
|
||||
emitByte(0xd8 + instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_XOR_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_XOR_RM);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
emitByte(0x06);
|
||||
}
|
||||
else {
|
||||
emit(REX_XOR_RM);
|
||||
emitByte(0x86 + 8 * instr.dst);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc8 + instr.src);
|
||||
emit(REX_ROT_CL);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
}
|
||||
else {
|
||||
emit(REX_ROT_I8);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
emitByte(instr.getImm32() & 63);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IROL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc8 + instr.src);
|
||||
emit(REX_ROT_CL);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
}
|
||||
else {
|
||||
emit(REX_ROT_I8);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emitByte(instr.getImm32() & 63);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) {
|
||||
if (instr.src != instr.dst) {
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[instr.src] = i;
|
||||
emit(REX_XCHG);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) {
|
||||
emit(SHUFPD);
|
||||
emitByte(0xc0 + 9 * instr.dst);
|
||||
emitByte(1);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
emit(REX_ADDPD);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_ADDPD);
|
||||
emitByte(0xc4 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
emit(REX_SUBPD);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_SUBPD);
|
||||
emitByte(0xc4 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
emit(REX_XORPS);
|
||||
emitByte(0xc7 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
emit(REX_MULPD);
|
||||
emitByte(0xe0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_ANDPS_XMM12);
|
||||
emit(REX_DIVPD);
|
||||
emitByte(0xe4 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
emit(SQRTPD);
|
||||
emitByte(0xe4 + 9 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_CFROUND(Instruction& instr, int i) {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.src);
|
||||
int rotate = (13 - (instr.getImm32() & 63)) & 63;
|
||||
if (rotate != 0) {
|
||||
emit(ROL_RAX);
|
||||
emitByte(rotate);
|
||||
}
|
||||
emit(AND_OR_MOV_LDMXCSR);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
|
||||
int reg = instr.dst;
|
||||
int target = registerUsage[reg] + 1;
|
||||
emit(REX_ADD_I);
|
||||
emitByte(0xc0 + reg);
|
||||
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
||||
uint32_t imm = instr.getImm32() | (1UL << shift);
|
||||
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0)
|
||||
imm &= ~(1UL << (shift - 1));
|
||||
emit32(imm);
|
||||
emit(REX_TEST);
|
||||
emitByte(0xc0 + reg);
|
||||
emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift);
|
||||
emit(JZ);
|
||||
emit32(instructionOffsets[target] - (codePos + 4));
|
||||
//mark all registers as used
|
||||
for (unsigned j = 0; j < RegistersCount; ++j) {
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
|
||||
genAddressRegDst(instr);
|
||||
emit(REX_MOV_MR);
|
||||
emitByte(0x04 + 8 * instr.src);
|
||||
emitByte(0x06);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_NOP(Instruction& instr, int i) {
|
||||
emit(NOP1);
|
||||
}
|
||||
|
||||
InstructionGeneratorX86 JitCompilerX86::engine[256] = {};
|
||||
|
||||
}
|
141
src/crypto/randomx/jit_compiler_x86.hpp
Normal file
141
src/crypto/randomx/jit_compiler_x86.hpp
Normal file
|
@ -0,0 +1,141 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
class JitCompilerX86;
|
||||
class Instruction;
|
||||
|
||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
|
||||
|
||||
constexpr uint32_t CodeSize = 64 * 1024;
|
||||
|
||||
class JitCompilerX86 {
|
||||
public:
|
||||
JitCompilerX86();
|
||||
~JitCompilerX86();
|
||||
void generateProgram(Program&, ProgramConfiguration&);
|
||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
||||
template<size_t N>
|
||||
void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &);
|
||||
void generateDatasetInitCode();
|
||||
ProgramFunc* getProgramFunc() {
|
||||
return (ProgramFunc*)code;
|
||||
}
|
||||
DatasetInitFunc* getDatasetInitFunc() {
|
||||
return (DatasetInitFunc*)code;
|
||||
}
|
||||
uint8_t* getCode() {
|
||||
return code;
|
||||
}
|
||||
size_t getCodeSize();
|
||||
|
||||
static InstructionGeneratorX86 engine[256];
|
||||
std::vector<int32_t> instructionOffsets;
|
||||
int registerUsage[RegistersCount];
|
||||
uint8_t* code;
|
||||
int32_t codePos;
|
||||
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
void generateProgramEpilogue(Program&);
|
||||
void genAddressReg(Instruction&, bool);
|
||||
void genAddressRegDst(Instruction&);
|
||||
void genAddressImm(Instruction&);
|
||||
void genSIB(int scale, int index, int base);
|
||||
|
||||
void generateCode(Instruction&, int);
|
||||
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
|
||||
|
||||
void emitByte(uint8_t val) {
|
||||
code[codePos] = val;
|
||||
codePos++;
|
||||
}
|
||||
|
||||
void emit32(uint32_t val) {
|
||||
memcpy(code + codePos, &val, sizeof val);
|
||||
codePos += sizeof val;
|
||||
}
|
||||
|
||||
void emit64(uint64_t val) {
|
||||
memcpy(code + codePos, &val, sizeof val);
|
||||
codePos += sizeof val;
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void emit(const uint8_t (&src)[N]) {
|
||||
emit(src, N);
|
||||
}
|
||||
|
||||
void emit(const uint8_t* src, size_t count) {
|
||||
memcpy(code + codePos, src, count);
|
||||
codePos += count;
|
||||
}
|
||||
|
||||
void h_IADD_RS(Instruction&, int);
|
||||
void h_IADD_M(Instruction&, int);
|
||||
void h_ISUB_R(Instruction&, int);
|
||||
void h_ISUB_M(Instruction&, int);
|
||||
void h_IMUL_R(Instruction&, int);
|
||||
void h_IMUL_M(Instruction&, int);
|
||||
void h_IMULH_R(Instruction&, int);
|
||||
void h_IMULH_M(Instruction&, int);
|
||||
void h_ISMULH_R(Instruction&, int);
|
||||
void h_ISMULH_M(Instruction&, int);
|
||||
void h_IMUL_RCP(Instruction&, int);
|
||||
void h_INEG_R(Instruction&, int);
|
||||
void h_IXOR_R(Instruction&, int);
|
||||
void h_IXOR_M(Instruction&, int);
|
||||
void h_IROR_R(Instruction&, int);
|
||||
void h_IROL_R(Instruction&, int);
|
||||
void h_ISWAP_R(Instruction&, int);
|
||||
void h_FSWAP_R(Instruction&, int);
|
||||
void h_FADD_R(Instruction&, int);
|
||||
void h_FADD_M(Instruction&, int);
|
||||
void h_FSUB_R(Instruction&, int);
|
||||
void h_FSUB_M(Instruction&, int);
|
||||
void h_FSCAL_R(Instruction&, int);
|
||||
void h_FMUL_R(Instruction&, int);
|
||||
void h_FDIV_M(Instruction&, int);
|
||||
void h_FSQRT_R(Instruction&, int);
|
||||
void h_CBRANCH(Instruction&, int);
|
||||
void h_CFROUND(Instruction&, int);
|
||||
void h_ISTORE(Instruction&, int);
|
||||
void h_NOP(Instruction&, int);
|
||||
};
|
||||
|
||||
}
|
208
src/crypto/randomx/jit_compiler_x86_static.S
Normal file
208
src/crypto/randomx/jit_compiler_x86_static.S
Normal file
|
@ -0,0 +1,208 @@
|
|||
# Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the copyright holder nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
.intel_syntax noprefix
|
||||
#if defined(__APPLE__)
|
||||
.text
|
||||
#define DECL(x) _##x
|
||||
#else
|
||||
.section .text
|
||||
#define DECL(x) x
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__CYGWIN__)
|
||||
#define WINABI
|
||||
#endif
|
||||
|
||||
.global DECL(randomx_program_prologue)
|
||||
.global DECL(randomx_program_loop_begin)
|
||||
.global DECL(randomx_program_loop_load)
|
||||
.global DECL(randomx_program_start)
|
||||
.global DECL(randomx_program_read_dataset)
|
||||
.global DECL(randomx_program_read_dataset_sshash_init)
|
||||
.global DECL(randomx_program_read_dataset_sshash_fin)
|
||||
.global DECL(randomx_program_loop_store)
|
||||
.global DECL(randomx_program_loop_end)
|
||||
.global DECL(randomx_dataset_init)
|
||||
.global DECL(randomx_program_epilogue)
|
||||
.global DECL(randomx_sshash_load)
|
||||
.global DECL(randomx_sshash_prefetch)
|
||||
.global DECL(randomx_sshash_end)
|
||||
.global DECL(randomx_sshash_init)
|
||||
.global DECL(randomx_program_end)
|
||||
.global DECL(randomx_reciprocal_fast)
|
||||
|
||||
#define RANDOMX_SCRATCHPAD_MASK 2097088
|
||||
#define RANDOMX_DATASET_BASE_MASK 2147483584
|
||||
#define RANDOMX_CACHE_MASK 4194303
|
||||
|
||||
#define db .byte
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_prologue):
|
||||
#if defined(WINABI)
|
||||
#include "asm/program_prologue_win64.inc"
|
||||
#else
|
||||
#include "asm/program_prologue_linux.inc"
|
||||
#endif
|
||||
movapd xmm13, xmmword ptr mantissaMask[rip]
|
||||
movapd xmm14, xmmword ptr exp240[rip]
|
||||
movapd xmm15, xmmword ptr scaleMask[rip]
|
||||
jmp DECL(randomx_program_loop_begin)
|
||||
|
||||
.balign 64
|
||||
#include "asm/program_xmm_constants.inc"
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_loop_begin):
|
||||
nop
|
||||
|
||||
DECL(randomx_program_loop_load):
|
||||
#include "asm/program_loop_load.inc"
|
||||
|
||||
DECL(randomx_program_start):
|
||||
nop
|
||||
|
||||
DECL(randomx_program_read_dataset):
|
||||
#include "asm/program_read_dataset.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_sshash_init):
|
||||
#include "asm/program_read_dataset_sshash_init.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_sshash_fin):
|
||||
#include "asm/program_read_dataset_sshash_fin.inc"
|
||||
|
||||
DECL(randomx_program_loop_store):
|
||||
#include "asm/program_loop_store.inc"
|
||||
|
||||
DECL(randomx_program_loop_end):
|
||||
nop
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_dataset_init):
|
||||
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
#if defined(WINABI)
|
||||
push rdi
|
||||
push rsi
|
||||
mov rdi, qword ptr [rcx] ;# cache->memory
|
||||
mov rsi, rdx ;# dataset
|
||||
mov rbp, r8 ;# block index
|
||||
push r9 ;# max. block index
|
||||
#else
|
||||
mov rdi, qword ptr [rdi] ;# cache->memory
|
||||
;# dataset in rsi
|
||||
mov rbp, rdx ;# block index
|
||||
push rcx ;# max. block index
|
||||
#endif
|
||||
init_block_loop:
|
||||
prefetchw byte ptr [rsi]
|
||||
mov rbx, rbp
|
||||
.byte 232 ;# 0xE8 = call
|
||||
;# .set CALL_LOC,
|
||||
.int 32768 - (call_offset - DECL(randomx_dataset_init))
|
||||
call_offset:
|
||||
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
add rbp, 1
|
||||
add rsi, 64
|
||||
cmp rbp, qword ptr [rsp]
|
||||
jb init_block_loop
|
||||
pop rax
|
||||
#if defined(WINABI)
|
||||
pop rsi
|
||||
pop rdi
|
||||
#endif
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_epilogue):
|
||||
#include "asm/program_epilogue_store.inc"
|
||||
#if defined(WINABI)
|
||||
#include "asm/program_epilogue_win64.inc"
|
||||
#else
|
||||
#include "asm/program_epilogue_linux.inc"
|
||||
#endif
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_sshash_load):
|
||||
#include "asm/program_sshash_load.inc"
|
||||
|
||||
DECL(randomx_sshash_prefetch):
|
||||
#include "asm/program_sshash_prefetch.inc"
|
||||
|
||||
DECL(randomx_sshash_end):
|
||||
nop
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_sshash_init):
|
||||
lea r8, [rbx+1]
|
||||
#include "asm/program_sshash_prefetch.inc"
|
||||
imul r8, qword ptr r0_mul[rip]
|
||||
mov r9, qword ptr r1_add[rip]
|
||||
xor r9, r8
|
||||
mov r10, qword ptr r2_add[rip]
|
||||
xor r10, r8
|
||||
mov r11, qword ptr r3_add[rip]
|
||||
xor r11, r8
|
||||
mov r12, qword ptr r4_add[rip]
|
||||
xor r12, r8
|
||||
mov r13, qword ptr r5_add[rip]
|
||||
xor r13, r8
|
||||
mov r14, qword ptr r6_add[rip]
|
||||
xor r14, r8
|
||||
mov r15, qword ptr r7_add[rip]
|
||||
xor r15, r8
|
||||
jmp DECL(randomx_program_end)
|
||||
|
||||
.balign 64
|
||||
#include "asm/program_sshash_constants.inc"
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_end):
|
||||
nop
|
||||
|
||||
DECL(randomx_reciprocal_fast):
|
||||
#if !defined(WINABI)
|
||||
mov rcx, rdi
|
||||
#endif
|
||||
#include "asm/randomx_reciprocal.inc"
|
199
src/crypto/randomx/jit_compiler_x86_static.asm
Normal file
199
src/crypto/randomx/jit_compiler_x86_static.asm
Normal file
|
@ -0,0 +1,199 @@
|
|||
; Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
;
|
||||
; All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the distribution.
|
||||
; * Neither the name of the copyright holder nor the
|
||||
; names of its contributors may be used to endorse or promote products
|
||||
; derived from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
IFDEF RAX
|
||||
|
||||
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||
|
||||
PUBLIC randomx_program_prologue
|
||||
PUBLIC randomx_program_loop_begin
|
||||
PUBLIC randomx_program_loop_load
|
||||
PUBLIC randomx_program_start
|
||||
PUBLIC randomx_program_read_dataset
|
||||
PUBLIC randomx_program_read_dataset_sshash_init
|
||||
PUBLIC randomx_program_read_dataset_sshash_fin
|
||||
PUBLIC randomx_dataset_init
|
||||
PUBLIC randomx_program_loop_store
|
||||
PUBLIC randomx_program_loop_end
|
||||
PUBLIC randomx_program_epilogue
|
||||
PUBLIC randomx_sshash_load
|
||||
PUBLIC randomx_sshash_prefetch
|
||||
PUBLIC randomx_sshash_end
|
||||
PUBLIC randomx_sshash_init
|
||||
PUBLIC randomx_program_end
|
||||
PUBLIC randomx_reciprocal_fast
|
||||
|
||||
RANDOMX_SCRATCHPAD_MASK EQU 2097088
|
||||
RANDOMX_DATASET_BASE_MASK EQU 2147483584
|
||||
RANDOMX_CACHE_MASK EQU 4194303
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_prologue PROC
|
||||
include asm/program_prologue_win64.inc
|
||||
movapd xmm13, xmmword ptr [mantissaMask]
|
||||
movapd xmm14, xmmword ptr [exp240]
|
||||
movapd xmm15, xmmword ptr [scaleMask]
|
||||
jmp randomx_program_loop_begin
|
||||
randomx_program_prologue ENDP
|
||||
|
||||
ALIGN 64
|
||||
include asm/program_xmm_constants.inc
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_loop_begin PROC
|
||||
nop
|
||||
randomx_program_loop_begin ENDP
|
||||
|
||||
randomx_program_loop_load PROC
|
||||
include asm/program_loop_load.inc
|
||||
randomx_program_loop_load ENDP
|
||||
|
||||
randomx_program_start PROC
|
||||
nop
|
||||
randomx_program_start ENDP
|
||||
|
||||
randomx_program_read_dataset PROC
|
||||
include asm/program_read_dataset.inc
|
||||
randomx_program_read_dataset ENDP
|
||||
|
||||
randomx_program_read_dataset_sshash_init PROC
|
||||
include asm/program_read_dataset_sshash_init.inc
|
||||
randomx_program_read_dataset_sshash_init ENDP
|
||||
|
||||
randomx_program_read_dataset_sshash_fin PROC
|
||||
include asm/program_read_dataset_sshash_fin.inc
|
||||
randomx_program_read_dataset_sshash_fin ENDP
|
||||
|
||||
randomx_program_loop_store PROC
|
||||
include asm/program_loop_store.inc
|
||||
randomx_program_loop_store ENDP
|
||||
|
||||
randomx_program_loop_end PROC
|
||||
nop
|
||||
randomx_program_loop_end ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_dataset_init PROC
|
||||
push rbx
|
||||
push rbp
|
||||
push rdi
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rdi, qword ptr [rcx] ;# cache->memory
|
||||
mov rsi, rdx ;# dataset
|
||||
mov rbp, r8 ;# block index
|
||||
push r9 ;# max. block index
|
||||
init_block_loop:
|
||||
prefetchw byte ptr [rsi]
|
||||
mov rbx, rbp
|
||||
db 232 ;# 0xE8 = call
|
||||
dd 32768 - distance
|
||||
distance equ $ - offset randomx_dataset_init
|
||||
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
add rbp, 1
|
||||
add rsi, 64
|
||||
cmp rbp, qword ptr [rsp]
|
||||
jb init_block_loop
|
||||
pop r9
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
randomx_dataset_init ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_epilogue PROC
|
||||
include asm/program_epilogue_store.inc
|
||||
include asm/program_epilogue_win64.inc
|
||||
randomx_program_epilogue ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_sshash_load PROC
|
||||
include asm/program_sshash_load.inc
|
||||
randomx_sshash_load ENDP
|
||||
|
||||
randomx_sshash_prefetch PROC
|
||||
include asm/program_sshash_prefetch.inc
|
||||
randomx_sshash_prefetch ENDP
|
||||
|
||||
randomx_sshash_end PROC
|
||||
nop
|
||||
randomx_sshash_end ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_sshash_init PROC
|
||||
lea r8, [rbx+1]
|
||||
include asm/program_sshash_prefetch.inc
|
||||
imul r8, qword ptr [r0_mul]
|
||||
mov r9, qword ptr [r1_add]
|
||||
xor r9, r8
|
||||
mov r10, qword ptr [r2_add]
|
||||
xor r10, r8
|
||||
mov r11, qword ptr [r3_add]
|
||||
xor r11, r8
|
||||
mov r12, qword ptr [r4_add]
|
||||
xor r12, r8
|
||||
mov r13, qword ptr [r5_add]
|
||||
xor r13, r8
|
||||
mov r14, qword ptr [r6_add]
|
||||
xor r14, r8
|
||||
mov r15, qword ptr [r7_add]
|
||||
xor r15, r8
|
||||
jmp randomx_program_end
|
||||
randomx_sshash_init ENDP
|
||||
|
||||
ALIGN 64
|
||||
include asm/program_sshash_constants.inc
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_end PROC
|
||||
nop
|
||||
randomx_program_end ENDP
|
||||
|
||||
randomx_reciprocal_fast PROC
|
||||
include asm/randomx_reciprocal.inc
|
||||
randomx_reciprocal_fast ENDP
|
||||
|
||||
_RANDOMX_JITX86_STATIC ENDS
|
||||
|
||||
ENDIF
|
||||
|
||||
END
|
48
src/crypto/randomx/jit_compiler_x86_static.hpp
Normal file
48
src/crypto/randomx/jit_compiler_x86_static.hpp
Normal file
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
extern "C" {
|
||||
void randomx_program_prologue();
|
||||
void randomx_program_loop_begin();
|
||||
void randomx_program_loop_load();
|
||||
void randomx_program_start();
|
||||
void randomx_program_read_dataset();
|
||||
void randomx_program_read_dataset_sshash_init();
|
||||
void randomx_program_read_dataset_sshash_fin();
|
||||
void randomx_program_loop_store();
|
||||
void randomx_program_loop_end();
|
||||
void randomx_dataset_init();
|
||||
void randomx_program_epilogue();
|
||||
void randomx_sshash_load();
|
||||
void randomx_sshash_prefetch();
|
||||
void randomx_sshash_end();
|
||||
void randomx_sshash_init();
|
||||
void randomx_program_end();
|
||||
}
|
60
src/crypto/randomx/program.hpp
Normal file
60
src/crypto/randomx/program.hpp
Normal file
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include "common.hpp"
|
||||
#include "instruction.hpp"
|
||||
#include "blake2/endian.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
struct ProgramConfiguration {
|
||||
uint64_t eMask[2];
|
||||
uint32_t readReg0, readReg1, readReg2, readReg3;
|
||||
};
|
||||
|
||||
class Program {
|
||||
public:
|
||||
Instruction& operator()(int pc) {
|
||||
return programBuffer[pc];
|
||||
}
|
||||
uint64_t getEntropy(int i) {
|
||||
return load64(&entropyBuffer[i]);
|
||||
}
|
||||
uint32_t getSize() {
|
||||
return RandomX_CurrentConfig.ProgramSize;
|
||||
}
|
||||
private:
|
||||
uint64_t entropyBuffer[16];
|
||||
Instruction programBuffer[RANDOMX_PROGRAM_MAX_SIZE];
|
||||
};
|
||||
|
||||
static_assert(sizeof(Program) % 64 == 0, "Invalid size of class randomx::Program");
|
||||
}
|
476
src/crypto/randomx/randomx.cpp
Normal file
476
src/crypto/randomx/randomx.cpp
Normal file
|
@ -0,0 +1,476 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "randomx.h"
|
||||
#include "dataset.hpp"
|
||||
#include "vm_interpreted.hpp"
|
||||
#include "vm_interpreted_light.hpp"
|
||||
#include "vm_compiled.hpp"
|
||||
#include "vm_compiled_light.hpp"
|
||||
#include "blake2/blake2.h"
|
||||
#include "jit_compiler_x86_static.hpp"
|
||||
#include <cassert>
|
||||
|
||||
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
||||
{
|
||||
ArgonSalt = "RandomWOW\x01";
|
||||
ProgramIterations = 1024;
|
||||
ProgramCount = 16;
|
||||
ScratchpadL2_Size = 131072;
|
||||
ScratchpadL3_Size = 1048576;
|
||||
|
||||
RANDOMX_FREQ_IROR_R = 10;
|
||||
RANDOMX_FREQ_IROL_R = 0;
|
||||
RANDOMX_FREQ_FSWAP_R = 8;
|
||||
RANDOMX_FREQ_FADD_R = 20;
|
||||
RANDOMX_FREQ_FSUB_R = 20;
|
||||
RANDOMX_FREQ_FMUL_R = 20;
|
||||
|
||||
fillAes4Rx4_Key[0] = rx_set_int_vec_i128(0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d);
|
||||
fillAes4Rx4_Key[1] = rx_set_int_vec_i128(0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450);
|
||||
fillAes4Rx4_Key[2] = rx_set_int_vec_i128(0x3d324aac, 0xa7279ad2, 0xd524fde4, 0x114c47a4);
|
||||
fillAes4Rx4_Key[3] = rx_set_int_vec_i128(0x76f6db08, 0x42d3dbd9, 0x99a9aeff, 0x810c3a2a);
|
||||
fillAes4Rx4_Key[4] = fillAes4Rx4_Key[0];
|
||||
fillAes4Rx4_Key[5] = fillAes4Rx4_Key[1];
|
||||
fillAes4Rx4_Key[6] = fillAes4Rx4_Key[2];
|
||||
fillAes4Rx4_Key[7] = fillAes4Rx4_Key[3];
|
||||
}
|
||||
|
||||
RandomX_ConfigurationLoki::RandomX_ConfigurationLoki()
|
||||
{
|
||||
ArgonIterations = 4;
|
||||
ArgonLanes = 2;
|
||||
ArgonSalt = "RandomXL\x12";
|
||||
ProgramSize = 320;
|
||||
ProgramCount = 7;
|
||||
}
|
||||
|
||||
RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||
: ArgonMemory(262144)
|
||||
, ArgonIterations(3)
|
||||
, ArgonLanes(1)
|
||||
, ArgonSalt("RandomX\x03")
|
||||
, CacheAccesses(8)
|
||||
, SuperscalarLatency(170)
|
||||
, DatasetBaseSize(2147483648)
|
||||
, DatasetExtraSize(33554368)
|
||||
, ScratchpadL1_Size(16384)
|
||||
, ScratchpadL2_Size(262144)
|
||||
, ScratchpadL3_Size(2097152)
|
||||
, ProgramSize(256)
|
||||
, ProgramIterations(2048)
|
||||
, ProgramCount(8)
|
||||
, JumpBits(8)
|
||||
, JumpOffset(8)
|
||||
, RANDOMX_FREQ_IADD_RS(25)
|
||||
, RANDOMX_FREQ_IADD_M(7)
|
||||
, RANDOMX_FREQ_ISUB_R(16)
|
||||
, RANDOMX_FREQ_ISUB_M(7)
|
||||
, RANDOMX_FREQ_IMUL_R(16)
|
||||
, RANDOMX_FREQ_IMUL_M(4)
|
||||
, RANDOMX_FREQ_IMULH_R(4)
|
||||
, RANDOMX_FREQ_IMULH_M(1)
|
||||
, RANDOMX_FREQ_ISMULH_R(4)
|
||||
, RANDOMX_FREQ_ISMULH_M(1)
|
||||
, RANDOMX_FREQ_IMUL_RCP(8)
|
||||
, RANDOMX_FREQ_INEG_R(2)
|
||||
, RANDOMX_FREQ_IXOR_R(15)
|
||||
, RANDOMX_FREQ_IXOR_M(5)
|
||||
, RANDOMX_FREQ_IROR_R(8)
|
||||
, RANDOMX_FREQ_IROL_R(2)
|
||||
, RANDOMX_FREQ_ISWAP_R(4)
|
||||
, RANDOMX_FREQ_FSWAP_R(4)
|
||||
, RANDOMX_FREQ_FADD_R(16)
|
||||
, RANDOMX_FREQ_FADD_M(5)
|
||||
, RANDOMX_FREQ_FSUB_R(16)
|
||||
, RANDOMX_FREQ_FSUB_M(5)
|
||||
, RANDOMX_FREQ_FSCAL_R(6)
|
||||
, RANDOMX_FREQ_FMUL_R(32)
|
||||
, RANDOMX_FREQ_FDIV_M(4)
|
||||
, RANDOMX_FREQ_FSQRT_R(6)
|
||||
, RANDOMX_FREQ_CBRANCH(16)
|
||||
, RANDOMX_FREQ_CFROUND(1)
|
||||
, RANDOMX_FREQ_ISTORE(16)
|
||||
, RANDOMX_FREQ_NOP(0)
|
||||
{
|
||||
fillAes4Rx4_Key[0] = rx_set_int_vec_i128(0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd);
|
||||
fillAes4Rx4_Key[1] = rx_set_int_vec_i128(0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450);
|
||||
fillAes4Rx4_Key[2] = rx_set_int_vec_i128(0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904);
|
||||
fillAes4Rx4_Key[3] = rx_set_int_vec_i128(0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763);
|
||||
fillAes4Rx4_Key[4] = rx_set_int_vec_i128(0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73);
|
||||
fillAes4Rx4_Key[5] = rx_set_int_vec_i128(0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3);
|
||||
fillAes4Rx4_Key[6] = rx_set_int_vec_i128(0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7);
|
||||
fillAes4Rx4_Key[7] = rx_set_int_vec_i128(0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609);
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_sshash_prefetch;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_sshash_end;
|
||||
memcpy(codeShhPrefetchTweaked, a, b - a);
|
||||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
memcpy(codeReadDatasetTweaked, a, b - a);
|
||||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_program_read_dataset_sshash_fin;
|
||||
memcpy(codeReadDatasetLightSshInitTweaked, a, b - a);
|
||||
}
|
||||
{
|
||||
const uint8_t* a = (const uint8_t*)&randomx_program_loop_load;
|
||||
const uint8_t* b = (const uint8_t*)&randomx_program_start;
|
||||
memcpy(codeLoopLoadTweaked, a, b - a);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void RandomX_ConfigurationBase::Apply()
|
||||
{
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
|
||||
const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
|
||||
*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
|
||||
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
|
||||
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
||||
#endif
|
||||
|
||||
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
|
||||
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
|
||||
|
||||
ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
|
||||
ScratchpadL1Mask16_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) / 2 - 1) * 16;
|
||||
ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8;
|
||||
ScratchpadL2Mask16_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) / 2 - 1) * 16;
|
||||
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
|
||||
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
*(uint32_t*)(codeLoopLoadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
||||
*(uint32_t*)(codeLoopLoadTweaked + 50) = ScratchpadL3Mask64_Calculated;
|
||||
#endif
|
||||
|
||||
ConditionMask_Calculated = (1 << JumpBits) - 1;
|
||||
|
||||
constexpr int CEIL_NULL = 0;
|
||||
int k = 0;
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x
|
||||
#else
|
||||
#define JIT_HANDLE(x, prev)
|
||||
#endif
|
||||
|
||||
#define INST_HANDLE(x, prev) \
|
||||
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
|
||||
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
|
||||
|
||||
INST_HANDLE(IADD_RS, NULL);
|
||||
INST_HANDLE(IADD_M, IADD_RS);
|
||||
INST_HANDLE(ISUB_R, IADD_M);
|
||||
INST_HANDLE(ISUB_M, ISUB_R);
|
||||
INST_HANDLE(IMUL_R, ISUB_M);
|
||||
INST_HANDLE(IMUL_M, IMUL_R);
|
||||
INST_HANDLE(IMULH_R, IMUL_M);
|
||||
INST_HANDLE(IMULH_M, IMULH_R);
|
||||
INST_HANDLE(ISMULH_R, IMULH_M);
|
||||
INST_HANDLE(ISMULH_M, ISMULH_R);
|
||||
INST_HANDLE(IMUL_RCP, ISMULH_M);
|
||||
INST_HANDLE(INEG_R, IMUL_RCP);
|
||||
INST_HANDLE(IXOR_R, INEG_R);
|
||||
INST_HANDLE(IXOR_M, IXOR_R);
|
||||
INST_HANDLE(IROR_R, IXOR_M);
|
||||
INST_HANDLE(IROL_R, IROR_R);
|
||||
INST_HANDLE(ISWAP_R, IROL_R);
|
||||
INST_HANDLE(FSWAP_R, ISWAP_R);
|
||||
INST_HANDLE(FADD_R, FSWAP_R);
|
||||
INST_HANDLE(FADD_M, FADD_R);
|
||||
INST_HANDLE(FSUB_R, FADD_M);
|
||||
INST_HANDLE(FSUB_M, FSUB_R);
|
||||
INST_HANDLE(FSCAL_R, FSUB_M);
|
||||
INST_HANDLE(FMUL_R, FSCAL_R);
|
||||
INST_HANDLE(FDIV_M, FMUL_R);
|
||||
INST_HANDLE(FSQRT_R, FDIV_M);
|
||||
INST_HANDLE(CBRANCH, FSQRT_R);
|
||||
INST_HANDLE(CFROUND, CBRANCH);
|
||||
INST_HANDLE(ISTORE, CFROUND);
|
||||
INST_HANDLE(NOP, ISTORE);
|
||||
#undef INST_HANDLE
|
||||
}
|
||||
|
||||
RandomX_ConfigurationMonero RandomX_MoneroConfig;
|
||||
RandomX_ConfigurationWownero RandomX_WowneroConfig;
|
||||
RandomX_ConfigurationLoki RandomX_LokiConfig;
|
||||
|
||||
RandomX_ConfigurationBase RandomX_CurrentConfig;
|
||||
|
||||
extern "C" {
|
||||
|
||||
randomx_cache *randomx_alloc_cache(randomx_flags flags) {
|
||||
randomx_cache *cache;
|
||||
|
||||
try {
|
||||
cache = new randomx_cache();
|
||||
switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) {
|
||||
case RANDOMX_FLAG_DEFAULT:
|
||||
cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
|
||||
cache->jit = nullptr;
|
||||
cache->initialize = &randomx::initCache;
|
||||
cache->datasetInit = &randomx::initDataset;
|
||||
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT:
|
||||
cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
|
||||
cache->jit = new randomx::JitCompiler();
|
||||
cache->initialize = &randomx::initCacheCompile;
|
||||
cache->datasetInit = cache->jit->getDatasetInitFunc();
|
||||
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_LARGE_PAGES:
|
||||
cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
|
||||
cache->jit = nullptr;
|
||||
cache->initialize = &randomx::initCache;
|
||||
cache->datasetInit = &randomx::initDataset;
|
||||
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
|
||||
cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
|
||||
cache->jit = new randomx::JitCompiler();
|
||||
cache->initialize = &randomx::initCacheCompile;
|
||||
cache->datasetInit = cache->jit->getDatasetInitFunc();
|
||||
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_CACHE_MAX_SIZE);
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
catch (std::exception &ex) {
|
||||
if (cache != nullptr) {
|
||||
randomx_release_cache(cache);
|
||||
cache = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return cache;
|
||||
}
|
||||
|
||||
void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize) {
|
||||
assert(cache != nullptr);
|
||||
assert(keySize == 0 || key != nullptr);
|
||||
cache->initialize(cache, key, keySize);
|
||||
}
|
||||
|
||||
void randomx_release_cache(randomx_cache* cache) {
|
||||
assert(cache != nullptr);
|
||||
cache->dealloc(cache);
|
||||
delete cache;
|
||||
}
|
||||
|
||||
randomx_dataset *randomx_alloc_dataset(randomx_flags flags) {
|
||||
randomx_dataset *dataset;
|
||||
|
||||
try {
|
||||
dataset = new randomx_dataset();
|
||||
if (flags & RANDOMX_FLAG_LARGE_PAGES) {
|
||||
dataset->dealloc = &randomx::deallocDataset<randomx::LargePageAllocator>;
|
||||
dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
|
||||
}
|
||||
else {
|
||||
dataset->dealloc = &randomx::deallocDataset<randomx::DefaultAllocator>;
|
||||
dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(RANDOMX_DATASET_MAX_SIZE);
|
||||
}
|
||||
}
|
||||
catch (std::exception &ex) {
|
||||
if (dataset != nullptr) {
|
||||
randomx_release_dataset(dataset);
|
||||
dataset = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return dataset;
|
||||
}
|
||||
|
||||
#define DatasetItemCount ((RandomX_CurrentConfig.DatasetBaseSize + RandomX_CurrentConfig.DatasetExtraSize) / RANDOMX_DATASET_ITEM_SIZE)
|
||||
|
||||
unsigned long randomx_dataset_item_count() {
|
||||
return DatasetItemCount;
|
||||
}
|
||||
|
||||
void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) {
|
||||
assert(dataset != nullptr);
|
||||
assert(cache != nullptr);
|
||||
assert(startItem < DatasetItemCount && itemCount <= DatasetItemCount);
|
||||
assert(startItem + itemCount <= DatasetItemCount);
|
||||
cache->datasetInit(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount);
|
||||
}
|
||||
|
||||
void *randomx_get_dataset_memory(randomx_dataset *dataset) {
|
||||
assert(dataset != nullptr);
|
||||
return dataset->memory;
|
||||
}
|
||||
|
||||
void randomx_release_dataset(randomx_dataset *dataset) {
|
||||
assert(dataset != nullptr);
|
||||
dataset->dealloc(dataset);
|
||||
delete dataset;
|
||||
}
|
||||
|
||||
randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset) {
|
||||
assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM));
|
||||
assert(cache == nullptr || cache->isInitialized());
|
||||
assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM));
|
||||
|
||||
randomx_vm *vm = nullptr;
|
||||
|
||||
try {
|
||||
switch (flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES)) {
|
||||
case RANDOMX_FLAG_DEFAULT:
|
||||
vm = new randomx::InterpretedLightVmDefault();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM:
|
||||
vm = new randomx::InterpretedVmDefault();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT:
|
||||
vm = new randomx::CompiledLightVmDefault();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT:
|
||||
vm = new randomx::CompiledVmDefault();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_HARD_AES:
|
||||
vm = new randomx::InterpretedLightVmHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES:
|
||||
vm = new randomx::InterpretedVmHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
|
||||
vm = new randomx::CompiledLightVmHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
|
||||
vm = new randomx::CompiledVmHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::InterpretedLightVmLargePage();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::InterpretedVmLargePage();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::CompiledLightVmLargePage();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::CompiledVmLargePage();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::InterpretedLightVmLargePageHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::InterpretedVmLargePageHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::CompiledLightVmLargePageHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::CompiledVmLargePageHardAes();
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
|
||||
if(cache != nullptr)
|
||||
vm->setCache(cache);
|
||||
|
||||
if(dataset != nullptr)
|
||||
vm->setDataset(dataset);
|
||||
|
||||
vm->allocate();
|
||||
}
|
||||
catch (std::exception &ex) {
|
||||
delete vm;
|
||||
vm = nullptr;
|
||||
}
|
||||
|
||||
return vm;
|
||||
}
|
||||
|
||||
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
|
||||
assert(machine != nullptr);
|
||||
assert(cache != nullptr && cache->isInitialized());
|
||||
machine->setCache(cache);
|
||||
}
|
||||
|
||||
void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) {
|
||||
assert(machine != nullptr);
|
||||
assert(dataset != nullptr);
|
||||
machine->setDataset(dataset);
|
||||
}
|
||||
|
||||
void randomx_destroy_vm(randomx_vm *machine) {
|
||||
assert(machine != nullptr);
|
||||
delete machine;
|
||||
}
|
||||
|
||||
void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {
|
||||
assert(machine != nullptr);
|
||||
assert(inputSize == 0 || input != nullptr);
|
||||
assert(output != nullptr);
|
||||
alignas(16) uint64_t tempHash[8];
|
||||
int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||
assert(blakeResult == 0);
|
||||
machine->initScratchpad(&tempHash);
|
||||
machine->resetRoundingMode();
|
||||
for (int chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||
machine->run(&tempHash);
|
||||
blakeResult = blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
assert(blakeResult == 0);
|
||||
}
|
||||
machine->run(&tempHash);
|
||||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
||||
}
|
||||
|
||||
}
|
332
src/crypto/randomx/randomx.h
Normal file
332
src/crypto/randomx/randomx.h
Normal file
|
@ -0,0 +1,332 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef RANDOMX_H
|
||||
#define RANDOMX_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <type_traits>
|
||||
#include "intrin_portable.h"
|
||||
|
||||
#define RANDOMX_HASH_SIZE 32
|
||||
#define RANDOMX_DATASET_ITEM_SIZE 64
|
||||
|
||||
#ifndef RANDOMX_EXPORT
|
||||
#define RANDOMX_EXPORT
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
RANDOMX_FLAG_DEFAULT = 0,
|
||||
RANDOMX_FLAG_LARGE_PAGES = 1,
|
||||
RANDOMX_FLAG_HARD_AES = 2,
|
||||
RANDOMX_FLAG_FULL_MEM = 4,
|
||||
RANDOMX_FLAG_JIT = 8,
|
||||
} randomx_flags;
|
||||
|
||||
typedef struct randomx_dataset randomx_dataset;
|
||||
typedef struct randomx_cache randomx_cache;
|
||||
typedef struct randomx_vm randomx_vm;
|
||||
|
||||
struct RandomX_ConfigurationBase
|
||||
{
|
||||
RandomX_ConfigurationBase();
|
||||
|
||||
void Apply();
|
||||
|
||||
uint32_t ArgonMemory;
|
||||
uint32_t ArgonIterations;
|
||||
uint32_t ArgonLanes;
|
||||
const char* ArgonSalt;
|
||||
uint32_t CacheAccesses;
|
||||
uint32_t SuperscalarLatency;
|
||||
|
||||
uint32_t DatasetBaseSize;
|
||||
uint32_t DatasetExtraSize;
|
||||
|
||||
uint32_t ScratchpadL1_Size;
|
||||
uint32_t ScratchpadL2_Size;
|
||||
uint32_t ScratchpadL3_Size;
|
||||
|
||||
uint32_t ProgramSize;
|
||||
uint32_t ProgramIterations;
|
||||
uint32_t ProgramCount;
|
||||
|
||||
uint32_t JumpBits;
|
||||
uint32_t JumpOffset;
|
||||
|
||||
uint32_t RANDOMX_FREQ_IADD_RS;
|
||||
uint32_t RANDOMX_FREQ_IADD_M;
|
||||
uint32_t RANDOMX_FREQ_ISUB_R;
|
||||
uint32_t RANDOMX_FREQ_ISUB_M;
|
||||
uint32_t RANDOMX_FREQ_IMUL_R;
|
||||
uint32_t RANDOMX_FREQ_IMUL_M;
|
||||
uint32_t RANDOMX_FREQ_IMULH_R;
|
||||
uint32_t RANDOMX_FREQ_IMULH_M;
|
||||
uint32_t RANDOMX_FREQ_ISMULH_R;
|
||||
uint32_t RANDOMX_FREQ_ISMULH_M;
|
||||
uint32_t RANDOMX_FREQ_IMUL_RCP;
|
||||
uint32_t RANDOMX_FREQ_INEG_R;
|
||||
uint32_t RANDOMX_FREQ_IXOR_R;
|
||||
uint32_t RANDOMX_FREQ_IXOR_M;
|
||||
uint32_t RANDOMX_FREQ_IROR_R;
|
||||
uint32_t RANDOMX_FREQ_IROL_R;
|
||||
uint32_t RANDOMX_FREQ_ISWAP_R;
|
||||
uint32_t RANDOMX_FREQ_FSWAP_R;
|
||||
uint32_t RANDOMX_FREQ_FADD_R;
|
||||
uint32_t RANDOMX_FREQ_FADD_M;
|
||||
uint32_t RANDOMX_FREQ_FSUB_R;
|
||||
uint32_t RANDOMX_FREQ_FSUB_M;
|
||||
uint32_t RANDOMX_FREQ_FSCAL_R;
|
||||
uint32_t RANDOMX_FREQ_FMUL_R;
|
||||
uint32_t RANDOMX_FREQ_FDIV_M;
|
||||
uint32_t RANDOMX_FREQ_FSQRT_R;
|
||||
uint32_t RANDOMX_FREQ_CBRANCH;
|
||||
uint32_t RANDOMX_FREQ_CFROUND;
|
||||
uint32_t RANDOMX_FREQ_ISTORE;
|
||||
uint32_t RANDOMX_FREQ_NOP;
|
||||
|
||||
rx_vec_i128 fillAes4Rx4_Key[8];
|
||||
|
||||
uint8_t codeShhPrefetchTweaked[20];
|
||||
uint8_t codeReadDatasetTweaked[64];
|
||||
uint8_t codeReadDatasetLightSshInitTweaked[68];
|
||||
uint8_t codeLoopLoadTweaked[140];
|
||||
|
||||
uint32_t CacheLineAlignMask_Calculated;
|
||||
uint32_t DatasetExtraItems_Calculated;
|
||||
|
||||
uint32_t ScratchpadL1Mask_Calculated;
|
||||
uint32_t ScratchpadL1Mask16_Calculated;
|
||||
uint32_t ScratchpadL2Mask_Calculated;
|
||||
uint32_t ScratchpadL2Mask16_Calculated;
|
||||
uint32_t ScratchpadL3Mask_Calculated;
|
||||
uint32_t ScratchpadL3Mask64_Calculated;
|
||||
|
||||
uint32_t ConditionMask_Calculated;
|
||||
|
||||
int CEIL_IADD_RS;
|
||||
int CEIL_IADD_M;
|
||||
int CEIL_ISUB_R;
|
||||
int CEIL_ISUB_M;
|
||||
int CEIL_IMUL_R;
|
||||
int CEIL_IMUL_M;
|
||||
int CEIL_IMULH_R;
|
||||
int CEIL_IMULH_M;
|
||||
int CEIL_ISMULH_R;
|
||||
int CEIL_ISMULH_M;
|
||||
int CEIL_IMUL_RCP;
|
||||
int CEIL_INEG_R;
|
||||
int CEIL_IXOR_R;
|
||||
int CEIL_IXOR_M;
|
||||
int CEIL_IROR_R;
|
||||
int CEIL_IROL_R;
|
||||
int CEIL_ISWAP_R;
|
||||
int CEIL_FSWAP_R;
|
||||
int CEIL_FADD_R;
|
||||
int CEIL_FADD_M;
|
||||
int CEIL_FSUB_R;
|
||||
int CEIL_FSUB_M;
|
||||
int CEIL_FSCAL_R;
|
||||
int CEIL_FMUL_R;
|
||||
int CEIL_FDIV_M;
|
||||
int CEIL_FSQRT_R;
|
||||
int CEIL_CBRANCH;
|
||||
int CEIL_CFROUND;
|
||||
int CEIL_ISTORE;
|
||||
int CEIL_NOP;
|
||||
};
|
||||
|
||||
struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};
|
||||
struct RandomX_ConfigurationWownero : public RandomX_ConfigurationBase { RandomX_ConfigurationWownero(); };
|
||||
struct RandomX_ConfigurationLoki : public RandomX_ConfigurationBase { RandomX_ConfigurationLoki(); };
|
||||
|
||||
extern RandomX_ConfigurationMonero RandomX_MoneroConfig;
|
||||
extern RandomX_ConfigurationWownero RandomX_WowneroConfig;
|
||||
extern RandomX_ConfigurationLoki RandomX_LokiConfig;
|
||||
|
||||
extern RandomX_ConfigurationBase RandomX_CurrentConfig;
|
||||
|
||||
template<typename T>
|
||||
void randomx_apply_config(const T& config)
|
||||
{
|
||||
static_assert(sizeof(T) == sizeof(RandomX_ConfigurationBase), "Invalid RandomX configuration struct size");
|
||||
static_assert(std::is_base_of<RandomX_ConfigurationBase, T>::value, "Incompatible RandomX configuration struct");
|
||||
RandomX_CurrentConfig = config;
|
||||
RandomX_CurrentConfig.Apply();
|
||||
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Creates a randomx_cache structure and allocates memory for RandomX Cache.
|
||||
*
|
||||
* @param flags is any combination of these 2 flags (each flag can be set or not set):
|
||||
* RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
|
||||
* RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes
|
||||
* subsequent Dataset initialization faster
|
||||
*
|
||||
* @return Pointer to an allocated randomx_cache structure.
|
||||
* NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT
|
||||
* is set and JIT compilation is not supported on the current platform.
|
||||
*/
|
||||
RANDOMX_EXPORT randomx_cache *randomx_alloc_cache(randomx_flags flags);
|
||||
|
||||
/**
|
||||
* Initializes the cache memory and SuperscalarHash using the provided key value.
|
||||
*
|
||||
* @param cache is a pointer to a previously allocated randomx_cache structure. Must not be NULL.
|
||||
* @param key is a pointer to memory which contains the key value. Must not be NULL.
|
||||
* @param keySize is the number of bytes of the key.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize);
|
||||
|
||||
/**
|
||||
* Releases all memory occupied by the randomx_cache structure.
|
||||
*
|
||||
* @param cache is a pointer to a previously allocated randomx_cache structure.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_release_cache(randomx_cache* cache);
|
||||
|
||||
/**
|
||||
* Creates a randomx_dataset structure and allocates memory for RandomX Dataset.
|
||||
*
|
||||
* @param flags is the initialization flags. Only one flag is supported (can be set or not set):
|
||||
* RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
|
||||
*
|
||||
* @return Pointer to an allocated randomx_dataset structure.
|
||||
* NULL is returned if memory allocation fails.
|
||||
*/
|
||||
RANDOMX_EXPORT randomx_dataset *randomx_alloc_dataset(randomx_flags flags);
|
||||
|
||||
/**
|
||||
* Gets the number of items contained in the dataset.
|
||||
*
|
||||
* @return the number of items contained in the dataset.
|
||||
*/
|
||||
RANDOMX_EXPORT unsigned long randomx_dataset_item_count(void);
|
||||
|
||||
/**
|
||||
* Initializes dataset items.
|
||||
*
|
||||
* Note: In order to use the Dataset, all items from 0 to (randomx_dataset_item_count() - 1) must be initialized.
|
||||
* This may be done by several calls to this function using non-overlapping item sequences.
|
||||
*
|
||||
* @param dataset is a pointer to a previously allocated randomx_dataset structure. Must not be NULL.
|
||||
* @param cache is a pointer to a previously allocated and initialized randomx_cache structure. Must not be NULL.
|
||||
* @param startItem is the item number where intialization should start.
|
||||
* @param itemCount is the number of items that should be initialized.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount);
|
||||
|
||||
/**
|
||||
* Returns a pointer to the internal memory buffer of the dataset structure. The size
|
||||
* of the internal memory buffer is randomx_dataset_item_count() * RANDOMX_DATASET_ITEM_SIZE.
|
||||
*
|
||||
* @param dataset is dataset is a pointer to a previously allocated randomx_dataset structure. Must not be NULL.
|
||||
*
|
||||
* @return Pointer to the internal memory buffer of the dataset structure.
|
||||
*/
|
||||
RANDOMX_EXPORT void *randomx_get_dataset_memory(randomx_dataset *dataset);
|
||||
|
||||
/**
|
||||
* Releases all memory occupied by the randomx_dataset structure.
|
||||
*
|
||||
* @param dataset is a pointer to a previously allocated randomx_dataset structure.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_release_dataset(randomx_dataset *dataset);
|
||||
|
||||
/**
|
||||
* Creates and initializes a RandomX virtual machine.
|
||||
*
|
||||
* @param flags is any combination of these 4 flags (each flag can be set or not set):
|
||||
* RANDOMX_FLAG_LARGE_PAGES - allocate scratchpad memory in large pages
|
||||
* RANDOMX_FLAG_HARD_AES - virtual machine will use hardware accelerated AES
|
||||
* RANDOMX_FLAG_FULL_MEM - virtual machine will use the full dataset
|
||||
* RANDOMX_FLAG_JIT - virtual machine will use a JIT compiler
|
||||
* The numeric values of the flags are ordered so that a higher value will provide
|
||||
* faster hash calculation and a lower numeric value will provide higher portability.
|
||||
* Using RANDOMX_FLAG_DEFAULT (all flags not set) works on all platforms, but is the slowest.
|
||||
* @param cache is a pointer to an initialized randomx_cache structure. Can be
|
||||
* NULL if RANDOMX_FLAG_FULL_MEM is set.
|
||||
* @param dataset is a pointer to a randomx_dataset structure. Can be NULL
|
||||
* if RANDOMX_FLAG_FULL_MEM is not set.
|
||||
*
|
||||
* @return Pointer to an initialized randomx_vm structure.
|
||||
* Returns NULL if:
|
||||
* (1) Scratchpad memory allocation fails.
|
||||
* (2) The requested initialization flags are not supported on the current platform.
|
||||
* (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set
|
||||
* (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set
|
||||
*/
|
||||
RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset);
|
||||
|
||||
/**
|
||||
* Reinitializes a virtual machine with a new Cache. This function should be called anytime
|
||||
* the Cache is reinitialized with a new key.
|
||||
*
|
||||
* @param machine is a pointer to a randomx_vm structure that was initialized
|
||||
* without RANDOMX_FLAG_FULL_MEM. Must not be NULL.
|
||||
* @param cache is a pointer to an initialized randomx_cache structure. Must not be NULL.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache);
|
||||
|
||||
/**
|
||||
* Reinitializes a virtual machine with a new Dataset.
|
||||
*
|
||||
* @param machine is a pointer to a randomx_vm structure that was initialized
|
||||
* with RANDOMX_FLAG_FULL_MEM. Must not be NULL.
|
||||
* @param dataset is a pointer to an initialized randomx_dataset structure. Must not be NULL.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset);
|
||||
|
||||
/**
|
||||
* Releases all memory occupied by the randomx_vm structure.
|
||||
*
|
||||
* @param machine is a pointer to a previously created randomx_vm structure.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
|
||||
|
||||
/**
|
||||
* Calculates a RandomX hash value.
|
||||
*
|
||||
* @param machine is a pointer to a randomx_vm structure. Must not be NULL.
|
||||
* @param input is a pointer to memory to be hashed. Must not be NULL.
|
||||
* @param inputSize is the number of bytes to be hashed.
|
||||
* @param output is a pointer to memory where the hash will be stored. Must not
|
||||
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
80
src/crypto/randomx/reciprocal.c
Normal file
80
src/crypto/randomx/reciprocal.c
Normal file
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "reciprocal.h"
|
||||
|
||||
/*
|
||||
Calculates rcp = 2**x / divisor for highest integer x such that rcp < 2**64.
|
||||
divisor must not be 0 or a power of 2
|
||||
|
||||
Equivalent x86 assembly (divisor in rcx):
|
||||
|
||||
mov edx, 1
|
||||
mov r8, rcx
|
||||
xor eax, eax
|
||||
bsr rcx, rcx
|
||||
shl rdx, cl
|
||||
div r8
|
||||
ret
|
||||
|
||||
*/
|
||||
uint64_t randomx_reciprocal(uint64_t divisor) {
|
||||
|
||||
assert(divisor != 0);
|
||||
|
||||
const uint64_t p2exp63 = 1ULL << 63;
|
||||
|
||||
uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor;
|
||||
|
||||
unsigned bsr = 0; //highest set bit in divisor
|
||||
|
||||
for (uint64_t bit = divisor; bit > 0; bit >>= 1)
|
||||
bsr++;
|
||||
|
||||
for (unsigned shift = 0; shift < bsr; shift++) {
|
||||
if (remainder >= divisor - remainder) {
|
||||
quotient = quotient * 2 + 1;
|
||||
remainder = remainder * 2 - divisor;
|
||||
}
|
||||
else {
|
||||
quotient = quotient * 2;
|
||||
remainder = remainder * 2;
|
||||
}
|
||||
}
|
||||
|
||||
return quotient;
|
||||
}
|
||||
|
||||
#if !RANDOMX_HAVE_FAST_RECIPROCAL
|
||||
|
||||
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
|
||||
return randomx_reciprocal(divisor);
|
||||
}
|
||||
|
||||
#endif
|
48
src/crypto/randomx/reciprocal.h
Normal file
48
src/crypto/randomx/reciprocal.h
Normal file
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#define RANDOMX_HAVE_FAST_RECIPROCAL 1
|
||||
#else
|
||||
#define RANDOMX_HAVE_FAST_RECIPROCAL 0
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uint64_t randomx_reciprocal(uint64_t);
|
||||
uint64_t randomx_reciprocal_fast(uint64_t);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
364
src/crypto/randomx/soft_aes.cpp
Normal file
364
src/crypto/randomx/soft_aes.cpp
Normal file
|
@ -0,0 +1,364 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "soft_aes.h"
|
||||
|
||||
alignas(16) const uint8_t sbox[256] = {
|
||||
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
|
||||
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
|
||||
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
|
||||
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
|
||||
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
|
||||
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
|
||||
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
|
||||
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
|
||||
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
|
||||
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
|
||||
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
|
||||
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
|
||||
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
|
||||
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
|
||||
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
|
||||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
|
||||
};
|
||||
|
||||
alignas(16) const uint32_t lutEnc0[256] = {
|
||||
0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
|
||||
0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec,
|
||||
0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
|
||||
0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b,
|
||||
0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83,
|
||||
0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
|
||||
0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f,
|
||||
0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea,
|
||||
0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
|
||||
0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413,
|
||||
0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6,
|
||||
0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
|
||||
0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511,
|
||||
0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b,
|
||||
0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
|
||||
0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf,
|
||||
0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e,
|
||||
0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
|
||||
0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b,
|
||||
0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad,
|
||||
0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
|
||||
0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2,
|
||||
0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949,
|
||||
0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
|
||||
0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697,
|
||||
0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f,
|
||||
0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
|
||||
0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27,
|
||||
0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433,
|
||||
0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
|
||||
0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0,
|
||||
0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c,
|
||||
};
|
||||
|
||||
alignas(16) const uint32_t lutEnc1[256] = {
|
||||
0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154,
|
||||
0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a,
|
||||
0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b,
|
||||
0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b,
|
||||
0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f,
|
||||
0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f,
|
||||
0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5,
|
||||
0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f,
|
||||
0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb,
|
||||
0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397,
|
||||
0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed,
|
||||
0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a,
|
||||
0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194,
|
||||
0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3,
|
||||
0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104,
|
||||
0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d,
|
||||
0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39,
|
||||
0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695,
|
||||
0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83,
|
||||
0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76,
|
||||
0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4,
|
||||
0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b,
|
||||
0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0,
|
||||
0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018,
|
||||
0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751,
|
||||
0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85,
|
||||
0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12,
|
||||
0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9,
|
||||
0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7,
|
||||
0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a,
|
||||
0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8,
|
||||
0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a,
|
||||
};
|
||||
|
||||
alignas(16) const uint32_t lutEnc2[256] = {
|
||||
0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5,
|
||||
0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76,
|
||||
0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0,
|
||||
0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0,
|
||||
0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc,
|
||||
0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15,
|
||||
0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a,
|
||||
0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75,
|
||||
0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0,
|
||||
0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784,
|
||||
0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b,
|
||||
0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf,
|
||||
0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485,
|
||||
0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8,
|
||||
0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5,
|
||||
0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2,
|
||||
0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917,
|
||||
0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573,
|
||||
0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388,
|
||||
0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db,
|
||||
0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c,
|
||||
0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79,
|
||||
0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9,
|
||||
0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808,
|
||||
0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6,
|
||||
0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a,
|
||||
0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e,
|
||||
0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e,
|
||||
0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794,
|
||||
0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf,
|
||||
0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868,
|
||||
0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16,
|
||||
};
|
||||
|
||||
alignas(16) const uint32_t lutEnc3[256] = {
|
||||
0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5,
|
||||
0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676,
|
||||
0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0,
|
||||
0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0,
|
||||
0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc,
|
||||
0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515,
|
||||
0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a,
|
||||
0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575,
|
||||
0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0,
|
||||
0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484,
|
||||
0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b,
|
||||
0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf,
|
||||
0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585,
|
||||
0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8,
|
||||
0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5,
|
||||
0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2,
|
||||
0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717,
|
||||
0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373,
|
||||
0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888,
|
||||
0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb,
|
||||
0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c,
|
||||
0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979,
|
||||
0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9,
|
||||
0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808,
|
||||
0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6,
|
||||
0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a,
|
||||
0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e,
|
||||
0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e,
|
||||
0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494,
|
||||
0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf,
|
||||
0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868,
|
||||
0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616,
|
||||
};
|
||||
|
||||
alignas(16) const uint32_t lutDec0[256] = {
|
||||
0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
|
||||
0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5,
|
||||
0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
|
||||
0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e,
|
||||
0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d,
|
||||
0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
|
||||
0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566,
|
||||
0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed,
|
||||
0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
|
||||
0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd,
|
||||
0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060,
|
||||
0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
|
||||
0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c,
|
||||
0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624,
|
||||
0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
|
||||
0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14,
|
||||
0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b,
|
||||
0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
|
||||
0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177,
|
||||
0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322,
|
||||
0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
|
||||
0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382,
|
||||
0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb,
|
||||
0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
|
||||
0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235,
|
||||
0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117,
|
||||
0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
|
||||
0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d,
|
||||
0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a,
|
||||
0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
|
||||
0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff,
|
||||
0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0,
|
||||
};
|
||||
|
||||
alignas(16) const uint32_t lutDec1[256] = {
|
||||
0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93,
|
||||
0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f,
|
||||
0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6,
|
||||
0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44,
|
||||
0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4,
|
||||
0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994,
|
||||
0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a,
|
||||
0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c,
|
||||
0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a,
|
||||
0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51,
|
||||
0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff,
|
||||
0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db,
|
||||
0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e,
|
||||
0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a,
|
||||
0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16,
|
||||
0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8,
|
||||
0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34,
|
||||
0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420,
|
||||
0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0,
|
||||
0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef,
|
||||
0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4,
|
||||
0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5,
|
||||
0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b,
|
||||
0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6,
|
||||
0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0,
|
||||
0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f,
|
||||
0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f,
|
||||
0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13,
|
||||
0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c,
|
||||
0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886,
|
||||
0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41,
|
||||
0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042,
|
||||
};
|
||||
|
||||
alignas(16) const uint32_t lutDec2[256] = {
|
||||
0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303,
|
||||
0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3,
|
||||
0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9,
|
||||
0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8,
|
||||
0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a,
|
||||
0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b,
|
||||
0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab,
|
||||
0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82,
|
||||
0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe,
|
||||
0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110,
|
||||
0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15,
|
||||
0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee,
|
||||
0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72,
|
||||
0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e,
|
||||
0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a,
|
||||
0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9,
|
||||
0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e,
|
||||
0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011,
|
||||
0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3,
|
||||
0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90,
|
||||
0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf,
|
||||
0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af,
|
||||
0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb,
|
||||
0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8,
|
||||
0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066,
|
||||
0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6,
|
||||
0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51,
|
||||
0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347,
|
||||
0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1,
|
||||
0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db,
|
||||
0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195,
|
||||
0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257,
|
||||
};
|
||||
|
||||
alignas(16) const uint32_t lutDec3[256] = {
|
||||
0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3,
|
||||
0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362,
|
||||
0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3,
|
||||
0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9,
|
||||
0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace,
|
||||
0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08,
|
||||
0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55,
|
||||
0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216,
|
||||
0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6,
|
||||
0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e,
|
||||
0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550,
|
||||
0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8,
|
||||
0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a,
|
||||
0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36,
|
||||
0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12,
|
||||
0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e,
|
||||
0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb,
|
||||
0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6,
|
||||
0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1,
|
||||
0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033,
|
||||
0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad,
|
||||
0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3,
|
||||
0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b,
|
||||
0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815,
|
||||
0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2,
|
||||
0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691,
|
||||
0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165,
|
||||
0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6,
|
||||
0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147,
|
||||
0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44,
|
||||
0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d,
|
||||
0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8,
|
||||
};
|
||||
|
||||
rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
uint32_t s0, s1, s2, s3;
|
||||
|
||||
s0 = rx_vec_i128_w(in);
|
||||
s1 = rx_vec_i128_z(in);
|
||||
s2 = rx_vec_i128_y(in);
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]),
|
||||
(lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]),
|
||||
(lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]),
|
||||
(lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24])
|
||||
);
|
||||
|
||||
return rx_xor_vec_i128(out, key);
|
||||
}
|
||||
|
||||
rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
uint32_t s0, s1, s2, s3;
|
||||
|
||||
s0 = rx_vec_i128_w(in);
|
||||
s1 = rx_vec_i128_z(in);
|
||||
s2 = rx_vec_i128_y(in);
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]),
|
||||
(lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]),
|
||||
(lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]),
|
||||
(lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24])
|
||||
);
|
||||
|
||||
return rx_xor_vec_i128(out, key);
|
||||
}
|
46
src/crypto/randomx/soft_aes.h
Normal file
46
src/crypto/randomx/soft_aes.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "intrin_portable.h"
|
||||
|
||||
rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
||||
|
||||
rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
||||
|
||||
template<bool soft>
|
||||
inline rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
return soft ? soft_aesenc(in, key) : rx_aesenc_vec_i128(in, key);
|
||||
}
|
||||
|
||||
template<bool soft>
|
||||
inline rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
return soft ? soft_aesdec(in, key) : rx_aesdec_vec_i128(in, key);
|
||||
}
|
891
src/crypto/randomx/superscalar.cpp
Normal file
891
src/crypto/randomx/superscalar.cpp
Normal file
|
@ -0,0 +1,891 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "configuration.h"
|
||||
#include "program.hpp"
|
||||
#include "blake2/endian.h"
|
||||
#include "superscalar.hpp"
|
||||
#include "intrin_portable.h"
|
||||
#include "reciprocal.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
static bool isMultiplication(SuperscalarInstructionType type) {
|
||||
return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP;
|
||||
}
|
||||
|
||||
//uOPs (micro-ops) are represented only by the execution port they can go to
|
||||
namespace ExecutionPort {
|
||||
using type = int;
|
||||
constexpr type Null = 0;
|
||||
constexpr type P0 = 1;
|
||||
constexpr type P1 = 2;
|
||||
constexpr type P5 = 4;
|
||||
constexpr type P01 = P0 | P1;
|
||||
constexpr type P05 = P0 | P5;
|
||||
constexpr type P015 = P0 | P1 | P5;
|
||||
}
|
||||
|
||||
//Macro-operation as output of the x86 decoder
|
||||
//Usually one macro-op = one x86 instruction, but 2 instructions are sometimes fused into 1 macro-op
|
||||
//Macro-op can consist of 1 or 2 uOPs.
|
||||
class MacroOp {
|
||||
public:
|
||||
MacroOp(const char* name, int size)
|
||||
: name_(name), size_(size), latency_(0), uop1_(ExecutionPort::Null), uop2_(ExecutionPort::Null) {}
|
||||
MacroOp(const char* name, int size, int latency, ExecutionPort::type uop)
|
||||
: name_(name), size_(size), latency_(latency), uop1_(uop), uop2_(ExecutionPort::Null) {}
|
||||
MacroOp(const char* name, int size, int latency, ExecutionPort::type uop1, ExecutionPort::type uop2)
|
||||
: name_(name), size_(size), latency_(latency), uop1_(uop1), uop2_(uop2) {}
|
||||
MacroOp(const MacroOp& parent, bool dependent)
|
||||
: name_(parent.name_), size_(parent.size_), latency_(parent.latency_), uop1_(parent.uop1_), uop2_(parent.uop2_), dependent_(dependent) {}
|
||||
const char* getName() const {
|
||||
return name_;
|
||||
}
|
||||
int getSize() const {
|
||||
return size_;
|
||||
}
|
||||
int getLatency() const {
|
||||
return latency_;
|
||||
}
|
||||
ExecutionPort::type getUop1() const {
|
||||
return uop1_;
|
||||
}
|
||||
ExecutionPort::type getUop2() const {
|
||||
return uop2_;
|
||||
}
|
||||
bool isSimple() const {
|
||||
return uop2_ == ExecutionPort::Null;
|
||||
}
|
||||
bool isEliminated() const {
|
||||
return uop1_ == ExecutionPort::Null;
|
||||
}
|
||||
bool isDependent() const {
|
||||
return dependent_;
|
||||
}
|
||||
static const MacroOp Add_rr;
|
||||
static const MacroOp Add_ri;
|
||||
static const MacroOp Lea_sib;
|
||||
static const MacroOp Sub_rr;
|
||||
static const MacroOp Imul_rr;
|
||||
static const MacroOp Imul_r;
|
||||
static const MacroOp Mul_r;
|
||||
static const MacroOp Mov_rr;
|
||||
static const MacroOp Mov_ri64;
|
||||
static const MacroOp Xor_rr;
|
||||
static const MacroOp Xor_ri;
|
||||
static const MacroOp Ror_rcl;
|
||||
static const MacroOp Ror_ri;
|
||||
static const MacroOp TestJz_fused;
|
||||
static const MacroOp Xor_self;
|
||||
static const MacroOp Cmp_ri;
|
||||
static const MacroOp Setcc_r;
|
||||
private:
|
||||
const char* name_;
|
||||
int size_;
|
||||
int latency_;
|
||||
ExecutionPort::type uop1_;
|
||||
ExecutionPort::type uop2_;
|
||||
bool dependent_ = false;
|
||||
};
|
||||
|
||||
//Size: 3 bytes
|
||||
const MacroOp MacroOp::Add_rr = MacroOp("add r,r", 3, 1, ExecutionPort::P015);
|
||||
const MacroOp MacroOp::Sub_rr = MacroOp("sub r,r", 3, 1, ExecutionPort::P015);
|
||||
const MacroOp MacroOp::Xor_rr = MacroOp("xor r,r", 3, 1, ExecutionPort::P015);
|
||||
const MacroOp MacroOp::Imul_r = MacroOp("imul r", 3, 4, ExecutionPort::P1, ExecutionPort::P5);
|
||||
const MacroOp MacroOp::Mul_r = MacroOp("mul r", 3, 4, ExecutionPort::P1, ExecutionPort::P5);
|
||||
const MacroOp MacroOp::Mov_rr = MacroOp("mov r,r", 3);
|
||||
|
||||
//Size: 4 bytes
|
||||
const MacroOp MacroOp::Lea_sib = MacroOp("lea r,r+r*s", 4, 1, ExecutionPort::P01);
|
||||
const MacroOp MacroOp::Imul_rr = MacroOp("imul r,r", 4, 3, ExecutionPort::P1);
|
||||
const MacroOp MacroOp::Ror_ri = MacroOp("ror r,i", 4, 1, ExecutionPort::P05);
|
||||
|
||||
//Size: 7 bytes (can be optionally padded with nop to 8 or 9 bytes)
|
||||
const MacroOp MacroOp::Add_ri = MacroOp("add r,i", 7, 1, ExecutionPort::P015);
|
||||
const MacroOp MacroOp::Xor_ri = MacroOp("xor r,i", 7, 1, ExecutionPort::P015);
|
||||
|
||||
//Size: 10 bytes
|
||||
const MacroOp MacroOp::Mov_ri64 = MacroOp("mov rax,i64", 10, 1, ExecutionPort::P015);
|
||||
|
||||
//Unused:
|
||||
const MacroOp MacroOp::Ror_rcl = MacroOp("ror r,cl", 3, 1, ExecutionPort::P0, ExecutionPort::P5);
|
||||
const MacroOp MacroOp::Xor_self = MacroOp("xor rcx,rcx", 3);
|
||||
const MacroOp MacroOp::Cmp_ri = MacroOp("cmp r,i", 7, 1, ExecutionPort::P015);
|
||||
const MacroOp MacroOp::Setcc_r = MacroOp("setcc cl", 3, 1, ExecutionPort::P05);
|
||||
const MacroOp MacroOp::TestJz_fused = MacroOp("testjz r,i", 13, 0, ExecutionPort::P5);
|
||||
|
||||
const MacroOp IMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Mul_r, MacroOp::Mov_rr };
|
||||
const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr };
|
||||
const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) };
|
||||
|
||||
class SuperscalarInstructionInfo {
|
||||
public:
|
||||
const char* getName() const {
|
||||
return name_;
|
||||
}
|
||||
int getSize() const {
|
||||
return ops_.size();
|
||||
}
|
||||
bool isSimple() const {
|
||||
return getSize() == 1;
|
||||
}
|
||||
int getLatency() const {
|
||||
return latency_;
|
||||
}
|
||||
const MacroOp& getOp(int index) const {
|
||||
return ops_[index];
|
||||
}
|
||||
SuperscalarInstructionType getType() const {
|
||||
return type_;
|
||||
}
|
||||
int getResultOp() const {
|
||||
return resultOp_;
|
||||
}
|
||||
int getDstOp() const {
|
||||
return dstOp_;
|
||||
}
|
||||
int getSrcOp() const {
|
||||
return srcOp_;
|
||||
}
|
||||
static const SuperscalarInstructionInfo ISUB_R;
|
||||
static const SuperscalarInstructionInfo IXOR_R;
|
||||
static const SuperscalarInstructionInfo IADD_RS;
|
||||
static const SuperscalarInstructionInfo IMUL_R;
|
||||
static const SuperscalarInstructionInfo IROR_C;
|
||||
static const SuperscalarInstructionInfo IADD_C7;
|
||||
static const SuperscalarInstructionInfo IXOR_C7;
|
||||
static const SuperscalarInstructionInfo IADD_C8;
|
||||
static const SuperscalarInstructionInfo IXOR_C8;
|
||||
static const SuperscalarInstructionInfo IADD_C9;
|
||||
static const SuperscalarInstructionInfo IXOR_C9;
|
||||
static const SuperscalarInstructionInfo IMULH_R;
|
||||
static const SuperscalarInstructionInfo ISMULH_R;
|
||||
static const SuperscalarInstructionInfo IMUL_RCP;
|
||||
static const SuperscalarInstructionInfo NOP;
|
||||
private:
|
||||
const char* name_;
|
||||
SuperscalarInstructionType type_;
|
||||
std::vector<MacroOp> ops_;
|
||||
int latency_;
|
||||
int resultOp_ = 0;
|
||||
int dstOp_ = 0;
|
||||
int srcOp_;
|
||||
|
||||
SuperscalarInstructionInfo(const char* name)
|
||||
: name_(name), type_(SuperscalarInstructionType::INVALID), latency_(0) {}
|
||||
SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp& op, int srcOp)
|
||||
: name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) {
|
||||
ops_.push_back(MacroOp(op));
|
||||
}
|
||||
template <size_t N>
|
||||
SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp)
|
||||
: name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) {
|
||||
for (unsigned i = 0; i < N; ++i) {
|
||||
ops_.push_back(MacroOp(arr[i]));
|
||||
latency_ += ops_.back().getLatency();
|
||||
}
|
||||
static_assert(N > 1, "Invalid array size");
|
||||
}
|
||||
};
|
||||
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISUB_R = SuperscalarInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_R = SuperscalarInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_RS = SuperscalarInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_R = SuperscalarInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IROR_C = SuperscalarInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1);
|
||||
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C7 = SuperscalarInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C7 = SuperscalarInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C8 = SuperscalarInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C8 = SuperscalarInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C9 = SuperscalarInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C9 = SuperscalarInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);
|
||||
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
|
||||
|
||||
const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP");
|
||||
|
||||
//these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions.
|
||||
//RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
|
||||
//Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
|
||||
const int buffer0[] = { 4, 8, 4 };
|
||||
const int buffer1[] = { 7, 3, 3, 3 };
|
||||
const int buffer2[] = { 3, 7, 3, 3 };
|
||||
const int buffer3[] = { 4, 9, 3 };
|
||||
const int buffer4[] = { 4, 4, 4, 4 };
|
||||
const int buffer5[] = { 3, 3, 10 };
|
||||
|
||||
class DecoderBuffer {
|
||||
public:
|
||||
static const DecoderBuffer Default;
|
||||
template <size_t N>
|
||||
DecoderBuffer(const char* name, int index, const int(&arr)[N])
|
||||
: name_(name), index_(index), counts_(arr), opsCount_(N) {}
|
||||
const int* getCounts() const {
|
||||
return counts_;
|
||||
}
|
||||
int getSize() const {
|
||||
return opsCount_;
|
||||
}
|
||||
int getIndex() const {
|
||||
return index_;
|
||||
}
|
||||
const char* getName() const {
|
||||
return name_;
|
||||
}
|
||||
const DecoderBuffer* fetchNext(SuperscalarInstructionType instrType, int cycle, int mulCount, Blake2Generator& gen) const {
|
||||
//If the current RandomX instruction is "IMULH", the next fetch configuration must be 3-3-10
|
||||
//because the full 128-bit multiplication instruction is 3 bytes long and decodes to 2 uOPs on Intel CPUs.
|
||||
//Intel CPUs can decode at most 4 uOPs per cycle, so this requires a 2-1-1 configuration for a total of 3 macro ops.
|
||||
if (instrType == SuperscalarInstructionType::IMULH_R || instrType == SuperscalarInstructionType::ISMULH_R)
|
||||
return &decodeBuffer3310;
|
||||
|
||||
//To make sure that the multiplication port is saturated, a 4-4-4-4 configuration is generated if the number of multiplications
|
||||
//is lower than the number of cycles.
|
||||
if (mulCount < cycle + 1)
|
||||
return &decodeBuffer4444;
|
||||
|
||||
//If the current RandomX instruction is "IMUL_RCP", the next buffer must begin with a 4-byte slot for multiplication.
|
||||
if(instrType == SuperscalarInstructionType::IMUL_RCP)
|
||||
return (gen.getByte() & 1) ? &decodeBuffer484 : &decodeBuffer493;
|
||||
|
||||
//Default: select a random fetch configuration.
|
||||
return fetchNextDefault(gen);
|
||||
}
|
||||
private:
|
||||
const char* name_;
|
||||
int index_;
|
||||
const int* counts_;
|
||||
int opsCount_;
|
||||
DecoderBuffer() : index_(-1) {}
|
||||
static const DecoderBuffer decodeBuffer484;
|
||||
static const DecoderBuffer decodeBuffer7333;
|
||||
static const DecoderBuffer decodeBuffer3733;
|
||||
static const DecoderBuffer decodeBuffer493;
|
||||
static const DecoderBuffer decodeBuffer4444;
|
||||
static const DecoderBuffer decodeBuffer3310;
|
||||
static const DecoderBuffer* decodeBuffers[4];
|
||||
const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const {
|
||||
return decodeBuffers[gen.getByte() & 3];
|
||||
}
|
||||
};
|
||||
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer493 = DecoderBuffer("4,9,3", 3, buffer3);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer4444 = DecoderBuffer("4,4,4,4", 4, buffer4);
|
||||
const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 5, buffer5);
|
||||
|
||||
const DecoderBuffer* DecoderBuffer::decodeBuffers[4] = {
|
||||
&DecoderBuffer::decodeBuffer484,
|
||||
&DecoderBuffer::decodeBuffer7333,
|
||||
&DecoderBuffer::decodeBuffer3733,
|
||||
&DecoderBuffer::decodeBuffer493,
|
||||
};
|
||||
|
||||
const DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
|
||||
|
||||
const SuperscalarInstructionInfo* slot_3[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R };
|
||||
const SuperscalarInstructionInfo* slot_3L[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R, &SuperscalarInstructionInfo::IMULH_R, &SuperscalarInstructionInfo::ISMULH_R };
|
||||
const SuperscalarInstructionInfo* slot_4[] = { &SuperscalarInstructionInfo::IROR_C, &SuperscalarInstructionInfo::IADD_RS };
|
||||
const SuperscalarInstructionInfo* slot_7[] = { &SuperscalarInstructionInfo::IXOR_C7, &SuperscalarInstructionInfo::IADD_C7 };
|
||||
const SuperscalarInstructionInfo* slot_8[] = { &SuperscalarInstructionInfo::IXOR_C8, &SuperscalarInstructionInfo::IADD_C8 };
|
||||
const SuperscalarInstructionInfo* slot_9[] = { &SuperscalarInstructionInfo::IXOR_C9, &SuperscalarInstructionInfo::IADD_C9 };
|
||||
const SuperscalarInstructionInfo* slot_10 = &SuperscalarInstructionInfo::IMUL_RCP;
|
||||
|
||||
static bool selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen, int& reg) {
|
||||
int index;
|
||||
if (availableRegisters.size() == 0)
|
||||
return false;
|
||||
|
||||
if (availableRegisters.size() > 1) {
|
||||
index = gen.getInt32() % availableRegisters.size();
|
||||
}
|
||||
else {
|
||||
index = 0;
|
||||
}
|
||||
reg = availableRegisters[index];
|
||||
return true;
|
||||
}
|
||||
|
||||
class RegisterInfo {
|
||||
public:
|
||||
RegisterInfo() : latency(0), lastOpGroup(SuperscalarInstructionType::INVALID), lastOpPar(-1), value(0) {}
|
||||
int latency;
|
||||
SuperscalarInstructionType lastOpGroup;
|
||||
int lastOpPar;
|
||||
int value;
|
||||
};
|
||||
|
||||
//"SuperscalarInstruction" consists of one or more macro-ops
|
||||
class SuperscalarInstruction {
|
||||
public:
|
||||
void toInstr(Instruction& instr) { //translate to a RandomX instruction format
|
||||
instr.opcode = (int)getType();
|
||||
instr.dst = dst_;
|
||||
instr.src = src_ >= 0 ? src_ : dst_;
|
||||
instr.setMod(mod_);
|
||||
instr.setImm32(imm32_);
|
||||
}
|
||||
|
||||
void createForSlot(Blake2Generator& gen, int slotSize, int fetchType, bool isLast, bool isFirst) {
|
||||
switch (slotSize)
|
||||
{
|
||||
case 3:
|
||||
//if this is the last slot, we can also select "IMULH" instructions
|
||||
if (isLast) {
|
||||
create(slot_3L[gen.getByte() & 3], gen);
|
||||
}
|
||||
else {
|
||||
create(slot_3[gen.getByte() & 1], gen);
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
|
||||
if (fetchType == 4 && !isLast) {
|
||||
create(&SuperscalarInstructionInfo::IMUL_R, gen);
|
||||
}
|
||||
else {
|
||||
create(slot_4[gen.getByte() & 1], gen);
|
||||
}
|
||||
break;
|
||||
case 7:
|
||||
create(slot_7[gen.getByte() & 1], gen);
|
||||
break;
|
||||
case 8:
|
||||
create(slot_8[gen.getByte() & 1], gen);
|
||||
break;
|
||||
case 9:
|
||||
create(slot_9[gen.getByte() & 1], gen);
|
||||
break;
|
||||
case 10:
|
||||
create(slot_10, gen);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
void create(const SuperscalarInstructionInfo* info, Blake2Generator& gen) {
|
||||
info_ = info;
|
||||
reset();
|
||||
switch (info->getType())
|
||||
{
|
||||
case SuperscalarInstructionType::ISUB_R: {
|
||||
mod_ = 0;
|
||||
imm32_ = 0;
|
||||
opGroup_ = SuperscalarInstructionType::IADD_RS;
|
||||
groupParIsSource_ = true;
|
||||
} break;
|
||||
|
||||
case SuperscalarInstructionType::IXOR_R: {
|
||||
mod_ = 0;
|
||||
imm32_ = 0;
|
||||
opGroup_ = SuperscalarInstructionType::IXOR_R;
|
||||
groupParIsSource_ = true;
|
||||
} break;
|
||||
|
||||
case SuperscalarInstructionType::IADD_RS: {
|
||||
mod_ = gen.getByte();
|
||||
imm32_ = 0;
|
||||
opGroup_ = SuperscalarInstructionType::IADD_RS;
|
||||
groupParIsSource_ = true;
|
||||
} break;
|
||||
|
||||
case SuperscalarInstructionType::IMUL_R: {
|
||||
mod_ = 0;
|
||||
imm32_ = 0;
|
||||
opGroup_ = SuperscalarInstructionType::IMUL_R;
|
||||
groupParIsSource_ = true;
|
||||
} break;
|
||||
|
||||
case SuperscalarInstructionType::IROR_C: {
|
||||
mod_ = 0;
|
||||
do {
|
||||
imm32_ = gen.getByte() & 63;
|
||||
} while (imm32_ == 0);
|
||||
opGroup_ = SuperscalarInstructionType::IROR_C;
|
||||
opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
case SuperscalarInstructionType::IADD_C7:
|
||||
case SuperscalarInstructionType::IADD_C8:
|
||||
case SuperscalarInstructionType::IADD_C9: {
|
||||
mod_ = 0;
|
||||
imm32_ = gen.getInt32();
|
||||
opGroup_ = SuperscalarInstructionType::IADD_C7;
|
||||
opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
case SuperscalarInstructionType::IXOR_C7:
|
||||
case SuperscalarInstructionType::IXOR_C8:
|
||||
case SuperscalarInstructionType::IXOR_C9: {
|
||||
mod_ = 0;
|
||||
imm32_ = gen.getInt32();
|
||||
opGroup_ = SuperscalarInstructionType::IXOR_C7;
|
||||
opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
case SuperscalarInstructionType::IMULH_R: {
|
||||
canReuse_ = true;
|
||||
mod_ = 0;
|
||||
imm32_ = 0;
|
||||
opGroup_ = SuperscalarInstructionType::IMULH_R;
|
||||
opGroupPar_ = gen.getInt32();
|
||||
} break;
|
||||
|
||||
case SuperscalarInstructionType::ISMULH_R: {
|
||||
canReuse_ = true;
|
||||
mod_ = 0;
|
||||
imm32_ = 0;
|
||||
opGroup_ = SuperscalarInstructionType::ISMULH_R;
|
||||
opGroupPar_ = gen.getInt32();
|
||||
} break;
|
||||
|
||||
case SuperscalarInstructionType::IMUL_RCP: {
|
||||
mod_ = 0;
|
||||
do {
|
||||
imm32_ = gen.getInt32();
|
||||
} while ((imm32_ & (imm32_ - 1)) == 0);
|
||||
opGroup_ = SuperscalarInstructionType::IMUL_RCP;
|
||||
opGroupPar_ = -1;
|
||||
} break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool selectDestination(int cycle, bool allowChainedMul, RegisterInfo (®isters)[8], Blake2Generator& gen) {
|
||||
/*if (allowChainedMultiplication && opGroup_ == SuperscalarInstructionType::IMUL_R)
|
||||
std::cout << "Selecting destination with chained MUL enabled" << std::endl;*/
|
||||
std::vector<int> availableRegisters;
|
||||
//Conditions for the destination register:
|
||||
// * value must be ready at the required cycle
|
||||
// * cannot be the same as the source register unless the instruction allows it
|
||||
// - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
|
||||
// * register cannot be multiplied twice in a row unless allowChainedMul is true
|
||||
// - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
|
||||
// - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
|
||||
// * either the last instruction applied to the register or its source must be different than this instruction
|
||||
// - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
|
||||
// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != RegisterNeedsDisplacement))
|
||||
availableRegisters.push_back(i);
|
||||
}
|
||||
return selectRegister(availableRegisters, gen, dst_);
|
||||
}
|
||||
|
||||
bool selectSource(int cycle, RegisterInfo(®isters)[8], Blake2Generator& gen) {
|
||||
std::vector<int> availableRegisters;
|
||||
//all registers that are ready at the cycle
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
if (registers[i].latency <= cycle)
|
||||
availableRegisters.push_back(i);
|
||||
}
|
||||
//if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination
|
||||
if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) {
|
||||
if (availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement) {
|
||||
opGroupPar_ = src_ = RegisterNeedsDisplacement;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (selectRegister(availableRegisters, gen, src_)) {
|
||||
if (groupParIsSource_)
|
||||
opGroupPar_ = src_;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
SuperscalarInstructionType getType() {
|
||||
return info_->getType();
|
||||
}
|
||||
int getSource() {
|
||||
return src_;
|
||||
}
|
||||
int getDestination() {
|
||||
return dst_;
|
||||
}
|
||||
SuperscalarInstructionType getGroup() {
|
||||
return opGroup_;
|
||||
}
|
||||
int getGroupPar() {
|
||||
return opGroupPar_;
|
||||
}
|
||||
|
||||
const SuperscalarInstructionInfo& getInfo() const {
|
||||
return *info_;
|
||||
}
|
||||
|
||||
static const SuperscalarInstruction Null;
|
||||
|
||||
private:
|
||||
const SuperscalarInstructionInfo* info_;
|
||||
int src_ = -1;
|
||||
int dst_ = -1;
|
||||
int mod_;
|
||||
uint32_t imm32_;
|
||||
SuperscalarInstructionType opGroup_;
|
||||
int opGroupPar_;
|
||||
bool canReuse_ = false;
|
||||
bool groupParIsSource_ = false;
|
||||
|
||||
void reset() {
|
||||
src_ = dst_ = -1;
|
||||
canReuse_ = groupParIsSource_ = false;
|
||||
}
|
||||
|
||||
SuperscalarInstruction(const SuperscalarInstructionInfo* info) : info_(info) {
|
||||
}
|
||||
};
|
||||
|
||||
const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP);
|
||||
|
||||
constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_MAX_LATENCY + 4;
|
||||
constexpr int LOOK_FORWARD_CYCLES = 4;
|
||||
constexpr int MAX_THROWAWAY_COUNT = 256;
|
||||
|
||||
template<bool commit>
|
||||
static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
|
||||
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
|
||||
//port P1 (multiplication) by instructions that can go to any port.
|
||||
for (; cycle < RandomX_CurrentConfig.SuperscalarLatency + 4; ++cycle) {
|
||||
if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
|
||||
if (commit) {
|
||||
if (trace) std::cout << "; P5 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][2] = uop;
|
||||
}
|
||||
return cycle;
|
||||
}
|
||||
if ((uop & ExecutionPort::P0) != 0 && !portBusy[cycle][0]) {
|
||||
if (commit) {
|
||||
if (trace) std::cout << "; P0 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][0] = uop;
|
||||
}
|
||||
return cycle;
|
||||
}
|
||||
if ((uop & ExecutionPort::P1) != 0 && !portBusy[cycle][1]) {
|
||||
if (commit) {
|
||||
if (trace) std::cout << "; P1 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][1] = uop;
|
||||
}
|
||||
return cycle;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template<bool commit>
|
||||
static int scheduleMop(const MacroOp& mop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle, int depCycle) {
|
||||
//if this macro-op depends on the previous one, increase the starting cycle if needed
|
||||
//this handles an explicit dependency chain in IMUL_RCP
|
||||
if (mop.isDependent()) {
|
||||
cycle = (cycle > depCycle) ? cycle : depCycle;
|
||||
}
|
||||
//move instructions are eliminated and don't need an execution unit
|
||||
if (mop.isEliminated()) {
|
||||
if (commit)
|
||||
if (trace) std::cout << "; (eliminated)" << std::endl;
|
||||
return cycle;
|
||||
}
|
||||
else if (mop.isSimple()) {
|
||||
//this macro-op has only one uOP
|
||||
return scheduleUop<commit>(mop.getUop1(), portBusy, cycle);
|
||||
}
|
||||
else {
|
||||
//macro-ops with 2 uOPs are scheduled conservatively by requiring both uOPs to execute in the same cycle
|
||||
for (; cycle < RandomX_CurrentConfig.SuperscalarLatency + 4; ++cycle) {
|
||||
|
||||
int cycle1 = scheduleUop<false>(mop.getUop1(), portBusy, cycle);
|
||||
int cycle2 = scheduleUop<false>(mop.getUop2(), portBusy, cycle);
|
||||
|
||||
if (cycle1 == cycle2) {
|
||||
if (commit) {
|
||||
scheduleUop<true>(mop.getUop1(), portBusy, cycle1);
|
||||
scheduleUop<true>(mop.getUop2(), portBusy, cycle2);
|
||||
}
|
||||
return cycle1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen) {
|
||||
|
||||
ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3];
|
||||
memset(portBusy, 0, sizeof(portBusy));
|
||||
RegisterInfo registers[8];
|
||||
|
||||
const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default;
|
||||
SuperscalarInstruction currentInstruction = SuperscalarInstruction::Null;
|
||||
int macroOpIndex = 0;
|
||||
int codeSize = 0;
|
||||
int macroOpCount = 0;
|
||||
int cycle = 0;
|
||||
int depCycle = 0;
|
||||
int retireCycle = 0;
|
||||
bool portsSaturated = false;
|
||||
int programSize = 0;
|
||||
int mulCount = 0;
|
||||
int decodeCycle;
|
||||
int throwAwayCount = 0;
|
||||
|
||||
//decode instructions for RANDOMX_SUPERSCALAR_LATENCY cycles or until an execution port is saturated.
|
||||
//Each decode cycle decodes 16 bytes of x86 code.
|
||||
//Since a decode cycle produces on average 3.45 macro-ops and there are only 3 ALU ports, execution ports are always
|
||||
//saturated first. The cycle limit is present only to guarantee loop termination.
|
||||
//Program size is limited to SuperscalarMaxSize instructions.
|
||||
for (decodeCycle = 0; decodeCycle < RandomX_CurrentConfig.SuperscalarLatency && !portsSaturated && programSize < 3 * RandomX_CurrentConfig.SuperscalarLatency + 2; ++decodeCycle) {
|
||||
|
||||
//select a decode configuration
|
||||
decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen);
|
||||
if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
|
||||
|
||||
int bufferIndex = 0;
|
||||
|
||||
//fill all instruction slots in the current decode buffer
|
||||
while (bufferIndex < decodeBuffer->getSize()) {
|
||||
int topCycle = cycle;
|
||||
|
||||
//if we have issued all macro-ops for the current RandomX instruction, create a new instruction
|
||||
if (macroOpIndex >= currentInstruction.getInfo().getSize()) {
|
||||
if (portsSaturated || programSize >= 3 * RandomX_CurrentConfig.SuperscalarLatency + 2)
|
||||
break;
|
||||
//select an instruction so that the first macro-op fits into the current slot
|
||||
currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0);
|
||||
macroOpIndex = 0;
|
||||
if (trace) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
|
||||
}
|
||||
const MacroOp& mop = currentInstruction.getInfo().getOp(macroOpIndex);
|
||||
if (trace) std::cout << mop.getName() << " ";
|
||||
|
||||
//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
|
||||
int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
|
||||
if (scheduleCycle < 0) {
|
||||
if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
|
||||
//__debugbreak();
|
||||
portsSaturated = true;
|
||||
break;
|
||||
}
|
||||
|
||||
//find a source register (if applicable) that will be ready when this instruction executes
|
||||
if (macroOpIndex == currentInstruction.getInfo().getSrcOp()) {
|
||||
int forward;
|
||||
//if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward
|
||||
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) {
|
||||
if (trace) std::cout << "; src STALL at cycle " << cycle << std::endl;
|
||||
++scheduleCycle;
|
||||
++cycle;
|
||||
}
|
||||
//if no register was found, throw the instruction away and try another one
|
||||
if (forward == LOOK_FORWARD_CYCLES) {
|
||||
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
|
||||
throwAwayCount++;
|
||||
macroOpIndex = currentInstruction.getInfo().getSize();
|
||||
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
||||
//cycle = topCycle;
|
||||
continue;
|
||||
}
|
||||
//abort this decode buffer
|
||||
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
|
||||
currentInstruction = SuperscalarInstruction::Null;
|
||||
break;
|
||||
}
|
||||
if (trace) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
|
||||
}
|
||||
//find a destination register that will be ready when this instruction executes
|
||||
if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
|
||||
int forward;
|
||||
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
|
||||
if (trace) std::cout << "; dst STALL at cycle " << cycle << std::endl;
|
||||
++scheduleCycle;
|
||||
++cycle;
|
||||
}
|
||||
if (forward == LOOK_FORWARD_CYCLES) { //throw instruction away
|
||||
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
|
||||
throwAwayCount++;
|
||||
macroOpIndex = currentInstruction.getInfo().getSize();
|
||||
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
|
||||
//cycle = topCycle;
|
||||
continue;
|
||||
}
|
||||
//abort this decode buffer
|
||||
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
|
||||
currentInstruction = SuperscalarInstruction::Null;
|
||||
break;
|
||||
}
|
||||
if (trace) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
|
||||
}
|
||||
throwAwayCount = 0;
|
||||
|
||||
//recalculate when the instruction can be scheduled for execution based on operand availability
|
||||
scheduleCycle = scheduleMop<true>(mop, portBusy, scheduleCycle, scheduleCycle);
|
||||
|
||||
//calculate when the result will be ready
|
||||
depCycle = scheduleCycle + mop.getLatency();
|
||||
|
||||
//if this instruction writes the result, modify register information
|
||||
// RegisterInfo.latency - which cycle the register will be ready
|
||||
// RegisterInfo.lastOpGroup - the last operation that was applied to the register
|
||||
// RegisterInfo.lastOpPar - the last operation source value (-1 = constant, 0-7 = register)
|
||||
if (macroOpIndex == currentInstruction.getInfo().getResultOp()) {
|
||||
int dst = currentInstruction.getDestination();
|
||||
RegisterInfo& ri = registers[dst];
|
||||
retireCycle = depCycle;
|
||||
ri.latency = retireCycle;
|
||||
ri.lastOpGroup = currentInstruction.getGroup();
|
||||
ri.lastOpPar = currentInstruction.getGroupPar();
|
||||
if (trace) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
|
||||
}
|
||||
codeSize += mop.getSize();
|
||||
bufferIndex++;
|
||||
macroOpIndex++;
|
||||
macroOpCount++;
|
||||
|
||||
//terminating condition
|
||||
if (scheduleCycle >= RandomX_CurrentConfig.SuperscalarLatency) {
|
||||
portsSaturated = true;
|
||||
}
|
||||
cycle = topCycle;
|
||||
|
||||
//when all macro-ops of the current instruction have been issued, add the instruction into the program
|
||||
if (macroOpIndex >= currentInstruction.getInfo().getSize()) {
|
||||
currentInstruction.toInstr(prog(programSize++));
|
||||
mulCount += isMultiplication(currentInstruction.getType());
|
||||
}
|
||||
}
|
||||
++cycle;
|
||||
}
|
||||
|
||||
double ipc = (macroOpCount / (double)retireCycle);
|
||||
|
||||
memset(prog.asicLatencies, 0, sizeof(prog.asicLatencies));
|
||||
|
||||
//Calculate ASIC latency:
|
||||
//Assumes 1 cycle latency for all operations and unlimited parallelization.
|
||||
for (int i = 0; i < programSize; ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
int latDst = prog.asicLatencies[instr.dst] + 1;
|
||||
int latSrc = instr.dst != instr.src ? prog.asicLatencies[instr.src] + 1 : 0;
|
||||
prog.asicLatencies[instr.dst] = (latDst > latSrc) ? latDst : latSrc;
|
||||
}
|
||||
|
||||
//address register is the register with the highest ASIC latency
|
||||
int asicLatencyMax = 0;
|
||||
int addressReg = 0;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
if (prog.asicLatencies[i] > asicLatencyMax) {
|
||||
asicLatencyMax = prog.asicLatencies[i];
|
||||
addressReg = i;
|
||||
}
|
||||
prog.cpuLatencies[i] = registers[i].latency;
|
||||
}
|
||||
|
||||
prog.setSize(programSize);
|
||||
prog.setAddressRegister(addressReg);
|
||||
|
||||
prog.cpuLatency = retireCycle;
|
||||
prog.asicLatency = asicLatencyMax;
|
||||
prog.codeSize = codeSize;
|
||||
prog.macroOps = macroOpCount;
|
||||
prog.decodeCycles = decodeCycle;
|
||||
prog.ipc = ipc;
|
||||
prog.mulCount = mulCount;
|
||||
|
||||
|
||||
/*if(INFO) std::cout << "; ALU port utilization:" << std::endl;
|
||||
if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
|
||||
|
||||
int portCycles = 0;
|
||||
for (int i = 0; i < RandomX_Config.SuperscalarLatency + 4; ++i) {
|
||||
std::cout << "; " << std::setw(3) << i << " ";
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
std::cout << (portBusy[i][j] ? '*' : '_');
|
||||
portCycles += !!portBusy[i][j];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}*/
|
||||
}
|
||||
|
||||
void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t> *reciprocals) {
|
||||
for (unsigned j = 0; j < prog.getSize(); ++j) {
|
||||
Instruction& instr = prog(j);
|
||||
switch ((SuperscalarInstructionType)instr.opcode)
|
||||
{
|
||||
case SuperscalarInstructionType::ISUB_R:
|
||||
r[instr.dst] -= r[instr.src];
|
||||
break;
|
||||
case SuperscalarInstructionType::IXOR_R:
|
||||
r[instr.dst] ^= r[instr.src];
|
||||
break;
|
||||
case SuperscalarInstructionType::IADD_RS:
|
||||
r[instr.dst] += r[instr.src] << instr.getModShift();
|
||||
break;
|
||||
case SuperscalarInstructionType::IMUL_R:
|
||||
r[instr.dst] *= r[instr.src];
|
||||
break;
|
||||
case SuperscalarInstructionType::IROR_C:
|
||||
r[instr.dst] = rotr64(r[instr.dst], instr.getImm32());
|
||||
break;
|
||||
case SuperscalarInstructionType::IADD_C7:
|
||||
case SuperscalarInstructionType::IADD_C8:
|
||||
case SuperscalarInstructionType::IADD_C9:
|
||||
r[instr.dst] += signExtend2sCompl(instr.getImm32());
|
||||
break;
|
||||
case SuperscalarInstructionType::IXOR_C7:
|
||||
case SuperscalarInstructionType::IXOR_C8:
|
||||
case SuperscalarInstructionType::IXOR_C9:
|
||||
r[instr.dst] ^= signExtend2sCompl(instr.getImm32());
|
||||
break;
|
||||
case SuperscalarInstructionType::IMULH_R:
|
||||
r[instr.dst] = mulh(r[instr.dst], r[instr.src]);
|
||||
break;
|
||||
case SuperscalarInstructionType::ISMULH_R:
|
||||
r[instr.dst] = smulh(r[instr.dst], r[instr.src]);
|
||||
break;
|
||||
case SuperscalarInstructionType::IMUL_RCP:
|
||||
if (reciprocals != nullptr)
|
||||
r[instr.dst] *= (*reciprocals)[instr.getImm32()];
|
||||
else
|
||||
r[instr.dst] *= randomx_reciprocal(instr.getImm32());
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
60
src/crypto/randomx/superscalar.hpp
Normal file
60
src/crypto/randomx/superscalar.hpp
Normal file
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include "superscalar_program.hpp"
|
||||
#include "blake2_generator.hpp"
|
||||
|
||||
namespace randomx {
|
||||
// Intel Ivy Bridge reference
|
||||
enum class SuperscalarInstructionType { //uOPs (decode) execution ports latency code size
|
||||
ISUB_R = 0, //1 p015 1 3 (sub)
|
||||
IXOR_R = 1, //1 p015 1 3 (xor)
|
||||
IADD_RS = 2, //1 p01 1 4 (lea)
|
||||
IMUL_R = 3, //1 p1 3 4 (imul)
|
||||
IROR_C = 4, //1 p05 1 4 (ror)
|
||||
IADD_C7 = 5, //1 p015 1 7 (add)
|
||||
IXOR_C7 = 6, //1 p015 1 7 (xor)
|
||||
IADD_C8 = 7, //1+0 p015 1 7+1 (add+nop)
|
||||
IXOR_C8 = 8, //1+0 p015 1 7+1 (xor+nop)
|
||||
IADD_C9 = 9, //1+0 p015 1 7+2 (add+nop)
|
||||
IXOR_C9 = 10, //1+0 p015 1 7+2 (xor+nop)
|
||||
IMULH_R = 11, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+mul+mov)
|
||||
ISMULH_R = 12, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov)
|
||||
IMUL_RCP = 13, //1+1 p015+p1 4 10+4 (mov+imul)
|
||||
|
||||
COUNT = 14,
|
||||
INVALID = -1
|
||||
};
|
||||
|
||||
void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen);
|
||||
void executeSuperscalar(uint64_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t> *reciprocals = nullptr);
|
||||
}
|
73
src/crypto/randomx/superscalar_program.hpp
Normal file
73
src/crypto/randomx/superscalar_program.hpp
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include "instruction.hpp"
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class SuperscalarProgram {
|
||||
public:
|
||||
Instruction& operator()(int pc) {
|
||||
return programBuffer[pc];
|
||||
}
|
||||
uint32_t getSize() {
|
||||
return size;
|
||||
}
|
||||
void setSize(uint32_t val) {
|
||||
size = val;
|
||||
}
|
||||
int getAddressRegister() {
|
||||
return addrReg;
|
||||
}
|
||||
void setAddressRegister(int val) {
|
||||
addrReg = val;
|
||||
}
|
||||
|
||||
Instruction programBuffer[SuperscalarMaxSize];
|
||||
uint32_t size
|
||||
#ifndef NDEBUG
|
||||
= 0
|
||||
#endif
|
||||
;
|
||||
int addrReg;
|
||||
double ipc;
|
||||
int codeSize;
|
||||
int macroOps;
|
||||
int decodeCycles;
|
||||
int cpuLatency;
|
||||
int asicLatency;
|
||||
int mulCount;
|
||||
int cpuLatencies[8];
|
||||
int asicLatencies[8];
|
||||
};
|
||||
|
||||
}
|
137
src/crypto/randomx/virtual_machine.cpp
Normal file
137
src/crypto/randomx/virtual_machine.cpp
Normal file
|
@ -0,0 +1,137 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
#include "virtual_machine.hpp"
|
||||
#include "common.hpp"
|
||||
#include "aes_hash.hpp"
|
||||
#include "blake2/blake2.h"
|
||||
#include "intrin_portable.h"
|
||||
#include "allocator.hpp"
|
||||
|
||||
randomx_vm::~randomx_vm() {
|
||||
|
||||
}
|
||||
|
||||
void randomx_vm::resetRoundingMode() {
|
||||
rx_reset_float_state();
|
||||
}
|
||||
|
||||
namespace randomx {
|
||||
|
||||
static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
|
||||
auto exponent = entropy >> 59; //0..31
|
||||
auto mantissa = entropy & mantissaMask;
|
||||
exponent += exponentBias;
|
||||
exponent &= exponentMask;
|
||||
exponent <<= mantissaSize;
|
||||
return exponent | mantissa;
|
||||
}
|
||||
|
||||
static inline uint64_t getStaticExponent(uint64_t entropy) {
|
||||
auto exponent = constExponentBits;
|
||||
exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits;
|
||||
exponent <<= mantissaSize;
|
||||
return exponent;
|
||||
}
|
||||
|
||||
static inline uint64_t getFloatMask(uint64_t entropy) {
|
||||
constexpr uint64_t mask22bit = (1ULL << 22) - 1;
|
||||
return (entropy & mask22bit) | getStaticExponent(entropy);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void randomx_vm::initialize() {
|
||||
store64(®.a[0].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(0)));
|
||||
store64(®.a[0].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(1)));
|
||||
store64(®.a[1].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(2)));
|
||||
store64(®.a[1].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(3)));
|
||||
store64(®.a[2].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(4)));
|
||||
store64(®.a[2].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(5)));
|
||||
store64(®.a[3].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(6)));
|
||||
store64(®.a[3].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(7)));
|
||||
mem.ma = program.getEntropy(8) & CacheLineAlignMask;
|
||||
mem.mx = program.getEntropy(10);
|
||||
auto addressRegisters = program.getEntropy(12);
|
||||
config.readReg0 = 0 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
config.readReg1 = 2 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
config.readReg2 = 4 + (addressRegisters & 1);
|
||||
addressRegisters >>= 1;
|
||||
config.readReg3 = 6 + (addressRegisters & 1);
|
||||
datasetOffset = (program.getEntropy(13) % (DatasetExtraItems + 1)) * randomx::CacheLineSize;
|
||||
store64(&config.eMask[0], randomx::getFloatMask(program.getEntropy(14)));
|
||||
store64(&config.eMask[1], randomx::getFloatMask(program.getEntropy(15)));
|
||||
}
|
||||
|
||||
namespace randomx {
|
||||
|
||||
alignas(16) volatile static rx_vec_i128 aesDummy;
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
VmBase<Allocator, softAes>::~VmBase() {
|
||||
Allocator::freeMemory(scratchpad, RANDOMX_SCRATCHPAD_L3_MAX_SIZE);
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void VmBase<Allocator, softAes>::allocate() {
|
||||
if (datasetPtr == nullptr)
|
||||
throw std::invalid_argument("Cache/Dataset not set");
|
||||
if (!softAes) { //if hardware AES is not supported, it's better to fail now than to return a ticking bomb
|
||||
rx_vec_i128 tmp = rx_load_vec_i128((const rx_vec_i128*)&aesDummy);
|
||||
tmp = rx_aesenc_vec_i128(tmp, tmp);
|
||||
rx_store_vec_i128((rx_vec_i128*)&aesDummy, tmp);
|
||||
}
|
||||
scratchpad = (uint8_t*)Allocator::allocMemory(RANDOMX_SCRATCHPAD_L3_MAX_SIZE);
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void VmBase<Allocator, softAes>::getFinalResult(void* out, size_t outSize) {
|
||||
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a);
|
||||
blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void VmBase<Allocator, softAes>::initScratchpad(void* seed) {
|
||||
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void VmBase<Allocator, softAes>::generateProgram(void* seed) {
|
||||
fillAes4Rx4<softAes>(seed, sizeof(program), &program);
|
||||
}
|
||||
|
||||
template class VmBase<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class VmBase<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class VmBase<LargePageAllocator, false>;
|
||||
template class VmBase<LargePageAllocator, true>;
|
||||
}
|
83
src/crypto/randomx/virtual_machine.hpp
Normal file
83
src/crypto/randomx/virtual_machine.hpp
Normal file
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include "common.hpp"
|
||||
#include "program.hpp"
|
||||
|
||||
/* Global namespace for C binding */
|
||||
class randomx_vm {
|
||||
public:
|
||||
virtual ~randomx_vm() = 0;
|
||||
virtual void allocate() = 0;
|
||||
virtual void getFinalResult(void* out, size_t outSize) = 0;
|
||||
virtual void setDataset(randomx_dataset* dataset) { }
|
||||
virtual void setCache(randomx_cache* cache) { }
|
||||
virtual void initScratchpad(void* seed) = 0;
|
||||
virtual void run(void* seed) = 0;
|
||||
void resetRoundingMode();
|
||||
randomx::RegisterFile *getRegisterFile() {
|
||||
return ®
|
||||
}
|
||||
const void* getScratchpad() {
|
||||
return scratchpad;
|
||||
}
|
||||
const randomx::Program& getProgram()
|
||||
{
|
||||
return program;
|
||||
}
|
||||
protected:
|
||||
void initialize();
|
||||
alignas(64) randomx::Program program;
|
||||
alignas(64) randomx::RegisterFile reg;
|
||||
alignas(16) randomx::ProgramConfiguration config;
|
||||
randomx::MemoryRegisters mem;
|
||||
uint8_t* scratchpad;
|
||||
union {
|
||||
randomx_cache* cachePtr = nullptr;
|
||||
randomx_dataset* datasetPtr;
|
||||
};
|
||||
uint64_t datasetOffset;
|
||||
};
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
class VmBase : public randomx_vm {
|
||||
public:
|
||||
~VmBase() override;
|
||||
void allocate() override;
|
||||
void initScratchpad(void* seed) override;
|
||||
void getFinalResult(void* out, size_t outSize) override;
|
||||
protected:
|
||||
void generateProgram(void* seed);
|
||||
};
|
||||
|
||||
}
|
105
src/crypto/randomx/virtual_memory.cpp
Normal file
105
src/crypto/randomx/virtual_memory.cpp
Normal file
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "virtual_memory.hpp"
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#include <windows.h>
|
||||
#else
|
||||
#ifdef __APPLE__
|
||||
#include <mach/vm_statistics.h>
|
||||
#endif
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
std::string getErrorMessage(const char* function) {
|
||||
LPSTR messageBuffer = nullptr;
|
||||
size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||
NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL);
|
||||
std::string message(messageBuffer, size);
|
||||
LocalFree(messageBuffer);
|
||||
return std::string(function) + std::string(": ") + message;
|
||||
}
|
||||
#endif
|
||||
|
||||
void* allocExecutableMemory(std::size_t bytes) {
|
||||
void* mem;
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
mem = VirtualAlloc(nullptr, bytes, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
|
||||
if (mem == nullptr)
|
||||
throw std::runtime_error(getErrorMessage("allocExecutableMemory - VirtualAlloc"));
|
||||
#else
|
||||
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
if (mem == MAP_FAILED)
|
||||
throw std::runtime_error("allocExecutableMemory - mmap failed");
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
constexpr std::size_t align(std::size_t pos, std::size_t align) {
|
||||
return ((pos - 1) / align + 1) * align;
|
||||
}
|
||||
|
||||
void* allocLargePagesMemory(std::size_t bytes) {
|
||||
void* mem;
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
auto pageMinimum = GetLargePageMinimum();
|
||||
if (pageMinimum > 0)
|
||||
mem = VirtualAlloc(NULL, align(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
|
||||
else
|
||||
throw std::runtime_error("allocLargePagesMemory - Large pages are not supported");
|
||||
if (mem == nullptr)
|
||||
throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
|
||||
#else
|
||||
#ifdef __APPLE__
|
||||
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
|
||||
#elif defined(__FreeBSD__)
|
||||
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
|
||||
#else
|
||||
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
|
||||
#endif
|
||||
if (mem == MAP_FAILED)
|
||||
throw std::runtime_error("allocLargePagesMemory - mmap failed");
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
void freePagedMemory(void* ptr, std::size_t bytes) {
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
VirtualFree(ptr, 0, MEM_RELEASE);
|
||||
#else
|
||||
munmap(ptr, bytes);
|
||||
#endif
|
||||
}
|
35
src/crypto/randomx/virtual_memory.hpp
Normal file
35
src/crypto/randomx/virtual_memory.hpp
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
void* allocExecutableMemory(std::size_t);
|
||||
void* allocLargePagesMemory(std::size_t);
|
||||
void freePagedMemory(void*, std::size_t);
|
60
src/crypto/randomx/vm_compiled.cpp
Normal file
60
src/crypto/randomx/vm_compiled.cpp
Normal file
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "vm_compiled.hpp"
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
|
||||
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
|
||||
datasetPtr = dataset;
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledVm<Allocator, softAes>::run(void* seed) {
|
||||
VmBase<Allocator, softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
compiler.generateProgram(program, config);
|
||||
mem.memory = datasetPtr->memory + datasetOffset;
|
||||
execute();
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledVm<Allocator, softAes>::execute() {
|
||||
compiler.getProgramFunc()(reg, mem, scratchpad, RandomX_CurrentConfig.ProgramIterations);
|
||||
}
|
||||
|
||||
template class CompiledVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class CompiledVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class CompiledVm<LargePageAllocator, false>;
|
||||
template class CompiledVm<LargePageAllocator, true>;
|
||||
}
|
72
src/crypto/randomx/vm_compiled.hpp
Normal file
72
src/crypto/randomx/vm_compiled.hpp
Normal file
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <new>
|
||||
#include <cstdint>
|
||||
#include "virtual_machine.hpp"
|
||||
#include "jit_compiler.hpp"
|
||||
#include "allocator.hpp"
|
||||
#include "dataset.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
class CompiledVm : public VmBase<Allocator, softAes> {
|
||||
public:
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
void operator delete(void* ptr) {
|
||||
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledVm));
|
||||
}
|
||||
void setDataset(randomx_dataset* dataset) override;
|
||||
void run(void* seed) override;
|
||||
|
||||
using VmBase<Allocator, softAes>::mem;
|
||||
using VmBase<Allocator, softAes>::program;
|
||||
using VmBase<Allocator, softAes>::config;
|
||||
using VmBase<Allocator, softAes>::reg;
|
||||
using VmBase<Allocator, softAes>::scratchpad;
|
||||
using VmBase<Allocator, softAes>::datasetPtr;
|
||||
using VmBase<Allocator, softAes>::datasetOffset;
|
||||
protected:
|
||||
void execute();
|
||||
|
||||
JitCompiler compiler;
|
||||
};
|
||||
|
||||
using CompiledVmDefault = CompiledVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
using CompiledVmHardAes = CompiledVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
using CompiledVmLargePage = CompiledVm<LargePageAllocator, true>;
|
||||
using CompiledVmLargePageHardAes = CompiledVm<LargePageAllocator, false>;
|
||||
}
|
54
src/crypto/randomx/vm_compiled_light.cpp
Normal file
54
src/crypto/randomx/vm_compiled_light.cpp
Normal file
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "vm_compiled_light.hpp"
|
||||
#include "common.hpp"
|
||||
#include <stdexcept>
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledLightVm<Allocator, softAes>::setCache(randomx_cache* cache) {
|
||||
cachePtr = cache;
|
||||
mem.memory = cache->memory;
|
||||
compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void CompiledLightVm<Allocator, softAes>::run(void* seed) {
|
||||
VmBase<Allocator, softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
compiler.generateProgramLight(program, config, datasetOffset);
|
||||
CompiledVm<Allocator, softAes>::execute();
|
||||
}
|
||||
|
||||
template class CompiledLightVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class CompiledLightVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class CompiledLightVm<LargePageAllocator, false>;
|
||||
template class CompiledLightVm<LargePageAllocator, true>;
|
||||
}
|
64
src/crypto/randomx/vm_compiled_light.hpp
Normal file
64
src/crypto/randomx/vm_compiled_light.hpp
Normal file
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <new>
|
||||
#include "vm_compiled.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
class CompiledLightVm : public CompiledVm<Allocator, softAes> {
|
||||
public:
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
void operator delete(void* ptr) {
|
||||
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledLightVm));
|
||||
}
|
||||
void setCache(randomx_cache* cache) override;
|
||||
void setDataset(randomx_dataset* dataset) override { }
|
||||
void run(void* seed) override;
|
||||
|
||||
using CompiledVm<Allocator, softAes>::mem;
|
||||
using CompiledVm<Allocator, softAes>::compiler;
|
||||
using CompiledVm<Allocator, softAes>::program;
|
||||
using CompiledVm<Allocator, softAes>::config;
|
||||
using CompiledVm<Allocator, softAes>::cachePtr;
|
||||
using CompiledVm<Allocator, softAes>::datasetOffset;
|
||||
};
|
||||
|
||||
using CompiledLightVmDefault = CompiledLightVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
using CompiledLightVmHardAes = CompiledLightVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
using CompiledLightVmLargePage = CompiledLightVm<LargePageAllocator, true>;
|
||||
using CompiledLightVmLargePageHardAes = CompiledLightVm<LargePageAllocator, false>;
|
||||
}
|
125
src/crypto/randomx/vm_interpreted.cpp
Normal file
125
src/crypto/randomx/vm_interpreted.cpp
Normal file
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "vm_interpreted.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "intrin_portable.h"
|
||||
#include "reciprocal.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
|
||||
datasetPtr = dataset;
|
||||
mem.memory = dataset->memory;
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::run(void* seed) {
|
||||
VmBase<Allocator, softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
execute();
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::execute() {
|
||||
|
||||
NativeRegisterFile nreg;
|
||||
|
||||
for(unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
nreg.a[i] = rx_load_vec_f128(®.a[i].lo);
|
||||
|
||||
compileProgram(program, bytecode, nreg);
|
||||
|
||||
uint32_t spAddr0 = mem.mx;
|
||||
uint32_t spAddr1 = mem.ma;
|
||||
|
||||
for(unsigned ic = 0; ic < RandomX_CurrentConfig.ProgramIterations; ++ic) {
|
||||
uint64_t spMix = nreg.r[config.readReg0] ^ nreg.r[config.readReg1];
|
||||
spAddr0 ^= spMix;
|
||||
spAddr0 &= ScratchpadL3Mask64;
|
||||
spAddr1 ^= spMix >> 32;
|
||||
spAddr1 &= ScratchpadL3Mask64;
|
||||
|
||||
for (unsigned i = 0; i < RegistersCount; ++i)
|
||||
nreg.r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
|
||||
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
nreg.f[i] = rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * i);
|
||||
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
nreg.e[i] = maskRegisterExponentMantissa(config, rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
|
||||
|
||||
executeBytecode(bytecode, scratchpad, config);
|
||||
|
||||
mem.mx ^= nreg.r[config.readReg2] ^ nreg.r[config.readReg3];
|
||||
mem.mx &= CacheLineAlignMask;
|
||||
datasetPrefetch(datasetOffset + mem.mx);
|
||||
datasetRead(datasetOffset + mem.ma, nreg.r);
|
||||
std::swap(mem.mx, mem.ma);
|
||||
|
||||
for (unsigned i = 0; i < RegistersCount; ++i)
|
||||
store64(scratchpad + spAddr1 + 8 * i, nreg.r[i]);
|
||||
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
nreg.f[i] = rx_xor_vec_f128(nreg.f[i], nreg.e[i]);
|
||||
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
rx_store_vec_f128((double*)(scratchpad + spAddr0 + 16 * i), nreg.f[i]);
|
||||
|
||||
spAddr0 = 0;
|
||||
spAddr1 = 0;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < RegistersCount; ++i)
|
||||
store64(®.r[i], nreg.r[i]);
|
||||
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
rx_store_vec_f128(®.f[i].lo, nreg.f[i]);
|
||||
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
rx_store_vec_f128(®.e[i].lo, nreg.e[i]);
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
|
||||
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::datasetPrefetch(uint64_t address) {
|
||||
rx_prefetch_nta(mem.memory + address);
|
||||
}
|
||||
|
||||
template class InterpretedVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class InterpretedVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class InterpretedVm<LargePageAllocator, false>;
|
||||
template class InterpretedVm<LargePageAllocator, true>;
|
||||
}
|
75
src/crypto/randomx/vm_interpreted.hpp
Normal file
75
src/crypto/randomx/vm_interpreted.hpp
Normal file
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <new>
|
||||
#include <vector>
|
||||
#include "common.hpp"
|
||||
#include "virtual_machine.hpp"
|
||||
#include "bytecode_machine.hpp"
|
||||
#include "intrin_portable.h"
|
||||
#include "allocator.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
class InterpretedVm : public VmBase<Allocator, softAes>, public BytecodeMachine {
|
||||
public:
|
||||
using VmBase<Allocator, softAes>::mem;
|
||||
using VmBase<Allocator, softAes>::scratchpad;
|
||||
using VmBase<Allocator, softAes>::program;
|
||||
using VmBase<Allocator, softAes>::config;
|
||||
using VmBase<Allocator, softAes>::reg;
|
||||
using VmBase<Allocator, softAes>::datasetPtr;
|
||||
using VmBase<Allocator, softAes>::datasetOffset;
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
void operator delete(void* ptr) {
|
||||
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(InterpretedVm));
|
||||
}
|
||||
void run(void* seed) override;
|
||||
void setDataset(randomx_dataset* dataset) override;
|
||||
protected:
|
||||
virtual void datasetRead(uint64_t blockNumber, int_reg_t(&r)[RegistersCount]);
|
||||
virtual void datasetPrefetch(uint64_t blockNumber);
|
||||
private:
|
||||
void execute();
|
||||
|
||||
InstructionByteCode bytecode[RANDOMX_PROGRAM_MAX_SIZE];
|
||||
};
|
||||
|
||||
using InterpretedVmDefault = InterpretedVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
using InterpretedVmHardAes = InterpretedVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
using InterpretedVmLargePage = InterpretedVm<LargePageAllocator, true>;
|
||||
using InterpretedVmLargePageHardAes = InterpretedVm<LargePageAllocator, false>;
|
||||
}
|
55
src/crypto/randomx/vm_interpreted_light.cpp
Normal file
55
src/crypto/randomx/vm_interpreted_light.cpp
Normal file
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "vm_interpreted_light.hpp"
|
||||
#include "dataset.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedLightVm<Allocator, softAes>::setCache(randomx_cache* cache) {
|
||||
cachePtr = cache;
|
||||
mem.memory = cache->memory;
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedLightVm<Allocator, softAes>::datasetRead(uint64_t address, int_reg_t(&r)[8]) {
|
||||
uint32_t itemNumber = address / CacheLineSize;
|
||||
int_reg_t rl[8];
|
||||
|
||||
initDatasetItem(cachePtr, (uint8_t*)rl, itemNumber);
|
||||
|
||||
for (unsigned q = 0; q < 8; ++q)
|
||||
r[q] ^= rl[q];
|
||||
}
|
||||
|
||||
template class InterpretedLightVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
template class InterpretedLightVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
template class InterpretedLightVm<LargePageAllocator, false>;
|
||||
template class InterpretedLightVm<LargePageAllocator, true>;
|
||||
}
|
61
src/crypto/randomx/vm_interpreted_light.hpp
Normal file
61
src/crypto/randomx/vm_interpreted_light.hpp
Normal file
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <new>
|
||||
#include "vm_interpreted.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
class InterpretedLightVm : public InterpretedVm<Allocator, softAes> {
|
||||
public:
|
||||
using VmBase<Allocator, softAes>::mem;
|
||||
using VmBase<Allocator, softAes>::cachePtr;
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size);
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
void operator delete(void* ptr) {
|
||||
AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(InterpretedLightVm));
|
||||
}
|
||||
void setDataset(randomx_dataset* dataset) override { }
|
||||
void setCache(randomx_cache* cache) override;
|
||||
protected:
|
||||
void datasetRead(uint64_t address, int_reg_t(&r)[8]) override;
|
||||
void datasetPrefetch(uint64_t address) override { }
|
||||
};
|
||||
|
||||
using InterpretedLightVmDefault = InterpretedLightVm<AlignedAllocator<CacheLineSize>, true>;
|
||||
using InterpretedLightVmHardAes = InterpretedLightVm<AlignedAllocator<CacheLineSize>, false>;
|
||||
using InterpretedLightVmLargePage = InterpretedLightVm<LargePageAllocator, true>;
|
||||
using InterpretedLightVmLargePageHardAes = InterpretedLightVm<LargePageAllocator, false>;
|
||||
}
|
|
@ -391,6 +391,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_DOUBLE>,
|
||||
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_DOUBLE>,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_WOW
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_LOKI
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_LITE
|
||||
cryptonight_single_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
|
||||
|
@ -431,6 +432,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_WOW
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_LOKI
|
||||
# else
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
||||
|
@ -450,6 +452,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_WOW
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_LOKI
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
|
@ -503,6 +506,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_WOW
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_LOKI
|
||||
# else
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
||||
|
@ -522,6 +526,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_WOW
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_LOKI
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
|
@ -554,6 +559,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_WOW
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_LOKI
|
||||
# else
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
||||
|
@ -573,6 +579,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_WOW
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_LOKI
|
||||
# endif
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
||||
|
@ -592,6 +599,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_WOW
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RX_LOKI
|
||||
};
|
||||
|
||||
static_assert(count == sizeof(func_table) / sizeof(func_table[0]), "func_table size mismatch");
|
||||
|
|
|
@ -162,9 +162,9 @@ void MultiWorker<N>::start()
|
|||
const xmrig::Variant v = m_state.job.algorithm().variant();
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
if (v == xmrig::VARIANT_RX_WOW) {
|
||||
if ((v == xmrig::VARIANT_RX_WOW) || (v == xmrig::VARIANT_RX_LOKI)) {
|
||||
allocateRandomX_VM();
|
||||
Workers::updateDataset(m_state.job.seedHash(), m_totalWays);
|
||||
Workers::updateDataset(m_state.job.seedHash(), v, m_totalWays);
|
||||
randomx_calculate_hash(m_rx_vm, m_state.blob, m_state.job.size(), m_hash);
|
||||
}
|
||||
else
|
||||
|
|
|
@ -65,6 +65,7 @@ uv_rwlock_t Workers::m_rx_dataset_lock;
|
|||
randomx_cache *Workers::m_rx_cache = nullptr;
|
||||
randomx_dataset *Workers::m_rx_dataset = nullptr;
|
||||
uint8_t Workers::m_rx_seed_hash[32] = {};
|
||||
xmrig::Variant Workers::m_rx_variant = xmrig::VARIANT_MAX;
|
||||
std::atomic<uint32_t> Workers::m_rx_dataset_init_thread_counter = {};
|
||||
#endif
|
||||
|
||||
|
@ -380,10 +381,10 @@ void Workers::start(IWorker *worker)
|
|||
|
||||
|
||||
#ifdef XMRIG_ALGO_RANDOMX
|
||||
void Workers::updateDataset(const uint8_t* seed_hash, const uint32_t num_threads)
|
||||
void Workers::updateDataset(const uint8_t* seed_hash, xmrig::Variant variant, const uint32_t num_threads)
|
||||
{
|
||||
// Check if we need to update cache and dataset
|
||||
if (memcmp(m_rx_seed_hash, seed_hash, sizeof(m_rx_seed_hash)) == 0)
|
||||
if ((memcmp(m_rx_seed_hash, seed_hash, sizeof(m_rx_seed_hash)) == 0) && (m_rx_variant == variant))
|
||||
return;
|
||||
|
||||
const uint32_t thread_id = m_rx_dataset_init_thread_counter++;
|
||||
|
@ -400,10 +401,27 @@ void Workers::updateDataset(const uint8_t* seed_hash, const uint32_t num_threads
|
|||
|
||||
// One of the threads updates cache
|
||||
uv_rwlock_wrlock(&m_rx_dataset_lock);
|
||||
|
||||
if (m_rx_variant != variant) {
|
||||
switch (variant) {
|
||||
case xmrig::VARIANT_RX_WOW:
|
||||
randomx_apply_config(RandomX_WowneroConfig);
|
||||
break;
|
||||
case xmrig::VARIANT_RX_LOKI:
|
||||
randomx_apply_config(RandomX_LokiConfig);
|
||||
break;
|
||||
default:
|
||||
randomx_apply_config(RandomX_MoneroConfig);
|
||||
break;
|
||||
}
|
||||
m_rx_variant = variant;
|
||||
}
|
||||
|
||||
if (memcmp(m_rx_seed_hash, seed_hash, sizeof(m_rx_seed_hash)) != 0) {
|
||||
memcpy(m_rx_seed_hash, seed_hash, sizeof(m_rx_seed_hash));
|
||||
randomx_init_cache(m_rx_cache, m_rx_seed_hash, sizeof(m_rx_seed_hash));
|
||||
}
|
||||
|
||||
uv_rwlock_wrunlock(&m_rx_dataset_lock);
|
||||
|
||||
// All threads update dataset
|
||||
|
|
|
@ -77,7 +77,7 @@ public:
|
|||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
static void updateDataset(const uint8_t* seed_hash, uint32_t num_threads);
|
||||
static void updateDataset(const uint8_t* seed_hash, xmrig::Variant variant, uint32_t num_threads);
|
||||
static randomx_dataset* getDataset();
|
||||
# endif
|
||||
|
||||
|
@ -131,6 +131,7 @@ private:
|
|||
static randomx_cache *m_rx_cache;
|
||||
static randomx_dataset *m_rx_dataset;
|
||||
static uint8_t m_rx_seed_hash[32];
|
||||
static xmrig::Variant m_rx_variant;
|
||||
static std::atomic<uint32_t> m_rx_dataset_init_thread_counter;
|
||||
# endif
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue