mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-31 16:09:46 +00:00
Merge branch 'dev'
This commit is contained in:
commit
54e75bc7c4
31 changed files with 888 additions and 91 deletions
10
CHANGELOG.md
10
CHANGELOG.md
|
@ -1,3 +1,13 @@
|
||||||
|
# v6.18.1
|
||||||
|
- [#3129](https://github.com/xmrig/xmrig/pull/3129) Fix: protectRX flushed CPU cache only on MacOS/iOS.
|
||||||
|
- [#3126](https://github.com/xmrig/xmrig/pull/3126) Don't reset when pool sends the same job blob.
|
||||||
|
- [#3120](https://github.com/xmrig/xmrig/pull/3120) RandomX: optimized `CFROUND` elimination.
|
||||||
|
- [#3109](https://github.com/xmrig/xmrig/pull/3109) RandomX: added Blake2 AVX2 version.
|
||||||
|
- [#3082](https://github.com/xmrig/xmrig/pull/3082) Fixed GCC 12 warnings.
|
||||||
|
- [#3075](https://github.com/xmrig/xmrig/pull/3075) Recognize `armv7ve` as valid ARMv7 target.
|
||||||
|
- [#3132](https://github.com/xmrig/xmrig/pull/3132) RandomX: added MSR mod for Zen 4.
|
||||||
|
- [#3134](https://github.com/xmrig/xmrig/pull/3134) Added Zen4 to `randomx_boost.sh`.
|
||||||
|
|
||||||
# v6.18.0
|
# v6.18.0
|
||||||
- [#3067](https://github.com/xmrig/xmrig/pull/3067) Monero v15 network upgrade support and more house keeping.
|
- [#3067](https://github.com/xmrig/xmrig/pull/3067) Monero v15 network upgrade support and more house keeping.
|
||||||
- Removed deprecated AstroBWTv1 and v2.
|
- Removed deprecated AstroBWTv1 and v2.
|
||||||
|
|
|
@ -27,6 +27,7 @@ option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
||||||
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
||||||
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
||||||
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
|
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
|
||||||
|
option(WITH_AVX2 "Enable AVX2 for Blake2" ON)
|
||||||
option(WITH_VAES "Enable VAES instructions for Cryptonight" ON)
|
option(WITH_VAES "Enable VAES instructions for Cryptonight" ON)
|
||||||
option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
|
option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
|
||||||
option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
|
option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
[![GitHub stars](https://img.shields.io/github/stars/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/stargazers)
|
[![GitHub stars](https://img.shields.io/github/stars/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/stargazers)
|
||||||
[![GitHub forks](https://img.shields.io/github/forks/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/network)
|
[![GitHub forks](https://img.shields.io/github/forks/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/network)
|
||||||
|
|
||||||
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight, AstroBWT and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.
|
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.
|
||||||
|
|
||||||
## Mining backends
|
## Mining backends
|
||||||
- **CPU** (x64/ARMv7/ARMv8)
|
- **CPU** (x64/ARMv7/ARMv8)
|
||||||
|
|
|
@ -25,13 +25,14 @@ if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
|
||||||
add_definitions(-DRAPIDJSON_SSE2)
|
add_definitions(-DRAPIDJSON_SSE2)
|
||||||
else()
|
else()
|
||||||
set(WITH_SSE4_1 OFF)
|
set(WITH_SSE4_1 OFF)
|
||||||
|
set(WITH_AVX2 OFF)
|
||||||
set(WITH_VAES OFF)
|
set(WITH_VAES OFF)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT ARM_TARGET)
|
if (NOT ARM_TARGET)
|
||||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv8-a)$")
|
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv8-a)$")
|
||||||
set(ARM_TARGET 8)
|
set(ARM_TARGET 8)
|
||||||
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l)$")
|
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve)$")
|
||||||
set(ARM_TARGET 7)
|
set(ARM_TARGET 7)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
@ -57,3 +58,7 @@ endif()
|
||||||
if (WITH_SSE4_1)
|
if (WITH_SSE4_1)
|
||||||
add_definitions(-DXMRIG_FEATURE_SSE4_1)
|
add_definitions(-DXMRIG_FEATURE_SSE4_1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (WITH_AVX2)
|
||||||
|
add_definitions(-DXMRIG_FEATURE_AVX2)
|
||||||
|
endif()
|
||||||
|
|
|
@ -76,7 +76,15 @@ if (WITH_RANDOMX)
|
||||||
list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/blake2b_sse41.c)
|
list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/blake2b_sse41.c)
|
||||||
|
|
||||||
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||||
set_source_files_properties(src/crypto/randomx/blake2/blake2b_sse41.c PROPERTIES COMPILE_FLAGS -msse4.1)
|
set_source_files_properties(src/crypto/randomx/blake2/blake2b_sse41.c PROPERTIES COMPILE_FLAGS "-Ofast -msse4.1")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (WITH_AVX2)
|
||||||
|
list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/avx2/blake2b_avx2.c)
|
||||||
|
|
||||||
|
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||||
|
set_source_files_properties(src/crypto/randomx/blake2/avx2/blake2b_avx2.c PROPERTIES COMPILE_FLAGS "-Ofast -mavx2")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -10,14 +10,24 @@ fi
|
||||||
|
|
||||||
if grep -E 'AMD Ryzen|AMD EPYC' /proc/cpuinfo > /dev/null;
|
if grep -E 'AMD Ryzen|AMD EPYC' /proc/cpuinfo > /dev/null;
|
||||||
then
|
then
|
||||||
if grep "cpu family[[:space:]]:[[:space:]]25" /proc/cpuinfo > /dev/null;
|
if grep "cpu family[[:space:]]\{1,\}:[[:space:]]25" /proc/cpuinfo > /dev/null;
|
||||||
then
|
then
|
||||||
echo "Detected Zen3 CPU"
|
if grep "model[[:space:]]\{1,\}:[[:space:]]97" /proc/cpuinfo > /dev/null;
|
||||||
wrmsr -a 0xc0011020 0x4480000000000
|
then
|
||||||
wrmsr -a 0xc0011021 0x1c000200000040
|
echo "Detected Zen4 CPU"
|
||||||
wrmsr -a 0xc0011022 0xc000000401500000
|
wrmsr -a 0xc0011020 0x4400000000000
|
||||||
wrmsr -a 0xc001102b 0x2000cc14
|
wrmsr -a 0xc0011021 0x4000000000040
|
||||||
echo "MSR register values for Zen3 applied"
|
wrmsr -a 0xc0011022 0x8680000401570000
|
||||||
|
wrmsr -a 0xc001102b 0x2040cc10
|
||||||
|
echo "MSR register values for Zen4 applied"
|
||||||
|
else
|
||||||
|
echo "Detected Zen3 CPU"
|
||||||
|
wrmsr -a 0xc0011020 0x4480000000000
|
||||||
|
wrmsr -a 0xc0011021 0x1c000200000040
|
||||||
|
wrmsr -a 0xc0011022 0xc000000401500000
|
||||||
|
wrmsr -a 0xc001102b 0x2000cc14
|
||||||
|
echo "MSR register values for Zen3 applied"
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
echo "Detected Zen1/Zen2 CPU"
|
echo "Detected Zen1/Zen2 CPU"
|
||||||
wrmsr -a 0xc0011020 0
|
wrmsr -a 0xc0011020 0
|
||||||
|
|
|
@ -77,8 +77,11 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
|
||||||
{
|
{
|
||||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||||
// cn-heavy optimization for Zen3 CPUs
|
// cn-heavy optimization for Zen3 CPUs
|
||||||
const bool is_vermeer = (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21);
|
const auto arch = Cpu::info()->arch();
|
||||||
if ((N == 1) && (m_av == CnHash::AV_SINGLE) && (m_algorithm.family() == Algorithm::CN_HEAVY) && (m_assembly != Assembly::NONE) && is_vermeer) {
|
const uint32_t model = Cpu::info()->model();
|
||||||
|
const bool is_vermeer = (arch == ICpuInfo::ARCH_ZEN3) && (model == 0x21);
|
||||||
|
const bool is_raphael = (arch == ICpuInfo::ARCH_ZEN4) && (model == 0x61);
|
||||||
|
if ((N == 1) && (m_av == CnHash::AV_SINGLE) && (m_algorithm.family() == Algorithm::CN_HEAVY) && (m_assembly != Assembly::NONE) && (is_vermeer || is_raphael)) {
|
||||||
std::lock_guard<std::mutex> lock(cn_heavyZen3MemoryMutex);
|
std::lock_guard<std::mutex> lock(cn_heavyZen3MemoryMutex);
|
||||||
if (!cn_heavyZen3Memory) {
|
if (!cn_heavyZen3Memory) {
|
||||||
// Round up number of threads to the multiple of 8
|
// Round up number of threads to the multiple of 8
|
||||||
|
|
|
@ -45,19 +45,21 @@ public:
|
||||||
ARCH_ZEN,
|
ARCH_ZEN,
|
||||||
ARCH_ZEN_PLUS,
|
ARCH_ZEN_PLUS,
|
||||||
ARCH_ZEN2,
|
ARCH_ZEN2,
|
||||||
ARCH_ZEN3
|
ARCH_ZEN3,
|
||||||
|
ARCH_ZEN4
|
||||||
};
|
};
|
||||||
|
|
||||||
enum MsrMod : uint32_t {
|
enum MsrMod : uint32_t {
|
||||||
MSR_MOD_NONE,
|
MSR_MOD_NONE,
|
||||||
MSR_MOD_RYZEN_17H,
|
MSR_MOD_RYZEN_17H,
|
||||||
MSR_MOD_RYZEN_19H,
|
MSR_MOD_RYZEN_19H,
|
||||||
|
MSR_MOD_RYZEN_19H_ZEN4,
|
||||||
MSR_MOD_INTEL,
|
MSR_MOD_INTEL,
|
||||||
MSR_MOD_CUSTOM,
|
MSR_MOD_CUSTOM,
|
||||||
MSR_MOD_MAX
|
MSR_MOD_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
# define MSR_NAMES_LIST "none", "ryzen_17h", "ryzen_19h", "intel", "custom"
|
# define MSR_NAMES_LIST "none", "ryzen_17h", "ryzen_19h", "ryzen_19h_zen4", "intel", "custom"
|
||||||
|
|
||||||
enum Flag : uint32_t {
|
enum Flag : uint32_t {
|
||||||
FLAG_AES,
|
FLAG_AES,
|
||||||
|
|
|
@ -64,7 +64,7 @@ static_assert(kCpuFlagsSize == ICpuInfo::FLAG_MAX, "kCpuFlagsSize and FLAG_MAX m
|
||||||
|
|
||||||
|
|
||||||
#ifdef XMRIG_FEATURE_MSR
|
#ifdef XMRIG_FEATURE_MSR
|
||||||
constexpr size_t kMsrArraySize = 5;
|
constexpr size_t kMsrArraySize = 6;
|
||||||
static const std::array<const char *, kMsrArraySize> msrNames = { MSR_NAMES_LIST };
|
static const std::array<const char *, kMsrArraySize> msrNames = { MSR_NAMES_LIST };
|
||||||
static_assert(kMsrArraySize == ICpuInfo::MSR_MOD_MAX, "kMsrArraySize and MSR_MOD_MAX mismatch");
|
static_assert(kMsrArraySize == ICpuInfo::MSR_MOD_MAX, "kMsrArraySize and MSR_MOD_MAX mismatch");
|
||||||
#endif
|
#endif
|
||||||
|
@ -250,8 +250,14 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x19:
|
case 0x19:
|
||||||
m_arch = ARCH_ZEN3;
|
if (m_model == 0x61) {
|
||||||
m_msrMod = MSR_MOD_RYZEN_19H;
|
m_arch = ARCH_ZEN4;
|
||||||
|
m_msrMod = MSR_MOD_RYZEN_19H_ZEN4;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_arch = ARCH_ZEN3;
|
||||||
|
m_msrMod = MSR_MOD_RYZEN_19H;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -66,7 +66,6 @@ Storage<DaemonClient> DaemonClient::m_storage;
|
||||||
|
|
||||||
static const char* kBlocktemplateBlob = "blocktemplate_blob";
|
static const char* kBlocktemplateBlob = "blocktemplate_blob";
|
||||||
static const char* kBlockhashingBlob = "blockhashing_blob";
|
static const char* kBlockhashingBlob = "blockhashing_blob";
|
||||||
static const char* kLastError = "lasterror";
|
|
||||||
static const char *kGetHeight = "/getheight";
|
static const char *kGetHeight = "/getheight";
|
||||||
static const char *kGetInfo = "/getinfo";
|
static const char *kGetInfo = "/getinfo";
|
||||||
static const char *kHash = "hash";
|
static const char *kHash = "hash";
|
||||||
|
|
|
@ -48,7 +48,13 @@ xmrig::Job::Job(bool nicehash, const Algorithm &algorithm, const String &clientI
|
||||||
|
|
||||||
bool xmrig::Job::isEqual(const Job &other) const
|
bool xmrig::Job::isEqual(const Job &other) const
|
||||||
{
|
{
|
||||||
return m_id == other.m_id && m_clientId == other.m_clientId && memcmp(m_blob, other.m_blob, sizeof(m_blob)) == 0 && m_target == other.m_target;
|
return m_id == other.m_id && m_clientId == other.m_clientId && isEqualBlob(other) && m_target == other.m_target;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool xmrig::Job::isEqualBlob(const Job &other) const
|
||||||
|
{
|
||||||
|
return (m_size == other.m_size) && (memcmp(m_blob, other.m_blob, m_size) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -58,19 +64,19 @@ bool xmrig::Job::setBlob(const char *blob)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_size = strlen(blob);
|
size_t size = strlen(blob);
|
||||||
if (m_size % 2 != 0) {
|
if (size % 2 != 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_size /= 2;
|
size /= 2;
|
||||||
|
|
||||||
const size_t minSize = nonceOffset() + nonceSize();
|
const size_t minSize = nonceOffset() + nonceSize();
|
||||||
if (m_size < minSize || m_size >= sizeof(m_blob)) {
|
if (size < minSize || size >= sizeof(m_blob)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Cvt::fromHex(m_blob, sizeof(m_blob), blob, m_size * 2)) {
|
if (!Cvt::fromHex(m_blob, sizeof(m_blob), blob, size * 2)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -80,9 +86,10 @@ bool xmrig::Job::setBlob(const char *blob)
|
||||||
|
|
||||||
# ifdef XMRIG_PROXY_PROJECT
|
# ifdef XMRIG_PROXY_PROJECT
|
||||||
memset(m_rawBlob, 0, sizeof(m_rawBlob));
|
memset(m_rawBlob, 0, sizeof(m_rawBlob));
|
||||||
memcpy(m_rawBlob, blob, m_size * 2);
|
memcpy(m_rawBlob, blob, size * 2);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
m_size = size;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,6 +59,7 @@ public:
|
||||||
~Job() = default;
|
~Job() = default;
|
||||||
|
|
||||||
bool isEqual(const Job &other) const;
|
bool isEqual(const Job &other) const;
|
||||||
|
bool isEqualBlob(const Job &other) const;
|
||||||
bool setBlob(const char *blob);
|
bool setBlob(const char *blob);
|
||||||
bool setSeedHash(const char *hash);
|
bool setSeedHash(const char *hash);
|
||||||
bool setTarget(const char *target);
|
bool setTarget(const char *target);
|
||||||
|
|
|
@ -561,6 +561,12 @@ void xmrig::Miner::setJob(const Job &job, bool donate)
|
||||||
const uint8_t index = donate ? 1 : 0;
|
const uint8_t index = donate ? 1 : 0;
|
||||||
|
|
||||||
d_ptr->reset = !(d_ptr->job.index() == 1 && index == 0 && d_ptr->userJobId == job.id());
|
d_ptr->reset = !(d_ptr->job.index() == 1 && index == 0 && d_ptr->userJobId == job.id());
|
||||||
|
|
||||||
|
// Don't reset nonce if pool sends the same hashing blob again, but with different difficulty (for example)
|
||||||
|
if (d_ptr->job.isEqualBlob(job)) {
|
||||||
|
d_ptr->reset = false;
|
||||||
|
}
|
||||||
|
|
||||||
d_ptr->job = job;
|
d_ptr->job = job;
|
||||||
d_ptr->job.setIndex(index);
|
d_ptr->job.setIndex(index);
|
||||||
|
|
||||||
|
|
|
@ -407,8 +407,12 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,
|
||||||
}
|
}
|
||||||
|
|
||||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||||
// cn-heavy optimization for Zen3 CPUs
|
// cn-heavy optimization for Zen3/Zen4 CPUs
|
||||||
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21)) {
|
const auto arch = Cpu::info()->arch();
|
||||||
|
const uint32_t model = Cpu::info()->model();
|
||||||
|
const bool is_vermeer = (arch == ICpuInfo::ARCH_ZEN3) && (model == 0x21);
|
||||||
|
const bool is_raphael = (arch == ICpuInfo::ARCH_ZEN4) && (model == 0x61);
|
||||||
|
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (is_vermeer || is_raphael)) {
|
||||||
switch (algorithm.id()) {
|
switch (algorithm.id()) {
|
||||||
case Algorithm::CN_HEAVY_0:
|
case Algorithm::CN_HEAVY_0:
|
||||||
return cryptonight_single_hash<Algorithm::CN_HEAVY_0, false, 3>;
|
return cryptonight_single_hash<Algorithm::CN_HEAVY_0, false, 3>;
|
||||||
|
|
|
@ -112,13 +112,19 @@ bool xmrig::VirtualMemory::protectRWX(void *p, size_t size)
|
||||||
|
|
||||||
bool xmrig::VirtualMemory::protectRX(void *p, size_t size)
|
bool xmrig::VirtualMemory::protectRX(void *p, size_t size)
|
||||||
{
|
{
|
||||||
|
bool result = true;
|
||||||
|
|
||||||
# if defined(XMRIG_OS_APPLE) && defined(XMRIG_ARM)
|
# if defined(XMRIG_OS_APPLE) && defined(XMRIG_ARM)
|
||||||
pthread_jit_write_protect_np(true);
|
pthread_jit_write_protect_np(true);
|
||||||
flushInstructionCache(p, size);
|
|
||||||
return true;
|
|
||||||
# else
|
# else
|
||||||
return mprotect(p, size, PROT_READ | PROT_EXEC) == 0;
|
result = (mprotect(p, size, PROT_READ | PROT_EXEC) == 0);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
# if defined(XMRIG_ARM)
|
||||||
|
flushInstructionCache(p, size);
|
||||||
|
# endif
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
121
src/crypto/randomx/blake2/avx2/LICENSE
Normal file
121
src/crypto/randomx/blake2/avx2/LICENSE
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
Creative Commons Legal Code
|
||||||
|
|
||||||
|
CC0 1.0 Universal
|
||||||
|
|
||||||
|
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
|
||||||
|
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
|
||||||
|
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
|
||||||
|
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
|
||||||
|
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
|
||||||
|
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
|
||||||
|
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
|
||||||
|
HEREUNDER.
|
||||||
|
|
||||||
|
Statement of Purpose
|
||||||
|
|
||||||
|
The laws of most jurisdictions throughout the world automatically confer
|
||||||
|
exclusive Copyright and Related Rights (defined below) upon the creator
|
||||||
|
and subsequent owner(s) (each and all, an "owner") of an original work of
|
||||||
|
authorship and/or a database (each, a "Work").
|
||||||
|
|
||||||
|
Certain owners wish to permanently relinquish those rights to a Work for
|
||||||
|
the purpose of contributing to a commons of creative, cultural and
|
||||||
|
scientific works ("Commons") that the public can reliably and without fear
|
||||||
|
of later claims of infringement build upon, modify, incorporate in other
|
||||||
|
works, reuse and redistribute as freely as possible in any form whatsoever
|
||||||
|
and for any purposes, including without limitation commercial purposes.
|
||||||
|
These owners may contribute to the Commons to promote the ideal of a free
|
||||||
|
culture and the further production of creative, cultural and scientific
|
||||||
|
works, or to gain reputation or greater distribution for their Work in
|
||||||
|
part through the use and efforts of others.
|
||||||
|
|
||||||
|
For these and/or other purposes and motivations, and without any
|
||||||
|
expectation of additional consideration or compensation, the person
|
||||||
|
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
|
||||||
|
is an owner of Copyright and Related Rights in the Work, voluntarily
|
||||||
|
elects to apply CC0 to the Work and publicly distribute the Work under its
|
||||||
|
terms, with knowledge of his or her Copyright and Related Rights in the
|
||||||
|
Work and the meaning and intended legal effect of CC0 on those rights.
|
||||||
|
|
||||||
|
1. Copyright and Related Rights. A Work made available under CC0 may be
|
||||||
|
protected by copyright and related or neighboring rights ("Copyright and
|
||||||
|
Related Rights"). Copyright and Related Rights include, but are not
|
||||||
|
limited to, the following:
|
||||||
|
|
||||||
|
i. the right to reproduce, adapt, distribute, perform, display,
|
||||||
|
communicate, and translate a Work;
|
||||||
|
ii. moral rights retained by the original author(s) and/or performer(s);
|
||||||
|
iii. publicity and privacy rights pertaining to a person's image or
|
||||||
|
likeness depicted in a Work;
|
||||||
|
iv. rights protecting against unfair competition in regards to a Work,
|
||||||
|
subject to the limitations in paragraph 4(a), below;
|
||||||
|
v. rights protecting the extraction, dissemination, use and reuse of data
|
||||||
|
in a Work;
|
||||||
|
vi. database rights (such as those arising under Directive 96/9/EC of the
|
||||||
|
European Parliament and of the Council of 11 March 1996 on the legal
|
||||||
|
protection of databases, and under any national implementation
|
||||||
|
thereof, including any amended or successor version of such
|
||||||
|
directive); and
|
||||||
|
vii. other similar, equivalent or corresponding rights throughout the
|
||||||
|
world based on applicable law or treaty, and any national
|
||||||
|
implementations thereof.
|
||||||
|
|
||||||
|
2. Waiver. To the greatest extent permitted by, but not in contravention
|
||||||
|
of, applicable law, Affirmer hereby overtly, fully, permanently,
|
||||||
|
irrevocably and unconditionally waives, abandons, and surrenders all of
|
||||||
|
Affirmer's Copyright and Related Rights and associated claims and causes
|
||||||
|
of action, whether now known or unknown (including existing as well as
|
||||||
|
future claims and causes of action), in the Work (i) in all territories
|
||||||
|
worldwide, (ii) for the maximum duration provided by applicable law or
|
||||||
|
treaty (including future time extensions), (iii) in any current or future
|
||||||
|
medium and for any number of copies, and (iv) for any purpose whatsoever,
|
||||||
|
including without limitation commercial, advertising or promotional
|
||||||
|
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
|
||||||
|
member of the public at large and to the detriment of Affirmer's heirs and
|
||||||
|
successors, fully intending that such Waiver shall not be subject to
|
||||||
|
revocation, rescission, cancellation, termination, or any other legal or
|
||||||
|
equitable action to disrupt the quiet enjoyment of the Work by the public
|
||||||
|
as contemplated by Affirmer's express Statement of Purpose.
|
||||||
|
|
||||||
|
3. Public License Fallback. Should any part of the Waiver for any reason
|
||||||
|
be judged legally invalid or ineffective under applicable law, then the
|
||||||
|
Waiver shall be preserved to the maximum extent permitted taking into
|
||||||
|
account Affirmer's express Statement of Purpose. In addition, to the
|
||||||
|
extent the Waiver is so judged Affirmer hereby grants to each affected
|
||||||
|
person a royalty-free, non transferable, non sublicensable, non exclusive,
|
||||||
|
irrevocable and unconditional license to exercise Affirmer's Copyright and
|
||||||
|
Related Rights in the Work (i) in all territories worldwide, (ii) for the
|
||||||
|
maximum duration provided by applicable law or treaty (including future
|
||||||
|
time extensions), (iii) in any current or future medium and for any number
|
||||||
|
of copies, and (iv) for any purpose whatsoever, including without
|
||||||
|
limitation commercial, advertising or promotional purposes (the
|
||||||
|
"License"). The License shall be deemed effective as of the date CC0 was
|
||||||
|
applied by Affirmer to the Work. Should any part of the License for any
|
||||||
|
reason be judged legally invalid or ineffective under applicable law, such
|
||||||
|
partial invalidity or ineffectiveness shall not invalidate the remainder
|
||||||
|
of the License, and in such case Affirmer hereby affirms that he or she
|
||||||
|
will not (i) exercise any of his or her remaining Copyright and Related
|
||||||
|
Rights in the Work or (ii) assert any associated claims and causes of
|
||||||
|
action with respect to the Work, in either case contrary to Affirmer's
|
||||||
|
express Statement of Purpose.
|
||||||
|
|
||||||
|
4. Limitations and Disclaimers.
|
||||||
|
|
||||||
|
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
||||||
|
surrendered, licensed or otherwise affected by this document.
|
||||||
|
b. Affirmer offers the Work as-is and makes no representations or
|
||||||
|
warranties of any kind concerning the Work, express, implied,
|
||||||
|
statutory or otherwise, including without limitation warranties of
|
||||||
|
title, merchantability, fitness for a particular purpose, non
|
||||||
|
infringement, or the absence of latent or other defects, accuracy, or
|
||||||
|
the present or absence of errors, whether or not discoverable, all to
|
||||||
|
the greatest extent permissible under applicable law.
|
||||||
|
c. Affirmer disclaims responsibility for clearing rights of other persons
|
||||||
|
that may apply to the Work or any use thereof, including without
|
||||||
|
limitation any person's Copyright and Related Rights in the Work.
|
||||||
|
Further, Affirmer disclaims responsibility for obtaining any necessary
|
||||||
|
consents, permissions or other rights required for any use of the
|
||||||
|
Work.
|
||||||
|
d. Affirmer understands and acknowledges that Creative Commons is not a
|
||||||
|
party to this document and has no duty or obligation with respect to
|
||||||
|
this CC0 or use of the Work.
|
38
src/crypto/randomx/blake2/avx2/blake2.h
Normal file
38
src/crypto/randomx/blake2/avx2/blake2.h
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
#ifndef BLAKE2_AVX2_BLAKE2_H
|
||||||
|
#define BLAKE2_AVX2_BLAKE2_H
|
||||||
|
|
||||||
|
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#define INLINE __inline
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
#define INLINE __inline__
|
||||||
|
#else
|
||||||
|
#define INLINE
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#define INLINE inline
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#define ALIGN(x) __declspec(align(x))
|
||||||
|
#else
|
||||||
|
#define ALIGN(x) __attribute__((aligned(x)))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
enum blake2s_constant {
|
||||||
|
BLAKE2S_BLOCKBYTES = 64,
|
||||||
|
BLAKE2S_OUTBYTES = 32,
|
||||||
|
BLAKE2S_KEYBYTES = 32,
|
||||||
|
BLAKE2S_SALTBYTES = 8,
|
||||||
|
BLAKE2S_PERSONALBYTES = 8
|
||||||
|
};
|
||||||
|
|
||||||
|
enum blake2b_constant {
|
||||||
|
BLAKE2B_BLOCKBYTES = 128,
|
||||||
|
BLAKE2B_OUTBYTES = 64,
|
||||||
|
BLAKE2B_KEYBYTES = 64,
|
||||||
|
BLAKE2B_SALTBYTES = 16,
|
||||||
|
BLAKE2B_PERSONALBYTES = 16
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
48
src/crypto/randomx/blake2/avx2/blake2b-common.h
Normal file
48
src/crypto/randomx/blake2/avx2/blake2b-common.h
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
#ifndef BLAKE2_AVX2_BLAKE2B_COMMON_H
|
||||||
|
#define BLAKE2_AVX2_BLAKE2B_COMMON_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
#include "blake2.h"
|
||||||
|
|
||||||
|
#define LOAD128(p) _mm_load_si128( (__m128i *)(p) )
|
||||||
|
#define STORE128(p,r) _mm_store_si128((__m128i *)(p), r)
|
||||||
|
|
||||||
|
#define LOADU128(p) _mm_loadu_si128( (__m128i *)(p) )
|
||||||
|
#define STOREU128(p,r) _mm_storeu_si128((__m128i *)(p), r)
|
||||||
|
|
||||||
|
#define LOAD(p) _mm256_load_si256( (__m256i *)(p) )
|
||||||
|
#define STORE(p,r) _mm256_store_si256((__m256i *)(p), r)
|
||||||
|
|
||||||
|
#define LOADU(p) _mm256_loadu_si256( (__m256i *)(p) )
|
||||||
|
#define STOREU(p,r) _mm256_storeu_si256((__m256i *)(p), r)
|
||||||
|
|
||||||
|
static INLINE uint64_t LOADU64(void const * p) {
|
||||||
|
uint64_t v;
|
||||||
|
memcpy(&v, p, sizeof v);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define ROTATE16 _mm256_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, \
|
||||||
|
2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 )
|
||||||
|
|
||||||
|
#define ROTATE24 _mm256_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, \
|
||||||
|
3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 )
|
||||||
|
|
||||||
|
#define ADD(a, b) _mm256_add_epi64(a, b)
|
||||||
|
#define SUB(a, b) _mm256_sub_epi64(a, b)
|
||||||
|
|
||||||
|
#define XOR(a, b) _mm256_xor_si256(a, b)
|
||||||
|
#define AND(a, b) _mm256_and_si256(a, b)
|
||||||
|
#define OR(a, b) _mm256_or_si256(a, b)
|
||||||
|
|
||||||
|
#define ROT32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
|
||||||
|
#define ROT24(x) _mm256_shuffle_epi8((x), ROTATE24)
|
||||||
|
#define ROT16(x) _mm256_shuffle_epi8((x), ROTATE16)
|
||||||
|
#define ROT63(x) _mm256_or_si256(_mm256_srli_epi64((x), 63), ADD((x), (x)))
|
||||||
|
|
||||||
|
#endif
|
340
src/crypto/randomx/blake2/avx2/blake2b-load-avx2.h
Normal file
340
src/crypto/randomx/blake2/avx2/blake2b-load-avx2.h
Normal file
|
@ -0,0 +1,340 @@
|
||||||
|
#ifndef BLAKE2_AVX2_BLAKE2B_LOAD_AVX2_H
|
||||||
|
#define BLAKE2_AVX2_BLAKE2B_LOAD_AVX2_H
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_0_1(b0) do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m0, m1); \
|
||||||
|
t1 = _mm256_unpacklo_epi64(m2, m3); \
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_0_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m0, m1);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m2, m3);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_0_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m7, m4);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m5, m6);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_0_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m7, m4);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m5, m6);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_1_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m7, m2);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m4, m6);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_1_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m5, m4);\
|
||||||
|
t1 = _mm256_alignr_epi8(m3, m7, 8);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_1_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m2, m0);\
|
||||||
|
t1 = _mm256_blend_epi32(m5, m0, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_1_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_alignr_epi8(m6, m1, 8);\
|
||||||
|
t1 = _mm256_blend_epi32(m3, m1, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_2_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_alignr_epi8(m6, m5, 8);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m2, m7);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_2_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m4, m0);\
|
||||||
|
t1 = _mm256_blend_epi32(m6, m1, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_2_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_alignr_epi8(m5, m4, 8);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m1, m3);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_2_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m2, m7);\
|
||||||
|
t1 = _mm256_blend_epi32(m0, m3, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_3_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m3, m1);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m6, m5);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_3_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m4, m0);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m6, m7);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_3_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_alignr_epi8(m1, m7, 8);\
|
||||||
|
t1 = _mm256_shuffle_epi32(m2, _MM_SHUFFLE(1,0,3,2));\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_3_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m4, m3);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m5, m0);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_4_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m4, m2);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m1, m5);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_4_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_blend_epi32(m3, m0, 0x33);\
|
||||||
|
t1 = _mm256_blend_epi32(m7, m2, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_4_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_alignr_epi8(m7, m1, 8);\
|
||||||
|
t1 = _mm256_alignr_epi8(m3, m5, 8);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_4_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m6, m0);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m6, m4);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_5_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m1, m3);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m0, m4);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_5_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m6, m5);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m5, m1);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_5_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_alignr_epi8(m2, m0, 8);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m3, m7);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_5_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m4, m6);\
|
||||||
|
t1 = _mm256_alignr_epi8(m7, m2, 8);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_6_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_blend_epi32(m0, m6, 0x33);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m7, m2);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_6_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m2, m7);\
|
||||||
|
t1 = _mm256_alignr_epi8(m5, m6, 8);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_6_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m4, m0);\
|
||||||
|
t1 = _mm256_blend_epi32(m4, m3, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_6_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m5, m3);\
|
||||||
|
t1 = _mm256_shuffle_epi32(m1, _MM_SHUFFLE(1,0,3,2));\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_7_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m6, m3);\
|
||||||
|
t1 = _mm256_blend_epi32(m1, m6, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_7_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_alignr_epi8(m7, m5, 8);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m0, m4);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_7_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_blend_epi32(m2, m1, 0x33);\
|
||||||
|
t1 = _mm256_alignr_epi8(m4, m7, 8);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_7_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m5, m0);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m2, m3);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_8_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m3, m7);\
|
||||||
|
t1 = _mm256_alignr_epi8(m0, m5, 8);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_8_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m7, m4);\
|
||||||
|
t1 = _mm256_alignr_epi8(m4, m1, 8);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_8_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m5, m6);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m6, m0);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_8_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_alignr_epi8(m1, m2, 8);\
|
||||||
|
t1 = _mm256_alignr_epi8(m2, m3, 8);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_9_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m5, m4);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m3, m0);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_9_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m1, m2);\
|
||||||
|
t1 = _mm256_blend_epi32(m2, m3, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_9_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m6, m7);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m4, m1);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_9_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_blend_epi32(m5, m0, 0x33);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m7, m6);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_10_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m0, m1);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m2, m3);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_10_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m0, m1);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m2, m3);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_10_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m7, m4);\
|
||||||
|
t1 = _mm256_unpacklo_epi64(m5, m6);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_10_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m7, m4);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m5, m6);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_11_1(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m7, m2);\
|
||||||
|
t1 = _mm256_unpackhi_epi64(m4, m6);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_11_2(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpacklo_epi64(m5, m4);\
|
||||||
|
t1 = _mm256_alignr_epi8(m3, m7, 8);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_11_3(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_unpackhi_epi64(m2, m0);\
|
||||||
|
t1 = _mm256_blend_epi32(m5, m0, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_LOAD_MSG_11_4(b0) \
|
||||||
|
do { \
|
||||||
|
t0 = _mm256_alignr_epi8(m6, m1, 8);\
|
||||||
|
t1 = _mm256_blend_epi32(m3, m1, 0x33);\
|
||||||
|
b0 = _mm256_blend_epi32(t0, t1, 0xF0);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
16
src/crypto/randomx/blake2/avx2/blake2b.h
Normal file
16
src/crypto/randomx/blake2/avx2/blake2b.h
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
#ifndef BLAKE2_AVX2_BLAKE2B_H
|
||||||
|
#define BLAKE2_AVX2_BLAKE2B_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int blake2b_avx2(void* out, size_t outlen, const void* in, size_t inlen);
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
141
src/crypto/randomx/blake2/avx2/blake2b_avx2.c
Normal file
141
src/crypto/randomx/blake2/avx2/blake2b_avx2.c
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "blake2.h"
|
||||||
|
#include "blake2b.h"
|
||||||
|
#include "blake2b-common.h"
|
||||||
|
|
||||||
|
ALIGN(64) static const uint64_t blake2b_IV[8] = {
|
||||||
|
UINT64_C(0x6A09E667F3BCC908), UINT64_C(0xBB67AE8584CAA73B),
|
||||||
|
UINT64_C(0x3C6EF372FE94F82B), UINT64_C(0xA54FF53A5F1D36F1),
|
||||||
|
UINT64_C(0x510E527FADE682D1), UINT64_C(0x9B05688C2B3E6C1F),
|
||||||
|
UINT64_C(0x1F83D9ABFB41BD6B), UINT64_C(0x5BE0CD19137E2179),
|
||||||
|
};
|
||||||
|
|
||||||
|
#define BLAKE2B_G1_V1(a, b, c, d, m) do { \
|
||||||
|
a = ADD(a, m); \
|
||||||
|
a = ADD(a, b); d = XOR(d, a); d = ROT32(d); \
|
||||||
|
c = ADD(c, d); b = XOR(b, c); b = ROT24(b); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_G2_V1(a, b, c, d, m) do { \
|
||||||
|
a = ADD(a, m); \
|
||||||
|
a = ADD(a, b); d = XOR(d, a); d = ROT16(d); \
|
||||||
|
c = ADD(c, d); b = XOR(b, c); b = ROT63(b); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_DIAG_V1(a, b, c, d) do { \
|
||||||
|
a = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(2,1,0,3)); \
|
||||||
|
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(1,0,3,2)); \
|
||||||
|
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(0,3,2,1)); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_UNDIAG_V1(a, b, c, d) do { \
|
||||||
|
a = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(0,3,2,1)); \
|
||||||
|
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(1,0,3,2)); \
|
||||||
|
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(2,1,0,3)); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#include "blake2b-load-avx2.h"
|
||||||
|
|
||||||
|
#define BLAKE2B_ROUND_V1(a, b, c, d, r, m) do { \
|
||||||
|
__m256i b0; \
|
||||||
|
BLAKE2B_LOAD_MSG_ ##r ##_1(b0); \
|
||||||
|
BLAKE2B_G1_V1(a, b, c, d, b0); \
|
||||||
|
BLAKE2B_LOAD_MSG_ ##r ##_2(b0); \
|
||||||
|
BLAKE2B_G2_V1(a, b, c, d, b0); \
|
||||||
|
BLAKE2B_DIAG_V1(a, b, c, d); \
|
||||||
|
BLAKE2B_LOAD_MSG_ ##r ##_3(b0); \
|
||||||
|
BLAKE2B_G1_V1(a, b, c, d, b0); \
|
||||||
|
BLAKE2B_LOAD_MSG_ ##r ##_4(b0); \
|
||||||
|
BLAKE2B_G2_V1(a, b, c, d, b0); \
|
||||||
|
BLAKE2B_UNDIAG_V1(a, b, c, d); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) do { \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 3, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 4, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 5, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 6, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 7, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 8, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 9, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \
|
||||||
|
BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define DECLARE_MESSAGE_WORDS(m) \
|
||||||
|
const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) + 0)); \
|
||||||
|
const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) + 16)); \
|
||||||
|
const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) + 32)); \
|
||||||
|
const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) + 48)); \
|
||||||
|
const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) + 64)); \
|
||||||
|
const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) + 80)); \
|
||||||
|
const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) + 96)); \
|
||||||
|
const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \
|
||||||
|
__m256i t0, t1;
|
||||||
|
|
||||||
|
#define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) do { \
|
||||||
|
DECLARE_MESSAGE_WORDS(m) \
|
||||||
|
const __m256i iv0 = a; \
|
||||||
|
const __m256i iv1 = b; \
|
||||||
|
__m256i c = LOAD(&blake2b_IV[0]); \
|
||||||
|
__m256i d = XOR( \
|
||||||
|
LOAD(&blake2b_IV[4]), \
|
||||||
|
_mm256_set_epi64x(f1, f0, t1, t0) \
|
||||||
|
); \
|
||||||
|
BLAKE2B_ROUNDS_V1(a, b, c, d, m); \
|
||||||
|
a = XOR(a, c); \
|
||||||
|
b = XOR(b, d); \
|
||||||
|
a = XOR(a, iv0); \
|
||||||
|
b = XOR(b, iv1); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
int blake2b_avx2(void* out_ptr, size_t outlen, const void* in_ptr, size_t inlen) {
|
||||||
|
const __m256i parameter_block = _mm256_set_epi64x(0, 0, 0, 0x01010000UL | (uint32_t)outlen);
|
||||||
|
ALIGN(64) uint8_t buffer[BLAKE2B_BLOCKBYTES];
|
||||||
|
__m256i a = XOR(LOAD(&blake2b_IV[0]), parameter_block);
|
||||||
|
__m256i b = LOAD(&blake2b_IV[4]);
|
||||||
|
uint64_t counter = 0;
|
||||||
|
const uint8_t* in = (const uint8_t*)in_ptr;
|
||||||
|
do {
|
||||||
|
const uint64_t flag = (inlen <= BLAKE2B_BLOCKBYTES) ? -1 : 0;
|
||||||
|
size_t block_size = BLAKE2B_BLOCKBYTES;
|
||||||
|
if(inlen < BLAKE2B_BLOCKBYTES) {
|
||||||
|
memcpy(buffer, in, inlen);
|
||||||
|
memset(buffer + inlen, 0, BLAKE2B_BLOCKBYTES - inlen);
|
||||||
|
block_size = inlen;
|
||||||
|
in = buffer;
|
||||||
|
}
|
||||||
|
counter += block_size;
|
||||||
|
BLAKE2B_COMPRESS_V1(a, b, in, counter, 0, flag, 0);
|
||||||
|
inlen -= block_size;
|
||||||
|
in += block_size;
|
||||||
|
} while(inlen > 0);
|
||||||
|
|
||||||
|
uint8_t* out = (uint8_t*)out_ptr;
|
||||||
|
|
||||||
|
switch (outlen) {
|
||||||
|
case 64:
|
||||||
|
STOREU(out + 32, b);
|
||||||
|
// Fall through
|
||||||
|
|
||||||
|
case 32:
|
||||||
|
STOREU(out, a);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
STOREU(buffer, a);
|
||||||
|
STOREU(buffer + 32, b);
|
||||||
|
memcpy(out, buffer, outlen);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
_mm256_zeroupper();
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -92,7 +92,12 @@ extern "C" {
|
||||||
int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||||
|
|
||||||
/* Simple API */
|
/* Simple API */
|
||||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen);
|
void rx_blake2b_compress_integer(blake2b_state * S, const uint8_t * block);
|
||||||
|
void rx_blake2b_compress_sse41(blake2b_state * S, const uint8_t * block);
|
||||||
|
int rx_blake2b_default(void* out, size_t outlen, const void* in, size_t inlen);
|
||||||
|
|
||||||
|
extern void (*rx_blake2b_compress)(blake2b_state * S, const uint8_t * block);
|
||||||
|
extern int (*rx_blake2b)(void* out, size_t outlen, const void* in, size_t inlen);
|
||||||
|
|
||||||
/* Argon2 Team - Begin Code */
|
/* Argon2 Team - Begin Code */
|
||||||
int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||||
|
|
|
@ -179,7 +179,7 @@ int rx_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void rx_blake2b_compress_integer(blake2b_state *S, const uint8_t *block) {
|
void rx_blake2b_compress_integer(blake2b_state *S, const uint8_t *block) {
|
||||||
uint64_t m[16];
|
uint64_t m[16];
|
||||||
uint64_t v[16];
|
uint64_t v[16];
|
||||||
unsigned int i, r;
|
unsigned int i, r;
|
||||||
|
@ -237,21 +237,6 @@ static void rx_blake2b_compress_integer(blake2b_state *S, const uint8_t *block)
|
||||||
#undef ROUND
|
#undef ROUND
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(XMRIG_FEATURE_SSE4_1)
|
|
||||||
|
|
||||||
uint32_t rx_blake2b_use_sse41 = 0;
|
|
||||||
void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t* block);
|
|
||||||
|
|
||||||
#define rx_blake2b_compress(S, block) \
|
|
||||||
if (rx_blake2b_use_sse41) \
|
|
||||||
rx_blake2b_compress_sse41(S, block); \
|
|
||||||
else \
|
|
||||||
rx_blake2b_compress_integer(S, block);
|
|
||||||
|
|
||||||
#else
|
|
||||||
#define rx_blake2b_compress(S, block) rx_blake2b_compress_integer(S, block);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
||||||
const uint8_t *pin = (const uint8_t *)in;
|
const uint8_t *pin = (const uint8_t *)in;
|
||||||
|
|
||||||
|
@ -322,7 +307,7 @@ int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen) {
|
int rx_blake2b_default(void *out, size_t outlen, const void *in, size_t inlen) {
|
||||||
blake2b_state S;
|
blake2b_state S;
|
||||||
int ret = -1;
|
int ret = -1;
|
||||||
|
|
||||||
|
|
|
@ -240,10 +240,17 @@ namespace randomx {
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cleanup() {
|
||||||
|
for (unsigned i = 0; i < RegistersCount; ++i) {
|
||||||
|
registerUsage[i] = -1;
|
||||||
|
}
|
||||||
|
nreg = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static const int_reg_t zero;
|
static const int_reg_t zero;
|
||||||
int registerUsage[RegistersCount];
|
int registerUsage[RegistersCount] = {};
|
||||||
NativeRegisterFile* nreg;
|
NativeRegisterFile* nreg = nullptr;
|
||||||
|
|
||||||
static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) {
|
static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) {
|
||||||
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
|
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
|
||||||
|
|
|
@ -167,6 +167,11 @@ namespace randomx {
|
||||||
|
|
||||||
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8, NOP9 };
|
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8, NOP9 };
|
||||||
|
|
||||||
|
static const uint8_t NOP13[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||||
|
static const uint8_t NOP14[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
||||||
|
static const uint8_t NOP25[] = { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||||
|
static const uint8_t NOP26[] = { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||||
|
|
||||||
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
|
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
|
||||||
{},
|
{},
|
||||||
{0x2E},
|
{0x2E},
|
||||||
|
@ -257,6 +262,10 @@ namespace randomx {
|
||||||
// AVX2 init is faster on Zen3
|
// AVX2 init is faster on Zen3
|
||||||
initDatasetAVX2 = true;
|
initDatasetAVX2 = true;
|
||||||
break;
|
break;
|
||||||
|
case xmrig::ICpuInfo::ARCH_ZEN4:
|
||||||
|
// AVX2 init is slower on Zen4
|
||||||
|
initDatasetAVX2 = false;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -407,7 +416,7 @@ namespace randomx {
|
||||||
*(uint32_t*)(code + codePos + 14) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
*(uint32_t*)(code + codePos + 14) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||||
if (hasAVX) {
|
if (hasAVX) {
|
||||||
uint32_t* p = (uint32_t*)(code + codePos + 61);
|
uint32_t* p = (uint32_t*)(code + codePos + 61);
|
||||||
*p = (*p & 0xFF000000U) | 0x0077F8C5U;
|
*p = (*p & 0xFF000000U) | 0x0077F8C5U; // vzeroupper
|
||||||
}
|
}
|
||||||
|
|
||||||
# ifdef XMRIG_FIX_RYZEN
|
# ifdef XMRIG_FIX_RYZEN
|
||||||
|
@ -419,7 +428,8 @@ namespace randomx {
|
||||||
|
|
||||||
memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask));
|
memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||||
codePos = codePosFirst;
|
codePos = codePosFirst;
|
||||||
prevCFROUND = 0;
|
prevCFROUND = -1;
|
||||||
|
prevFPOperation = -1;
|
||||||
|
|
||||||
//mark all registers as used
|
//mark all registers as used
|
||||||
uint64_t* r = (uint64_t*)registerUsage;
|
uint64_t* r = (uint64_t*)registerUsage;
|
||||||
|
@ -1155,7 +1165,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
prevCFROUND = 0;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
const uint64_t src = instr.src % RegisterCountFlt;
|
const uint64_t src = instr.src % RegisterCountFlt;
|
||||||
|
@ -1170,7 +1180,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
prevCFROUND = 0;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
@ -1187,7 +1197,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
prevCFROUND = 0;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
const uint64_t src = instr.src % RegisterCountFlt;
|
const uint64_t src = instr.src % RegisterCountFlt;
|
||||||
|
@ -1202,7 +1212,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
prevCFROUND = 0;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
@ -1230,7 +1240,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
prevCFROUND = 0;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
const uint64_t src = instr.src % RegisterCountFlt;
|
const uint64_t src = instr.src % RegisterCountFlt;
|
||||||
|
@ -1245,7 +1255,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
prevCFROUND = 0;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
@ -1272,7 +1282,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
prevCFROUND = 0;
|
prevFPOperation = pos;
|
||||||
|
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
|
@ -1283,21 +1293,18 @@ namespace randomx {
|
||||||
|
|
||||||
void JitCompilerX86::h_CFROUND(const Instruction& instr) {
|
void JitCompilerX86::h_CFROUND(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = prevCFROUND;
|
int32_t t = prevCFROUND;
|
||||||
|
|
||||||
if (pos) {
|
if (t > prevFPOperation) {
|
||||||
if (vm_flags & RANDOMX_FLAG_AMD) {
|
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||||
memcpy(p + pos + 0, NOP9, 9);
|
memcpy(p + t, NOP26, 26);
|
||||||
memcpy(p + pos + 9, NOP9, 9);
|
|
||||||
memcpy(p + pos + 18, NOP8, 8);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
memcpy(p + pos + 0, NOP8, 8);
|
memcpy(p + t, NOP14, 14);
|
||||||
memcpy(p + pos + 8, NOP6, 6);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = codePos;
|
uint32_t pos = codePos;
|
||||||
prevCFROUND = pos;
|
prevCFROUND = pos;
|
||||||
|
|
||||||
const uint32_t src = instr.src % RegistersCount;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
|
@ -1322,21 +1329,18 @@ namespace randomx {
|
||||||
|
|
||||||
void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) {
|
void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = prevCFROUND;
|
int32_t t = prevCFROUND;
|
||||||
|
|
||||||
if (pos) {
|
if (t > prevFPOperation) {
|
||||||
if (vm_flags & RANDOMX_FLAG_AMD) {
|
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||||
memcpy(p + pos + 0, NOP9, 9);
|
memcpy(p + t, NOP25, 25);
|
||||||
memcpy(p + pos + 9, NOP9, 9);
|
|
||||||
memcpy(p + pos + 18, NOP7, 7);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
memcpy(p + pos + 0, NOP8, 8);
|
memcpy(p + t, NOP13, 13);
|
||||||
memcpy(p + pos + 8, NOP5, 5);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = codePos;
|
uint32_t pos = codePos;
|
||||||
prevCFROUND = pos;
|
prevCFROUND = pos;
|
||||||
|
|
||||||
const uint64_t src = instr.src % RegistersCount;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
|
@ -1363,10 +1367,15 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
prevCFROUND = 0;
|
|
||||||
|
|
||||||
const int reg = instr.dst % RegistersCount;
|
const int reg = instr.dst % RegistersCount;
|
||||||
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
int32_t jmp_offset = registerUsage[reg];
|
||||||
|
|
||||||
|
// if it jumps over the previous FP instruction that uses rounding, treat it as if FP instruction happened now
|
||||||
|
if (jmp_offset <= prevFPOperation) {
|
||||||
|
prevFPOperation = pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
jmp_offset -= pos + 16;
|
||||||
|
|
||||||
if (jccErratum) {
|
if (jccErratum) {
|
||||||
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
|
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
|
||||||
|
|
|
@ -89,7 +89,8 @@ namespace randomx {
|
||||||
uint32_t codePos = 0;
|
uint32_t codePos = 0;
|
||||||
uint32_t codePosFirst = 0;
|
uint32_t codePosFirst = 0;
|
||||||
uint32_t vm_flags = 0;
|
uint32_t vm_flags = 0;
|
||||||
uint32_t prevCFROUND = 0;
|
int32_t prevCFROUND = -1;
|
||||||
|
int32_t prevFPOperation = -1;
|
||||||
|
|
||||||
# ifdef XMRIG_FIX_RYZEN
|
# ifdef XMRIG_FIX_RYZEN
|
||||||
std::pair<const void*, const void*> mainLoopBounds;
|
std::pair<const void*, const void*> mainLoopBounds;
|
||||||
|
|
|
@ -104,6 +104,8 @@ namespace randomx {
|
||||||
|
|
||||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||||
rx_store_vec_f128(®.e[i].lo, nreg.e[i]);
|
rx_store_vec_f128(®.e[i].lo, nreg.e[i]);
|
||||||
|
|
||||||
|
cleanup();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int softAes>
|
template<int softAes>
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "crypto/rx/Rx.h"
|
#include "crypto/rx/Rx.h"
|
||||||
|
#include "backend/cpu/Cpu.h"
|
||||||
#include "backend/cpu/CpuConfig.h"
|
#include "backend/cpu/CpuConfig.h"
|
||||||
#include "backend/cpu/CpuThreads.h"
|
#include "backend/cpu/CpuThreads.h"
|
||||||
#include "crypto/rx/RxConfig.h"
|
#include "crypto/rx/RxConfig.h"
|
||||||
|
@ -84,6 +85,16 @@ void xmrig::Rx::init(IRxListener *listener)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#include "crypto/randomx/blake2/blake2.h"
|
||||||
|
#if defined(XMRIG_FEATURE_AVX2)
|
||||||
|
#include "crypto/randomx/blake2/avx2/blake2b.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
void (*rx_blake2b_compress)(blake2b_state* S, const uint8_t * block) = rx_blake2b_compress_integer;
|
||||||
|
int (*rx_blake2b)(void* out, size_t outlen, const void* in, size_t inlen) = rx_blake2b_default;
|
||||||
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu)
|
bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu)
|
||||||
{
|
{
|
||||||
|
@ -133,6 +144,19 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
|
||||||
if (!cpu.isHwAES()) {
|
if (!cpu.isHwAES()) {
|
||||||
SelectSoftAESImpl(cpu.threads().get(seed.algorithm()).count());
|
SelectSoftAESImpl(cpu.threads().get(seed.algorithm()).count());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# if defined(XMRIG_FEATURE_SSE4_1)
|
||||||
|
if (Cpu::info()->has(ICpuInfo::FLAG_SSE41)) {
|
||||||
|
rx_blake2b_compress = rx_blake2b_compress_sse41;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#if defined(XMRIG_FEATURE_AVX2)
|
||||||
|
if (Cpu::info()->has(ICpuInfo::FLAG_AVX2)) {
|
||||||
|
rx_blake2b = blake2b_avx2;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
osInitialized = true;
|
osInitialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,12 +58,13 @@ static const std::array<const char *, RxConfig::ModeMax> modeNames = { "auto", "
|
||||||
|
|
||||||
|
|
||||||
#ifdef XMRIG_FEATURE_MSR
|
#ifdef XMRIG_FEATURE_MSR
|
||||||
constexpr size_t kMsrArraySize = 5;
|
constexpr size_t kMsrArraySize = 6;
|
||||||
|
|
||||||
static const std::array<MsrItems, kMsrArraySize> msrPresets = {
|
static const std::array<MsrItems, kMsrArraySize> msrPresets = {
|
||||||
MsrItems(),
|
MsrItems(),
|
||||||
MsrItems{{ 0xC0011020, 0ULL }, { 0xC0011021, 0x40ULL, ~0x20ULL }, { 0xC0011022, 0x1510000ULL }, { 0xC001102b, 0x2000cc16ULL }},
|
MsrItems{{ 0xC0011020, 0ULL }, { 0xC0011021, 0x40ULL, ~0x20ULL }, { 0xC0011022, 0x1510000ULL }, { 0xC001102b, 0x2000cc16ULL }},
|
||||||
MsrItems{{ 0xC0011020, 0x0004480000000000ULL }, { 0xC0011021, 0x001c000200000040ULL, ~0x20ULL }, { 0xC0011022, 0xc000000401500000ULL }, { 0xC001102b, 0x2000cc14ULL }},
|
MsrItems{{ 0xC0011020, 0x0004480000000000ULL }, { 0xC0011021, 0x001c000200000040ULL, ~0x20ULL }, { 0xC0011022, 0xc000000401500000ULL }, { 0xC001102b, 0x2000cc14ULL }},
|
||||||
|
MsrItems{{ 0xC0011020, 0x0004400000000000ULL }, { 0xC0011021, 0x0004000000000040ULL, ~0x20ULL }, { 0xC0011022, 0x8680000401570000ULL }, { 0xC001102b, 0x2040cc10ULL }},
|
||||||
MsrItems{{ 0x1a4, 0xf }},
|
MsrItems{{ 0x1a4, 0xf }},
|
||||||
MsrItems()
|
MsrItems()
|
||||||
};
|
};
|
||||||
|
|
|
@ -25,11 +25,6 @@
|
||||||
#include "crypto/rx/RxVm.h"
|
#include "crypto/rx/RxVm.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(XMRIG_FEATURE_SSE4_1)
|
|
||||||
extern "C" uint32_t rx_blake2b_use_sse41;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, const Assembly &assembly, uint32_t node)
|
randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, const Assembly &assembly, uint32_t node)
|
||||||
{
|
{
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
|
@ -51,10 +46,6 @@ randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
|
||||||
flags |= RANDOMX_FLAG_AMD;
|
flags |= RANDOMX_FLAG_AMD;
|
||||||
}
|
}
|
||||||
|
|
||||||
# if defined(XMRIG_FEATURE_SSE4_1)
|
|
||||||
rx_blake2b_use_sse41 = Cpu::info()->has(ICpuInfo::FLAG_SSE41) ? 1 : 0;
|
|
||||||
# endif
|
|
||||||
|
|
||||||
return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
|
return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
#define APP_ID "xmrig"
|
#define APP_ID "xmrig"
|
||||||
#define APP_NAME "XMRig"
|
#define APP_NAME "XMRig"
|
||||||
#define APP_DESC "XMRig miner"
|
#define APP_DESC "XMRig miner"
|
||||||
#define APP_VERSION "6.18.0"
|
#define APP_VERSION "6.18.1-dev"
|
||||||
#define APP_DOMAIN "xmrig.com"
|
#define APP_DOMAIN "xmrig.com"
|
||||||
#define APP_SITE "www.xmrig.com"
|
#define APP_SITE "www.xmrig.com"
|
||||||
#define APP_COPYRIGHT "Copyright (C) 2016-2022 xmrig.com"
|
#define APP_COPYRIGHT "Copyright (C) 2016-2022 xmrig.com"
|
||||||
|
@ -30,7 +30,7 @@
|
||||||
|
|
||||||
#define APP_VER_MAJOR 6
|
#define APP_VER_MAJOR 6
|
||||||
#define APP_VER_MINOR 18
|
#define APP_VER_MINOR 18
|
||||||
#define APP_VER_PATCH 0
|
#define APP_VER_PATCH 1
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# if (_MSC_VER >= 1930)
|
# if (_MSC_VER >= 1930)
|
||||||
|
|
Loading…
Reference in a new issue