mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-18 18:11:05 +00:00
Merge branch 'dev'
This commit is contained in:
commit
8aa4c8f19b
42 changed files with 1918 additions and 3406 deletions
12
CHANGELOG.md
12
CHANGELOG.md
|
@ -1,5 +1,15 @@
|
||||||
|
# v2.14.0
|
||||||
|
- **[#969](https://github.com/xmrig/xmrig/pull/969) Added new algorithm `cryptonight/rwz`, short alias `cn/rwz` (also known as CryptoNight ReverseWaltz), for upcoming [Graft](https://www.graft.network/) fork.**
|
||||||
|
- **[#931](https://github.com/xmrig/xmrig/issues/931) Added new algorithm `cryptonight/zls`, short alias `cn/zls` for [Zelerius Network](https://zelerius.org) fork.**
|
||||||
|
- **[#940](https://github.com/xmrig/xmrig/issues/940) Added new algorithm `cryptonight/double`, short alias `cn/double` (also known as CryptoNight HeavyX), for [X-CASH](https://x-cash.org/).**
|
||||||
|
- [#951](https://github.com/xmrig/xmrig/issues/951#issuecomment-469581529) Fixed crash when AVX is disabled at the OS level.
|
||||||
|
- [#952](https://github.com/xmrig/xmrig/issues/952) Fixed compile error on some Linux systems.
|
||||||
|
- [#957](https://github.com/xmrig/xmrig/issues/957#issuecomment-468890667) Added support for embedded config.
|
||||||
|
- [#958](https://github.com/xmrig/xmrig/pull/958) Fixed incorrect user agent on ARM platforms.
|
||||||
|
- [#968](https://github.com/xmrig/xmrig/pull/968) Optimized `cn/r` algorithm performance.
|
||||||
|
|
||||||
# v2.13.1
|
# v2.13.1
|
||||||
[#946](https://github.com/xmrig/xmrig/pull/946) Optimized software AES implementations for CPUs without hardware AES support. `cn/r`, `cn/wow` up to 2.6 times faster, 4-9% improvements for other algorithms.
|
- [#946](https://github.com/xmrig/xmrig/pull/946) Optimized software AES implementations for CPUs without hardware AES support. `cn/r`, `cn/wow` up to 2.6 times faster, 4-9% improvements for other algorithms.
|
||||||
|
|
||||||
# v2.13.0
|
# v2.13.0
|
||||||
- **[#938](https://github.com/xmrig/xmrig/issues/938) Added support for new algorithm `cryptonight/r`, short alias `cn/r` (also known as CryptoNightR or CryptoNight variant 4), for upcoming [Monero](https://www.getmonero.org/) fork on March 9, thanks [@SChernykh](https://github.com/SChernykh).**
|
- **[#938](https://github.com/xmrig/xmrig/issues/938) Added support for new algorithm `cryptonight/r`, short alias `cn/r` (also known as CryptoNightR or CryptoNight variant 4), for upcoming [Monero](https://www.getmonero.org/) fork on March 9, thanks [@SChernykh](https://github.com/SChernykh).**
|
||||||
|
|
|
@ -1,17 +1,18 @@
|
||||||
cmake_minimum_required(VERSION 2.8)
|
cmake_minimum_required(VERSION 2.8)
|
||||||
project(xmrig)
|
project(xmrig)
|
||||||
|
|
||||||
option(WITH_LIBCPUID "Use Libcpuid" ON)
|
option(WITH_LIBCPUID "Use Libcpuid" ON)
|
||||||
option(WITH_AEON "CryptoNight-Lite support" ON)
|
option(WITH_AEON "CryptoNight-Lite support" ON)
|
||||||
option(WITH_SUMO "CryptoNight-Heavy support" ON)
|
option(WITH_SUMO "CryptoNight-Heavy support" ON)
|
||||||
option(WITH_CN_PICO "CryptoNight-Pico support" ON)
|
option(WITH_CN_PICO "CryptoNight-Pico support" ON)
|
||||||
option(WITH_CN_GPU "CryptoNight-GPU support" ON)
|
option(WITH_CN_GPU "CryptoNight-GPU support" ON)
|
||||||
option(WITH_HTTPD "HTTP REST API" ON)
|
option(WITH_HTTPD "HTTP REST API" ON)
|
||||||
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
|
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
|
||||||
option(WITH_TLS "Enable OpenSSL support" ON)
|
option(WITH_TLS "Enable OpenSSL support" ON)
|
||||||
option(WITH_ASM "Enable ASM PoW implementations" ON)
|
option(WITH_ASM "Enable ASM PoW implementations" ON)
|
||||||
option(BUILD_STATIC "Build static binary" OFF)
|
option(BUILD_STATIC "Build static binary" OFF)
|
||||||
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
||||||
|
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
|
||||||
|
|
||||||
include (CheckIncludeFile)
|
include (CheckIncludeFile)
|
||||||
include (cmake/cpu.cmake)
|
include (cmake/cpu.cmake)
|
||||||
|
@ -65,6 +66,7 @@ set(HEADERS
|
||||||
src/common/utils/mm_malloc.h
|
src/common/utils/mm_malloc.h
|
||||||
src/common/xmrig.h
|
src/common/xmrig.h
|
||||||
src/core/ConfigLoader_platform.h
|
src/core/ConfigLoader_platform.h
|
||||||
|
src/core/ConfigLoader_default.h
|
||||||
src/core/Controller.h
|
src/core/Controller.h
|
||||||
src/interfaces/IJobResultListener.h
|
src/interfaces/IJobResultListener.h
|
||||||
src/interfaces/IThread.h
|
src/interfaces/IThread.h
|
||||||
|
@ -154,7 +156,6 @@ set(SOURCES_CRYPTO
|
||||||
src/crypto/c_blake256.c
|
src/crypto/c_blake256.c
|
||||||
src/crypto/c_jh.c
|
src/crypto/c_jh.c
|
||||||
src/crypto/c_skein.c
|
src/crypto/c_skein.c
|
||||||
src/crypto/CryptonightR_gen.cpp
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
|
@ -249,6 +250,10 @@ if (NOT WITH_CN_PICO)
|
||||||
add_definitions(/DXMRIG_NO_CN_PICO)
|
add_definitions(/DXMRIG_NO_CN_PICO)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (WITH_EMBEDDED_CONFIG)
|
||||||
|
add_definitions(/DXMRIG_FEATURE_EMBEDDED_CONFIG)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (WITH_HTTPD)
|
if (WITH_HTTPD)
|
||||||
find_package(MHD)
|
find_package(MHD)
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
|
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||||
set(XMRIG_ASM_FILES
|
set(XMRIG_ASM_FILES
|
||||||
"src/crypto/asm/win64/cn_main_loop.S"
|
"src/crypto/asm/win64/cn_main_loop.S"
|
||||||
"src/crypto/asm/win64/CryptonightR_template.S"
|
"src/crypto/asm/CryptonightR_template.S"
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
set(XMRIG_ASM_FILES
|
set(XMRIG_ASM_FILES
|
||||||
|
@ -36,7 +36,7 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES})
|
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES})
|
||||||
set(XMRIG_ASM_SOURCES src/crypto/Asm.h src/crypto/Asm.cpp)
|
set(XMRIG_ASM_SOURCES src/crypto/Asm.h src/crypto/Asm.cpp src/crypto/CryptonightR_gen.cpp)
|
||||||
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
||||||
else()
|
else()
|
||||||
set(XMRIG_ASM_SOURCES "")
|
set(XMRIG_ASM_SOURCES "")
|
||||||
|
|
|
@ -47,3 +47,47 @@ const char *xmrig::Json::getString(const rapidjson::Value &obj, const char *key,
|
||||||
|
|
||||||
return defaultValue;
|
return defaultValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads the member named `key` from `obj` as an int.
// Returns the stored value only when the member exists and IsInt() reports
// true for it; in every other case (missing key, wrong type) the caller's
// `defaultValue` is returned unchanged.
int xmrig::Json::getInt(const rapidjson::Value &obj, const char *key, int defaultValue)
|
||||||
|
{
|
||||||
|
auto i = obj.FindMember(key);
|
||||||
|
if (i != obj.MemberEnd() && i->value.IsInt()) {
|
||||||
|
return i->value.GetInt();
|
||||||
|
}
|
||||||
|

||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads the member named `key` from `obj` as an int64_t.
// Returns the stored value only when the member exists and IsInt64() reports
// true for it; otherwise `defaultValue` is returned unchanged.
int64_t xmrig::Json::getInt64(const rapidjson::Value &obj, const char *key, int64_t defaultValue)
|
||||||
|
{
|
||||||
|
auto i = obj.FindMember(key);
|
||||||
|
if (i != obj.MemberEnd() && i->value.IsInt64()) {
|
||||||
|
return i->value.GetInt64();
|
||||||
|
}
|
||||||
|

||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads the member named `key` from `obj` as a uint64_t.
// Returns the stored value only when the member exists and IsUint64() reports
// true for it; otherwise `defaultValue` is returned unchanged.
uint64_t xmrig::Json::getUint64(const rapidjson::Value &obj, const char *key, uint64_t defaultValue)
|
||||||
|
{
|
||||||
|
auto i = obj.FindMember(key);
|
||||||
|
if (i != obj.MemberEnd() && i->value.IsUint64()) {
|
||||||
|
return i->value.GetUint64();
|
||||||
|
}
|
||||||
|

||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads the member named `key` from `obj` as an unsigned int.
// Returns the stored value only when the member exists and IsUint() reports
// true for it; otherwise `defaultValue` is returned unchanged.
unsigned xmrig::Json::getUint(const rapidjson::Value &obj, const char *key, unsigned defaultValue)
|
||||||
|
{
|
||||||
|
auto i = obj.FindMember(key);
|
||||||
|
if (i != obj.MemberEnd() && i->value.IsUint()) {
|
||||||
|
return i->value.GetUint();
|
||||||
|
}
|
||||||
|

||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
|
@ -36,7 +36,11 @@ class Json
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static bool getBool(const rapidjson::Value &obj, const char *key, bool defaultValue = false);
|
static bool getBool(const rapidjson::Value &obj, const char *key, bool defaultValue = false);
|
||||||
static const char *getString(const rapidjson::Value &obj, const char *key, const char *defaultValue = nullptr);
|
static const char *getString(const rapidjson::Value &obj, const char *key, const char *defaultValue = nullptr);
|
||||||
|
static int getInt(const rapidjson::Value &obj, const char *key, int defaultValue = 0);
|
||||||
|
static int64_t getInt64(const rapidjson::Value &obj, const char *key, int64_t defaultValue = 0);
|
||||||
|
static uint64_t getUint64(const rapidjson::Value &obj, const char *key, uint64_t defaultValue = 0);
|
||||||
|
static unsigned getUint(const rapidjson::Value &obj, const char *key, unsigned defaultValue = 0);
|
||||||
|
|
||||||
static bool get(const char *fileName, rapidjson::Document &doc);
|
static bool get(const char *fileName, rapidjson::Document &doc);
|
||||||
static bool save(const char *fileName, const rapidjson::Document &doc);
|
static bool save(const char *fileName, const rapidjson::Document &doc);
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
#include <uv.h>
|
#include <uv.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,8 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace xmrig {
|
||||||
|
|
||||||
static const char *kEnabled = "enabled";
|
static const char *kEnabled = "enabled";
|
||||||
static const char *kFingerprint = "tls-fingerprint";
|
static const char *kFingerprint = "tls-fingerprint";
|
||||||
static const char *kKeepalive = "keepalive";
|
static const char *kKeepalive = "keepalive";
|
||||||
|
@ -56,6 +58,8 @@ static const char *kUrl = "url";
|
||||||
static const char *kUser = "user";
|
static const char *kUser = "user";
|
||||||
static const char *kVariant = "variant";
|
static const char *kVariant = "variant";
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
xmrig::Pool::Pool() :
|
xmrig::Pool::Pool() :
|
||||||
m_enabled(true),
|
m_enabled(true),
|
||||||
|
@ -128,6 +132,7 @@ xmrig::Pool::Pool(const rapidjson::Value &object) :
|
||||||
|
|
||||||
|
|
||||||
xmrig::Pool::Pool(const char *host, uint16_t port, const char *user, const char *password, int keepAlive, bool nicehash, bool tls) :
|
xmrig::Pool::Pool(const char *host, uint16_t port, const char *user, const char *password, int keepAlive, bool nicehash, bool tls) :
|
||||||
|
m_enabled(true),
|
||||||
m_nicehash(nicehash),
|
m_nicehash(nicehash),
|
||||||
m_tls(tls),
|
m_tls(tls),
|
||||||
m_keepAlive(keepAlive),
|
m_keepAlive(keepAlive),
|
||||||
|
@ -492,6 +497,9 @@ void xmrig::Pool::rebuild()
|
||||||
addVariant(VARIANT_XAO);
|
addVariant(VARIANT_XAO);
|
||||||
addVariant(VARIANT_RTO);
|
addVariant(VARIANT_RTO);
|
||||||
addVariant(VARIANT_GPU);
|
addVariant(VARIANT_GPU);
|
||||||
|
addVariant(VARIANT_RWZ);
|
||||||
|
addVariant(VARIANT_ZLS);
|
||||||
|
addVariant(VARIANT_DOUBLE);
|
||||||
addVariant(VARIANT_AUTO);
|
addVariant(VARIANT_AUTO);
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,6 +53,11 @@
|
||||||
#include "rapidjson/fwd.h"
|
#include "rapidjson/fwd.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef XMRIG_FEATURE_EMBEDDED_CONFIG
|
||||||
|
# include "core/ConfigLoader_default.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
xmrig::ConfigWatcher *xmrig::ConfigLoader::m_watcher = nullptr;
|
xmrig::ConfigWatcher *xmrig::ConfigLoader::m_watcher = nullptr;
|
||||||
xmrig::IConfigCreator *xmrig::ConfigLoader::m_creator = nullptr;
|
xmrig::IConfigCreator *xmrig::ConfigLoader::m_creator = nullptr;
|
||||||
xmrig::IConfigListener *xmrig::ConfigLoader::m_listener = nullptr;
|
xmrig::IConfigListener *xmrig::ConfigLoader::m_listener = nullptr;
|
||||||
|
@ -180,6 +185,15 @@ xmrig::IConfig *xmrig::ConfigLoader::load(Process *process, IConfigCreator *crea
|
||||||
loadFromFile(config, process->location(Process::ExeLocation, "config.json"));
|
loadFromFile(config, process->location(Process::ExeLocation, "config.json"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ifdef XMRIG_FEATURE_EMBEDDED_CONFIG
|
||||||
|
if (!config->finalize()) {
|
||||||
|
delete config;
|
||||||
|
|
||||||
|
config = m_creator->create();
|
||||||
|
loadFromJSON(config, default_config);
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
if (!config->finalize()) {
|
if (!config->finalize()) {
|
||||||
if (!config->algorithm().isValid()) {
|
if (!config->algorithm().isValid()) {
|
||||||
fprintf(stderr, "No valid algorithm specified. Exiting.\n");
|
fprintf(stderr, "No valid algorithm specified. Exiting.\n");
|
||||||
|
|
|
@ -36,6 +36,10 @@
|
||||||
# define bit_AES (1 << 25)
|
# define bit_AES (1 << 25)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef bit_OSXSAVE
|
||||||
|
# define bit_OSXSAVE (1 << 27)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef bit_AVX2
|
#ifndef bit_AVX2
|
||||||
# define bit_AVX2 (1 << 5)
|
# define bit_AVX2 (1 << 5)
|
||||||
#endif
|
#endif
|
||||||
|
@ -107,10 +111,19 @@ static inline bool has_avx2()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Queries the PROCESSOR_INFO CPUID leaf and reports whether the bit_OSXSAVE
// flag is set in the ECX result register.
// NOTE(review): the name reads "ossave" although the bit tested is OSXSAVE;
// renaming would also require updating the BasicCpuInfo constructor, which
// combines this result with has_avx2().
static inline bool has_ossave()
|
||||||
|
{
|
||||||
|
int32_t cpu_info[4] = { 0 };
|
||||||
|
cpuid(PROCESSOR_INFO, cpu_info);
|
||||||
|

||||||
|
return (cpu_info[ECX_Reg] & bit_OSXSAVE) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
xmrig::BasicCpuInfo::BasicCpuInfo() :
|
xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||||
m_assembly(ASM_NONE),
|
m_assembly(ASM_NONE),
|
||||||
m_aes(has_aes_ni()),
|
m_aes(has_aes_ni()),
|
||||||
m_avx2(has_avx2()),
|
m_avx2(has_avx2() && has_ossave()),
|
||||||
m_brand(),
|
m_brand(),
|
||||||
m_threads(std::thread::hardware_concurrency())
|
m_threads(std::thread::hardware_concurrency())
|
||||||
{
|
{
|
||||||
|
|
|
@ -54,18 +54,21 @@ struct AlgoData
|
||||||
|
|
||||||
|
|
||||||
static AlgoData const algorithms[] = {
|
static AlgoData const algorithms[] = {
|
||||||
{ "cryptonight", "cn", xmrig::CRYPTONIGHT, xmrig::VARIANT_AUTO },
|
{ "cryptonight", "cn", xmrig::CRYPTONIGHT, xmrig::VARIANT_AUTO },
|
||||||
{ "cryptonight/0", "cn/0", xmrig::CRYPTONIGHT, xmrig::VARIANT_0 },
|
{ "cryptonight/0", "cn/0", xmrig::CRYPTONIGHT, xmrig::VARIANT_0 },
|
||||||
{ "cryptonight/1", "cn/1", xmrig::CRYPTONIGHT, xmrig::VARIANT_1 },
|
{ "cryptonight/1", "cn/1", xmrig::CRYPTONIGHT, xmrig::VARIANT_1 },
|
||||||
{ "cryptonight/xtl", "cn/xtl", xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL },
|
{ "cryptonight/xtl", "cn/xtl", xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL },
|
||||||
{ "cryptonight/msr", "cn/msr", xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR },
|
{ "cryptonight/msr", "cn/msr", xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR },
|
||||||
{ "cryptonight/xao", "cn/xao", xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO },
|
{ "cryptonight/xao", "cn/xao", xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO },
|
||||||
{ "cryptonight/rto", "cn/rto", xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO },
|
{ "cryptonight/rto", "cn/rto", xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO },
|
||||||
{ "cryptonight/2", "cn/2", xmrig::CRYPTONIGHT, xmrig::VARIANT_2 },
|
{ "cryptonight/2", "cn/2", xmrig::CRYPTONIGHT, xmrig::VARIANT_2 },
|
||||||
{ "cryptonight/half", "cn/half", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF },
|
{ "cryptonight/half", "cn/half", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF },
|
||||||
{ "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF },
|
{ "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF },
|
||||||
{ "cryptonight/wow", "cn/wow", xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW },
|
{ "cryptonight/wow", "cn/wow", xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW },
|
||||||
{ "cryptonight/r", "cn/r", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 },
|
{ "cryptonight/r", "cn/r", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 },
|
||||||
|
{ "cryptonight/rwz", "cn/rwz", xmrig::CRYPTONIGHT, xmrig::VARIANT_RWZ },
|
||||||
|
{ "cryptonight/zls", "cn/zls", xmrig::CRYPTONIGHT, xmrig::VARIANT_ZLS },
|
||||||
|
{ "cryptonight/double", "cn/double", xmrig::CRYPTONIGHT, xmrig::VARIANT_DOUBLE },
|
||||||
|
|
||||||
# ifndef XMRIG_NO_AEON
|
# ifndef XMRIG_NO_AEON
|
||||||
{ "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO },
|
{ "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO },
|
||||||
|
@ -133,6 +136,9 @@ static const char *variants[] = {
|
||||||
"gpu",
|
"gpu",
|
||||||
"wow",
|
"wow",
|
||||||
"r",
|
"r",
|
||||||
|
"rwz",
|
||||||
|
"zls",
|
||||||
|
"double"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -137,6 +137,12 @@ bool xmrig::Job::setBlob(const char *blob)
|
||||||
else if (m_algorithm.variant() == VARIANT_WOW && m_blob[0] < 11) {
|
else if (m_algorithm.variant() == VARIANT_WOW && m_blob[0] < 11) {
|
||||||
m_algorithm.setVariant(VARIANT_2);
|
m_algorithm.setVariant(VARIANT_2);
|
||||||
}
|
}
|
||||||
|
else if (m_algorithm.variant() == VARIANT_RWZ && m_blob[0] < 12) {
|
||||||
|
m_algorithm.setVariant(VARIANT_2);
|
||||||
|
}
|
||||||
|
else if (m_algorithm.variant() == VARIANT_ZLS && m_blob[0] < 8) {
|
||||||
|
m_algorithm.setVariant(VARIANT_2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# ifdef XMRIG_PROXY_PROJECT
|
# ifdef XMRIG_PROXY_PROJECT
|
||||||
|
|
|
@ -61,21 +61,24 @@ enum AlgoVariant {
|
||||||
|
|
||||||
|
|
||||||
enum Variant {
|
enum Variant {
|
||||||
VARIANT_AUTO = -1, // Autodetect
|
VARIANT_AUTO = -1, // Autodetect
|
||||||
VARIANT_0 = 0, // Original CryptoNight or CryptoNight-Heavy
|
VARIANT_0 = 0, // Original CryptoNight or CryptoNight-Heavy
|
||||||
VARIANT_1 = 1, // CryptoNight variant 1 also known as Monero7 and CryptoNightV7
|
VARIANT_1 = 1, // CryptoNight variant 1 also known as Monero7 and CryptoNightV7
|
||||||
VARIANT_TUBE = 2, // Modified CryptoNight-Heavy (TUBE only)
|
VARIANT_TUBE = 2, // Modified CryptoNight-Heavy (TUBE only)
|
||||||
VARIANT_XTL = 3, // Modified CryptoNight variant 1 (Stellite only)
|
VARIANT_XTL = 3, // Modified CryptoNight variant 1 (Stellite only)
|
||||||
VARIANT_MSR = 4, // Modified CryptoNight variant 1 (Masari only)
|
VARIANT_MSR = 4, // Modified CryptoNight variant 1 (Masari only)
|
||||||
VARIANT_XHV = 5, // Modified CryptoNight-Heavy (Haven Protocol only)
|
VARIANT_XHV = 5, // Modified CryptoNight-Heavy (Haven Protocol only)
|
||||||
VARIANT_XAO = 6, // Modified CryptoNight variant 0 (Alloy only)
|
VARIANT_XAO = 6, // Modified CryptoNight variant 0 (Alloy only)
|
||||||
VARIANT_RTO = 7, // Modified CryptoNight variant 1 (Arto only)
|
VARIANT_RTO = 7, // Modified CryptoNight variant 1 (Arto only)
|
||||||
VARIANT_2 = 8, // CryptoNight variant 2
|
VARIANT_2 = 8, // CryptoNight variant 2
|
||||||
VARIANT_HALF = 9, // CryptoNight variant 2 with half iterations (Masari/Stellite)
|
VARIANT_HALF = 9, // CryptoNight variant 2 with half iterations (Masari/Stellite)
|
||||||
VARIANT_TRTL = 10, // CryptoNight Turtle (TRTL)
|
VARIANT_TRTL = 10, // CryptoNight Turtle (TRTL)
|
||||||
VARIANT_GPU = 11, // CryptoNight-GPU (Ryo)
|
VARIANT_GPU = 11, // CryptoNight-GPU (Ryo)
|
||||||
VARIANT_WOW = 12, // CryptoNightR (Wownero)
|
VARIANT_WOW = 12, // CryptoNightR (Wownero)
|
||||||
VARIANT_4 = 13, // CryptoNightR (Monero's variant 4)
|
VARIANT_4 = 13, // CryptoNightR (Monero's variant 4)
|
||||||
|
VARIANT_RWZ = 14, // CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft)
|
||||||
|
VARIANT_ZLS = 15, // CryptoNight variant 2 with 3/4 iterations (Zelerius)
|
||||||
|
VARIANT_DOUBLE = 16, // CryptoNight variant 2 with double iterations (X-CASH)
|
||||||
VARIANT_MAX
|
VARIANT_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -39,5 +39,5 @@
|
||||||
"safe": false,
|
"safe": false,
|
||||||
"threads": null,
|
"threads": null,
|
||||||
"user-agent": null,
|
"user-agent": null,
|
||||||
"watch": false
|
"watch": true
|
||||||
}
|
}
|
84
src/core/ConfigLoader_default.h
Normal file
84
src/core/ConfigLoader_default.h
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
/* XMRig
|
||||||
|
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||||
|
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||||
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XMRIG_CONFIGLOADER_DEFAULT_H
|
||||||
|
#define XMRIG_CONFIGLOADER_DEFAULT_H
|
||||||
|
|
||||||
|
|
||||||
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef XMRIG_FEATURE_EMBEDDED_CONFIG
|
||||||
|
const static char *default_config =
|
||||||
|
R"===(
|
||||||
|
{
|
||||||
|
"algo": "cryptonight",
|
||||||
|
"api": {
|
||||||
|
"port": 0,
|
||||||
|
"access-token": null,
|
||||||
|
"id": null,
|
||||||
|
"worker-id": null,
|
||||||
|
"ipv6": false,
|
||||||
|
"restricted": true
|
||||||
|
},
|
||||||
|
"asm": true,
|
||||||
|
"autosave": true,
|
||||||
|
"av": 0,
|
||||||
|
"background": false,
|
||||||
|
"colors": true,
|
||||||
|
"cpu-affinity": null,
|
||||||
|
"cpu-priority": null,
|
||||||
|
"donate-level": 5,
|
||||||
|
"huge-pages": true,
|
||||||
|
"hw-aes": null,
|
||||||
|
"log-file": null,
|
||||||
|
"max-cpu-usage": 100,
|
||||||
|
"pools": [
|
||||||
|
{
|
||||||
|
"url": "donate.v2.xmrig.com:3333",
|
||||||
|
"user": "YOUR_WALLET_ADDRESS",
|
||||||
|
"pass": "x",
|
||||||
|
"rig-id": null,
|
||||||
|
"nicehash": false,
|
||||||
|
"keepalive": false,
|
||||||
|
"variant": -1,
|
||||||
|
"tls": false,
|
||||||
|
"tls-fingerprint": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"print-time": 60,
|
||||||
|
"retries": 5,
|
||||||
|
"retry-pause": 5,
|
||||||
|
"safe": false,
|
||||||
|
"threads": null,
|
||||||
|
"user-agent": null,
|
||||||
|
"watch": false
|
||||||
|
}
|
||||||
|
)===";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
} /* namespace xmrig */
|
||||||
|
|
||||||
|
#endif /* XMRIG_CONFIGLOADER_DEFAULT_H */
|
|
@ -85,7 +85,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
m_avx2 = data.flags[CPU_FEATURE_AVX2];
|
m_avx2 = data.flags[CPU_FEATURE_AVX2] && data.flags[CPU_FEATURE_OSXSAVE];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -436,7 +436,7 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
|
||||||
uint64_t* mem_out = (uint64_t*)&l[idx];
|
uint64_t* mem_out = (uint64_t*)&l[idx];
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == xmrig::VARIANT_2) {
|
||||||
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx);
|
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
||||||
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
|
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
|
||||||
} else {
|
} else {
|
||||||
__m128i tmp = _mm_xor_si128(bx0, cx);
|
__m128i tmp = _mm_xor_si128(bx0, cx);
|
||||||
|
@ -530,9 +530,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == xmrig::VARIANT_2) {
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (VARIANT == xmrig::VARIANT_4) {
|
||||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
|
||||||
} else {
|
} else {
|
||||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
|
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -709,9 +709,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == xmrig::VARIANT_2) {
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (VARIANT == xmrig::VARIANT_4) {
|
||||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
|
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
|
||||||
} else {
|
} else {
|
||||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
|
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -767,9 +767,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == xmrig::VARIANT_2) {
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (VARIANT == xmrig::VARIANT_4) {
|
||||||
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
|
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
|
||||||
} else {
|
} else {
|
||||||
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
|
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,9 @@ constexpr const uint32_t CRYPTONIGHT_MASK = 0x1FFFF0;
|
||||||
constexpr const uint32_t CRYPTONIGHT_ITER = 0x80000;
|
constexpr const uint32_t CRYPTONIGHT_ITER = 0x80000;
|
||||||
constexpr const uint32_t CRYPTONIGHT_HALF_ITER = 0x40000;
|
constexpr const uint32_t CRYPTONIGHT_HALF_ITER = 0x40000;
|
||||||
constexpr const uint32_t CRYPTONIGHT_XAO_ITER = 0x100000;
|
constexpr const uint32_t CRYPTONIGHT_XAO_ITER = 0x100000;
|
||||||
|
constexpr const uint32_t CRYPTONIGHT_DOUBLE_ITER = 0x100000;
|
||||||
|
constexpr const uint32_t CRYPTONIGHT_WALTZ_ITER = 0x60000;
|
||||||
|
constexpr const uint32_t CRYPTONIGHT_ZLS_ITER = 0x60000;
|
||||||
|
|
||||||
constexpr const uint32_t CRYPTONIGHT_GPU_ITER = 0xC000;
|
constexpr const uint32_t CRYPTONIGHT_GPU_ITER = 0xC000;
|
||||||
constexpr const uint32_t CRYPTONIGHT_GPU_MASK = 0x1FFFC0;
|
constexpr const uint32_t CRYPTONIGHT_GPU_MASK = 0x1FFFC0;
|
||||||
|
@ -134,6 +137,9 @@ template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_MSR>()
|
||||||
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XAO>() { return CRYPTONIGHT_XAO_ITER; }
|
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XAO>() { return CRYPTONIGHT_XAO_ITER; }
|
||||||
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_RTO>() { return CRYPTONIGHT_ITER; }
|
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_RTO>() { return CRYPTONIGHT_ITER; }
|
||||||
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_GPU>() { return CRYPTONIGHT_GPU_ITER; }
|
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_GPU>() { return CRYPTONIGHT_GPU_ITER; }
|
||||||
|
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_RWZ>() { return CRYPTONIGHT_WALTZ_ITER; }
|
||||||
|
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_ZLS>() { return CRYPTONIGHT_ZLS_ITER; }
|
||||||
|
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_DOUBLE>() { return CRYPTONIGHT_DOUBLE_ITER; }
|
||||||
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_0>() { return CRYPTONIGHT_LITE_ITER; }
|
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_0>() { return CRYPTONIGHT_LITE_ITER; }
|
||||||
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_1>() { return CRYPTONIGHT_LITE_ITER; }
|
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_1>() { return CRYPTONIGHT_LITE_ITER; }
|
||||||
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_HEAVY, VARIANT_0>() { return CRYPTONIGHT_HEAVY_ITER; }
|
template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_HEAVY, VARIANT_0>() { return CRYPTONIGHT_HEAVY_ITER; }
|
||||||
|
@ -153,11 +159,16 @@ inline uint32_t cn_select_iter(Algo algorithm, Variant variant)
|
||||||
return CRYPTONIGHT_GPU_ITER;
|
return CRYPTONIGHT_GPU_ITER;
|
||||||
|
|
||||||
case VARIANT_RTO:
|
case VARIANT_RTO:
|
||||||
|
case VARIANT_DOUBLE:
|
||||||
return CRYPTONIGHT_XAO_ITER;
|
return CRYPTONIGHT_XAO_ITER;
|
||||||
|
|
||||||
case VARIANT_TRTL:
|
case VARIANT_TRTL:
|
||||||
return CRYPTONIGHT_TRTL_ITER;
|
return CRYPTONIGHT_TRTL_ITER;
|
||||||
|
|
||||||
|
case VARIANT_RWZ:
|
||||||
|
case VARIANT_ZLS:
|
||||||
|
return CRYPTONIGHT_WALTZ_ITER;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -184,26 +195,29 @@ inline uint32_t cn_select_iter(Algo algorithm, Variant variant)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<Variant variant> inline constexpr Variant cn_base_variant() { return VARIANT_0; }
|
template<Variant variant> inline constexpr Variant cn_base_variant() { return VARIANT_0; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_0>() { return VARIANT_0; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_0>() { return VARIANT_0; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_1>() { return VARIANT_1; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_1>() { return VARIANT_1; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_TUBE>() { return VARIANT_1; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_TUBE>() { return VARIANT_1; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_XTL>() { return VARIANT_1; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_XTL>() { return VARIANT_1; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_MSR>() { return VARIANT_1; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_MSR>() { return VARIANT_1; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_XHV>() { return VARIANT_0; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_XHV>() { return VARIANT_0; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_XAO>() { return VARIANT_0; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_XAO>() { return VARIANT_0; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_RTO>() { return VARIANT_1; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_RTO>() { return VARIANT_1; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_2>() { return VARIANT_2; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_2>() { return VARIANT_2; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_HALF>() { return VARIANT_2; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_HALF>() { return VARIANT_2; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_TRTL>() { return VARIANT_2; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_TRTL>() { return VARIANT_2; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_GPU>() { return VARIANT_GPU; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_GPU>() { return VARIANT_GPU; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_WOW>() { return VARIANT_2; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_WOW>() { return VARIANT_2; }
|
||||||
template<> inline constexpr Variant cn_base_variant<VARIANT_4>() { return VARIANT_2; }
|
template<> inline constexpr Variant cn_base_variant<VARIANT_4>() { return VARIANT_2; }
|
||||||
|
template<> inline constexpr Variant cn_base_variant<VARIANT_RWZ>() { return VARIANT_2; }
|
||||||
|
template<> inline constexpr Variant cn_base_variant<VARIANT_ZLS>() { return VARIANT_2; }
|
||||||
|
template<> inline constexpr Variant cn_base_variant<VARIANT_DOUBLE>() { return VARIANT_2; }
|
||||||
|
|
||||||
|
|
||||||
template<Variant variant> inline constexpr bool cn_is_cryptonight_r() { return false; }
|
template<Variant variant> inline constexpr bool cn_is_cryptonight_r() { return false; }
|
||||||
template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_WOW>() { return true; }
|
template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_WOW>() { return true; }
|
||||||
template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_4>() { return true; }
|
template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_4>() { return true; }
|
||||||
|
|
||||||
} /* namespace xmrig */
|
} /* namespace xmrig */
|
||||||
|
|
||||||
|
|
|
@ -83,11 +83,11 @@
|
||||||
sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
|
sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \
|
# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \
|
||||||
do { \
|
do { \
|
||||||
const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
|
const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \
|
||||||
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
||||||
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x10 : 0x30)))); \
|
||||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
||||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
||||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||||
|
@ -96,15 +96,20 @@
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
|
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \
|
||||||
do { \
|
do { \
|
||||||
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
|
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
|
||||||
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
||||||
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
|
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
|
||||||
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
|
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
|
||||||
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
||||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
if (reverse) { \
|
||||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk1, _b1)); \
|
||||||
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk3, _b)); \
|
||||||
|
} else { \
|
||||||
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
||||||
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
||||||
|
} \
|
||||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
@ -128,11 +133,11 @@
|
||||||
sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
|
sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \
|
# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \
|
||||||
do { \
|
do { \
|
||||||
const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))); \
|
const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \
|
||||||
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
|
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
|
||||||
const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
|
const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x10 : 0x30)))); \
|
||||||
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
|
||||||
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
|
||||||
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
|
||||||
|
@ -141,15 +146,20 @@
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
|
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \
|
||||||
do { \
|
do { \
|
||||||
const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \
|
const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \
|
||||||
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
|
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
|
||||||
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
|
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
|
||||||
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
|
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
|
||||||
const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
|
const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
|
||||||
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
|
if (reverse) { \
|
||||||
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b1))); \
|
||||||
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b))); \
|
||||||
|
} else { \
|
||||||
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
|
||||||
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
|
||||||
|
} \
|
||||||
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
|
vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -58,27 +58,58 @@ const static uint8_t test_input[380] = {
|
||||||
0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04
|
0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04
|
||||||
};
|
};
|
||||||
|
|
||||||
const static char* test_input_WOW = R"===(9d47bf4c41b7e8e727e681715acb47fa1677cdba9ca7bcb05ad8cc8abd5daa66 5468697320697320612074657374205468697320697320612074657374205468697320697320612074657374 1806260
|
|
||||||
0d4a495cb844a3ca8ba4edb8e6bcf829ef1c06d9cdea2b62ca46c2a21b8b0a79 4c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e73656374657475722061646970697363696e67 1806261
|
|
||||||
a1d6d848b5c5915fccd2f64cf216c6b1a02cf7c77bc80d8d4e51b419e88ff0dd 656c69742c2073656420646f20656975736d6f642074656d706f7220696e6369646964756e74207574206c61626f7265 1806262
|
|
||||||
af3a8544a0221a148c2ac90484b19861e3afca33fe17021efb8ad6496b567915 657420646f6c6f7265206d61676e6120616c697175612e20557420656e696d206164206d696e696d2076656e69616d2c 1806263
|
|
||||||
313399e0963ae8a99dab8af66d343e097dae0c0feb08dbc43ccdafef5515f413 71756973206e6f737472756420657865726369746174696f6e20756c6c616d636f206c61626f726973206e697369 1806264
|
|
||||||
6021c6ef90bff9ae94a7506d623d3a7a86c1756d655f50dd558f716d64622a34 757420616c697175697020657820656120636f6d6d6f646f20636f6e7365717561742e20447569732061757465 1806265
|
|
||||||
2b13000535f3db5f9b9b84a65c4351f386cd2cdedebb8c3ad2eab086e6a3fee5 697275726520646f6c6f7220696e20726570726568656e646572697420696e20766f6c7570746174652076656c6974 1806266
|
|
||||||
fc0e1dad8e895749dc90eb690bc1ba059a1cd772afaaf65a106bf9e5e6b80503 657373652063696c6c756d20646f6c6f726520657520667567696174206e756c6c612070617269617475722e 1806267
|
|
||||||
b60b0afe144deff7d903ed2d5545e77ebe66a3c51fee7016eeb8fee9eb630c0f 4578636570746575722073696e74206f6363616563617420637570696461746174206e6f6e2070726f6964656e742c 1806268
|
|
||||||
64774b27e7d5fec862fc4c0c13ac6bf09123b6f05bb0e4b75c97f379a2b3a679 73756e7420696e2063756c706120717569206f666669636961206465736572756e74206d6f6c6c697420616e696d20696420657374206c61626f72756d2e 1806269)===";
|
|
||||||
|
|
||||||
const static char* test_input_R = R"===(f759588ad57e758467295443a9bd71490abff8e9dad1b95b6bf2f5d0d78387bc 5468697320697320612074657374205468697320697320612074657374205468697320697320612074657374 1806260
|
struct cn_r_test_input_data
|
||||||
5bb833deca2bdd7252a9ccd7b4ce0b6a4854515794b56c207262f7a5b9bdb566 4c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e73656374657475722061646970697363696e67 1806261
|
{
|
||||||
1ee6728da60fbd8d7d55b2b1ade487a3cf52a2c3ac6f520db12c27d8921f6cab 656c69742c2073656420646f20656975736d6f642074656d706f7220696e6369646964756e74207574206c61626f7265 1806262
|
uint64_t height;
|
||||||
6969fe2ddfb758438d48049f302fc2108a4fcc93e37669170e6db4b0b9b4c4cb 657420646f6c6f7265206d61676e6120616c697175612e20557420656e696d206164206d696e696d2076656e69616d2c 1806263
|
size_t size;
|
||||||
7f3048b4e90d0cbe7a57c0394f37338a01fae3adfdc0e5126d863a895eb04e02 71756973206e6f737472756420657865726369746174696f6e20756c6c616d636f206c61626f726973206e697369 1806264
|
uint8_t data[64];
|
||||||
1d290443a4b542af04a82f6b2494a6ee7f20f2754c58e0849032483a56e8e2ef 757420616c697175697020657820656120636f6d6d6f646f20636f6e7365717561742e20447569732061757465 1806265
|
};
|
||||||
c43cc6567436a86afbd6aa9eaa7c276e9806830334b614b2bee23cc76634f6fd 697275726520646f6c6f7220696e20726570726568656e646572697420696e20766f6c7570746174652076656c6974 1806266
|
|
||||||
87be2479c0c4e8edfdfaa5603e93f4265b3f8224c1c5946feb424819d18990a4 657373652063696c6c756d20646f6c6f726520657520667567696174206e756c6c612070617269617475722e 1806267
|
|
||||||
dd9d6a6d8e47465cceac0877ef889b93e7eba979557e3935d7f86dce11b070f3 4578636570746575722073696e74206f6363616563617420637570696461746174206e6f6e2070726f6964656e742c 1806268
|
const static cn_r_test_input_data cn_r_test_input[] = {
|
||||||
75c6f2ae49a20521de97285b431e717125847fb8935ed84a61e7f8d36a2c3d8e 73756e7420696e2063756c706120717569206f666669636961206465736572756e74206d6f6c6c697420616e696d20696420657374206c61626f72756d2e 1806269)===";
|
{ 1806260, 44, { 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74 } },
|
||||||
|
{ 1806261, 50, { 0x4c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6e, 0x67 } },
|
||||||
|
{ 1806262, 48, { 0x65, 0x6c, 0x69, 0x74, 0x2c, 0x20, 0x73, 0x65, 0x64, 0x20, 0x64, 0x6f, 0x20, 0x65, 0x69, 0x75, 0x73, 0x6d, 0x6f, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x63, 0x69, 0x64, 0x69, 0x64, 0x75, 0x6e, 0x74, 0x20, 0x75, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x65 } },
|
||||||
|
{ 1806263, 48, { 0x65, 0x74, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x6d, 0x61, 0x67, 0x6e, 0x61, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x61, 0x2e, 0x20, 0x55, 0x74, 0x20, 0x65, 0x6e, 0x69, 0x6d, 0x20, 0x61, 0x64, 0x20, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x20, 0x76, 0x65, 0x6e, 0x69, 0x61, 0x6d, 0x2c } },
|
||||||
|
{ 1806264, 46, { 0x71, 0x75, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x73, 0x74, 0x72, 0x75, 0x64, 0x20, 0x65, 0x78, 0x65, 0x72, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x75, 0x6c, 0x6c, 0x61, 0x6d, 0x63, 0x6f, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x69, 0x73, 0x20, 0x6e, 0x69, 0x73, 0x69 } },
|
||||||
|
{ 1806265, 45, { 0x75, 0x74, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x69, 0x70, 0x20, 0x65, 0x78, 0x20, 0x65, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x64, 0x6f, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x71, 0x75, 0x61, 0x74, 0x2e, 0x20, 0x44, 0x75, 0x69, 0x73, 0x20, 0x61, 0x75, 0x74, 0x65 } },
|
||||||
|
{ 1806266, 47, { 0x69, 0x72, 0x75, 0x72, 0x65, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x72, 0x65, 0x68, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x76, 0x6f, 0x6c, 0x75, 0x70, 0x74, 0x61, 0x74, 0x65, 0x20, 0x76, 0x65, 0x6c, 0x69, 0x74 } },
|
||||||
|
{ 1806267, 44, { 0x65, 0x73, 0x73, 0x65, 0x20, 0x63, 0x69, 0x6c, 0x6c, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x65, 0x75, 0x20, 0x66, 0x75, 0x67, 0x69, 0x61, 0x74, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x20, 0x70, 0x61, 0x72, 0x69, 0x61, 0x74, 0x75, 0x72, 0x2e } },
|
||||||
|
{ 1806268, 47, { 0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x65, 0x75, 0x72, 0x20, 0x73, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x63, 0x63, 0x61, 0x65, 0x63, 0x61, 0x74, 0x20, 0x63, 0x75, 0x70, 0x69, 0x64, 0x61, 0x74, 0x61, 0x74, 0x20, 0x6e, 0x6f, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x2c } },
|
||||||
|
{ 1806269, 62, { 0x73, 0x75, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x63, 0x75, 0x6c, 0x70, 0x61, 0x20, 0x71, 0x75, 0x69, 0x20, 0x6f, 0x66, 0x66, 0x69, 0x63, 0x69, 0x61, 0x20, 0x64, 0x65, 0x73, 0x65, 0x72, 0x75, 0x6e, 0x74, 0x20, 0x6d, 0x6f, 0x6c, 0x6c, 0x69, 0x74, 0x20, 0x61, 0x6e, 0x69, 0x6d, 0x20, 0x69, 0x64, 0x20, 0x65, 0x73, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x75, 0x6d, 0x2e } },
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// "cn/wow"
|
||||||
|
const static uint8_t test_output_wow[] = {
|
||||||
|
0x9d, 0x47, 0xbf, 0x4c, 0x41, 0xb7, 0xe8, 0xe7, 0x27, 0xe6, 0x81, 0x71, 0x5a, 0xcb, 0x47, 0xfa, 0x16, 0x77, 0xcd, 0xba, 0x9c, 0xa7, 0xbc, 0xb0, 0x5a, 0xd8, 0xcc, 0x8a, 0xbd, 0x5d, 0xaa, 0x66,
|
||||||
|
0x0d, 0x4a, 0x49, 0x5c, 0xb8, 0x44, 0xa3, 0xca, 0x8b, 0xa4, 0xed, 0xb8, 0xe6, 0xbc, 0xf8, 0x29, 0xef, 0x1c, 0x06, 0xd9, 0xcd, 0xea, 0x2b, 0x62, 0xca, 0x46, 0xc2, 0xa2, 0x1b, 0x8b, 0x0a, 0x79,
|
||||||
|
0xa1, 0xd6, 0xd8, 0x48, 0xb5, 0xc5, 0x91, 0x5f, 0xcc, 0xd2, 0xf6, 0x4c, 0xf2, 0x16, 0xc6, 0xb1, 0xa0, 0x2c, 0xf7, 0xc7, 0x7b, 0xc8, 0x0d, 0x8d, 0x4e, 0x51, 0xb4, 0x19, 0xe8, 0x8f, 0xf0, 0xdd,
|
||||||
|
0xaf, 0x3a, 0x85, 0x44, 0xa0, 0x22, 0x1a, 0x14, 0x8c, 0x2a, 0xc9, 0x04, 0x84, 0xb1, 0x98, 0x61, 0xe3, 0xaf, 0xca, 0x33, 0xfe, 0x17, 0x02, 0x1e, 0xfb, 0x8a, 0xd6, 0x49, 0x6b, 0x56, 0x79, 0x15,
|
||||||
|
0x31, 0x33, 0x99, 0xe0, 0x96, 0x3a, 0xe8, 0xa9, 0x9d, 0xab, 0x8a, 0xf6, 0x6d, 0x34, 0x3e, 0x09, 0x7d, 0xae, 0x0c, 0x0f, 0xeb, 0x08, 0xdb, 0xc4, 0x3c, 0xcd, 0xaf, 0xef, 0x55, 0x15, 0xf4, 0x13,
|
||||||
|
0x60, 0x21, 0xc6, 0xef, 0x90, 0xbf, 0xf9, 0xae, 0x94, 0xa7, 0x50, 0x6d, 0x62, 0x3d, 0x3a, 0x7a, 0x86, 0xc1, 0x75, 0x6d, 0x65, 0x5f, 0x50, 0xdd, 0x55, 0x8f, 0x71, 0x6d, 0x64, 0x62, 0x2a, 0x34,
|
||||||
|
0x2b, 0x13, 0x00, 0x05, 0x35, 0xf3, 0xdb, 0x5f, 0x9b, 0x9b, 0x84, 0xa6, 0x5c, 0x43, 0x51, 0xf3, 0x86, 0xcd, 0x2c, 0xde, 0xde, 0xbb, 0x8c, 0x3a, 0xd2, 0xea, 0xb0, 0x86, 0xe6, 0xa3, 0xfe, 0xe5,
|
||||||
|
0xfc, 0x0e, 0x1d, 0xad, 0x8e, 0x89, 0x57, 0x49, 0xdc, 0x90, 0xeb, 0x69, 0x0b, 0xc1, 0xba, 0x05, 0x9a, 0x1c, 0xd7, 0x72, 0xaf, 0xaa, 0xf6, 0x5a, 0x10, 0x6b, 0xf9, 0xe5, 0xe6, 0xb8, 0x05, 0x03,
|
||||||
|
0xb6, 0x0b, 0x0a, 0xfe, 0x14, 0x4d, 0xef, 0xf7, 0xd9, 0x03, 0xed, 0x2d, 0x55, 0x45, 0xe7, 0x7e, 0xbe, 0x66, 0xa3, 0xc5, 0x1f, 0xee, 0x70, 0x16, 0xee, 0xb8, 0xfe, 0xe9, 0xeb, 0x63, 0x0c, 0x0f,
|
||||||
|
0x64, 0x77, 0x4b, 0x27, 0xe7, 0xd5, 0xfe, 0xc8, 0x62, 0xfc, 0x4c, 0x0c, 0x13, 0xac, 0x6b, 0xf0, 0x91, 0x23, 0xb6, 0xf0, 0x5b, 0xb0, 0xe4, 0xb7, 0x5c, 0x97, 0xf3, 0x79, 0xa2, 0xb3, 0xa6, 0x79,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// "cn/r"
|
||||||
|
const static uint8_t test_output_r[] = {
|
||||||
|
0xf7, 0x59, 0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc,
|
||||||
|
0x5b, 0xb8, 0x33, 0xde, 0xca, 0x2b, 0xdd, 0x72, 0x52, 0xa9, 0xcc, 0xd7, 0xb4, 0xce, 0x0b, 0x6a, 0x48, 0x54, 0x51, 0x57, 0x94, 0xb5, 0x6c, 0x20, 0x72, 0x62, 0xf7, 0xa5, 0xb9, 0xbd, 0xb5, 0x66,
|
||||||
|
0x1e, 0xe6, 0x72, 0x8d, 0xa6, 0x0f, 0xbd, 0x8d, 0x7d, 0x55, 0xb2, 0xb1, 0xad, 0xe4, 0x87, 0xa3, 0xcf, 0x52, 0xa2, 0xc3, 0xac, 0x6f, 0x52, 0x0d, 0xb1, 0x2c, 0x27, 0xd8, 0x92, 0x1f, 0x6c, 0xab,
|
||||||
|
0x69, 0x69, 0xfe, 0x2d, 0xdf, 0xb7, 0x58, 0x43, 0x8d, 0x48, 0x04, 0x9f, 0x30, 0x2f, 0xc2, 0x10, 0x8a, 0x4f, 0xcc, 0x93, 0xe3, 0x76, 0x69, 0x17, 0x0e, 0x6d, 0xb4, 0xb0, 0xb9, 0xb4, 0xc4, 0xcb,
|
||||||
|
0x7f, 0x30, 0x48, 0xb4, 0xe9, 0x0d, 0x0c, 0xbe, 0x7a, 0x57, 0xc0, 0x39, 0x4f, 0x37, 0x33, 0x8a, 0x01, 0xfa, 0xe3, 0xad, 0xfd, 0xc0, 0xe5, 0x12, 0x6d, 0x86, 0x3a, 0x89, 0x5e, 0xb0, 0x4e, 0x02,
|
||||||
|
0x1d, 0x29, 0x04, 0x43, 0xa4, 0xb5, 0x42, 0xaf, 0x04, 0xa8, 0x2f, 0x6b, 0x24, 0x94, 0xa6, 0xee, 0x7f, 0x20, 0xf2, 0x75, 0x4c, 0x58, 0xe0, 0x84, 0x90, 0x32, 0x48, 0x3a, 0x56, 0xe8, 0xe2, 0xef,
|
||||||
|
0xc4, 0x3c, 0xc6, 0x56, 0x74, 0x36, 0xa8, 0x6a, 0xfb, 0xd6, 0xaa, 0x9e, 0xaa, 0x7c, 0x27, 0x6e, 0x98, 0x06, 0x83, 0x03, 0x34, 0xb6, 0x14, 0xb2, 0xbe, 0xe2, 0x3c, 0xc7, 0x66, 0x34, 0xf6, 0xfd,
|
||||||
|
0x87, 0xbe, 0x24, 0x79, 0xc0, 0xc4, 0xe8, 0xed, 0xfd, 0xfa, 0xa5, 0x60, 0x3e, 0x93, 0xf4, 0x26, 0x5b, 0x3f, 0x82, 0x24, 0xc1, 0xc5, 0x94, 0x6f, 0xeb, 0x42, 0x48, 0x19, 0xd1, 0x89, 0x90, 0xa4,
|
||||||
|
0xdd, 0x9d, 0x6a, 0x6d, 0x8e, 0x47, 0x46, 0x5c, 0xce, 0xac, 0x08, 0x77, 0xef, 0x88, 0x9b, 0x93, 0xe7, 0xeb, 0xa9, 0x79, 0x55, 0x7e, 0x39, 0x35, 0xd7, 0xf8, 0x6d, 0xce, 0x11, 0xb0, 0x70, 0xf3,
|
||||||
|
0x75, 0xc6, 0xf2, 0xae, 0x49, 0xa2, 0x05, 0x21, 0xde, 0x97, 0x28, 0x5b, 0x43, 0x1e, 0x71, 0x71, 0x25, 0x84, 0x7f, 0xb8, 0x93, 0x5e, 0xd8, 0x4a, 0x61, 0xe7, 0xf8, 0xd3, 0x6a, 0x2c, 0x3d, 0x8e,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
// "cn/0"
|
// "cn/0"
|
||||||
const static uint8_t test_output_v0[160] = {
|
const static uint8_t test_output_v0[160] = {
|
||||||
|
@ -199,6 +230,47 @@ const static uint8_t test_output_rto[160] = {
|
||||||
0xE7, 0x81, 0x4E, 0x2A, 0xBD, 0x62, 0xC1, 0x1B, 0x7C, 0xB9, 0x33, 0x7B, 0xEE, 0x95, 0x80, 0xB3
|
0xE7, 0x81, 0x4E, 0x2A, 0xBD, 0x62, 0xC1, 0x1B, 0x7C, 0xB9, 0x33, 0x7B, 0xEE, 0x95, 0x80, 0xB3
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// "cn/rwz"
|
||||||
|
const static uint8_t test_output_rwz[160] = {
|
||||||
|
0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85,
|
||||||
|
0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8,
|
||||||
|
0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52,
|
||||||
|
0x49, 0x54, 0x0b, 0x91, 0xea, 0x61, 0x7f, 0x98, 0x7d, 0x39, 0x17, 0xb7, 0xd7, 0x65, 0xff, 0x75,
|
||||||
|
0x13, 0x21, 0x1d, 0xce, 0x61, 0x5a, 0xdc, 0x5f, 0x8c, 0xcb, 0x1f, 0x6f, 0xbb, 0x92, 0x88, 0xc3,
|
||||||
|
0xe3, 0xe2, 0xfc, 0x4f, 0x62, 0xfb, 0xf0, 0x48, 0x02, 0x01, 0xd3, 0xbe, 0x77, 0x6a, 0x40, 0xca,
|
||||||
|
0x9a, 0xe9, 0xba, 0x0c, 0xc0, 0x2b, 0x11, 0xf6, 0x9b, 0xee, 0x24, 0x3a, 0xd8, 0x86, 0x18, 0xd0,
|
||||||
|
0xe8, 0xeb, 0xcb, 0x38, 0x2c, 0xf5, 0x99, 0x83, 0x14, 0x7b, 0x0c, 0x20, 0xbe, 0x50, 0xf4, 0x87,
|
||||||
|
0x83, 0x41, 0x75, 0xd8, 0xd1, 0xdd, 0x4b, 0x73, 0xb3, 0x92, 0x8f, 0xe6, 0x1c, 0x72, 0x70, 0xf5,
|
||||||
|
0x7c, 0xf6, 0x23, 0x3a, 0xb4, 0x5f, 0xdf, 0xde, 0xa6, 0x5a, 0x58, 0xec, 0x13, 0x5a, 0x23, 0x2f
|
||||||
|
};
|
||||||
|
|
||||||
|
// "cn/zls"
|
||||||
|
const static uint8_t test_output_zls[160] = {
|
||||||
|
0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E,
|
||||||
|
0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2,
|
||||||
|
0x0D, 0x62, 0x5B, 0x42, 0x18, 0xE2, 0x76, 0xAD, 0xD0, 0x74, 0x90, 0x60, 0x8D, 0xC4, 0xC7, 0x80,
|
||||||
|
0x17, 0xB5, 0x1B, 0x25, 0x31, 0x39, 0x87, 0xD2, 0x2D, 0x6A, 0x9D, 0x1C, 0x74, 0xF4, 0x43, 0x22,
|
||||||
|
0x4B, 0x97, 0x1F, 0x6A, 0xD0, 0xBE, 0x00, 0x74, 0xEC, 0xC5, 0xD8, 0x3B, 0xE6, 0xF4, 0x03, 0x8A,
|
||||||
|
0x7B, 0xBA, 0x80, 0xCC, 0x9F, 0x00, 0xCB, 0xC2, 0x14, 0x8F, 0xF3, 0xD8, 0x92, 0x73, 0xBF, 0x17,
|
||||||
|
0x3D, 0x9B, 0x22, 0xA3, 0x61, 0x94, 0x41, 0x9E, 0xF9, 0x68, 0x1D, 0x42, 0x48, 0x3B, 0x39, 0x45,
|
||||||
|
0xE2, 0xE6, 0x16, 0x84, 0xFC, 0x21, 0xE6, 0xDA, 0x38, 0x7F, 0x17, 0xAB, 0xD3, 0xF2, 0xCE, 0x1A,
|
||||||
|
0x2F, 0x35, 0xD5, 0x74, 0xFA, 0x45, 0x3B, 0x06, 0xD1, 0x4E, 0x84, 0x3A, 0x5D, 0xE3, 0x0E, 0xA5,
|
||||||
|
0x00, 0x08, 0x64, 0xF0, 0xA6, 0xC8, 0x94, 0x45, 0x08, 0xED, 0x03, 0x95, 0x52, 0xE9, 0xBC, 0x5F
|
||||||
|
};
|
||||||
|
|
||||||
|
// "cn/double"
|
||||||
|
const static uint8_t test_output_double[160] = {
|
||||||
|
0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,
|
||||||
|
0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21,
|
||||||
|
0x49, 0xCE, 0x8E, 0xF3, 0xBC, 0x8A, 0x36, 0xBF, 0x86, 0x37, 0x89, 0x55, 0x09, 0xBA, 0x22, 0xF8,
|
||||||
|
0xEB, 0x3A, 0xE1, 0xDC, 0x91, 0xF7, 0x62, 0x4B, 0x9F, 0x48, 0xE6, 0x92, 0xBD, 0xE4, 0x5D, 0xC1,
|
||||||
|
0xF1, 0x3C, 0x63, 0x1D, 0xEB, 0x0B, 0x04, 0xA3, 0x30, 0xD5, 0x11, 0x15, 0x4C, 0xCE, 0xEF, 0x4F,
|
||||||
|
0xDF, 0x69, 0xE3, 0x9E, 0xD2, 0x68, 0xFC, 0x1B, 0x6F, 0xE8, 0x08, 0x9C, 0xBB, 0xA5, 0x2B, 0x60,
|
||||||
|
0x52, 0x0F, 0xE5, 0xD2, 0xF3, 0x8A, 0xB3, 0xE1, 0x76, 0x7F, 0x44, 0x25, 0x76, 0xEC, 0xFF, 0xA2,
|
||||||
|
0x0C, 0x64, 0xD0, 0x0E, 0x32, 0x33, 0x28, 0x20, 0x73, 0xE0, 0x31, 0x66, 0x4E, 0x54, 0x83, 0x49,
|
||||||
|
0x51, 0x55, 0x4D, 0x2E, 0x22, 0xB7, 0x51, 0x09, 0x73, 0x61, 0x7E, 0x6A, 0x57, 0x0B, 0x28, 0x3C,
|
||||||
|
0x5E, 0x2E, 0xC1, 0x80, 0x89, 0x39, 0xB3, 0x54, 0x39, 0x52, 0x0E, 0x69, 0x3D, 0xF6, 0xC5, 0x4A
|
||||||
|
};
|
||||||
|
|
||||||
#ifndef XMRIG_NO_AEON
|
#ifndef XMRIG_NO_AEON
|
||||||
// "cn-lite/0"
|
// "cn-lite/0"
|
||||||
|
@ -295,8 +367,6 @@ const static uint8_t test_output_pico_trtl[160] = {
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
unsigned char hf_hex2bin(char c, bool &err);
|
|
||||||
char hf_bin2hex(unsigned char c);
|
|
||||||
|
|
||||||
#ifndef XMRIG_NO_CN_GPU
|
#ifndef XMRIG_NO_CN_GPU
|
||||||
// "cn/gpu"
|
// "cn/gpu"
|
||||||
|
|
|
@ -531,7 +531,7 @@ template<xmrig::Variant VARIANT, xmrig::Variant BASE>
|
||||||
static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx)
|
static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx)
|
||||||
{
|
{
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == xmrig::VARIANT_2) {
|
||||||
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx);
|
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
||||||
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
|
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
|
||||||
} else {
|
} else {
|
||||||
__m128i tmp = _mm_xor_si128(bx0, cx);
|
__m128i tmp = _mm_xor_si128(bx0, cx);
|
||||||
|
@ -653,9 +653,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == xmrig::VARIANT_2) {
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (VARIANT == xmrig::VARIANT_4) {
|
||||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
|
||||||
} else {
|
} else {
|
||||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo);
|
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -754,6 +754,8 @@ extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
|
||||||
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
|
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
|
||||||
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
|
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
|
||||||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
|
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
|
||||||
|
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx *ctx);
|
||||||
|
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
|
||||||
|
|
||||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
|
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
|
||||||
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm;
|
extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm;
|
||||||
|
@ -765,6 +767,16 @@ extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
|
||||||
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
|
extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
|
||||||
extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm;
|
extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm;
|
||||||
|
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm;
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm;
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm;
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm;
|
||||||
|
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm;
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm;
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
|
||||||
|
extern xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm;
|
||||||
|
|
||||||
void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
|
void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
|
||||||
void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
|
void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
|
||||||
void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
|
void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
|
||||||
|
@ -843,6 +855,31 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||||
cn_trtl_mainloop_bulldozer_asm(ctx[0]);
|
cn_trtl_mainloop_bulldozer_asm(ctx[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (VARIANT == xmrig::VARIANT_RWZ) {
|
||||||
|
cnv2_rwz_mainloop_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
else if (VARIANT == xmrig::VARIANT_ZLS) {
|
||||||
|
if (ASM == xmrig::ASM_INTEL) {
|
||||||
|
cn_zls_mainloop_ivybridge_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
else if (ASM == xmrig::ASM_RYZEN) {
|
||||||
|
cn_zls_mainloop_ryzen_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
cn_zls_mainloop_bulldozer_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (VARIANT == xmrig::VARIANT_DOUBLE) {
|
||||||
|
if (ASM == xmrig::ASM_INTEL) {
|
||||||
|
cn_double_mainloop_ivybridge_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
else if (ASM == xmrig::ASM_RYZEN) {
|
||||||
|
cn_double_mainloop_ryzen_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
cn_double_mainloop_bulldozer_asm(ctx[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (xmrig::cn_is_cryptonight_r<VARIANT>()) {
|
else if (xmrig::cn_is_cryptonight_r<VARIANT>()) {
|
||||||
ctx[0]->generated_code(ctx[0]);
|
ctx[0]->generated_code(ctx[0]);
|
||||||
}
|
}
|
||||||
|
@ -881,6 +918,15 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||||
else if (VARIANT == xmrig::VARIANT_TRTL) {
|
else if (VARIANT == xmrig::VARIANT_TRTL) {
|
||||||
cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
||||||
}
|
}
|
||||||
|
else if (VARIANT == xmrig::VARIANT_RWZ) {
|
||||||
|
cnv2_rwz_double_mainloop_asm(ctx[0], ctx[1]);
|
||||||
|
}
|
||||||
|
else if (VARIANT == xmrig::VARIANT_ZLS) {
|
||||||
|
cn_zls_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
||||||
|
}
|
||||||
|
else if (VARIANT == xmrig::VARIANT_DOUBLE) {
|
||||||
|
cn_double_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
||||||
|
}
|
||||||
else if (xmrig::cn_is_cryptonight_r<VARIANT>()) {
|
else if (xmrig::cn_is_cryptonight_r<VARIANT>()) {
|
||||||
ctx[0]->generated_code_double(ctx[0], ctx[1]);
|
ctx[0]->generated_code_double(ctx[0], ctx[1]);
|
||||||
}
|
}
|
||||||
|
@ -995,9 +1041,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == xmrig::VARIANT_2) {
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (VARIANT == xmrig::VARIANT_4) {
|
||||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
|
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
|
||||||
} else {
|
} else {
|
||||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo);
|
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1051,9 +1097,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
|
|
||||||
if (BASE == xmrig::VARIANT_2) {
|
if (BASE == xmrig::VARIANT_2) {
|
||||||
if (VARIANT == xmrig::VARIANT_4) {
|
if (VARIANT == xmrig::VARIANT_4) {
|
||||||
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
|
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
|
||||||
} else {
|
} else {
|
||||||
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo);
|
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1155,9 +1201,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
lo = __umul128(idx, cl##part, &hi); \
|
lo = __umul128(idx, cl##part, &hi); \
|
||||||
if (BASE == xmrig::VARIANT_2) { \
|
if (BASE == xmrig::VARIANT_2) { \
|
||||||
if (VARIANT == xmrig::VARIANT_4) { \
|
if (VARIANT == xmrig::VARIANT_4) { \
|
||||||
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c); \
|
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \
|
||||||
} else { \
|
} else { \
|
||||||
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo); \
|
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
if (VARIANT == xmrig::VARIANT_4) { \
|
if (VARIANT == xmrig::VARIANT_4) { \
|
||||||
|
|
|
@ -31,7 +31,6 @@ typedef void(*void_func)();
|
||||||
#include "crypto/asm/CryptonightR_template.h"
|
#include "crypto/asm/CryptonightR_template.h"
|
||||||
#include "Mem.h"
|
#include "Mem.h"
|
||||||
|
|
||||||
#if !defined XMRIG_ARM && !defined XMRIG_NO_ASM
|
|
||||||
|
|
||||||
static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)())
|
static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)())
|
||||||
{
|
{
|
||||||
|
@ -186,5 +185,3 @@ void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* m
|
||||||
|
|
||||||
Mem::flushInstructionCache(machine_code, p - p0);
|
Mem::flushInstructionCache(machine_code, p - p0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -70,29 +70,30 @@ FN_PREFIX(CryptonightR_template_mainloop):
|
||||||
|
|
||||||
aesenc xmm5, xmm4
|
aesenc xmm5, xmm4
|
||||||
|
|
||||||
mov r12d, r9d
|
mov r13d, r9d
|
||||||
mov eax, r9d
|
mov eax, r9d
|
||||||
xor r9d, 48
|
xor r9d, 48
|
||||||
xor r12d, 16
|
xor r13d, 16
|
||||||
xor eax, 32
|
xor eax, 32
|
||||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||||
movaps xmm3, xmm0
|
movaps xmm3, xmm0
|
||||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
movdqu xmm2, XMMWORD PTR [r13+r11]
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
pxor xmm0, xmm2
|
pxor xmm0, xmm2
|
||||||
pxor xmm5, xmm1
|
pxor xmm5, xmm1
|
||||||
pxor xmm5, xmm0
|
pxor xmm5, xmm0
|
||||||
paddq xmm3, xmm7
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
|
||||||
|
|
||||||
movq r12, xmm5
|
movq r12, xmm5
|
||||||
movd r10d, xmm5
|
movd r10d, xmm5
|
||||||
and r10d, 2097136
|
and r10d, 2097136
|
||||||
|
|
||||||
|
paddq xmm3, xmm7
|
||||||
|
paddq xmm2, xmm6
|
||||||
|
paddq xmm1, xmm4
|
||||||
|
movdqu XMMWORD PTR [r13+r11], xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||||
|
|
||||||
movdqa xmm0, xmm5
|
movdqa xmm0, xmm5
|
||||||
pxor xmm0, xmm6
|
pxor xmm0, xmm6
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
@ -102,14 +103,16 @@ FN_PREFIX(CryptonightR_template_mainloop):
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
or r13, rdx
|
or r13, rdx
|
||||||
|
|
||||||
xor r13, QWORD PTR [r10+r11]
|
|
||||||
mov r14, QWORD PTR [r10+r11+8]
|
|
||||||
|
|
||||||
movd eax, xmm6
|
movd eax, xmm6
|
||||||
movd edx, xmm7
|
movd edx, xmm7
|
||||||
pextrd r9d, xmm7, 2
|
pextrd r9d, xmm7, 2
|
||||||
|
|
||||||
|
xor r13, QWORD PTR [r10+r11]
|
||||||
|
mov r14, QWORD PTR [r10+r11+8]
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_template_part2):
|
FN_PREFIX(CryptonightR_template_part2):
|
||||||
|
lea rcx, [r10+r11]
|
||||||
|
|
||||||
mov eax, edi
|
mov eax, edi
|
||||||
mov edx, ebp
|
mov edx, ebp
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
|
@ -124,6 +127,8 @@ FN_PREFIX(CryptonightR_template_part2):
|
||||||
|
|
||||||
mov rax, r13
|
mov rax, r13
|
||||||
mul r12
|
mul r12
|
||||||
|
add r15, rax
|
||||||
|
add rsp, rdx
|
||||||
|
|
||||||
mov r9d, r10d
|
mov r9d, r10d
|
||||||
mov r12d, r10d
|
mov r12d, r10d
|
||||||
|
@ -145,13 +150,10 @@ FN_PREFIX(CryptonightR_template_part2):
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||||
|
|
||||||
movdqa xmm7, xmm6
|
movdqa xmm7, xmm6
|
||||||
add r15, rax
|
mov QWORD PTR [rcx], rsp
|
||||||
add rsp, rdx
|
|
||||||
xor r10, 48
|
|
||||||
mov QWORD PTR [r10+r11], rsp
|
|
||||||
xor rsp, r13
|
xor rsp, r13
|
||||||
mov r9d, esp
|
mov r9d, esp
|
||||||
mov QWORD PTR [r10+r11+8], r15
|
mov QWORD PTR [rcx+8], r15
|
||||||
and r9d, 2097136
|
and r9d, 2097136
|
||||||
xor r15, r14
|
xor r15, r14
|
||||||
movdqa xmm6, xmm5
|
movdqa xmm6, xmm5
|
||||||
|
|
|
@ -70,29 +70,30 @@ CryptonightR_template_mainloop:
|
||||||
|
|
||||||
aesenc xmm5, xmm4
|
aesenc xmm5, xmm4
|
||||||
|
|
||||||
mov r12d, r9d
|
mov r13d, r9d
|
||||||
mov eax, r9d
|
mov eax, r9d
|
||||||
xor r9d, 48
|
xor r9d, 48
|
||||||
xor r12d, 16
|
xor r13d, 16
|
||||||
xor eax, 32
|
xor eax, 32
|
||||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||||
movaps xmm3, xmm0
|
movaps xmm3, xmm0
|
||||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
movdqu xmm2, XMMWORD PTR [r13+r11]
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
pxor xmm0, xmm2
|
pxor xmm0, xmm2
|
||||||
pxor xmm5, xmm1
|
pxor xmm5, xmm1
|
||||||
pxor xmm5, xmm0
|
pxor xmm5, xmm0
|
||||||
paddq xmm3, xmm7
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
|
||||||
|
|
||||||
movq r12, xmm5
|
movq r12, xmm5
|
||||||
movd r10d, xmm5
|
movd r10d, xmm5
|
||||||
and r10d, 2097136
|
and r10d, 2097136
|
||||||
|
|
||||||
|
paddq xmm3, xmm7
|
||||||
|
paddq xmm2, xmm6
|
||||||
|
paddq xmm1, xmm4
|
||||||
|
movdqu XMMWORD PTR [r13+r11], xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||||
|
|
||||||
movdqa xmm0, xmm5
|
movdqa xmm0, xmm5
|
||||||
pxor xmm0, xmm6
|
pxor xmm0, xmm6
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
@ -102,14 +103,16 @@ CryptonightR_template_mainloop:
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
or r13, rdx
|
or r13, rdx
|
||||||
|
|
||||||
xor r13, QWORD PTR [r10+r11]
|
|
||||||
mov r14, QWORD PTR [r10+r11+8]
|
|
||||||
|
|
||||||
movd eax, xmm6
|
movd eax, xmm6
|
||||||
movd edx, xmm7
|
movd edx, xmm7
|
||||||
pextrd r9d, xmm7, 2
|
pextrd r9d, xmm7, 2
|
||||||
|
|
||||||
|
xor r13, QWORD PTR [r10+r11]
|
||||||
|
mov r14, QWORD PTR [r10+r11+8]
|
||||||
|
|
||||||
CryptonightR_template_part2:
|
CryptonightR_template_part2:
|
||||||
|
lea rcx, [r10+r11]
|
||||||
|
|
||||||
mov eax, edi
|
mov eax, edi
|
||||||
mov edx, ebp
|
mov edx, ebp
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
|
@ -124,6 +127,8 @@ CryptonightR_template_part2:
|
||||||
|
|
||||||
mov rax, r13
|
mov rax, r13
|
||||||
mul r12
|
mul r12
|
||||||
|
add r15, rax
|
||||||
|
add rsp, rdx
|
||||||
|
|
||||||
mov r9d, r10d
|
mov r9d, r10d
|
||||||
mov r12d, r10d
|
mov r12d, r10d
|
||||||
|
@ -145,13 +150,10 @@ CryptonightR_template_part2:
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||||
|
|
||||||
movdqa xmm7, xmm6
|
movdqa xmm7, xmm6
|
||||||
add r15, rax
|
mov QWORD PTR [rcx], rsp
|
||||||
add rsp, rdx
|
|
||||||
xor r10, 48
|
|
||||||
mov QWORD PTR [r10+r11], rsp
|
|
||||||
xor rsp, r13
|
xor rsp, r13
|
||||||
mov r9d, esp
|
mov r9d, esp
|
||||||
mov QWORD PTR [r10+r11+8], r15
|
mov QWORD PTR [rcx+8], r15
|
||||||
and r9d, 2097136
|
and r9d, 2097136
|
||||||
xor r15, r14
|
xor r15, r14
|
||||||
movdqa xmm6, xmm5
|
movdqa xmm6, xmm5
|
||||||
|
|
410
src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc
Normal file
410
src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc
Normal file
|
@ -0,0 +1,410 @@
|
||||||
|
mov rax, rsp
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 184
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp+272]
|
||||||
|
mov DWORD PTR [rsp+276], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+276]
|
||||||
|
|
||||||
|
mov r13, QWORD PTR [rcx+224]
|
||||||
|
mov r9, rdx
|
||||||
|
mov r10, QWORD PTR [rcx+32]
|
||||||
|
mov r8, rcx
|
||||||
|
xor r10, QWORD PTR [rcx]
|
||||||
|
mov r14d, 393216
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rsi, QWORD PTR [rdx+224]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov rdi, QWORD PTR [r9+32]
|
||||||
|
xor rdi, QWORD PTR [r9]
|
||||||
|
mov rbp, QWORD PTR [r9+40]
|
||||||
|
xor rbp, QWORD PTR [r9+8]
|
||||||
|
movq xmm0, rdx
|
||||||
|
movaps XMMWORD PTR [rax-88], xmm6
|
||||||
|
movaps XMMWORD PTR [rax-104], xmm7
|
||||||
|
movaps XMMWORD PTR [rax-120], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+112], xmm9
|
||||||
|
movaps XMMWORD PTR [rsp+96], xmm10
|
||||||
|
movaps XMMWORD PTR [rsp+80], xmm11
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm12
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm13
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm14
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm15
|
||||||
|
mov rdx, r10
|
||||||
|
movq xmm4, QWORD PTR [r8+96]
|
||||||
|
and edx, 2097136
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
xorps xmm13, xmm13
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r8+72]
|
||||||
|
movq xmm5, QWORD PTR [r8+104]
|
||||||
|
movq xmm7, rax
|
||||||
|
|
||||||
|
mov eax, 1
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm14, rax
|
||||||
|
punpcklqdq xmm14, xmm14
|
||||||
|
|
||||||
|
mov eax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm12, rax
|
||||||
|
punpcklqdq xmm12, xmm12
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [r8+80]
|
||||||
|
xor rax, QWORD PTR [r8+64]
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov rcx, QWORD PTR [r9+56]
|
||||||
|
xor rcx, QWORD PTR [r9+24]
|
||||||
|
movq xmm3, rax
|
||||||
|
mov rax, QWORD PTR [r9+48]
|
||||||
|
xor rax, QWORD PTR [r9+16]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov QWORD PTR [rsp], r13
|
||||||
|
mov rcx, QWORD PTR [r9+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movq xmm6, rax
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
punpcklqdq xmm6, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov QWORD PTR [rsp+256], r10
|
||||||
|
mov rcx, rdi
|
||||||
|
mov QWORD PTR [rsp+264], r11
|
||||||
|
movq xmm8, rax
|
||||||
|
and ecx, 2097136
|
||||||
|
punpcklqdq xmm8, xmm0
|
||||||
|
movq xmm0, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movq xmm0, QWORD PTR [r9+104]
|
||||||
|
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
movdqu xmm11, XMMWORD PTR [r8]
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
lea r9, QWORD PTR [rdx+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r9]
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
rwz_main_loop_double:
|
||||||
|
movdqu xmm9, xmm15
|
||||||
|
mov eax, edx
|
||||||
|
mov ebx, edx
|
||||||
|
xor eax, 16
|
||||||
|
xor ebx, 32
|
||||||
|
xor edx, 48
|
||||||
|
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm2, r10
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
aesenc xmm9, xmm2
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||||
|
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||||
|
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||||
|
|
||||||
|
movq r11, xmm9
|
||||||
|
mov edx, r11d
|
||||||
|
and edx, 2097136
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
pxor xmm0, xmm7
|
||||||
|
movdqu XMMWORD PTR [r9], xmm0
|
||||||
|
|
||||||
|
lea rbx, QWORD PTR [rdx+r13]
|
||||||
|
mov r10, QWORD PTR [rdx+r13]
|
||||||
|
|
||||||
|
movdqu xmm10, xmm11
|
||||||
|
movq xmm0, rbp
|
||||||
|
movq xmm11, rdi
|
||||||
|
punpcklqdq xmm11, xmm0
|
||||||
|
aesenc xmm10, xmm11
|
||||||
|
|
||||||
|
mov eax, ecx
|
||||||
|
mov r12d, ecx
|
||||||
|
xor eax, 16
|
||||||
|
xor r12d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||||
|
paddq xmm0, xmm6
|
||||||
|
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||||
|
|
||||||
|
movq rcx, xmm10
|
||||||
|
and ecx, 2097136
|
||||||
|
|
||||||
|
movdqa xmm0, xmm10
|
||||||
|
pxor xmm0, xmm6
|
||||||
|
movdqu XMMWORD PTR [r8], xmm0
|
||||||
|
mov r12, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov r9, QWORD PTR [rbx+8]
|
||||||
|
|
||||||
|
xor edx, 16
|
||||||
|
mov r8d, edx
|
||||||
|
mov r15d, edx
|
||||||
|
|
||||||
|
movq rdx, xmm5
|
||||||
|
shl rdx, 32
|
||||||
|
movq rax, xmm4
|
||||||
|
xor rdx, rax
|
||||||
|
xor r10, rdx
|
||||||
|
mov rax, r10
|
||||||
|
mul r11
|
||||||
|
mov r11d, r8d
|
||||||
|
xor r11d, 48
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rdx, [r11+r13]
|
||||||
|
movq xmm1, rax
|
||||||
|
xor rax, [r11+r13+8]
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [r8+r13], xmm0
|
||||||
|
xor r8d, 32
|
||||||
|
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||||
|
|
||||||
|
mov r11, QWORD PTR [rsp+256]
|
||||||
|
add r11, rdx
|
||||||
|
mov rdx, QWORD PTR [rsp+264]
|
||||||
|
add rdx, rax
|
||||||
|
mov QWORD PTR [rbx], r11
|
||||||
|
xor r11, r10
|
||||||
|
mov QWORD PTR [rbx+8], rdx
|
||||||
|
xor rdx, r9
|
||||||
|
mov QWORD PTR [rsp+256], r11
|
||||||
|
and r11d, 2097136
|
||||||
|
mov QWORD PTR [rsp+264], rdx
|
||||||
|
mov QWORD PTR [rsp+8], r11
|
||||||
|
lea r15, QWORD PTR [r11+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||||
|
lea r13, QWORD PTR [rsi+rcx]
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movaps xmm2, xmm13
|
||||||
|
movq r10, xmm0
|
||||||
|
psllq xmm5, 1
|
||||||
|
shl r10, 32
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movdqa xmm1, xmm10
|
||||||
|
movq r11, xmm0
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movq r8, xmm1
|
||||||
|
psrldq xmm4, 8
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
movq rax, xmm4
|
||||||
|
xor r10, rax
|
||||||
|
movaps xmm1, xmm13
|
||||||
|
xor r10, r12
|
||||||
|
lea rax, QWORD PTR [r11+1]
|
||||||
|
shr rax, 1
|
||||||
|
movdqa xmm3, xmm9
|
||||||
|
punpcklqdq xmm3, xmm10
|
||||||
|
paddq xmm5, xmm3
|
||||||
|
movq rdx, xmm5
|
||||||
|
psrldq xmm5, 8
|
||||||
|
cvtsi2sd xmm2, rax
|
||||||
|
or edx, -2147483647
|
||||||
|
lea rax, QWORD PTR [r8+1]
|
||||||
|
shr rax, 1
|
||||||
|
movq r9, xmm5
|
||||||
|
cvtsi2sd xmm0, rax
|
||||||
|
or r9d, -2147483647
|
||||||
|
cvtsi2sd xmm1, rdx
|
||||||
|
unpcklpd xmm2, xmm0
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
cvtsi2sd xmm0, r9
|
||||||
|
unpcklpd xmm1, xmm0
|
||||||
|
divpd xmm2, xmm1
|
||||||
|
paddq xmm2, xmm14
|
||||||
|
cvttsd2si rax, xmm2
|
||||||
|
psrldq xmm2, 8
|
||||||
|
mov rbx, rax
|
||||||
|
imul rax, rdx
|
||||||
|
sub r11, rax
|
||||||
|
js rwz_div_fix_1
|
||||||
|
rwz_div_fix_1_ret:
|
||||||
|
|
||||||
|
cvttsd2si rdx, xmm2
|
||||||
|
mov rax, rdx
|
||||||
|
imul rax, r9
|
||||||
|
movd xmm2, r11d
|
||||||
|
movd xmm4, ebx
|
||||||
|
sub r8, rax
|
||||||
|
js rwz_div_fix_2
|
||||||
|
rwz_div_fix_2_ret:
|
||||||
|
|
||||||
|
movd xmm1, r8d
|
||||||
|
movd xmm0, edx
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
punpckldq xmm4, xmm0
|
||||||
|
punpckldq xmm4, xmm2
|
||||||
|
paddq xmm3, xmm4
|
||||||
|
movdqa xmm0, xmm3
|
||||||
|
psrlq xmm0, 12
|
||||||
|
paddq xmm0, xmm12
|
||||||
|
sqrtpd xmm1, xmm0
|
||||||
|
movq r9, xmm1
|
||||||
|
movdqa xmm5, xmm1
|
||||||
|
psrlq xmm5, 19
|
||||||
|
test r9, 524287
|
||||||
|
je rwz_sqrt_fix_1
|
||||||
|
rwz_sqrt_fix_1_ret:
|
||||||
|
|
||||||
|
movq r9, xmm10
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movq r8, xmm1
|
||||||
|
test r8, 524287
|
||||||
|
je rwz_sqrt_fix_2
|
||||||
|
rwz_sqrt_fix_2_ret:
|
||||||
|
|
||||||
|
mov r12d, ecx
|
||||||
|
mov r8d, ecx
|
||||||
|
xor r12d, 16
|
||||||
|
xor r8d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
mov rax, r10
|
||||||
|
mul r9
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm3, rdx
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||||
|
xor rdx, [r8+rsi]
|
||||||
|
xor rax, [r8+rsi+8]
|
||||||
|
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||||
|
paddq xmm3, xmm6
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
movdqu XMMWORD PTR [r8+rsi], xmm3
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||||
|
|
||||||
|
add rdi, rdx
|
||||||
|
mov QWORD PTR [r13], rdi
|
||||||
|
xor rdi, r10
|
||||||
|
mov ecx, edi
|
||||||
|
and ecx, 2097136
|
||||||
|
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov rdx, QWORD PTR [r13+8]
|
||||||
|
add rbp, rax
|
||||||
|
mov QWORD PTR [r13+8], rbp
|
||||||
|
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||||
|
xor rbp, rdx
|
||||||
|
mov r13, QWORD PTR [rsp]
|
||||||
|
movdqa xmm3, xmm7
|
||||||
|
mov rdx, QWORD PTR [rsp+8]
|
||||||
|
movdqa xmm8, xmm6
|
||||||
|
mov r10, QWORD PTR [rsp+256]
|
||||||
|
movdqa xmm7, xmm9
|
||||||
|
mov r11, QWORD PTR [rsp+264]
|
||||||
|
movdqa xmm6, xmm10
|
||||||
|
mov r9, r15
|
||||||
|
dec r14d
|
||||||
|
jne rwz_main_loop_double
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp+272]
|
||||||
|
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+184]
|
||||||
|
movaps xmm6, XMMWORD PTR [r11-24]
|
||||||
|
movaps xmm7, XMMWORD PTR [r11-40]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-56]
|
||||||
|
movaps xmm9, XMMWORD PTR [r11-72]
|
||||||
|
movaps xmm10, XMMWORD PTR [r11-88]
|
||||||
|
movaps xmm11, XMMWORD PTR [r11-104]
|
||||||
|
movaps xmm12, XMMWORD PTR [r11-120]
|
||||||
|
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
jmp rwz_cnv2_double_mainloop_asm_endp
|
||||||
|
|
||||||
|
rwz_div_fix_1:
|
||||||
|
dec rbx
|
||||||
|
add r11, rdx
|
||||||
|
jmp rwz_div_fix_1_ret
|
||||||
|
|
||||||
|
rwz_div_fix_2:
|
||||||
|
dec rdx
|
||||||
|
add r8, r9
|
||||||
|
jmp rwz_div_fix_2_ret
|
||||||
|
|
||||||
|
rwz_sqrt_fix_1:
|
||||||
|
movq r8, xmm3
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
dec r9
|
||||||
|
mov r11d, -1022
|
||||||
|
shl r11, 32
|
||||||
|
mov rax, r9
|
||||||
|
shr r9, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r9
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+r11+1]
|
||||||
|
add rax, r11
|
||||||
|
imul rdx, rax
|
||||||
|
sub rdx, r8
|
||||||
|
adc r9, 0
|
||||||
|
movq xmm5, r9
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp rwz_sqrt_fix_1_ret
|
||||||
|
|
||||||
|
rwz_sqrt_fix_2:
|
||||||
|
psrldq xmm3, 8
|
||||||
|
movq r11, xmm3
|
||||||
|
dec r8
|
||||||
|
mov ebx, -1022
|
||||||
|
shl rbx, 32
|
||||||
|
mov rax, r8
|
||||||
|
shr r8, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r8
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+rbx+1]
|
||||||
|
add rax, rbx
|
||||||
|
imul rdx, rax
|
||||||
|
sub rdx, r11
|
||||||
|
adc r8, 0
|
||||||
|
movq xmm0, r8
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp rwz_sqrt_fix_2_ret
|
||||||
|
|
||||||
|
rwz_cnv2_double_mainloop_asm_endp:
|
186
src/crypto/asm/cn2/cnv2_rwz_main_loop.inc
Normal file
186
src/crypto/asm/cn2/cnv2_rwz_main_loop.inc
Normal file
|
@ -0,0 +1,186 @@
|
||||||
|
mov QWORD PTR [rsp+24], rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 80
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov esi, 393216
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
mov r13d, -2147483647
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm4, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movq xmm3, QWORD PTR [r9+104]
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm8
|
||||||
|
and r10d, 2097136
|
||||||
|
movq xmm5, rax
|
||||||
|
|
||||||
|
xor eax, eax
|
||||||
|
mov QWORD PTR [rsp+16], rax
|
||||||
|
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm8, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
rwz_main_loop:
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
mov rdi, r15
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm7, r8
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
aesenc xmm6, xmm7
|
||||||
|
movq rbp, xmm6
|
||||||
|
mov r9, rbp
|
||||||
|
and r9d, 2097136
|
||||||
|
movdqu xmm0, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqu xmm2, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
mov r10, r9
|
||||||
|
xor r10d, 32
|
||||||
|
movq rcx, xmm3
|
||||||
|
mov rax, rcx
|
||||||
|
shl rax, 32
|
||||||
|
xor rdi, rax
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rdi, QWORD PTR [r9+rbx]
|
||||||
|
lea r14, QWORD PTR [r9+rbx]
|
||||||
|
mov r12, QWORD PTR [r14+8]
|
||||||
|
xor edx, edx
|
||||||
|
lea r9d, DWORD PTR [ecx+ecx]
|
||||||
|
add r9d, ebp
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
psrldq xmm0, 8
|
||||||
|
or r9d, r13d
|
||||||
|
movq rax, xmm0
|
||||||
|
div r9
|
||||||
|
xorps xmm3, xmm3
|
||||||
|
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
add rdx, rax
|
||||||
|
lea r9, QWORD PTR [rdx+rbp]
|
||||||
|
mov r15, rdx
|
||||||
|
mov rax, r9
|
||||||
|
shr rax, 12
|
||||||
|
movq xmm0, rax
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
sqrtsd xmm3, xmm0
|
||||||
|
psubq xmm3, XMMWORD PTR [rsp+16]
|
||||||
|
movq rdx, xmm3
|
||||||
|
test edx, 524287
|
||||||
|
je rwz_sqrt_fixup
|
||||||
|
psrlq xmm3, 19
|
||||||
|
rwz_sqrt_fixup_ret:
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov rax, rdi
|
||||||
|
mul rbp
|
||||||
|
movq xmm2, rdx
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
add r8, rdx
|
||||||
|
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rdi
|
||||||
|
mov edi, r8d
|
||||||
|
and edi, 2097136
|
||||||
|
movq xmm0, rax
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r14+8], r11
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
|
||||||
|
mov r9d, r10d
|
||||||
|
xor r9d, 48
|
||||||
|
xor r10d, 16
|
||||||
|
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
paddq xmm2, xmm5
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqa xmm5, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+rbx], xmm2
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
movdqu xmm6, [rdi+rbx]
|
||||||
|
mov r10d, edi
|
||||||
|
xor r11, r12
|
||||||
|
dec rsi
|
||||||
|
jne rwz_main_loop
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
mov rbx, QWORD PTR [rsp+160]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||||
|
add rsp, 80
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
jmp cnv2_rwz_main_loop_endp
|
||||||
|
|
||||||
|
rwz_sqrt_fixup:
|
||||||
|
dec rdx
|
||||||
|
mov r13d, -1022
|
||||||
|
shl r13, 32
|
||||||
|
mov rax, rdx
|
||||||
|
shr rdx, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdx
|
||||||
|
sub rcx, rax
|
||||||
|
add rax, r13
|
||||||
|
not r13
|
||||||
|
sub rcx, r13
|
||||||
|
mov r13d, -2147483647
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdx, 0
|
||||||
|
movq xmm3, rdx
|
||||||
|
jmp rwz_sqrt_fixup_ret
|
||||||
|
|
||||||
|
cnv2_rwz_main_loop_endp:
|
|
@ -15,6 +15,8 @@
|
||||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||||
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cnv2_rwz_mainloop_asm)
|
||||||
|
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
|
||||||
|
|
||||||
ALIGN(64)
|
ALIGN(64)
|
||||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||||
|
@ -52,3 +54,21 @@ FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
mov eax, 3735929054
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
/* Entry shim for the single-hash cn/rwz main loop: adjusts the stack and argument register, then runs the loop body that is textually included below. */
FN_PREFIX(cnv2_rwz_mainloop_asm):
|
||||||
|
/* Reserve scratch space: the included body saves rbx at [rsp+24] relative to its own entry rsp and reloads it before leaving, so that slot must lie inside this frame. */
sub rsp, 48
|
||||||
|
/* The included body reads the context pointer from rcx; copy it over from rdi (first integer argument, assuming the System V AMD64 convention). */
mov rcx, rdi
|
||||||
|
#include "cn2/cnv2_rwz_main_loop.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
/* 3735929054 == 0xDEADC0DE. Sits after ret, so it is not reached by fall-through; presumably an end-of-code marker (TODO confirm against whatever scans this code). */
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
/* Entry shim for the double-hash cn/rwz main loop. Takes two context pointers, moves them into the registers the included loop body expects (rcx = first, rdx = second), and runs that body inline. */
FN_PREFIX(cnv2_rwz_double_mainloop_asm):
|
||||||
|
/* Reserve scratch space: the included body addresses slots above its own entry rsp (e.g. [rsp+256]..[rsp+276] after pushing 8 registers and subtracting 184), so those slots must lie inside this frame. */
sub rsp, 48
|
||||||
|
/* First context: the body reads it from rcx; it arrives in rdi under the System V AMD64 convention. */
mov rcx, rdi
|
||||||
|
/* Second context: the body reads it from rdx ("mov r9, rdx", "[rdx+224]"), but the second integer argument arrives in rsi. Without this copy rdx holds an unrelated value and the body dereferences a garbage pointer. Must happen before the include, which overwrites rsi. */
mov rdx, rsi
|
||||||
|
#include "cn2/cnv2_rwz_double_main_loop.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
/* 3735929054 == 0xDEADC0DE. Sits after ret, so it is not reached by fall-through; presumably an end-of-code marker (TODO confirm against whatever scans this code). */
mov eax, 3735929054
|
||||||
|
|
|
@ -3,6 +3,8 @@ PUBLIC cnv2_mainloop_ivybridge_asm
|
||||||
PUBLIC cnv2_mainloop_ryzen_asm
|
PUBLIC cnv2_mainloop_ryzen_asm
|
||||||
PUBLIC cnv2_mainloop_bulldozer_asm
|
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||||
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||||
|
PUBLIC cnv2_rwz_mainloop_asm
|
||||||
|
PUBLIC cnv2_rwz_double_mainloop_asm
|
||||||
|
|
||||||
ALIGN(64)
|
ALIGN(64)
|
||||||
cnv2_mainloop_ivybridge_asm PROC
|
cnv2_mainloop_ivybridge_asm PROC
|
||||||
|
@ -32,5 +34,19 @@ cnv2_double_mainloop_sandybridge_asm PROC
|
||||||
mov eax, 3735929054
|
mov eax, 3735929054
|
||||||
cnv2_double_mainloop_sandybridge_asm ENDP
|
cnv2_double_mainloop_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
; MASM definition of the single-hash cn/rwz main loop. No register shuffling is done here: the included body reads its context pointer directly from rcx.
cnv2_rwz_mainloop_asm PROC
|
||||||
|
INCLUDE cn2/cnv2_rwz_main_loop.inc
|
||||||
|
ret 0
|
||||||
|
; 3735929054 == 0DEADC0DEh. Sits after ret, so it is not reached by fall-through; presumably an end-of-code marker (TODO confirm).
mov eax, 3735929054
|
||||||
|
cnv2_rwz_mainloop_asm ENDP
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
; MASM definition of the double-hash cn/rwz main loop. No register shuffling is done here: the included body reads the first context pointer from rcx and the second from rdx.
cnv2_rwz_double_mainloop_asm PROC
|
||||||
|
INCLUDE cn2/cnv2_rwz_double_main_loop.inc
|
||||||
|
ret 0
|
||||||
|
; 3735929054 == 0DEADC0DEh. Sits after ret, so it is not reached by fall-through; presumably an end-of-code marker (TODO confirm).
mov eax, 3735929054
|
||||||
|
cnv2_rwz_double_mainloop_asm ENDP
|
||||||
|
|
||||||
_TEXT_CNV2_MAINLOOP ENDS
|
_TEXT_CNV2_MAINLOOP ENDS
|
||||||
END
|
END
|
||||||
|
|
|
@ -1,279 +0,0 @@
|
||||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightR_soft_aes_template_part1):
|
|
||||||
mov QWORD PTR [rsp+8], rcx
|
|
||||||
push rbx
|
|
||||||
push rbp
|
|
||||||
push rsi
|
|
||||||
push rdi
|
|
||||||
push r12
|
|
||||||
push r13
|
|
||||||
push r14
|
|
||||||
push r15
|
|
||||||
sub rsp, 232
|
|
||||||
|
|
||||||
mov eax, [rcx+96]
|
|
||||||
mov ebx, [rcx+100]
|
|
||||||
mov esi, [rcx+104]
|
|
||||||
mov edx, [rcx+108]
|
|
||||||
mov [rsp+144], eax
|
|
||||||
mov [rsp+148], ebx
|
|
||||||
mov [rsp+152], esi
|
|
||||||
mov [rsp+156], edx
|
|
||||||
|
|
||||||
mov rax, QWORD PTR [rcx+48]
|
|
||||||
mov r10, rcx
|
|
||||||
xor rax, QWORD PTR [rcx+16]
|
|
||||||
mov r8, QWORD PTR [rcx+32]
|
|
||||||
xor r8, QWORD PTR [rcx]
|
|
||||||
mov r9, QWORD PTR [rcx+40]
|
|
||||||
xor r9, QWORD PTR [rcx+8]
|
|
||||||
movd xmm4, rax
|
|
||||||
mov rdx, QWORD PTR [rcx+56]
|
|
||||||
xor rdx, QWORD PTR [rcx+24]
|
|
||||||
mov r11, QWORD PTR [rcx+224]
|
|
||||||
mov rcx, QWORD PTR [rcx+88]
|
|
||||||
xor rcx, QWORD PTR [r10+72]
|
|
||||||
mov rax, QWORD PTR [r10+80]
|
|
||||||
movd xmm0, rdx
|
|
||||||
xor rax, QWORD PTR [r10+64]
|
|
||||||
|
|
||||||
movaps XMMWORD PTR [rsp+16], xmm6
|
|
||||||
movaps XMMWORD PTR [rsp+32], xmm7
|
|
||||||
movaps XMMWORD PTR [rsp+48], xmm8
|
|
||||||
movaps XMMWORD PTR [rsp+64], xmm9
|
|
||||||
movaps XMMWORD PTR [rsp+80], xmm10
|
|
||||||
movaps XMMWORD PTR [rsp+96], xmm11
|
|
||||||
movaps XMMWORD PTR [rsp+112], xmm12
|
|
||||||
movaps XMMWORD PTR [rsp+128], xmm13
|
|
||||||
|
|
||||||
movd xmm5, rax
|
|
||||||
|
|
||||||
mov rax, r8
|
|
||||||
punpcklqdq xmm4, xmm0
|
|
||||||
and eax, 2097136
|
|
||||||
movd xmm10, QWORD PTR [r10+96]
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov rcx, QWORD PTR [r10+104]
|
|
||||||
xorps xmm9, xmm9
|
|
||||||
mov QWORD PTR [rsp+328], rax
|
|
||||||
movd xmm12, r11
|
|
||||||
mov QWORD PTR [rsp+320], r9
|
|
||||||
punpcklqdq xmm5, xmm0
|
|
||||||
movd xmm13, rcx
|
|
||||||
mov r12d, 524288
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
|
|
||||||
movd xmm11, r12d
|
|
||||||
mov r12, QWORD PTR [r10+272]
|
|
||||||
lea r13, QWORD PTR [rax+r11]
|
|
||||||
mov esi, DWORD PTR [r13]
|
|
||||||
movd xmm0, r9
|
|
||||||
mov r10d, DWORD PTR [r13+4]
|
|
||||||
movd xmm7, r8
|
|
||||||
mov ebp, DWORD PTR [r13+12]
|
|
||||||
mov r14d, DWORD PTR [r13+8]
|
|
||||||
mov rdx, QWORD PTR [rsp+328]
|
|
||||||
movzx ecx, sil
|
|
||||||
shr esi, 8
|
|
||||||
punpcklqdq xmm7, xmm0
|
|
||||||
mov r15d, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, r10b
|
|
||||||
shr r10d, 8
|
|
||||||
mov edi, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, r14b
|
|
||||||
shr r14d, 8
|
|
||||||
mov ebx, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, bpl
|
|
||||||
shr ebp, 8
|
|
||||||
mov r9d, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, r10b
|
|
||||||
shr r10d, 8
|
|
||||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
|
||||||
movzx ecx, r14b
|
|
||||||
shr r14d, 8
|
|
||||||
mov eax, r14d
|
|
||||||
shr eax, 8
|
|
||||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
|
||||||
add eax, 256
|
|
||||||
movzx ecx, bpl
|
|
||||||
shr ebp, 8
|
|
||||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
|
||||||
movzx ecx, sil
|
|
||||||
shr esi, 8
|
|
||||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
|
||||||
add r12, 2048
|
|
||||||
movzx ecx, r10b
|
|
||||||
shr r10d, 8
|
|
||||||
add r10d, 256
|
|
||||||
mov r11d, DWORD PTR [r12+rax*4]
|
|
||||||
xor r11d, DWORD PTR [r12+rcx*4]
|
|
||||||
xor r11d, r9d
|
|
||||||
movzx ecx, sil
|
|
||||||
mov r10d, DWORD PTR [r12+r10*4]
|
|
||||||
shr esi, 8
|
|
||||||
add esi, 256
|
|
||||||
xor r10d, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, bpl
|
|
||||||
xor r10d, ebx
|
|
||||||
shr ebp, 8
|
|
||||||
movd xmm1, r11d
|
|
||||||
add ebp, 256
|
|
||||||
movd r11, xmm12
|
|
||||||
mov r9d, DWORD PTR [r12+rcx*4]
|
|
||||||
xor r9d, DWORD PTR [r12+rsi*4]
|
|
||||||
mov eax, DWORD PTR [r12+rbp*4]
|
|
||||||
xor r9d, edi
|
|
||||||
movzx ecx, r14b
|
|
||||||
movd xmm0, r10d
|
|
||||||
movd xmm2, r9d
|
|
||||||
xor eax, DWORD PTR [r12+rcx*4]
|
|
||||||
mov rcx, rdx
|
|
||||||
xor eax, r15d
|
|
||||||
punpckldq xmm2, xmm1
|
|
||||||
xor rcx, 16
|
|
||||||
movd xmm6, eax
|
|
||||||
mov rax, rdx
|
|
||||||
punpckldq xmm6, xmm0
|
|
||||||
xor rax, 32
|
|
||||||
punpckldq xmm6, xmm2
|
|
||||||
xor rdx, 48
|
|
||||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
|
||||||
pxor xmm6, xmm2
|
|
||||||
pxor xmm6, xmm7
|
|
||||||
paddq xmm2, xmm4
|
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
|
||||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
|
||||||
pxor xmm6, xmm1
|
|
||||||
pxor xmm6, xmm0
|
|
||||||
paddq xmm0, xmm5
|
|
||||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movd rcx, xmm13
|
|
||||||
paddq xmm1, xmm7
|
|
||||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
|
||||||
movd rdi, xmm6
|
|
||||||
mov r10, rdi
|
|
||||||
and r10d, 2097136
|
|
||||||
movdqa xmm0, xmm6
|
|
||||||
pxor xmm0, xmm4
|
|
||||||
movdqu XMMWORD PTR [r13], xmm0
|
|
||||||
|
|
||||||
mov ebx, [rsp+144]
|
|
||||||
mov ebp, [rsp+152]
|
|
||||||
add ebx, [rsp+148]
|
|
||||||
add ebp, [rsp+156]
|
|
||||||
shl rbp, 32
|
|
||||||
or rbx, rbp
|
|
||||||
|
|
||||||
xor rbx, QWORD PTR [r10+r11]
|
|
||||||
lea r14, QWORD PTR [r10+r11]
|
|
||||||
mov rbp, QWORD PTR [r14+8]
|
|
||||||
|
|
||||||
mov [rsp+160], rbx
|
|
||||||
mov [rsp+168], rdi
|
|
||||||
mov [rsp+176], rbp
|
|
||||||
mov [rsp+184], r10
|
|
||||||
mov r10, rsp
|
|
||||||
|
|
||||||
mov ebx, [rsp+144]
|
|
||||||
mov esi, [rsp+148]
|
|
||||||
mov edi, [rsp+152]
|
|
||||||
mov ebp, [rsp+156]
|
|
||||||
|
|
||||||
movd esp, xmm7
|
|
||||||
movaps xmm0, xmm7
|
|
||||||
psrldq xmm0, 8
|
|
||||||
movd r15d, xmm0
|
|
||||||
movd eax, xmm4
|
|
||||||
movd edx, xmm5
|
|
||||||
movaps xmm0, xmm5
|
|
||||||
psrldq xmm0, 8
|
|
||||||
movd r9d, xmm0
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_soft_aes_template_part2):
|
|
||||||
mov rsp, r10
|
|
||||||
mov [rsp+144], ebx
|
|
||||||
mov [rsp+148], esi
|
|
||||||
mov [rsp+152], edi
|
|
||||||
mov [rsp+156], ebp
|
|
||||||
|
|
||||||
mov edi, edi
|
|
||||||
shl rbp, 32
|
|
||||||
or rbp, rdi
|
|
||||||
xor r8, rbp
|
|
||||||
|
|
||||||
mov ebx, ebx
|
|
||||||
shl rsi, 32
|
|
||||||
or rsi, rbx
|
|
||||||
xor QWORD PTR [rsp+320], rsi
|
|
||||||
|
|
||||||
mov rbx, [rsp+160]
|
|
||||||
mov rdi, [rsp+168]
|
|
||||||
mov rbp, [rsp+176]
|
|
||||||
mov r10, [rsp+184]
|
|
||||||
|
|
||||||
mov r9, r10
|
|
||||||
xor r9, 16
|
|
||||||
mov rcx, r10
|
|
||||||
xor rcx, 32
|
|
||||||
xor r10, 48
|
|
||||||
mov rax, rbx
|
|
||||||
mul rdi
|
|
||||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
|
||||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
|
||||||
pxor xmm6, xmm2
|
|
||||||
pxor xmm6, xmm1
|
|
||||||
paddq xmm1, xmm7
|
|
||||||
add r8, rdx
|
|
||||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
|
||||||
pxor xmm6, xmm0
|
|
||||||
paddq xmm0, xmm5
|
|
||||||
paddq xmm2, xmm4
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
|
||||||
movdqa xmm5, xmm4
|
|
||||||
mov r9, QWORD PTR [rsp+320]
|
|
||||||
movdqa xmm4, xmm6
|
|
||||||
add r9, rax
|
|
||||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
|
||||||
mov r10, QWORD PTR [rsp+304]
|
|
||||||
movd r12d, xmm11
|
|
||||||
mov QWORD PTR [r14], r8
|
|
||||||
xor r8, rbx
|
|
||||||
mov rax, r8
|
|
||||||
mov QWORD PTR [r14+8], r9
|
|
||||||
and eax, 2097136
|
|
||||||
xor r9, rbp
|
|
||||||
mov QWORD PTR [rsp+320], r9
|
|
||||||
mov QWORD PTR [rsp+328], rax
|
|
||||||
sub r12d, 1
|
|
||||||
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_soft_aes_template_part3):
|
|
||||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
|
||||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
|
||||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
|
||||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
|
||||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
|
||||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
|
||||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
|
||||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
|
||||||
|
|
||||||
add rsp, 232
|
|
||||||
pop r15
|
|
||||||
pop r14
|
|
||||||
pop r13
|
|
||||||
pop r12
|
|
||||||
pop rdi
|
|
||||||
pop rsi
|
|
||||||
pop rbp
|
|
||||||
pop rbx
|
|
||||||
ret
|
|
||||||
FN_PREFIX(CryptonightR_soft_aes_template_end):
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,529 +0,0 @@
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_part1)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_mainloop)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_part2)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_part3)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_end)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part1)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part2)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part3)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part4)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightR_template_double_end)
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightR_template_part1):
|
|
||||||
mov QWORD PTR [rsp+16], rbx
|
|
||||||
mov QWORD PTR [rsp+24], rbp
|
|
||||||
mov QWORD PTR [rsp+32], rsi
|
|
||||||
push r10
|
|
||||||
push r11
|
|
||||||
push r12
|
|
||||||
push r13
|
|
||||||
push r14
|
|
||||||
push r15
|
|
||||||
push rdi
|
|
||||||
sub rsp, 64
|
|
||||||
mov r12, rcx
|
|
||||||
mov r8, QWORD PTR [r12+32]
|
|
||||||
mov rdx, r12
|
|
||||||
xor r8, QWORD PTR [r12]
|
|
||||||
mov r15, QWORD PTR [r12+40]
|
|
||||||
mov r9, r8
|
|
||||||
xor r15, QWORD PTR [r12+8]
|
|
||||||
mov r11, QWORD PTR [r12+224]
|
|
||||||
mov r12, QWORD PTR [r12+56]
|
|
||||||
xor r12, QWORD PTR [rdx+24]
|
|
||||||
mov rax, QWORD PTR [rdx+48]
|
|
||||||
xor rax, QWORD PTR [rdx+16]
|
|
||||||
movaps XMMWORD PTR [rsp+48], xmm6
|
|
||||||
movd xmm0, r12
|
|
||||||
movaps XMMWORD PTR [rsp+32], xmm7
|
|
||||||
movaps XMMWORD PTR [rsp+16], xmm8
|
|
||||||
movaps XMMWORD PTR [rsp], xmm9
|
|
||||||
mov r12, QWORD PTR [rdx+88]
|
|
||||||
xor r12, QWORD PTR [rdx+72]
|
|
||||||
movd xmm6, rax
|
|
||||||
mov rax, QWORD PTR [rdx+80]
|
|
||||||
xor rax, QWORD PTR [rdx+64]
|
|
||||||
punpcklqdq xmm6, xmm0
|
|
||||||
and r9d, 2097136
|
|
||||||
movd xmm0, r12
|
|
||||||
movd xmm7, rax
|
|
||||||
punpcklqdq xmm7, xmm0
|
|
||||||
mov r10d, r9d
|
|
||||||
movd xmm9, rsp
|
|
||||||
mov rsp, r8
|
|
||||||
mov r8d, 524288
|
|
||||||
|
|
||||||
mov ebx, [rdx+96]
|
|
||||||
mov esi, [rdx+100]
|
|
||||||
mov edi, [rdx+104]
|
|
||||||
mov ebp, [rdx+108]
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightR_template_mainloop):
|
|
||||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
|
||||||
movd xmm0, r15
|
|
||||||
movd xmm4, rsp
|
|
||||||
punpcklqdq xmm4, xmm0
|
|
||||||
lea rdx, QWORD PTR [r9+r11]
|
|
||||||
|
|
||||||
aesenc xmm5, xmm4
|
|
||||||
|
|
||||||
mov r12d, r9d
|
|
||||||
mov eax, r9d
|
|
||||||
xor r9d, 48
|
|
||||||
xor r12d, 16
|
|
||||||
xor eax, 32
|
|
||||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
|
||||||
movaps xmm3, xmm0
|
|
||||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
|
||||||
pxor xmm0, xmm2
|
|
||||||
pxor xmm5, xmm1
|
|
||||||
pxor xmm5, xmm0
|
|
||||||
paddq xmm3, xmm7
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
|
||||||
|
|
||||||
movd r12, xmm5
|
|
||||||
movd r10d, xmm5
|
|
||||||
and r10d, 2097136
|
|
||||||
|
|
||||||
movdqa xmm0, xmm5
|
|
||||||
pxor xmm0, xmm6
|
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
|
||||||
|
|
||||||
lea r13d, [ebx+esi]
|
|
||||||
lea edx, [edi+ebp]
|
|
||||||
shl rdx, 32
|
|
||||||
or r13, rdx
|
|
||||||
|
|
||||||
xor r13, QWORD PTR [r10+r11]
|
|
||||||
mov r14, QWORD PTR [r10+r11+8]
|
|
||||||
|
|
||||||
movd eax, xmm6
|
|
||||||
movd edx, xmm7
|
|
||||||
pextrd r9d, xmm7, 2
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_template_part2):
|
|
||||||
mov eax, edi
|
|
||||||
mov edx, ebp
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
xor rsp, rax
|
|
||||||
|
|
||||||
mov eax, ebx
|
|
||||||
mov edx, esi
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
xor r15, rax
|
|
||||||
|
|
||||||
mov rax, r13
|
|
||||||
mul r12
|
|
||||||
|
|
||||||
mov r9d, r10d
|
|
||||||
mov r12d, r10d
|
|
||||||
xor r9d, 16
|
|
||||||
xor r12d, 32
|
|
||||||
xor r10d, 48
|
|
||||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
|
||||||
movaps xmm3, xmm1
|
|
||||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
|
||||||
movdqa xmm0, XMMWORD PTR [r10+r11]
|
|
||||||
pxor xmm1, xmm2
|
|
||||||
pxor xmm5, xmm0
|
|
||||||
pxor xmm5, xmm1
|
|
||||||
paddq xmm3, xmm4
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm0, xmm7
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
|
||||||
|
|
||||||
movdqa xmm7, xmm6
|
|
||||||
add r15, rax
|
|
||||||
add rsp, rdx
|
|
||||||
xor r10, 48
|
|
||||||
mov QWORD PTR [r10+r11], rsp
|
|
||||||
xor rsp, r13
|
|
||||||
mov r9d, esp
|
|
||||||
mov QWORD PTR [r10+r11+8], r15
|
|
||||||
and r9d, 2097136
|
|
||||||
xor r15, r14
|
|
||||||
movdqa xmm6, xmm5
|
|
||||||
dec r8d
|
|
||||||
jnz FN_PREFIX(CryptonightR_template_mainloop)
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_template_part3):
|
|
||||||
movd rsp, xmm9
|
|
||||||
|
|
||||||
mov rbx, QWORD PTR [rsp+136]
|
|
||||||
mov rbp, QWORD PTR [rsp+144]
|
|
||||||
mov rsi, QWORD PTR [rsp+152]
|
|
||||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
|
||||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
|
||||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
|
||||||
movaps xmm9, XMMWORD PTR [rsp]
|
|
||||||
add rsp, 64
|
|
||||||
pop rdi
|
|
||||||
pop r15
|
|
||||||
pop r14
|
|
||||||
pop r13
|
|
||||||
pop r12
|
|
||||||
pop r11
|
|
||||||
pop r10
|
|
||||||
ret 0
|
|
||||||
FN_PREFIX(CryptonightR_template_end):
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightR_template_double_part1):
|
|
||||||
mov QWORD PTR [rsp+24], rbx
|
|
||||||
push rbp
|
|
||||||
push rsi
|
|
||||||
push rdi
|
|
||||||
push r12
|
|
||||||
push r13
|
|
||||||
push r14
|
|
||||||
push r15
|
|
||||||
sub rsp, 320
|
|
||||||
mov r14, QWORD PTR [rcx+32]
|
|
||||||
mov r8, rcx
|
|
||||||
xor r14, QWORD PTR [rcx]
|
|
||||||
mov r12, QWORD PTR [rcx+40]
|
|
||||||
mov ebx, r14d
|
|
||||||
mov rsi, QWORD PTR [rcx+224]
|
|
||||||
and ebx, 2097136
|
|
||||||
xor r12, QWORD PTR [rcx+8]
|
|
||||||
mov rcx, QWORD PTR [rcx+56]
|
|
||||||
xor rcx, QWORD PTR [r8+24]
|
|
||||||
mov rax, QWORD PTR [r8+48]
|
|
||||||
xor rax, QWORD PTR [r8+16]
|
|
||||||
mov r15, QWORD PTR [rdx+32]
|
|
||||||
xor r15, QWORD PTR [rdx]
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov rcx, QWORD PTR [r8+88]
|
|
||||||
xor rcx, QWORD PTR [r8+72]
|
|
||||||
mov r13, QWORD PTR [rdx+40]
|
|
||||||
mov rdi, QWORD PTR [rdx+224]
|
|
||||||
xor r13, QWORD PTR [rdx+8]
|
|
||||||
movaps XMMWORD PTR [rsp+160], xmm6
|
|
||||||
movaps XMMWORD PTR [rsp+176], xmm7
|
|
||||||
movaps XMMWORD PTR [rsp+192], xmm8
|
|
||||||
movaps XMMWORD PTR [rsp+208], xmm9
|
|
||||||
movaps XMMWORD PTR [rsp+224], xmm10
|
|
||||||
movaps XMMWORD PTR [rsp+240], xmm11
|
|
||||||
movaps XMMWORD PTR [rsp+256], xmm12
|
|
||||||
movaps XMMWORD PTR [rsp+272], xmm13
|
|
||||||
movaps XMMWORD PTR [rsp+288], xmm14
|
|
||||||
movaps XMMWORD PTR [rsp+304], xmm15
|
|
||||||
movd xmm7, rax
|
|
||||||
mov rax, QWORD PTR [r8+80]
|
|
||||||
xor rax, QWORD PTR [r8+64]
|
|
||||||
|
|
||||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
|
||||||
movaps xmm2, XMMWORD PTR [r8+96]
|
|
||||||
movaps XMMWORD PTR [rsp], xmm1
|
|
||||||
movaps XMMWORD PTR [rsp+16], xmm2
|
|
||||||
|
|
||||||
mov r8d, r15d
|
|
||||||
punpcklqdq xmm7, xmm0
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov rcx, QWORD PTR [rdx+56]
|
|
||||||
xor rcx, QWORD PTR [rdx+24]
|
|
||||||
movd xmm9, rax
|
|
||||||
mov QWORD PTR [rsp+128], rsi
|
|
||||||
mov rax, QWORD PTR [rdx+48]
|
|
||||||
xor rax, QWORD PTR [rdx+16]
|
|
||||||
punpcklqdq xmm9, xmm0
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov rcx, QWORD PTR [rdx+88]
|
|
||||||
xor rcx, QWORD PTR [rdx+72]
|
|
||||||
movd xmm8, rax
|
|
||||||
mov QWORD PTR [rsp+136], rdi
|
|
||||||
mov rax, QWORD PTR [rdx+80]
|
|
||||||
xor rax, QWORD PTR [rdx+64]
|
|
||||||
punpcklqdq xmm8, xmm0
|
|
||||||
and r8d, 2097136
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov r11d, 524288
|
|
||||||
movd xmm10, rax
|
|
||||||
punpcklqdq xmm10, xmm0
|
|
||||||
|
|
||||||
movd xmm14, QWORD PTR [rsp+128]
|
|
||||||
movd xmm15, QWORD PTR [rsp+136]
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightR_template_double_mainloop):
|
|
||||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
|
||||||
movd xmm0, r12
|
|
||||||
mov ecx, ebx
|
|
||||||
movd xmm3, r14
|
|
||||||
punpcklqdq xmm3, xmm0
|
|
||||||
xor ebx, 16
|
|
||||||
aesenc xmm6, xmm3
|
|
||||||
movd xmm4, r15
|
|
||||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
|
||||||
pxor xmm6, xmm0
|
|
||||||
xor ebx, 48
|
|
||||||
paddq xmm0, xmm7
|
|
||||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
|
||||||
pxor xmm6, xmm1
|
|
||||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
|
||||||
paddq xmm1, xmm3
|
|
||||||
xor ebx, 16
|
|
||||||
mov eax, ebx
|
|
||||||
xor rax, 32
|
|
||||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
|
||||||
pxor xmm6, xmm0
|
|
||||||
movd rdx, xmm6
|
|
||||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
|
||||||
paddq xmm0, xmm9
|
|
||||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
|
||||||
movdqa xmm0, xmm6
|
|
||||||
pxor xmm0, xmm7
|
|
||||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
|
||||||
mov esi, edx
|
|
||||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
|
||||||
and esi, 2097136
|
|
||||||
mov ecx, r8d
|
|
||||||
movd xmm0, r13
|
|
||||||
punpcklqdq xmm4, xmm0
|
|
||||||
xor r8d, 16
|
|
||||||
aesenc xmm5, xmm4
|
|
||||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
|
||||||
pxor xmm5, xmm0
|
|
||||||
xor r8d, 48
|
|
||||||
paddq xmm0, xmm8
|
|
||||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
|
||||||
pxor xmm5, xmm1
|
|
||||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
xor r8d, 16
|
|
||||||
mov eax, r8d
|
|
||||||
xor rax, 32
|
|
||||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
|
||||||
pxor xmm5, xmm0
|
|
||||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
|
||||||
paddq xmm0, xmm10
|
|
||||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
|
||||||
movdqa xmm0, xmm5
|
|
||||||
pxor xmm0, xmm8
|
|
||||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
|
||||||
movd rdi, xmm5
|
|
||||||
movd rcx, xmm14
|
|
||||||
mov ebp, edi
|
|
||||||
mov r8, QWORD PTR [rcx+rsi]
|
|
||||||
mov r10, QWORD PTR [rcx+rsi+8]
|
|
||||||
lea r9, QWORD PTR [rcx+rsi]
|
|
||||||
xor esi, 16
|
|
||||||
|
|
||||||
movd xmm0, rsp
|
|
||||||
movd xmm1, rsi
|
|
||||||
movd xmm2, rdi
|
|
||||||
movd xmm11, rbp
|
|
||||||
movd xmm12, r15
|
|
||||||
movd xmm13, rdx
|
|
||||||
mov [rsp+104], rcx
|
|
||||||
mov [rsp+112], r9
|
|
||||||
|
|
||||||
mov ebx, DWORD PTR [rsp+16]
|
|
||||||
mov esi, DWORD PTR [rsp+20]
|
|
||||||
mov edi, DWORD PTR [rsp+24]
|
|
||||||
mov ebp, DWORD PTR [rsp+28]
|
|
||||||
|
|
||||||
lea eax, [ebx+esi]
|
|
||||||
lea edx, [edi+ebp]
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
xor r8, rax
|
|
||||||
|
|
||||||
movd esp, xmm3
|
|
||||||
pextrd r15d, xmm3, 2
|
|
||||||
movd eax, xmm7
|
|
||||||
movd edx, xmm9
|
|
||||||
pextrd r9d, xmm9, 2
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_template_double_part2):
|
|
||||||
|
|
||||||
mov eax, edi
|
|
||||||
mov edx, ebp
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
xor r14, rax
|
|
||||||
|
|
||||||
mov eax, ebx
|
|
||||||
mov edx, esi
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
xor r12, rax
|
|
||||||
|
|
||||||
movd rsp, xmm0
|
|
||||||
mov DWORD PTR [rsp+16], ebx
|
|
||||||
mov DWORD PTR [rsp+20], esi
|
|
||||||
mov DWORD PTR [rsp+24], edi
|
|
||||||
mov DWORD PTR [rsp+28], ebp
|
|
||||||
|
|
||||||
movd rsi, xmm1
|
|
||||||
movd rdi, xmm2
|
|
||||||
movd rbp, xmm11
|
|
||||||
movd r15, xmm12
|
|
||||||
movd rdx, xmm13
|
|
||||||
mov rcx, [rsp+104]
|
|
||||||
mov r9, [rsp+112]
|
|
||||||
|
|
||||||
mov rbx, r8
|
|
||||||
mov rax, r8
|
|
||||||
mul rdx
|
|
||||||
and ebp, 2097136
|
|
||||||
mov r8, rax
|
|
||||||
movdqu xmm1, XMMWORD PTR [rcx+rsi]
|
|
||||||
pxor xmm6, xmm1
|
|
||||||
xor esi, 48
|
|
||||||
paddq xmm1, xmm7
|
|
||||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
|
||||||
pxor xmm6, xmm2
|
|
||||||
paddq xmm2, xmm3
|
|
||||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
|
||||||
xor esi, 16
|
|
||||||
mov eax, esi
|
|
||||||
mov rsi, rcx
|
|
||||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
|
||||||
pxor xmm6, xmm0
|
|
||||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
|
||||||
paddq xmm0, xmm9
|
|
||||||
add r12, r8
|
|
||||||
xor rax, 32
|
|
||||||
add r14, rdx
|
|
||||||
movdqa xmm9, xmm7
|
|
||||||
movdqa xmm7, xmm6
|
|
||||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
|
||||||
mov QWORD PTR [r9+8], r12
|
|
||||||
xor r12, r10
|
|
||||||
mov QWORD PTR [r9], r14
|
|
||||||
movd rcx, xmm15
|
|
||||||
xor r14, rbx
|
|
||||||
mov r10d, ebp
|
|
||||||
mov ebx, r14d
|
|
||||||
xor ebp, 16
|
|
||||||
and ebx, 2097136
|
|
||||||
mov r8, QWORD PTR [r10+rcx]
|
|
||||||
mov r9, QWORD PTR [r10+rcx+8]
|
|
||||||
|
|
||||||
movd xmm0, rsp
|
|
||||||
movd xmm1, rbx
|
|
||||||
movd xmm2, rsi
|
|
||||||
movd xmm11, rdi
|
|
||||||
movd xmm12, rbp
|
|
||||||
movd xmm13, r15
|
|
||||||
mov [rsp+104], rcx
|
|
||||||
mov [rsp+112], r9
|
|
||||||
|
|
||||||
mov ebx, DWORD PTR [rsp]
|
|
||||||
mov esi, DWORD PTR [rsp+4]
|
|
||||||
mov edi, DWORD PTR [rsp+8]
|
|
||||||
mov ebp, DWORD PTR [rsp+12]
|
|
||||||
|
|
||||||
lea eax, [ebx+esi]
|
|
||||||
lea edx, [edi+ebp]
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
|
|
||||||
xor r8, rax
|
|
||||||
movd xmm3, r8
|
|
||||||
|
|
||||||
movd esp, xmm4
|
|
||||||
pextrd r15d, xmm4, 2
|
|
||||||
movd eax, xmm8
|
|
||||||
movd edx, xmm10
|
|
||||||
pextrd r9d, xmm10, 2
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_template_double_part3):
|
|
||||||
|
|
||||||
movd r15, xmm13
|
|
||||||
|
|
||||||
mov eax, edi
|
|
||||||
mov edx, ebp
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
xor r15, rax
|
|
||||||
|
|
||||||
mov eax, ebx
|
|
||||||
mov edx, esi
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
xor r13, rax
|
|
||||||
|
|
||||||
movd rsp, xmm0
|
|
||||||
mov DWORD PTR [rsp], ebx
|
|
||||||
mov DWORD PTR [rsp+4], esi
|
|
||||||
mov DWORD PTR [rsp+8], edi
|
|
||||||
mov DWORD PTR [rsp+12], ebp
|
|
||||||
|
|
||||||
movd rbx, xmm1
|
|
||||||
movd rsi, xmm2
|
|
||||||
movd rdi, xmm11
|
|
||||||
movd rbp, xmm12
|
|
||||||
mov rcx, [rsp+104]
|
|
||||||
mov r9, [rsp+112]
|
|
||||||
|
|
||||||
mov rax, r8
|
|
||||||
mul rdi
|
|
||||||
mov rdi, rcx
|
|
||||||
mov r8, rax
|
|
||||||
movdqu xmm1, XMMWORD PTR [rbp+rcx]
|
|
||||||
pxor xmm5, xmm1
|
|
||||||
xor ebp, 48
|
|
||||||
paddq xmm1, xmm8
|
|
||||||
add r13, r8
|
|
||||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
|
||||||
pxor xmm5, xmm2
|
|
||||||
add r15, rdx
|
|
||||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
|
||||||
paddq xmm2, xmm4
|
|
||||||
xor ebp, 16
|
|
||||||
mov eax, ebp
|
|
||||||
xor rax, 32
|
|
||||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
|
||||||
pxor xmm5, xmm0
|
|
||||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
|
||||||
paddq xmm0, xmm10
|
|
||||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
|
||||||
movd rax, xmm3
|
|
||||||
movdqa xmm10, xmm8
|
|
||||||
mov QWORD PTR [r10+rcx], r15
|
|
||||||
movdqa xmm8, xmm5
|
|
||||||
xor r15, rax
|
|
||||||
mov QWORD PTR [r10+rcx+8], r13
|
|
||||||
mov r8d, r15d
|
|
||||||
xor r13, r9
|
|
||||||
and r8d, 2097136
|
|
||||||
dec r11d
|
|
||||||
jnz FN_PREFIX(CryptonightR_template_double_mainloop)
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightR_template_double_part4):
|
|
||||||
|
|
||||||
mov rbx, QWORD PTR [rsp+400]
|
|
||||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
|
||||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
|
||||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
|
||||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
|
||||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
|
||||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
|
||||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
|
||||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
|
||||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
|
||||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
|
||||||
add rsp, 320
|
|
||||||
pop r15
|
|
||||||
pop r14
|
|
||||||
pop r13
|
|
||||||
pop r12
|
|
||||||
pop rdi
|
|
||||||
pop rsi
|
|
||||||
pop rbp
|
|
||||||
ret 0
|
|
||||||
FN_PREFIX(CryptonightR_template_double_end):
|
|
|
@ -70,29 +70,30 @@ CryptonightR_template_mainloop:
|
||||||
|
|
||||||
aesenc xmm5, xmm4
|
aesenc xmm5, xmm4
|
||||||
|
|
||||||
mov r12d, r9d
|
mov r13d, r9d
|
||||||
mov eax, r9d
|
mov eax, r9d
|
||||||
xor r9d, 48
|
xor r9d, 48
|
||||||
xor r12d, 16
|
xor r13d, 16
|
||||||
xor eax, 32
|
xor eax, 32
|
||||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||||
movaps xmm3, xmm0
|
movaps xmm3, xmm0
|
||||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
movdqu xmm2, XMMWORD PTR [r13+r11]
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
pxor xmm0, xmm2
|
pxor xmm0, xmm2
|
||||||
pxor xmm5, xmm1
|
pxor xmm5, xmm1
|
||||||
pxor xmm5, xmm0
|
pxor xmm5, xmm0
|
||||||
paddq xmm3, xmm7
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
|
||||||
|
|
||||||
movd r12, xmm5
|
movd r12, xmm5
|
||||||
movd r10d, xmm5
|
movd r10d, xmm5
|
||||||
and r10d, 2097136
|
and r10d, 2097136
|
||||||
|
|
||||||
|
paddq xmm3, xmm7
|
||||||
|
paddq xmm2, xmm6
|
||||||
|
paddq xmm1, xmm4
|
||||||
|
movdqu XMMWORD PTR [r13+r11], xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||||
|
|
||||||
movdqa xmm0, xmm5
|
movdqa xmm0, xmm5
|
||||||
pxor xmm0, xmm6
|
pxor xmm0, xmm6
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
@ -102,14 +103,16 @@ CryptonightR_template_mainloop:
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
or r13, rdx
|
or r13, rdx
|
||||||
|
|
||||||
xor r13, QWORD PTR [r10+r11]
|
|
||||||
mov r14, QWORD PTR [r10+r11+8]
|
|
||||||
|
|
||||||
movd eax, xmm6
|
movd eax, xmm6
|
||||||
movd edx, xmm7
|
movd edx, xmm7
|
||||||
pextrd r9d, xmm7, 2
|
pextrd r9d, xmm7, 2
|
||||||
|
|
||||||
|
xor r13, QWORD PTR [r10+r11]
|
||||||
|
mov r14, QWORD PTR [r10+r11+8]
|
||||||
|
|
||||||
CryptonightR_template_part2:
|
CryptonightR_template_part2:
|
||||||
|
lea rcx, [r10+r11]
|
||||||
|
|
||||||
mov eax, edi
|
mov eax, edi
|
||||||
mov edx, ebp
|
mov edx, ebp
|
||||||
shl rdx, 32
|
shl rdx, 32
|
||||||
|
@ -124,6 +127,8 @@ CryptonightR_template_part2:
|
||||||
|
|
||||||
mov rax, r13
|
mov rax, r13
|
||||||
mul r12
|
mul r12
|
||||||
|
add r15, rax
|
||||||
|
add rsp, rdx
|
||||||
|
|
||||||
mov r9d, r10d
|
mov r9d, r10d
|
||||||
mov r12d, r10d
|
mov r12d, r10d
|
||||||
|
@ -145,13 +150,10 @@ CryptonightR_template_part2:
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||||
|
|
||||||
movdqa xmm7, xmm6
|
movdqa xmm7, xmm6
|
||||||
add r15, rax
|
mov QWORD PTR [rcx], rsp
|
||||||
add rsp, rdx
|
|
||||||
xor r10, 48
|
|
||||||
mov QWORD PTR [r10+r11], rsp
|
|
||||||
xor rsp, r13
|
xor rsp, r13
|
||||||
mov r9d, esp
|
mov r9d, esp
|
||||||
mov QWORD PTR [r10+r11+8], r15
|
mov QWORD PTR [rcx+8], r15
|
||||||
and r9d, 2097136
|
and r9d, 2097136
|
||||||
xor r15, r14
|
xor r15, r14
|
||||||
movdqa xmm6, xmm5
|
movdqa xmm6, xmm5
|
||||||
|
|
|
@ -1,266 +0,0 @@
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end)
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part1):
|
|
||||||
mov QWORD PTR [rsp+8], rcx
|
|
||||||
push rbx
|
|
||||||
push rbp
|
|
||||||
push rsi
|
|
||||||
push rdi
|
|
||||||
push r12
|
|
||||||
push r13
|
|
||||||
push r14
|
|
||||||
push r15
|
|
||||||
sub rsp, 232
|
|
||||||
|
|
||||||
mov eax, [rcx+96]
|
|
||||||
mov ebx, [rcx+100]
|
|
||||||
mov esi, [rcx+104]
|
|
||||||
mov edx, [rcx+108]
|
|
||||||
mov [rsp+144], eax
|
|
||||||
mov [rsp+148], ebx
|
|
||||||
mov [rsp+152], esi
|
|
||||||
mov [rsp+156], edx
|
|
||||||
|
|
||||||
mov rax, QWORD PTR [rcx+48]
|
|
||||||
mov r10, rcx
|
|
||||||
xor rax, QWORD PTR [rcx+16]
|
|
||||||
mov r8, QWORD PTR [rcx+32]
|
|
||||||
xor r8, QWORD PTR [rcx]
|
|
||||||
mov r9, QWORD PTR [rcx+40]
|
|
||||||
xor r9, QWORD PTR [rcx+8]
|
|
||||||
movd xmm4, rax
|
|
||||||
mov rdx, QWORD PTR [rcx+56]
|
|
||||||
xor rdx, QWORD PTR [rcx+24]
|
|
||||||
mov r11, QWORD PTR [rcx+224]
|
|
||||||
mov rcx, QWORD PTR [rcx+88]
|
|
||||||
xor rcx, QWORD PTR [r10+72]
|
|
||||||
mov rax, QWORD PTR [r10+80]
|
|
||||||
movd xmm0, rdx
|
|
||||||
xor rax, QWORD PTR [r10+64]
|
|
||||||
|
|
||||||
movaps XMMWORD PTR [rsp+16], xmm6
|
|
||||||
movaps XMMWORD PTR [rsp+32], xmm7
|
|
||||||
movaps XMMWORD PTR [rsp+48], xmm8
|
|
||||||
movaps XMMWORD PTR [rsp+64], xmm9
|
|
||||||
movaps XMMWORD PTR [rsp+80], xmm10
|
|
||||||
movaps XMMWORD PTR [rsp+96], xmm11
|
|
||||||
movaps XMMWORD PTR [rsp+112], xmm12
|
|
||||||
movaps XMMWORD PTR [rsp+128], xmm13
|
|
||||||
|
|
||||||
movd xmm5, rax
|
|
||||||
|
|
||||||
mov rax, r8
|
|
||||||
punpcklqdq xmm4, xmm0
|
|
||||||
and eax, 2097136
|
|
||||||
movd xmm10, QWORD PTR [r10+96]
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov rcx, QWORD PTR [r10+104]
|
|
||||||
xorps xmm9, xmm9
|
|
||||||
mov QWORD PTR [rsp+328], rax
|
|
||||||
movd xmm12, r11
|
|
||||||
mov QWORD PTR [rsp+320], r9
|
|
||||||
punpcklqdq xmm5, xmm0
|
|
||||||
movd xmm13, rcx
|
|
||||||
mov r12d, 524288
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop):
|
|
||||||
movd xmm11, r12d
|
|
||||||
mov r12, QWORD PTR [r10+272]
|
|
||||||
lea r13, QWORD PTR [rax+r11]
|
|
||||||
mov esi, DWORD PTR [r13]
|
|
||||||
movd xmm0, r9
|
|
||||||
mov r10d, DWORD PTR [r13+4]
|
|
||||||
movd xmm7, r8
|
|
||||||
mov ebp, DWORD PTR [r13+12]
|
|
||||||
mov r14d, DWORD PTR [r13+8]
|
|
||||||
mov rdx, QWORD PTR [rsp+328]
|
|
||||||
movzx ecx, sil
|
|
||||||
shr esi, 8
|
|
||||||
punpcklqdq xmm7, xmm0
|
|
||||||
mov r15d, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, r10b
|
|
||||||
shr r10d, 8
|
|
||||||
mov edi, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, r14b
|
|
||||||
shr r14d, 8
|
|
||||||
mov ebx, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, bpl
|
|
||||||
shr ebp, 8
|
|
||||||
mov r9d, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, r10b
|
|
||||||
shr r10d, 8
|
|
||||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
|
||||||
movzx ecx, r14b
|
|
||||||
shr r14d, 8
|
|
||||||
mov eax, r14d
|
|
||||||
shr eax, 8
|
|
||||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
|
||||||
add eax, 256
|
|
||||||
movzx ecx, bpl
|
|
||||||
shr ebp, 8
|
|
||||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
|
||||||
movzx ecx, sil
|
|
||||||
shr esi, 8
|
|
||||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
|
||||||
add r12, 2048
|
|
||||||
movzx ecx, r10b
|
|
||||||
shr r10d, 8
|
|
||||||
add r10d, 256
|
|
||||||
mov r11d, DWORD PTR [r12+rax*4]
|
|
||||||
xor r11d, DWORD PTR [r12+rcx*4]
|
|
||||||
xor r11d, r9d
|
|
||||||
movzx ecx, sil
|
|
||||||
mov r10d, DWORD PTR [r12+r10*4]
|
|
||||||
shr esi, 8
|
|
||||||
add esi, 256
|
|
||||||
xor r10d, DWORD PTR [r12+rcx*4]
|
|
||||||
movzx ecx, bpl
|
|
||||||
xor r10d, ebx
|
|
||||||
shr ebp, 8
|
|
||||||
movd xmm1, r11d
|
|
||||||
add ebp, 256
|
|
||||||
movd r11, xmm12
|
|
||||||
mov r9d, DWORD PTR [r12+rcx*4]
|
|
||||||
xor r9d, DWORD PTR [r12+rsi*4]
|
|
||||||
mov eax, DWORD PTR [r12+rbp*4]
|
|
||||||
xor r9d, edi
|
|
||||||
movzx ecx, r14b
|
|
||||||
movd xmm0, r10d
|
|
||||||
movd xmm2, r9d
|
|
||||||
xor eax, DWORD PTR [r12+rcx*4]
|
|
||||||
mov rcx, rdx
|
|
||||||
xor eax, r15d
|
|
||||||
punpckldq xmm2, xmm1
|
|
||||||
xor rcx, 16
|
|
||||||
movd xmm6, eax
|
|
||||||
mov rax, rdx
|
|
||||||
punpckldq xmm6, xmm0
|
|
||||||
xor rax, 32
|
|
||||||
punpckldq xmm6, xmm2
|
|
||||||
xor rdx, 48
|
|
||||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
|
||||||
pxor xmm6, xmm7
|
|
||||||
paddq xmm2, xmm4
|
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
|
||||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
|
||||||
paddq xmm0, xmm5
|
|
||||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movd rcx, xmm13
|
|
||||||
paddq xmm1, xmm7
|
|
||||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
|
||||||
movd rdi, xmm6
|
|
||||||
mov r10, rdi
|
|
||||||
and r10d, 2097136
|
|
||||||
movdqa xmm0, xmm6
|
|
||||||
pxor xmm0, xmm4
|
|
||||||
movdqu XMMWORD PTR [r13], xmm0
|
|
||||||
|
|
||||||
mov ebx, [rsp+144]
|
|
||||||
mov ebp, [rsp+152]
|
|
||||||
add ebx, [rsp+148]
|
|
||||||
add ebp, [rsp+156]
|
|
||||||
shl rbp, 32
|
|
||||||
or rbx, rbp
|
|
||||||
|
|
||||||
xor rbx, QWORD PTR [r10+r11]
|
|
||||||
lea r14, QWORD PTR [r10+r11]
|
|
||||||
mov rbp, QWORD PTR [r14+8]
|
|
||||||
|
|
||||||
mov [rsp+160], rbx
|
|
||||||
mov [rsp+168], rdi
|
|
||||||
mov [rsp+176], rbp
|
|
||||||
mov [rsp+184], r10
|
|
||||||
mov r10, rsp
|
|
||||||
|
|
||||||
mov ebx, [rsp+144]
|
|
||||||
mov esi, [rsp+148]
|
|
||||||
mov edi, [rsp+152]
|
|
||||||
mov ebp, [rsp+156]
|
|
||||||
|
|
||||||
movd esp, xmm7
|
|
||||||
movaps xmm0, xmm7
|
|
||||||
psrldq xmm0, 8
|
|
||||||
movd r15d, xmm0
|
|
||||||
movd eax, xmm4
|
|
||||||
movd edx, xmm5
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part2):
|
|
||||||
mov rsp, r10
|
|
||||||
mov [rsp+144], ebx
|
|
||||||
mov [rsp+148], esi
|
|
||||||
mov [rsp+152], edi
|
|
||||||
mov [rsp+156], ebp
|
|
||||||
|
|
||||||
mov rbx, [rsp+160]
|
|
||||||
mov rdi, [rsp+168]
|
|
||||||
mov rbp, [rsp+176]
|
|
||||||
mov r10, [rsp+184]
|
|
||||||
|
|
||||||
mov r9, r10
|
|
||||||
xor r9, 16
|
|
||||||
mov rcx, r10
|
|
||||||
xor rcx, 32
|
|
||||||
xor r10, 48
|
|
||||||
mov rax, rbx
|
|
||||||
mul rdi
|
|
||||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
|
||||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
|
||||||
paddq xmm1, xmm7
|
|
||||||
movd xmm0, rax
|
|
||||||
movd xmm3, rdx
|
|
||||||
xor rax, QWORD PTR [r11+rcx+8]
|
|
||||||
xor rdx, QWORD PTR [rcx+r11]
|
|
||||||
punpcklqdq xmm3, xmm0
|
|
||||||
add r8, rdx
|
|
||||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
|
||||||
pxor xmm2, xmm3
|
|
||||||
paddq xmm0, xmm5
|
|
||||||
paddq xmm2, xmm4
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
|
||||||
movdqa xmm5, xmm4
|
|
||||||
mov r9, QWORD PTR [rsp+320]
|
|
||||||
movdqa xmm4, xmm6
|
|
||||||
add r9, rax
|
|
||||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
|
||||||
mov r10, QWORD PTR [rsp+304]
|
|
||||||
movd r12d, xmm11
|
|
||||||
mov QWORD PTR [r14], r8
|
|
||||||
xor r8, rbx
|
|
||||||
mov rax, r8
|
|
||||||
mov QWORD PTR [r14+8], r9
|
|
||||||
and eax, 2097136
|
|
||||||
xor r9, rbp
|
|
||||||
mov QWORD PTR [rsp+320], r9
|
|
||||||
mov QWORD PTR [rsp+328], rax
|
|
||||||
sub r12d, 1
|
|
||||||
jne FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part3):
|
|
||||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
|
||||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
|
||||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
|
||||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
|
||||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
|
||||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
|
||||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
|
||||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
|
||||||
|
|
||||||
add rsp, 232
|
|
||||||
pop r15
|
|
||||||
pop r14
|
|
||||||
pop r13
|
|
||||||
pop r12
|
|
||||||
pop rdi
|
|
||||||
pop rsi
|
|
||||||
pop rbp
|
|
||||||
pop rbx
|
|
||||||
ret
|
|
||||||
FN_PREFIX(CryptonightWOW_soft_aes_template_end):
|
|
|
@ -1,486 +0,0 @@
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_part1)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_mainloop)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_part2)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_part3)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_end)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part1)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_mainloop)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part2)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part3)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part4)
|
|
||||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_end)
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightWOW_template_part1):
|
|
||||||
mov QWORD PTR [rsp+16], rbx
|
|
||||||
mov QWORD PTR [rsp+24], rbp
|
|
||||||
mov QWORD PTR [rsp+32], rsi
|
|
||||||
push r10
|
|
||||||
push r11
|
|
||||||
push r12
|
|
||||||
push r13
|
|
||||||
push r14
|
|
||||||
push r15
|
|
||||||
push rdi
|
|
||||||
sub rsp, 64
|
|
||||||
mov r12, rcx
|
|
||||||
mov r8, QWORD PTR [r12+32]
|
|
||||||
mov rdx, r12
|
|
||||||
xor r8, QWORD PTR [r12]
|
|
||||||
mov r15, QWORD PTR [r12+40]
|
|
||||||
mov r9, r8
|
|
||||||
xor r15, QWORD PTR [r12+8]
|
|
||||||
mov r11, QWORD PTR [r12+224]
|
|
||||||
mov r12, QWORD PTR [r12+56]
|
|
||||||
xor r12, QWORD PTR [rdx+24]
|
|
||||||
mov rax, QWORD PTR [rdx+48]
|
|
||||||
xor rax, QWORD PTR [rdx+16]
|
|
||||||
movaps XMMWORD PTR [rsp+48], xmm6
|
|
||||||
movd xmm0, r12
|
|
||||||
movaps XMMWORD PTR [rsp+32], xmm7
|
|
||||||
movaps XMMWORD PTR [rsp+16], xmm8
|
|
||||||
movaps XMMWORD PTR [rsp], xmm9
|
|
||||||
mov r12, QWORD PTR [rdx+88]
|
|
||||||
xor r12, QWORD PTR [rdx+72]
|
|
||||||
movd xmm6, rax
|
|
||||||
mov rax, QWORD PTR [rdx+80]
|
|
||||||
xor rax, QWORD PTR [rdx+64]
|
|
||||||
punpcklqdq xmm6, xmm0
|
|
||||||
and r9d, 2097136
|
|
||||||
movd xmm0, r12
|
|
||||||
movd xmm7, rax
|
|
||||||
punpcklqdq xmm7, xmm0
|
|
||||||
mov r10d, r9d
|
|
||||||
movd xmm9, rsp
|
|
||||||
mov rsp, r8
|
|
||||||
mov r8d, 524288
|
|
||||||
|
|
||||||
mov ebx, [rdx+96]
|
|
||||||
mov esi, [rdx+100]
|
|
||||||
mov edi, [rdx+104]
|
|
||||||
mov ebp, [rdx+108]
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightWOW_template_mainloop):
|
|
||||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
|
||||||
movd xmm0, r15
|
|
||||||
movd xmm4, rsp
|
|
||||||
punpcklqdq xmm4, xmm0
|
|
||||||
lea rdx, QWORD PTR [r9+r11]
|
|
||||||
|
|
||||||
aesenc xmm5, xmm4
|
|
||||||
movd r10d, xmm5
|
|
||||||
and r10d, 2097136
|
|
||||||
|
|
||||||
mov r12d, r9d
|
|
||||||
mov eax, r9d
|
|
||||||
xor r9d, 48
|
|
||||||
xor r12d, 16
|
|
||||||
xor eax, 32
|
|
||||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
|
||||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
|
||||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
|
||||||
paddq xmm0, xmm7
|
|
||||||
paddq xmm2, xmm6
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm0
|
|
||||||
movd r12, xmm5
|
|
||||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
|
||||||
|
|
||||||
movdqa xmm0, xmm5
|
|
||||||
pxor xmm0, xmm6
|
|
||||||
movdqu XMMWORD PTR [rdx], xmm0
|
|
||||||
|
|
||||||
lea r13d, [ebx+esi]
|
|
||||||
lea edx, [edi+ebp]
|
|
||||||
shl rdx, 32
|
|
||||||
or r13, rdx
|
|
||||||
|
|
||||||
xor r13, QWORD PTR [r10+r11]
|
|
||||||
mov r14, QWORD PTR [r10+r11+8]
|
|
||||||
|
|
||||||
movd eax, xmm6
|
|
||||||
movd edx, xmm7
|
|
||||||
pextrd r9d, xmm7, 2
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightWOW_template_part2):
|
|
||||||
mov rax, r13
|
|
||||||
mul r12
|
|
||||||
movd xmm0, rax
|
|
||||||
movd xmm3, rdx
|
|
||||||
punpcklqdq xmm3, xmm0
|
|
||||||
|
|
||||||
mov r9d, r10d
|
|
||||||
mov r12d, r10d
|
|
||||||
xor r9d, 16
|
|
||||||
xor r12d, 32
|
|
||||||
xor r10d, 48
|
|
||||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
|
||||||
xor rdx, QWORD PTR [r12+r11]
|
|
||||||
xor rax, QWORD PTR [r11+r12+8]
|
|
||||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
|
||||||
pxor xmm3, xmm2
|
|
||||||
paddq xmm7, XMMWORD PTR [r10+r11]
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
paddq xmm3, xmm6
|
|
||||||
movdqu XMMWORD PTR [r9+r11], xmm7
|
|
||||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
|
||||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
|
||||||
|
|
||||||
movdqa xmm7, xmm6
|
|
||||||
add r15, rax
|
|
||||||
add rsp, rdx
|
|
||||||
xor r10, 48
|
|
||||||
mov QWORD PTR [r10+r11], rsp
|
|
||||||
xor rsp, r13
|
|
||||||
mov r9d, esp
|
|
||||||
mov QWORD PTR [r10+r11+8], r15
|
|
||||||
and r9d, 2097136
|
|
||||||
xor r15, r14
|
|
||||||
movdqa xmm6, xmm5
|
|
||||||
dec r8d
|
|
||||||
jnz FN_PREFIX(CryptonightWOW_template_mainloop)
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightWOW_template_part3):
|
|
||||||
movd rsp, xmm9
|
|
||||||
|
|
||||||
mov rbx, QWORD PTR [rsp+136]
|
|
||||||
mov rbp, QWORD PTR [rsp+144]
|
|
||||||
mov rsi, QWORD PTR [rsp+152]
|
|
||||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
|
||||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
|
||||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
|
||||||
movaps xmm9, XMMWORD PTR [rsp]
|
|
||||||
add rsp, 64
|
|
||||||
pop rdi
|
|
||||||
pop r15
|
|
||||||
pop r14
|
|
||||||
pop r13
|
|
||||||
pop r12
|
|
||||||
pop r11
|
|
||||||
pop r10
|
|
||||||
ret 0
|
|
||||||
FN_PREFIX(CryptonightWOW_template_end):
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightWOW_template_double_part1):
|
|
||||||
mov QWORD PTR [rsp+24], rbx
|
|
||||||
push rbp
|
|
||||||
push rsi
|
|
||||||
push rdi
|
|
||||||
push r12
|
|
||||||
push r13
|
|
||||||
push r14
|
|
||||||
push r15
|
|
||||||
sub rsp, 320
|
|
||||||
mov r14, QWORD PTR [rcx+32]
|
|
||||||
mov r8, rcx
|
|
||||||
xor r14, QWORD PTR [rcx]
|
|
||||||
mov r12, QWORD PTR [rcx+40]
|
|
||||||
mov ebx, r14d
|
|
||||||
mov rsi, QWORD PTR [rcx+224]
|
|
||||||
and ebx, 2097136
|
|
||||||
xor r12, QWORD PTR [rcx+8]
|
|
||||||
mov rcx, QWORD PTR [rcx+56]
|
|
||||||
xor rcx, QWORD PTR [r8+24]
|
|
||||||
mov rax, QWORD PTR [r8+48]
|
|
||||||
xor rax, QWORD PTR [r8+16]
|
|
||||||
mov r15, QWORD PTR [rdx+32]
|
|
||||||
xor r15, QWORD PTR [rdx]
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov rcx, QWORD PTR [r8+88]
|
|
||||||
xor rcx, QWORD PTR [r8+72]
|
|
||||||
mov r13, QWORD PTR [rdx+40]
|
|
||||||
mov rdi, QWORD PTR [rdx+224]
|
|
||||||
xor r13, QWORD PTR [rdx+8]
|
|
||||||
movaps XMMWORD PTR [rsp+160], xmm6
|
|
||||||
movaps XMMWORD PTR [rsp+176], xmm7
|
|
||||||
movaps XMMWORD PTR [rsp+192], xmm8
|
|
||||||
movaps XMMWORD PTR [rsp+208], xmm9
|
|
||||||
movaps XMMWORD PTR [rsp+224], xmm10
|
|
||||||
movaps XMMWORD PTR [rsp+240], xmm11
|
|
||||||
movaps XMMWORD PTR [rsp+256], xmm12
|
|
||||||
movaps XMMWORD PTR [rsp+272], xmm13
|
|
||||||
movaps XMMWORD PTR [rsp+288], xmm14
|
|
||||||
movaps XMMWORD PTR [rsp+304], xmm15
|
|
||||||
movd xmm7, rax
|
|
||||||
mov rax, QWORD PTR [r8+80]
|
|
||||||
xor rax, QWORD PTR [r8+64]
|
|
||||||
|
|
||||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
|
||||||
movaps xmm2, XMMWORD PTR [r8+96]
|
|
||||||
movaps XMMWORD PTR [rsp], xmm1
|
|
||||||
movaps XMMWORD PTR [rsp+16], xmm2
|
|
||||||
|
|
||||||
mov r8d, r15d
|
|
||||||
punpcklqdq xmm7, xmm0
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov rcx, QWORD PTR [rdx+56]
|
|
||||||
xor rcx, QWORD PTR [rdx+24]
|
|
||||||
movd xmm9, rax
|
|
||||||
mov QWORD PTR [rsp+128], rsi
|
|
||||||
mov rax, QWORD PTR [rdx+48]
|
|
||||||
xor rax, QWORD PTR [rdx+16]
|
|
||||||
punpcklqdq xmm9, xmm0
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov rcx, QWORD PTR [rdx+88]
|
|
||||||
xor rcx, QWORD PTR [rdx+72]
|
|
||||||
movd xmm8, rax
|
|
||||||
mov QWORD PTR [rsp+136], rdi
|
|
||||||
mov rax, QWORD PTR [rdx+80]
|
|
||||||
xor rax, QWORD PTR [rdx+64]
|
|
||||||
punpcklqdq xmm8, xmm0
|
|
||||||
and r8d, 2097136
|
|
||||||
movd xmm0, rcx
|
|
||||||
mov r11d, 524288
|
|
||||||
movd xmm10, rax
|
|
||||||
punpcklqdq xmm10, xmm0
|
|
||||||
|
|
||||||
movd xmm14, QWORD PTR [rsp+128]
|
|
||||||
movd xmm15, QWORD PTR [rsp+136]
|
|
||||||
|
|
||||||
ALIGN(64)
|
|
||||||
FN_PREFIX(CryptonightWOW_template_double_mainloop):
|
|
||||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
|
||||||
movd xmm0, r12
|
|
||||||
mov ecx, ebx
|
|
||||||
movd xmm3, r14
|
|
||||||
punpcklqdq xmm3, xmm0
|
|
||||||
xor ebx, 16
|
|
||||||
aesenc xmm6, xmm3
|
|
||||||
movd rdx, xmm6
|
|
||||||
movd xmm4, r15
|
|
||||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
|
||||||
xor ebx, 48
|
|
||||||
paddq xmm0, xmm7
|
|
||||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
|
||||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
|
||||||
paddq xmm1, xmm3
|
|
||||||
xor ebx, 16
|
|
||||||
mov eax, ebx
|
|
||||||
xor rax, 32
|
|
||||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
|
||||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
|
||||||
paddq xmm0, xmm9
|
|
||||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
|
||||||
movdqa xmm0, xmm6
|
|
||||||
pxor xmm0, xmm7
|
|
||||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
|
||||||
mov esi, edx
|
|
||||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
|
||||||
and esi, 2097136
|
|
||||||
mov ecx, r8d
|
|
||||||
movd xmm0, r13
|
|
||||||
punpcklqdq xmm4, xmm0
|
|
||||||
xor r8d, 16
|
|
||||||
aesenc xmm5, xmm4
|
|
||||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
|
||||||
xor r8d, 48
|
|
||||||
paddq xmm0, xmm8
|
|
||||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
|
||||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
|
||||||
paddq xmm1, xmm4
|
|
||||||
xor r8d, 16
|
|
||||||
mov eax, r8d
|
|
||||||
xor rax, 32
|
|
||||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
|
||||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
|
||||||
paddq xmm0, xmm10
|
|
||||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
|
||||||
movdqa xmm0, xmm5
|
|
||||||
pxor xmm0, xmm8
|
|
||||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
|
||||||
movd rdi, xmm5
|
|
||||||
movd rcx, xmm14
|
|
||||||
mov ebp, edi
|
|
||||||
mov r8, QWORD PTR [rcx+rsi]
|
|
||||||
mov r10, QWORD PTR [rcx+rsi+8]
|
|
||||||
lea r9, QWORD PTR [rcx+rsi]
|
|
||||||
xor esi, 16
|
|
||||||
|
|
||||||
movd xmm0, rsp
|
|
||||||
movd xmm1, rsi
|
|
||||||
movd xmm2, rdi
|
|
||||||
movd xmm11, rbp
|
|
||||||
movd xmm12, r15
|
|
||||||
movd xmm13, rdx
|
|
||||||
mov [rsp+104], rcx
|
|
||||||
mov [rsp+112], r9
|
|
||||||
|
|
||||||
mov ebx, DWORD PTR [rsp+16]
|
|
||||||
mov esi, DWORD PTR [rsp+20]
|
|
||||||
mov edi, DWORD PTR [rsp+24]
|
|
||||||
mov ebp, DWORD PTR [rsp+28]
|
|
||||||
|
|
||||||
lea eax, [ebx+esi]
|
|
||||||
lea edx, [edi+ebp]
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
xor r8, rax
|
|
||||||
|
|
||||||
movd esp, xmm3
|
|
||||||
pextrd r15d, xmm3, 2
|
|
||||||
movd eax, xmm7
|
|
||||||
movd edx, xmm9
|
|
||||||
pextrd r9d, xmm9, 2
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightWOW_template_double_part2):
|
|
||||||
|
|
||||||
movd rsp, xmm0
|
|
||||||
mov DWORD PTR [rsp+16], ebx
|
|
||||||
mov DWORD PTR [rsp+20], esi
|
|
||||||
mov DWORD PTR [rsp+24], edi
|
|
||||||
mov DWORD PTR [rsp+28], ebp
|
|
||||||
|
|
||||||
movd rsi, xmm1
|
|
||||||
movd rdi, xmm2
|
|
||||||
movd rbp, xmm11
|
|
||||||
movd r15, xmm12
|
|
||||||
movd rdx, xmm13
|
|
||||||
mov rcx, [rsp+104]
|
|
||||||
mov r9, [rsp+112]
|
|
||||||
|
|
||||||
mov rbx, r8
|
|
||||||
mov rax, r8
|
|
||||||
mul rdx
|
|
||||||
and ebp, 2097136
|
|
||||||
mov r8, rax
|
|
||||||
movd xmm1, rdx
|
|
||||||
movd xmm0, r8
|
|
||||||
punpcklqdq xmm1, xmm0
|
|
||||||
pxor xmm1, XMMWORD PTR [rcx+rsi]
|
|
||||||
xor esi, 48
|
|
||||||
paddq xmm1, xmm7
|
|
||||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
|
||||||
xor rdx, QWORD PTR [rsi+rcx]
|
|
||||||
paddq xmm2, xmm3
|
|
||||||
xor r8, QWORD PTR [rsi+rcx+8]
|
|
||||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
|
||||||
xor esi, 16
|
|
||||||
mov eax, esi
|
|
||||||
mov rsi, rcx
|
|
||||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
|
||||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
|
||||||
paddq xmm0, xmm9
|
|
||||||
add r12, r8
|
|
||||||
xor rax, 32
|
|
||||||
add r14, rdx
|
|
||||||
movdqa xmm9, xmm7
|
|
||||||
movdqa xmm7, xmm6
|
|
||||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
|
||||||
mov QWORD PTR [r9+8], r12
|
|
||||||
xor r12, r10
|
|
||||||
mov QWORD PTR [r9], r14
|
|
||||||
movd rcx, xmm15
|
|
||||||
xor r14, rbx
|
|
||||||
mov r10d, ebp
|
|
||||||
mov ebx, r14d
|
|
||||||
xor ebp, 16
|
|
||||||
and ebx, 2097136
|
|
||||||
mov r8, QWORD PTR [r10+rcx]
|
|
||||||
mov r9, QWORD PTR [r10+rcx+8]
|
|
||||||
|
|
||||||
movd xmm0, rsp
|
|
||||||
movd xmm1, rbx
|
|
||||||
movd xmm2, rsi
|
|
||||||
movd xmm11, rdi
|
|
||||||
movd xmm12, rbp
|
|
||||||
movd xmm13, r15
|
|
||||||
mov [rsp+104], rcx
|
|
||||||
mov [rsp+112], r9
|
|
||||||
|
|
||||||
mov ebx, DWORD PTR [rsp]
|
|
||||||
mov esi, DWORD PTR [rsp+4]
|
|
||||||
mov edi, DWORD PTR [rsp+8]
|
|
||||||
mov ebp, DWORD PTR [rsp+12]
|
|
||||||
|
|
||||||
lea eax, [ebx+esi]
|
|
||||||
lea edx, [edi+ebp]
|
|
||||||
shl rdx, 32
|
|
||||||
or rax, rdx
|
|
||||||
|
|
||||||
xor r8, rax
|
|
||||||
movd xmm3, r8
|
|
||||||
|
|
||||||
movd esp, xmm4
|
|
||||||
pextrd r15d, xmm4, 2
|
|
||||||
movd eax, xmm8
|
|
||||||
movd edx, xmm10
|
|
||||||
pextrd r9d, xmm10, 2
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightWOW_template_double_part3):
|
|
||||||
|
|
||||||
movd rsp, xmm0
|
|
||||||
mov DWORD PTR [rsp], ebx
|
|
||||||
mov DWORD PTR [rsp+4], esi
|
|
||||||
mov DWORD PTR [rsp+8], edi
|
|
||||||
mov DWORD PTR [rsp+12], ebp
|
|
||||||
|
|
||||||
movd rbx, xmm1
|
|
||||||
movd rsi, xmm2
|
|
||||||
movd rdi, xmm11
|
|
||||||
movd rbp, xmm12
|
|
||||||
movd r15, xmm13
|
|
||||||
mov rcx, [rsp+104]
|
|
||||||
mov r9, [rsp+112]
|
|
||||||
|
|
||||||
mov rax, r8
|
|
||||||
mul rdi
|
|
||||||
movd xmm1, rdx
|
|
||||||
movd xmm0, rax
|
|
||||||
punpcklqdq xmm1, xmm0
|
|
||||||
mov rdi, rcx
|
|
||||||
mov r8, rax
|
|
||||||
pxor xmm1, XMMWORD PTR [rbp+rcx]
|
|
||||||
xor ebp, 48
|
|
||||||
paddq xmm1, xmm8
|
|
||||||
xor r8, QWORD PTR [rbp+rcx+8]
|
|
||||||
xor rdx, QWORD PTR [rbp+rcx]
|
|
||||||
add r13, r8
|
|
||||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
|
||||||
add r15, rdx
|
|
||||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
|
||||||
paddq xmm2, xmm4
|
|
||||||
xor ebp, 16
|
|
||||||
mov eax, ebp
|
|
||||||
xor rax, 32
|
|
||||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
|
||||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
|
||||||
paddq xmm0, xmm10
|
|
||||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
|
||||||
movd rax, xmm3
|
|
||||||
movdqa xmm10, xmm8
|
|
||||||
mov QWORD PTR [r10+rcx], r15
|
|
||||||
movdqa xmm8, xmm5
|
|
||||||
xor r15, rax
|
|
||||||
mov QWORD PTR [r10+rcx+8], r13
|
|
||||||
mov r8d, r15d
|
|
||||||
xor r13, r9
|
|
||||||
and r8d, 2097136
|
|
||||||
dec r11d
|
|
||||||
jnz FN_PREFIX(CryptonightWOW_template_double_mainloop)
|
|
||||||
|
|
||||||
FN_PREFIX(CryptonightWOW_template_double_part4):
|
|
||||||
|
|
||||||
mov rbx, QWORD PTR [rsp+400]
|
|
||||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
|
||||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
|
||||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
|
||||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
|
||||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
|
||||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
|
||||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
|
||||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
|
||||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
|
||||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
|
||||||
add rsp, 320
|
|
||||||
pop r15
|
|
||||||
pop r14
|
|
||||||
pop r13
|
|
||||||
pop r12
|
|
||||||
pop rdi
|
|
||||||
pop rsi
|
|
||||||
pop rbp
|
|
||||||
ret 0
|
|
||||||
FN_PREFIX(CryptonightWOW_template_double_end):
|
|
410
src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc
Normal file
410
src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc
Normal file
|
@ -0,0 +1,410 @@
|
||||||
|
mov rax, rsp
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 184
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp+272]
|
||||||
|
mov DWORD PTR [rsp+276], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+276]
|
||||||
|
|
||||||
|
mov r13, QWORD PTR [rcx+224]
|
||||||
|
mov r9, rdx
|
||||||
|
mov r10, QWORD PTR [rcx+32]
|
||||||
|
mov r8, rcx
|
||||||
|
xor r10, QWORD PTR [rcx]
|
||||||
|
mov r14d, 393216
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rsi, QWORD PTR [rdx+224]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov rdi, QWORD PTR [r9+32]
|
||||||
|
xor rdi, QWORD PTR [r9]
|
||||||
|
mov rbp, QWORD PTR [r9+40]
|
||||||
|
xor rbp, QWORD PTR [r9+8]
|
||||||
|
movd xmm0, rdx
|
||||||
|
movaps XMMWORD PTR [rax-88], xmm6
|
||||||
|
movaps XMMWORD PTR [rax-104], xmm7
|
||||||
|
movaps XMMWORD PTR [rax-120], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+112], xmm9
|
||||||
|
movaps XMMWORD PTR [rsp+96], xmm10
|
||||||
|
movaps XMMWORD PTR [rsp+80], xmm11
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm12
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm13
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm14
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm15
|
||||||
|
mov rdx, r10
|
||||||
|
movd xmm4, QWORD PTR [r8+96]
|
||||||
|
and edx, 2097136
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
xorps xmm13, xmm13
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r8+72]
|
||||||
|
movd xmm5, QWORD PTR [r8+104]
|
||||||
|
movd xmm7, rax
|
||||||
|
|
||||||
|
mov eax, 1
|
||||||
|
shl rax, 52
|
||||||
|
movd xmm14, rax
|
||||||
|
punpcklqdq xmm14, xmm14
|
||||||
|
|
||||||
|
mov eax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movd xmm12, rax
|
||||||
|
punpcklqdq xmm12, xmm12
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [r8+80]
|
||||||
|
xor rax, QWORD PTR [r8+64]
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
movd xmm0, rcx
|
||||||
|
mov rcx, QWORD PTR [r9+56]
|
||||||
|
xor rcx, QWORD PTR [r9+24]
|
||||||
|
movd xmm3, rax
|
||||||
|
mov rax, QWORD PTR [r9+48]
|
||||||
|
xor rax, QWORD PTR [r9+16]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movd xmm0, rcx
|
||||||
|
mov QWORD PTR [rsp], r13
|
||||||
|
mov rcx, QWORD PTR [r9+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movd xmm6, rax
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
punpcklqdq xmm6, xmm0
|
||||||
|
movd xmm0, rcx
|
||||||
|
mov QWORD PTR [rsp+256], r10
|
||||||
|
mov rcx, rdi
|
||||||
|
mov QWORD PTR [rsp+264], r11
|
||||||
|
movd xmm8, rax
|
||||||
|
and ecx, 2097136
|
||||||
|
punpcklqdq xmm8, xmm0
|
||||||
|
movd xmm0, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movd xmm0, QWORD PTR [r9+104]
|
||||||
|
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
movdqu xmm11, XMMWORD PTR [r8]
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
lea r9, QWORD PTR [rdx+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r9]
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
rwz_main_loop_double:
|
||||||
|
movdqu xmm9, xmm15
|
||||||
|
mov eax, edx
|
||||||
|
mov ebx, edx
|
||||||
|
xor eax, 16
|
||||||
|
xor ebx, 32
|
||||||
|
xor edx, 48
|
||||||
|
|
||||||
|
movd xmm0, r11
|
||||||
|
movd xmm2, r10
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
aesenc xmm9, xmm2
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||||
|
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||||
|
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||||
|
|
||||||
|
movd r11, xmm9
|
||||||
|
mov edx, r11d
|
||||||
|
and edx, 2097136
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
pxor xmm0, xmm7
|
||||||
|
movdqu XMMWORD PTR [r9], xmm0
|
||||||
|
|
||||||
|
lea rbx, QWORD PTR [rdx+r13]
|
||||||
|
mov r10, QWORD PTR [rdx+r13]
|
||||||
|
|
||||||
|
movdqu xmm10, xmm11
|
||||||
|
movd xmm0, rbp
|
||||||
|
movd xmm11, rdi
|
||||||
|
punpcklqdq xmm11, xmm0
|
||||||
|
aesenc xmm10, xmm11
|
||||||
|
|
||||||
|
mov eax, ecx
|
||||||
|
mov r12d, ecx
|
||||||
|
xor eax, 16
|
||||||
|
xor r12d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||||
|
paddq xmm0, xmm6
|
||||||
|
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||||
|
|
||||||
|
movd rcx, xmm10
|
||||||
|
and ecx, 2097136
|
||||||
|
|
||||||
|
movdqa xmm0, xmm10
|
||||||
|
pxor xmm0, xmm6
|
||||||
|
movdqu XMMWORD PTR [r8], xmm0
|
||||||
|
mov r12, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov r9, QWORD PTR [rbx+8]
|
||||||
|
|
||||||
|
xor edx, 16
|
||||||
|
mov r8d, edx
|
||||||
|
mov r15d, edx
|
||||||
|
|
||||||
|
movd rdx, xmm5
|
||||||
|
shl rdx, 32
|
||||||
|
movd rax, xmm4
|
||||||
|
xor rdx, rax
|
||||||
|
xor r10, rdx
|
||||||
|
mov rax, r10
|
||||||
|
mul r11
|
||||||
|
mov r11d, r8d
|
||||||
|
xor r11d, 48
|
||||||
|
movd xmm0, rdx
|
||||||
|
xor rdx, [r11+r13]
|
||||||
|
movd xmm1, rax
|
||||||
|
xor rax, [r11+r13+8]
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [r8+r13], xmm0
|
||||||
|
xor r8d, 32
|
||||||
|
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||||
|
|
||||||
|
mov r11, QWORD PTR [rsp+256]
|
||||||
|
add r11, rdx
|
||||||
|
mov rdx, QWORD PTR [rsp+264]
|
||||||
|
add rdx, rax
|
||||||
|
mov QWORD PTR [rbx], r11
|
||||||
|
xor r11, r10
|
||||||
|
mov QWORD PTR [rbx+8], rdx
|
||||||
|
xor rdx, r9
|
||||||
|
mov QWORD PTR [rsp+256], r11
|
||||||
|
and r11d, 2097136
|
||||||
|
mov QWORD PTR [rsp+264], rdx
|
||||||
|
mov QWORD PTR [rsp+8], r11
|
||||||
|
lea r15, QWORD PTR [r11+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||||
|
lea r13, QWORD PTR [rsi+rcx]
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movaps xmm2, xmm13
|
||||||
|
movd r10, xmm0
|
||||||
|
psllq xmm5, 1
|
||||||
|
shl r10, 32
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movdqa xmm1, xmm10
|
||||||
|
movd r11, xmm0
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movd r8, xmm1
|
||||||
|
psrldq xmm4, 8
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
movd rax, xmm4
|
||||||
|
xor r10, rax
|
||||||
|
movaps xmm1, xmm13
|
||||||
|
xor r10, r12
|
||||||
|
lea rax, QWORD PTR [r11+1]
|
||||||
|
shr rax, 1
|
||||||
|
movdqa xmm3, xmm9
|
||||||
|
punpcklqdq xmm3, xmm10
|
||||||
|
paddq xmm5, xmm3
|
||||||
|
movd rdx, xmm5
|
||||||
|
psrldq xmm5, 8
|
||||||
|
cvtsi2sd xmm2, rax
|
||||||
|
or edx, -2147483647
|
||||||
|
lea rax, QWORD PTR [r8+1]
|
||||||
|
shr rax, 1
|
||||||
|
movd r9, xmm5
|
||||||
|
cvtsi2sd xmm0, rax
|
||||||
|
or r9d, -2147483647
|
||||||
|
cvtsi2sd xmm1, rdx
|
||||||
|
unpcklpd xmm2, xmm0
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
cvtsi2sd xmm0, r9
|
||||||
|
unpcklpd xmm1, xmm0
|
||||||
|
divpd xmm2, xmm1
|
||||||
|
paddq xmm2, xmm14
|
||||||
|
cvttsd2si rax, xmm2
|
||||||
|
psrldq xmm2, 8
|
||||||
|
mov rbx, rax
|
||||||
|
imul rax, rdx
|
||||||
|
sub r11, rax
|
||||||
|
js rwz_div_fix_1
|
||||||
|
rwz_div_fix_1_ret:
|
||||||
|
|
||||||
|
cvttsd2si rdx, xmm2
|
||||||
|
mov rax, rdx
|
||||||
|
imul rax, r9
|
||||||
|
movd xmm2, r11d
|
||||||
|
movd xmm4, ebx
|
||||||
|
sub r8, rax
|
||||||
|
js rwz_div_fix_2
|
||||||
|
rwz_div_fix_2_ret:
|
||||||
|
|
||||||
|
movd xmm1, r8d
|
||||||
|
movd xmm0, edx
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
punpckldq xmm4, xmm0
|
||||||
|
punpckldq xmm4, xmm2
|
||||||
|
paddq xmm3, xmm4
|
||||||
|
movdqa xmm0, xmm3
|
||||||
|
psrlq xmm0, 12
|
||||||
|
paddq xmm0, xmm12
|
||||||
|
sqrtpd xmm1, xmm0
|
||||||
|
movd r9, xmm1
|
||||||
|
movdqa xmm5, xmm1
|
||||||
|
psrlq xmm5, 19
|
||||||
|
test r9, 524287
|
||||||
|
je rwz_sqrt_fix_1
|
||||||
|
rwz_sqrt_fix_1_ret:
|
||||||
|
|
||||||
|
movd r9, xmm10
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movd r8, xmm1
|
||||||
|
test r8, 524287
|
||||||
|
je rwz_sqrt_fix_2
|
||||||
|
rwz_sqrt_fix_2_ret:
|
||||||
|
|
||||||
|
mov r12d, ecx
|
||||||
|
mov r8d, ecx
|
||||||
|
xor r12d, 16
|
||||||
|
xor r8d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
mov rax, r10
|
||||||
|
mul r9
|
||||||
|
movd xmm0, rax
|
||||||
|
movd xmm3, rdx
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||||
|
xor rdx, [r8+rsi]
|
||||||
|
xor rax, [r8+rsi+8]
|
||||||
|
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||||
|
paddq xmm3, xmm6
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
movdqu XMMWORD PTR [r8+rsi], xmm3
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||||
|
|
||||||
|
add rdi, rdx
|
||||||
|
mov QWORD PTR [r13], rdi
|
||||||
|
xor rdi, r10
|
||||||
|
mov ecx, edi
|
||||||
|
and ecx, 2097136
|
||||||
|
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov rdx, QWORD PTR [r13+8]
|
||||||
|
add rbp, rax
|
||||||
|
mov QWORD PTR [r13+8], rbp
|
||||||
|
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||||
|
xor rbp, rdx
|
||||||
|
mov r13, QWORD PTR [rsp]
|
||||||
|
movdqa xmm3, xmm7
|
||||||
|
mov rdx, QWORD PTR [rsp+8]
|
||||||
|
movdqa xmm8, xmm6
|
||||||
|
mov r10, QWORD PTR [rsp+256]
|
||||||
|
movdqa xmm7, xmm9
|
||||||
|
mov r11, QWORD PTR [rsp+264]
|
||||||
|
movdqa xmm6, xmm10
|
||||||
|
mov r9, r15
|
||||||
|
dec r14d
|
||||||
|
jne rwz_main_loop_double
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp+272]
|
||||||
|
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+184]
|
||||||
|
movaps xmm6, XMMWORD PTR [r11-24]
|
||||||
|
movaps xmm7, XMMWORD PTR [r11-40]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-56]
|
||||||
|
movaps xmm9, XMMWORD PTR [r11-72]
|
||||||
|
movaps xmm10, XMMWORD PTR [r11-88]
|
||||||
|
movaps xmm11, XMMWORD PTR [r11-104]
|
||||||
|
movaps xmm12, XMMWORD PTR [r11-120]
|
||||||
|
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
jmp rwz_cnv2_double_mainloop_asm_endp
|
||||||
|
|
||||||
|
rwz_div_fix_1:
|
||||||
|
dec rbx
|
||||||
|
add r11, rdx
|
||||||
|
jmp rwz_div_fix_1_ret
|
||||||
|
|
||||||
|
rwz_div_fix_2:
|
||||||
|
dec rdx
|
||||||
|
add r8, r9
|
||||||
|
jmp rwz_div_fix_2_ret
|
||||||
|
|
||||||
|
rwz_sqrt_fix_1:
|
||||||
|
movd r8, xmm3
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
dec r9
|
||||||
|
mov r11d, -1022
|
||||||
|
shl r11, 32
|
||||||
|
mov rax, r9
|
||||||
|
shr r9, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r9
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+r11+1]
|
||||||
|
add rax, r11
|
||||||
|
imul rdx, rax
|
||||||
|
sub rdx, r8
|
||||||
|
adc r9, 0
|
||||||
|
movd xmm5, r9
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp rwz_sqrt_fix_1_ret
|
||||||
|
|
||||||
|
rwz_sqrt_fix_2:
|
||||||
|
psrldq xmm3, 8
|
||||||
|
movd r11, xmm3
|
||||||
|
dec r8
|
||||||
|
mov ebx, -1022
|
||||||
|
shl rbx, 32
|
||||||
|
mov rax, r8
|
||||||
|
shr r8, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r8
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+rbx+1]
|
||||||
|
add rax, rbx
|
||||||
|
imul rdx, rax
|
||||||
|
sub rdx, r11
|
||||||
|
adc r8, 0
|
||||||
|
movd xmm0, r8
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp rwz_sqrt_fix_2_ret
|
||||||
|
|
||||||
|
rwz_cnv2_double_mainloop_asm_endp:
|
186
src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc
Normal file
186
src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc
Normal file
|
@ -0,0 +1,186 @@
|
||||||
|
mov QWORD PTR [rsp+24], rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 80
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov esi, 393216
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
mov r13d, -2147483647
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movd xmm4, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movd xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movd xmm3, QWORD PTR [r9+104]
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm8
|
||||||
|
and r10d, 2097136
|
||||||
|
movd xmm5, rax
|
||||||
|
|
||||||
|
xor eax, eax
|
||||||
|
mov QWORD PTR [rsp+16], rax
|
||||||
|
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movd xmm8, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movd xmm0, rcx
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
rwz_main_loop:
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
mov rdi, r15
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movd xmm0, r11
|
||||||
|
movd xmm7, r8
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
aesenc xmm6, xmm7
|
||||||
|
movd rbp, xmm6
|
||||||
|
mov r9, rbp
|
||||||
|
and r9d, 2097136
|
||||||
|
movdqu xmm0, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqu xmm2, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
mov r10, r9
|
||||||
|
xor r10d, 32
|
||||||
|
movd rcx, xmm3
|
||||||
|
mov rax, rcx
|
||||||
|
shl rax, 32
|
||||||
|
xor rdi, rax
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rdi, QWORD PTR [r9+rbx]
|
||||||
|
lea r14, QWORD PTR [r9+rbx]
|
||||||
|
mov r12, QWORD PTR [r14+8]
|
||||||
|
xor edx, edx
|
||||||
|
lea r9d, DWORD PTR [ecx+ecx]
|
||||||
|
add r9d, ebp
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
psrldq xmm0, 8
|
||||||
|
or r9d, r13d
|
||||||
|
movd rax, xmm0
|
||||||
|
div r9
|
||||||
|
xorps xmm3, xmm3
|
||||||
|
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
add rdx, rax
|
||||||
|
lea r9, QWORD PTR [rdx+rbp]
|
||||||
|
mov r15, rdx
|
||||||
|
mov rax, r9
|
||||||
|
shr rax, 12
|
||||||
|
movd xmm0, rax
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
sqrtsd xmm3, xmm0
|
||||||
|
psubq xmm3, XMMWORD PTR [rsp+16]
|
||||||
|
movd rdx, xmm3
|
||||||
|
test edx, 524287
|
||||||
|
je rwz_sqrt_fixup
|
||||||
|
psrlq xmm3, 19
|
||||||
|
rwz_sqrt_fixup_ret:
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov rax, rdi
|
||||||
|
mul rbp
|
||||||
|
movd xmm2, rdx
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
add r8, rdx
|
||||||
|
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rdi
|
||||||
|
mov edi, r8d
|
||||||
|
and edi, 2097136
|
||||||
|
movd xmm0, rax
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r14+8], r11
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
|
||||||
|
mov r9d, r10d
|
||||||
|
xor r9d, 48
|
||||||
|
xor r10d, 16
|
||||||
|
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
paddq xmm2, xmm5
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqa xmm5, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+rbx], xmm2
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
movdqu xmm6, [rdi+rbx]
|
||||||
|
mov r10d, edi
|
||||||
|
xor r11, r12
|
||||||
|
dec rsi
|
||||||
|
jne rwz_main_loop
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
mov rbx, QWORD PTR [rsp+160]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||||
|
add rsp, 80
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
jmp cnv2_rwz_main_loop_endp
|
||||||
|
|
||||||
|
rwz_sqrt_fixup:
|
||||||
|
dec rdx
|
||||||
|
mov r13d, -1022
|
||||||
|
shl r13, 32
|
||||||
|
mov rax, rdx
|
||||||
|
shr rdx, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdx
|
||||||
|
sub rcx, rax
|
||||||
|
add rax, r13
|
||||||
|
not r13
|
||||||
|
sub rcx, r13
|
||||||
|
mov r13d, -2147483647
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdx, 0
|
||||||
|
movd xmm3, rdx
|
||||||
|
jmp rwz_sqrt_fixup_ret
|
||||||
|
|
||||||
|
cnv2_rwz_main_loop_endp:
|
|
@ -5,6 +5,8 @@
|
||||||
.global cnv2_mainloop_ryzen_asm
|
.global cnv2_mainloop_ryzen_asm
|
||||||
.global cnv2_mainloop_bulldozer_asm
|
.global cnv2_mainloop_bulldozer_asm
|
||||||
.global cnv2_double_mainloop_sandybridge_asm
|
.global cnv2_double_mainloop_sandybridge_asm
|
||||||
|
.global cnv2_rwz_mainloop_asm
|
||||||
|
.global cnv2_rwz_double_mainloop_asm
|
||||||
|
|
||||||
ALIGN(64)
|
ALIGN(64)
|
||||||
cnv2_mainloop_ivybridge_asm:
|
cnv2_mainloop_ivybridge_asm:
|
||||||
|
@ -29,3 +31,15 @@ cnv2_double_mainloop_sandybridge_asm:
|
||||||
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
|
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
|
||||||
ret 0
|
ret 0
|
||||||
mov eax, 3735929054
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
/* cn/rwz single-hash main loop entry point (declared .global in this file).
   The routine body is textually included from cn2/cnv2_rwz_main_loop.inc.
   ALIGN(64) is a macro defined outside this hunk - presumably a 64-byte
   alignment directive; confirm against its #define. */
ALIGN(64)
|
||||||
|
cnv2_rwz_mainloop_asm:
|
||||||
|
#include "cn2/cnv2_rwz_main_loop.inc"
|
||||||
|
ret 0
|
||||||
|
/* Never executed: it follows the unconditional ret above.
   3735929054 == 0xDEADC0DE; presumably an end-of-routine marker for code
   that scans or copies this routine - TODO confirm against the consumer. */
mov eax, 3735929054
|
||||||
|
|
||||||
|
/* cn/rwz double-hash main loop entry point (declared .global in this file).
   The routine body is textually included from cn2/cnv2_rwz_double_main_loop.inc.
   ALIGN(64) is a macro defined outside this hunk - presumably a 64-byte
   alignment directive; confirm against its #define. */
ALIGN(64)
|
||||||
|
cnv2_rwz_double_mainloop_asm:
|
||||||
|
#include "cn2/cnv2_rwz_double_main_loop.inc"
|
||||||
|
ret 0
|
||||||
|
/* Never executed: it follows the unconditional ret above.
   3735929054 == 0xDEADC0DE; presumably an end-of-routine marker for code
   that scans or copies this routine - TODO confirm against the consumer. */
mov eax, 3735929054
|
||||||
|
|
|
@ -3,6 +3,8 @@ PUBLIC cnv2_mainloop_ivybridge_asm
|
||||||
PUBLIC cnv2_mainloop_ryzen_asm
|
PUBLIC cnv2_mainloop_ryzen_asm
|
||||||
PUBLIC cnv2_mainloop_bulldozer_asm
|
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||||
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||||
|
PUBLIC cnv2_rwz_mainloop_asm
|
||||||
|
PUBLIC cnv2_rwz_double_mainloop_asm
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
cnv2_mainloop_ivybridge_asm PROC
|
cnv2_mainloop_ivybridge_asm PROC
|
||||||
|
@ -32,5 +34,19 @@ cnv2_double_mainloop_sandybridge_asm PROC
|
||||||
mov eax, 3735929054
|
mov eax, 3735929054
|
||||||
cnv2_double_mainloop_sandybridge_asm ENDP
|
cnv2_double_mainloop_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
; cn/rwz single-hash main loop (declared PUBLIC in this file).
; The procedure body is textually included from cn2/cnv2_rwz_main_loop.inc.
; NOTE(review): the pre-existing procedures in this file write "ALIGN 64";
; the parenthesised form here is a spelling difference only.
ALIGN(64)
|
||||||
|
cnv2_rwz_mainloop_asm PROC
|
||||||
|
INCLUDE cn2/cnv2_rwz_main_loop.inc
|
||||||
|
ret 0
|
||||||
|
; Never executed: it follows the unconditional ret above.
; 3735929054 == 0xDEADC0DE; presumably an end-of-routine marker for code
; that scans or copies this routine - TODO confirm against the consumer.
mov eax, 3735929054
|
||||||
|
cnv2_rwz_mainloop_asm ENDP
|
||||||
|
|
||||||
|
; cn/rwz double-hash main loop (declared PUBLIC in this file).
; The procedure body is textually included from cn2/cnv2_rwz_double_main_loop.inc.
; NOTE(review): the pre-existing procedures in this file write "ALIGN 64";
; the parenthesised form here is a spelling difference only.
ALIGN(64)
|
||||||
|
cnv2_rwz_double_mainloop_asm PROC
|
||||||
|
INCLUDE cn2/cnv2_rwz_double_main_loop.inc
|
||||||
|
ret 0
|
||||||
|
; Never executed: it follows the unconditional ret above.
; 3735929054 == 0xDEADC0DE; presumably an end-of-routine marker for code
; that scans or copies this routine - TODO confirm against the consumer.
mov eax, 3735929054
|
||||||
|
cnv2_rwz_double_mainloop_asm ENDP
|
||||||
|
|
||||||
_TEXT_CNV2_MAINLOOP ENDS
|
_TEXT_CNV2_MAINLOOP ENDS
|
||||||
END
|
END
|
||||||
|
|
|
@ -5,7 +5,8 @@
|
||||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
@ -21,8 +22,8 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __IWORKER_H__
|
#ifndef XMRIG_IWORKER_H
|
||||||
#define __IWORKER_H__
|
#define XMRIG_IWORKER_H
|
||||||
|
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
@ -31,7 +32,7 @@
|
||||||
class IWorker
|
class IWorker
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual ~IWorker() {}
|
virtual ~IWorker() = default;
|
||||||
|
|
||||||
virtual bool selfTest() = 0;
|
virtual bool selfTest() = 0;
|
||||||
virtual size_t id() const = 0;
|
virtual size_t id() const = 0;
|
||||||
|
@ -41,4 +42,4 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif // __IWORKER_H__
|
#endif // XMRIG_IWORKER_H
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
#define APP_ID "xmrig"
|
#define APP_ID "xmrig"
|
||||||
#define APP_NAME "XMRig"
|
#define APP_NAME "XMRig"
|
||||||
#define APP_DESC "XMRig CPU miner"
|
#define APP_DESC "XMRig CPU miner"
|
||||||
#define APP_VERSION "2.13.1"
|
#define APP_VERSION "2.13.2-dev"
|
||||||
#define APP_DOMAIN "xmrig.com"
|
#define APP_DOMAIN "xmrig.com"
|
||||||
#define APP_SITE "www.xmrig.com"
|
#define APP_SITE "www.xmrig.com"
|
||||||
#define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com"
|
#define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com"
|
||||||
|
@ -36,7 +36,7 @@
|
||||||
|
|
||||||
#define APP_VER_MAJOR 2
|
#define APP_VER_MAJOR 2
|
||||||
#define APP_VER_MINOR 13
|
#define APP_VER_MINOR 13
|
||||||
#define APP_VER_PATCH 1
|
#define APP_VER_PATCH 2
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# if (_MSC_VER >= 1910)
|
# if (_MSC_VER >= 1910)
|
||||||
|
|
|
@ -96,15 +96,25 @@ extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx);
|
||||||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
|
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx *ctx0, cryptonight_ctx *ctx1);
|
||||||
|
|
||||||
|
|
||||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
|
||||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
|
||||||
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
|
||||||
xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
|
||||||
|
|
||||||
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
|
||||||
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
|
||||||
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
|
||||||
xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
|
xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
|
||||||
|
|
||||||
|
// Entry points for the cn/zls asm main loops. They are null here;
// patchAsmVariants() points them at base + 0x8000 .. base + 0xB000 and calls
// patchCode() on each one with CRYPTONIGHT_ZLS_ITER / CRYPTONIGHT_MASK.
xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr;
|
||||||
|
xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr;
|
||||||
|
xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr;
|
||||||
|
xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm = nullptr;
|
||||||
|
|
||||||
|
// Entry points for the cn/double asm main loops. They are null here;
// patchAsmVariants() points them at base + 0xC000 .. base + 0xF000 and calls
// patchCode() on each one with CRYPTONIGHT_DOUBLE_ITER / CRYPTONIGHT_MASK.
xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr;
|
||||||
|
xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr;
|
||||||
|
xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr;
|
||||||
|
xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
|
||||||
|
|
||||||
|
|
||||||
void xmrig::CpuThread::patchAsmVariants()
|
void xmrig::CpuThread::patchAsmVariants()
|
||||||
|
@ -122,15 +132,35 @@ void xmrig::CpuThread::patchAsmVariants()
|
||||||
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
|
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
|
||||||
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x7000);
|
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0x7000);
|
||||||
|
|
||||||
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000);
|
||||||
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
cn_zls_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x9000);
|
||||||
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
cn_zls_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xA000);
|
||||||
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0xB000);
|
||||||
|
|
||||||
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
cn_double_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xC000);
|
||||||
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
cn_double_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xD000);
|
||||||
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
cn_double_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xE000);
|
||||||
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_double_fun> (base + 0xF000);
|
||||||
|
|
||||||
|
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
|
||||||
|
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||||
|
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||||
|
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||||
|
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK);
|
||||||
|
|
||||||
|
patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
|
||||||
|
patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK);
|
||||||
|
|
||||||
Mem::protectExecutableMemory(base, allocation_size);
|
Mem::protectExecutableMemory(base, allocation_size);
|
||||||
Mem::flushInstructionCache(base, allocation_size);
|
Mem::flushInstructionCache(base, allocation_size);
|
||||||
|
@ -148,12 +178,12 @@ bool xmrig::CpuThread::isSoftAES(AlgoVariant av)
|
||||||
// Fills the asm dispatch table for one (algo, variant) pair.
//
// For the pair given as template arguments, stores the
// cryptonight_single_hash_asm and cryptonight_double_hash_asm instantiations
// for each of the three asm flavours (ASM_INTEL, ASM_RYZEN, ASM_BULLDOZER)
// into the AV_SINGLE and AV_DOUBLE rows of asm_func_map. No other entry of
// the table is written.
//
// asm_func_map: table indexed [algo][algo variant][variant][asm flavour],
//               taken by reference and modified in place.
template<xmrig::Algo algo, xmrig::Variant variant>
|
template<xmrig::Algo algo, xmrig::Variant variant>
|
||||||
static inline void add_asm_func(xmrig::CpuThread::cn_hash_fun(&asm_func_map)[xmrig::ALGO_MAX][xmrig::AV_MAX][xmrig::VARIANT_MAX][xmrig::ASM_MAX])
|
static inline void add_asm_func(xmrig::CpuThread::cn_hash_fun(&asm_func_map)[xmrig::ALGO_MAX][xmrig::AV_MAX][xmrig::VARIANT_MAX][xmrig::ASM_MAX])
|
||||||
{
|
{
|
||||||
// Single-hash implementations, one per asm flavour.
asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_INTEL] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_INTEL>;
|
asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_INTEL] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_INTEL>;
|
||||||
asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_RYZEN] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_RYZEN>;
|
asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_RYZEN] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_RYZEN>;
|
||||||
asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_BULLDOZER>;
|
asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_BULLDOZER>;
|
||||||
|
|
||||||
// Double-hash implementations, one per asm flavour.
asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_INTEL] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_INTEL>;
|
asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_INTEL] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_INTEL>;
|
||||||
asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_RYZEN] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_RYZEN>;
|
asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_RYZEN] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_RYZEN>;
|
||||||
asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_BULLDOZER>;
|
asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_BULLDOZER>;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -180,6 +210,10 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
add_asm_func<CRYPTONIGHT_PICO, VARIANT_TRTL>(asm_func_map);
|
add_asm_func<CRYPTONIGHT_PICO, VARIANT_TRTL>(asm_func_map);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
add_asm_func<CRYPTONIGHT, VARIANT_RWZ>(asm_func_map);
|
||||||
|
add_asm_func<CRYPTONIGHT, VARIANT_ZLS>(asm_func_map);
|
||||||
|
add_asm_func<CRYPTONIGHT, VARIANT_DOUBLE>(asm_func_map);
|
||||||
|
|
||||||
asm_func_map_initialized = true;
|
asm_func_map_initialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -323,6 +357,39 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_4>,
|
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_4>,
|
||||||
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_4>,
|
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_4>,
|
||||||
|
|
||||||
|
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_RWZ>,
|
||||||
|
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_RWZ>,
|
||||||
|
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_RWZ>,
|
||||||
|
cryptonight_double_hash<CRYPTONIGHT, true, VARIANT_RWZ>,
|
||||||
|
cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_RWZ>,
|
||||||
|
cryptonight_quad_hash<CRYPTONIGHT, false, VARIANT_RWZ>,
|
||||||
|
cryptonight_penta_hash<CRYPTONIGHT, false, VARIANT_RWZ>,
|
||||||
|
cryptonight_triple_hash<CRYPTONIGHT, true, VARIANT_RWZ>,
|
||||||
|
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_RWZ>,
|
||||||
|
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_RWZ>,
|
||||||
|
|
||||||
|
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_ZLS>,
|
||||||
|
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_ZLS>,
|
||||||
|
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_ZLS>,
|
||||||
|
cryptonight_double_hash<CRYPTONIGHT, true, VARIANT_ZLS>,
|
||||||
|
cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_ZLS>,
|
||||||
|
cryptonight_quad_hash<CRYPTONIGHT, false, VARIANT_ZLS>,
|
||||||
|
cryptonight_penta_hash<CRYPTONIGHT, false, VARIANT_ZLS>,
|
||||||
|
cryptonight_triple_hash<CRYPTONIGHT, true, VARIANT_ZLS>,
|
||||||
|
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_ZLS>,
|
||||||
|
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_ZLS>,
|
||||||
|
|
||||||
|
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_DOUBLE>,
|
||||||
|
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_DOUBLE>,
|
||||||
|
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_DOUBLE>,
|
||||||
|
cryptonight_double_hash<CRYPTONIGHT, true, VARIANT_DOUBLE>,
|
||||||
|
cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_DOUBLE>,
|
||||||
|
cryptonight_quad_hash<CRYPTONIGHT, false, VARIANT_DOUBLE>,
|
||||||
|
cryptonight_penta_hash<CRYPTONIGHT, false, VARIANT_DOUBLE>,
|
||||||
|
cryptonight_triple_hash<CRYPTONIGHT, true, VARIANT_DOUBLE>,
|
||||||
|
cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_DOUBLE>,
|
||||||
|
cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_DOUBLE>,
|
||||||
|
|
||||||
# ifndef XMRIG_NO_AEON
|
# ifndef XMRIG_NO_AEON
|
||||||
cryptonight_single_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
|
cryptonight_single_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
|
||||||
cryptonight_double_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
|
cryptonight_double_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
|
||||||
|
@ -358,6 +425,9 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||||
# else
|
# else
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
||||||
|
@ -373,6 +443,9 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# ifndef XMRIG_NO_SUMO
|
# ifndef XMRIG_NO_SUMO
|
||||||
|
@ -422,6 +495,9 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||||
# else
|
# else
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
||||||
|
@ -437,6 +513,9 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# ifndef XMRIG_NO_CN_PICO
|
# ifndef XMRIG_NO_CN_PICO
|
||||||
|
@ -465,6 +544,9 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||||
# else
|
# else
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
|
||||||
|
@ -480,6 +562,9 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
|
||||||
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
|
||||||
# endif
|
# endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,6 @@
|
||||||
|
|
||||||
|
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <sstream>
|
|
||||||
|
|
||||||
|
|
||||||
#include "crypto/CryptoNight_test.h"
|
#include "crypto/CryptoNight_test.h"
|
||||||
|
@ -56,23 +55,19 @@ bool MultiWorker<N>::selfTest()
|
||||||
using namespace xmrig;
|
using namespace xmrig;
|
||||||
|
|
||||||
if (m_thread->algorithm() == CRYPTONIGHT) {
|
if (m_thread->algorithm() == CRYPTONIGHT) {
|
||||||
if (!verify2(VARIANT_WOW, test_input_WOW)) {
|
const bool rc = verify(VARIANT_0, test_output_v0) &&
|
||||||
LOG_WARN("CryptonightR (Wownero) self-test failed");
|
verify(VARIANT_1, test_output_v1) &&
|
||||||
return false;
|
verify(VARIANT_2, test_output_v2) &&
|
||||||
}
|
verify(VARIANT_XTL, test_output_xtl) &&
|
||||||
if (!verify2(VARIANT_4, test_input_R)) {
|
verify(VARIANT_MSR, test_output_msr) &&
|
||||||
LOG_WARN("CryptonightR self-test failed");
|
verify(VARIANT_XAO, test_output_xao) &&
|
||||||
return false;
|
verify(VARIANT_RTO, test_output_rto) &&
|
||||||
}
|
verify(VARIANT_HALF, test_output_half) &&
|
||||||
|
verify2(VARIANT_WOW, test_output_wow) &&
|
||||||
const bool rc = verify(VARIANT_0, test_output_v0) &&
|
verify2(VARIANT_4, test_output_r) &&
|
||||||
verify(VARIANT_1, test_output_v1) &&
|
verify(VARIANT_RWZ, test_output_rwz) &&
|
||||||
verify(VARIANT_2, test_output_v2) &&
|
verify(VARIANT_ZLS, test_output_zls) &&
|
||||||
verify(VARIANT_XTL, test_output_xtl) &&
|
verify(VARIANT_DOUBLE, test_output_double);
|
||||||
verify(VARIANT_MSR, test_output_msr) &&
|
|
||||||
verify(VARIANT_XAO, test_output_xao) &&
|
|
||||||
verify(VARIANT_RTO, test_output_rto) &&
|
|
||||||
verify(VARIANT_HALF, test_output_half);
|
|
||||||
|
|
||||||
# ifndef XMRIG_NO_CN_GPU
|
# ifndef XMRIG_NO_CN_GPU
|
||||||
if (!rc || N > 1) {
|
if (!rc || N > 1) {
|
||||||
|
@ -179,61 +174,48 @@ bool MultiWorker<N>::verify(xmrig::Variant variant, const uint8_t *referenceValu
|
||||||
|
|
||||||
|
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
bool MultiWorker<N>::verify2(xmrig::Variant variant, const char *test_data)
|
bool MultiWorker<N>::verify2(xmrig::Variant variant, const uint8_t *referenceValue)
|
||||||
{
|
{
|
||||||
xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant);
|
xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant);
|
||||||
if (!func) {
|
if (!func) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::stringstream s(test_data);
|
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
|
||||||
std::string expected_hex;
|
const size_t size = cn_r_test_input[i].size;
|
||||||
std::string input_hex;
|
for (size_t k = 0; k < N; ++k) {
|
||||||
uint64_t height;
|
memcpy(m_state.blob + (k * size), cn_r_test_input[i].data, size);
|
||||||
while (!s.eof())
|
|
||||||
{
|
|
||||||
uint8_t referenceValue[N * 32];
|
|
||||||
uint8_t input[N * 256];
|
|
||||||
|
|
||||||
s >> expected_hex;
|
|
||||||
s >> input_hex;
|
|
||||||
s >> height;
|
|
||||||
|
|
||||||
if ((expected_hex.length() != 64) || (input_hex.length() > 512))
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool err = false;
|
func(m_state.blob, size, m_hash, m_ctx, cn_r_test_input[i].height);
|
||||||
|
|
||||||
for (int i = 0; i < 32; ++i)
|
for (size_t k = 0; k < N; ++k) {
|
||||||
{
|
if (memcmp(m_hash + k * 32, referenceValue + i * 32, sizeof m_hash / N) != 0) {
|
||||||
referenceValue[i] = (hf_hex2bin(expected_hex[i * 2], err) << 4) + hf_hex2bin(expected_hex[i * 2 + 1], err);
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const size_t input_len = input_hex.length() / 2;
|
return true;
|
||||||
for (size_t i = 0; i < input_len; ++i)
|
}
|
||||||
{
|
|
||||||
input[i] = (hf_hex2bin(input_hex[i * 2], err) << 4) + hf_hex2bin(input_hex[i * 2 + 1], err);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 1; i < N; ++i)
|
template<>
|
||||||
{
|
bool MultiWorker<1>::verify2(xmrig::Variant variant, const uint8_t *referenceValue)
|
||||||
memcpy(input + i * input_len, input, input_len);
|
{
|
||||||
memcpy(referenceValue + i * 32, referenceValue, 32);
|
xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant);
|
||||||
}
|
if (!func) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
func(input, input_len, m_hash, m_ctx, height);
|
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
|
||||||
if (memcmp(m_hash, referenceValue, sizeof m_hash) != 0)
|
func(cn_r_test_input[i].data, cn_r_test_input[i].size, m_hash, m_ctx, cn_r_test_input[i].height);
|
||||||
{
|
|
||||||
|
if (memcmp(m_hash, referenceValue + i * 32, sizeof m_hash) != 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@ protected:
|
||||||
private:
|
private:
|
||||||
bool resume(const xmrig::Job &job);
|
bool resume(const xmrig::Job &job);
|
||||||
bool verify(xmrig::Variant variant, const uint8_t *referenceValue);
|
bool verify(xmrig::Variant variant, const uint8_t *referenceValue);
|
||||||
bool verify2(xmrig::Variant variant, const char *test_data);
|
bool verify2(xmrig::Variant variant, const uint8_t *referenceValue);
|
||||||
void consumeJob();
|
void consumeJob();
|
||||||
void save(const xmrig::Job &job);
|
void save(const xmrig::Job &job);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue