Merge branch 'feature-cn-variant2' into dev

commit 310ad7fa37
Author: XMRig
Date:   2018-09-13 11:16:10 +03:00

17 changed files with 577 additions and 458 deletions

View file

@@ -5,6 +5,7 @@ option(WITH_LIBCPUID "Use Libcpuid" ON)
 option(WITH_AEON "CryptoNight-Lite support" ON)
 option(WITH_SUMO "CryptoNight-Heavy support" ON)
 option(WITH_HTTPD "HTTP REST API" ON)
+option(WITH_DEBUG_LOG "Enable debug log output" OFF)
 option(BUILD_STATIC "Build static binary" OFF)

 include (CheckIncludeFile)
@@ -170,7 +171,6 @@ endif()
 add_definitions(/D__STDC_FORMAT_MACROS)
 add_definitions(/DUNICODE)
-#add_definitions(/DAPP_DEBUG)

 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
@@ -246,5 +246,9 @@ if (BUILD_STATIC)
     set(CMAKE_EXE_LINKER_FLAGS " -static")
 endif()

+if (WITH_DEBUG_LOG)
+    add_definitions(/DAPP_DEBUG)
+endif()
+
 add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES})
 target_link_libraries(${PROJECT_NAME} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
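Note: the new WITH_DEBUG_LOG option replaces the previously commented-out /DAPP_DEBUG definition, so debug logging is now toggled at configure time (cmake -DWITH_DEBUG_LOG=ON ..) instead of by editing CMakeLists.txt. A minimal sketch of how such a compile definition typically gates log statements on the C++ side; the LOG_DEBUG macro name is illustrative only, not taken from this codebase:

#include <cstdio>

#ifdef APP_DEBUG   // defined by CMake when WITH_DEBUG_LOG=ON
#   define LOG_DEBUG(fmt, ...) fprintf(stderr, fmt "\n", ##__VA_ARGS__)
#else
#   define LOG_DEBUG(fmt, ...) ((void) 0)   // compiled out of release builds
#endif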

View file

@@ -6,6 +6,7 @@
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify
@@ -60,6 +61,7 @@ static AlgoData const algorithms[] = {
     { "cryptonight/msr", "cn/msr", xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR },
     { "cryptonight/xao", "cn/xao", xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO },
     { "cryptonight/rto", "cn/rto", xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO },
+    { "cryptonight/2",   "cn/2",   xmrig::CRYPTONIGHT, xmrig::VARIANT_2 },

 #   ifndef XMRIG_NO_AEON
     { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO },
@@ -81,6 +83,8 @@ static AlgoData const algorithms[] = {
 static AlgoData const xmrStakAlgorithms[] = {
     { "cryptonight-monerov7", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_1 },
     { "cryptonight_v7", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_1 },
+    { "cryptonight-monerov8", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_2 },
+    { "cryptonight_v8", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_2 },
     { "cryptonight_v7_stellite", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL },
     { "cryptonight_lite", nullptr, xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_0 },
     { "cryptonight-aeonv7", nullptr, xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_1 },
@@ -103,7 +107,8 @@ static const char *variants[] = {
     "msr",
     "xhv",
     "xao",
-    "rto"
+    "rto",
+    "2",
 };
@@ -172,11 +177,21 @@ void xmrig::Algorithm::parseVariant(const char *variant)
 void xmrig::Algorithm::parseVariant(int variant)
 {
-    if (variant >= VARIANT_AUTO && variant < VARIANT_MAX) {
+    assert(variant >= -1 && variant <= 2);
+
+    switch (variant) {
+    case -1:
+    case 0:
+    case 1:
         m_variant = static_cast<Variant>(variant);
-    }
-    else {
-        assert(false);
+        break;
+
+    case 2:
+        m_variant = VARIANT_2;
+        break;
+
+    default:
+        break;
     }
 }
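The switch replaces the old range check and direct cast because the wire value and the enum value no longer coincide: pools send "variant": 2, but VARIANT_2 is 8 internally, having been appended after the coin-specific variants. A short sketch of the resulting mapping, using only the API shown in this diff:

xmrig::Algorithm algo;

algo.parseVariant(-1);  // VARIANT_AUTO: detect from each job's blob version
algo.parseVariant(1);   // VARIANT_1 (int and enum value happen to match)
algo.parseVariant(2);   // VARIANT_2: remapped explicitly, the enum value is 8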

View file

@@ -6,6 +6,7 @@
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify
@@ -22,8 +23,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

-#ifndef __ALGORITHM_H__
-#define __ALGORITHM_H__
+#ifndef XMRIG_ALGORITHM_H
+#define XMRIG_ALGORITHM_H

 #include <vector>
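The include-guard renames here (and in Job.h, Pool.h and the crypto headers below) are not cosmetic: identifiers containing a double underscore are reserved for the implementation in C++, so guards like __ALGORITHM_H__ technically invite collisions with compiler internals. The new pattern stays in the project's own namespace:

// old, reserved form:    #ifndef __ALGORITHM_H__
// new, project-prefixed:
#ifndef XMRIG_ALGORITHM_H
#define XMRIG_ALGORITHM_H
// ... header contents ...
#endif /* XMRIG_ALGORITHM_H */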

View file

@@ -270,17 +270,17 @@ bool Client::parseJob(const rapidjson::Value &params, int *code)
     }

     if (params.HasMember("algo")) {
-        job.algorithm().parseAlgorithm(params["algo"].GetString());
+        job.setAlgorithm(params["algo"].GetString());
     }

     if (params.HasMember("variant")) {
         const rapidjson::Value &variant = params["variant"];

         if (variant.IsInt()) {
-            job.algorithm().parseVariant(variant.GetInt());
+            job.setVariant(variant.GetInt());
         }
         else if (variant.IsString()){
-            job.algorithm().parseVariant(variant.GetString());
+            job.setVariant(variant.GetString());
         }
     }
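Pools may advertise the variant either as an integer or as a string in the job params; both shapes now go through Job's own setters instead of reaching into the mutable algorithm() reference. A self-contained sketch of the two inputs being distinguished (rapidjson, as used above):

#include <cassert>
#include "rapidjson/document.h"

void example()
{
    rapidjson::Document a;
    a.Parse("{\"algo\":\"cn/2\",\"variant\":2}");      // integer form -> setVariant(int)
    assert(a["variant"].IsInt());

    rapidjson::Document b;
    b.Parse("{\"algo\":\"cn/2\",\"variant\":\"2\"}");  // string form -> setVariant(const char *)
    assert(b["variant"].IsString());
}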

View file

@@ -6,6 +6,7 @@
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify
@@ -58,6 +59,7 @@ static inline char hf_bin2hex(unsigned char c)

 Job::Job() :
+    m_autoVariant(false),
     m_nicehash(false),
     m_poolId(-2),
     m_threadId(-1),
@@ -69,7 +71,8 @@ Job::Job() :
 }

-Job::Job(int poolId, bool nicehash, xmrig::Algorithm algorithm, const xmrig::Id &clientId) :
+Job::Job(int poolId, bool nicehash, const xmrig::Algorithm &algorithm, const xmrig::Id &clientId) :
+    m_autoVariant(algorithm.variant() == xmrig::VARIANT_AUTO),
     m_nicehash(nicehash),
     m_poolId(poolId),
     m_threadId(-1),
@@ -112,6 +115,10 @@ bool Job::setBlob(const char *blob)
         m_nicehash = true;
     }

+    if (m_autoVariant) {
+        m_algorithm.setVariant(variant());
+    }
+
 #   ifdef XMRIG_PROXY_PROJECT
     memset(m_rawBlob, 0, sizeof(m_rawBlob));
     memcpy(m_rawBlob, blob, m_size * 2);
@@ -163,28 +170,6 @@ bool Job::setTarget(const char *target)
 }

-xmrig::Variant Job::variant() const
-{
-    if (m_algorithm.variant() == xmrig::VARIANT_XTL && m_blob[0] < 4) {
-        return xmrig::VARIANT_1;
-    }
-
-    if (m_algorithm.variant() == xmrig::VARIANT_MSR && m_blob[0] < 7) {
-        return xmrig::VARIANT_1;
-    }
-
-    if (m_algorithm.variant() == xmrig::VARIANT_XHV && m_blob[0] < 3) {
-        return xmrig::VARIANT_0;
-    }
-
-    if (m_algorithm.variant() == xmrig::VARIANT_AUTO) {
-        return m_algorithm.algo() == xmrig::CRYPTONIGHT_HEAVY ? xmrig::VARIANT_0 : xmrig::VARIANT_1;
-    }
-
-    return m_algorithm.variant();
-}
-
 bool Job::fromHex(const char* in, unsigned int len, unsigned char* out)
 {
     bool error = false;
@@ -229,3 +214,25 @@ bool Job::operator!=(const Job &other) const
 {
     return m_id != other.m_id || memcmp(m_blob, other.m_blob, sizeof(m_blob)) != 0;
 }
+
+xmrig::Variant Job::variant() const
+{
+    using namespace xmrig;
+
+    switch (m_algorithm.algo()) {
+    case CRYPTONIGHT:
+        return (m_blob[0] >= 8) ? VARIANT_2 : VARIANT_1;
+
+    case CRYPTONIGHT_LITE:
+        return VARIANT_1;
+
+    case CRYPTONIGHT_HEAVY:
+        return VARIANT_0;
+
+    default:
+        break;
+    }
+
+    return m_algorithm.variant();
+}
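The rewritten Job::variant() keys auto-detection off the first byte of the hashing blob, which carries the block's major version: Monero's v8 fork (block version 8) activates CryptoNight variant 2, and anything older falls back to variant 1. setBlob() applies this whenever the configured variant was VARIANT_AUTO. The rule in isolation (a sketch):

#include <cstdint>

// blob[0] is the block major version byte.
xmrig::Variant detectCnVariant(const uint8_t *blob)
{
    return (blob[0] >= 8) ? xmrig::VARIANT_2   // Monero v8 fork and later
                          : xmrig::VARIANT_1;  // v7-era blocks
}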

View file

@@ -6,6 +6,7 @@
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify
@@ -22,8 +23,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

-#ifndef __JOB_H__
-#define __JOB_H__
+#ifndef XMRIG_JOB_H
+#define XMRIG_JOB_H

 #include <stddef.h>
@@ -38,12 +39,11 @@ class Job
 {
 public:
     Job();
-    Job(int poolId, bool nicehash, xmrig::Algorithm algorithm, const xmrig::Id &clientId);
+    Job(int poolId, bool nicehash, const xmrig::Algorithm &algorithm, const xmrig::Id &clientId);
     ~Job();

     bool setBlob(const char *blob);
     bool setTarget(const char *target);
-    xmrig::Variant variant() const;

     inline bool isNicehash() const { return m_nicehash; }
     inline bool isValid() const { return m_size > 0 && m_diff > 0; }
@@ -60,10 +60,12 @@
     inline uint32_t diff() const { return static_cast<uint32_t>(m_diff); }
     inline uint64_t target() const { return m_target; }
     inline void reset() { m_size = 0; m_diff = 0; }
+    inline void setAlgorithm(const char *algo) { m_algorithm.parseAlgorithm(algo); }
     inline void setClientId(const xmrig::Id &id) { m_clientId = id; }
     inline void setPoolId(int poolId) { m_poolId = poolId; }
     inline void setThreadId(int threadId) { m_threadId = threadId; }
-    inline xmrig::Algorithm &algorithm() { return m_algorithm; }
+    inline void setVariant(const char *variant) { m_algorithm.parseVariant(variant); }
+    inline void setVariant(int variant) { m_algorithm.parseVariant(variant); }

 #   ifdef XMRIG_PROXY_PROJECT
     inline char *rawBlob() { return m_rawBlob; }
@@ -83,6 +85,9 @@ public:
     bool operator!=(const Job &other) const;

 private:
+    xmrig::Variant variant() const;
+
+    bool m_autoVariant;
     bool m_nicehash;
     int m_poolId;
     int m_threadId;
@@ -100,4 +105,4 @@ private:
 #   endif
 };

-#endif /* __JOB_H__ */
+#endif /* XMRIG_JOB_H */

View file

@@ -5,6 +5,7 @@
  * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify
@@ -211,6 +212,7 @@ rapidjson::Value Pool::toJSON(rapidjson::Document &doc) const
     case xmrig::VARIANT_AUTO:
     case xmrig::VARIANT_0:
     case xmrig::VARIANT_1:
+    case xmrig::VARIANT_2:
         obj.AddMember("variant", m_algorithm.variant(), allocator);
         break;
@@ -359,7 +361,7 @@ void Pool::adjustVariant(const xmrig::Variant variantHint)
         if (m_algorithm.algo() == CRYPTONIGHT_HEAVY) {
             m_algorithm.setVariant(VARIANT_0);
         }
-        else {
+        else if (m_algorithm.algo() == CRYPTONIGHT_LITE) {
             m_algorithm.setVariant(VARIANT_1);
         }
 #   endif
@@ -377,6 +379,7 @@ void Pool::rebuild()
     m_algorithms.push_back(m_algorithm);

 #   ifndef XMRIG_PROXY_PROJECT
+    addVariant(xmrig::VARIANT_2);
     addVariant(xmrig::VARIANT_1);
     addVariant(xmrig::VARIANT_0);
     addVariant(xmrig::VARIANT_XTL);

View file

@@ -5,6 +5,7 @@
  * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify
@@ -21,8 +22,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

-#ifndef __POOL_H__
-#define __POOL_H__
+#ifndef XMRIG_POOL_H
+#define XMRIG_POOL_H

 #include <vector>
@@ -105,4 +106,4 @@ private:
 typedef std::vector<Pool> Pools;

-#endif /* __POOL_H__ */
+#endif /* XMRIG_POOL_H */

View file

@@ -67,6 +67,7 @@ enum Variant {
     VARIANT_XHV = 5,  // Modified CryptoNight-Heavy (Haven Protocol only)
     VARIANT_XAO = 6,  // Modified CryptoNight variant 0 (Alloy only)
     VARIANT_RTO = 7,  // Modified CryptoNight variant 1 (Arto only)
+    VARIANT_2   = 8,  // CryptoNight variant 2
     VARIANT_MAX
 };
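VARIANT_2 is appended after the coin-specific variants rather than next to VARIANT_0/VARIANT_1, so every existing enumerator keeps its value and only VARIANT_MAX shifts. A compile-time spot check of that invariant (a sketch; the include path is assumed, the enum lives wherever this header is):

#include "common/xmrig.h"

static_assert(xmrig::VARIANT_2 == 8,   "wire value 2 != enum value 8; hence the remap in parseVariant()");
static_assert(xmrig::VARIANT_MAX == 9, "VARIANT_MAX must remain the last enumerator");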

View file

@@ -7,6 +7,7 @@
  * Copyright 2016 Imran Yusuff <https://github.com/imranyusuff>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify
@@ -23,8 +24,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

-#ifndef __CRYPTONIGHT_ARM_H__
-#define __CRYPTONIGHT_ARM_H__
+#ifndef XMRIG_CRYPTONIGHT_ARM_H
+#define XMRIG_CRYPTONIGHT_ARM_H

 #include "common/crypto/keccak.h"
@@ -95,9 +96,6 @@ static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i
 }

-#define EXTRACT64(X) _mm_cvtsi128_si64(X)
-
 #if defined (__arm64__) || defined (__aarch64__)
 static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
 {
@@ -404,20 +402,28 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
 }

-template<int SHIFT>
-static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+template<xmrig::Variant VARIANT>
+static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i cx)
 {
-    mem_out[0] = EXTRACT64(tmp);
+    uint64_t* mem_out = (uint64_t*)&l[idx];

-    uint64_t vh = vgetq_lane_u64(tmp, 1);
-
-    uint8_t x = vh >> 24;
-    static const uint16_t table = 0x7531;
-    const uint8_t index = (((x >> SHIFT) & 6) | (x & 1)) << 1;
-    vh ^= ((table >> index) & 0x3) << 28;
-
-    mem_out[1] = vh;
+    if (VARIANT == xmrig::VARIANT_2) {
+        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1);
+        _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
+    } else {
+        __m128i tmp = _mm_xor_si128(bx0, cx);
+        mem_out[0] = _mm_cvtsi128_si64(tmp);
+
+        uint64_t vh = vgetq_lane_u64(tmp, 1);
+
+        uint8_t x = vh >> 24;
+        static const uint16_t table = 0x7531;
+        const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1;
+        vh ^= ((table >> index) & 0x3) << 28;
+
+        mem_out[1] = vh;
+    }
 }

 template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
@@ -426,27 +432,29 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
     constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
     constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
     constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
-    constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>();
+    constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;

-    if (IS_MONERO && size < 43) {
+    if (IS_V1 && size < 43) {
         memset(output, 0, 32);
         return;
     }

     xmrig::keccak(input, size, ctx[0]->state);

-    VARIANT1_INIT(0);
-
     cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);

     const uint8_t* l0 = ctx[0]->memory;
     uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);

+    VARIANT1_INIT(0);
+    VARIANT2_INIT(0);
+
     uint64_t al0 = h0[0] ^ h0[4];
     uint64_t ah0 = h0[1] ^ h0[5];
     __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+    __m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);

-    uint64_t idx0 = h0[0] ^ h0[4];
+    uint64_t idx0 = al0;

     for (size_t i = 0; i < ITERATIONS; i++) {
         __m128i cx;
@@ -454,44 +462,47 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
             cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
         }

+        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
         if (VARIANT == xmrig::VARIANT_TUBE) {
-            cx = aes_round_tweak_div(cx, _mm_set_epi64x(ah0, al0));
+            cx = aes_round_tweak_div(cx, ax0);
         }
         else if (SOFT_AES) {
-            cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
         }
         else {
-            cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
+            cx = _mm_aesenc_si128(cx, ax0);
         }

-        if (IS_MONERO) {
-            cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+        if (IS_V1 || VARIANT == xmrig::VARIANT_2) {
+            cryptonight_monero_tweak<VARIANT>(l0, idx0 & MASK, ax0, bx0, bx1, cx);
         } else {
             _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
         }

-        idx0 = EXTRACT64(cx);
-        bx0 = cx;
+        idx0 = _mm_cvtsi128_si64(cx);

         uint64_t hi, lo, cl, ch;
         cl = ((uint64_t*) &l0[idx0 & MASK])[0];
         ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-        lo = __umul128(idx0, cl, &hi);
+
+        if (VARIANT == xmrig::VARIANT_2) {
+            VARIANT2_INTEGER_MATH(0, cl, cx);
+            lo = __umul128(idx0, cl, &hi);
+            VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1);
+        }
+        else {
+            lo = __umul128(idx0, cl, &hi);
+        }

         al0 += hi;
         ah0 += lo;

         ((uint64_t*)&l0[idx0 & MASK])[0] = al0;

-        if (IS_MONERO) {
-            if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) {
-                ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-            }
-            else {
-                ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
-            }
-        }
-        else {
+        if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
+            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
+        } else if (IS_V1) {
+            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
+        } else {
             ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
         }
@@ -514,6 +525,10 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
                 idx0 = d ^ q;
             }
         }

+        if (VARIANT == xmrig::VARIANT_2) {
+            bx1 = bx0;
+        }
+        bx0 = cx;
     }

     cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
@@ -529,9 +544,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
     constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
     constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
     constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
-    constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>();
+    constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;

-    if (IS_MONERO && size < 43) {
+    if (IS_V1 && size < 43) {
         memset(output, 0, 64);
         return;
     }
@@ -539,14 +554,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
     xmrig::keccak(input, size, ctx[0]->state);
     xmrig::keccak(input + size, size, ctx[1]->state);

-    VARIANT1_INIT(0);
-    VARIANT1_INIT(1);
-
     const uint8_t* l0 = ctx[0]->memory;
     const uint8_t* l1 = ctx[1]->memory;
     uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
     uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);

+    VARIANT1_INIT(0);
+    VARIANT1_INIT(1);
+    VARIANT2_INIT(0);
+    VARIANT2_INIT(1);
+
     cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
     cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@@ -555,11 +572,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
     uint64_t ah0 = h0[1] ^ h0[5];
     uint64_t ah1 = h1[1] ^ h1[5];

-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+    __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+    __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+    __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+    __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);

-    uint64_t idx0 = h0[0] ^ h0[4];
-    uint64_t idx1 = h1[0] ^ h1[4];
+    uint64_t idx0 = al0;
+    uint64_t idx1 = al1;

     for (size_t i = 0; i < ITERATIONS; i++) {
         __m128i cx0, cx1;
@@ -568,52 +587,53 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
             cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
         }

+        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+        const __m128i ax1 = _mm_set_epi64x(ah1, al1);
         if (VARIANT == xmrig::VARIANT_TUBE) {
-            cx0 = aes_round_tweak_div(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = aes_round_tweak_div(cx1, _mm_set_epi64x(ah1, al1));
+            cx0 = aes_round_tweak_div(cx0, ax0);
+            cx1 = aes_round_tweak_div(cx1, ax1);
         }
         else if (SOFT_AES) {
-            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1);
         }
         else {
-            cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+            cx0 = _mm_aesenc_si128(cx0, ax0);
+            cx1 = _mm_aesenc_si128(cx1, ax1);
         }

-        if (IS_MONERO) {
-            cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
-            cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
+        if (IS_V1 || (VARIANT == xmrig::VARIANT_2)) {
+            cryptonight_monero_tweak<VARIANT>(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
+            cryptonight_monero_tweak<VARIANT>(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
         } else {
-            _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
-            _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
-        };
+            _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+            _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
+        }

-        idx0 = EXTRACT64(cx0);
-        idx1 = EXTRACT64(cx1);
-
-        bx0 = cx0;
-        bx1 = cx1;
+        idx0 = _mm_cvtsi128_si64(cx0);
+        idx1 = _mm_cvtsi128_si64(cx1);

         uint64_t hi, lo, cl, ch;
         cl = ((uint64_t*) &l0[idx0 & MASK])[0];
         ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-        lo = __umul128(idx0, cl, &hi);
+
+        if (VARIANT == xmrig::VARIANT_2) {
+            VARIANT2_INTEGER_MATH(0, cl, cx0);
+            lo = __umul128(idx0, cl, &hi);
+            VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01);
+        } else {
+            lo = __umul128(idx0, cl, &hi);
+        }

         al0 += hi;
         ah0 += lo;

         ((uint64_t*)&l0[idx0 & MASK])[0] = al0;

-        if (IS_MONERO) {
-            if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) {
-                ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-            }
-            else {
-                ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
-            }
-        }
-        else {
+        if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
+            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
+        } else if (IS_V1) {
+            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
+        } else {
             ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
         }
@@ -639,22 +659,24 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         cl = ((uint64_t*) &l1[idx1 & MASK])[0];
         ch = ((uint64_t*) &l1[idx1 & MASK])[1];
-        lo = __umul128(idx1, cl, &hi);
+
+        if (VARIANT == xmrig::VARIANT_2) {
+            VARIANT2_INTEGER_MATH(1, cl, cx1);
+            lo = __umul128(idx1, cl, &hi);
+            VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11);
+        } else {
+            lo = __umul128(idx1, cl, &hi);
+        }

         al1 += hi;
         ah1 += lo;

         ((uint64_t*)&l1[idx1 & MASK])[0] = al1;

-        if (IS_MONERO) {
-            if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) {
-                ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
-            }
-            else {
-                ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
-            }
-        }
-        else {
+        if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
+            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
+        } else if (IS_V1) {
+            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
+        } else {
             ((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
         }
@@ -677,6 +699,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
                 idx1 = d ^ q;
             }
         }

+        if (VARIANT == xmrig::VARIANT_2) {
+            bx01 = bx00;
+            bx11 = bx10;
+        }
+        bx00 = cx0;
+        bx10 = cx1;
     }

     cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
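Variant 2 threads a second b register through the main loop: VARIANT2_SHUFFLE consumes the current a, the current b and the previous iteration's b, so the rotation at the bottom of the loop (now the single place where bx0 is updated) keeps exactly one iteration of history. The state flow in isolation, with plain integers standing in for the __m128i registers (a sketch, not a drop-in for the loops above):

#include <cstdint>

void rotateBHistory(uint64_t cx, uint64_t &bx0, uint64_t &bx1, bool isVariant2)
{
    if (isVariant2) {
        bx1 = bx0;   // previous b feeds the next iteration's VARIANT2_SHUFFLE
    }
    bx0 = cx;        // current AES output becomes the next b
}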

View file

@@ -107,6 +107,7 @@ inline uint32_t cn_select_mask(Algo algorithm)
 template<Algo ALGO, Variant variant> inline constexpr uint32_t cn_select_iter() { return 0; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_0>() { return CRYPTONIGHT_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_1>() { return CRYPTONIGHT_ITER; }
+template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_2>() { return CRYPTONIGHT_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XTL>() { return CRYPTONIGHT_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_MSR>() { return CRYPTONIGHT_MSR_ITER; }
 template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XAO>() { return CRYPTONIGHT_XAO_ITER; }
@@ -150,29 +151,16 @@ inline uint32_t cn_select_iter(Algo algorithm, Variant variant)
 }

-template<Variant variant> inline constexpr bool cn_is_monero() { return false; }
-template<> inline constexpr bool cn_is_monero<VARIANT_0>() { return false; }
-template<> inline constexpr bool cn_is_monero<VARIANT_1>() { return true; }
-template<> inline constexpr bool cn_is_monero<VARIANT_TUBE>() { return true; }
-template<> inline constexpr bool cn_is_monero<VARIANT_XTL>() { return true; }
-template<> inline constexpr bool cn_is_monero<VARIANT_MSR>() { return true; }
-template<> inline constexpr bool cn_is_monero<VARIANT_XHV>() { return false; }
-template<> inline constexpr bool cn_is_monero<VARIANT_XAO>() { return false; }
-template<> inline constexpr bool cn_is_monero<VARIANT_RTO>() { return true; }
-
-inline bool cn_is_monero(Variant variant)
-{
-    switch (variant) {
-    case VARIANT_0:
-    case VARIANT_XHV:
-    case VARIANT_RTO:
-        return false;
-
-    default:
-        return true;
-    }
-}
+template<Variant variant> inline constexpr Variant cn_base_variant() { return VARIANT_0; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_0>() { return VARIANT_0; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_1>() { return VARIANT_1; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_TUBE>() { return VARIANT_1; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_XTL>() { return VARIANT_1; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_MSR>() { return VARIANT_1; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_XHV>() { return VARIANT_0; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_XAO>() { return VARIANT_0; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_RTO>() { return VARIANT_1; }
+template<> inline constexpr Variant cn_base_variant<VARIANT_2>() { return VARIANT_2; }

 } /* namespace xmrig */
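cn_base_variant replaces the cn_is_monero boolean: each variant now declares which base revision (v0, v1 or the new v2) its inner loop follows, and the hash functions derive their branches from that (IS_V1, VARIANT == VARIANT_2). It also retires the inconsistent runtime overload, which disagreed with the template version about VARIANT_RTO. A compile-time spot check (sketch):

static_assert(xmrig::cn_base_variant<xmrig::VARIANT_XTL>() == xmrig::VARIANT_1,
              "Stellite follows the v1 tweak rules (with a different shift)");
static_assert(xmrig::cn_base_variant<xmrig::VARIANT_XHV>() == xmrig::VARIANT_0,
              "Haven stays on the v0 path");
static_assert(xmrig::cn_base_variant<xmrig::VARIANT_2>() == xmrig::VARIANT_2,
              "variant 2 is its own base");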

View file

@@ -6,6 +6,7 @@
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify
@@ -22,29 +23,31 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

-#ifndef __CRYPTONIGHT_MONERO_H__
-#define __CRYPTONIGHT_MONERO_H__
+#ifndef XMRIG_CRYPTONIGHT_MONERO_H
+#define XMRIG_CRYPTONIGHT_MONERO_H
+
+#include <fenv.h>
+#include <math.h>

 // VARIANT ALTERATIONS
 #ifndef XMRIG_ARM
 #   define VARIANT1_INIT(part) \
     uint64_t tweak1_2_##part = 0; \
-    if (IS_MONERO) { \
+    if (IS_V1) { \
         tweak1_2_##part = (*reinterpret_cast<const uint64_t*>(input + 35 + part * size) ^ \
                           *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24)); \
     }
 #else
 #   define VARIANT1_INIT(part) \
     uint64_t tweak1_2_##part = 0; \
-    if (IS_MONERO) { \
+    if (IS_V1) { \
         memcpy(&tweak1_2_##part, input + 35 + part * size, sizeof tweak1_2_##part); \
         tweak1_2_##part ^= *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24); \
     }
 #endif

 #define VARIANT1_1(p) \
-    if (IS_MONERO) { \
+    if (IS_V1) { \
         const uint8_t tmp = reinterpret_cast<const uint8_t*>(p)[11]; \
         static const uint32_t table = 0x75310; \
         const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
@@ -52,9 +55,72 @@
     }

 #define VARIANT1_2(p, part) \
-    if (IS_MONERO) { \
+    if (IS_V1) { \
         (p) ^= tweak1_2_##part; \
     }

-#endif /* __CRYPTONIGHT_MONERO_H__ */
+#ifndef XMRIG_ARM
+#   define VARIANT2_INIT(part) \
+    __m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \
+    __m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]);
+
+#ifdef _MSC_VER
+#   define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { _control87(RC_DOWN, MCW_RC); }
+#else
+#   define VARIANT2_SET_ROUNDING_MODE() if (VARIANT == xmrig::VARIANT_2) { std::fesetround(FE_DOWNWARD); }
+#endif
+
+#   define VARIANT2_INTEGER_MATH(part, cl, cx) \
+    do { \
+        const uint64_t sqrt_result = static_cast<uint64_t>(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \
+        const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
+        cl ^= static_cast<uint64_t>(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \
+        const uint32_t d = static_cast<uint32_t>(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \
+        const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
+        const uint64_t division_result = static_cast<uint32_t>(cx_1 / d) + ((cx_1 % d) << 32); \
+        division_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(division_result)); \
+        sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
+    } while (0)
+
+#   define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \
+    do { \
+        const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
+        const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
+        const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
+        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
+        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
+        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
+    } while (0)
+
+#else
+#   define VARIANT2_INIT(part) \
+    uint64_t division_result_##part = h##part[12]; \
+    uint64_t sqrt_result_##part = h##part[13];
+
+#   define VARIANT2_INTEGER_MATH(part, cl, cx) \
+    do { \
+        const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
+        cl ^= division_result_##part ^ (sqrt_result_##part << 32); \
+        const uint32_t d = static_cast<uint32_t>(cx_0 + (sqrt_result_##part << 1)) | 0x80000001UL; \
+        const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
+        division_result_##part = static_cast<uint32_t>(cx_1 / d) + ((cx_1 % d) << 32); \
+        const uint64_t sqrt_input = cx_0 + division_result_##part; \
+        sqrt_result_##part = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \
+        const uint64_t s = sqrt_result_##part >> 1; \
+        const uint64_t b = sqrt_result_##part & 1; \
+        const uint64_t r2 = (uint64_t)(s) * (s + b) + (sqrt_result_##part << 32); \
+        sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
+    } while (0)
+
+#   define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \
+    do { \
+        const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))); \
+        const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
+        const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
+        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
+        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
+        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
+    } while (0)
+#endif
+
+#endif /* XMRIG_CRYPTONIGHT_MONERO_H */
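Per iteration, variant 2 mixes a 64/32-bit division and an integer square root into cl, the multiplier input, with each round consuming the previous round's results; the serial data dependency is cheap on CPUs but awkward to pipeline in specialized hardware. A scalar reference of the math, mirroring the ARM fallback macros above:

#include <cmath>
#include <cstdint>

// sqrt_result is the integer part of 2*sqrt(2^64 + n) - 2^33; the two
// correction terms repair the double-precision estimate to the exact value.
static uint64_t v2Sqrt(uint64_t n)
{
    uint64_t r = static_cast<uint64_t>(std::sqrt(static_cast<double>(n) + 18446744073709551616.0) * 2.0 - 8589934592.0);
    const uint64_t s = r >> 1, b = r & 1;
    const uint64_t r2 = s * (s + b) + (r << 32);
    r += ((r2 + b > n) ? -1 : 0) + ((r2 + (1ULL << 32) < n - s) ? 1 : 0);
    return r;
}

static void v2IntegerMath(uint64_t &cl, uint64_t cx_lo, uint64_t cx_hi,
                          uint64_t &division_result, uint64_t &sqrt_result)
{
    cl ^= division_result ^ (sqrt_result << 32);          // fold in last round's results
    const uint32_t d = static_cast<uint32_t>(cx_lo + (sqrt_result << 1)) | 0x80000001UL;
    division_result = static_cast<uint32_t>(cx_hi / d)    // quotient in the low half
                    + ((cx_hi % d) << 32);                // remainder in the high half
    sqrt_result = v2Sqrt(cx_lo + division_result);
}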

View file

@@ -69,7 +69,7 @@ const static uint8_t test_output_v0[160] = {
 };

-// Monero v7
+// Cryptonight variant 1 (Monero v7)
 const static uint8_t test_output_v1[160] = {
     0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
     0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
@@ -84,6 +84,21 @@
 };

+// Cryptonight variant 2 (Monero v8)
+const static uint8_t test_output_v2[160] = {
+    0x6E, 0xEE, 0x53, 0xA3, 0xDA, 0xD1, 0x8C, 0x05, 0xB8, 0xCB, 0x32, 0x17, 0xAA, 0xEA, 0xEA, 0xB4,
+    0x16, 0x11, 0x01, 0xA9, 0x08, 0x76, 0x37, 0x36, 0x6F, 0xDC, 0xCA, 0xC6, 0x92, 0x0D, 0xEA, 0x09,
+    0x91, 0x03, 0x2F, 0x5B, 0x27, 0x4D, 0x94, 0x1D, 0x60, 0x50, 0xDC, 0x1F, 0x35, 0x57, 0xEC, 0x20,
+    0xA6, 0xAC, 0x10, 0xDB, 0xCF, 0x36, 0x23, 0x8F, 0x96, 0xC7, 0x72, 0x8B, 0xF9, 0xE7, 0x30, 0xEB,
+    0x50, 0x58, 0x4B, 0xFE, 0xAD, 0xC5, 0x13, 0x79, 0x50, 0x98, 0x1C, 0x67, 0xB2, 0xEB, 0xDA, 0x64,
+    0xD4, 0xAA, 0xC4, 0xE8, 0xE5, 0xC9, 0xE7, 0x6B, 0x84, 0xC2, 0xD2, 0xE9, 0x1F, 0xA1, 0x0F, 0xDF,
+    0x45, 0x06, 0x80, 0x25, 0x32, 0x6B, 0xC4, 0x66, 0x2A, 0x69, 0x9F, 0x1E, 0x1F, 0x4C, 0xBE, 0x89,
+    0xFE, 0x61, 0xBB, 0x04, 0x79, 0xB5, 0x3B, 0x45, 0x58, 0xD9, 0x9C, 0x18, 0x7C, 0x48, 0x1B, 0x44,
+    0x92, 0xC4, 0x4C, 0xD0, 0x8F, 0x16, 0x44, 0x79, 0x71, 0x48, 0x63, 0x0B, 0x51, 0xB6, 0x33, 0x8B,
+    0x6B, 0x3F, 0xCC, 0x0A, 0x3A, 0x14, 0x3B, 0x49, 0x68, 0x46, 0xB9, 0x46, 0xC6, 0xA3, 0x03, 0x41
+};
+
 // Stellite (XTL)
 const static uint8_t test_output_xtl[160] = {
     0x8F, 0xE5, 0xF0, 0x5F, 0x02, 0x2A, 0x61, 0x7D, 0xE5, 0x3F, 0x79, 0x36, 0x4B, 0x25, 0xCB, 0xC3,
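Each test_output_* array holds five 32-byte reference hashes (160 bytes), one per built-in test input, so the self-test can cover the single- and multi-way code paths. A sketch of how a slice would be checked; the harness shape here is hypothetical, not this codebase's exact self-test API:

#include <cstring>

// computed: 32-byte hash of self-test input i, produced with VARIANT_2
bool checkV2(size_t i, const uint8_t *computed)
{
    return memcmp(computed, test_output_v2 + i * 32, 32) == 0;
}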

View file

@@ -6,6 +6,7 @@
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify
@@ -22,8 +23,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

-#ifndef __CRYPTONIGHT_X86_H__
-#define __CRYPTONIGHT_X86_H__
+#ifndef XMRIG_CRYPTONIGHT_X86_H
+#define XMRIG_CRYPTONIGHT_X86_H

 #ifdef __GNUC__
@@ -73,10 +74,7 @@ static inline void do_skein_hash(const uint8_t *input, size_t len, uint8_t *outp
 void (* const extra_hashes[4])(const uint8_t *, size_t, uint8_t *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};

 #if defined(__x86_64__) || defined(_M_AMD64)
-#   define EXTRACT64(X) _mm_cvtsi128_si64(X)
-
 #   ifdef __GNUC__
 static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
 {
@@ -88,13 +86,14 @@ static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
     #define __umul128 _umul128
 #   endif
 #elif defined(__i386__) || defined(_M_IX86)
-#   define HI32(X) \
-    _mm_srli_si128((X), 4)
-
-#   define EXTRACT64(X) \
-    ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
-    ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
+static inline int64_t _mm_cvtsi128_si64(__m128i a)
+{
+    return ((uint64_t)(uint32_t)_mm_cvtsi128_si32(a) | ((uint64_t)(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4)) << 32));
+}
+
+static inline __m128i _mm_cvtsi64_si128(int64_t a) {
+    return _mm_set_epi64x(0, a);
+}

 static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
     // multiplier = ab = a * 2^32 + b
@@ -408,21 +407,47 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
 }

-template<int SHIFT>
-static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+static inline __m128i int_sqrt_v2(const uint64_t n0)
 {
-    mem_out[0] = EXTRACT64(tmp);
+    __m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(n0 >> 12), _mm_set_epi64x(0, 1023ULL << 52)));
+    x = _mm_sqrt_sd(_mm_setzero_pd(), x);
+    uint64_t r = static_cast<uint64_t>(_mm_cvtsi128_si64(_mm_castpd_si128(x)));
+
+    const uint64_t s = r >> 20;
+    r >>= 19;
+
+    uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1);
+
+#   if (defined(_MSC_VER) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 1)) && (defined(__x86_64__) || defined(_M_AMD64))
+    _addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r);
+#   else
+    if (x2 < n0) ++r;
+#   endif
+
+    return _mm_cvtsi64_si128(r);
+}
+
+template<xmrig::Variant VARIANT>
+static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i cx)
+{
+    if (VARIANT == xmrig::VARIANT_2) {
+        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1);
+        _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
+    } else {
+        __m128i tmp = _mm_xor_si128(bx0, cx);
+        mem_out[0] = _mm_cvtsi128_si64(tmp);

-    tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
-    uint64_t vh = EXTRACT64(tmp);
+        tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
+        uint64_t vh = _mm_cvtsi128_si64(tmp);

-    uint8_t x = vh >> 24;
-    static const uint16_t table = 0x7531;
-    const uint8_t index = (((x >> SHIFT) & 6) | (x & 1)) << 1;
-    vh ^= ((table >> index) & 0x3) << 28;
+        uint8_t x = static_cast<uint8_t>(vh >> 24);
+        static const uint16_t table = 0x7531;
+        const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1;
+        vh ^= ((table >> index) & 0x3) << 28;

-    mem_out[1] = vh;
+        mem_out[1] = vh;
+    }
 }

 template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
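int_sqrt_v2 computes the integer part of 2*sqrt(2^64 + n0) - 2^33 without 128-bit arithmetic: the bit pattern (1023ULL << 52) + (n0 >> 12) is exactly the double 1.0 + n0/2^64 (1023 is the biased exponent of 2^0, and the top 52 bits of n0 become the mantissa), _mm_sqrt_sd then takes the root — which is why VARIANT2_SET_ROUNDING_MODE() pins the FPU to round-down first, so every CPU truncates identically — and the shifts plus the final carry fix-up turn the double back into the exactly-rounded integer. The exponent trick in isolation (a portable sketch):

#include <cstdint>
#include <cstring>

// Same construction as the _mm_add_epi64 above: since n0 >> 12 < 2^52,
// add and bitwise-or coincide, and the result is 1.0 + n0/2^64 (truncated).
double onePlusFraction(uint64_t n0)
{
    const uint64_t bits = (1023ULL << 52) | (n0 >> 12);
    double d;
    std::memcpy(&d, &bits, sizeof d);
    return d;
}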
@@ -431,25 +456,28 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
     constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
     constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
     constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
-    constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>();
+    constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;

-    if (IS_MONERO && size < 43) {
+    if (IS_V1 && size < 43) {
         memset(output, 0, 32);
         return;
     }

     xmrig::keccak(input, size, ctx[0]->state);

-    VARIANT1_INIT(0)
-
     cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);

     const uint8_t* l0 = ctx[0]->memory;
     uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);

+    VARIANT1_INIT(0);
+    VARIANT2_INIT(0);
+    VARIANT2_SET_ROUNDING_MODE();
+
     uint64_t al0 = h0[0] ^ h0[4];
     uint64_t ah0 = h0[1] ^ h0[5];
     __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+    __m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);

     uint64_t idx0 = al0;
@@ -459,44 +487,47 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
         cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
     }

+        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
         if (VARIANT == xmrig::VARIANT_TUBE) {
-            cx = aes_round_tweak_div(cx, _mm_set_epi64x(ah0, al0));
+            cx = aes_round_tweak_div(cx, ax0);
         }
         else if (SOFT_AES) {
-            cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
         }
         else {
-            cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
+            cx = _mm_aesenc_si128(cx, ax0);
         }

-        if (IS_MONERO) {
-            cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+        if (IS_V1 || VARIANT == xmrig::VARIANT_2) {
+            cryptonight_monero_tweak<VARIANT>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx0, bx1, cx);
         } else {
             _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
         }

-        idx0 = EXTRACT64(cx);
-        bx0 = cx;
+        idx0 = _mm_cvtsi128_si64(cx);

         uint64_t hi, lo, cl, ch;
         cl = ((uint64_t*) &l0[idx0 & MASK])[0];
         ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-        lo = __umul128(idx0, cl, &hi);
+
+        if (VARIANT == xmrig::VARIANT_2) {
+            VARIANT2_INTEGER_MATH(0, cl, cx);
+            lo = __umul128(idx0, cl, &hi);
+            VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1);
+        }
+        else {
+            lo = __umul128(idx0, cl, &hi);
+        }

         al0 += hi;
         ah0 += lo;

         ((uint64_t*)&l0[idx0 & MASK])[0] = al0;

-        if (IS_MONERO) {
-            if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) {
-                ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-            }
-            else {
-                ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
-            }
-        }
-        else {
+        if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
+            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
+        } else if (IS_V1) {
+            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
+        } else {
             ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
         }
@@ -517,6 +548,10 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
             idx0 = d ^ q;
         }

+        if (VARIANT == xmrig::VARIANT_2) {
+            bx1 = bx0;
+        }
+        bx0 = cx;
     }

     cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
@@ -532,9 +567,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
     constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
     constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
     constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
-    constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>();
+    constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;

-    if (IS_MONERO && size < 43) {
+    if (IS_V1 && size < 43) {
         memset(output, 0, 64);
         return;
     }
@@ -542,14 +577,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
     xmrig::keccak(input, size, ctx[0]->state);
     xmrig::keccak(input + size, size, ctx[1]->state);

-    VARIANT1_INIT(0);
-    VARIANT1_INIT(1);
-
     const uint8_t* l0 = ctx[0]->memory;
     const uint8_t* l1 = ctx[1]->memory;
     uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
     uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);

+    VARIANT1_INIT(0);
+    VARIANT1_INIT(1);
+    VARIANT2_INIT(0);
+    VARIANT2_INIT(1);
+    VARIANT2_SET_ROUNDING_MODE();
+
     cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
     cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@@ -558,8 +596,10 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
     uint64_t ah0 = h0[1] ^ h0[5];
     uint64_t ah1 = h1[1] ^ h1[5];

-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+    __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+    __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+    __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+    __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);

     uint64_t idx0 = al0;
     uint64_t idx1 = al1;
@@ -571,52 +611,53 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
             cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
         }

+        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+        const __m128i ax1 = _mm_set_epi64x(ah1, al1);
         if (VARIANT == xmrig::VARIANT_TUBE) {
-            cx0 = aes_round_tweak_div(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = aes_round_tweak_div(cx1, _mm_set_epi64x(ah1, al1));
+            cx0 = aes_round_tweak_div(cx0, ax0);
+            cx1 = aes_round_tweak_div(cx1, ax1);
         }
         else if (SOFT_AES) {
-            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1);
         }
         else {
-            cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+            cx0 = _mm_aesenc_si128(cx0, ax0);
+            cx1 = _mm_aesenc_si128(cx1, ax1);
        }

-        if (IS_MONERO) {
-            cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
-            cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
+        if (IS_V1 || (VARIANT == xmrig::VARIANT_2)) {
+            cryptonight_monero_tweak<VARIANT>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0);
+            cryptonight_monero_tweak<VARIANT>((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1);
         } else {
-            _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
-            _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
+            _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+            _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
         }

-        idx0 = EXTRACT64(cx0);
-        idx1 = EXTRACT64(cx1);
-
-        bx0 = cx0;
-        bx1 = cx1;
+        idx0 = _mm_cvtsi128_si64(cx0);
+        idx1 = _mm_cvtsi128_si64(cx1);

         uint64_t hi, lo, cl, ch;
         cl = ((uint64_t*) &l0[idx0 & MASK])[0];
         ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-        lo = __umul128(idx0, cl, &hi);
+
+        if (VARIANT == xmrig::VARIANT_2) {
+            VARIANT2_INTEGER_MATH(0, cl, cx0);
+            lo = __umul128(idx0, cl, &hi);
+            VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01);
+        } else {
+            lo = __umul128(idx0, cl, &hi);
+        }

         al0 += hi;
         ah0 += lo;

         ((uint64_t*)&l0[idx0 & MASK])[0] = al0;

-        if (IS_MONERO) {
-            if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) {
-                ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-            }
-            else {
-                ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
-            }
-        }
-        else {
+        if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
+            ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
+        } else if (IS_V1) {
+            ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
+        } else {
             ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
         }
@@ -640,22 +681,24 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         cl = ((uint64_t*) &l1[idx1 & MASK])[0];
         ch = ((uint64_t*) &l1[idx1 & MASK])[1];
-        lo = __umul128(idx1, cl, &hi);
+
+        if (VARIANT == xmrig::VARIANT_2) {
+            VARIANT2_INTEGER_MATH(1, cl, cx1);
+            lo = __umul128(idx1, cl, &hi);
+            VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11);
+        } else {
+            lo = __umul128(idx1, cl, &hi);
+        }

         al1 += hi;
         ah1 += lo;

         ((uint64_t*)&l1[idx1 & MASK])[0] = al1;

-        if (IS_MONERO) {
-            if (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO) {
-                ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
-            }
-            else {
-                ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
-            }
-        }
-        else {
+        if (IS_V1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
+            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
+        } else if (IS_V1) {
+            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
+        } else {
             ((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
         }
@@ -676,6 +719,13 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
             idx1 = d ^ q;
         }

+        if (VARIANT == xmrig::VARIANT_2) {
+            bx01 = bx00;
+            bx11 = bx10;
+        }
+
+        bx00 = cx0;
+        bx10 = cx1;
     }

     cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
@ -689,12 +739,12 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
} }
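Both lanes above call VARIANT2_SHUFFLE right after the multiply inputs are read. For readers without the macro definitions at hand, the documented cn/2 shuffle perturbs the three sibling 16-byte chunks of the 64-byte line addressed by idx; a self-contained sketch follows (the function name and signature are ours, and the committed macro may differ in detail):

    #include <emmintrin.h>  // SSE2
    #include <cstdint>

    // Hedged sketch of the cn/2 shuffle: the three sibling 16-byte chunks
    // of the current 64-byte scratchpad line are rotated and offset by the
    // a/b registers. offset is already masked by the caller.
    static inline void variant2_shuffle_sketch(uint8_t *base, uint64_t offset,
                                               __m128i a, __m128i b0, __m128i b1)
    {
        const __m128i chunk1 = _mm_load_si128((__m128i *)(base + (offset ^ 0x10)));
        const __m128i chunk2 = _mm_load_si128((__m128i *)(base + (offset ^ 0x20)));
        const __m128i chunk3 = _mm_load_si128((__m128i *)(base + (offset ^ 0x30)));

        _mm_store_si128((__m128i *)(base + (offset ^ 0x10)), _mm_add_epi64(chunk3, b1));
        _mm_store_si128((__m128i *)(base + (offset ^ 0x20)), _mm_add_epi64(chunk1, b0));
        _mm_store_si128((__m128i *)(base + (offset ^ 0x30)), _mm_add_epi64(chunk2, a));
    }

The effect is that every iteration now touches a full 64-byte cache line instead of 16 bytes, which is part of what cn/2 changes for specialized hardware.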
-#define CN_STEP1(a, b, c, l, ptr, idx) \
+#define CN_STEP1(a, b0, b1, c, l, ptr, idx) \
     ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
     c = _mm_load_si128(ptr);

-#define CN_STEP2(a, b, c, l, ptr, idx) \
+#define CN_STEP2(a, b0, b1, c, l, ptr, idx) \
     if (VARIANT == xmrig::VARIANT_TUBE) { \
         c = aes_round_tweak_div(c, a); \
     } \

@@ -704,26 +754,31 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         c = _mm_aesenc_si128(c, a); \
     } \
     \
-    b = _mm_xor_si128(b, c); \
-    \
-    if (IS_MONERO) { \
-        cryptonight_monero_tweak<VARIANT == xmrig::VARIANT_XTL ? 4 : 3>(reinterpret_cast<uint64_t*>(ptr), b); \
+    if (IS_V1 || (VARIANT == xmrig::VARIANT_2)) { \
+        cryptonight_monero_tweak<VARIANT>((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \
     } else { \
-        _mm_store_si128(ptr, b); \
+        _mm_store_si128(ptr, _mm_xor_si128(b0, c)); \
     }

-#define CN_STEP3(a, b, c, l, ptr, idx) \
-    idx = EXTRACT64(c); \
-    ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
-    b = _mm_load_si128(ptr);
+#define CN_STEP3(part, a, b0, b1, c, l, ptr, idx) \
+    idx = _mm_cvtsi128_si64(c); \
+    ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
+    uint64_t cl##part = ((uint64_t*)ptr)[0]; \
+    uint64_t ch##part = ((uint64_t*)ptr)[1];

-#define CN_STEP4(a, b, c, l, mc, ptr, idx) \
-    lo = __umul128(idx, EXTRACT64(b), &hi); \
+#define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \
+    if (VARIANT == xmrig::VARIANT_2) { \
+        VARIANT2_INTEGER_MATH(part, cl##part, c); \
+        lo = __umul128(idx, cl##part, &hi); \
+        VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1); \
+    } else { \
+        lo = __umul128(idx, cl##part, &hi); \
+    } \
     a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
     \
-    if (IS_MONERO) { \
+    if (IS_V1) { \
         _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
         \
         if (VARIANT == xmrig::VARIANT_TUBE || \

@@ -734,8 +789,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         _mm_store_si128(ptr, a); \
     } \
     \
-    a = _mm_xor_si128(a, b); \
-    idx = EXTRACT64(a); \
+    a = _mm_xor_si128(a, _mm_set_epi64x(ch##part, cl##part)); \
+    idx = _mm_cvtsi128_si64(a); \
     \
     if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { \
         int64_t n = ((int64_t*)&l[idx & MASK])[0]; \

@@ -747,15 +802,29 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         } \
         \
         idx = d ^ q; \
-    }
+    } \
+    \
+    if (VARIANT == xmrig::VARIANT_2) { \
+        b1 = b0; \
+    } \
+    b0 = c;
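CN_STEP4 is where cn/2's other tweak lands: before the 64x64 multiply, the scratchpad word cl##part is perturbed by a division and square-root chain that threads through the whole hash. A hedged scalar sketch of the published cn/2 step is below; the committed VARIANT2_INTEGER_MATH is SSE-based and keeps its state in the division_result_xmm_/sqrt_result_xmm_ registers declared by CONST_INIT, and the floating-point sqrt normally gets an exactness fix-up that this sketch omits:

    #include <cmath>
    #include <cstdint>

    // Hedged scalar sketch of cn/2 integer math. division_result and
    // sqrt_result persist across iterations (seeded from the Keccak state,
    // see CONST_INIT). cx_lo/cx_hi are the low/high quadwords of cx.
    static inline void variant2_integer_math_sketch(uint64_t &cl,
                                                    uint64_t cx_lo, uint64_t cx_hi,
                                                    uint64_t &division_result,
                                                    uint64_t &sqrt_result)
    {
        cl ^= division_result ^ (sqrt_result << 32);

        const uint64_t dividend = cx_hi;
        const uint32_t divisor  = static_cast<uint32_t>(cx_lo + (sqrt_result << 1)) | 0x80000001UL;
        division_result = static_cast<uint32_t>(dividend / divisor) +
                          ((dividend % divisor) << 32);

        const uint64_t sqrt_input = cx_lo + division_result;
        // Integer part of sqrt(2^64 + sqrt_input) * 2 - 2^33, per the public
        // cn/2 description; production code adds an exactness fix-up here.
        sqrt_result = static_cast<uint64_t>(
            std::sqrt(18446744073709551616.0 + static_cast<double>(sqrt_input)) * 2.0
            - 8589934592.0);
    }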
 #define CONST_INIT(ctx, n) \
     __m128i mc##n; \
-    if (IS_MONERO) { \
+    __m128i division_result_xmm_##n; \
+    __m128i sqrt_result_xmm_##n; \
+    if (IS_V1) { \
         mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \
                                *(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \
-    }
+    } \
+    if (VARIANT == xmrig::VARIANT_2) { \
+        division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]); \
+        sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]); \
+    } \
+    __m128i ax##n = _mm_set_epi64x(h##n[1] ^ h##n[5], h##n[0] ^ h##n[4]); \
+    __m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]); \
+    __m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \
+    __m128i cx##n = _mm_setzero_si128();
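CONST_INIT now declares the whole per-lane register file, which is why the multi-hash functions below drop their hand-unrolled ax/bx/cx initialisation and simply invoke the macro after h0..h4 are bound. For lane 0, CONST_INIT(ctx[0], 0) expands (after token pasting) to roughly the following; this is an expansion sketch of the macro above, not standalone code:

    __m128i mc0;
    __m128i division_result_xmm_0;  // cn/2 division state, seeded from h0[12]
    __m128i sqrt_result_xmm_0;      // cn/2 sqrt state, seeded from h0[13]
    // ... IS_V1 / VARIANT_2 seeding as in the macro body above ...
    __m128i ax0  = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
    __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
    __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
    __m128i cx0  = _mm_setzero_si128();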
 template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
@@ -764,9 +833,9 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
     constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
     constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
     constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
-    constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>();
+    constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;

-    if (IS_MONERO && size < 43) {
+    if (IS_V1 && size < 43) {
         memset(output, 0, 32 * 3);
         return;
     }

@@ -776,10 +845,6 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
         cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
     }

-    CONST_INIT(ctx[0], 0);
-    CONST_INIT(ctx[1], 1);
-    CONST_INIT(ctx[2], 2);
-
     uint8_t* l0 = ctx[0]->memory;
     uint8_t* l1 = ctx[1]->memory;
     uint8_t* l2 = ctx[2]->memory;

@@ -787,58 +852,35 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
     uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
     uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);

-    __m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
-    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-    __m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
-    __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
-    __m128i cx0 = _mm_set_epi64x(0, 0);
-    __m128i cx1 = _mm_set_epi64x(0, 0);
-    __m128i cx2 = _mm_set_epi64x(0, 0);
+    CONST_INIT(ctx[0], 0);
+    CONST_INIT(ctx[1], 1);
+    CONST_INIT(ctx[2], 2);
+    VARIANT2_SET_ROUNDING_MODE();

     uint64_t idx0, idx1, idx2;
-    idx0 = EXTRACT64(ax0);
-    idx1 = EXTRACT64(ax1);
-    idx2 = EXTRACT64(ax2);
+    idx0 = _mm_cvtsi128_si64(ax0);
+    idx1 = _mm_cvtsi128_si64(ax1);
+    idx2 = _mm_cvtsi128_si64(ax2);

-    for (size_t i = 0; i < ITERATIONS / 2; i++) {
+    for (size_t i = 0; i < ITERATIONS; i++) {
         uint64_t hi, lo;
         __m128i *ptr0, *ptr1, *ptr2;

-        // EVEN ROUND
-        CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
-        CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
-        CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
+        CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
+        CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
+        CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);

-        CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
-        CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
-        CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
+        CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
+        CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
+        CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2);

-        CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
-        CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
-        CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
+        CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0);
+        CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1);
+        CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2);

-        CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
-        CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
-        CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
-
-        // ODD ROUND
-        CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
-        CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
-        CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
-
-        CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
-        CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
-        CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
-
-        CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
-        CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
-        CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
-
-        CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
-        CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
-        CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+        CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0);
+        CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1);
+        CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2);
     }

     for (size_t i = 0; i < 3; i++) {
@@ -855,9 +897,9 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
     constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
     constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
     constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
-    constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>();
+    constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;

-    if (IS_MONERO && size < 43) {
+    if (IS_V1 && size < 43) {
         memset(output, 0, 32 * 4);
         return;
     }

@@ -867,11 +909,6 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
         cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
     }

-    CONST_INIT(ctx[0], 0);
-    CONST_INIT(ctx[1], 1);
-    CONST_INIT(ctx[2], 2);
-    CONST_INIT(ctx[3], 3);
-
     uint8_t* l0 = ctx[0]->memory;
     uint8_t* l1 = ctx[1]->memory;
     uint8_t* l2 = ctx[2]->memory;

@@ -881,71 +918,42 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
     uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
     uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);

-    __m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
-    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-    __m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
-    __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
-    __m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
-    __m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
-    __m128i cx0 = _mm_set_epi64x(0, 0);
-    __m128i cx1 = _mm_set_epi64x(0, 0);
-    __m128i cx2 = _mm_set_epi64x(0, 0);
-    __m128i cx3 = _mm_set_epi64x(0, 0);
+    CONST_INIT(ctx[0], 0);
+    CONST_INIT(ctx[1], 1);
+    CONST_INIT(ctx[2], 2);
+    CONST_INIT(ctx[3], 3);
+    VARIANT2_SET_ROUNDING_MODE();

     uint64_t idx0, idx1, idx2, idx3;
-    idx0 = EXTRACT64(ax0);
-    idx1 = EXTRACT64(ax1);
-    idx2 = EXTRACT64(ax2);
-    idx3 = EXTRACT64(ax3);
+    idx0 = _mm_cvtsi128_si64(ax0);
+    idx1 = _mm_cvtsi128_si64(ax1);
+    idx2 = _mm_cvtsi128_si64(ax2);
+    idx3 = _mm_cvtsi128_si64(ax3);

-    for (size_t i = 0; i < ITERATIONS / 2; i++)
+    for (size_t i = 0; i < ITERATIONS; i++)
     {
         uint64_t hi, lo;
         __m128i *ptr0, *ptr1, *ptr2, *ptr3;

-        // EVEN ROUND
-        CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
-        CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
-        CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
-        CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
+        CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
+        CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
+        CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
+        CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3);

-        CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
-        CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
-        CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
-        CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);
+        CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
+        CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
+        CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
+        CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3);

-        CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
-        CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
-        CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
-        CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
+        CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0);
+        CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1);
+        CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2);
+        CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3);

-        CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
-        CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
-        CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
-        CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
-
-        // ODD ROUND
-        CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
-        CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
-        CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
-        CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
-
-        CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
-        CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
-        CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
-        CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
-
-        CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
-        CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
-        CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
-        CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
-
-        CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
-        CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
-        CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
-        CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+        CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0);
+        CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1);
+        CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2);
+        CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
     }

     for (size_t i = 0; i < 4; i++) {
@@ -962,9 +970,9 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
     constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
     constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO, VARIANT>();
    constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
-    constexpr bool IS_MONERO = xmrig::cn_is_monero<VARIANT>();
+    constexpr bool IS_V1 = xmrig::cn_base_variant<VARIANT>() == xmrig::VARIANT_1;

-    if (IS_MONERO && size < 43) {
+    if (IS_V1 && size < 43) {
         memset(output, 0, 32 * 5);
         return;
     }

@@ -974,12 +982,6 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
         cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
     }

-    CONST_INIT(ctx[0], 0);
-    CONST_INIT(ctx[1], 1);
-    CONST_INIT(ctx[2], 2);
-    CONST_INIT(ctx[3], 3);
-    CONST_INIT(ctx[4], 4);
-
     uint8_t* l0 = ctx[0]->memory;
     uint8_t* l1 = ctx[1]->memory;
     uint8_t* l2 = ctx[2]->memory;

@@ -991,83 +993,48 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
     uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
     uint64_t* h4 = reinterpret_cast<uint64_t*>(ctx[4]->state);

-    __m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
-    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-    __m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
-    __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
-    __m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
-    __m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
-    __m128i ax4 = _mm_set_epi64x(h4[1] ^ h4[5], h4[0] ^ h4[4]);
-    __m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
-    __m128i cx0 = _mm_set_epi64x(0, 0);
-    __m128i cx1 = _mm_set_epi64x(0, 0);
-    __m128i cx2 = _mm_set_epi64x(0, 0);
-    __m128i cx3 = _mm_set_epi64x(0, 0);
-    __m128i cx4 = _mm_set_epi64x(0, 0);
+    CONST_INIT(ctx[0], 0);
+    CONST_INIT(ctx[1], 1);
+    CONST_INIT(ctx[2], 2);
+    CONST_INIT(ctx[3], 3);
+    CONST_INIT(ctx[4], 4);
+    VARIANT2_SET_ROUNDING_MODE();

     uint64_t idx0, idx1, idx2, idx3, idx4;
-    idx0 = EXTRACT64(ax0);
-    idx1 = EXTRACT64(ax1);
-    idx2 = EXTRACT64(ax2);
-    idx3 = EXTRACT64(ax3);
-    idx4 = EXTRACT64(ax4);
+    idx0 = _mm_cvtsi128_si64(ax0);
+    idx1 = _mm_cvtsi128_si64(ax1);
+    idx2 = _mm_cvtsi128_si64(ax2);
+    idx3 = _mm_cvtsi128_si64(ax3);
+    idx4 = _mm_cvtsi128_si64(ax4);

-    for (size_t i = 0; i < ITERATIONS / 2; i++)
+    for (size_t i = 0; i < ITERATIONS; i++)
     {
         uint64_t hi, lo;
         __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;

-        // EVEN ROUND
-        CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
-        CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
-        CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
-        CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
-        CN_STEP1(ax4, bx4, cx4, l4, ptr4, idx4);
+        CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
+        CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
+        CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
+        CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
+        CN_STEP1(ax4, bx40, bx41, cx4, l4, ptr4, idx4);

-        CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
-        CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
-        CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
-        CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);
-        CN_STEP2(ax4, bx4, cx4, l4, ptr4, idx4);
+        CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
+        CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
+        CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
+        CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
+        CN_STEP2(ax4, bx40, bx41, cx4, l4, ptr4, idx4);

-        CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
-        CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
-        CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
-        CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
-        CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4);
+        CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0);
+        CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1);
+        CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2);
+        CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3);
+        CN_STEP3(4, ax4, bx40, bx41, cx4, l4, ptr4, idx4);

-        CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
-        CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
-        CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
-        CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
-        CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
-
-        // ODD ROUND
-        CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
-        CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
-        CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
-        CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
-        CN_STEP1(ax4, cx4, bx4, l4, ptr4, idx4);
-
-        CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
-        CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
-        CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
-        CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
-        CN_STEP2(ax4, cx4, bx4, l4, ptr4, idx4);
-
-        CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
-        CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
-        CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
-        CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
-        CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4);
-
-        CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
-        CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
-        CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
-        CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
-        CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+        CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0);
+        CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1);
+        CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2);
+        CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
+        CN_STEP4(4, ax4, bx40, bx41, cx4, l4, mc4, ptr4, idx4);
     }

     for (size_t i = 0; i < 5; i++) {

@@ -1077,4 +1044,4 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
     }
 }

-#endif /* __CRYPTONIGHT_X86_H__ */
+#endif /* XMRIG_CRYPTONIGHT_X86_H */

View file

@@ -135,6 +135,17 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
         cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_RTO>,
         cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_RTO>,

+        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_2>,
+        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_2>,
+        cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_2>,
+        cryptonight_double_hash<CRYPTONIGHT, true, VARIANT_2>,
+        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_2>,
+        cryptonight_quad_hash<CRYPTONIGHT, false, VARIANT_2>,
+        cryptonight_penta_hash<CRYPTONIGHT, false, VARIANT_2>,
+        cryptonight_triple_hash<CRYPTONIGHT, true, VARIANT_2>,
+        cryptonight_quad_hash<CRYPTONIGHT, true, VARIANT_2>,
+        cryptonight_penta_hash<CRYPTONIGHT, true, VARIANT_2>,
+
 #   ifndef XMRIG_NO_AEON
         cryptonight_single_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
         cryptonight_double_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,

@@ -164,6 +175,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
+        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
 #   else
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,

@@ -173,6 +185,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
 #   endif

 #   ifndef XMRIG_NO_SUMO

@@ -216,6 +229,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
+        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
 #   else
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,

@@ -225,6 +239,7 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
         nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
 #   endif
     };
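Bookkeeping note on the table above: every variant occupies ten consecutive slots (single and double hash with hardware and software AES, then triple/quad/penta), so VARIANT_2 contributes ten real entries in the CRYPTONIGHT block and ten nullptr placeholders in each branch of the Lite and Heavy blocks, keeping the stride constant. A sketch of the kind of flat-table lookup fn() can then perform; the index formula and names here are illustrative assumptions, not the committed code:

    #include <cstddef>
    #include <cstdint>

    struct cryptonight_ctx;  // opaque for this sketch

    typedef void (*cn_hash_fun)(const uint8_t *input, size_t size,
                                uint8_t *output, cryptonight_ctx **ctx);

    // Hypothetical lookup over a flat table laid out as
    // [algorithm][variant][10 slots per variant].
    static cn_hash_fun select_fn_sketch(const cn_hash_fun *table,
                                        size_t variant_count,
                                        size_t algorithm, size_t variant,
                                        size_t av)
    {
        // av (AlgoVariant) is 1-based: single hash with hardware AES == 1.
        const size_t index = variant_count * 10 * algorithm + 10 * variant + (av - 1);

        return table[index];  // nullptr when the combination is unsupported
    }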

View file

@@ -6,6 +6,7 @@
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify

@@ -55,6 +56,7 @@ bool MultiWorker<N>::selfTest()
     if (m_thread->algorithm() == CRYPTONIGHT) {
         return verify(VARIANT_0, test_output_v0) &&
                verify(VARIANT_1, test_output_v1) &&
+               verify(VARIANT_2, test_output_v2) &&
                verify(VARIANT_XTL, test_output_xtl) &&
                verify(VARIANT_MSR, test_output_msr) &&
                verify(VARIANT_XAO, test_output_xao) &&

@@ -102,7 +104,7 @@ void MultiWorker<N>::start()
             storeStats();
         }

-        m_thread->fn(m_state.job.variant())(m_state.blob, m_state.job.size(), m_hash, m_ctx);
+        m_thread->fn(m_state.job.algorithm().variant())(m_state.blob, m_state.job.size(), m_hash, m_ctx);

         for (size_t i = 0; i < N; ++i) {
             if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < m_state.job.target()) {
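Two notes on this file. First, the start() change above reads the variant through the job's Algorithm object (job.algorithm().variant()) instead of a variant accessor on the Job itself, consistent with the algorithm/variant split used elsewhere in this commit. Second, the new verify(VARIANT_2, test_output_v2) call follows the existing self-test pattern: hash a constant input with the candidate function and compare against a known-good vector. A sketch of that pattern under stated assumptions (the committed verify() is defined earlier in MultiWorker.cpp and may differ; the 76-byte input size mirrors the usual CryptoNight test blob):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    struct cryptonight_ctx;  // opaque here; real definition in CryptoNight.h

    typedef void (*cn_hash_fun)(const uint8_t *input, size_t size,
                                uint8_t *output, cryptonight_ctx **ctx);

    // Hedged sketch of the self-test shape, not the committed verify():
    // run the selected hash function over a constant blob and memcmp the
    // result against a reference vector such as test_output_v2.
    static bool verify_sketch(cn_hash_fun func, const uint8_t *test_input,
                              uint8_t *hash, size_t hash_size,
                              const uint8_t *reference, cryptonight_ctx **ctx)
    {
        if (!func) {
            return false;  // variant not wired up for this algorithm
        }

        func(test_input, 76, hash, ctx);

        return std::memcmp(hash, reference, hash_size) == 0;
    }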

View file

@@ -6,6 +6,7 @@
  * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
  * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018 SChernykh <https://github.com/SChernykh>
  * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
  *
  * This program is free software: you can redistribute it and/or modify

@@ -22,8 +23,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

-#ifndef __MULTIWORKER_H__
-#define __MULTIWORKER_H__
+#ifndef XMRIG_MULTIWORKER_H
+#define XMRIG_MULTIWORKER_H

 #include "common/net/Job.h"

@@ -71,4 +72,4 @@ private:
 };

-#endif /* __MULTIWORKER_H__ */
+#endif /* XMRIG_MULTIWORKER_H */