Merge branch 'dev'

This commit is contained in:
XMRig 2021-04-20 20:55:35 +07:00
commit e2c757d9dd
No known key found for this signature in database
GPG key ID: 446A53638BE94409
43 changed files with 6619 additions and 5742 deletions

View file

@ -1,3 +1,9 @@
# v6.12.0
- [#2276](https://github.com/xmrig/xmrig/pull/2276) Added support for Uplexa (`cn/upx2` algorithm).
- [#2261](https://github.com/xmrig/xmrig/pull/2261) Show total hashrate if compiled without OpenCL.
- [#2289](https://github.com/xmrig/xmrig/pull/2289) RandomX: optimized `IMUL_RCP` instruction.
- Added support for `--user` command line option for online benchmark.
# v6.11.2
- [#2207](https://github.com/xmrig/xmrig/issues/2207) Fixed regression in HTTP parser and updated llhttp to v5.1.0.

View file

@ -5,6 +5,7 @@ option(WITH_HWLOC "Enable hwloc support" ON)
option(WITH_CN_LITE "Enable CryptoNight-Lite algorithms family" ON)
option(WITH_CN_HEAVY "Enable CryptoNight-Heavy algorithms family" ON)
option(WITH_CN_PICO "Enable CryptoNight-Pico algorithm" ON)
option(WITH_CN_FEMTO "Enable CryptoNight-UPX2 algorithm" ON)
option(WITH_RANDOMX "Enable RandomX algorithms family" ON)
option(WITH_ARGON2 "Enable Argon2 algorithms family" ON)
option(WITH_ASTROBWT "Enable AstroBWT algorithms family" ON)
@ -196,6 +197,10 @@ if (WITH_CN_PICO)
add_definitions(/DXMRIG_ALGO_CN_PICO)
endif()
if (WITH_CN_FEMTO)
add_definitions(/DXMRIG_ALGO_CN_FEMTO)
endif()
if (WITH_EMBEDDED_CONFIG)
add_definitions(/DXMRIG_FEATURE_EMBEDDED_CONFIG)
endif()

View file

@ -334,13 +334,11 @@ void xmrig::CpuBackend::printHashrate(bool details)
i++;
}
# ifdef XMRIG_FEATURE_OPENCL
Log::print(WHITE_BOLD_S "| - | - | %7s | %7s | %7s |",
Hashrate::format(hashrate()->calc(Hashrate::ShortInterval), num, sizeof num / 3),
Hashrate::format(hashrate()->calc(Hashrate::MediumInterval), num + 8, sizeof num / 3),
Hashrate::format(hashrate()->calc(Hashrate::LargeInterval), num + 8 * 2, sizeof num / 3)
);
# endif
}

View file

@ -197,6 +197,7 @@ void xmrig::CpuConfig::generate()
count += xmrig::generate<Algorithm::CN_LITE>(m_threads, m_limit);
count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, m_limit);
count += xmrig::generate<Algorithm::CN_PICO>(m_threads, m_limit);
count += xmrig::generate<Algorithm::CN_FEMTO>(m_threads, m_limit);
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, m_limit);
count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit);
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, m_limit);

View file

@ -100,6 +100,15 @@ size_t inline generate<Algorithm::CN_PICO>(Threads<CpuThreads> &threads, uint32_
#endif
#ifdef XMRIG_ALGO_CN_FEMTO
/**
 * CPU thread-profile generator for the CryptoNight-Femto family.
 *
 * Forwards to the generic generate() helper with the "cn/upx2" key and
 * Algorithm::CN_UPX2, and returns whatever that helper reports.
 */
template<>
size_t inline generate<Algorithm::CN_FEMTO>(Threads<CpuThreads>& threads, uint32_t limit)
{
    const size_t generated = generate("cn/upx2", threads, Algorithm::CN_UPX2, limit);

    return generated;
}
#endif
#ifdef XMRIG_ALGO_RANDOMX
template<>
size_t inline generate<Algorithm::RANDOM_X>(Threads<CpuThreads> &threads, uint32_t limit)

View file

@ -193,6 +193,12 @@ bool xmrig::CpuWorker<N>::selfTest()
}
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
if (m_algorithm.family() == Algorithm::CN_FEMTO) {
return verify(Algorithm::CN_UPX2, test_output_femto_upx2);
}
# endif
# ifdef XMRIG_ALGO_ARGON2
if (m_algorithm.family() == Algorithm::ARGON2) {
return verify(Algorithm::AR2_CHUKWA, argon2_chukwa_test_out) &&

View file

@ -309,26 +309,34 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3
return 1;
}
Algorithm::Family f = algorithm.family();
# ifdef XMRIG_ALGO_CN_LITE
if (algorithm.family() == Algorithm::CN_LITE) {
if (f == Algorithm::CN_LITE) {
return CpuThreads(count, 1);
}
# endif
# ifdef XMRIG_ALGO_CN_PICO
if (algorithm.family() == Algorithm::CN_PICO) {
if (f == Algorithm::CN_PICO) {
return CpuThreads(count, 2);
}
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
if (f == Algorithm::CN_FEMTO) {
return CpuThreads(count, 2);
}
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
if (algorithm.family() == Algorithm::CN_HEAVY) {
if (f == Algorithm::CN_HEAVY) {
return CpuThreads(std::max<size_t>(count / 4, 1), 1);
}
# endif
# ifdef XMRIG_ALGO_RANDOMX
if (algorithm.family() == Algorithm::RANDOM_X) {
if (f == Algorithm::RANDOM_X) {
if (algorithm == Algorithm::RX_WOW) {
return count;
}
@ -338,13 +346,13 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3
# endif
# ifdef XMRIG_ALGO_ARGON2
if (algorithm.family() == Algorithm::ARGON2) {
if (f == Algorithm::ARGON2) {
return count;
}
# endif
# ifdef XMRIG_ALGO_ASTROBWT
if (algorithm.family() == Algorithm::ASTROBWT) {
if (f == Algorithm::ASTROBWT) {
CpuThreads threads;
for (size_t i = 0; i < count; ++i) {
threads.add(i, 0);

View file

@ -336,11 +336,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
size_t cacheHashes = ((L3 + extra) + (scratchpad / 2)) / scratchpad;
# ifdef XMRIG_ALGO_CN_PICO
if (intensity && algorithm == Algorithm::CN_PICO_0 && (cacheHashes / PUs) >= 2) {
Algorithm::Family family = algorithm.family();
if (intensity && ((family == Algorithm::CN_PICO) || (family == Algorithm::CN_FEMTO)) && (cacheHashes / PUs) >= 2) {
intensity = 2;
}
# endif
# ifdef XMRIG_ALGO_RANDOMX
if (extra == 0 && algorithm.l2() > 0) {

View file

@ -179,6 +179,7 @@ void xmrig::CudaConfig::generate()
count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_FEMTO>(m_threads, devices);
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, devices);
count += xmrig::generate<Algorithm::KAWPOW>(m_threads, devices);

View file

@ -102,6 +102,15 @@ size_t inline generate<Algorithm::CN_PICO>(Threads<CudaThreads> &threads, const
#endif
#ifdef XMRIG_ALGO_CN_FEMTO
/**
 * CUDA thread-profile generator for the CryptoNight-Femto family.
 *
 * Forwards to the generic generate() helper with the "cn/upx2" key and
 * Algorithm::CN_UPX2 for the supplied devices, and returns its result.
 */
template<>
size_t inline generate<Algorithm::CN_FEMTO>(Threads<CudaThreads>& threads, const std::vector<CudaDevice>& devices)
{
    const size_t generated = generate("cn/upx2", threads, Algorithm::CN_UPX2, devices);

    return generated;
}
#endif
#ifdef XMRIG_ALGO_RANDOMX
template<>
size_t inline generate<Algorithm::RANDOM_X>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)

View file

@ -219,6 +219,7 @@ void xmrig::OclConfig::generate()
count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
count += xmrig::generate<Algorithm::CN_FEMTO>(m_threads, devices);
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, devices);
count += xmrig::generate<Algorithm::KAWPOW>(m_threads, devices);

View file

@ -101,6 +101,15 @@ size_t inline generate<Algorithm::CN_PICO>(Threads<OclThreads> &threads, const s
#endif
#ifdef XMRIG_ALGO_CN_FEMTO
/**
 * OpenCL thread-profile generator for the CryptoNight-Femto family.
 *
 * Forwards to the generic generate() helper with the "cn/upx2" key and
 * Algorithm::CN_UPX2 for the supplied devices, and returns its result.
 */
template<>
size_t inline generate<Algorithm::CN_FEMTO>(Threads<OclThreads>& threads, const std::vector<OclDevice>& devices)
{
    const size_t generated = generate("cn/upx2", threads, Algorithm::CN_UPX2, devices);

    return generated;
}
#endif
#ifdef XMRIG_ALGO_RANDOMX
template<>
size_t inline generate<Algorithm::RANDOM_X>(Threads<OclThreads> &threads, const std::vector<OclDevice> &devices)

View file

@ -17,16 +17,17 @@
#define ALGO_CN_PICO_0 16
#define ALGO_CN_PICO_TLO 17
#define ALGO_CN_CCX 18
#define ALGO_RX_0 19
#define ALGO_RX_WOW 20
#define ALGO_RX_ARQMA 21
#define ALGO_RX_SFX 22
#define ALGO_RX_KEVA 23
#define ALGO_AR2_CHUKWA 24
#define ALGO_AR2_CHUKWA_V2 25
#define ALGO_AR2_WRKZ 26
#define ALGO_ASTROBWT_DERO 27
#define ALGO_KAWPOW_RVN 28
#define ALGO_CN_UPX2 19
#define ALGO_RX_0 20
#define ALGO_RX_WOW 21
#define ALGO_RX_ARQMA 22
#define ALGO_RX_SFX 23
#define ALGO_RX_KEVA 24
#define ALGO_AR2_CHUKWA 25
#define ALGO_AR2_CHUKWA_V2 26
#define ALGO_AR2_WRKZ 27
#define ALGO_ASTROBWT_DERO 28
#define ALGO_KAWPOW_RVN 29
#define FAMILY_UNKNOWN 0
#define FAMILY_CN 1

View file

@ -514,7 +514,7 @@ __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ul
c = AES_Round(AES0, AES1, AES2, AES3, c, ((uint4 *)a)[0]);
{
# if (ALGO == ALGO_CN_RWZ)
# if ((ALGO == ALGO_CN_RWZ) || (ALGO == ALGO_CN_UPX2))
const ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(3));
const ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(1));
@ -561,7 +561,7 @@ __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ul
t ^= chunk2;
const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3));
# if (ALGO == ALGO_CN_RWZ)
# if ((ALGO == ALGO_CN_RWZ) || (ALGO == ALGO_CN_UPX2))
SCRATCHPAD_CHUNK(1) = as_uint4(chunk1 + bx1);
SCRATCHPAD_CHUNK(2) = as_uint4(chunk3 + bx0);
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -88,7 +88,8 @@ static inline uint32_t getIntensity(const OclDevice &device, const Algorithm &al
static inline uint32_t getWorksize(const Algorithm &algorithm)
{
if (algorithm.family() == Algorithm::CN_PICO) {
Algorithm::Family f = algorithm.family();
if (f == Algorithm::CN_PICO || f == Algorithm::CN_FEMTO) {
return 64;
}

View file

@ -39,10 +39,12 @@
xmrig::OclCnRunner::OclCnRunner(size_t index, const OclLaunchData &data) : OclBaseRunner(index, data)
{
uint32_t stridedIndex = data.thread.stridedIndex();
Algorithm::Family f = m_algorithm.family();
if (data.device.vendorId() == OCL_VENDOR_NVIDIA) {
stridedIndex = 0;
}
else if (stridedIndex == 1 && (m_algorithm.family() == Algorithm::CN_PICO || (m_algorithm.family() == Algorithm::CN && CnAlgo<>::base(m_algorithm) == Algorithm::CN_2))) {
else if (stridedIndex == 1 && (f == Algorithm::CN_PICO || f == Algorithm::CN_FEMTO || (f == Algorithm::CN && CnAlgo<>::base(m_algorithm) == Algorithm::CN_2))) {
stridedIndex = 2;
}

View file

@ -6,8 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -129,6 +129,11 @@ static AlgoName const algorithm_names[] = {
# endif
{ "cryptonight/ccx", "cn/ccx", Algorithm::CN_CCX },
{ "cryptonight/conceal", "cn/conceal", Algorithm::CN_CCX },
# ifdef XMRIG_ALGO_CN_FEMTO
{ "cryptonight/upx2", "cn/upx2", Algorithm::CN_UPX2 },
{ "cn-extremelite/upx2", nullptr, Algorithm::CN_UPX2 },
{ "cryptonight-upx/2", nullptr, Algorithm::CN_UPX2 },
# endif
};
@ -199,6 +204,9 @@ size_t xmrig::Algorithm::l3() const
case CN_PICO:
return oneMiB / 4;
case CN_FEMTO:
return oneMiB / 8;
default:
break;
}
@ -329,6 +337,11 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
return CN_PICO;
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
case CN_UPX2:
return CN_FEMTO;
# endif
# ifdef XMRIG_ALGO_RANDOMX
case RX_0:
case RX_WOW:

View file

@ -6,8 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -64,6 +64,7 @@ public:
CN_PICO_0, // "cn-pico" CryptoNight-Pico
CN_PICO_TLO, // "cn-pico/tlo" CryptoNight-Pico (TLO)
CN_CCX, // "cn/ccx" Conceal (CCX)
CN_UPX2, // "cn/upx2" Uplexa (UPX2)
RX_0, // "rx/0" RandomX (reference configuration).
RX_WOW, // "rx/wow" RandomWOW (Wownero).
RX_ARQ, // "rx/arq" RandomARQ (Arqma).
@ -83,6 +84,7 @@ public:
CN_LITE,
CN_HEAVY,
CN_PICO,
CN_FEMTO,
RANDOM_X,
ARGON2,
ASTROBWT,
@ -94,7 +96,7 @@ public:
inline Algorithm(Id id) : m_id(id) {}
Algorithm(const rapidjson::Value &value);
inline bool isCN() const { auto f = family(); return f == CN || f == CN_LITE || f == CN_HEAVY || f == CN_PICO; }
inline bool isCN() const { auto f = family(); return f == CN || f == CN_LITE || f == CN_HEAVY || f == CN_PICO || f == CN_FEMTO; }
inline bool isEqual(const Algorithm &other) const { return m_id == other.m_id; }
inline bool isValid() const { return m_id != INVALID && family() != UNKNOWN; }
inline const char *name() const { return name(false); }

View file

@ -334,6 +334,7 @@ void xmrig::BenchClient::send(Request request)
{
doc.AddMember(StringRef(BenchConfig::kSize), m_benchmark->size(), allocator);
doc.AddMember(StringRef(BenchConfig::kAlgo), m_benchmark->algorithm().toJSON(), allocator);
doc.AddMember(StringRef(BenchConfig::kUser), m_benchmark->user().toJSON(), allocator);
doc.AddMember("version", APP_VERSION, allocator);
doc.AddMember("threads", m_threads, allocator);
doc.AddMember("steady_ready_ts", m_readyTime, allocator);

View file

@ -41,6 +41,7 @@ const char *BenchConfig::kSeed = "seed";
const char *BenchConfig::kSize = "size";
const char *BenchConfig::kSubmit = "submit";
const char *BenchConfig::kToken = "token";
const char *BenchConfig::kUser = "user";
const char *BenchConfig::kVerify = "verify";
#ifndef XMRIG_DEBUG_BENCHMARK_API
@ -59,8 +60,8 @@ xmrig::BenchConfig::BenchConfig(uint32_t size, const String &id, const rapidjson
m_id(id),
m_seed(Json::getString(object, kSeed)),
m_token(Json::getString(object, kToken)),
m_size(size),
m_hash(0)
m_user(Json::getString(object, kUser)),
m_size(size)
{
if (!m_algorithm.isValid() || m_algorithm.family() != Algorithm::RANDOM_X) {
m_algorithm = Algorithm::RX_0;
@ -111,6 +112,7 @@ rapidjson::Value xmrig::BenchConfig::toJSON(rapidjson::Document &doc) const
out.AddMember(StringRef(kVerify), m_id.toJSON(), allocator);
out.AddMember(StringRef(kToken), m_token.toJSON(), allocator);
out.AddMember(StringRef(kSeed), m_seed.toJSON(), allocator);
out.AddMember(StringRef(kUser), m_user.toJSON(), allocator);
if (m_hash) {
out.AddMember(StringRef(kHash), Value(fmt::format("{:016X}", m_hash).c_str(), allocator), allocator);

View file

@ -39,6 +39,7 @@ public:
static const char *kSize;
static const char *kSubmit;
static const char *kToken;
static const char *kUser;
static const char *kVerify;
# ifndef XMRIG_DEBUG_BENCHMARK_API
@ -59,6 +60,7 @@ public:
inline const String &id() const { return m_id; }
inline const String &seed() const { return m_seed; }
inline const String &token() const { return m_token; }
inline const String &user() const { return m_user; }
inline uint32_t size() const { return m_size; }
inline uint64_t hash() const { return m_hash; }
@ -73,8 +75,9 @@ private:
String m_id;
String m_seed;
String m_token;
String m_user;
uint32_t m_size;
uint64_t m_hash;
uint64_t m_hash = 0;
};

View file

@ -262,6 +262,7 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const
case IConfig::BenchTokenKey: /* --token */
case IConfig::BenchSeedKey: /* --seed */
case IConfig::BenchHashKey: /* --hash */
case IConfig::UserKey: /* --user */
return transformBenchmark(doc, key, arg);
# endif
@ -347,6 +348,9 @@ void xmrig::ConfigTransform::transformBenchmark(rapidjson::Document &doc, int ke
case IConfig::BenchHashKey: /* --hash */
return set(doc, BenchConfig::kBenchmark, BenchConfig::kHash, arg);
case IConfig::UserKey: /* --user */
return set(doc, BenchConfig::kBenchmark, BenchConfig::kUser, arg);
}
}
#endif

View file

@ -96,6 +96,11 @@ public:
return CN_ITER / 8;
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
case Algorithm::CN_UPX2:
return CN_ITER / 32;
# endif
default:
break;
}
@ -111,6 +116,12 @@ public:
}
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
if (algo == Algorithm::CN_UPX2) {
return 0x1FFF0;
}
# endif
return ((memory(algo) - 1) / 16) * 16;
}
@ -149,6 +160,9 @@ public:
# ifdef XMRIG_ALGO_CN_PICO
case Algorithm::CN_PICO_0:
case Algorithm::CN_PICO_TLO:
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
case Algorithm::CN_UPX2:
# endif
return Algorithm::CN_2;
@ -176,6 +190,7 @@ template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_FAST>::base() con
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_RTO>::base() const { return Algorithm::CN_1; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_LITE_1>::base() const { return Algorithm::CN_1; }
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_TUBE>::base() const { return Algorithm::CN_1; }
// cn/upx2 reports Algorithm::CN_2 as its base algorithm.
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_UPX2>::base() const { return Algorithm::CN_2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_FAST>::iterations() const { return CN_ITER / 2; }
@ -192,6 +207,7 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() con
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_TLO>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_CCX>::iterations() const { return CN_ITER / 2; }
// cn/upx2 runs 1/32 of the CN_ITER iteration count.
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::iterations() const { return CN_ITER / 32; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_LITE_0>::memory() const { return CN_MEMORY / 2; }
@ -201,9 +217,11 @@ template<> constexpr inline size_t CnAlgo<Algorithm::CN_HEAVY_TUBE>::memory() co
template<> constexpr inline size_t CnAlgo<Algorithm::CN_HEAVY_XHV>::memory() const { return CN_MEMORY * 2; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_PICO_0>::memory() const { return CN_MEMORY / 8; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_PICO_TLO>::memory() const { return CN_MEMORY / 8; }
// cn/upx2 uses 1/16 of CN_MEMORY.
template<> constexpr inline size_t CnAlgo<Algorithm::CN_UPX2>::memory() const { return CN_MEMORY / 16; }
// Explicit address masks. 0x1FFF0 clears the low four bits (16-byte granularity)
// and keeps offsets below 0x20000; CN_PICO_0 and CN_UPX2 share the same value.
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::mask() const { return 0x1FFF0; }
} /* namespace xmrig */

View file

@ -99,8 +99,11 @@ cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr;
cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
cn_mainloop_fun cn_upx2_double_mainloop_asm = nullptr;
template<typename T, typename U>
template<Algorithm::Id SOURCE_ALGO = Algorithm::CN_2, typename T, typename U>
static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask = CnAlgo<Algorithm::CN_HALF>().mask())
{
auto p = reinterpret_cast<const uint8_t*>(src);
@ -124,11 +127,11 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
auto patched_data = reinterpret_cast<uint8_t*>(dst);
for (size_t i = 0; i + sizeof(uint32_t) <= size; ++i) {
switch (*(uint32_t*)(patched_data + i)) {
case CnAlgo<Algorithm::CN_2>().iterations():
case CnAlgo<SOURCE_ALGO>().iterations():
*(uint32_t*)(patched_data + i) = iterations;
break;
case CnAlgo<Algorithm::CN_2>().mask():
case CnAlgo<SOURCE_ALGO>().mask():
*(uint32_t*)(patched_data + i) = mask;
break;
}
@ -138,7 +141,7 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
static void patchAsmVariants()
{
const int allocation_size = 81920;
const int allocation_size = 131072;
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
@ -170,6 +173,11 @@ static void patchAsmVariants()
cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x13000);
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
cn_upx2_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x14000);
cn_upx2_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x15000);
# endif
{
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
@ -219,6 +227,16 @@ static void patchAsmVariants()
patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
}
# ifdef XMRIG_ALGO_CN_FEMTO
{
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_UPX2>().iterations();
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_UPX2>().mask();
patchCode<Algorithm::CN_RWZ>(cn_upx2_mainloop_asm, cnv2_rwz_mainloop_asm, ITER, MASK);
patchCode<Algorithm::CN_RWZ>(cn_upx2_double_mainloop_asm, cnv2_rwz_double_mainloop_asm, ITER, MASK);
}
#endif
VirtualMemory::protectRX(base, allocation_size);
VirtualMemory::flushInstructionCache(base, allocation_size);
}
@ -272,6 +290,11 @@ xmrig::CnHash::CnHash()
ADD_FN(Algorithm::CN_CCX);
# ifdef XMRIG_ALGO_CN_FEMTO
ADD_FN(Algorithm::CN_UPX2);
ADD_FN_ASM(Algorithm::CN_UPX2);
# endif
# ifdef XMRIG_ALGO_ARGON2
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE_SOFT][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;

View file

@ -395,7 +395,7 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
uint64_t* mem_out = (uint64_t*)&l[idx];
if (props.base() == Algorithm::CN_2) {
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
} else {
__m128i tmp = _mm_xor_si128(bx0, cx);
@ -528,7 +528,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
if (ALGO == Algorithm::CN_R) {
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
} else {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
}
}
@ -704,7 +704,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (ALGO == Algorithm::CN_R) {
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
} else {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
}
}
@ -764,7 +764,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (ALGO == Algorithm::CN_R) {
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
} else {
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
}
}

View file

@ -370,6 +370,23 @@ const static uint8_t test_output_pico_tlo[160] = {
#endif
#ifdef XMRIG_ALGO_CN_FEMTO
// "cn/upx2"
// Reference output (160 bytes) for the cn/upx2 self-test; CpuWorker::selfTest()
// passes this table to verify(Algorithm::CN_UPX2, ...).
// NOTE(review): presumably five consecutive 32-byte hashes, like the other
// test_output_* tables — confirm against verify().
const static uint8_t test_output_femto_upx2[160] = {
0xAA, 0xBB, 0xB8, 0xED, 0x14, 0xA8, 0x35, 0xFA, 0x22, 0xCF, 0xB1, 0xB5, 0xDE, 0xA8, 0x72, 0xB0,
0xA1, 0xD6, 0xCB, 0xD8, 0x46, 0xF4, 0x39, 0x1C, 0x0F, 0x01, 0xF3, 0x87, 0x5E, 0x3A, 0x37, 0x61,
0x38, 0x59, 0x15, 0x72, 0xF8, 0x20, 0xD4, 0xDE, 0x25, 0x3C, 0xF5, 0x5A, 0x21, 0x92, 0xB6, 0x22,
0xB0, 0x28, 0x9E, 0x2E, 0x5C, 0x36, 0x16, 0xE6, 0x1E, 0x78, 0x7A, 0x8F, 0xE4, 0x62, 0xEC, 0x5A,
0xFD, 0x58, 0xCC, 0x6F, 0x3C, 0xD3, 0x8A, 0x0B, 0x5B, 0x6C, 0x83, 0x4E, 0x9B, 0xD4, 0xC2, 0x5A,
0x43, 0x2C, 0x48, 0x98, 0xF3, 0x16, 0xCA, 0x87, 0xE9, 0x5F, 0x44, 0x93, 0x53, 0x48, 0x00, 0xA3,
0xE8, 0xE4, 0xB6, 0x9D, 0x5A, 0x3B, 0x49, 0x2C, 0x21, 0xE9, 0x4B, 0x02, 0xFC, 0x87, 0x8D, 0x75,
0x66, 0x05, 0xAF, 0xA3, 0x9D, 0xC9, 0xD8, 0x88, 0x2D, 0x67, 0x31, 0x21, 0x4C, 0x4D, 0x88, 0x7D,
0x86, 0x9E, 0x4D, 0x74, 0xF4, 0x4C, 0x57, 0x27, 0xCF, 0xEF, 0x86, 0x01, 0xB0, 0x52, 0x18, 0xF3,
0xAD, 0xE4, 0x52, 0x5E, 0xB0, 0x4A, 0x97, 0xB4, 0x96, 0x18, 0xB6, 0x9C, 0x93, 0x0E, 0x49, 0xBB,
};
#endif
#ifdef XMRIG_ALGO_ARGON2
// "argon2/chukwa"
const static uint8_t argon2_chukwa_test_out[160] = {

View file

@ -563,7 +563,7 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
constexpr CnAlgo<ALGO> props;
if (props.base() == Algorithm::CN_2) {
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
_mm_store_si128(reinterpret_cast<__m128i *>(mem_out), _mm_xor_si128(bx0, cx));
} else {
__m128i tmp = _mm_xor_si128(bx0, cx);
@ -715,7 +715,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
if (ALGO == Algorithm::CN_R) {
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
} else {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
}
}
@ -789,6 +789,7 @@ extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx * *ctx);
namespace xmrig {
@ -822,6 +823,8 @@ extern cn_mainloop_fun cn_double_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_upx2_mainloop_asm;
extern cn_mainloop_fun cn_upx2_double_mainloop_asm;
} // namespace xmrig
@ -934,6 +937,11 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
cn_double_mainloop_bulldozer_asm(ctx);
}
}
# ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) {
cn_upx2_mainloop_asm(ctx);
}
# endif
else if (props.isR()) {
ctx[0]->generated_code(ctx);
}
@ -976,6 +984,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
else if (ALGO == Algorithm::CN_PICO_TLO) {
cn_tlo_double_mainloop_sandybridge_asm(ctx);
}
# endif
# ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) {
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
cnv2_upx_double_mainloop_zen3_asm(ctx);
}
else {
cn_upx2_double_mainloop_asm(ctx);
}
}
# endif
else if (ALGO == Algorithm::CN_RWZ) {
cnv2_rwz_double_mainloop_asm(ctx);
@ -1134,7 +1152,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (ALGO == Algorithm::CN_R) {
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
} else {
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
}
}
@ -1192,7 +1210,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (ALGO == Algorithm::CN_R) {
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
} else {
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
}
}
@ -1302,7 +1320,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
if (ALGO == Algorithm::CN_R) { \
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \
} else { \
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0)); \
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0)); \
} \
} \
if (ALGO == Algorithm::CN_R) { \

View file

@ -0,0 +1,320 @@
mov rax, rsp
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 232
mov rdi, QWORD PTR [rcx+8]
mov edx, 768
mov rbx, QWORD PTR [rcx]
mov ecx, 256
movaps XMMWORD PTR [rax-88], xmm6
movaps XMMWORD PTR [rax-104], xmm7
mov r13, QWORD PTR [rdi+224]
movq xmm0, QWORD PTR [rdi+104]
mov r12, QWORD PTR [rbx+224]
movaps XMMWORD PTR [rax-120], xmm8
movaps XMMWORD PTR [rax-136], xmm9
movaps XMMWORD PTR [rax-152], xmm10
movaps XMMWORD PTR [rsp+112], xmm11
movaps XMMWORD PTR [rsp+96], xmm12
movaps XMMWORD PTR [rsp+80], xmm13
movq xmm13, QWORD PTR [rbx+96]
movaps XMMWORD PTR [rsp+64], xmm14
movq xmm14, QWORD PTR [rbx+104]
movaps XMMWORD PTR [rsp+48], xmm15
movq xmm15, QWORD PTR [rdi+96]
mov QWORD PTR [rsp], r13
movdqa XMMWORD PTR [rsp+32], xmm0
stmxcsr DWORD PTR [rsp+24]
mov DWORD PTR [rsp+28], 24448
ldmxcsr DWORD PTR [rsp+28]
mov rcx, QWORD PTR [rbx+56]
xorps xmm12, xmm12
xor rcx, QWORD PTR [rbx+24]
mov rax, QWORD PTR [rbx+48]
xor rax, QWORD PTR [rbx+16]
mov rsi, QWORD PTR [rbx+32]
mov rbp, QWORD PTR [rdi+32]
movq xmm0, rcx
mov rcx, QWORD PTR [rbx+88]
xor rcx, QWORD PTR [rbx+72]
movq xmm7, rax
mov rax, QWORD PTR [rbx+80]
xor rax, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+40]
mov r15, QWORD PTR [rdi+40]
xor rsi, QWORD PTR [rbx]
xor rbp, QWORD PTR [rdi]
movq xmm9, rax
mov rax, QWORD PTR [rdi+48]
xor rax, QWORD PTR [rdi+16]
xor r14, QWORD PTR [rbx+8]
xor r15, QWORD PTR [rdi+8]
movq xmm8, rax
punpcklqdq xmm7, xmm0
mov eax, 1023
shl rax, 52
movq xmm11, rax
punpcklqdq xmm11, xmm11
mov rax, QWORD PTR [rdi+80]
movq xmm0, rcx
mov rcx, QWORD PTR [rdi+56]
xor rcx, QWORD PTR [rdi+24]
punpcklqdq xmm9, xmm0
mov QWORD PTR [rsp+8], 16384
movq xmm0, rcx
mov rcx, QWORD PTR [rdi+88]
xor rcx, QWORD PTR [rdi+72]
xor rax, QWORD PTR [rdi+64]
punpcklqdq xmm8, xmm0
movq xmm0, rcx
movq xmm10, rax
mov rax, 4389456576511
mov QWORD PTR [rsp+16], rax
mov rax, -4389456576512
mov QWORD PTR [rsp+216], rax
punpcklqdq xmm10, xmm0
ALIGN(64)
upx2_main_loop:
mov rdx, rsi
mov r9, rbp
and edx, 131056
and r9d, 131056
movdqu xmm6, XMMWORD PTR [rdx+r12]
lea r8, QWORD PTR [rdx+r12]
movdqu xmm4, XMMWORD PTR [r9+r13]
lea r10, QWORD PTR [r9+r13]
mov ecx, edx
mov eax, edx
xor rax, 32
xor rcx, 48
xor rdx, 16
movq xmm0, r14
movq xmm3, rsi
movq xmm5, rbp
punpcklqdq xmm3, xmm0
movq xmm0, r15
movdqu xmm2, XMMWORD PTR [rax+r12]
movdqu xmm1, XMMWORD PTR [rcx+r12]
paddq xmm2, xmm3
punpcklqdq xmm5, xmm0
paddq xmm1, xmm7
aesenc xmm6, xmm3
aesenc xmm4, xmm5
movdqa xmm0, xmm9
movq rdi, xmm4
paddq xmm0, XMMWORD PTR [rdx+r12]
movdqu XMMWORD PTR [rdx+r12], xmm0
xor edx, edx
movdqu XMMWORD PTR [rax+r12], xmm1
movdqa xmm0, xmm6
movdqu XMMWORD PTR [rcx+r12], xmm2
pxor xmm0, xmm7
movdqu XMMWORD PTR [r8], xmm0
mov ecx, r9d
xor rcx, 48
mov eax, r9d
xor rax, 32
xor r9, 16
movdqa xmm0, xmm10
movdqu xmm1, XMMWORD PTR [rcx+r13]
movdqu xmm2, XMMWORD PTR [rax+r13]
paddq xmm1, xmm8
paddq xmm0, XMMWORD PTR [r9+r13]
paddq xmm2, xmm5
movdqu XMMWORD PTR [r9+r13], xmm0
movq r9, xmm6
movdqu XMMWORD PTR [rax+r13], xmm1
movdqa xmm0, xmm4
movdqu XMMWORD PTR [rcx+r13], xmm2
pxor xmm0, xmm8
movdqu XMMWORD PTR [r10], xmm0
movq rcx, xmm14
mov rax, rcx
movq r10, xmm13
shl rax, 32
movdqa xmm0, xmm6
xor r10, rax
psrldq xmm0, 8
lea r8, QWORD PTR [rcx+rcx]
movq rax, xmm0
add r8d, r9d
mov ecx, -2147483647
or r8, rcx
mov r11, r9
div r8
and r11d, 131056
movaps xmm1, xmm12
mov eax, eax
add r11, r12
shl rdx, 32
add rdx, rax
xor r10, QWORD PTR [r11]
mov rbx, QWORD PTR [r11+8]
lea r8, QWORD PTR [rdx+r9]
movq xmm13, rdx
mov rax, r8
shr rax, 12
movq xmm0, rax
paddq xmm0, xmm11
sqrtsd xmm1, xmm0
movq rdx, xmm1
mov rax, rdx
shr rdx, 19
shr rax, 20
mov rcx, rdx
sub rcx, rax
add rax, QWORD PTR [rsp+216]
sub rcx, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp]
imul rcx, rax
mov rax, r10
sub rcx, r8
mov rcx, r9
adc rdx, 0
xor rcx, 32
and ecx, 131056
movq xmm14, rdx
movdqu xmm1, XMMWORD PTR [rcx+r12]
mul r9
paddq xmm1, xmm3
mov r8, rax
xor r8, QWORD PTR [rcx+r12+8]
add r14, r8
movq xmm0, rax
movq xmm2, rdx
xor rdx, QWORD PTR [rcx+r12]
mov rax, r9
xor rax, 48
punpcklqdq xmm2, xmm0
and eax, 131056
add rsi, rdx
xor r9, 16
xor edx, edx
and r9d, 131056
movdqu xmm0, XMMWORD PTR [rax+r12]
paddq xmm0, xmm7
pxor xmm2, XMMWORD PTR [r9+r12]
paddq xmm2, xmm9
movdqu XMMWORD PTR [r9+r12], xmm2
movq r9, xmm15
movdqu XMMWORD PTR [rcx+r12], xmm0
movdqa xmm0, xmm4
mov rcx, QWORD PTR [rsp+32]
movdqu XMMWORD PTR [rax+r12], xmm1
mov rax, rcx
shl rax, 32
movaps xmm1, xmm12
xor r9, rax
psrldq xmm0, 8
lea r8, QWORD PTR [rcx+rcx]
mov QWORD PTR [r11], rsi
add r8d, edi
mov QWORD PTR [r11+8], r14
movq rax, xmm0
mov ecx, -2147483647
or r8, rcx
xor rsi, r10
div r8
mov r10, rdi
xor r14, rbx
mov eax, eax
and r10d, 131056
shl rdx, 32
add r10, r13
add rdx, rax
xor r9, QWORD PTR [r10]
mov r11, QWORD PTR [r10+8]
lea r8, QWORD PTR [rdx+rdi]
mov rax, r8
movq xmm15, rdx
shr rax, 12
movq xmm0, rax
paddq xmm0, xmm11
sqrtsd xmm1, xmm0
movq rdx, xmm1
mov rax, rdx
shr rax, 20
shr rdx, 19
mov rcx, rdx
sub rcx, rax
sub rcx, QWORD PTR [rsp+16]
movdqa xmm9, xmm7
movdqa xmm7, xmm6
add rax, QWORD PTR [rsp+216]
imul rcx, rax
mov rax, r9
sub rcx, r8
mov rcx, rdi
adc rdx, 0
xor rcx, 32
and ecx, 131056
mov QWORD PTR [rsp+32], rdx
movdqu xmm1, XMMWORD PTR [rcx+r13]
mul rdi
paddq xmm1, xmm5
mov r8, rax
xor r8, QWORD PTR [rcx+r13+8]
add r15, r8
movq xmm0, rax
movq xmm2, rdx
xor rdx, QWORD PTR [rcx+r13]
mov rax, rdi
xor rdi, 16
punpcklqdq xmm2, xmm0
xor rax, 48
and edi, 131056
and eax, 131056
add rbp, rdx
pxor xmm2, XMMWORD PTR [rdi+r13]
movdqu xmm0, XMMWORD PTR [rax+r13]
paddq xmm2, xmm10
movdqu XMMWORD PTR [rdi+r13], xmm2
paddq xmm0, xmm8
movdqu XMMWORD PTR [rcx+r13], xmm0
movdqa xmm10, xmm8
movdqu XMMWORD PTR [rax+r13], xmm1
movdqa xmm8, xmm4
mov QWORD PTR [r10], rbp
xor rbp, r9
mov QWORD PTR [r10+8], r15
xor r15, r11
sub QWORD PTR [rsp+8], 1
jne upx2_main_loop
ldmxcsr DWORD PTR [rsp+24]
movaps xmm13, XMMWORD PTR [rsp+80]
lea r11, QWORD PTR [rsp+232]
movaps xmm6, XMMWORD PTR [r11-24]
movaps xmm7, XMMWORD PTR [r11-40]
movaps xmm8, XMMWORD PTR [r11-56]
movaps xmm9, XMMWORD PTR [r11-72]
movaps xmm10, XMMWORD PTR [r11-88]
movaps xmm11, XMMWORD PTR [r11-104]
movaps xmm12, XMMWORD PTR [r11-120]
movaps xmm14, XMMWORD PTR [rsp+64]
movaps xmm15, XMMWORD PTR [rsp+48]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
pop rbx

View file

@ -17,6 +17,7 @@
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
.global FN_PREFIX(cnv2_rwz_mainloop_asm)
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
@ -72,6 +73,15 @@ FN_PREFIX(cnv2_rwz_double_mainloop_asm):
ret 0
mov eax, 3735929054
ALIGN(64)
/* Entry point for the upx double main loop; the loop body is assembled from
   cn2/cnv2_upx_double_mainloop_zen3.inc. */
FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
/* Reserve 48 bytes of stack around the included body (released by the add below). */
sub rsp, 48
/* Copy rdi into rcx before the body runs. Presumably the body takes its
   argument in rcx while the caller passes it in rdi - TODO confirm against the .inc. */
mov rcx, rdi
#include "cn2/cnv2_upx_double_mainloop_zen3.inc"
add rsp, 48
ret 0
/* 3735929054 == 0xDEADC0DE. It follows ret, so it is never reached by
   fall-through; presumably an end-of-routine marker - TODO confirm. */
mov eax, 3735929054
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View file

@ -48,5 +48,12 @@ cnv2_rwz_double_mainloop_asm PROC
mov eax, 3735929054
cnv2_rwz_double_mainloop_asm ENDP
ALIGN(64)
; Entry point for the upx double main loop; the body is assembled from
; cn2/cnv2_upx_double_mainloop_zen3.inc.
cnv2_upx_double_mainloop_zen3_asm PROC
INCLUDE cn2/cnv2_upx_double_mainloop_zen3.inc
ret 0
; 3735929054 == 0xDEADC0DE. It follows ret, so it is never reached by
; fall-through; presumably an end-of-routine marker - TODO confirm.
mov eax, 3735929054
cnv2_upx_double_mainloop_zen3_asm ENDP
_TEXT_CNV2_MAINLOOP ENDS
END

View file

@ -0,0 +1,320 @@
mov rax, rsp
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 232
mov rdi, QWORD PTR [rcx+8]
mov edx, 768
mov rbx, QWORD PTR [rcx]
mov ecx, 256
movaps XMMWORD PTR [rax-88], xmm6
movaps XMMWORD PTR [rax-104], xmm7
mov r13, QWORD PTR [rdi+224]
movd xmm0, QWORD PTR [rdi+104]
mov r12, QWORD PTR [rbx+224]
movaps XMMWORD PTR [rax-120], xmm8
movaps XMMWORD PTR [rax-136], xmm9
movaps XMMWORD PTR [rax-152], xmm10
movaps XMMWORD PTR [rsp+112], xmm11
movaps XMMWORD PTR [rsp+96], xmm12
movaps XMMWORD PTR [rsp+80], xmm13
movd xmm13, QWORD PTR [rbx+96]
movaps XMMWORD PTR [rsp+64], xmm14
movd xmm14, QWORD PTR [rbx+104]
movaps XMMWORD PTR [rsp+48], xmm15
movd xmm15, QWORD PTR [rdi+96]
mov QWORD PTR [rsp], r13
movdqa XMMWORD PTR [rsp+32], xmm0
stmxcsr DWORD PTR [rsp+24]
mov DWORD PTR [rsp+28], 24448
ldmxcsr DWORD PTR [rsp+28]
mov rcx, QWORD PTR [rbx+56]
xorps xmm12, xmm12
xor rcx, QWORD PTR [rbx+24]
mov rax, QWORD PTR [rbx+48]
xor rax, QWORD PTR [rbx+16]
mov rsi, QWORD PTR [rbx+32]
mov rbp, QWORD PTR [rdi+32]
movd xmm0, rcx
mov rcx, QWORD PTR [rbx+88]
xor rcx, QWORD PTR [rbx+72]
movd xmm7, rax
mov rax, QWORD PTR [rbx+80]
xor rax, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+40]
mov r15, QWORD PTR [rdi+40]
xor rsi, QWORD PTR [rbx]
xor rbp, QWORD PTR [rdi]
movd xmm9, rax
mov rax, QWORD PTR [rdi+48]
xor rax, QWORD PTR [rdi+16]
xor r14, QWORD PTR [rbx+8]
xor r15, QWORD PTR [rdi+8]
movd xmm8, rax
punpcklqdq xmm7, xmm0
mov eax, 1023
shl rax, 52
movd xmm11, rax
punpcklqdq xmm11, xmm11
mov rax, QWORD PTR [rdi+80]
movd xmm0, rcx
mov rcx, QWORD PTR [rdi+56]
xor rcx, QWORD PTR [rdi+24]
punpcklqdq xmm9, xmm0
mov QWORD PTR [rsp+8], 16384
movd xmm0, rcx
mov rcx, QWORD PTR [rdi+88]
xor rcx, QWORD PTR [rdi+72]
xor rax, QWORD PTR [rdi+64]
punpcklqdq xmm8, xmm0
movd xmm0, rcx
movd xmm10, rax
mov rax, 4389456576511
mov QWORD PTR [rsp+16], rax
mov rax, -4389456576512
mov QWORD PTR [rsp+128], rax
punpcklqdq xmm10, xmm0
ALIGN(64)
upx2_main_loop:
mov rdx, rsi
mov r9, rbp
and edx, 131056
and r9d, 131056
movdqu xmm6, XMMWORD PTR [rdx+r12]
lea r8, QWORD PTR [rdx+r12]
movdqu xmm4, XMMWORD PTR [r9+r13]
lea r10, QWORD PTR [r9+r13]
mov ecx, edx
mov eax, edx
xor rax, 32
xor rcx, 48
xor rdx, 16
movd xmm0, r14
movd xmm3, rsi
movd xmm5, rbp
punpcklqdq xmm3, xmm0
movd xmm0, r15
movdqu xmm2, XMMWORD PTR [rax+r12]
movdqu xmm1, XMMWORD PTR [rcx+r12]
paddq xmm2, xmm3
punpcklqdq xmm5, xmm0
paddq xmm1, xmm7
aesenc xmm6, xmm3
aesenc xmm4, xmm5
movdqa xmm0, xmm9
movd rdi, xmm4
paddq xmm0, XMMWORD PTR [rdx+r12]
movdqu XMMWORD PTR [rdx+r12], xmm0
xor edx, edx
movdqu XMMWORD PTR [rax+r12], xmm1
movdqa xmm0, xmm6
movdqu XMMWORD PTR [rcx+r12], xmm2
pxor xmm0, xmm7
movdqu XMMWORD PTR [r8], xmm0
mov ecx, r9d
xor rcx, 48
mov eax, r9d
xor rax, 32
xor r9, 16
movdqa xmm0, xmm10
movdqu xmm1, XMMWORD PTR [rcx+r13]
movdqu xmm2, XMMWORD PTR [rax+r13]
paddq xmm1, xmm8
paddq xmm0, XMMWORD PTR [r9+r13]
paddq xmm2, xmm5
movdqu XMMWORD PTR [r9+r13], xmm0
movd r9, xmm6
movdqu XMMWORD PTR [rax+r13], xmm1
movdqa xmm0, xmm4
movdqu XMMWORD PTR [rcx+r13], xmm2
pxor xmm0, xmm8
movdqu XMMWORD PTR [r10], xmm0
movd rcx, xmm14
mov rax, rcx
movd r10, xmm13
shl rax, 32
movdqa xmm0, xmm6
xor r10, rax
psrldq xmm0, 8
lea r8, QWORD PTR [rcx+rcx]
movd rax, xmm0
add r8d, r9d
mov ecx, -2147483647
or r8, rcx
mov r11, r9
div r8
and r11d, 131056
movaps xmm1, xmm12
mov eax, eax
add r11, r12
shl rdx, 32
add rdx, rax
xor r10, QWORD PTR [r11]
mov rbx, QWORD PTR [r11+8]
lea r8, QWORD PTR [rdx+r9]
movd xmm13, rdx
mov rax, r8
shr rax, 12
movd xmm0, rax
paddq xmm0, xmm11
sqrtsd xmm1, xmm0
movd rdx, xmm1
mov rax, rdx
shr rdx, 19
shr rax, 20
mov rcx, rdx
sub rcx, rax
add rax, QWORD PTR [rsp+128]
sub rcx, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp]
imul rcx, rax
mov rax, r10
sub rcx, r8
mov rcx, r9
adc rdx, 0
xor rcx, 32
and ecx, 131056
movd xmm14, rdx
movdqu xmm1, XMMWORD PTR [rcx+r12]
mul r9
paddq xmm1, xmm3
mov r8, rax
xor r8, QWORD PTR [rcx+r12+8]
add r14, r8
movd xmm0, rax
movd xmm2, rdx
xor rdx, QWORD PTR [rcx+r12]
mov rax, r9
xor rax, 48
punpcklqdq xmm2, xmm0
and eax, 131056
add rsi, rdx
xor r9, 16
xor edx, edx
and r9d, 131056
movdqu xmm0, XMMWORD PTR [rax+r12]
paddq xmm0, xmm7
pxor xmm2, XMMWORD PTR [r9+r12]
paddq xmm2, xmm9
movdqu XMMWORD PTR [r9+r12], xmm2
movd r9, xmm15
movdqu XMMWORD PTR [rcx+r12], xmm0
movdqa xmm0, xmm4
mov rcx, QWORD PTR [rsp+32]
movdqu XMMWORD PTR [rax+r12], xmm1
mov rax, rcx
shl rax, 32
movaps xmm1, xmm12
xor r9, rax
psrldq xmm0, 8
lea r8, QWORD PTR [rcx+rcx]
mov QWORD PTR [r11], rsi
add r8d, edi
mov QWORD PTR [r11+8], r14
movd rax, xmm0
mov ecx, -2147483647
or r8, rcx
xor rsi, r10
div r8
mov r10, rdi
xor r14, rbx
mov eax, eax
and r10d, 131056
shl rdx, 32
add r10, r13
add rdx, rax
xor r9, QWORD PTR [r10]
mov r11, QWORD PTR [r10+8]
lea r8, QWORD PTR [rdx+rdi]
mov rax, r8
movd xmm15, rdx
shr rax, 12
movd xmm0, rax
paddq xmm0, xmm11
sqrtsd xmm1, xmm0
movd rdx, xmm1
mov rax, rdx
shr rax, 20
shr rdx, 19
mov rcx, rdx
sub rcx, rax
sub rcx, QWORD PTR [rsp+16]
movdqa xmm9, xmm7
movdqa xmm7, xmm6
add rax, QWORD PTR [rsp+128]
imul rcx, rax
mov rax, r9
sub rcx, r8
mov rcx, rdi
adc rdx, 0
xor rcx, 32
and ecx, 131056
mov QWORD PTR [rsp+32], rdx
movdqu xmm1, XMMWORD PTR [rcx+r13]
mul rdi
paddq xmm1, xmm5
mov r8, rax
xor r8, QWORD PTR [rcx+r13+8]
add r15, r8
movd xmm0, rax
movd xmm2, rdx
xor rdx, QWORD PTR [rcx+r13]
mov rax, rdi
xor rdi, 16
punpcklqdq xmm2, xmm0
xor rax, 48
and edi, 131056
and eax, 131056
add rbp, rdx
pxor xmm2, XMMWORD PTR [rdi+r13]
movdqu xmm0, XMMWORD PTR [rax+r13]
paddq xmm2, xmm10
movdqu XMMWORD PTR [rdi+r13], xmm2
paddq xmm0, xmm8
movdqu XMMWORD PTR [rcx+r13], xmm0
movdqa xmm10, xmm8
movdqu XMMWORD PTR [rax+r13], xmm1
movdqa xmm8, xmm4
mov QWORD PTR [r10], rbp
xor rbp, r9
mov QWORD PTR [r10+8], r15
xor r15, r11
sub QWORD PTR [rsp+8], 1
jne upx2_main_loop
ldmxcsr DWORD PTR [rsp+24]
movaps xmm13, XMMWORD PTR [rsp+80]
lea r11, QWORD PTR [rsp+232]
movaps xmm6, XMMWORD PTR [r11-24]
movaps xmm7, XMMWORD PTR [r11-40]
movaps xmm8, XMMWORD PTR [r11-56]
movaps xmm9, XMMWORD PTR [r11-72]
movaps xmm10, XMMWORD PTR [r11-88]
movaps xmm11, XMMWORD PTR [rsp+112]
movaps xmm12, XMMWORD PTR [rsp+96]
movaps xmm14, XMMWORD PTR [rsp+64]
movaps xmm15, XMMWORD PTR [rsp+48]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
pop rbx

View file

@ -7,6 +7,7 @@
.global cnv2_double_mainloop_sandybridge_asm
.global cnv2_rwz_mainloop_asm
.global cnv2_rwz_double_mainloop_asm
.global cnv2_upx_double_mainloop_zen3_asm
ALIGN(64)
cnv2_mainloop_ivybridge_asm:
@ -43,3 +44,9 @@ cnv2_rwz_double_mainloop_asm:
#include "cn2/cnv2_rwz_double_main_loop.inc"
ret 0
mov eax, 3735929054
ALIGN(64)
/* Entry point for the upx double main loop; the body is assembled from
   cn2/cnv2_upx_double_mainloop_zen3.inc. */
cnv2_upx_double_mainloop_zen3_asm:
#include "cn2/cnv2_upx_double_mainloop_zen3.inc"
ret 0
/* 3735929054 == 0xDEADC0DE. It follows ret, so it is never reached by
   fall-through; presumably an end-of-routine marker - TODO confirm. */
mov eax, 3735929054

View file

@ -48,5 +48,12 @@ cnv2_rwz_double_mainloop_asm PROC
mov eax, 3735929054
cnv2_rwz_double_mainloop_asm ENDP
ALIGN(64)
; Entry point for the upx double main loop; the body is assembled from
; cn2/cnv2_upx_double_mainloop_zen3.inc.
cnv2_upx_double_mainloop_zen3_asm PROC
INCLUDE cn2/cnv2_upx_double_mainloop_zen3.inc
ret 0
; 3735929054 == 0xDEADC0DE. It follows ret, so it is never reached by
; fall-through; presumably an end-of-routine marker - TODO confirm.
mov eax, 3735929054
cnv2_upx_double_mainloop_zen3_asm ENDP
_TEXT_CNV2_MAINLOOP ENDS
END

View file

@ -41,7 +41,7 @@ public:
size_t size = 0;
// True when the allocated count equals the total count.
inline bool isFullyAllocated() const
{
    return total == allocated;
}
// Allocated share of the total, as a percentage in the range the counters allow.
// Returns 0.0 when total is zero so the division below is never performed with a
// zero divisor (guarding on `allocated` instead would not protect that case).
inline double percent() const { return total == 0 ? 0.0 : static_cast<double>(allocated) / total * 100.0; }
// Set all three counters (allocated, total, size) back to zero.
inline void reset()
{
    size      = 0;
    total     = 0;
    allocated = 0;
}
inline HugePagesInfo &operator+=(const HugePagesInfo &other)

View file

@ -0,0 +1,17 @@
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
add rsp, 128

View file

@ -428,7 +428,10 @@ namespace randomx {
xmrig::RxFix::setMainLoopBounds(mainLoopBounds);
# endif
memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask));
imul_rcp_storage = code + (ADDR(randomx_program_imul_rcp_store) - codePrologue) + 2;
imul_rcp_storage_used = 0;
memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask));
codePos = codePosFirst;
prevCFROUND = 0;
@ -1012,13 +1015,24 @@ namespace randomx {
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
*(uint32_t*)(p + pos) = 0xb848;
pos += 2;
emit64(randomx_reciprocal_fast(divisor), p, pos);
const uint32_t dst = instr.dst % RegistersCount;
emit32(0xc0af0f4c + (dst << 27), p, pos);
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
if (imul_rcp_storage_used < 16) {
*(uint64_t*)(imul_rcp_storage) = reciprocal;
*(uint64_t*)(p + pos) = 0x2444AF0F4Cull + (dst << 27) + (static_cast<uint64_t>(248 - imul_rcp_storage_used * 8) << 40);
++imul_rcp_storage_used;
imul_rcp_storage += 11;
pos += 6;
}
else {
*(uint32_t*)(p + pos) = 0xb848;
pos += 2;
emit64(reciprocal, p, pos);
emit32(0xc0af0f4c + (dst << 27), p, pos);
}
registerUsage[dst] = pos;
}

View file

@ -104,6 +104,9 @@ namespace randomx {
uint8_t* allocatedCode = nullptr;
size_t allocatedSize = 0;
uint8_t* imul_rcp_storage = nullptr;
uint32_t imul_rcp_storage_used = 0;
void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&, ProgramConfiguration&);
template<bool rax>

View file

@ -41,6 +41,7 @@
.global DECL(randomx_prefetch_scratchpad_end)
.global DECL(randomx_program_prologue)
.global DECL(randomx_program_prologue_first_load)
.global DECL(randomx_program_imul_rcp_store)
.global DECL(randomx_program_loop_begin)
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_loop_load_xop)
@ -106,11 +107,15 @@ DECL(randomx_program_prologue_first_load):
nop
nop
nop
jmp DECL(randomx_program_loop_begin)
jmp DECL(randomx_program_imul_rcp_store)
.balign 64
#include "asm/program_xmm_constants.inc"
/* Runs the instructions assembled from asm/program_imul_rcp_store.inc, then
   jumps to randomx_program_loop_begin. */
DECL(randomx_program_imul_rcp_store):
#include "asm/program_imul_rcp_store.inc"
jmp DECL(randomx_program_loop_begin)
.balign 64
DECL(randomx_program_loop_begin):
nop

View file

@ -32,6 +32,7 @@ PUBLIC randomx_prefetch_scratchpad
PUBLIC randomx_prefetch_scratchpad_end
PUBLIC randomx_program_prologue
PUBLIC randomx_program_prologue_first_load
PUBLIC randomx_program_imul_rcp_store
PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_loop_load_xop
@ -94,12 +95,17 @@ randomx_program_prologue_first_load PROC
nop
nop
nop
jmp randomx_program_loop_begin
jmp randomx_program_imul_rcp_store
randomx_program_prologue_first_load ENDP
ALIGN 64
include asm/program_xmm_constants.inc
; Runs the instructions assembled from asm/program_imul_rcp_store.inc, then
; jumps to randomx_program_loop_begin.
randomx_program_imul_rcp_store PROC
include asm/program_imul_rcp_store.inc
jmp randomx_program_loop_begin
randomx_program_imul_rcp_store ENDP
ALIGN 64
randomx_program_loop_begin PROC
nop

View file

@ -33,6 +33,7 @@ extern "C" {
void randomx_prefetch_scratchpad_end();
void randomx_program_prologue();
void randomx_program_prologue_first_load();
void randomx_program_imul_rcp_store();
void randomx_program_loop_begin();
void randomx_program_loop_load();
void randomx_program_loop_load_xop();

View file

@ -28,15 +28,15 @@
#define APP_ID "xmrig"
#define APP_NAME "XMRig"
#define APP_DESC "XMRig miner"
#define APP_VERSION "6.11.2"
#define APP_VERSION "6.12.0-dev"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2021 xmrig.com"
#define APP_KIND "miner"
#define APP_VER_MAJOR 6
#define APP_VER_MINOR 11
#define APP_VER_PATCH 2
#define APP_VER_MINOR 12
#define APP_VER_PATCH 0
#ifdef _MSC_VER
# if (_MSC_VER >= 1920)