mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-23 11:15:31 +00:00
Merge branch 'dev'
This commit is contained in:
commit
e2c757d9dd
43 changed files with 6619 additions and 5742 deletions
|
@ -1,3 +1,9 @@
|
|||
# v6.12.0
|
||||
- [#2276](https://github.com/xmrig/xmrig/pull/2276) Added support for Uplexa (`cn/upx2` algorithm).
|
||||
- [#2261](https://github.com/xmrig/xmrig/pull/2261) Show total hashrate if compiled without OpenCL.
|
||||
- [#2289](https://github.com/xmrig/xmrig/pull/2289) RandomX: optimized `IMUL_RCP` instruction.
|
||||
- Added support for `--user` command line option for online benchmark.
|
||||
|
||||
# v6.11.2
|
||||
- [#2207](https://github.com/xmrig/xmrig/issues/2207) Fixed regression in HTTP parser and llhttp updated to v5.1.0.
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ option(WITH_HWLOC "Enable hwloc support" ON)
|
|||
option(WITH_CN_LITE "Enable CryptoNight-Lite algorithms family" ON)
|
||||
option(WITH_CN_HEAVY "Enable CryptoNight-Heavy algorithms family" ON)
|
||||
option(WITH_CN_PICO "Enable CryptoNight-Pico algorithm" ON)
|
||||
option(WITH_CN_FEMTO "Enable CryptoNight-UPX2 algorithm" ON)
|
||||
option(WITH_RANDOMX "Enable RandomX algorithms family" ON)
|
||||
option(WITH_ARGON2 "Enable Argon2 algorithms family" ON)
|
||||
option(WITH_ASTROBWT "Enable AstroBWT algorithms family" ON)
|
||||
|
@ -196,6 +197,10 @@ if (WITH_CN_PICO)
|
|||
add_definitions(/DXMRIG_ALGO_CN_PICO)
|
||||
endif()
|
||||
|
||||
if (WITH_CN_FEMTO)
|
||||
add_definitions(/DXMRIG_ALGO_CN_FEMTO)
|
||||
endif()
|
||||
|
||||
if (WITH_EMBEDDED_CONFIG)
|
||||
add_definitions(/DXMRIG_FEATURE_EMBEDDED_CONFIG)
|
||||
endif()
|
||||
|
|
|
@ -334,13 +334,11 @@ void xmrig::CpuBackend::printHashrate(bool details)
|
|||
i++;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_FEATURE_OPENCL
|
||||
Log::print(WHITE_BOLD_S "| - | - | %7s | %7s | %7s |",
|
||||
Hashrate::format(hashrate()->calc(Hashrate::ShortInterval), num, sizeof num / 3),
|
||||
Hashrate::format(hashrate()->calc(Hashrate::MediumInterval), num + 8, sizeof num / 3),
|
||||
Hashrate::format(hashrate()->calc(Hashrate::LargeInterval), num + 8 * 2, sizeof num / 3)
|
||||
);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -197,6 +197,7 @@ void xmrig::CpuConfig::generate()
|
|||
count += xmrig::generate<Algorithm::CN_LITE>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::CN_PICO>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::CN_FEMTO>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, m_limit);
|
||||
|
|
|
@ -100,6 +100,15 @@ size_t inline generate<Algorithm::CN_PICO>(Threads<CpuThreads> &threads, uint32_
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_FEMTO
|
||||
template<>
|
||||
size_t inline generate<Algorithm::CN_FEMTO>(Threads<CpuThreads>& threads, uint32_t limit)
|
||||
{
|
||||
return generate("cn/upx2", threads, Algorithm::CN_UPX2, limit);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_RANDOMX
|
||||
template<>
|
||||
size_t inline generate<Algorithm::RANDOM_X>(Threads<CpuThreads> &threads, uint32_t limit)
|
||||
|
|
|
@ -193,6 +193,12 @@ bool xmrig::CpuWorker<N>::selfTest()
|
|||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
if (m_algorithm.family() == Algorithm::CN_FEMTO) {
|
||||
return verify(Algorithm::CN_UPX2, test_output_femto_upx2);
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_ARGON2
|
||||
if (m_algorithm.family() == Algorithm::ARGON2) {
|
||||
return verify(Algorithm::AR2_CHUKWA, argon2_chukwa_test_out) &&
|
||||
|
|
|
@ -309,26 +309,34 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3
|
|||
return 1;
|
||||
}
|
||||
|
||||
Algorithm::Family f = algorithm.family();
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_LITE
|
||||
if (algorithm.family() == Algorithm::CN_LITE) {
|
||||
if (f == Algorithm::CN_LITE) {
|
||||
return CpuThreads(count, 1);
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
if (algorithm.family() == Algorithm::CN_PICO) {
|
||||
if (f == Algorithm::CN_PICO) {
|
||||
return CpuThreads(count, 2);
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
if (f == Algorithm::CN_FEMTO) {
|
||||
return CpuThreads(count, 2);
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
if (algorithm.family() == Algorithm::CN_HEAVY) {
|
||||
if (f == Algorithm::CN_HEAVY) {
|
||||
return CpuThreads(std::max<size_t>(count / 4, 1), 1);
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
if (algorithm.family() == Algorithm::RANDOM_X) {
|
||||
if (f == Algorithm::RANDOM_X) {
|
||||
if (algorithm == Algorithm::RX_WOW) {
|
||||
return count;
|
||||
}
|
||||
|
@ -338,13 +346,13 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3
|
|||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_ARGON2
|
||||
if (algorithm.family() == Algorithm::ARGON2) {
|
||||
if (f == Algorithm::ARGON2) {
|
||||
return count;
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_ASTROBWT
|
||||
if (algorithm.family() == Algorithm::ASTROBWT) {
|
||||
if (f == Algorithm::ASTROBWT) {
|
||||
CpuThreads threads;
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
threads.add(i, 0);
|
||||
|
|
|
@ -336,11 +336,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||
|
||||
size_t cacheHashes = ((L3 + extra) + (scratchpad / 2)) / scratchpad;
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
if (intensity && algorithm == Algorithm::CN_PICO_0 && (cacheHashes / PUs) >= 2) {
|
||||
Algorithm::Family family = algorithm.family();
|
||||
if (intensity && ((family == Algorithm::CN_PICO) || (family == Algorithm::CN_FEMTO)) && (cacheHashes / PUs) >= 2) {
|
||||
intensity = 2;
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
if (extra == 0 && algorithm.l2() > 0) {
|
||||
|
|
|
@ -179,6 +179,7 @@ void xmrig::CudaConfig::generate()
|
|||
count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::CN_FEMTO>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::KAWPOW>(m_threads, devices);
|
||||
|
|
|
@ -102,6 +102,15 @@ size_t inline generate<Algorithm::CN_PICO>(Threads<CudaThreads> &threads, const
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_FEMTO
|
||||
template<>
|
||||
size_t inline generate<Algorithm::CN_FEMTO>(Threads<CudaThreads>& threads, const std::vector<CudaDevice>& devices)
|
||||
{
|
||||
return generate("cn/upx2", threads, Algorithm::CN_UPX2, devices);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_RANDOMX
|
||||
template<>
|
||||
size_t inline generate<Algorithm::RANDOM_X>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
|
||||
|
|
|
@ -219,6 +219,7 @@ void xmrig::OclConfig::generate()
|
|||
count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::CN_FEMTO>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, devices);
|
||||
count += xmrig::generate<Algorithm::KAWPOW>(m_threads, devices);
|
||||
|
|
|
@ -101,6 +101,15 @@ size_t inline generate<Algorithm::CN_PICO>(Threads<OclThreads> &threads, const s
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_FEMTO
|
||||
template<>
|
||||
size_t inline generate<Algorithm::CN_FEMTO>(Threads<OclThreads>& threads, const std::vector<OclDevice>& devices)
|
||||
{
|
||||
return generate("cn/upx2", threads, Algorithm::CN_UPX2, devices);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_RANDOMX
|
||||
template<>
|
||||
size_t inline generate<Algorithm::RANDOM_X>(Threads<OclThreads> &threads, const std::vector<OclDevice> &devices)
|
||||
|
|
|
@ -17,16 +17,17 @@
|
|||
#define ALGO_CN_PICO_0 16
|
||||
#define ALGO_CN_PICO_TLO 17
|
||||
#define ALGO_CN_CCX 18
|
||||
#define ALGO_RX_0 19
|
||||
#define ALGO_RX_WOW 20
|
||||
#define ALGO_RX_ARQMA 21
|
||||
#define ALGO_RX_SFX 22
|
||||
#define ALGO_RX_KEVA 23
|
||||
#define ALGO_AR2_CHUKWA 24
|
||||
#define ALGO_AR2_CHUKWA_V2 25
|
||||
#define ALGO_AR2_WRKZ 26
|
||||
#define ALGO_ASTROBWT_DERO 27
|
||||
#define ALGO_KAWPOW_RVN 28
|
||||
#define ALGO_CN_UPX2 19
|
||||
#define ALGO_RX_0 20
|
||||
#define ALGO_RX_WOW 21
|
||||
#define ALGO_RX_ARQMA 22
|
||||
#define ALGO_RX_SFX 23
|
||||
#define ALGO_RX_KEVA 24
|
||||
#define ALGO_AR2_CHUKWA 25
|
||||
#define ALGO_AR2_CHUKWA_V2 26
|
||||
#define ALGO_AR2_WRKZ 27
|
||||
#define ALGO_ASTROBWT_DERO 28
|
||||
#define ALGO_KAWPOW_RVN 29
|
||||
|
||||
#define FAMILY_UNKNOWN 0
|
||||
#define FAMILY_CN 1
|
||||
|
|
|
@ -514,7 +514,7 @@ __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
|||
c = AES_Round(AES0, AES1, AES2, AES3, c, ((uint4 *)a)[0]);
|
||||
|
||||
{
|
||||
# if (ALGO == ALGO_CN_RWZ)
|
||||
# if ((ALGO == ALGO_CN_RWZ) || (ALGO == ALGO_CN_UPX2))
|
||||
const ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(3));
|
||||
const ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
|
||||
const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(1));
|
||||
|
@ -561,7 +561,7 @@ __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
|||
t ^= chunk2;
|
||||
const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3));
|
||||
|
||||
# if (ALGO == ALGO_CN_RWZ)
|
||||
# if ((ALGO == ALGO_CN_RWZ) || (ALGO == ALGO_CN_UPX2))
|
||||
SCRATCHPAD_CHUNK(1) = as_uint4(chunk1 + bx1);
|
||||
SCRATCHPAD_CHUNK(2) = as_uint4(chunk3 + bx0);
|
||||
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -88,7 +88,8 @@ static inline uint32_t getIntensity(const OclDevice &device, const Algorithm &al
|
|||
|
||||
static inline uint32_t getWorksize(const Algorithm &algorithm)
|
||||
{
|
||||
if (algorithm.family() == Algorithm::CN_PICO) {
|
||||
Algorithm::Family f = algorithm.family();
|
||||
if (f == Algorithm::CN_PICO || f == Algorithm::CN_FEMTO) {
|
||||
return 64;
|
||||
}
|
||||
|
||||
|
|
|
@ -39,10 +39,12 @@
|
|||
xmrig::OclCnRunner::OclCnRunner(size_t index, const OclLaunchData &data) : OclBaseRunner(index, data)
|
||||
{
|
||||
uint32_t stridedIndex = data.thread.stridedIndex();
|
||||
Algorithm::Family f = m_algorithm.family();
|
||||
|
||||
if (data.device.vendorId() == OCL_VENDOR_NVIDIA) {
|
||||
stridedIndex = 0;
|
||||
}
|
||||
else if (stridedIndex == 1 && (m_algorithm.family() == Algorithm::CN_PICO || (m_algorithm.family() == Algorithm::CN && CnAlgo<>::base(m_algorithm) == Algorithm::CN_2))) {
|
||||
else if (stridedIndex == 1 && (f == Algorithm::CN_PICO || f == Algorithm::CN_FEMTO || (f == Algorithm::CN && CnAlgo<>::base(m_algorithm) == Algorithm::CN_2))) {
|
||||
stridedIndex = 2;
|
||||
}
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -129,6 +129,11 @@ static AlgoName const algorithm_names[] = {
|
|||
# endif
|
||||
{ "cryptonight/ccx", "cn/ccx", Algorithm::CN_CCX },
|
||||
{ "cryptonight/conceal", "cn/conceal", Algorithm::CN_CCX },
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
{ "cryptonight/upx2", "cn/upx2", Algorithm::CN_UPX2 },
|
||||
{ "cn-extremelite/upx2", nullptr, Algorithm::CN_UPX2 },
|
||||
{ "cryptonight-upx/2", nullptr, Algorithm::CN_UPX2 },
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
|
@ -199,6 +204,9 @@ size_t xmrig::Algorithm::l3() const
|
|||
case CN_PICO:
|
||||
return oneMiB / 4;
|
||||
|
||||
case CN_FEMTO:
|
||||
return oneMiB / 8;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -329,6 +337,11 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
|
|||
return CN_PICO;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
case CN_UPX2:
|
||||
return CN_FEMTO;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
case RX_0:
|
||||
case RX_WOW:
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -64,6 +64,7 @@ public:
|
|||
CN_PICO_0, // "cn-pico" CryptoNight-Pico
|
||||
CN_PICO_TLO, // "cn-pico/tlo" CryptoNight-Pico (TLO)
|
||||
CN_CCX, // "cn/ccx" Conceal (CCX)
|
||||
CN_UPX2, // "cn/upx2" Uplexa (UPX2)
|
||||
RX_0, // "rx/0" RandomX (reference configuration).
|
||||
RX_WOW, // "rx/wow" RandomWOW (Wownero).
|
||||
RX_ARQ, // "rx/arq" RandomARQ (Arqma).
|
||||
|
@ -83,6 +84,7 @@ public:
|
|||
CN_LITE,
|
||||
CN_HEAVY,
|
||||
CN_PICO,
|
||||
CN_FEMTO,
|
||||
RANDOM_X,
|
||||
ARGON2,
|
||||
ASTROBWT,
|
||||
|
@ -94,7 +96,7 @@ public:
|
|||
inline Algorithm(Id id) : m_id(id) {}
|
||||
Algorithm(const rapidjson::Value &value);
|
||||
|
||||
inline bool isCN() const { auto f = family(); return f == CN || f == CN_LITE || f == CN_HEAVY || f == CN_PICO; }
|
||||
inline bool isCN() const { auto f = family(); return f == CN || f == CN_LITE || f == CN_HEAVY || f == CN_PICO || f == CN_FEMTO; }
|
||||
inline bool isEqual(const Algorithm &other) const { return m_id == other.m_id; }
|
||||
inline bool isValid() const { return m_id != INVALID && family() != UNKNOWN; }
|
||||
inline const char *name() const { return name(false); }
|
||||
|
|
|
@ -334,6 +334,7 @@ void xmrig::BenchClient::send(Request request)
|
|||
{
|
||||
doc.AddMember(StringRef(BenchConfig::kSize), m_benchmark->size(), allocator);
|
||||
doc.AddMember(StringRef(BenchConfig::kAlgo), m_benchmark->algorithm().toJSON(), allocator);
|
||||
doc.AddMember(StringRef(BenchConfig::kUser), m_benchmark->user().toJSON(), allocator);
|
||||
doc.AddMember("version", APP_VERSION, allocator);
|
||||
doc.AddMember("threads", m_threads, allocator);
|
||||
doc.AddMember("steady_ready_ts", m_readyTime, allocator);
|
||||
|
|
|
@ -41,6 +41,7 @@ const char *BenchConfig::kSeed = "seed";
|
|||
const char *BenchConfig::kSize = "size";
|
||||
const char *BenchConfig::kSubmit = "submit";
|
||||
const char *BenchConfig::kToken = "token";
|
||||
const char *BenchConfig::kUser = "user";
|
||||
const char *BenchConfig::kVerify = "verify";
|
||||
|
||||
#ifndef XMRIG_DEBUG_BENCHMARK_API
|
||||
|
@ -59,8 +60,8 @@ xmrig::BenchConfig::BenchConfig(uint32_t size, const String &id, const rapidjson
|
|||
m_id(id),
|
||||
m_seed(Json::getString(object, kSeed)),
|
||||
m_token(Json::getString(object, kToken)),
|
||||
m_size(size),
|
||||
m_hash(0)
|
||||
m_user(Json::getString(object, kUser)),
|
||||
m_size(size)
|
||||
{
|
||||
if (!m_algorithm.isValid() || m_algorithm.family() != Algorithm::RANDOM_X) {
|
||||
m_algorithm = Algorithm::RX_0;
|
||||
|
@ -111,6 +112,7 @@ rapidjson::Value xmrig::BenchConfig::toJSON(rapidjson::Document &doc) const
|
|||
out.AddMember(StringRef(kVerify), m_id.toJSON(), allocator);
|
||||
out.AddMember(StringRef(kToken), m_token.toJSON(), allocator);
|
||||
out.AddMember(StringRef(kSeed), m_seed.toJSON(), allocator);
|
||||
out.AddMember(StringRef(kUser), m_user.toJSON(), allocator);
|
||||
|
||||
if (m_hash) {
|
||||
out.AddMember(StringRef(kHash), Value(fmt::format("{:016X}", m_hash).c_str(), allocator), allocator);
|
||||
|
|
|
@ -39,6 +39,7 @@ public:
|
|||
static const char *kSize;
|
||||
static const char *kSubmit;
|
||||
static const char *kToken;
|
||||
static const char *kUser;
|
||||
static const char *kVerify;
|
||||
|
||||
# ifndef XMRIG_DEBUG_BENCHMARK_API
|
||||
|
@ -59,6 +60,7 @@ public:
|
|||
inline const String &id() const { return m_id; }
|
||||
inline const String &seed() const { return m_seed; }
|
||||
inline const String &token() const { return m_token; }
|
||||
inline const String &user() const { return m_user; }
|
||||
inline uint32_t size() const { return m_size; }
|
||||
inline uint64_t hash() const { return m_hash; }
|
||||
|
||||
|
@ -73,8 +75,9 @@ private:
|
|||
String m_id;
|
||||
String m_seed;
|
||||
String m_token;
|
||||
String m_user;
|
||||
uint32_t m_size;
|
||||
uint64_t m_hash;
|
||||
uint64_t m_hash = 0;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -262,6 +262,7 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const
|
|||
case IConfig::BenchTokenKey: /* --token */
|
||||
case IConfig::BenchSeedKey: /* --seed */
|
||||
case IConfig::BenchHashKey: /* --hash */
|
||||
case IConfig::UserKey: /* --user */
|
||||
return transformBenchmark(doc, key, arg);
|
||||
# endif
|
||||
|
||||
|
@ -347,6 +348,9 @@ void xmrig::ConfigTransform::transformBenchmark(rapidjson::Document &doc, int ke
|
|||
|
||||
case IConfig::BenchHashKey: /* --hash */
|
||||
return set(doc, BenchConfig::kBenchmark, BenchConfig::kHash, arg);
|
||||
|
||||
case IConfig::UserKey: /* --user */
|
||||
return set(doc, BenchConfig::kBenchmark, BenchConfig::kUser, arg);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -96,6 +96,11 @@ public:
|
|||
return CN_ITER / 8;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
case Algorithm::CN_UPX2:
|
||||
return CN_ITER / 32;
|
||||
# endif
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -111,6 +116,12 @@ public:
|
|||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
if (algo == Algorithm::CN_UPX2) {
|
||||
return 0x1FFF0;
|
||||
}
|
||||
# endif
|
||||
|
||||
return ((memory(algo) - 1) / 16) * 16;
|
||||
}
|
||||
|
||||
|
@ -149,6 +160,9 @@ public:
|
|||
# ifdef XMRIG_ALGO_CN_PICO
|
||||
case Algorithm::CN_PICO_0:
|
||||
case Algorithm::CN_PICO_TLO:
|
||||
# endif
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
case Algorithm::CN_UPX2:
|
||||
# endif
|
||||
return Algorithm::CN_2;
|
||||
|
||||
|
@ -176,6 +190,7 @@ template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_FAST>::base() con
|
|||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_RTO>::base() const { return Algorithm::CN_1; }
|
||||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_LITE_1>::base() const { return Algorithm::CN_1; }
|
||||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_HEAVY_TUBE>::base() const { return Algorithm::CN_1; }
|
||||
template<> constexpr inline Algorithm::Id CnAlgo<Algorithm::CN_UPX2>::base() const { return Algorithm::CN_2; }
|
||||
|
||||
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_FAST>::iterations() const { return CN_ITER / 2; }
|
||||
|
@ -192,6 +207,7 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() con
|
|||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const { return CN_ITER / 8; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_TLO>::iterations() const { return CN_ITER / 8; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_CCX>::iterations() const { return CN_ITER / 2; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::iterations() const { return CN_ITER / 32; }
|
||||
|
||||
|
||||
template<> constexpr inline size_t CnAlgo<Algorithm::CN_LITE_0>::memory() const { return CN_MEMORY / 2; }
|
||||
|
@ -201,9 +217,11 @@ template<> constexpr inline size_t CnAlgo<Algorithm::CN_HEAVY_TUBE>::memory() co
|
|||
template<> constexpr inline size_t CnAlgo<Algorithm::CN_HEAVY_XHV>::memory() const { return CN_MEMORY * 2; }
|
||||
template<> constexpr inline size_t CnAlgo<Algorithm::CN_PICO_0>::memory() const { return CN_MEMORY / 8; }
|
||||
template<> constexpr inline size_t CnAlgo<Algorithm::CN_PICO_TLO>::memory() const { return CN_MEMORY / 8; }
|
||||
template<> constexpr inline size_t CnAlgo<Algorithm::CN_UPX2>::memory() const { return CN_MEMORY / 16; }
|
||||
|
||||
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::mask() const { return 0x1FFF0; }
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
|
|
@ -99,8 +99,11 @@ cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr;
|
|||
cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr;
|
||||
cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_upx2_double_mainloop_asm = nullptr;
|
||||
|
||||
template<typename T, typename U>
|
||||
|
||||
template<Algorithm::Id SOURCE_ALGO = Algorithm::CN_2, typename T, typename U>
|
||||
static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask = CnAlgo<Algorithm::CN_HALF>().mask())
|
||||
{
|
||||
auto p = reinterpret_cast<const uint8_t*>(src);
|
||||
|
@ -124,11 +127,11 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
|
|||
auto patched_data = reinterpret_cast<uint8_t*>(dst);
|
||||
for (size_t i = 0; i + sizeof(uint32_t) <= size; ++i) {
|
||||
switch (*(uint32_t*)(patched_data + i)) {
|
||||
case CnAlgo<Algorithm::CN_2>().iterations():
|
||||
case CnAlgo<SOURCE_ALGO>().iterations():
|
||||
*(uint32_t*)(patched_data + i) = iterations;
|
||||
break;
|
||||
|
||||
case CnAlgo<Algorithm::CN_2>().mask():
|
||||
case CnAlgo<SOURCE_ALGO>().mask():
|
||||
*(uint32_t*)(patched_data + i) = mask;
|
||||
break;
|
||||
}
|
||||
|
@ -138,7 +141,7 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
|
|||
|
||||
static void patchAsmVariants()
|
||||
{
|
||||
const int allocation_size = 81920;
|
||||
const int allocation_size = 131072;
|
||||
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
|
||||
|
||||
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
|
||||
|
@ -170,6 +173,11 @@ static void patchAsmVariants()
|
|||
cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x13000);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
cn_upx2_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x14000);
|
||||
cn_upx2_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x15000);
|
||||
# endif
|
||||
|
||||
{
|
||||
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
|
||||
|
||||
|
@ -219,6 +227,16 @@ static void patchAsmVariants()
|
|||
patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
{
|
||||
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_UPX2>().iterations();
|
||||
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_UPX2>().mask();
|
||||
|
||||
patchCode<Algorithm::CN_RWZ>(cn_upx2_mainloop_asm, cnv2_rwz_mainloop_asm, ITER, MASK);
|
||||
patchCode<Algorithm::CN_RWZ>(cn_upx2_double_mainloop_asm, cnv2_rwz_double_mainloop_asm, ITER, MASK);
|
||||
}
|
||||
#endif
|
||||
|
||||
VirtualMemory::protectRX(base, allocation_size);
|
||||
VirtualMemory::flushInstructionCache(base, allocation_size);
|
||||
}
|
||||
|
@ -272,6 +290,11 @@ xmrig::CnHash::CnHash()
|
|||
|
||||
ADD_FN(Algorithm::CN_CCX);
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
ADD_FN(Algorithm::CN_UPX2);
|
||||
ADD_FN_ASM(Algorithm::CN_UPX2);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_ARGON2
|
||||
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;
|
||||
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE_SOFT][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_CHUKWA>;
|
||||
|
|
|
@ -395,7 +395,7 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
|
|||
uint64_t* mem_out = (uint64_t*)&l[idx];
|
||||
|
||||
if (props.base() == Algorithm::CN_2) {
|
||||
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
_mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
|
||||
} else {
|
||||
__m128i tmp = _mm_xor_si128(bx0, cx);
|
||||
|
@ -528,7 +528,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
if (ALGO == Algorithm::CN_R) {
|
||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
|
||||
} else {
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -704,7 +704,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
if (ALGO == Algorithm::CN_R) {
|
||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
|
||||
} else {
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -764,7 +764,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
if (ALGO == Algorithm::CN_R) {
|
||||
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
|
||||
} else {
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -370,6 +370,23 @@ const static uint8_t test_output_pico_tlo[160] = {
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_FEMTO
|
||||
// "cn/upx2"
|
||||
const static uint8_t test_output_femto_upx2[160] = {
|
||||
0xAA, 0xBB, 0xB8, 0xED, 0x14, 0xA8, 0x35, 0xFA, 0x22, 0xCF, 0xB1, 0xB5, 0xDE, 0xA8, 0x72, 0xB0,
|
||||
0xA1, 0xD6, 0xCB, 0xD8, 0x46, 0xF4, 0x39, 0x1C, 0x0F, 0x01, 0xF3, 0x87, 0x5E, 0x3A, 0x37, 0x61,
|
||||
0x38, 0x59, 0x15, 0x72, 0xF8, 0x20, 0xD4, 0xDE, 0x25, 0x3C, 0xF5, 0x5A, 0x21, 0x92, 0xB6, 0x22,
|
||||
0xB0, 0x28, 0x9E, 0x2E, 0x5C, 0x36, 0x16, 0xE6, 0x1E, 0x78, 0x7A, 0x8F, 0xE4, 0x62, 0xEC, 0x5A,
|
||||
0xFD, 0x58, 0xCC, 0x6F, 0x3C, 0xD3, 0x8A, 0x0B, 0x5B, 0x6C, 0x83, 0x4E, 0x9B, 0xD4, 0xC2, 0x5A,
|
||||
0x43, 0x2C, 0x48, 0x98, 0xF3, 0x16, 0xCA, 0x87, 0xE9, 0x5F, 0x44, 0x93, 0x53, 0x48, 0x00, 0xA3,
|
||||
0xE8, 0xE4, 0xB6, 0x9D, 0x5A, 0x3B, 0x49, 0x2C, 0x21, 0xE9, 0x4B, 0x02, 0xFC, 0x87, 0x8D, 0x75,
|
||||
0x66, 0x05, 0xAF, 0xA3, 0x9D, 0xC9, 0xD8, 0x88, 0x2D, 0x67, 0x31, 0x21, 0x4C, 0x4D, 0x88, 0x7D,
|
||||
0x86, 0x9E, 0x4D, 0x74, 0xF4, 0x4C, 0x57, 0x27, 0xCF, 0xEF, 0x86, 0x01, 0xB0, 0x52, 0x18, 0xF3,
|
||||
0xAD, 0xE4, 0x52, 0x5E, 0xB0, 0x4A, 0x97, 0xB4, 0x96, 0x18, 0xB6, 0x9C, 0x93, 0x0E, 0x49, 0xBB,
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_ARGON2
|
||||
// "argon2/chukwa"
|
||||
const static uint8_t argon2_chukwa_test_out[160] = {
|
||||
|
|
|
@ -563,7 +563,7 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
|
|||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
if (props.base() == Algorithm::CN_2) {
|
||||
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||
VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(mem_out), _mm_xor_si128(bx0, cx));
|
||||
} else {
|
||||
__m128i tmp = _mm_xor_si128(bx0, cx);
|
||||
|
@ -715,7 +715,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
if (ALGO == Algorithm::CN_R) {
|
||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
|
||||
} else {
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -789,6 +789,7 @@ extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
|
|||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx * *ctx);
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
@ -822,6 +823,8 @@ extern cn_mainloop_fun cn_double_mainloop_ryzen_asm;
|
|||
extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
|
||||
extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_upx2_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_upx2_double_mainloop_asm;
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
@ -934,6 +937,11 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
cn_double_mainloop_bulldozer_asm(ctx);
|
||||
}
|
||||
}
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
else if (ALGO == Algorithm::CN_UPX2) {
|
||||
cn_upx2_mainloop_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
else if (props.isR()) {
|
||||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
@ -976,6 +984,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
else if (ALGO == Algorithm::CN_PICO_TLO) {
|
||||
cn_tlo_double_mainloop_sandybridge_asm(ctx);
|
||||
}
|
||||
# endif
|
||||
# ifdef XMRIG_ALGO_CN_FEMTO
|
||||
else if (ALGO == Algorithm::CN_UPX2) {
|
||||
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
|
||||
cnv2_upx_double_mainloop_zen3_asm(ctx);
|
||||
}
|
||||
else {
|
||||
cn_upx2_double_mainloop_asm(ctx);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
else if (ALGO == Algorithm::CN_RWZ) {
|
||||
cnv2_rwz_double_mainloop_asm(ctx);
|
||||
|
@ -1134,7 +1152,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
if (ALGO == Algorithm::CN_R) {
|
||||
VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
|
||||
} else {
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1192,7 +1210,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
if (ALGO == Algorithm::CN_R) {
|
||||
VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
|
||||
} else {
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0));
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1302,7 +1320,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
if (ALGO == Algorithm::CN_R) { \
|
||||
VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \
|
||||
} else { \
|
||||
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (ALGO == Algorithm::CN_RWZ ? 1 : 0)); \
|
||||
VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (((ALGO == Algorithm::CN_RWZ) || (ALGO == Algorithm::CN_UPX2)) ? 1 : 0)); \
|
||||
} \
|
||||
} \
|
||||
if (ALGO == Algorithm::CN_R) { \
|
||||
|
|
320
src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc
Normal file
320
src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc
Normal file
|
@ -0,0 +1,320 @@
|
|||
mov rax, rsp
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov rdi, QWORD PTR [rcx+8]
|
||||
|
||||
mov edx, 768
|
||||
mov rbx, QWORD PTR [rcx]
|
||||
mov ecx, 256
|
||||
movaps XMMWORD PTR [rax-88], xmm6
|
||||
movaps XMMWORD PTR [rax-104], xmm7
|
||||
mov r13, QWORD PTR [rdi+224]
|
||||
movq xmm0, QWORD PTR [rdi+104]
|
||||
mov r12, QWORD PTR [rbx+224]
|
||||
movaps XMMWORD PTR [rax-120], xmm8
|
||||
movaps XMMWORD PTR [rax-136], xmm9
|
||||
movaps XMMWORD PTR [rax-152], xmm10
|
||||
movaps XMMWORD PTR [rsp+112], xmm11
|
||||
movaps XMMWORD PTR [rsp+96], xmm12
|
||||
movaps XMMWORD PTR [rsp+80], xmm13
|
||||
movq xmm13, QWORD PTR [rbx+96]
|
||||
movaps XMMWORD PTR [rsp+64], xmm14
|
||||
movq xmm14, QWORD PTR [rbx+104]
|
||||
movaps XMMWORD PTR [rsp+48], xmm15
|
||||
movq xmm15, QWORD PTR [rdi+96]
|
||||
mov QWORD PTR [rsp], r13
|
||||
movdqa XMMWORD PTR [rsp+32], xmm0
|
||||
|
||||
stmxcsr DWORD PTR [rsp+24]
|
||||
mov DWORD PTR [rsp+28], 24448
|
||||
ldmxcsr DWORD PTR [rsp+28]
|
||||
|
||||
mov rcx, QWORD PTR [rbx+56]
|
||||
xorps xmm12, xmm12
|
||||
xor rcx, QWORD PTR [rbx+24]
|
||||
mov rax, QWORD PTR [rbx+48]
|
||||
xor rax, QWORD PTR [rbx+16]
|
||||
mov rsi, QWORD PTR [rbx+32]
|
||||
mov rbp, QWORD PTR [rdi+32]
|
||||
movq xmm0, rcx
|
||||
|
||||
mov rcx, QWORD PTR [rbx+88]
|
||||
xor rcx, QWORD PTR [rbx+72]
|
||||
movq xmm7, rax
|
||||
mov rax, QWORD PTR [rbx+80]
|
||||
xor rax, QWORD PTR [rbx+64]
|
||||
mov r14, QWORD PTR [rbx+40]
|
||||
mov r15, QWORD PTR [rdi+40]
|
||||
xor rsi, QWORD PTR [rbx]
|
||||
xor rbp, QWORD PTR [rdi]
|
||||
movq xmm9, rax
|
||||
|
||||
mov rax, QWORD PTR [rdi+48]
|
||||
xor rax, QWORD PTR [rdi+16]
|
||||
xor r14, QWORD PTR [rbx+8]
|
||||
xor r15, QWORD PTR [rdi+8]
|
||||
movq xmm8, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
|
||||
mov eax, 1023
|
||||
shl rax, 52
|
||||
movq xmm11, rax
|
||||
punpcklqdq xmm11, xmm11
|
||||
|
||||
mov rax, QWORD PTR [rdi+80]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdi+56]
|
||||
xor rcx, QWORD PTR [rdi+24]
|
||||
punpcklqdq xmm9, xmm0
|
||||
mov QWORD PTR [rsp+8], 16384
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdi+88]
|
||||
xor rcx, QWORD PTR [rdi+72]
|
||||
xor rax, QWORD PTR [rdi+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
movq xmm0, rcx
|
||||
movq xmm10, rax
|
||||
mov rax, 4389456576511
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
mov rax, -4389456576512
|
||||
mov QWORD PTR [rsp+216], rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
upx2_main_loop:
|
||||
mov rdx, rsi
|
||||
mov r9, rbp
|
||||
and edx, 131056
|
||||
and r9d, 131056
|
||||
movdqu xmm6, XMMWORD PTR [rdx+r12]
|
||||
lea r8, QWORD PTR [rdx+r12]
|
||||
movdqu xmm4, XMMWORD PTR [r9+r13]
|
||||
lea r10, QWORD PTR [r9+r13]
|
||||
mov ecx, edx
|
||||
mov eax, edx
|
||||
xor rax, 32
|
||||
xor rcx, 48
|
||||
xor rdx, 16
|
||||
movq xmm0, r14
|
||||
movq xmm3, rsi
|
||||
movq xmm5, rbp
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, r15
|
||||
movdqu xmm2, XMMWORD PTR [rax+r12]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r12]
|
||||
paddq xmm2, xmm3
|
||||
punpcklqdq xmm5, xmm0
|
||||
paddq xmm1, xmm7
|
||||
aesenc xmm6, xmm3
|
||||
aesenc xmm4, xmm5
|
||||
movdqa xmm0, xmm9
|
||||
movq rdi, xmm4
|
||||
paddq xmm0, XMMWORD PTR [rdx+r12]
|
||||
movdqu XMMWORD PTR [rdx+r12], xmm0
|
||||
xor edx, edx
|
||||
movdqu XMMWORD PTR [rax+r12], xmm1
|
||||
movdqa xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rcx+r12], xmm2
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r8], xmm0
|
||||
mov ecx, r9d
|
||||
xor rcx, 48
|
||||
mov eax, r9d
|
||||
xor rax, 32
|
||||
xor r9, 16
|
||||
movdqa xmm0, xmm10
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r13]
|
||||
movdqu xmm2, XMMWORD PTR [rax+r13]
|
||||
paddq xmm1, xmm8
|
||||
paddq xmm0, XMMWORD PTR [r9+r13]
|
||||
paddq xmm2, xmm5
|
||||
movdqu XMMWORD PTR [r9+r13], xmm0
|
||||
movq r9, xmm6
|
||||
movdqu XMMWORD PTR [rax+r13], xmm1
|
||||
movdqa xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rcx+r13], xmm2
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [r10], xmm0
|
||||
movq rcx, xmm14
|
||||
mov rax, rcx
|
||||
movq r10, xmm13
|
||||
shl rax, 32
|
||||
movdqa xmm0, xmm6
|
||||
xor r10, rax
|
||||
psrldq xmm0, 8
|
||||
lea r8, QWORD PTR [rcx+rcx]
|
||||
movq rax, xmm0
|
||||
add r8d, r9d
|
||||
mov ecx, -2147483647
|
||||
or r8, rcx
|
||||
mov r11, r9
|
||||
div r8
|
||||
and r11d, 131056
|
||||
movaps xmm1, xmm12
|
||||
mov eax, eax
|
||||
add r11, r12
|
||||
shl rdx, 32
|
||||
add rdx, rax
|
||||
xor r10, QWORD PTR [r11]
|
||||
mov rbx, QWORD PTR [r11+8]
|
||||
lea r8, QWORD PTR [rdx+r9]
|
||||
movq xmm13, rdx
|
||||
mov rax, r8
|
||||
shr rax, 12
|
||||
movq xmm0, rax
|
||||
paddq xmm0, xmm11
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdx, xmm1
|
||||
mov rax, rdx
|
||||
shr rdx, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
add rax, QWORD PTR [rsp+216]
|
||||
sub rcx, QWORD PTR [rsp+16]
|
||||
mov r13, QWORD PTR [rsp]
|
||||
imul rcx, rax
|
||||
mov rax, r10
|
||||
sub rcx, r8
|
||||
mov rcx, r9
|
||||
adc rdx, 0
|
||||
xor rcx, 32
|
||||
and ecx, 131056
|
||||
movq xmm14, rdx
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r12]
|
||||
mul r9
|
||||
paddq xmm1, xmm3
|
||||
mov r8, rax
|
||||
xor r8, QWORD PTR [rcx+r12+8]
|
||||
add r14, r8
|
||||
movq xmm0, rax
|
||||
movq xmm2, rdx
|
||||
xor rdx, QWORD PTR [rcx+r12]
|
||||
mov rax, r9
|
||||
xor rax, 48
|
||||
punpcklqdq xmm2, xmm0
|
||||
and eax, 131056
|
||||
add rsi, rdx
|
||||
xor r9, 16
|
||||
xor edx, edx
|
||||
and r9d, 131056
|
||||
movdqu xmm0, XMMWORD PTR [rax+r12]
|
||||
paddq xmm0, xmm7
|
||||
pxor xmm2, XMMWORD PTR [r9+r12]
|
||||
paddq xmm2, xmm9
|
||||
movdqu XMMWORD PTR [r9+r12], xmm2
|
||||
movq r9, xmm15
|
||||
movdqu XMMWORD PTR [rcx+r12], xmm0
|
||||
movdqa xmm0, xmm4
|
||||
mov rcx, QWORD PTR [rsp+32]
|
||||
movdqu XMMWORD PTR [rax+r12], xmm1
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
movaps xmm1, xmm12
|
||||
xor r9, rax
|
||||
psrldq xmm0, 8
|
||||
lea r8, QWORD PTR [rcx+rcx]
|
||||
mov QWORD PTR [r11], rsi
|
||||
add r8d, edi
|
||||
mov QWORD PTR [r11+8], r14
|
||||
movq rax, xmm0
|
||||
mov ecx, -2147483647
|
||||
or r8, rcx
|
||||
xor rsi, r10
|
||||
div r8
|
||||
mov r10, rdi
|
||||
xor r14, rbx
|
||||
mov eax, eax
|
||||
and r10d, 131056
|
||||
shl rdx, 32
|
||||
add r10, r13
|
||||
add rdx, rax
|
||||
xor r9, QWORD PTR [r10]
|
||||
mov r11, QWORD PTR [r10+8]
|
||||
lea r8, QWORD PTR [rdx+rdi]
|
||||
mov rax, r8
|
||||
movq xmm15, rdx
|
||||
shr rax, 12
|
||||
movq xmm0, rax
|
||||
paddq xmm0, xmm11
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdx, xmm1
|
||||
mov rax, rdx
|
||||
shr rax, 20
|
||||
shr rdx, 19
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
sub rcx, QWORD PTR [rsp+16]
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
add rax, QWORD PTR [rsp+216]
|
||||
imul rcx, rax
|
||||
mov rax, r9
|
||||
sub rcx, r8
|
||||
mov rcx, rdi
|
||||
adc rdx, 0
|
||||
xor rcx, 32
|
||||
and ecx, 131056
|
||||
mov QWORD PTR [rsp+32], rdx
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r13]
|
||||
mul rdi
|
||||
paddq xmm1, xmm5
|
||||
mov r8, rax
|
||||
xor r8, QWORD PTR [rcx+r13+8]
|
||||
add r15, r8
|
||||
movq xmm0, rax
|
||||
movq xmm2, rdx
|
||||
xor rdx, QWORD PTR [rcx+r13]
|
||||
mov rax, rdi
|
||||
xor rdi, 16
|
||||
punpcklqdq xmm2, xmm0
|
||||
xor rax, 48
|
||||
and edi, 131056
|
||||
and eax, 131056
|
||||
add rbp, rdx
|
||||
pxor xmm2, XMMWORD PTR [rdi+r13]
|
||||
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||
paddq xmm2, xmm10
|
||||
movdqu XMMWORD PTR [rdi+r13], xmm2
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+r13], xmm0
|
||||
movdqa xmm10, xmm8
|
||||
movdqu XMMWORD PTR [rax+r13], xmm1
|
||||
movdqa xmm8, xmm4
|
||||
mov QWORD PTR [r10], rbp
|
||||
xor rbp, r9
|
||||
mov QWORD PTR [r10+8], r15
|
||||
xor r15, r11
|
||||
sub QWORD PTR [rsp+8], 1
|
||||
jne upx2_main_loop
|
||||
|
||||
ldmxcsr DWORD PTR [rsp+24]
|
||||
|
||||
movaps xmm13, XMMWORD PTR [rsp+80]
|
||||
lea r11, QWORD PTR [rsp+232]
|
||||
movaps xmm6, XMMWORD PTR [r11-24]
|
||||
movaps xmm7, XMMWORD PTR [r11-40]
|
||||
movaps xmm8, XMMWORD PTR [r11-56]
|
||||
movaps xmm9, XMMWORD PTR [r11-72]
|
||||
movaps xmm10, XMMWORD PTR [r11-88]
|
||||
movaps xmm11, XMMWORD PTR [r11-104]
|
||||
movaps xmm12, XMMWORD PTR [r11-120]
|
||||
movaps xmm14, XMMWORD PTR [rsp+64]
|
||||
movaps xmm15, XMMWORD PTR [rsp+48]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
|
@ -17,6 +17,7 @@
|
|||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_rwz_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||
|
@ -72,6 +73,15 @@ FN_PREFIX(cnv2_rwz_double_mainloop_asm):
|
|||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cn2/cnv2_upx_double_mainloop_zen3.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
||||
|
|
|
@ -48,5 +48,12 @@ cnv2_rwz_double_mainloop_asm PROC
|
|||
mov eax, 3735929054
|
||||
cnv2_rwz_double_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_upx_double_mainloop_zen3_asm PROC
|
||||
INCLUDE cn2/cnv2_upx_double_mainloop_zen3.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_upx_double_mainloop_zen3_asm ENDP
|
||||
|
||||
_TEXT_CNV2_MAINLOOP ENDS
|
||||
END
|
||||
|
|
320
src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc
Normal file
320
src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc
Normal file
|
@ -0,0 +1,320 @@
|
|||
mov rax, rsp
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov rdi, QWORD PTR [rcx+8]
|
||||
|
||||
mov edx, 768
|
||||
mov rbx, QWORD PTR [rcx]
|
||||
mov ecx, 256
|
||||
movaps XMMWORD PTR [rax-88], xmm6
|
||||
movaps XMMWORD PTR [rax-104], xmm7
|
||||
mov r13, QWORD PTR [rdi+224]
|
||||
movd xmm0, QWORD PTR [rdi+104]
|
||||
mov r12, QWORD PTR [rbx+224]
|
||||
movaps XMMWORD PTR [rax-120], xmm8
|
||||
movaps XMMWORD PTR [rax-136], xmm9
|
||||
movaps XMMWORD PTR [rax-152], xmm10
|
||||
movaps XMMWORD PTR [rsp+112], xmm11
|
||||
movaps XMMWORD PTR [rsp+96], xmm12
|
||||
movaps XMMWORD PTR [rsp+80], xmm13
|
||||
movd xmm13, QWORD PTR [rbx+96]
|
||||
movaps XMMWORD PTR [rsp+64], xmm14
|
||||
movd xmm14, QWORD PTR [rbx+104]
|
||||
movaps XMMWORD PTR [rsp+48], xmm15
|
||||
movd xmm15, QWORD PTR [rdi+96]
|
||||
mov QWORD PTR [rsp], r13
|
||||
movdqa XMMWORD PTR [rsp+32], xmm0
|
||||
|
||||
stmxcsr DWORD PTR [rsp+24]
|
||||
mov DWORD PTR [rsp+28], 24448
|
||||
ldmxcsr DWORD PTR [rsp+28]
|
||||
|
||||
mov rcx, QWORD PTR [rbx+56]
|
||||
xorps xmm12, xmm12
|
||||
xor rcx, QWORD PTR [rbx+24]
|
||||
mov rax, QWORD PTR [rbx+48]
|
||||
xor rax, QWORD PTR [rbx+16]
|
||||
mov rsi, QWORD PTR [rbx+32]
|
||||
mov rbp, QWORD PTR [rdi+32]
|
||||
movd xmm0, rcx
|
||||
|
||||
mov rcx, QWORD PTR [rbx+88]
|
||||
xor rcx, QWORD PTR [rbx+72]
|
||||
movd xmm7, rax
|
||||
mov rax, QWORD PTR [rbx+80]
|
||||
xor rax, QWORD PTR [rbx+64]
|
||||
mov r14, QWORD PTR [rbx+40]
|
||||
mov r15, QWORD PTR [rdi+40]
|
||||
xor rsi, QWORD PTR [rbx]
|
||||
xor rbp, QWORD PTR [rdi]
|
||||
movd xmm9, rax
|
||||
|
||||
mov rax, QWORD PTR [rdi+48]
|
||||
xor rax, QWORD PTR [rdi+16]
|
||||
xor r14, QWORD PTR [rbx+8]
|
||||
xor r15, QWORD PTR [rdi+8]
|
||||
movd xmm8, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
|
||||
mov eax, 1023
|
||||
shl rax, 52
|
||||
movd xmm11, rax
|
||||
punpcklqdq xmm11, xmm11
|
||||
|
||||
mov rax, QWORD PTR [rdi+80]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdi+56]
|
||||
xor rcx, QWORD PTR [rdi+24]
|
||||
punpcklqdq xmm9, xmm0
|
||||
mov QWORD PTR [rsp+8], 16384
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdi+88]
|
||||
xor rcx, QWORD PTR [rdi+72]
|
||||
xor rax, QWORD PTR [rdi+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
movd xmm0, rcx
|
||||
movd xmm10, rax
|
||||
mov rax, 4389456576511
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
mov rax, -4389456576512
|
||||
mov QWORD PTR [rsp+216], rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
upx2_main_loop:
|
||||
mov rdx, rsi
|
||||
mov r9, rbp
|
||||
and edx, 131056
|
||||
and r9d, 131056
|
||||
movdqu xmm6, XMMWORD PTR [rdx+r12]
|
||||
lea r8, QWORD PTR [rdx+r12]
|
||||
movdqu xmm4, XMMWORD PTR [r9+r13]
|
||||
lea r10, QWORD PTR [r9+r13]
|
||||
mov ecx, edx
|
||||
mov eax, edx
|
||||
xor rax, 32
|
||||
xor rcx, 48
|
||||
xor rdx, 16
|
||||
movd xmm0, r14
|
||||
movd xmm3, rsi
|
||||
movd xmm5, rbp
|
||||
punpcklqdq xmm3, xmm0
|
||||
movd xmm0, r15
|
||||
movdqu xmm2, XMMWORD PTR [rax+r12]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r12]
|
||||
paddq xmm2, xmm3
|
||||
punpcklqdq xmm5, xmm0
|
||||
paddq xmm1, xmm7
|
||||
aesenc xmm6, xmm3
|
||||
aesenc xmm4, xmm5
|
||||
movdqa xmm0, xmm9
|
||||
movd rdi, xmm4
|
||||
paddq xmm0, XMMWORD PTR [rdx+r12]
|
||||
movdqu XMMWORD PTR [rdx+r12], xmm0
|
||||
xor edx, edx
|
||||
movdqu XMMWORD PTR [rax+r12], xmm1
|
||||
movdqa xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rcx+r12], xmm2
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r8], xmm0
|
||||
mov ecx, r9d
|
||||
xor rcx, 48
|
||||
mov eax, r9d
|
||||
xor rax, 32
|
||||
xor r9, 16
|
||||
movdqa xmm0, xmm10
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r13]
|
||||
movdqu xmm2, XMMWORD PTR [rax+r13]
|
||||
paddq xmm1, xmm8
|
||||
paddq xmm0, XMMWORD PTR [r9+r13]
|
||||
paddq xmm2, xmm5
|
||||
movdqu XMMWORD PTR [r9+r13], xmm0
|
||||
movd r9, xmm6
|
||||
movdqu XMMWORD PTR [rax+r13], xmm1
|
||||
movdqa xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rcx+r13], xmm2
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [r10], xmm0
|
||||
movd rcx, xmm14
|
||||
mov rax, rcx
|
||||
movd r10, xmm13
|
||||
shl rax, 32
|
||||
movdqa xmm0, xmm6
|
||||
xor r10, rax
|
||||
psrldq xmm0, 8
|
||||
lea r8, QWORD PTR [rcx+rcx]
|
||||
movd rax, xmm0
|
||||
add r8d, r9d
|
||||
mov ecx, -2147483647
|
||||
or r8, rcx
|
||||
mov r11, r9
|
||||
div r8
|
||||
and r11d, 131056
|
||||
movaps xmm1, xmm12
|
||||
mov eax, eax
|
||||
add r11, r12
|
||||
shl rdx, 32
|
||||
add rdx, rax
|
||||
xor r10, QWORD PTR [r11]
|
||||
mov rbx, QWORD PTR [r11+8]
|
||||
lea r8, QWORD PTR [rdx+r9]
|
||||
movd xmm13, rdx
|
||||
mov rax, r8
|
||||
shr rax, 12
|
||||
movd xmm0, rax
|
||||
paddq xmm0, xmm11
|
||||
sqrtsd xmm1, xmm0
|
||||
movd rdx, xmm1
|
||||
mov rax, rdx
|
||||
shr rdx, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
add rax, QWORD PTR [rsp+216]
|
||||
sub rcx, QWORD PTR [rsp+16]
|
||||
mov r13, QWORD PTR [rsp]
|
||||
imul rcx, rax
|
||||
mov rax, r10
|
||||
sub rcx, r8
|
||||
mov rcx, r9
|
||||
adc rdx, 0
|
||||
xor rcx, 32
|
||||
and ecx, 131056
|
||||
movd xmm14, rdx
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r12]
|
||||
mul r9
|
||||
paddq xmm1, xmm3
|
||||
mov r8, rax
|
||||
xor r8, QWORD PTR [rcx+r12+8]
|
||||
add r14, r8
|
||||
movd xmm0, rax
|
||||
movd xmm2, rdx
|
||||
xor rdx, QWORD PTR [rcx+r12]
|
||||
mov rax, r9
|
||||
xor rax, 48
|
||||
punpcklqdq xmm2, xmm0
|
||||
and eax, 131056
|
||||
add rsi, rdx
|
||||
xor r9, 16
|
||||
xor edx, edx
|
||||
and r9d, 131056
|
||||
movdqu xmm0, XMMWORD PTR [rax+r12]
|
||||
paddq xmm0, xmm7
|
||||
pxor xmm2, XMMWORD PTR [r9+r12]
|
||||
paddq xmm2, xmm9
|
||||
movdqu XMMWORD PTR [r9+r12], xmm2
|
||||
movd r9, xmm15
|
||||
movdqu XMMWORD PTR [rcx+r12], xmm0
|
||||
movdqa xmm0, xmm4
|
||||
mov rcx, QWORD PTR [rsp+32]
|
||||
movdqu XMMWORD PTR [rax+r12], xmm1
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
movaps xmm1, xmm12
|
||||
xor r9, rax
|
||||
psrldq xmm0, 8
|
||||
lea r8, QWORD PTR [rcx+rcx]
|
||||
mov QWORD PTR [r11], rsi
|
||||
add r8d, edi
|
||||
mov QWORD PTR [r11+8], r14
|
||||
movd rax, xmm0
|
||||
mov ecx, -2147483647
|
||||
or r8, rcx
|
||||
xor rsi, r10
|
||||
div r8
|
||||
mov r10, rdi
|
||||
xor r14, rbx
|
||||
mov eax, eax
|
||||
and r10d, 131056
|
||||
shl rdx, 32
|
||||
add r10, r13
|
||||
add rdx, rax
|
||||
xor r9, QWORD PTR [r10]
|
||||
mov r11, QWORD PTR [r10+8]
|
||||
lea r8, QWORD PTR [rdx+rdi]
|
||||
mov rax, r8
|
||||
movd xmm15, rdx
|
||||
shr rax, 12
|
||||
movd xmm0, rax
|
||||
paddq xmm0, xmm11
|
||||
sqrtsd xmm1, xmm0
|
||||
movd rdx, xmm1
|
||||
mov rax, rdx
|
||||
shr rax, 20
|
||||
shr rdx, 19
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
sub rcx, QWORD PTR [rsp+16]
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
add rax, QWORD PTR [rsp+216]
|
||||
imul rcx, rax
|
||||
mov rax, r9
|
||||
sub rcx, r8
|
||||
mov rcx, rdi
|
||||
adc rdx, 0
|
||||
xor rcx, 32
|
||||
and ecx, 131056
|
||||
mov QWORD PTR [rsp+32], rdx
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r13]
|
||||
mul rdi
|
||||
paddq xmm1, xmm5
|
||||
mov r8, rax
|
||||
xor r8, QWORD PTR [rcx+r13+8]
|
||||
add r15, r8
|
||||
movd xmm0, rax
|
||||
movd xmm2, rdx
|
||||
xor rdx, QWORD PTR [rcx+r13]
|
||||
mov rax, rdi
|
||||
xor rdi, 16
|
||||
punpcklqdq xmm2, xmm0
|
||||
xor rax, 48
|
||||
and edi, 131056
|
||||
and eax, 131056
|
||||
add rbp, rdx
|
||||
pxor xmm2, XMMWORD PTR [rdi+r13]
|
||||
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||
paddq xmm2, xmm10
|
||||
movdqu XMMWORD PTR [rdi+r13], xmm2
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+r13], xmm0
|
||||
movdqa xmm10, xmm8
|
||||
movdqu XMMWORD PTR [rax+r13], xmm1
|
||||
movdqa xmm8, xmm4
|
||||
mov QWORD PTR [r10], rbp
|
||||
xor rbp, r9
|
||||
mov QWORD PTR [r10+8], r15
|
||||
xor r15, r11
|
||||
sub QWORD PTR [rsp+8], 1
|
||||
jne upx2_main_loop
|
||||
|
||||
ldmxcsr DWORD PTR [rsp+24]
|
||||
|
||||
movaps xmm13, XMMWORD PTR [rsp+80]
|
||||
lea r11, QWORD PTR [rsp+232]
|
||||
movaps xmm6, XMMWORD PTR [r11-24]
|
||||
movaps xmm7, XMMWORD PTR [r11-40]
|
||||
movaps xmm8, XMMWORD PTR [r11-56]
|
||||
movaps xmm9, XMMWORD PTR [r11-72]
|
||||
movaps xmm10, XMMWORD PTR [r11-88]
|
||||
movaps xmm11, XMMWORD PTR [r11-104]
|
||||
movaps xmm12, XMMWORD PTR [r11-120]
|
||||
movaps xmm14, XMMWORD PTR [rsp+64]
|
||||
movaps xmm15, XMMWORD PTR [rsp+48]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
|
@ -7,6 +7,7 @@
|
|||
.global cnv2_double_mainloop_sandybridge_asm
|
||||
.global cnv2_rwz_mainloop_asm
|
||||
.global cnv2_rwz_double_mainloop_asm
|
||||
.global cnv2_upx_double_mainloop_zen3_asm
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm:
|
||||
|
@ -43,3 +44,9 @@ cnv2_rwz_double_mainloop_asm:
|
|||
#include "cn2/cnv2_rwz_double_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_upx_double_mainloop_zen3_asm:
|
||||
#include "cn2/cnv2_upx_double_mainloop_zen3.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
|
|
@ -48,5 +48,12 @@ cnv2_rwz_double_mainloop_asm PROC
|
|||
mov eax, 3735929054
|
||||
cnv2_rwz_double_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_upx_double_mainloop_zen3_asm PROC
|
||||
INCLUDE cn2/cnv2_upx_double_mainloop_zen3.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_upx_double_mainloop_zen3_asm ENDP
|
||||
|
||||
_TEXT_CNV2_MAINLOOP ENDS
|
||||
END
|
||||
|
|
|
@ -41,7 +41,7 @@ public:
|
|||
size_t size = 0;
|
||||
|
||||
inline bool isFullyAllocated() const { return allocated == total; }
|
||||
inline double percent() const { return allocated == 0 ? 0.0 : static_cast<double>(allocated) / total * 100.0; }
|
||||
inline double percent() const { return total == 0 ? 0.0 : static_cast<double>(allocated) / total * 100.0; }
|
||||
inline void reset() { allocated = 0; total = 0; size = 0; }
|
||||
|
||||
inline HugePagesInfo &operator+=(const HugePagesInfo &other)
|
||||
|
|
17
src/crypto/randomx/asm/program_imul_rcp_store.inc
Normal file
17
src/crypto/randomx/asm/program_imul_rcp_store.inc
Normal file
|
@ -0,0 +1,17 @@
|
|||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
db 72, 185, 0, 0, 0, 0, 0, 0, 0, 0, 81
|
||||
add rsp, 128
|
|
@ -428,7 +428,10 @@ namespace randomx {
|
|||
xmrig::RxFix::setMainLoopBounds(mainLoopBounds);
|
||||
# endif
|
||||
|
||||
memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||
imul_rcp_storage = code + (ADDR(randomx_program_imul_rcp_store) - codePrologue) + 2;
|
||||
imul_rcp_storage_used = 0;
|
||||
|
||||
memcpy(imul_rcp_storage - 34, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||
codePos = codePosFirst;
|
||||
prevCFROUND = 0;
|
||||
|
||||
|
@ -1012,13 +1015,24 @@ namespace randomx {
|
|||
|
||||
uint64_t divisor = instr.getImm32();
|
||||
if (!isZeroOrPowerOf2(divisor)) {
|
||||
*(uint32_t*)(p + pos) = 0xb848;
|
||||
pos += 2;
|
||||
|
||||
emit64(randomx_reciprocal_fast(divisor), p, pos);
|
||||
|
||||
const uint32_t dst = instr.dst % RegistersCount;
|
||||
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
||||
|
||||
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
|
||||
if (imul_rcp_storage_used < 16) {
|
||||
*(uint64_t*)(imul_rcp_storage) = reciprocal;
|
||||
*(uint64_t*)(p + pos) = 0x2444AF0F4Cull + (dst << 27) + (static_cast<uint64_t>(248 - imul_rcp_storage_used * 8) << 40);
|
||||
++imul_rcp_storage_used;
|
||||
imul_rcp_storage += 11;
|
||||
pos += 6;
|
||||
}
|
||||
else {
|
||||
*(uint32_t*)(p + pos) = 0xb848;
|
||||
pos += 2;
|
||||
|
||||
emit64(reciprocal, p, pos);
|
||||
|
||||
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
||||
}
|
||||
|
||||
registerUsage[dst] = pos;
|
||||
}
|
||||
|
|
|
@ -104,6 +104,9 @@ namespace randomx {
|
|||
uint8_t* allocatedCode = nullptr;
|
||||
size_t allocatedSize = 0;
|
||||
|
||||
uint8_t* imul_rcp_storage = nullptr;
|
||||
uint32_t imul_rcp_storage_used = 0;
|
||||
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||
template<bool rax>
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
.global DECL(randomx_prefetch_scratchpad_end)
|
||||
.global DECL(randomx_program_prologue)
|
||||
.global DECL(randomx_program_prologue_first_load)
|
||||
.global DECL(randomx_program_imul_rcp_store)
|
||||
.global DECL(randomx_program_loop_begin)
|
||||
.global DECL(randomx_program_loop_load)
|
||||
.global DECL(randomx_program_loop_load_xop)
|
||||
|
@ -106,11 +107,15 @@ DECL(randomx_program_prologue_first_load):
|
|||
nop
|
||||
nop
|
||||
nop
|
||||
jmp DECL(randomx_program_loop_begin)
|
||||
jmp DECL(randomx_program_imul_rcp_store)
|
||||
|
||||
.balign 64
|
||||
#include "asm/program_xmm_constants.inc"
|
||||
|
||||
DECL(randomx_program_imul_rcp_store):
|
||||
#include "asm/program_imul_rcp_store.inc"
|
||||
jmp DECL(randomx_program_loop_begin)
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_loop_begin):
|
||||
nop
|
||||
|
|
|
@ -32,6 +32,7 @@ PUBLIC randomx_prefetch_scratchpad
|
|||
PUBLIC randomx_prefetch_scratchpad_end
|
||||
PUBLIC randomx_program_prologue
|
||||
PUBLIC randomx_program_prologue_first_load
|
||||
PUBLIC randomx_program_imul_rcp_store
|
||||
PUBLIC randomx_program_loop_begin
|
||||
PUBLIC randomx_program_loop_load
|
||||
PUBLIC randomx_program_loop_load_xop
|
||||
|
@ -94,12 +95,17 @@ randomx_program_prologue_first_load PROC
|
|||
nop
|
||||
nop
|
||||
nop
|
||||
jmp randomx_program_loop_begin
|
||||
jmp randomx_program_imul_rcp_store
|
||||
randomx_program_prologue_first_load ENDP
|
||||
|
||||
ALIGN 64
|
||||
include asm/program_xmm_constants.inc
|
||||
|
||||
randomx_program_imul_rcp_store PROC
|
||||
include asm/program_imul_rcp_store.inc
|
||||
jmp randomx_program_loop_begin
|
||||
randomx_program_imul_rcp_store ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_loop_begin PROC
|
||||
nop
|
||||
|
|
|
@ -33,6 +33,7 @@ extern "C" {
|
|||
void randomx_prefetch_scratchpad_end();
|
||||
void randomx_program_prologue();
|
||||
void randomx_program_prologue_first_load();
|
||||
void randomx_program_imul_rcp_store();
|
||||
void randomx_program_loop_begin();
|
||||
void randomx_program_loop_load();
|
||||
void randomx_program_loop_load_xop();
|
||||
|
|
|
@ -28,15 +28,15 @@
|
|||
#define APP_ID "xmrig"
|
||||
#define APP_NAME "XMRig"
|
||||
#define APP_DESC "XMRig miner"
|
||||
#define APP_VERSION "6.11.2"
|
||||
#define APP_VERSION "6.12.0-dev"
|
||||
#define APP_DOMAIN "xmrig.com"
|
||||
#define APP_SITE "www.xmrig.com"
|
||||
#define APP_COPYRIGHT "Copyright (C) 2016-2021 xmrig.com"
|
||||
#define APP_KIND "miner"
|
||||
|
||||
#define APP_VER_MAJOR 6
|
||||
#define APP_VER_MINOR 11
|
||||
#define APP_VER_PATCH 2
|
||||
#define APP_VER_MINOR 12
|
||||
#define APP_VER_PATCH 0
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# if (_MSC_VER >= 1920)
|
||||
|
|
Loading…
Reference in a new issue