mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-25 20:16:05 +00:00
Merge branch 'dev'
This commit is contained in:
commit
dabafaaadb
15 changed files with 224 additions and 112 deletions
|
@ -1,3 +1,7 @@
|
|||
# v6.8.2
|
||||
- [#2080](https://github.com/xmrig/xmrig/pull/2080) Fixed compile error in Termux.
|
||||
- [#2089](https://github.com/xmrig/xmrig/pull/2089) Optimized CryptoNight-Heavy for Zen3, 7-8% speedup.
|
||||
|
||||
# v6.8.1
|
||||
- [#2064](https://github.com/xmrig/xmrig/pull/2064) Added documentation for config.json CPU options.
|
||||
- [#2066](https://github.com/xmrig/xmrig/issues/2066) Fixed AMD GPUs health data readings on Linux.
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
6bb1a2e3a0fbca5195be6022f2a9fbff8a353c37c7542e7ab89420cb45b64505 xmrig-5.0.1-gcc-win32.zip
|
||||
24dba9ec281acfb2ea2c401ebd0e4e2d1f1ee5fd557da5ff3c7049020c1f78b6 xmrig-5.0.1-gcc-win64.zip
|
||||
86d65c6693ec9e35cd7547329580638b85c9eb0cf8383892a1c15199de5b556f xmrig-5.0.1-msvc-cuda10_1-win64.zip
|
||||
0fbfe518b1c4b6993b0f66ff01302626375b15620ccf8f64d6fb97845068ffca xmrig-5.0.1-msvc-win64.zip
|
||||
aa34890738a3494de2fa0e44db346937fea7339852f5f10b5d4655f95e2d8f1f xmrig-5.0.1-xenial-x64.tar.gz
|
|
@ -1,11 +0,0 @@
|
|||
-----BEGIN PGP SIGNATURE-----
|
||||
|
||||
iQEzBAABCgAdFiEEmsTOqOZuNaXHzdwbRGpTY4vpRAkFAl3VcsoACgkQRGpTY4vp
|
||||
RAm9vQgA1MyTUU2jley2TCYLUzQy2Fffc8fbXYv64r44jbWOjC/6qo2iIlRgPhIc
|
||||
oVyPKr5TYS3QjDzCEm8IvozS0YudS6soESbPzqDonboK8pd0K4bsML9TQY2feV7A
|
||||
NL5vln0rfVHp1wxLLrQpfBqAgvJUXEyaHece6gFQN79JOGhEo2bHL2NyrOl+FViS
|
||||
b2BaMtXq410Fh+XT6ShnOaG/2EuO8ZqSGdCO6A/2LHQw1UY+mZiCvue6P6B06HmB
|
||||
WD/urOv38V389v+V+Sp4UlEW6VpBOOjvtChoVWtLt+tKzydrnt2EmoWWWg475pka
|
||||
4G6whHuMWS8CTt5/PDhJpvVXNQTIOw==
|
||||
=C764
|
||||
-----END PGP SIGNATURE-----
|
|
@ -81,6 +81,7 @@ public:
|
|||
|
||||
inline void start(const std::vector<CpuLaunchData> &threads, size_t memory)
|
||||
{
|
||||
m_workersMemory.clear();
|
||||
m_hugePages.reset();
|
||||
m_memory = memory;
|
||||
m_started = 0;
|
||||
|
@ -95,7 +96,9 @@ public:
|
|||
if (ready) {
|
||||
m_started++;
|
||||
|
||||
if (m_workersMemory.insert(worker->memory()).second) {
|
||||
m_hugePages += worker->memory()->hugePages();
|
||||
}
|
||||
m_ways += worker->intensity();
|
||||
}
|
||||
else {
|
||||
|
@ -126,6 +129,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
std::set<const VirtualMemory*> m_workersMemory;
|
||||
HugePagesInfo m_hugePages;
|
||||
size_t m_errors = 0;
|
||||
size_t m_memory = 0;
|
||||
|
|
|
@ -103,12 +103,16 @@ rapidjson::Value xmrig::CpuConfig::toJSON(rapidjson::Document &doc) const
|
|||
|
||||
size_t xmrig::CpuConfig::memPoolSize() const
|
||||
{
|
||||
return m_memoryPool < 0 ? Cpu::info()->threads() : m_memoryPool;
|
||||
return m_memoryPool < 0 ? std::max(Cpu::info()->threads(), Cpu::info()->L3() >> 21) : m_memoryPool;
|
||||
}
|
||||
|
||||
|
||||
std::vector<xmrig::CpuLaunchData> xmrig::CpuConfig::get(const Miner *miner, const Algorithm &algorithm) const
|
||||
{
|
||||
if (algorithm.family() == Algorithm::KAWPOW) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<CpuLaunchData> out;
|
||||
const auto &threads = m_threads.get(algorithm);
|
||||
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include <cassert>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
#include "backend/cpu/Cpu.h"
|
||||
#include "backend/cpu/CpuWorker.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "core/config/Config.h"
|
||||
|
@ -55,6 +57,12 @@ namespace xmrig {
|
|||
|
||||
static constexpr uint32_t kReserveCount = 32768;
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_HEAVY
|
||||
static std::mutex cn_heavyZen3MemoryMutex;
|
||||
VirtualMemory* cn_heavyZen3Memory = nullptr;
|
||||
#endif
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
|
@ -72,9 +80,22 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
|
|||
m_miner(data.miner),
|
||||
m_threads(data.threads),
|
||||
m_ctx()
|
||||
{
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
// cn-heavy optimization for Zen3 CPUs
|
||||
if ((N == 1) && (m_av == CnHash::AV_SINGLE) && (m_algorithm.family() == Algorithm::CN_HEAVY) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
|
||||
std::lock_guard<std::mutex> lock(cn_heavyZen3MemoryMutex);
|
||||
if (!cn_heavyZen3Memory) {
|
||||
cn_heavyZen3Memory = new VirtualMemory(m_algorithm.l3() * m_threads, data.hugePages, false, false, node());
|
||||
}
|
||||
m_memory = cn_heavyZen3Memory;
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<size_t N>
|
||||
|
@ -85,8 +106,14 @@ xmrig::CpuWorker<N>::~CpuWorker()
|
|||
# endif
|
||||
|
||||
CnCtx::release(m_ctx, N);
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
if (m_memory != cn_heavyZen3Memory)
|
||||
# endif
|
||||
{
|
||||
delete m_memory;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_RANDOMX
|
||||
|
@ -387,7 +414,16 @@ template<size_t N>
|
|||
void xmrig::CpuWorker<N>::allocateCnCtx()
|
||||
{
|
||||
if (m_ctx[0] == nullptr) {
|
||||
CnCtx::create(m_ctx, m_memory->scratchpad(), m_algorithm.l3(), N);
|
||||
int shift = 0;
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
// cn-heavy optimization for Zen3 CPUs
|
||||
if (m_memory == cn_heavyZen3Memory) {
|
||||
shift = (id() / 8) * m_algorithm.l3() * 8 + (id() % 8) * 64;
|
||||
}
|
||||
# endif
|
||||
|
||||
CnCtx::create(m_ctx, m_memory->scratchpad() + shift, m_algorithm.l3(), N);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -363,10 +363,14 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||
return;
|
||||
}
|
||||
|
||||
std::vector<std::pair<int64_t, int32_t>> threads_data;
|
||||
threads_data.reserve(cores.size());
|
||||
|
||||
size_t pu_id = 0;
|
||||
while (cacheHashes > 0 && PUs > 0) {
|
||||
bool allocated_pu = false;
|
||||
|
||||
threads_data.clear();
|
||||
for (hwloc_obj_t core : cores) {
|
||||
const std::vector<hwloc_obj_t> units = findByType(core, HWLOC_OBJ_PU);
|
||||
if (units.size() <= pu_id) {
|
||||
|
@ -377,18 +381,31 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||
PUs--;
|
||||
|
||||
allocated_pu = true;
|
||||
threads.add(units[pu_id]->os_index, intensity);
|
||||
threads_data.emplace_back(units[pu_id]->os_index, intensity);
|
||||
|
||||
if (cacheHashes == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Reversing of "threads_data" and "cores" is done to fill in virtual cores starting from the last one, but still in order
|
||||
// For example, cn-heavy threads on 6-core Zen2/Zen3 will have affinity [0,2,4,6,8,10,9,11]
|
||||
// This is important for Zen3 cn-heavy optimization
|
||||
|
||||
if (pu_id & 1) {
|
||||
std::reverse(threads_data.begin(), threads_data.end());
|
||||
}
|
||||
|
||||
for (const auto& t : threads_data) {
|
||||
threads.add(t.first, t.second);
|
||||
}
|
||||
|
||||
if (!allocated_pu) {
|
||||
break;
|
||||
}
|
||||
|
||||
pu_id++;
|
||||
std::reverse(cores.begin(), cores.end());
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
|
|
@ -37,9 +37,9 @@
|
|||
"asm": true,
|
||||
"argon2-impl": null,
|
||||
"astrobwt-max-size": 550,
|
||||
"astrobwt-avx2": false,
|
||||
"cn/0": false,
|
||||
"cn-lite/0": false,
|
||||
"kawpow": false
|
||||
"cn-lite/0": false
|
||||
},
|
||||
"opencl": {
|
||||
"enabled": false,
|
||||
|
|
|
@ -67,9 +67,9 @@ R"===(
|
|||
"asm": true,
|
||||
"argon2-impl": null,
|
||||
"astrobwt-max-size": 550,
|
||||
"astrobwt-avx2": false,
|
||||
"cn/0": false,
|
||||
"cn-lite/0": false,
|
||||
"kawpow": false
|
||||
"cn-lite/0": false
|
||||
},
|
||||
"opencl": {
|
||||
"enabled": false,
|
||||
|
|
|
@ -49,8 +49,8 @@
|
|||
|
||||
|
||||
#define ADD_FN(algo) \
|
||||
m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false>; \
|
||||
m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true>; \
|
||||
m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false, 0>; \
|
||||
m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true, 0>; \
|
||||
m_map[algo][AV_DOUBLE][Assembly::NONE] = cryptonight_double_hash<algo, false>; \
|
||||
m_map[algo][AV_DOUBLE_SOFT][Assembly::NONE] = cryptonight_double_hash<algo, true>; \
|
||||
m_map[algo][AV_TRIPLE][Assembly::NONE] = cryptonight_triple_hash<algo, false>; \
|
||||
|
@ -298,6 +298,22 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
// cn-heavy optimization for Zen3 CPUs
|
||||
if ((av == AV_SINGLE) && (xmrig::Cpu::info()->arch() == xmrig::ICpuInfo::ARCH_ZEN3)) {
|
||||
switch (algorithm.id()) {
|
||||
case xmrig::Algorithm::CN_HEAVY_0:
|
||||
return cryptonight_single_hash<xmrig::Algorithm::CN_HEAVY_0, false, 3>;
|
||||
case xmrig::Algorithm::CN_HEAVY_TUBE:
|
||||
return cryptonight_single_hash<xmrig::Algorithm::CN_HEAVY_TUBE, false, 3>;
|
||||
case xmrig::Algorithm::CN_HEAVY_XHV:
|
||||
return cryptonight_single_hash<xmrig::Algorithm::CN_HEAVY_XHV, false, 3>;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
cn_hash_fun fun = cnHash.m_map[algorithm][av][Cpu::assembly(assembly)];
|
||||
if (fun) {
|
||||
|
|
|
@ -431,7 +431,7 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
|
|||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
|
|
@ -306,7 +306,21 @@ inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3
|
|||
namespace xmrig {
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
template<int interleave>
|
||||
static inline constexpr uint64_t interleaved_index(uint64_t k)
|
||||
{
|
||||
return ((k & ~63ULL) << interleave) | (k & 63);
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
inline constexpr uint64_t interleaved_index<0>(uint64_t k)
|
||||
{
|
||||
return k;
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
@ -343,6 +357,11 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||
if (interleave > 0) {
|
||||
_mm_prefetch((const char*)(output), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(output + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
@ -354,19 +373,21 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
|||
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
_mm_store_si128(output + 0, xin0);
|
||||
_mm_store_si128(output + 1, xin1);
|
||||
_mm_store_si128(output + 2, xin2);
|
||||
_mm_store_si128(output + 3, xin3);
|
||||
output += (64 << interleave) / sizeof(__m128i);
|
||||
_mm_store_si128(output + 0, xin4);
|
||||
_mm_store_si128(output + 1, xin5);
|
||||
_mm_store_si128(output + 2, xin6);
|
||||
_mm_store_si128(output + 3, xin7);
|
||||
output += (64 << interleave) / sizeof(__m128i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
@ -387,15 +408,25 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
|||
xout6 = _mm_load_si128(output + 10);
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
const __m128i* input_begin = input;
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
||||
input += (64 << interleave) / sizeof(__m128i);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
|
||||
input += (64 << interleave) / sizeof(__m128i);
|
||||
|
||||
i += 8;
|
||||
|
||||
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
|
||||
_mm_prefetch((const char*)(input), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
|
@ -414,15 +445,25 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
|||
}
|
||||
|
||||
if (IS_HEAVY) {
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
input = input_begin;
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
||||
input += (64 << interleave) / sizeof(__m128i);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
|
||||
input += (64 << interleave) / sizeof(__m128i);
|
||||
|
||||
i += 8;
|
||||
|
||||
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
|
||||
_mm_prefetch((const char*)(input), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
|
@ -558,7 +599,7 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
|
|||
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
|
||||
}
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
@ -577,7 +618,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
|
||||
|
||||
uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||
uint8_t *l0 = ctx[0]->memory;
|
||||
|
@ -620,7 +661,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
for (size_t i = 0; i < props.iterations(); i++) {
|
||||
__m128i cx;
|
||||
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
}
|
||||
|
@ -632,12 +673,12 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
else if (SOFT_AES) {
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
else {
|
||||
cx = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx = soft_aesenc(&l0[interleaved_index<interleave>(idx0 & MASK)], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -645,16 +686,16 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
|
||||
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
||||
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
||||
} else {
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[idx0 & MASK]), _mm_xor_si128(bx0, cx));
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]), _mm_xor_si128(bx0, cx));
|
||||
}
|
||||
|
||||
idx0 = static_cast<uint64_t>(_mm_cvtsi128_si64(cx));
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[0];
|
||||
ch = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[1];
|
||||
cl = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[0];
|
||||
ch = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[1];
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (props.isR()) {
|
||||
|
@ -681,14 +722,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[0] = al0;
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = al0;
|
||||
|
||||
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
|
||||
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
||||
} else if (BASE == Algorithm::CN_1) {
|
||||
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0;
|
||||
} else {
|
||||
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0;
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0;
|
||||
}
|
||||
|
||||
al0 ^= cl;
|
||||
|
@ -697,11 +738,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
if (props.isHeavy()) {
|
||||
int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
|
||||
int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
|
||||
int64_t n = ((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0];
|
||||
int32_t d = ((int32_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[2];
|
||||
int64_t q = n / (d | 0x5);
|
||||
|
||||
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
|
||||
((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = n ^ q;
|
||||
|
||||
if (ALGO == Algorithm::CN_HEAVY_XHV) {
|
||||
d = ~d;
|
||||
|
@ -722,7 +763,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
# endif
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
@ -810,7 +851,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
|
@ -887,7 +928,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
@ -909,8 +950,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
keccak(input, size, ctx[0]->state);
|
||||
keccak(input + size, size, ctx[1]->state);
|
||||
|
||||
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
cnv2_double_mainloop_sandybridge_asm(ctx);
|
||||
|
@ -939,8 +980,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
||||
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
|
@ -991,8 +1032,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
VARIANT4_RANDOM_MATH_INIT(1);
|
||||
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
|
@ -1187,8 +1228,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
@ -1333,7 +1374,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1378,7 +1419,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
@ -1407,7 +1448,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1460,7 +1501,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
@ -1489,7 +1530,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|||
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1550,7 +1591,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
|
|
@ -65,18 +65,13 @@
|
|||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
#ifdef XMRIG_OS_LINUX
|
||||
#if defined(XMRIG_OS_LINUX) || (!defined(XMRIG_OS_APPLE) && !defined(__FreeBSD__))
|
||||
static inline int hugePagesFlag(size_t size)
|
||||
{
|
||||
return (static_cast<int>(log2(size)) & MAP_HUGE_MASK) << MAP_HUGE_SHIFT;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
bool xmrig::VirtualMemory::isHugepagesAvailable()
|
||||
{
|
||||
|
@ -165,7 +160,7 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages
|
|||
|
||||
void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
|
||||
{
|
||||
# if defined(__APPLE__)
|
||||
# if defined(XMRIG_OS_APPLE)
|
||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
|
||||
# elif defined(__FreeBSD__)
|
||||
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
|
||||
|
|
|
@ -88,7 +88,12 @@ void xmrig::Rx::init(IRxListener *listener)
|
|||
template<typename T>
|
||||
bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu)
|
||||
{
|
||||
if (seed.algorithm().family() != Algorithm::RANDOM_X) {
|
||||
const Algorithm::Family f = seed.algorithm().family();
|
||||
if ((f != Algorithm::RANDOM_X)
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
&& (f != Algorithm::CN_HEAVY)
|
||||
# endif
|
||||
) {
|
||||
# ifdef XMRIG_FEATURE_MSR
|
||||
RxMsr::destroy();
|
||||
# endif
|
||||
|
@ -96,16 +101,22 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
|
|||
return true;
|
||||
}
|
||||
|
||||
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
|
||||
randomx_set_huge_pages_jit(cpu.isHugePagesJit());
|
||||
randomx_set_optimized_dataset_init(config.initDatasetAVX2());
|
||||
|
||||
# ifdef XMRIG_FEATURE_MSR
|
||||
if (!RxMsr::isInitialized()) {
|
||||
RxMsr::init(config, cpu.threads().get(seed.algorithm()).data());
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
if (f == Algorithm::CN_HEAVY) {
|
||||
return true;
|
||||
}
|
||||
# endif
|
||||
|
||||
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
|
||||
randomx_set_huge_pages_jit(cpu.isHugePagesJit());
|
||||
randomx_set_optimized_dataset_init(config.initDatasetAVX2());
|
||||
|
||||
if (!osInitialized) {
|
||||
# ifdef XMRIG_FIX_RYZEN
|
||||
RxFix::setupMainLoopExceptionFrame();
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#define APP_ID "xmrig"
|
||||
#define APP_NAME "XMRig"
|
||||
#define APP_DESC "XMRig miner"
|
||||
#define APP_VERSION "6.8.1"
|
||||
#define APP_VERSION "6.8.2-dev"
|
||||
#define APP_DOMAIN "xmrig.com"
|
||||
#define APP_SITE "www.xmrig.com"
|
||||
#define APP_COPYRIGHT "Copyright (C) 2016-2021 xmrig.com"
|
||||
|
@ -36,7 +36,7 @@
|
|||
|
||||
#define APP_VER_MAJOR 6
|
||||
#define APP_VER_MINOR 8
|
||||
#define APP_VER_PATCH 1
|
||||
#define APP_VER_PATCH 2
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# if (_MSC_VER >= 1920)
|
||||
|
|
Loading…
Reference in a new issue