mirror of
https://github.com/xmrig/xmrig.git
synced 2025-01-22 10:45:06 +00:00
Merge branch 'dev'
This commit is contained in:
commit
cb0bba7e10
51 changed files with 1184 additions and 515 deletions
11
CHANGELOG.md
11
CHANGELOG.md
|
@ -1,3 +1,14 @@
|
||||||
|
# v6.3.4
|
||||||
|
- [#1823](https://github.com/xmrig/xmrig/pull/1823) RandomX: added new option `scratchpad_prefetch_mode`.
|
||||||
|
- [#1827](https://github.com/xmrig/xmrig/pull/1827) [#1831](https://github.com/xmrig/xmrig/pull/1831) Improved nonce iteration performance.
|
||||||
|
- [#1828](https://github.com/xmrig/xmrig/pull/1828) RandomX: added SSE4.1-optimized Blake2b.
|
||||||
|
- [#1830](https://github.com/xmrig/xmrig/pull/1830) RandomX: added performance profiler (for developers).
|
||||||
|
- [#1835](https://github.com/xmrig/xmrig/pull/1835) RandomX: returned old soft AES implementation and added auto-select between the two.
|
||||||
|
- [#1840](https://github.com/xmrig/xmrig/pull/1840) RandomX: moved more stuff to compile time, small x86 JIT compiler speedup.
|
||||||
|
- [#1841](https://github.com/xmrig/xmrig/pull/1841) Fixed Cryptonight OpenCL for AMD 20.7.2 drivers.
|
||||||
|
- [#1842](https://github.com/xmrig/xmrig/pull/1842) RandomX: AES improvements, a bit faster hardware AES code when compiled with MSVC.
|
||||||
|
- [#1843](https://github.com/xmrig/xmrig/pull/1843) RandomX: improved performance of GCC compiled binaries.
|
||||||
|
|
||||||
# v6.3.3
|
# v6.3.3
|
||||||
- [#1817](https://github.com/xmrig/xmrig/pull/1817) Fixed self-select login sequence.
|
- [#1817](https://github.com/xmrig/xmrig/pull/1817) Fixed self-select login sequence.
|
||||||
- Added brand new [build from source](https://xmrig.com/docs/miner/build) documentation.
|
- Added brand new [build from source](https://xmrig.com/docs/miner/build) documentation.
|
||||||
|
|
|
@ -23,6 +23,7 @@ option(WITH_NVML "Enable NVML (NVIDIA Management Library) support (on
|
||||||
option(WITH_ADL "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON)
|
option(WITH_ADL "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON)
|
||||||
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
||||||
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
||||||
|
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
||||||
|
|
||||||
option(BUILD_STATIC "Build static binary" OFF)
|
option(BUILD_STATIC "Build static binary" OFF)
|
||||||
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
||||||
|
|
|
@ -29,8 +29,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -flax-vector-conversions")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -flax-vector-conversions")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -msse4.1")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -msse4.1")
|
||||||
|
|
||||||
add_definitions(/DHAVE_ROTR)
|
add_definitions(/DHAVE_ROTR)
|
||||||
endif()
|
endif()
|
||||||
|
@ -87,8 +87,8 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -msse4.1")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -msse4.1")
|
||||||
|
|
||||||
check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR)
|
check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR)
|
||||||
if (HAVE_ROTR)
|
if (HAVE_ROTR)
|
||||||
|
|
|
@ -66,14 +66,12 @@ public:
|
||||||
|
|
||||||
inline bool nextRound(uint32_t rounds, uint32_t roundSize)
|
inline bool nextRound(uint32_t rounds, uint32_t roundSize)
|
||||||
{
|
{
|
||||||
bool ok = true;
|
|
||||||
m_rounds[index()]++;
|
m_rounds[index()]++;
|
||||||
|
|
||||||
if ((m_rounds[index()] % rounds) == 0) {
|
if ((m_rounds[index()] % rounds) == 0) {
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
*nonce(i) = Nonce::next(index(), *nonce(i), rounds * roundSize, currentJob().isNicehash(), &ok);
|
if (!Nonce::next(index(), nonce(i), rounds * roundSize, nonceMask())) {
|
||||||
if (!ok) {
|
return false;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -83,13 +81,14 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ok;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
inline int32_t nonceOffset() const { return currentJob().nonceOffset(); }
|
inline int32_t nonceOffset() const { return currentJob().nonceOffset(); }
|
||||||
inline size_t nonceSize() const { return currentJob().nonceSize(); }
|
inline size_t nonceSize() const { return currentJob().nonceSize(); }
|
||||||
|
inline uint64_t nonceMask() const { return m_nonce_mask[index()]; }
|
||||||
|
|
||||||
inline void save(const Job &job, uint32_t reserveCount, Nonce::Backend backend)
|
inline void save(const Job &job, uint32_t reserveCount, Nonce::Backend backend)
|
||||||
{
|
{
|
||||||
|
@ -97,12 +96,13 @@ private:
|
||||||
const size_t size = job.size();
|
const size_t size = job.size();
|
||||||
m_jobs[index()] = job;
|
m_jobs[index()] = job;
|
||||||
m_rounds[index()] = 0;
|
m_rounds[index()] = 0;
|
||||||
|
m_nonce_mask[index()] = job.nonceMask();
|
||||||
|
|
||||||
m_jobs[index()].setBackend(backend);
|
m_jobs[index()].setBackend(backend);
|
||||||
|
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
memcpy(m_blobs[index()] + (i * size), job.blob(), size);
|
memcpy(m_blobs[index()] + (i * size), job.blob(), size);
|
||||||
*nonce(i) = Nonce::next(index(), *nonce(i), reserveCount, job.isNicehash());
|
Nonce::next(index(), nonce(i), reserveCount, nonceMask());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,6 +110,7 @@ private:
|
||||||
alignas(16) uint8_t m_blobs[2][Job::kMaxBlobSize * N]{};
|
alignas(16) uint8_t m_blobs[2][Job::kMaxBlobSize * N]{};
|
||||||
Job m_jobs[2];
|
Job m_jobs[2];
|
||||||
uint32_t m_rounds[2] = { 0, 0 };
|
uint32_t m_rounds[2] = { 0, 0 };
|
||||||
|
uint64_t m_nonce_mask[2];
|
||||||
uint64_t m_sequence = 0;
|
uint64_t m_sequence = 0;
|
||||||
uint8_t m_index = 0;
|
uint8_t m_index = 0;
|
||||||
};
|
};
|
||||||
|
@ -125,41 +126,23 @@ inline uint32_t *xmrig::WorkerJob<1>::nonce(size_t)
|
||||||
template<>
|
template<>
|
||||||
inline bool xmrig::WorkerJob<1>::nextRound(uint32_t rounds, uint32_t roundSize)
|
inline bool xmrig::WorkerJob<1>::nextRound(uint32_t rounds, uint32_t roundSize)
|
||||||
{
|
{
|
||||||
bool ok = true;
|
|
||||||
m_rounds[index()]++;
|
m_rounds[index()]++;
|
||||||
|
|
||||||
uint32_t* n = nonce();
|
uint32_t* n = nonce();
|
||||||
const uint32_t prev_nonce = *n;
|
|
||||||
|
|
||||||
if ((m_rounds[index()] % rounds) == 0) {
|
if ((m_rounds[index()] % rounds) == 0) {
|
||||||
*n = Nonce::next(index(), *n, rounds * roundSize, currentJob().isNicehash(), &ok);
|
if (!Nonce::next(index(), n, rounds * roundSize, nonceMask())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (nonceSize() == sizeof(uint64_t)) {
|
||||||
|
m_jobs[index()].nonce()[1] = n[1];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
*n += roundSize;
|
*n += roundSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Increment higher 32 bits of a 64-bit nonce when lower 32 bits overflow
|
return true;
|
||||||
if (!currentJob().isNicehash() && (nonceSize() == sizeof(uint64_t))) {
|
|
||||||
const bool wrapped = (*n < prev_nonce);
|
|
||||||
const bool wraps_this_round = (static_cast<uint64_t>(*n) + roundSize > (1ULL << 32));
|
|
||||||
|
|
||||||
// Account for the case when starting nonce hasn't wrapped yet, but some nonces in the current round will wrap
|
|
||||||
if (wrapped || wraps_this_round) {
|
|
||||||
// Set lower 32 bits to 0 when higher 32 bits change
|
|
||||||
Nonce::reset(index());
|
|
||||||
|
|
||||||
// Sets *n to 0 and Nonce::m_nonce[index] to the correct next value
|
|
||||||
*n = 0;
|
|
||||||
Nonce::next(index(), *n, rounds * roundSize, currentJob().isNicehash(), &ok);
|
|
||||||
|
|
||||||
++n[1];
|
|
||||||
|
|
||||||
Job& job = m_jobs[index()];
|
|
||||||
memcpy(job.blob(), blob(), job.size());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return ok;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -169,11 +152,12 @@ inline void xmrig::WorkerJob<1>::save(const Job &job, uint32_t reserveCount, Non
|
||||||
m_index = job.index();
|
m_index = job.index();
|
||||||
m_jobs[index()] = job;
|
m_jobs[index()] = job;
|
||||||
m_rounds[index()] = 0;
|
m_rounds[index()] = 0;
|
||||||
|
m_nonce_mask[index()] = job.nonceMask();
|
||||||
|
|
||||||
m_jobs[index()].setBackend(backend);
|
m_jobs[index()].setBackend(backend);
|
||||||
|
|
||||||
memcpy(blob(), job.blob(), job.size());
|
memcpy(blob(), job.blob(), job.size());
|
||||||
*nonce() = Nonce::next(index(), *nonce(), reserveCount, currentJob().isNicehash());
|
Nonce::next(index(), nonce(), reserveCount, nonceMask());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -63,6 +63,7 @@ public:
|
||||||
FLAG_PDPE1GB,
|
FLAG_PDPE1GB,
|
||||||
FLAG_SSE2,
|
FLAG_SSE2,
|
||||||
FLAG_SSSE3,
|
FLAG_SSSE3,
|
||||||
|
FLAG_SSE41,
|
||||||
FLAG_XOP,
|
FLAG_XOP,
|
||||||
FLAG_POPCNT,
|
FLAG_POPCNT,
|
||||||
FLAG_CAT_L3,
|
FLAG_CAT_L3,
|
||||||
|
@ -97,6 +98,7 @@ public:
|
||||||
virtual size_t packages() const = 0;
|
virtual size_t packages() const = 0;
|
||||||
virtual size_t threads() const = 0;
|
virtual size_t threads() const = 0;
|
||||||
virtual Vendor vendor() const = 0;
|
virtual Vendor vendor() const = 0;
|
||||||
|
virtual bool jccErratum() const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,7 @@
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt", "cat_l3" };
|
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3" };
|
||||||
static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" };
|
static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" };
|
||||||
|
|
||||||
|
|
||||||
|
@ -141,6 +141,7 @@ static inline bool has_bmi2() { return has_feature(EXTENDED_FEATURES,
|
||||||
static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, 1 << 26); }
|
static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, 1 << 26); }
|
||||||
static inline bool has_sse2() { return has_feature(PROCESSOR_INFO, EDX_Reg, 1 << 26); }
|
static inline bool has_sse2() { return has_feature(PROCESSOR_INFO, EDX_Reg, 1 << 26); }
|
||||||
static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); }
|
static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); }
|
||||||
|
static inline bool has_sse41() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 19); }
|
||||||
static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); }
|
static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); }
|
||||||
static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); }
|
static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); }
|
||||||
static inline bool has_cat_l3() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); }
|
static inline bool has_cat_l3() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); }
|
||||||
|
@ -177,6 +178,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||||
m_flags.set(FLAG_PDPE1GB, has_pdpe1gb());
|
m_flags.set(FLAG_PDPE1GB, has_pdpe1gb());
|
||||||
m_flags.set(FLAG_SSE2, has_sse2());
|
m_flags.set(FLAG_SSE2, has_sse2());
|
||||||
m_flags.set(FLAG_SSSE3, has_ssse3());
|
m_flags.set(FLAG_SSSE3, has_ssse3());
|
||||||
|
m_flags.set(FLAG_SSE41, has_sse41());
|
||||||
m_flags.set(FLAG_XOP, has_xop());
|
m_flags.set(FLAG_XOP, has_xop());
|
||||||
m_flags.set(FLAG_POPCNT, has_popcnt());
|
m_flags.set(FLAG_POPCNT, has_popcnt());
|
||||||
m_flags.set(FLAG_CAT_L3, has_cat_l3());
|
m_flags.set(FLAG_CAT_L3, has_cat_l3());
|
||||||
|
@ -210,6 +212,37 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||||
m_vendor = VENDOR_INTEL;
|
m_vendor = VENDOR_INTEL;
|
||||||
m_assembly = Assembly::INTEL;
|
m_assembly = Assembly::INTEL;
|
||||||
m_msrMod = MSR_MOD_INTEL;
|
m_msrMod = MSR_MOD_INTEL;
|
||||||
|
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
unsigned int stepping : 4;
|
||||||
|
unsigned int model : 4;
|
||||||
|
unsigned int family : 4;
|
||||||
|
unsigned int processor_type : 2;
|
||||||
|
unsigned int reserved1 : 2;
|
||||||
|
unsigned int ext_model : 4;
|
||||||
|
unsigned int ext_family : 8;
|
||||||
|
unsigned int reserved2 : 4;
|
||||||
|
} processor_info;
|
||||||
|
|
||||||
|
cpuid(1, data);
|
||||||
|
memcpy(&processor_info, data, sizeof(processor_info));
|
||||||
|
|
||||||
|
// Intel JCC erratum mitigation
|
||||||
|
if (processor_info.family == 6) {
|
||||||
|
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
|
||||||
|
const uint32_t stepping = processor_info.stepping;
|
||||||
|
|
||||||
|
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
|
||||||
|
m_jccErratum =
|
||||||
|
((model == 0x4E) && (stepping == 0x3)) ||
|
||||||
|
((model == 0x55) && (stepping == 0x4)) ||
|
||||||
|
((model == 0x5E) && (stepping == 0x3)) ||
|
||||||
|
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
|
||||||
|
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
|
||||||
|
((model == 0xA6) && (stepping == 0x0)) ||
|
||||||
|
((model == 0xAE) && (stepping == 0xA));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
|
|
|
@ -61,11 +61,13 @@ protected:
|
||||||
inline size_t packages() const override { return 1; }
|
inline size_t packages() const override { return 1; }
|
||||||
inline size_t threads() const override { return m_threads; }
|
inline size_t threads() const override { return m_threads; }
|
||||||
inline Vendor vendor() const override { return m_vendor; }
|
inline Vendor vendor() const override { return m_vendor; }
|
||||||
|
inline bool jccErratum() const override { return m_jccErratum; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
char m_brand[64 + 6]{};
|
char m_brand[64 + 6]{};
|
||||||
size_t m_threads;
|
size_t m_threads;
|
||||||
Vendor m_vendor = VENDOR_UNKNOWN;
|
Vendor m_vendor = VENDOR_UNKNOWN;
|
||||||
|
bool m_jccErratum = false;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Assembly m_assembly = Assembly::NONE;
|
Assembly m_assembly = Assembly::NONE;
|
||||||
|
|
|
@ -899,7 +899,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
|
||||||
|
|
||||||
((uint8 *)h)[0] = vload8(0U, c_IV256);
|
((uint8 *)h)[0] = vload8(0U, c_IV256);
|
||||||
|
|
||||||
for (uint i = 0; i < 3; ++i) {
|
for (volatile uint i = 0; i < 3; ++i) {
|
||||||
((uint16 *)m)[0] = vload16(i, (__global uint *)states);
|
((uint16 *)m)[0] = vload16(i, (__global uint *)states);
|
||||||
for (uint x = 0; x < 16; ++x) {
|
for (uint x = 0; x < 16; ++x) {
|
||||||
m[x] = SWAP4(m[x]);
|
m[x] = SWAP4(m[x]);
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
|
||||||
static const char cryptonight_cl[60954] = {
|
static const char cryptonight_cl[60963] = {
|
||||||
0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,
|
0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,
|
||||||
0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,
|
0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,
|
||||||
0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69,
|
0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69,
|
||||||
|
@ -1823,91 +1823,92 @@ static const char cryptonight_cl[60954] = {
|
||||||
0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x6d,0x5b,
|
0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x6d,0x5b,
|
||||||
0x31,0x36,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x76,0x5b,0x31,0x36,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x68,0x5b,
|
0x31,0x36,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x76,0x5b,0x31,0x36,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x68,0x5b,
|
||||||
0x38,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3d,0x30,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,
|
0x38,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3d,0x30,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,
|
||||||
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x49,0x56,0x32,0x35,0x36,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,
|
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x49,0x56,0x32,0x35,0x36,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x76,0x6f,0x6c,0x61,
|
||||||
0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x33,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x6d,0x29,0x5b,
|
0x74,0x69,0x6c,0x65,0x20,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x33,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x28,0x28,0x75,0x69,0x6e,
|
||||||
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x31,0x36,0x28,0x69,0x2c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,
|
0x74,0x31,0x36,0x20,0x2a,0x29,0x6d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x31,0x36,0x28,0x69,0x2c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
|
||||||
0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x31,0x36,0x3b,0x20,0x2b,0x2b,0x78,0x29,
|
0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,
|
||||||
0x20,0x7b,0x0a,0x6d,0x5b,0x78,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x6d,0x5b,0x78,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x62,0x69,0x74,0x6c,0x65,0x6e,0x2b,0x3d,0x35,
|
0x3c,0x31,0x36,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x6d,0x5b,0x78,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x6d,0x5b,0x78,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,
|
||||||
0x31,0x32,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,
|
0x62,0x69,0x74,0x6c,0x65,0x6e,0x2b,0x3d,0x35,0x31,0x32,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,
|
||||||
0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x68,0x69,0x3d,0x76,0x6c,0x6f,
|
0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,
|
||||||
0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,
|
0x30,0x5d,0x2e,0x68,0x69,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,
|
||||||
0x0a,0x76,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b,
|
0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x76,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,
|
||||||
0x20,0x72,0x3c,0x31,0x34,0x3b,0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x30,
|
0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b,0x20,0x72,0x3c,0x31,0x34,0x3b,0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,
|
||||||
0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x36,0x2c,0x30,
|
0x38,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x30,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,
|
||||||
0x78,0x41,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x36,0x29,
|
0x0a,0x47,0x53,0x28,0x32,0x2c,0x36,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,
|
||||||
0x3b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x36,0x2c,0x30,0x78,
|
0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x36,0x29,0x3b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,
|
||||||
0x42,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x43,0x29,0x3b,
|
0x47,0x53,0x28,0x31,0x2c,0x36,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,
|
||||||
0x0a,0x47,0x53,0x28,0x33,0x2c,0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,
|
0x30,0x78,0x44,0x2c,0x30,0x78,0x43,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,
|
||||||
0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x5e,0x28,0x28,0x75,0x69,0x6e,
|
0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,
|
||||||
0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x6d,0x5b,0x30,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,
|
0x5b,0x30,0x5d,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x6d,0x5b,0x30,0x5d,0x3d,0x53,0x57,0x41,0x50,
|
||||||
0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x38,0x5d,0x29,0x3b,0x0a,0x6d,0x5b,0x31,0x5d,0x3d,0x53,0x57,
|
0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x38,0x5d,0x29,
|
||||||
0x41,0x50,0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x39,
|
0x3b,0x0a,0x6d,0x5b,0x31,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,
|
||||||
0x5d,0x29,0x3b,0x0a,0x6d,0x5b,0x32,0x5d,0x3d,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x33,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,
|
0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x39,0x5d,0x29,0x3b,0x0a,0x6d,0x5b,0x32,0x5d,0x3d,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x3b,0x0a,0x6d,
|
||||||
0x3b,0x0a,0x6d,0x5b,0x34,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x35,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x36,0x5d,0x3d,0x30,
|
0x5b,0x33,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x34,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x35,0x5d,0x3d,0x30,0x78,0x30,0x30,
|
||||||
0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x38,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,
|
0x55,0x3b,0x0a,0x6d,0x5b,0x36,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x38,0x5d,0x3d,
|
||||||
0x39,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x30,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x31,0x5d,0x3d,0x30,0x78,0x30,
|
0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x39,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x30,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,
|
||||||
0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x32,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x33,0x5d,0x3d,0x31,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x34,0x5d,
|
0x6d,0x5b,0x31,0x31,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x32,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x33,0x5d,0x3d,
|
||||||
0x3d,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x35,0x5d,0x3d,0x30,0x78,0x36,0x34,0x30,0x3b,0x0a,0x62,0x69,0x74,0x6c,0x65,0x6e,0x2b,0x3d,0x36,0x34,0x3b,0x0a,0x28,0x28,
|
0x31,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x34,0x5d,0x3d,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x35,0x5d,0x3d,0x30,0x78,0x36,0x34,0x30,0x3b,0x0a,0x62,0x69,0x74,0x6c,0x65,
|
||||||
0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,
|
0x6e,0x2b,0x3d,0x36,0x34,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,0x3d,0x28,0x28,0x75,0x69,0x6e,
|
||||||
0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x68,0x69,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,
|
0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x68,0x69,0x3d,
|
||||||
0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x76,0x5b,0x31,0x33,0x5d,
|
0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,
|
||||||
0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b,0x20,0x72,0x3c,0x31,0x34,0x3b,
|
0x65,0x6e,0x3b,0x0a,0x76,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,
|
||||||
0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x30,0x29,0x3b,0x0a,0x47,0x53,0x28,
|
0x3d,0x30,0x3b,0x20,0x72,0x3c,0x31,0x34,0x3b,0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x43,0x2c,
|
||||||
0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x36,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x45,
|
0x30,0x78,0x30,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,
|
||||||
0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x36,0x29,0x3b,0x0a,0x47,0x53,0x28,0x30,
|
0x36,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x46,0x2c,0x30,
|
||||||
0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x36,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x43,0x2c,
|
0x78,0x36,0x29,0x3b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x36,
|
||||||
0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x43,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,
|
0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,
|
||||||
0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,
|
0x43,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,0x0a,0x28,0x28,0x75,0x69,0x6e,
|
||||||
0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,
|
0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x5e,0x28,0x28,
|
||||||
0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,
|
0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,
|
||||||
0x0a,0x68,0x5b,0x69,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x68,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x74,0x3d,0x28,0x75,0x69,
|
0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x68,0x5b,0x69,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x68,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x75,0x69,
|
||||||
0x6e,0x74,0x32,0x29,0x28,0x68,0x5b,0x36,0x5d,0x2c,0x68,0x5b,0x37,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x61,0x73,0x5f,0x75,0x6c,0x6f,0x6e,0x67,0x28,0x74,0x29,0x3c,
|
0x6e,0x74,0x32,0x20,0x74,0x3d,0x28,0x75,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x5b,0x36,0x5d,0x2c,0x68,0x5b,0x37,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x61,0x73,0x5f,
|
||||||
0x3d,0x54,0x61,0x72,0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,
|
0x75,0x6c,0x6f,0x6e,0x67,0x28,0x74,0x29,0x3c,0x3d,0x54,0x61,0x72,0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,
|
||||||
0x6e,0x63,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,
|
0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,
|
||||||
0x20,0x7b,0x0a,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,
|
0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,0x20,0x7b,0x0a,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,
|
||||||
0x2b,0x28,0x75,0x69,0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,
|
0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x2b,0x28,0x75,0x69,0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,
|
||||||
0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x75,0x6e,0x64,0x65,0x66,0x20,0x53,0x57,0x41,0x50,0x34,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,
|
0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x75,0x6e,0x64,0x65,0x66,0x20,0x53,0x57,0x41,0x50,0x34,0x0a,0x5f,0x5f,0x6b,0x65,
|
||||||
0x20,0x47,0x72,0x6f,0x65,0x73,0x74,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x2a,0x73,0x74,0x61,0x74,0x65,0x73,0x2c,
|
0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x47,0x72,0x6f,0x65,0x73,0x74,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x6c,0x6f,0x6e,0x67,
|
||||||
0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,
|
0x20,0x2a,0x73,0x74,0x61,0x74,0x65,0x73,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,
|
||||||
0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x54,0x61,0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74,
|
0x66,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x54,0x61,
|
||||||
0x20,0x54,0x68,0x72,0x65,0x61,0x64,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x3d,0x67,0x65,0x74,0x5f,0x67,
|
0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74,0x20,0x54,0x68,0x72,0x65,0x61,0x64,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,
|
||||||
0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,
|
0x69,0x64,0x78,0x3d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,
|
||||||
0x3b,0x0a,0x69,0x66,0x28,0x69,0x64,0x78,0x3c,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x54,0x68,0x72,0x65,0x61,0x64,0x73,0x5d,0x29,0x20,0x7b,0x0a,0x73,
|
0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x69,0x64,0x78,0x3c,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x54,0x68,0x72,0x65,
|
||||||
0x74,0x61,0x74,0x65,0x73,0x2b,0x3d,0x32,0x35,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,
|
0x61,0x64,0x73,0x5d,0x29,0x20,0x7b,0x0a,0x73,0x74,0x61,0x74,0x65,0x73,0x2b,0x3d,0x32,0x35,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,
|
||||||
0x53,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x7b,0x20,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,
|
0x5d,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x53,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x7b,0x20,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,
|
||||||
0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x31,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x20,0x7d,0x3b,0x0a,0x75,
|
0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x31,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||||
0x6c,0x6f,0x6e,0x67,0x20,0x48,0x5b,0x38,0x5d,0x2c,0x4d,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,
|
0x30,0x30,0x55,0x4c,0x20,0x7d,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x48,0x5b,0x38,0x5d,0x2c,0x4d,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,
|
||||||
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,
|
0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,
|
||||||
0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,
|
0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,
|
||||||
0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,
|
0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,
|
||||||
0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,
|
0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,
|
||||||
0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,
|
0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,
|
||||||
0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x31,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,
|
0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,
|
||||||
0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,
|
0x28,0x31,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,
|
||||||
0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,
|
0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,
|
||||||
0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,
|
0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,
|
||||||
0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,
|
0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,
|
||||||
0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,
|
0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,
|
||||||
0x6c,0x6f,0x61,0x64,0x38,0x28,0x32,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,
|
0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x32,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,
|
||||||
0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,
|
0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,
|
||||||
0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,
|
0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,
|
||||||
0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,
|
0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,
|
||||||
0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x4d,0x5b,0x30,0x5d,0x3d,0x73,
|
0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,
|
||||||
0x74,0x61,0x74,0x65,0x73,0x5b,0x32,0x34,0x5d,0x3b,0x0a,0x4d,0x5b,0x31,0x5d,0x3d,0x30,0x78,0x38,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x32,0x5d,0x3d,0x30,0x55,0x4c,
|
0x0a,0x7d,0x0a,0x4d,0x5b,0x30,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x73,0x5b,0x32,0x34,0x5d,0x3b,0x0a,0x4d,0x5b,0x31,0x5d,0x3d,0x30,0x78,0x38,0x30,0x55,0x4c,0x3b,
|
||||||
0x3b,0x0a,0x4d,0x5b,0x33,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x34,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x35,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,
|
0x0a,0x4d,0x5b,0x32,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x33,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x34,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,
|
||||||
0x4d,0x5b,0x36,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x34,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
0x5b,0x35,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x36,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x34,0x30,0x30,0x30,0x30,
|
||||||
0x30,0x55,0x4c,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,
|
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,
|
||||||
0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,
|
0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,
|
||||||
0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x74,0x6d,
|
0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,
|
||||||
0x70,0x5b,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,
|
0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x74,0x6d,0x70,0x5b,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,
|
||||||
0x0a,0x74,0x6d,0x70,0x5b,0x69,0x5d,0x3d,0x53,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x69,0x5d,0x5e,0x4d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,
|
0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x74,0x6d,0x70,0x5b,0x69,0x5d,0x3d,0x53,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x69,
|
||||||
0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x53,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,
|
0x5d,0x5e,0x4d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x53,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,
|
||||||
0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x6d,0x70,
|
0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,
|
||||||
0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x53,0x74,0x61,0x74,0x65,0x5b,0x37,0x5d,0x3c,0x3d,0x54,0x61,0x72,0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,
|
0x69,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x6d,0x70,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x53,0x74,0x61,0x74,0x65,0x5b,0x37,0x5d,0x3c,0x3d,0x54,0x61,0x72,
|
||||||
0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,
|
0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x6f,
|
||||||
0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,0x20,0x7b,0x0a,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,
|
0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,0x20,0x7b,0x0a,0x6f,
|
||||||
0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x2b,0x28,0x75,0x69,0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,
|
0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x2b,0x28,0x75,0x69,
|
||||||
0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
|
0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,
|
||||||
|
0x7d,0x0a,0x00
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace xmrig
|
} // namespace xmrig
|
||||||
|
|
|
@ -222,3 +222,15 @@ if (WITH_KAWPOW)
|
||||||
src/base/net/stratum/EthStratumClient.cpp
|
src/base/net/stratum/EthStratumClient.cpp
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (WITH_PROFILING)
|
||||||
|
add_definitions(/DXMRIG_FEATURE_PROFILING)
|
||||||
|
|
||||||
|
list(APPEND HEADERS_BASE
|
||||||
|
src/base/tools/Profiler.h
|
||||||
|
)
|
||||||
|
|
||||||
|
list(APPEND SOURCES_BASE
|
||||||
|
src/base/tools/Profiler.cpp
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
|
@ -101,3 +101,13 @@ const char *xmrig::Tags::opencl()
|
||||||
return tag;
|
return tag;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef XMRIG_FEATURE_PROFILING
|
||||||
|
const char* xmrig::Tags::profiler()
|
||||||
|
{
|
||||||
|
static const char* tag = CYAN_BG_BOLD(WHITE_BOLD_S " profile ");
|
||||||
|
|
||||||
|
return tag;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -53,6 +53,10 @@ public:
|
||||||
# ifdef XMRIG_FEATURE_OPENCL
|
# ifdef XMRIG_FEATURE_OPENCL
|
||||||
static const char *opencl();
|
static const char *opencl();
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
# ifdef XMRIG_FEATURE_PROFILING
|
||||||
|
static const char* profiler();
|
||||||
|
# endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,7 @@ public:
|
||||||
inline uint32_t backend() const { return m_backend; }
|
inline uint32_t backend() const { return m_backend; }
|
||||||
inline uint64_t diff() const { return m_diff; }
|
inline uint64_t diff() const { return m_diff; }
|
||||||
inline uint64_t height() const { return m_height; }
|
inline uint64_t height() const { return m_height; }
|
||||||
|
inline uint64_t nonceMask() const { return isNicehash() ? 0xFFFFFFULL : (nonceSize() == sizeof(uint64_t) ? (-1ull >> (extraNonce().size() * 4)): 0xFFFFFFFFULL); }
|
||||||
inline uint64_t target() const { return m_target; }
|
inline uint64_t target() const { return m_target; }
|
||||||
inline uint8_t *blob() { return m_blob; }
|
inline uint8_t *blob() { return m_blob; }
|
||||||
inline uint8_t fixedByte() const { return *(m_blob + 42); }
|
inline uint8_t fixedByte() const { return *(m_blob + 42); }
|
||||||
|
|
101
src/base/tools/Profiler.cpp
Normal file
101
src/base/tools/Profiler.cpp
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
/* XMRig
|
||||||
|
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "base/tools/Profiler.h"
|
||||||
|
#include "base/io/log/Log.h"
|
||||||
|
#include "base/io/log/Tags.h"
|
||||||
|
#include <cstring>
|
||||||
|
#include <sstream>
|
||||||
|
#include <thread>
|
||||||
|
#include <chrono>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef XMRIG_FEATURE_PROFILING
|
||||||
|
|
||||||
|
|
||||||
|
ProfileScopeData* ProfileScopeData::s_data[MAX_DATA_COUNT] = {};
|
||||||
|
volatile long ProfileScopeData::s_dataCount = 0;
|
||||||
|
double ProfileScopeData::s_tscSpeed = 0.0;
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef NOINLINE
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#define NOINLINE __attribute__ ((noinline))
|
||||||
|
#elif _MSC_VER
|
||||||
|
#define NOINLINE __declspec(noinline)
|
||||||
|
#else
|
||||||
|
#define NOINLINE
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
static std::string get_thread_id()
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << std::this_thread::get_id();
|
||||||
|
|
||||||
|
std::string s = ss.str();
|
||||||
|
if (s.length() > ProfileScopeData::MAX_THREAD_ID_LENGTH) {
|
||||||
|
s.resize(ProfileScopeData::MAX_THREAD_ID_LENGTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NOINLINE void ProfileScopeData::Register(ProfileScopeData* data)
|
||||||
|
{
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
const long id = _InterlockedIncrement(&s_dataCount) - 1;
|
||||||
|
#else
|
||||||
|
const long id = __sync_fetch_and_add(&s_dataCount, 1);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (static_cast<unsigned long>(id) < MAX_DATA_COUNT) {
|
||||||
|
s_data[id] = data;
|
||||||
|
|
||||||
|
const std::string s = get_thread_id();
|
||||||
|
memcpy(data->m_threadId, s.c_str(), s.length() + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
NOINLINE void ProfileScopeData::Init()
|
||||||
|
{
|
||||||
|
using namespace std::chrono;
|
||||||
|
|
||||||
|
const uint64_t t1 = static_cast<uint64_t>(time_point_cast<nanoseconds>(high_resolution_clock::now()).time_since_epoch().count());
|
||||||
|
const uint64_t count1 = ReadTSC();
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
const uint64_t t2 = static_cast<uint64_t>(time_point_cast<nanoseconds>(high_resolution_clock::now()).time_since_epoch().count());
|
||||||
|
const uint64_t count2 = ReadTSC();
|
||||||
|
|
||||||
|
if (t2 - t1 > 1000000000) {
|
||||||
|
s_tscSpeed = (count2 - count1) * 1e9 / (t2 - t1);
|
||||||
|
LOG_INFO("%s TSC speed = %.3f GHz", xmrig::Tags::profiler(), s_tscSpeed / 1e9);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* XMRIG_FEATURE_PROFILING */
|
133
src/base/tools/Profiler.h
Normal file
133
src/base/tools/Profiler.h
Normal file
|
@ -0,0 +1,133 @@
|
||||||
|
/* XMRig
|
||||||
|
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XMRIG_PROFILER_H
|
||||||
|
#define XMRIG_PROFILER_H
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef FORCE_INLINE
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#define FORCE_INLINE __forceinline
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
#define FORCE_INLINE __attribute__((always_inline)) inline
|
||||||
|
#elif defined(__clang__)
|
||||||
|
#define FORCE_INLINE __inline__
|
||||||
|
#else
|
||||||
|
#define FORCE_INLINE
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef XMRIG_FEATURE_PROFILING
|
||||||
|
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
static FORCE_INLINE uint64_t ReadTSC()
|
||||||
|
{
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
return __rdtsc();
|
||||||
|
#else
|
||||||
|
uint32_t hi, lo;
|
||||||
|
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
|
||||||
|
return (((uint64_t)hi) << 32) | lo;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct ProfileScopeData
|
||||||
|
{
|
||||||
|
const char* m_name;
|
||||||
|
uint64_t m_totalCycles;
|
||||||
|
uint32_t m_totalSamples;
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
MAX_THREAD_ID_LENGTH = 11,
|
||||||
|
MAX_SAMPLE_COUNT = 128,
|
||||||
|
MAX_DATA_COUNT = 1024
|
||||||
|
};
|
||||||
|
|
||||||
|
char m_threadId[MAX_THREAD_ID_LENGTH + 1];
|
||||||
|
|
||||||
|
static ProfileScopeData* s_data[MAX_DATA_COUNT];
|
||||||
|
static volatile long s_dataCount;
|
||||||
|
static double s_tscSpeed;
|
||||||
|
|
||||||
|
static void Register(ProfileScopeData* data);
|
||||||
|
static void Init();
|
||||||
|
};
|
||||||
|
|
||||||
|
static_assert(std::is_trivial<ProfileScopeData>::value, "ProfileScopeData must be a trivial struct");
|
||||||
|
static_assert(sizeof(ProfileScopeData) <= 32, "ProfileScopeData struct is too big");
|
||||||
|
|
||||||
|
|
||||||
|
class ProfileScope
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
FORCE_INLINE ProfileScope(ProfileScopeData& data)
|
||||||
|
: m_data(data)
|
||||||
|
{
|
||||||
|
if (m_data.m_totalCycles == 0) {
|
||||||
|
ProfileScopeData::Register(&data);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_startCounter = ReadTSC();
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCE_INLINE ~ProfileScope()
|
||||||
|
{
|
||||||
|
m_data.m_totalCycles += ReadTSC() - m_startCounter;
|
||||||
|
++m_data.m_totalSamples;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
ProfileScopeData& m_data;
|
||||||
|
uint64_t m_startCounter;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#define PROFILE_SCOPE(x) static thread_local ProfileScopeData x##_data{#x}; ProfileScope x(x##_data);
|
||||||
|
|
||||||
|
|
||||||
|
#else /* XMRIG_FEATURE_PROFILING */
|
||||||
|
#define PROFILE_SCOPE(x)
|
||||||
|
#endif /* XMRIG_FEATURE_PROFILING */
|
||||||
|
|
||||||
|
|
||||||
|
#include "crypto/randomx/blake2/blake2.h"
|
||||||
|
|
||||||
|
|
||||||
|
struct rx_blake2b_wrapper
|
||||||
|
{
|
||||||
|
FORCE_INLINE static void run(void* out, size_t outlen, const void* in, size_t inlen)
|
||||||
|
{
|
||||||
|
PROFILE_SCOPE(RandomX_Blake2b);
|
||||||
|
rx_blake2b(out, outlen, in, inlen);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* XMRIG_PROFILER_H */
|
|
@ -21,7 +21,8 @@
|
||||||
"rdmsr": true,
|
"rdmsr": true,
|
||||||
"wrmsr": true,
|
"wrmsr": true,
|
||||||
"cache_qos": false,
|
"cache_qos": false,
|
||||||
"numa": true
|
"numa": true,
|
||||||
|
"scratchpad_prefetch_mode": 1
|
||||||
},
|
},
|
||||||
"cpu": {
|
"cpu": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
#include "base/kernel/Platform.h"
|
#include "base/kernel/Platform.h"
|
||||||
#include "base/net/stratum/Job.h"
|
#include "base/net/stratum/Job.h"
|
||||||
#include "base/tools/Object.h"
|
#include "base/tools/Object.h"
|
||||||
|
#include "base/tools/Profiler.h"
|
||||||
#include "base/tools/Timer.h"
|
#include "base/tools/Timer.h"
|
||||||
#include "core/config/Config.h"
|
#include "core/config/Config.h"
|
||||||
#include "core/Controller.h"
|
#include "core/Controller.h"
|
||||||
|
@ -267,6 +268,44 @@ public:
|
||||||
h = "MH/s";
|
h = "MH/s";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ifdef XMRIG_FEATURE_PROFILING
|
||||||
|
ProfileScopeData* data[ProfileScopeData::MAX_DATA_COUNT];
|
||||||
|
|
||||||
|
const uint32_t n = std::min<uint32_t>(ProfileScopeData::s_dataCount, ProfileScopeData::MAX_DATA_COUNT);
|
||||||
|
memcpy(data, ProfileScopeData::s_data, n * sizeof(ProfileScopeData*));
|
||||||
|
|
||||||
|
std::sort(data, data + n, [](ProfileScopeData* a, ProfileScopeData* b) {
|
||||||
|
return strcmp(a->m_threadId, b->m_threadId) < 0;
|
||||||
|
});
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < n;)
|
||||||
|
{
|
||||||
|
uint32_t n1 = i;
|
||||||
|
while ((n1 < n) && (strcmp(data[i]->m_threadId, data[n1]->m_threadId) == 0)) {
|
||||||
|
++n1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(data + i, data + n1, [](ProfileScopeData* a, ProfileScopeData* b) {
|
||||||
|
return a->m_totalCycles > b->m_totalCycles;
|
||||||
|
});
|
||||||
|
|
||||||
|
for (uint32_t j = i; j < n1; ++j) {
|
||||||
|
ProfileScopeData* p = data[j];
|
||||||
|
LOG_INFO("%s Thread %6s | %-30s | %7.3f%% | %9.0f ns",
|
||||||
|
Tags::profiler(),
|
||||||
|
p->m_threadId,
|
||||||
|
p->m_name,
|
||||||
|
p->m_totalCycles * 100.0 / data[i]->m_totalCycles,
|
||||||
|
p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_INFO("%s --------------|--------------------------------|----------|-------------", Tags::profiler());
|
||||||
|
|
||||||
|
i = n1;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s"),
|
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s"),
|
||||||
Tags::miner(),
|
Tags::miner(),
|
||||||
Hashrate::format(speed[0] * scale, num, sizeof(num) / 4),
|
Hashrate::format(speed[0] * scale, num, sizeof(num) / 4),
|
||||||
|
@ -311,6 +350,10 @@ xmrig::Miner::Miner(Controller *controller)
|
||||||
Platform::setThreadPriority(std::min(priority + 1, 5));
|
Platform::setThreadPriority(std::min(priority + 1, 5));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ifdef XMRIG_FEATURE_PROFILING
|
||||||
|
ProfileScopeData::Init();
|
||||||
|
# endif
|
||||||
|
|
||||||
# ifdef XMRIG_ALGO_RANDOMX
|
# ifdef XMRIG_ALGO_RANDOMX
|
||||||
Rx::init(this);
|
Rx::init(this);
|
||||||
# endif
|
# endif
|
||||||
|
|
|
@ -51,7 +51,12 @@ R"===(
|
||||||
"randomx": {
|
"randomx": {
|
||||||
"init": -1,
|
"init": -1,
|
||||||
"mode": "auto",
|
"mode": "auto",
|
||||||
"numa": true
|
"1gb-pages": false,
|
||||||
|
"rdmsr": true,
|
||||||
|
"wrmsr": true,
|
||||||
|
"cache_qos": false,
|
||||||
|
"numa": true,
|
||||||
|
"scratchpad_prefetch_mode": 1
|
||||||
},
|
},
|
||||||
"cpu": {
|
"cpu": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
|
|
|
@ -26,18 +26,14 @@
|
||||||
#include "crypto/common/Nonce.h"
|
#include "crypto/common/Nonce.h"
|
||||||
|
|
||||||
|
|
||||||
#include <mutex>
|
|
||||||
|
|
||||||
|
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
|
||||||
|
|
||||||
std::atomic<bool> Nonce::m_paused;
|
std::atomic<bool> Nonce::m_paused;
|
||||||
std::atomic<uint64_t> Nonce::m_sequence[Nonce::MAX];
|
std::atomic<uint64_t> Nonce::m_sequence[Nonce::MAX];
|
||||||
uint32_t Nonce::m_nonces[2] = { 0, 0 };
|
std::atomic<uint64_t> Nonce::m_nonces[2] = { {0}, {0} };
|
||||||
|
|
||||||
|
|
||||||
static std::mutex mutex;
|
|
||||||
static Nonce nonce;
|
static Nonce nonce;
|
||||||
|
|
||||||
|
|
||||||
|
@ -54,40 +50,34 @@ xmrig::Nonce::Nonce()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32_t xmrig::Nonce::next(uint8_t index, uint32_t nonce, uint32_t reserveCount, bool nicehash, bool *ok)
|
bool xmrig::Nonce::next(uint8_t index, uint32_t *nonce, uint32_t reserveCount, uint64_t mask)
|
||||||
{
|
{
|
||||||
uint32_t next;
|
mask &= 0x7FFFFFFFFFFFFFFFULL;
|
||||||
|
if (reserveCount == 0 || mask < reserveCount - 1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lock(mutex);
|
uint64_t counter = m_nonces[index].fetch_add(reserveCount, std::memory_order_relaxed);
|
||||||
|
while (true) {
|
||||||
if (nicehash) {
|
if (mask < counter) {
|
||||||
if ((m_nonces[index] + reserveCount) > 0x1000000) {
|
return false;
|
||||||
if (ok) {
|
|
||||||
*ok = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
pause(true);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
else if (mask - counter <= reserveCount - 1) {
|
||||||
next = (nonce & 0xFF000000) | m_nonces[index];
|
pause(true);
|
||||||
|
if (mask - counter < reserveCount - 1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (0xFFFFFFFFUL - (uint32_t)counter < reserveCount - 1) {
|
||||||
|
counter = m_nonces[index].fetch_add(reserveCount, std::memory_order_relaxed);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
*nonce = (nonce[0] & ~mask) | counter;
|
||||||
|
if (mask > 0xFFFFFFFFULL) {
|
||||||
|
nonce[1] = (nonce[1] & (~mask >> 32)) | (counter >> 32);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
next = m_nonces[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
m_nonces[index] += reserveCount;
|
|
||||||
|
|
||||||
return next;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void xmrig::Nonce::reset(uint8_t index)
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(mutex);
|
|
||||||
|
|
||||||
m_nonces[index] = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -49,18 +49,18 @@ public:
|
||||||
static inline bool isPaused() { return m_paused.load(std::memory_order_relaxed); }
|
static inline bool isPaused() { return m_paused.load(std::memory_order_relaxed); }
|
||||||
static inline uint64_t sequence(Backend backend) { return m_sequence[backend].load(std::memory_order_relaxed); }
|
static inline uint64_t sequence(Backend backend) { return m_sequence[backend].load(std::memory_order_relaxed); }
|
||||||
static inline void pause(bool paused) { m_paused = paused; }
|
static inline void pause(bool paused) { m_paused = paused; }
|
||||||
|
static inline void reset(uint8_t index) { m_nonces[index] = 0; }
|
||||||
static inline void stop(Backend backend) { m_sequence[backend] = 0; }
|
static inline void stop(Backend backend) { m_sequence[backend] = 0; }
|
||||||
static inline void touch(Backend backend) { m_sequence[backend]++; }
|
static inline void touch(Backend backend) { m_sequence[backend]++; }
|
||||||
|
|
||||||
static uint32_t next(uint8_t index, uint32_t nonce, uint32_t reserveCount, bool nicehash, bool *ok = nullptr);
|
static bool next(uint8_t index, uint32_t *nonce, uint32_t reserveCount, uint64_t mask);
|
||||||
static void reset(uint8_t index);
|
|
||||||
static void stop();
|
static void stop();
|
||||||
static void touch();
|
static void touch();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static std::atomic<bool> m_paused;
|
static std::atomic<bool> m_paused;
|
||||||
static std::atomic<uint64_t> m_sequence[MAX];
|
static std::atomic<uint64_t> m_sequence[MAX];
|
||||||
static uint32_t m_nonces[2];
|
static std::atomic<uint64_t> m_nonces[2];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "crypto/randomx/soft_aes.h"
|
#include "crypto/randomx/soft_aes.h"
|
||||||
#include "crypto/randomx/randomx.h"
|
#include "crypto/randomx/randomx.h"
|
||||||
|
#include "base/tools/Profiler.h"
|
||||||
|
|
||||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||||
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
|
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
|
||||||
|
@ -49,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
Hashing throughput: >20 GiB/s per CPU core with hardware AES
|
Hashing throughput: >20 GiB/s per CPU core with hardware AES
|
||||||
*/
|
*/
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
|
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
|
||||||
const uint8_t* inptr = (uint8_t*)input;
|
const uint8_t* inptr = (uint8_t*)input;
|
||||||
const uint8_t* inputEnd = inptr + inputSize;
|
const uint8_t* inputEnd = inptr + inputSize;
|
||||||
|
@ -117,7 +118,7 @@ template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash)
|
||||||
The modified state is written back to 'state' to allow multiple
|
The modified state is written back to 'state' to allow multiple
|
||||||
calls to this function.
|
calls to this function.
|
||||||
*/
|
*/
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
const uint8_t* outptr = (uint8_t*)buffer;
|
const uint8_t* outptr = (uint8_t*)buffer;
|
||||||
const uint8_t* outputEnd = outptr + outputSize;
|
const uint8_t* outputEnd = outptr + outputSize;
|
||||||
|
@ -158,7 +159,7 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
|
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||||
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
const uint8_t* outptr = (uint8_t*)buffer;
|
const uint8_t* outptr = (uint8_t*)buffer;
|
||||||
const uint8_t* outputEnd = outptr + outputSize;
|
const uint8_t* outputEnd = outptr + outputSize;
|
||||||
|
@ -213,8 +214,10 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||||
|
PROFILE_SCOPE(RandomX_AES);
|
||||||
|
|
||||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||||
|
|
||||||
|
@ -241,38 +244,29 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
//process 64 bytes at a time in 4 lanes
|
//process 64 bytes at a time in 4 lanes
|
||||||
while (scratchpadPtr < scratchpadEnd) {
|
while (scratchpadPtr < scratchpadEnd) {
|
||||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
|
#define HASH_STATE(k) \
|
||||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
|
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0)); \
|
||||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
|
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1)); \
|
||||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
|
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2)); \
|
||||||
|
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3));
|
||||||
|
|
||||||
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
#define FILL_STATE(k) \
|
||||||
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
fill_state0 = aesdec<softAes>(fill_state0, key0); \
|
||||||
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
fill_state1 = aesenc<softAes>(fill_state1, key1); \
|
||||||
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
fill_state2 = aesdec<softAes>(fill_state2, key2); \
|
||||||
|
fill_state3 = aesenc<softAes>(fill_state3, key3); \
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0, fill_state0); \
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1, fill_state1); \
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \
|
||||||
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3);
|
||||||
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
|
HASH_STATE(0);
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
|
HASH_STATE(1);
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
|
FILL_STATE(0);
|
||||||
|
FILL_STATE(1);
|
||||||
|
|
||||||
rx_prefetch_t0(prefetchPtr);
|
rx_prefetch_t0(prefetchPtr);
|
||||||
|
|
||||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4));
|
|
||||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5));
|
|
||||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6));
|
|
||||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7));
|
|
||||||
|
|
||||||
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
|
||||||
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
|
||||||
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
|
||||||
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
|
||||||
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0);
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1);
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2);
|
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3);
|
|
||||||
|
|
||||||
rx_prefetch_t0(prefetchPtr + 64);
|
rx_prefetch_t0(prefetchPtr + 64);
|
||||||
|
|
||||||
scratchpadPtr += 128;
|
scratchpadPtr += 128;
|
||||||
|
@ -308,5 +302,6 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||||
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
template void hashAndFillAes1Rx4<0>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||||
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
template void hashAndFillAes1Rx4<1>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||||
|
template void hashAndFillAes1Rx4<2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);
|
||||||
|
|
|
@ -30,14 +30,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||||
|
|
|
@ -92,7 +92,7 @@ extern "C" {
|
||||||
int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||||
|
|
||||||
/* Simple API */
|
/* Simple API */
|
||||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen);
|
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen);
|
||||||
|
|
||||||
/* Argon2 Team - Begin Code */
|
/* Argon2 Team - Begin Code */
|
||||||
int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||||
|
|
123
src/crypto/randomx/blake2/blake2b-round.h
Normal file
123
src/crypto/randomx/blake2/blake2b-round.h
Normal file
|
@ -0,0 +1,123 @@
|
||||||
|
/*
|
||||||
|
BLAKE2 reference source code package - optimized C implementations
|
||||||
|
|
||||||
|
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
||||||
|
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
||||||
|
your option. The terms of these licenses can be found at:
|
||||||
|
|
||||||
|
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
||||||
|
- OpenSSL license : https://www.openssl.org/source/license.html
|
||||||
|
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
More information about the BLAKE2 hash function can be found at
|
||||||
|
https://blake2.net.
|
||||||
|
*/
|
||||||
|
#ifndef BLAKE2B_ROUND_H
|
||||||
|
#define BLAKE2B_ROUND_H
|
||||||
|
|
||||||
|
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
|
||||||
|
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
|
||||||
|
|
||||||
|
#define TOF(reg) _mm_castsi128_ps((reg))
|
||||||
|
#define TOI(reg) _mm_castps_si128((reg))
|
||||||
|
|
||||||
|
#define LIKELY(x) __builtin_expect((x),1)
|
||||||
|
|
||||||
|
|
||||||
|
/* Microarchitecture-specific macros */
|
||||||
|
#define _mm_roti_epi64(x, c) \
|
||||||
|
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
|
||||||
|
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
|
||||||
|
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
|
||||||
|
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
|
||||||
|
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
||||||
|
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
||||||
|
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
||||||
|
\
|
||||||
|
row4l = _mm_xor_si128(row4l, row1l); \
|
||||||
|
row4h = _mm_xor_si128(row4h, row1h); \
|
||||||
|
\
|
||||||
|
row4l = _mm_roti_epi64(row4l, -32); \
|
||||||
|
row4h = _mm_roti_epi64(row4h, -32); \
|
||||||
|
\
|
||||||
|
row3l = _mm_add_epi64(row3l, row4l); \
|
||||||
|
row3h = _mm_add_epi64(row3h, row4h); \
|
||||||
|
\
|
||||||
|
row2l = _mm_xor_si128(row2l, row3l); \
|
||||||
|
row2h = _mm_xor_si128(row2h, row3h); \
|
||||||
|
\
|
||||||
|
row2l = _mm_roti_epi64(row2l, -24); \
|
||||||
|
row2h = _mm_roti_epi64(row2h, -24); \
|
||||||
|
|
||||||
|
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
||||||
|
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
||||||
|
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
||||||
|
\
|
||||||
|
row4l = _mm_xor_si128(row4l, row1l); \
|
||||||
|
row4h = _mm_xor_si128(row4h, row1h); \
|
||||||
|
\
|
||||||
|
row4l = _mm_roti_epi64(row4l, -16); \
|
||||||
|
row4h = _mm_roti_epi64(row4h, -16); \
|
||||||
|
\
|
||||||
|
row3l = _mm_add_epi64(row3l, row4l); \
|
||||||
|
row3h = _mm_add_epi64(row3h, row4h); \
|
||||||
|
\
|
||||||
|
row2l = _mm_xor_si128(row2l, row3l); \
|
||||||
|
row2h = _mm_xor_si128(row2h, row3h); \
|
||||||
|
\
|
||||||
|
row2l = _mm_roti_epi64(row2l, -63); \
|
||||||
|
row2h = _mm_roti_epi64(row2h, -63); \
|
||||||
|
|
||||||
|
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||||
|
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
|
||||||
|
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
|
||||||
|
row2l = t0; \
|
||||||
|
row2h = t1; \
|
||||||
|
\
|
||||||
|
t0 = row3l; \
|
||||||
|
row3l = row3h; \
|
||||||
|
row3h = t0; \
|
||||||
|
\
|
||||||
|
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
|
||||||
|
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
|
||||||
|
row4l = t1; \
|
||||||
|
row4h = t0;
|
||||||
|
|
||||||
|
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
||||||
|
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
|
||||||
|
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
|
||||||
|
row2l = t0; \
|
||||||
|
row2h = t1; \
|
||||||
|
\
|
||||||
|
t0 = row3l; \
|
||||||
|
row3l = row3h; \
|
||||||
|
row3h = t0; \
|
||||||
|
\
|
||||||
|
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
|
||||||
|
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
|
||||||
|
row4l = t1; \
|
||||||
|
row4h = t0;
|
||||||
|
|
||||||
|
#define LOAD_MSG(r, i, b0, b1) \
|
||||||
|
do { \
|
||||||
|
b0 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 1]], m[blake2b_sigma_sse41[r][i * 4 + 0]]); \
|
||||||
|
b1 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 3]], m[blake2b_sigma_sse41[r][i * 4 + 2]]); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define ROUND(r) \
|
||||||
|
LOAD_MSG(r, 0, b0, b1); \
|
||||||
|
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||||
|
LOAD_MSG(r, 1, b0, b1); \
|
||||||
|
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||||
|
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
|
||||||
|
LOAD_MSG(r, 2, b0, b1); \
|
||||||
|
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||||
|
LOAD_MSG(r, 3, b0, b1); \
|
||||||
|
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
||||||
|
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
|
||||||
|
|
||||||
|
#endif
|
|
@ -39,12 +39,40 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "crypto/randomx/blake2/blake2.h"
|
#include "crypto/randomx/blake2/blake2.h"
|
||||||
#include "crypto/randomx/blake2/blake2-impl.h"
|
#include "crypto/randomx/blake2/blake2-impl.h"
|
||||||
|
|
||||||
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <smmintrin.h>
|
||||||
|
#include "blake2b-round.h"
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
static const uint64_t blake2b_IV[8] = {
|
static const uint64_t blake2b_IV[8] = {
|
||||||
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
||||||
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
||||||
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
||||||
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
|
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
|
||||||
|
|
||||||
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
|
static const uint8_t blake2b_sigma_sse41[12][16] = {
|
||||||
|
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
||||||
|
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
||||||
|
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
|
||||||
|
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
|
||||||
|
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
|
||||||
|
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
|
||||||
|
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
|
||||||
|
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
|
||||||
|
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
|
||||||
|
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
|
||||||
|
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
||||||
|
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
static const uint8_t blake2b_sigma[12][16] = {
|
static const uint8_t blake2b_sigma[12][16] = {
|
||||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||||
|
@ -179,7 +207,47 @@ int rx_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void rx_blake2b_compress(blake2b_state *S, const uint8_t *block) {
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
|
static void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t *block)
|
||||||
|
{
|
||||||
|
__m128i row1l, row1h;
|
||||||
|
__m128i row2l, row2h;
|
||||||
|
__m128i row3l, row3h;
|
||||||
|
__m128i row4l, row4h;
|
||||||
|
__m128i b0, b1;
|
||||||
|
__m128i t0, t1;
|
||||||
|
|
||||||
|
const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
|
||||||
|
const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
|
||||||
|
|
||||||
|
row1l = LOADU(&S->h[0]);
|
||||||
|
row1h = LOADU(&S->h[2]);
|
||||||
|
row2l = LOADU(&S->h[4]);
|
||||||
|
row2h = LOADU(&S->h[6]);
|
||||||
|
row3l = LOADU(&blake2b_IV[0]);
|
||||||
|
row3h = LOADU(&blake2b_IV[2]);
|
||||||
|
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
|
||||||
|
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
|
||||||
|
|
||||||
|
const uint64_t* m = (const uint64_t*)(block);
|
||||||
|
|
||||||
|
for (uint32_t r = 0; r < 12; ++r) {
|
||||||
|
ROUND(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
row1l = _mm_xor_si128(row3l, row1l);
|
||||||
|
row1h = _mm_xor_si128(row3h, row1h);
|
||||||
|
STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
|
||||||
|
STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
|
||||||
|
row2l = _mm_xor_si128(row4l, row2l);
|
||||||
|
row2h = _mm_xor_si128(row4h, row2h);
|
||||||
|
STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
|
||||||
|
STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
|
||||||
|
}
|
||||||
|
#undef ROUND
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void rx_blake2b_compress_integer(blake2b_state *S, const uint8_t *block) {
|
||||||
uint64_t m[16];
|
uint64_t m[16];
|
||||||
uint64_t v[16];
|
uint64_t v[16];
|
||||||
unsigned int i, r;
|
unsigned int i, r;
|
||||||
|
@ -237,6 +305,20 @@ static void rx_blake2b_compress(blake2b_state *S, const uint8_t *block) {
|
||||||
#undef ROUND
|
#undef ROUND
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
|
|
||||||
|
uint32_t rx_blake2b_use_sse41 = 0;
|
||||||
|
|
||||||
|
#define rx_blake2b_compress(S, block) \
|
||||||
|
if (rx_blake2b_use_sse41) \
|
||||||
|
rx_blake2b_compress_sse41(S, block); \
|
||||||
|
else \
|
||||||
|
rx_blake2b_compress_integer(S, block);
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define rx_blake2b_compress(S, block) rx_blake2b_compress_integer(S, block);
|
||||||
|
#endif
|
||||||
|
|
||||||
int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
||||||
const uint8_t *pin = (const uint8_t *)in;
|
const uint8_t *pin = (const uint8_t *)in;
|
||||||
|
|
||||||
|
@ -260,14 +342,14 @@ int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
||||||
size_t fill = BLAKE2B_BLOCKBYTES - left;
|
size_t fill = BLAKE2B_BLOCKBYTES - left;
|
||||||
memcpy(&S->buf[left], pin, fill);
|
memcpy(&S->buf[left], pin, fill);
|
||||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||||
rx_blake2b_compress(S, S->buf);
|
rx_blake2b_compress(S, S->buf);
|
||||||
S->buflen = 0;
|
S->buflen = 0;
|
||||||
inlen -= fill;
|
inlen -= fill;
|
||||||
pin += fill;
|
pin += fill;
|
||||||
/* Avoid buffer copies when possible */
|
/* Avoid buffer copies when possible */
|
||||||
while (inlen > BLAKE2B_BLOCKBYTES) {
|
while (inlen > BLAKE2B_BLOCKBYTES) {
|
||||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||||
rx_blake2b_compress(S, pin);
|
rx_blake2b_compress(S, pin);
|
||||||
inlen -= BLAKE2B_BLOCKBYTES;
|
inlen -= BLAKE2B_BLOCKBYTES;
|
||||||
pin += BLAKE2B_BLOCKBYTES;
|
pin += BLAKE2B_BLOCKBYTES;
|
||||||
}
|
}
|
||||||
|
@ -294,7 +376,7 @@ int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
||||||
blake2b_increment_counter(S, S->buflen);
|
blake2b_increment_counter(S, S->buflen);
|
||||||
blake2b_set_lastblock(S);
|
blake2b_set_lastblock(S);
|
||||||
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
|
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
|
||||||
rx_blake2b_compress(S, S->buf);
|
rx_blake2b_compress(S, S->buf);
|
||||||
|
|
||||||
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
|
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
|
||||||
store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
|
store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
|
||||||
|
@ -307,8 +389,7 @@ int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen) {
|
||||||
const void *key, size_t keylen) {
|
|
||||||
blake2b_state S;
|
blake2b_state S;
|
||||||
int ret = -1;
|
int ret = -1;
|
||||||
|
|
||||||
|
@ -321,25 +402,14 @@ int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
|
if (rx_blake2b_init(&S, outlen) < 0) {
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (keylen > 0) {
|
if (rx_blake2b_update(&S, in, inlen) < 0) {
|
||||||
if (rx_blake2b_init_key(&S, outlen, key, keylen) < 0) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (rx_blake2b_init(&S, outlen) < 0) {
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rx_blake2b_update(&S, in, inlen) < 0) {
|
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
ret = rx_blake2b_final(&S, out, outlen);
|
ret = rx_blake2b_final(&S, out, outlen);
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
//clear_internal_memory(&S, sizeof(S));
|
//clear_internal_memory(&S, sizeof(S));
|
||||||
|
@ -361,43 +431,42 @@ int rxa2_blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
|
||||||
store32(outlen_bytes, (uint32_t)outlen);
|
store32(outlen_bytes, (uint32_t)outlen);
|
||||||
|
|
||||||
#define TRY(statement) \
|
#define TRY(statement) \
|
||||||
do { \
|
do { \
|
||||||
ret = statement; \
|
ret = statement; \
|
||||||
if (ret < 0) { \
|
if (ret < 0) { \
|
||||||
goto fail; \
|
goto fail; \
|
||||||
} \
|
} \
|
||||||
} while ((void)0, 0)
|
} while ((void)0, 0)
|
||||||
|
|
||||||
if (outlen <= BLAKE2B_OUTBYTES) {
|
if (outlen <= BLAKE2B_OUTBYTES) {
|
||||||
TRY(rx_blake2b_init(&blake_state, outlen));
|
TRY(rx_blake2b_init(&blake_state, outlen));
|
||||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||||
TRY(rx_blake2b_final(&blake_state, out, outlen));
|
TRY(rx_blake2b_final(&blake_state, out, outlen));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
uint32_t toproduce;
|
uint32_t toproduce;
|
||||||
uint8_t out_buffer[BLAKE2B_OUTBYTES];
|
uint8_t out_buffer[BLAKE2B_OUTBYTES];
|
||||||
uint8_t in_buffer[BLAKE2B_OUTBYTES];
|
uint8_t in_buffer[BLAKE2B_OUTBYTES];
|
||||||
TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
||||||
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||||
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
TRY(rx_blake2b_update(&blake_state, in, inlen));
|
||||||
TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
||||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||||
out += BLAKE2B_OUTBYTES / 2;
|
out += BLAKE2B_OUTBYTES / 2;
|
||||||
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
|
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
|
||||||
|
|
||||||
while (toproduce > BLAKE2B_OUTBYTES) {
|
while (toproduce > BLAKE2B_OUTBYTES) {
|
||||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||||
TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
||||||
BLAKE2B_OUTBYTES, NULL, 0));
|
BLAKE2B_OUTBYTES));
|
||||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||||
out += BLAKE2B_OUTBYTES / 2;
|
out += BLAKE2B_OUTBYTES / 2;
|
||||||
toproduce -= BLAKE2B_OUTBYTES / 2;
|
toproduce -= BLAKE2B_OUTBYTES / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||||
TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
|
TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES));
|
||||||
0));
|
|
||||||
memcpy(out, out_buffer, toproduce);
|
memcpy(out, out_buffer, toproduce);
|
||||||
}
|
}
|
||||||
fail:
|
fail:
|
||||||
|
|
|
@ -55,7 +55,7 @@ namespace randomx {
|
||||||
|
|
||||||
void Blake2Generator::checkData(const size_t bytesNeeded) {
|
void Blake2Generator::checkData(const size_t bytesNeeded) {
|
||||||
if (dataIndex + bytesNeeded > sizeof(data)) {
|
if (dataIndex + bytesNeeded > sizeof(data)) {
|
||||||
rx_blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
|
rx_blake2b(data, sizeof(data), data, sizeof(data));
|
||||||
dataIndex = 0;
|
dataIndex = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -79,9 +79,9 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
|
|
||||||
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
|
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
|
||||||
int opcode = instr.opcode;
|
uint32_t opcode = instr.opcode;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IADD_RS) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IADD_RS;
|
ibc.type = InstructionType::IADD_RS;
|
||||||
|
@ -99,8 +99,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IADD_M) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IADD_M;
|
ibc.type = InstructionType::IADD_M;
|
||||||
|
@ -117,8 +118,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::ISUB_R;
|
ibc.type = InstructionType::ISUB_R;
|
||||||
|
@ -133,8 +135,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_M) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::ISUB_M;
|
ibc.type = InstructionType::ISUB_M;
|
||||||
|
@ -151,8 +154,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IMUL_R;
|
ibc.type = InstructionType::IMUL_R;
|
||||||
|
@ -167,8 +171,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_M) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IMUL_M;
|
ibc.type = InstructionType::IMUL_M;
|
||||||
|
@ -185,8 +190,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IMULH_R;
|
ibc.type = InstructionType::IMULH_R;
|
||||||
|
@ -195,8 +201,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_M) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IMULH_M;
|
ibc.type = InstructionType::IMULH_M;
|
||||||
|
@ -213,8 +220,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::ISMULH_R;
|
ibc.type = InstructionType::ISMULH_R;
|
||||||
|
@ -223,8 +231,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_M) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::ISMULH_M;
|
ibc.type = InstructionType::ISMULH_M;
|
||||||
|
@ -241,8 +250,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_RCP) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP) {
|
||||||
uint64_t divisor = instr.getImm32();
|
uint64_t divisor = instr.getImm32();
|
||||||
if (!isZeroOrPowerOf2(divisor)) {
|
if (!isZeroOrPowerOf2(divisor)) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
|
@ -257,16 +267,18 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_INEG_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
ibc.type = InstructionType::INEG_R;
|
ibc.type = InstructionType::INEG_R;
|
||||||
ibc.idst = &nreg->r[dst];
|
ibc.idst = &nreg->r[dst];
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IXOR_R;
|
ibc.type = InstructionType::IXOR_R;
|
||||||
|
@ -281,8 +293,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_M) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IXOR_M;
|
ibc.type = InstructionType::IXOR_M;
|
||||||
|
@ -299,8 +312,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IROR_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IROR_R;
|
ibc.type = InstructionType::IROR_R;
|
||||||
|
@ -315,8 +329,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_IROL_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IROL_R;
|
ibc.type = InstructionType::IROL_R;
|
||||||
|
@ -331,8 +346,9 @@ namespace randomx {
|
||||||
registerUsage[dst] = i;
|
registerUsage[dst] = i;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_ISWAP_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
|
@ -347,8 +363,9 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_FSWAP_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
ibc.type = InstructionType::FSWAP_R;
|
ibc.type = InstructionType::FSWAP_R;
|
||||||
if (dst < RegisterCountFlt)
|
if (dst < RegisterCountFlt)
|
||||||
|
@ -357,8 +374,9 @@ namespace randomx {
|
||||||
ibc.fdst = &nreg->e[dst - RegisterCountFlt];
|
ibc.fdst = &nreg->e[dst - RegisterCountFlt];
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_FADD_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R) {
|
||||||
auto dst = instr.dst % RegisterCountFlt;
|
auto dst = instr.dst % RegisterCountFlt;
|
||||||
auto src = instr.src % RegisterCountFlt;
|
auto src = instr.src % RegisterCountFlt;
|
||||||
ibc.type = InstructionType::FADD_R;
|
ibc.type = InstructionType::FADD_R;
|
||||||
|
@ -366,8 +384,9 @@ namespace randomx {
|
||||||
ibc.fsrc = &nreg->a[src];
|
ibc.fsrc = &nreg->a[src];
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_FADD_M) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M) {
|
||||||
auto dst = instr.dst % RegisterCountFlt;
|
auto dst = instr.dst % RegisterCountFlt;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::FADD_M;
|
ibc.type = InstructionType::FADD_M;
|
||||||
|
@ -377,8 +396,9 @@ namespace randomx {
|
||||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R) {
|
||||||
auto dst = instr.dst % RegisterCountFlt;
|
auto dst = instr.dst % RegisterCountFlt;
|
||||||
auto src = instr.src % RegisterCountFlt;
|
auto src = instr.src % RegisterCountFlt;
|
||||||
ibc.type = InstructionType::FSUB_R;
|
ibc.type = InstructionType::FSUB_R;
|
||||||
|
@ -386,8 +406,9 @@ namespace randomx {
|
||||||
ibc.fsrc = &nreg->a[src];
|
ibc.fsrc = &nreg->a[src];
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_M) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M) {
|
||||||
auto dst = instr.dst % RegisterCountFlt;
|
auto dst = instr.dst % RegisterCountFlt;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::FSUB_M;
|
ibc.type = InstructionType::FSUB_M;
|
||||||
|
@ -397,15 +418,17 @@ namespace randomx {
|
||||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_FSCAL_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R) {
|
||||||
auto dst = instr.dst % RegisterCountFlt;
|
auto dst = instr.dst % RegisterCountFlt;
|
||||||
ibc.fdst = &nreg->f[dst];
|
ibc.fdst = &nreg->f[dst];
|
||||||
ibc.type = InstructionType::FSCAL_R;
|
ibc.type = InstructionType::FSCAL_R;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_FMUL_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R) {
|
||||||
auto dst = instr.dst % RegisterCountFlt;
|
auto dst = instr.dst % RegisterCountFlt;
|
||||||
auto src = instr.src % RegisterCountFlt;
|
auto src = instr.src % RegisterCountFlt;
|
||||||
ibc.type = InstructionType::FMUL_R;
|
ibc.type = InstructionType::FMUL_R;
|
||||||
|
@ -413,8 +436,9 @@ namespace randomx {
|
||||||
ibc.fsrc = &nreg->a[src];
|
ibc.fsrc = &nreg->a[src];
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_FDIV_M) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M) {
|
||||||
auto dst = instr.dst % RegisterCountFlt;
|
auto dst = instr.dst % RegisterCountFlt;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::FDIV_M;
|
ibc.type = InstructionType::FDIV_M;
|
||||||
|
@ -424,41 +448,44 @@ namespace randomx {
|
||||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_FSQRT_R) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R) {
|
||||||
auto dst = instr.dst % RegisterCountFlt;
|
auto dst = instr.dst % RegisterCountFlt;
|
||||||
ibc.type = InstructionType::FSQRT_R;
|
ibc.type = InstructionType::FSQRT_R;
|
||||||
ibc.fdst = &nreg->e[dst];
|
ibc.fdst = &nreg->e[dst];
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_CBRANCH) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH) {
|
||||||
ibc.type = InstructionType::CBRANCH;
|
ibc.type = InstructionType::CBRANCH;
|
||||||
//jump condition
|
//jump condition
|
||||||
int creg = instr.dst % RegistersCount;
|
int creg = instr.dst % RegistersCount;
|
||||||
ibc.idst = &nreg->r[creg];
|
ibc.idst = &nreg->r[creg];
|
||||||
ibc.target = registerUsage[creg];
|
ibc.target = registerUsage[creg];
|
||||||
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
const int shift = instr.getModCond();
|
||||||
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
|
ibc.imm = signExtend2sCompl(instr.getImm32()) | ((1ULL << RandomX_ConfigurationBase::JumpOffset) << shift);
|
||||||
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
|
ibc.imm &= ~((1ULL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift);
|
||||||
ibc.imm &= ~(1ULL << (shift - 1));
|
ibc.memMask = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;
|
||||||
ibc.memMask = RandomX_CurrentConfig.ConditionMask_Calculated << shift;
|
|
||||||
//mark all registers as used
|
//mark all registers as used
|
||||||
for (unsigned j = 0; j < RegistersCount; ++j) {
|
for (unsigned j = 0; j < RegistersCount; ++j) {
|
||||||
registerUsage[j] = i;
|
registerUsage[j] = i;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_CFROUND) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND) {
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.isrc = &nreg->r[src];
|
ibc.isrc = &nreg->r[src];
|
||||||
ibc.type = InstructionType::CFROUND;
|
ibc.type = InstructionType::CFROUND;
|
||||||
ibc.imm = instr.getImm32() & 63;
|
ibc.imm = instr.getImm32() & 63;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_ISTORE) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::ISTORE;
|
ibc.type = InstructionType::ISTORE;
|
||||||
|
@ -471,8 +498,9 @@ namespace randomx {
|
||||||
ibc.memMask = ScratchpadL3Mask;
|
ibc.memMask = ScratchpadL3Mask;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE;
|
||||||
|
|
||||||
if (opcode < RandomX_CurrentConfig.CEIL_NOP) {
|
if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_NOP) {
|
||||||
ibc.type = InstructionType::NOP;
|
ibc.type = InstructionType::NOP;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -225,7 +225,7 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void exe_CFROUND(RANDOMX_EXE_ARGS) {
|
static void exe_CFROUND(RANDOMX_EXE_ARGS) {
|
||||||
rx_set_rounding_mode(rotr64(*ibc.isrc, ibc.imm) % 4);
|
rx_set_rounding_mode(rotr64(*ibc.isrc, static_cast<uint32_t>(ibc.imm)) % 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void exe_ISTORE(RANDOMX_EXE_ARGS) {
|
static void exe_ISTORE(RANDOMX_EXE_ARGS) {
|
||||||
|
|
|
@ -74,8 +74,8 @@ namespace randomx {
|
||||||
constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_MAX_LATENCY + 2;
|
constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_MAX_LATENCY + 2;
|
||||||
constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
|
constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
|
||||||
#define ScratchpadSize RandomX_CurrentConfig.ScratchpadL3_Size
|
#define ScratchpadSize RandomX_CurrentConfig.ScratchpadL3_Size
|
||||||
#define CacheLineAlignMask RandomX_CurrentConfig.CacheLineAlignMask_Calculated
|
#define CacheLineAlignMask RandomX_ConfigurationBase::CacheLineAlignMask_Calculated
|
||||||
#define DatasetExtraItems RandomX_CurrentConfig.DatasetExtraItems_Calculated
|
#define DatasetExtraItems RandomX_ConfigurationBase::DatasetExtraItems_Calculated
|
||||||
constexpr int StoreL3Condition = 14;
|
constexpr int StoreL3Condition = 14;
|
||||||
|
|
||||||
//Prevent some unsafe configurations.
|
//Prevent some unsafe configurations.
|
||||||
|
|
|
@ -75,11 +75,11 @@ static size_t CalcDatasetItemSize()
|
||||||
// Prologue
|
// Prologue
|
||||||
((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) +
|
((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) +
|
||||||
// Main loop
|
// Main loop
|
||||||
RandomX_CurrentConfig.CacheAccesses * (
|
RandomX_ConfigurationBase::CacheAccesses * (
|
||||||
// Main loop prologue
|
// Main loop prologue
|
||||||
((uint8_t*)randomx_calc_dataset_item_aarch64_mix - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch)) + 4 +
|
((uint8_t*)randomx_calc_dataset_item_aarch64_mix - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch)) + 4 +
|
||||||
// Inner main loop (instructions)
|
// Inner main loop (instructions)
|
||||||
((RandomX_CurrentConfig.SuperscalarLatency * 3) + 2) * 16 +
|
((RandomX_ConfigurationBase::SuperscalarLatency * 3) + 2) * 16 +
|
||||||
// Main loop epilogue
|
// Main loop epilogue
|
||||||
((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4
|
((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4
|
||||||
) +
|
) +
|
||||||
|
@ -235,7 +235,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s
|
||||||
num32bitLiterals = 64;
|
num32bitLiterals = 64;
|
||||||
constexpr uint32_t tmp_reg = 12;
|
constexpr uint32_t tmp_reg = 12;
|
||||||
|
|
||||||
for (size_t i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i)
|
for (size_t i = 0; i < RandomX_ConfigurationBase::CacheAccesses; ++i)
|
||||||
{
|
{
|
||||||
// and x11, x10, CacheSize / CacheLineSize - 1
|
// and x11, x10, CacheSize / CacheLineSize - 1
|
||||||
emit32(0x92400000 | 11 | (10 << 5) | ((RandomX_CurrentConfig.Log2_CacheSize - 1) << 10), code, codePos);
|
emit32(0x92400000 | 11 | (10 << 5) | ((RandomX_CurrentConfig.Log2_CacheSize - 1) << 10), code, codePos);
|
||||||
|
@ -946,7 +946,7 @@ void JitCompilerA64::h_CBRANCH(Instruction& instr, uint32_t& codePos)
|
||||||
|
|
||||||
const uint32_t dst = IntRegMap[instr.dst];
|
const uint32_t dst = IntRegMap[instr.dst];
|
||||||
const uint32_t modCond = instr.getModCond();
|
const uint32_t modCond = instr.getModCond();
|
||||||
const uint32_t shift = modCond + RandomX_CurrentConfig.JumpOffset;
|
const uint32_t shift = modCond + RandomX_ConfigurationBase::JumpOffset;
|
||||||
const uint32_t imm = (instr.getImm32() | (1U << shift)) & ~(1U << (shift - 1));
|
const uint32_t imm = (instr.getImm32() | (1U << shift)) & ~(1U << (shift - 1));
|
||||||
|
|
||||||
emitAddImmediate(dst, dst, imm, code, k);
|
emitAddImmediate(dst, dst, imm, code, k);
|
||||||
|
|
|
@ -36,6 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "crypto/randomx/program.hpp"
|
#include "crypto/randomx/program.hpp"
|
||||||
#include "crypto/randomx/reciprocal.h"
|
#include "crypto/randomx/reciprocal.h"
|
||||||
#include "crypto/randomx/virtual_memory.hpp"
|
#include "crypto/randomx/virtual_memory.hpp"
|
||||||
|
#include "base/tools/Profiler.h"
|
||||||
|
#include "backend/cpu/Cpu.h"
|
||||||
|
|
||||||
#ifdef XMRIG_FIX_RYZEN
|
#ifdef XMRIG_FIX_RYZEN
|
||||||
# include "crypto/rx/Rx.h"
|
# include "crypto/rx/Rx.h"
|
||||||
|
@ -166,55 +168,16 @@ namespace randomx {
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// CPU-specific tweaks
|
# ifdef _MSC_VER
|
||||||
void JitCompilerX86::applyTweaks() {
|
static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return _rotl(a, shift); }
|
||||||
int32_t info[4];
|
# else
|
||||||
cpuid(0, info);
|
static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return (a << shift) | (a >> (-shift & 31)); }
|
||||||
|
# endif
|
||||||
int32_t manufacturer[4];
|
|
||||||
manufacturer[0] = info[1];
|
|
||||||
manufacturer[1] = info[3];
|
|
||||||
manufacturer[2] = info[2];
|
|
||||||
manufacturer[3] = 0;
|
|
||||||
|
|
||||||
if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) {
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
unsigned int stepping : 4;
|
|
||||||
unsigned int model : 4;
|
|
||||||
unsigned int family : 4;
|
|
||||||
unsigned int processor_type : 2;
|
|
||||||
unsigned int reserved1 : 2;
|
|
||||||
unsigned int ext_model : 4;
|
|
||||||
unsigned int ext_family : 8;
|
|
||||||
unsigned int reserved2 : 4;
|
|
||||||
} processor_info;
|
|
||||||
|
|
||||||
cpuid(1, info);
|
|
||||||
memcpy(&processor_info, info, sizeof(processor_info));
|
|
||||||
|
|
||||||
// Intel JCC erratum mitigation
|
|
||||||
if (processor_info.family == 6) {
|
|
||||||
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
|
|
||||||
const uint32_t stepping = processor_info.stepping;
|
|
||||||
|
|
||||||
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
|
|
||||||
BranchesWithin32B =
|
|
||||||
((model == 0x4E) && (stepping == 0x3)) ||
|
|
||||||
((model == 0x55) && (stepping == 0x4)) ||
|
|
||||||
((model == 0x5E) && (stepping == 0x3)) ||
|
|
||||||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
|
|
||||||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
|
|
||||||
((model == 0xA6) && (stepping == 0x0)) ||
|
|
||||||
((model == 0xAE) && (stepping == 0xA));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::atomic<size_t> codeOffset;
|
static std::atomic<size_t> codeOffset;
|
||||||
|
|
||||||
JitCompilerX86::JitCompilerX86() {
|
JitCompilerX86::JitCompilerX86() {
|
||||||
applyTweaks();
|
BranchesWithin32B = xmrig::Cpu::info()->jccErratum();
|
||||||
|
|
||||||
int32_t info[4];
|
int32_t info[4];
|
||||||
cpuid(1, info);
|
cpuid(1, info);
|
||||||
|
@ -255,6 +218,8 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
||||||
|
PROFILE_SCOPE(RandomX_JIT_compile);
|
||||||
|
|
||||||
vm_flags = flags;
|
vm_flags = flags;
|
||||||
|
|
||||||
generateProgramPrologue(prog, pcfg);
|
generateProgramPrologue(prog, pcfg);
|
||||||
|
@ -340,7 +305,6 @@ namespace randomx {
|
||||||
r[j] = k;
|
r[j] = k;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
|
|
||||||
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) {
|
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) {
|
||||||
Instruction& instr1 = prog(i);
|
Instruction& instr1 = prog(i);
|
||||||
Instruction& instr2 = prog(i + 1);
|
Instruction& instr2 = prog(i + 1);
|
||||||
|
@ -352,17 +316,10 @@ namespace randomx {
|
||||||
InstructionGeneratorX86 gen3 = engine[instr3.opcode];
|
InstructionGeneratorX86 gen3 = engine[instr3.opcode];
|
||||||
InstructionGeneratorX86 gen4 = engine[instr4.opcode];
|
InstructionGeneratorX86 gen4 = engine[instr4.opcode];
|
||||||
|
|
||||||
*((uint64_t*)&instr1) &= instr_mask;
|
(*gen1)(this, instr1);
|
||||||
(this->*gen1)(instr1);
|
(*gen2)(this, instr2);
|
||||||
|
(*gen3)(this, instr3);
|
||||||
*((uint64_t*)&instr2) &= instr_mask;
|
(*gen4)(this, instr4);
|
||||||
(this->*gen2)(instr2);
|
|
||||||
|
|
||||||
*((uint64_t*)&instr3) &= instr_mask;
|
|
||||||
(this->*gen3)(instr3);
|
|
||||||
|
|
||||||
*((uint64_t*)&instr4) &= instr_mask;
|
|
||||||
(this->*gen4)(instr4);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
*(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast<uint64_t>(pcfg.readReg2) << 16) + (static_cast<uint64_t>(pcfg.readReg3) << 40);
|
*(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast<uint64_t>(pcfg.readReg2) << 16) + (static_cast<uint64_t>(pcfg.readReg3) << 40);
|
||||||
|
@ -515,7 +472,7 @@ namespace randomx {
|
||||||
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
|
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
|
||||||
|
|
||||||
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
|
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
|
||||||
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
|
const uint32_t dst = static_cast<uint32_t>(instr.dst % RegistersCount) << 16;
|
||||||
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
|
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
|
||||||
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
|
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
|
||||||
|
|
||||||
|
@ -537,8 +494,8 @@ namespace randomx {
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
uint8_t* const p = code + pos;
|
uint8_t* const p = code + pos;
|
||||||
|
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst;
|
const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst;
|
||||||
|
|
||||||
uint32_t k = 0x048d4f + (dst << 19);
|
uint32_t k = 0x048d4f + (dst << 19);
|
||||||
if (dst == RegisterNeedsDisplacement)
|
if (dst == RegisterNeedsDisplacement)
|
||||||
|
@ -557,8 +514,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -582,8 +539,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
|
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
|
||||||
|
@ -603,8 +560,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -624,8 +581,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
|
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
|
||||||
|
@ -644,8 +601,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -665,8 +622,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
|
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
|
||||||
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
|
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
|
||||||
|
@ -681,8 +638,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
|
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
|
||||||
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
|
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
|
||||||
|
@ -696,8 +653,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<false>(instr, src, p, pos);
|
genAddressReg<false>(instr, src, p, pos);
|
||||||
|
@ -720,8 +677,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<false>(instr, src, p, pos);
|
genAddressReg<false>(instr, src, p, pos);
|
||||||
|
@ -743,8 +700,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
|
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
|
||||||
pos += 8;
|
pos += 8;
|
||||||
|
@ -758,8 +715,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<false>(instr, src, p, pos);
|
genAddressReg<false>(instr, src, p, pos);
|
||||||
|
@ -789,7 +746,7 @@ namespace randomx {
|
||||||
|
|
||||||
emit64(randomx_reciprocal_fast(divisor), p, pos);
|
emit64(randomx_reciprocal_fast(divisor), p, pos);
|
||||||
|
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
emit32(0xc0af0f4c + (dst << 27), p, pos);
|
||||||
|
|
||||||
registerUsage[dst] = pos;
|
registerUsage[dst] = pos;
|
||||||
|
@ -802,7 +759,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
|
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
|
||||||
pos += 3;
|
pos += 3;
|
||||||
|
|
||||||
|
@ -814,8 +771,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
|
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
|
||||||
|
@ -835,8 +792,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -856,8 +813,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
|
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
|
||||||
|
@ -877,8 +834,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
|
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
|
||||||
|
@ -898,8 +855,8 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst;
|
const uint32_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
if (src != dst) {
|
if (src != dst) {
|
||||||
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
|
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
|
||||||
|
@ -915,7 +872,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t dst = instr.dst;
|
const uint64_t dst = instr.dst % RegistersCount;
|
||||||
|
|
||||||
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
|
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
|
||||||
pos += 5;
|
pos += 5;
|
||||||
|
@ -940,7 +897,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -968,7 +925,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -1007,7 +964,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
const uint64_t dst = instr.dst % RegisterCountFlt;
|
const uint64_t dst = instr.dst % RegisterCountFlt;
|
||||||
|
|
||||||
genAddressReg<true>(instr, src, p, pos);
|
genAddressReg<true>(instr, src, p, pos);
|
||||||
|
@ -1043,7 +1000,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint32_t src = instr.src;
|
const uint32_t src = instr.src % RegistersCount;
|
||||||
|
|
||||||
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
|
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
|
||||||
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
||||||
|
@ -1067,7 +1024,7 @@ namespace randomx {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const uint64_t src = instr.src;
|
const uint64_t src = instr.src % RegistersCount;
|
||||||
|
|
||||||
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
||||||
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
|
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
|
||||||
|
@ -1086,14 +1043,15 @@ namespace randomx {
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<bool jccErratum>
|
||||||
void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
|
void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
const int reg = instr.dst;
|
const int reg = instr.dst % RegistersCount;
|
||||||
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
||||||
|
|
||||||
if (BranchesWithin32B) {
|
if (jccErratum) {
|
||||||
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
|
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
|
||||||
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
|
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
|
||||||
|
|
||||||
|
@ -1106,10 +1064,12 @@ namespace randomx {
|
||||||
}
|
}
|
||||||
|
|
||||||
*(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16);
|
*(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16);
|
||||||
const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
const int shift = instr.getModCond();
|
||||||
*(uint32_t*)(p + pos + 3) = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1));
|
const uint32_t or_mask = (1UL << RandomX_ConfigurationBase::JumpOffset) << shift;
|
||||||
|
const uint32_t and_mask = rotl32(~static_cast<uint32_t>(1UL << (RandomX_ConfigurationBase::JumpOffset - 1)), shift);
|
||||||
|
*(uint32_t*)(p + pos + 3) = (instr.getImm32() | or_mask) & and_mask;
|
||||||
*(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16);
|
*(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16);
|
||||||
*(uint32_t*)(p + pos + 10) = RandomX_CurrentConfig.ConditionMask_Calculated << shift;
|
*(uint32_t*)(p + pos + 10) = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;
|
||||||
pos += 14;
|
pos += 14;
|
||||||
|
|
||||||
if (jmp_offset >= -128) {
|
if (jmp_offset >= -128) {
|
||||||
|
@ -1132,12 +1092,15 @@ namespace randomx {
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template void JitCompilerX86::h_CBRANCH<false>(const Instruction&);
|
||||||
|
template void JitCompilerX86::h_CBRANCH<true>(const Instruction&);
|
||||||
|
|
||||||
void JitCompilerX86::h_ISTORE(const Instruction& instr) {
|
void JitCompilerX86::h_ISTORE(const Instruction& instr) {
|
||||||
uint8_t* const p = code;
|
uint8_t* const p = code;
|
||||||
uint32_t pos = codePos;
|
uint32_t pos = codePos;
|
||||||
|
|
||||||
genAddressRegDst(instr, p, pos);
|
genAddressRegDst(instr, p, pos);
|
||||||
emit32(0x0604894c + (static_cast<uint32_t>(instr.src) << 19), p, pos);
|
emit32(0x0604894c + (static_cast<uint32_t>(instr.src % RegistersCount) << 19), p, pos);
|
||||||
|
|
||||||
codePos = pos;
|
codePos = pos;
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,7 +41,7 @@ namespace randomx {
|
||||||
class JitCompilerX86;
|
class JitCompilerX86;
|
||||||
class Instruction;
|
class Instruction;
|
||||||
|
|
||||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(const Instruction&);
|
typedef void(*InstructionGeneratorX86)(JitCompilerX86*, const Instruction&);
|
||||||
|
|
||||||
constexpr uint32_t CodeSize = 64 * 1024;
|
constexpr uint32_t CodeSize = 64 * 1024;
|
||||||
|
|
||||||
|
@ -84,7 +84,6 @@ namespace randomx {
|
||||||
|
|
||||||
uint8_t* allocatedCode;
|
uint8_t* allocatedCode;
|
||||||
|
|
||||||
void applyTweaks();
|
|
||||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||||
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||||
template<bool rax>
|
template<bool rax>
|
||||||
|
@ -148,11 +147,13 @@ namespace randomx {
|
||||||
void h_FMUL_R(const Instruction&);
|
void h_FMUL_R(const Instruction&);
|
||||||
void h_FDIV_M(const Instruction&);
|
void h_FDIV_M(const Instruction&);
|
||||||
void h_FSQRT_R(const Instruction&);
|
void h_FSQRT_R(const Instruction&);
|
||||||
|
|
||||||
|
template<bool jccErratum>
|
||||||
void h_CBRANCH(const Instruction&);
|
void h_CBRANCH(const Instruction&);
|
||||||
|
|
||||||
void h_CFROUND(const Instruction&);
|
void h_CFROUND(const Instruction&);
|
||||||
void h_CFROUND_BMI2(const Instruction&);
|
void h_CFROUND_BMI2(const Instruction&);
|
||||||
void h_ISTORE(const Instruction&);
|
void h_ISTORE(const Instruction&);
|
||||||
void h_NOP(const Instruction&);
|
void h_NOP(const Instruction&);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,6 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
|
#include "base/tools/Profiler.h"
|
||||||
|
|
||||||
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
||||||
{
|
{
|
||||||
ArgonSalt = "RandomWOW\x01";
|
ArgonSalt = "RandomWOW\x01";
|
||||||
|
@ -109,22 +111,15 @@ RandomX_ConfigurationKeva::RandomX_ConfigurationKeva()
|
||||||
}
|
}
|
||||||
|
|
||||||
RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||||
: ArgonMemory(262144)
|
: ArgonIterations(3)
|
||||||
, ArgonIterations(3)
|
|
||||||
, ArgonLanes(1)
|
, ArgonLanes(1)
|
||||||
, ArgonSalt("RandomX\x03")
|
, ArgonSalt("RandomX\x03")
|
||||||
, CacheAccesses(8)
|
|
||||||
, SuperscalarLatency(170)
|
|
||||||
, DatasetBaseSize(2147483648)
|
|
||||||
, DatasetExtraSize(33554368)
|
|
||||||
, ScratchpadL1_Size(16384)
|
, ScratchpadL1_Size(16384)
|
||||||
, ScratchpadL2_Size(262144)
|
, ScratchpadL2_Size(262144)
|
||||||
, ScratchpadL3_Size(2097152)
|
, ScratchpadL3_Size(2097152)
|
||||||
, ProgramSize(256)
|
, ProgramSize(256)
|
||||||
, ProgramIterations(2048)
|
, ProgramIterations(2048)
|
||||||
, ProgramCount(8)
|
, ProgramCount(8)
|
||||||
, JumpBits(8)
|
|
||||||
, JumpOffset(8)
|
|
||||||
, RANDOMX_FREQ_IADD_RS(16)
|
, RANDOMX_FREQ_IADD_RS(16)
|
||||||
, RANDOMX_FREQ_IADD_M(7)
|
, RANDOMX_FREQ_IADD_M(7)
|
||||||
, RANDOMX_FREQ_ISUB_R(16)
|
, RANDOMX_FREQ_ISUB_R(16)
|
||||||
|
@ -211,6 +206,13 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||||
static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
|
static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static int scratchpadPrefetchMode = 1;
|
||||||
|
|
||||||
|
void randomx_set_scratchpad_prefetch_mode(int mode)
|
||||||
|
{
|
||||||
|
scratchpadPrefetchMode = mode;
|
||||||
|
}
|
||||||
|
|
||||||
void RandomX_ConfigurationBase::Apply()
|
void RandomX_ConfigurationBase::Apply()
|
||||||
{
|
{
|
||||||
const uint32_t ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
|
const uint32_t ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
|
||||||
|
@ -224,11 +226,6 @@ void RandomX_ConfigurationBase::Apply()
|
||||||
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
|
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
|
||||||
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
|
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
|
||||||
|
|
||||||
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
|
|
||||||
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
|
|
||||||
|
|
||||||
ConditionMask_Calculated = (1 << JumpBits) - 1;
|
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
|
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
|
||||||
// Not needed right now because all variants use default dataset base size
|
// Not needed right now because all variants use default dataset base size
|
||||||
|
@ -240,7 +237,42 @@ void RandomX_ConfigurationBase::Apply()
|
||||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
||||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
||||||
|
|
||||||
#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x
|
// Apply scratchpad prefetch mode
|
||||||
|
{
|
||||||
|
uint32_t* a = (uint32_t*)(codePrefetchScratchpadTweaked + 8);
|
||||||
|
uint32_t* b = (uint32_t*)(codePrefetchScratchpadTweaked + 22);
|
||||||
|
|
||||||
|
switch (scratchpadPrefetchMode)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
*a = 0x00401F0FUL; // 4-byte nop
|
||||||
|
*b = 0x00401F0FUL; // 4-byte nop
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
default:
|
||||||
|
*a = 0x060C180FUL; // prefetcht0 [rsi+rax]
|
||||||
|
*b = 0x160C180FUL; // prefetcht0 [rsi+rdx]
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
*a = 0x0604180FUL; // prefetchnta [rsi+rax]
|
||||||
|
*b = 0x1604180FUL; // prefetchnta [rsi+rdx]
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
*a = 0x060C8B48UL; // mov rcx, [rsi+rax]
|
||||||
|
*b = 0x160C8B48UL; // mov rcx, [rsi+rdx]
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx::Instruction&);
|
||||||
|
|
||||||
|
#define JIT_HANDLE(x, prev) do { \
|
||||||
|
const InstructionGeneratorX86_2 p = &randomx::JitCompilerX86::h_##x; \
|
||||||
|
memcpy(randomx::JitCompilerX86::engine + k, &p, sizeof(p)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#elif defined(XMRIG_ARMv8)
|
#elif defined(XMRIG_ARMv8)
|
||||||
|
|
||||||
|
@ -256,16 +288,16 @@ void RandomX_ConfigurationBase::Apply()
|
||||||
#define JIT_HANDLE(x, prev)
|
#define JIT_HANDLE(x, prev)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
constexpr int CEIL_NULL = 0;
|
uint32_t k = 0;
|
||||||
int k = 0;
|
uint32_t freq_sum = 0;
|
||||||
|
|
||||||
#define INST_HANDLE(x, prev) \
|
#define INST_HANDLE(x, prev) \
|
||||||
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
|
freq_sum += RANDOMX_FREQ_##x; \
|
||||||
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
|
for (; k < freq_sum; ++k) { JIT_HANDLE(x, prev); }
|
||||||
|
|
||||||
#define INST_HANDLE2(x, func_name, prev) \
|
#define INST_HANDLE2(x, func_name, prev) \
|
||||||
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
|
freq_sum += RANDOMX_FREQ_##x; \
|
||||||
for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); }
|
for (; k < freq_sum; ++k) { JIT_HANDLE(func_name, prev); }
|
||||||
|
|
||||||
INST_HANDLE(IADD_RS, NULL);
|
INST_HANDLE(IADD_RS, NULL);
|
||||||
INST_HANDLE(IADD_M, IADD_RS);
|
INST_HANDLE(IADD_M, IADD_RS);
|
||||||
|
@ -304,7 +336,13 @@ void RandomX_ConfigurationBase::Apply()
|
||||||
INST_HANDLE(FMUL_R, FSCAL_R);
|
INST_HANDLE(FMUL_R, FSCAL_R);
|
||||||
INST_HANDLE(FDIV_M, FMUL_R);
|
INST_HANDLE(FDIV_M, FMUL_R);
|
||||||
INST_HANDLE(FSQRT_R, FDIV_M);
|
INST_HANDLE(FSQRT_R, FDIV_M);
|
||||||
INST_HANDLE(CBRANCH, FSQRT_R);
|
|
||||||
|
if (xmrig::Cpu::info()->jccErratum()) {
|
||||||
|
INST_HANDLE2(CBRANCH, CBRANCH<true>, FSQRT_R);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
INST_HANDLE2(CBRANCH, CBRANCH<false>, FSQRT_R);
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
if (xmrig::Cpu::info()->hasBMI2()) {
|
||||||
|
@ -537,33 +575,35 @@ extern "C" {
|
||||||
assert(inputSize == 0 || input != nullptr);
|
assert(inputSize == 0 || input != nullptr);
|
||||||
assert(output != nullptr);
|
assert(output != nullptr);
|
||||||
alignas(16) uint64_t tempHash[8];
|
alignas(16) uint64_t tempHash[8];
|
||||||
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
|
||||||
machine->initScratchpad(&tempHash);
|
machine->initScratchpad(&tempHash);
|
||||||
machine->resetRoundingMode();
|
machine->resetRoundingMode();
|
||||||
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||||
machine->run(&tempHash);
|
machine->run(&tempHash);
|
||||||
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
|
||||||
}
|
}
|
||||||
machine->run(&tempHash);
|
machine->run(&tempHash);
|
||||||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
machine->getFinalResult(output);
|
||||||
}
|
}
|
||||||
|
|
||||||
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
|
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
|
||||||
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
|
||||||
machine->initScratchpad(tempHash);
|
machine->initScratchpad(tempHash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
|
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
|
||||||
|
PROFILE_SCOPE(RandomX_hash);
|
||||||
|
|
||||||
machine->resetRoundingMode();
|
machine->resetRoundingMode();
|
||||||
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||||
machine->run(&tempHash);
|
machine->run(&tempHash);
|
||||||
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
|
||||||
}
|
}
|
||||||
machine->run(&tempHash);
|
machine->run(&tempHash);
|
||||||
|
|
||||||
// Finish current hash and fill the scratchpad for the next hash at the same time
|
// Finish current hash and fill the scratchpad for the next hash at the same time
|
||||||
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
|
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), nextInput, nextInputSize);
|
||||||
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
|
machine->hashAndFill(output, tempHash);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -64,15 +64,24 @@ struct RandomX_ConfigurationBase
|
||||||
|
|
||||||
void Apply();
|
void Apply();
|
||||||
|
|
||||||
uint32_t ArgonMemory;
|
// Common parameters for all RandomX variants
|
||||||
|
enum Params : uint64_t
|
||||||
|
{
|
||||||
|
ArgonMemory = 262144,
|
||||||
|
CacheAccesses = 8,
|
||||||
|
SuperscalarLatency = 170,
|
||||||
|
DatasetBaseSize = 2147483648,
|
||||||
|
DatasetExtraSize = 33554368,
|
||||||
|
JumpBits = 8,
|
||||||
|
JumpOffset = 8,
|
||||||
|
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1),
|
||||||
|
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE,
|
||||||
|
ConditionMask_Calculated = ((1 << JumpBits) - 1) << JumpOffset,
|
||||||
|
};
|
||||||
|
|
||||||
uint32_t ArgonIterations;
|
uint32_t ArgonIterations;
|
||||||
uint32_t ArgonLanes;
|
uint32_t ArgonLanes;
|
||||||
const char* ArgonSalt;
|
const char* ArgonSalt;
|
||||||
uint32_t CacheAccesses;
|
|
||||||
uint32_t SuperscalarLatency;
|
|
||||||
|
|
||||||
uint32_t DatasetBaseSize;
|
|
||||||
uint32_t DatasetExtraSize;
|
|
||||||
|
|
||||||
uint32_t ScratchpadL1_Size;
|
uint32_t ScratchpadL1_Size;
|
||||||
uint32_t ScratchpadL2_Size;
|
uint32_t ScratchpadL2_Size;
|
||||||
|
@ -82,9 +91,6 @@ struct RandomX_ConfigurationBase
|
||||||
uint32_t ProgramIterations;
|
uint32_t ProgramIterations;
|
||||||
uint32_t ProgramCount;
|
uint32_t ProgramCount;
|
||||||
|
|
||||||
uint32_t JumpBits;
|
|
||||||
uint32_t JumpOffset;
|
|
||||||
|
|
||||||
uint32_t RANDOMX_FREQ_IADD_RS;
|
uint32_t RANDOMX_FREQ_IADD_RS;
|
||||||
uint32_t RANDOMX_FREQ_IADD_M;
|
uint32_t RANDOMX_FREQ_IADD_M;
|
||||||
uint32_t RANDOMX_FREQ_ISUB_R;
|
uint32_t RANDOMX_FREQ_ISUB_R;
|
||||||
|
@ -126,15 +132,10 @@ struct RandomX_ConfigurationBase
|
||||||
uint8_t codeReadDatasetLightSshInitTweaked[68];
|
uint8_t codeReadDatasetLightSshInitTweaked[68];
|
||||||
uint8_t codePrefetchScratchpadTweaked[32];
|
uint8_t codePrefetchScratchpadTweaked[32];
|
||||||
|
|
||||||
uint32_t CacheLineAlignMask_Calculated;
|
|
||||||
uint32_t DatasetExtraItems_Calculated;
|
|
||||||
|
|
||||||
uint32_t AddressMask_Calculated[4];
|
uint32_t AddressMask_Calculated[4];
|
||||||
uint32_t ScratchpadL3Mask_Calculated;
|
uint32_t ScratchpadL3Mask_Calculated;
|
||||||
uint32_t ScratchpadL3Mask64_Calculated;
|
uint32_t ScratchpadL3Mask64_Calculated;
|
||||||
|
|
||||||
uint32_t ConditionMask_Calculated;
|
|
||||||
|
|
||||||
#if defined(XMRIG_ARMv8)
|
#if defined(XMRIG_ARMv8)
|
||||||
uint32_t Log2_ScratchpadL1;
|
uint32_t Log2_ScratchpadL1;
|
||||||
uint32_t Log2_ScratchpadL2;
|
uint32_t Log2_ScratchpadL2;
|
||||||
|
@ -142,37 +143,6 @@ struct RandomX_ConfigurationBase
|
||||||
uint32_t Log2_DatasetBaseSize;
|
uint32_t Log2_DatasetBaseSize;
|
||||||
uint32_t Log2_CacheSize;
|
uint32_t Log2_CacheSize;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int CEIL_IADD_RS;
|
|
||||||
int CEIL_IADD_M;
|
|
||||||
int CEIL_ISUB_R;
|
|
||||||
int CEIL_ISUB_M;
|
|
||||||
int CEIL_IMUL_R;
|
|
||||||
int CEIL_IMUL_M;
|
|
||||||
int CEIL_IMULH_R;
|
|
||||||
int CEIL_IMULH_M;
|
|
||||||
int CEIL_ISMULH_R;
|
|
||||||
int CEIL_ISMULH_M;
|
|
||||||
int CEIL_IMUL_RCP;
|
|
||||||
int CEIL_INEG_R;
|
|
||||||
int CEIL_IXOR_R;
|
|
||||||
int CEIL_IXOR_M;
|
|
||||||
int CEIL_IROR_R;
|
|
||||||
int CEIL_IROL_R;
|
|
||||||
int CEIL_ISWAP_R;
|
|
||||||
int CEIL_FSWAP_R;
|
|
||||||
int CEIL_FADD_R;
|
|
||||||
int CEIL_FADD_M;
|
|
||||||
int CEIL_FSUB_R;
|
|
||||||
int CEIL_FSUB_M;
|
|
||||||
int CEIL_FSCAL_R;
|
|
||||||
int CEIL_FMUL_R;
|
|
||||||
int CEIL_FDIV_M;
|
|
||||||
int CEIL_FSQRT_R;
|
|
||||||
int CEIL_CBRANCH;
|
|
||||||
int CEIL_CFROUND;
|
|
||||||
int CEIL_ISTORE;
|
|
||||||
int CEIL_NOP;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};
|
struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};
|
||||||
|
@ -200,6 +170,8 @@ void randomx_apply_config(const T& config)
|
||||||
RandomX_CurrentConfig.Apply();
|
RandomX_CurrentConfig.Apply();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void randomx_set_scratchpad_prefetch_mode(int mode);
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -28,6 +28,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "crypto/randomx/soft_aes.h"
|
#include "crypto/randomx/soft_aes.h"
|
||||||
|
#include "crypto/randomx/aes_hash.hpp"
|
||||||
|
#include "base/tools/Chrono.h"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
alignas(64) uint32_t lutEnc0[256];
|
alignas(64) uint32_t lutEnc0[256];
|
||||||
alignas(64) uint32_t lutEnc1[256];
|
alignas(64) uint32_t lutEnc1[256];
|
||||||
|
@ -117,3 +120,47 @@ static struct SAESInitializer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} aes_initializer;
|
} aes_initializer;
|
||||||
|
|
||||||
|
static uint32_t softAESImpl = 1;
|
||||||
|
|
||||||
|
uint32_t GetSoftAESImpl()
|
||||||
|
{
|
||||||
|
return softAESImpl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SelectSoftAESImpl()
|
||||||
|
{
|
||||||
|
constexpr int test_length_ms = 100;
|
||||||
|
double speed[2] = {};
|
||||||
|
|
||||||
|
for (int run = 0; run < 3; ++run) {
|
||||||
|
for (int i = 0; i < 2; ++i) {
|
||||||
|
std::vector<uint8_t> scratchpad(10 * 1024);
|
||||||
|
uint8_t hash[64] = {};
|
||||||
|
uint8_t state[64] = {};
|
||||||
|
|
||||||
|
uint64_t t1, t2;
|
||||||
|
|
||||||
|
uint32_t count = 0;
|
||||||
|
t1 = xmrig::Chrono::highResolutionMSecs();
|
||||||
|
do {
|
||||||
|
if (i == 0) {
|
||||||
|
hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||||
|
}
|
||||||
|
++count;
|
||||||
|
|
||||||
|
t2 = xmrig::Chrono::highResolutionMSecs();
|
||||||
|
} while (t2 - t1 < test_length_ms);
|
||||||
|
|
||||||
|
const double x = count * 1e3 / (t2 - t1);
|
||||||
|
if (x > speed[i]) {
|
||||||
|
speed[i] = x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
softAESImpl = (speed[0] > speed[1]) ? 1 : 2;
|
||||||
|
}
|
||||||
|
|
|
@ -41,11 +41,14 @@ extern uint32_t lutDec1[256];
|
||||||
extern uint32_t lutDec2[256];
|
extern uint32_t lutDec2[256];
|
||||||
extern uint32_t lutDec3[256];
|
extern uint32_t lutDec3[256];
|
||||||
|
|
||||||
template<bool soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
uint32_t GetSoftAESImpl();
|
||||||
template<bool soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
void SelectSoftAESImpl();
|
||||||
|
|
||||||
|
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
|
||||||
|
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
FORCE_INLINE rx_vec_i128 aesenc<1>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
volatile uint8_t s[16];
|
volatile uint8_t s[16];
|
||||||
memcpy((void*) s, &in, 16);
|
memcpy((void*) s, &in, 16);
|
||||||
|
|
||||||
|
@ -73,7 +76,7 @@ FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
FORCE_INLINE rx_vec_i128 aesdec<1>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
volatile uint8_t s[16];
|
volatile uint8_t s[16];
|
||||||
memcpy((void*) s, &in, 16);
|
memcpy((void*) s, &in, 16);
|
||||||
|
|
||||||
|
@ -101,11 +104,49 @@ FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
FORCE_INLINE rx_vec_i128 aesenc<false>(rx_vec_i128 in, rx_vec_i128 key) {
|
FORCE_INLINE rx_vec_i128 aesenc<2>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
|
uint32_t s0, s1, s2, s3;
|
||||||
|
|
||||||
|
s0 = rx_vec_i128_w(in);
|
||||||
|
s1 = rx_vec_i128_z(in);
|
||||||
|
s2 = rx_vec_i128_y(in);
|
||||||
|
s3 = rx_vec_i128_x(in);
|
||||||
|
|
||||||
|
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||||
|
(lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]),
|
||||||
|
(lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]),
|
||||||
|
(lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]),
|
||||||
|
(lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24])
|
||||||
|
);
|
||||||
|
|
||||||
|
return rx_xor_vec_i128(out, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
FORCE_INLINE rx_vec_i128 aesdec<2>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
|
uint32_t s0, s1, s2, s3;
|
||||||
|
|
||||||
|
s0 = rx_vec_i128_w(in);
|
||||||
|
s1 = rx_vec_i128_z(in);
|
||||||
|
s2 = rx_vec_i128_y(in);
|
||||||
|
s3 = rx_vec_i128_x(in);
|
||||||
|
|
||||||
|
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||||
|
(lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]),
|
||||||
|
(lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]),
|
||||||
|
(lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]),
|
||||||
|
(lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24])
|
||||||
|
);
|
||||||
|
|
||||||
|
return rx_xor_vec_i128(out, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
FORCE_INLINE rx_vec_i128 aesenc<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
return rx_aesenc_vec_i128(in, key);
|
return rx_aesenc_vec_i128(in, key);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
FORCE_INLINE rx_vec_i128 aesdec<false>(rx_vec_i128 in, rx_vec_i128 key) {
|
FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||||
return rx_aesdec_vec_i128(in, key);
|
return rx_aesdec_vec_i128(in, key);
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,6 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "crypto/randomx/blake2/blake2.h"
|
#include "crypto/randomx/blake2/blake2.h"
|
||||||
#include "crypto/randomx/intrin_portable.h"
|
#include "crypto/randomx/intrin_portable.h"
|
||||||
#include "crypto/randomx/allocator.hpp"
|
#include "crypto/randomx/allocator.hpp"
|
||||||
|
#include "crypto/randomx/soft_aes.h"
|
||||||
|
#include "base/tools/Profiler.h"
|
||||||
|
|
||||||
randomx_vm::~randomx_vm() {
|
randomx_vm::~randomx_vm() {
|
||||||
|
|
||||||
|
@ -95,11 +97,11 @@ void randomx_vm::initialize() {
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
VmBase<softAes>::~VmBase() {
|
VmBase<softAes>::~VmBase() {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void VmBase<softAes>::setScratchpad(uint8_t *scratchpad) {
|
void VmBase<softAes>::setScratchpad(uint8_t *scratchpad) {
|
||||||
if (datasetPtr == nullptr) {
|
if (datasetPtr == nullptr) {
|
||||||
throw std::invalid_argument("Cache/Dataset not set");
|
throw std::invalid_argument("Cache/Dataset not set");
|
||||||
|
@ -108,25 +110,37 @@ namespace randomx {
|
||||||
this->scratchpad = scratchpad;
|
this->scratchpad = scratchpad;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void VmBase<softAes>::getFinalResult(void* out, size_t outSize) {
|
void VmBase<softAes>::getFinalResult(void* out) {
|
||||||
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a);
|
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a);
|
||||||
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void VmBase<softAes>::hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) {
|
void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
|
||||||
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
if (!softAes) {
|
||||||
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
hashAndFillAes1Rx4<0>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (GetSoftAESImpl() == 1) {
|
||||||
|
hashAndFillAes1Rx4<1>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
hashAndFillAes1Rx4<2>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, ®, sizeof(RegisterFile));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void VmBase<softAes>::initScratchpad(void* seed) {
|
void VmBase<softAes>::initScratchpad(void* seed) {
|
||||||
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
|
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void VmBase<softAes>::generateProgram(void* seed) {
|
void VmBase<softAes>::generateProgram(void* seed) {
|
||||||
|
PROFILE_SCOPE(RandomX_generate_program);
|
||||||
fillAes4Rx4<softAes>(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program);
|
fillAes4Rx4<softAes>(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,8 +38,8 @@ class randomx_vm
|
||||||
public:
|
public:
|
||||||
virtual ~randomx_vm() = 0;
|
virtual ~randomx_vm() = 0;
|
||||||
virtual void setScratchpad(uint8_t *scratchpad) = 0;
|
virtual void setScratchpad(uint8_t *scratchpad) = 0;
|
||||||
virtual void getFinalResult(void* out, size_t outSize) = 0;
|
virtual void getFinalResult(void* out) = 0;
|
||||||
virtual void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) = 0;
|
virtual void hashAndFill(void* out, uint64_t (&fill_state)[8]) = 0;
|
||||||
virtual void setDataset(randomx_dataset* dataset) { }
|
virtual void setDataset(randomx_dataset* dataset) { }
|
||||||
virtual void setCache(randomx_cache* cache) { }
|
virtual void setCache(randomx_cache* cache) { }
|
||||||
virtual void initScratchpad(void* seed) = 0;
|
virtual void initScratchpad(void* seed) = 0;
|
||||||
|
@ -79,15 +79,15 @@ protected:
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
class VmBase : public randomx_vm
|
class VmBase : public randomx_vm
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
~VmBase() override;
|
~VmBase() override;
|
||||||
void setScratchpad(uint8_t *scratchpad) override;
|
void setScratchpad(uint8_t *scratchpad) override;
|
||||||
void initScratchpad(void* seed) override;
|
void initScratchpad(void* seed) override;
|
||||||
void getFinalResult(void* out, size_t outSize) override;
|
void getFinalResult(void* out) override;
|
||||||
void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) override;
|
void hashAndFill(void* out, uint64_t (&fill_state)[8]) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void generateProgram(void* seed);
|
void generateProgram(void* seed);
|
||||||
|
|
|
@ -28,19 +28,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "crypto/randomx/vm_compiled.hpp"
|
#include "crypto/randomx/vm_compiled.hpp"
|
||||||
#include "crypto/randomx/common.hpp"
|
#include "crypto/randomx/common.hpp"
|
||||||
|
#include "base/tools/Profiler.h"
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
|
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
|
||||||
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
|
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void CompiledVm<softAes>::setDataset(randomx_dataset* dataset) {
|
void CompiledVm<softAes>::setDataset(randomx_dataset* dataset) {
|
||||||
datasetPtr = dataset;
|
datasetPtr = dataset;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void CompiledVm<softAes>::run(void* seed) {
|
void CompiledVm<softAes>::run(void* seed) {
|
||||||
|
PROFILE_SCOPE(RandomX_run);
|
||||||
|
|
||||||
compiler.prepare();
|
compiler.prepare();
|
||||||
VmBase<softAes>::generateProgram(seed);
|
VmBase<softAes>::generateProgram(seed);
|
||||||
randomx_vm::initialize();
|
randomx_vm::initialize();
|
||||||
|
@ -49,8 +52,10 @@ namespace randomx {
|
||||||
execute();
|
execute();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void CompiledVm<softAes>::execute() {
|
void CompiledVm<softAes>::execute() {
|
||||||
|
PROFILE_SCOPE(RandomX_JIT_execute);
|
||||||
|
|
||||||
#ifdef XMRIG_ARM
|
#ifdef XMRIG_ARM
|
||||||
memcpy(reg.f, config.eMask, sizeof(config.eMask));
|
memcpy(reg.f, config.eMask, sizeof(config.eMask));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -37,7 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
class CompiledVm : public VmBase<softAes>
|
class CompiledVm : public VmBase<softAes>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -61,6 +61,6 @@ namespace randomx {
|
||||||
JitCompiler compiler;
|
JitCompiler compiler;
|
||||||
};
|
};
|
||||||
|
|
||||||
using CompiledVmDefault = CompiledVm<true>;
|
using CompiledVmDefault = CompiledVm<1>;
|
||||||
using CompiledVmHardAes = CompiledVm<false>;
|
using CompiledVmHardAes = CompiledVm<0>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,14 +32,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void CompiledLightVm<softAes>::setCache(randomx_cache* cache) {
|
void CompiledLightVm<softAes>::setCache(randomx_cache* cache) {
|
||||||
cachePtr = cache;
|
cachePtr = cache;
|
||||||
mem.memory = cache->memory;
|
mem.memory = cache->memory;
|
||||||
compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
|
compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void CompiledLightVm<softAes>::run(void* seed) {
|
void CompiledLightVm<softAes>::run(void* seed) {
|
||||||
VmBase<softAes>::generateProgram(seed);
|
VmBase<softAes>::generateProgram(seed);
|
||||||
randomx_vm::initialize();
|
randomx_vm::initialize();
|
||||||
|
|
|
@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
class CompiledLightVm : public CompiledVm<softAes>
|
class CompiledLightVm : public CompiledVm<softAes>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -52,6 +52,6 @@ namespace randomx {
|
||||||
using CompiledVm<softAes>::datasetOffset;
|
using CompiledVm<softAes>::datasetOffset;
|
||||||
};
|
};
|
||||||
|
|
||||||
using CompiledLightVmDefault = CompiledLightVm<true>;
|
using CompiledLightVmDefault = CompiledLightVm<1>;
|
||||||
using CompiledLightVmHardAes = CompiledLightVm<false>;
|
using CompiledLightVmHardAes = CompiledLightVm<0>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,20 +33,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void InterpretedVm<softAes>::setDataset(randomx_dataset* dataset) {
|
void InterpretedVm<softAes>::setDataset(randomx_dataset* dataset) {
|
||||||
datasetPtr = dataset;
|
datasetPtr = dataset;
|
||||||
mem.memory = dataset->memory;
|
mem.memory = dataset->memory;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void InterpretedVm<softAes>::run(void* seed) {
|
void InterpretedVm<softAes>::run(void* seed) {
|
||||||
VmBase<softAes>::generateProgram(seed);
|
VmBase<softAes>::generateProgram(seed);
|
||||||
randomx_vm::initialize();
|
randomx_vm::initialize();
|
||||||
execute();
|
execute();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void InterpretedVm<softAes>::execute() {
|
void InterpretedVm<softAes>::execute() {
|
||||||
|
|
||||||
NativeRegisterFile nreg;
|
NativeRegisterFile nreg;
|
||||||
|
@ -106,14 +106,14 @@ namespace randomx {
|
||||||
rx_store_vec_f128(®.e[i].lo, nreg.e[i]);
|
rx_store_vec_f128(®.e[i].lo, nreg.e[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void InterpretedVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
|
void InterpretedVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
|
||||||
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
|
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
|
||||||
for (int i = 0; i < RegistersCount; ++i)
|
for (int i = 0; i < RegistersCount; ++i)
|
||||||
r[i] ^= datasetLine[i];
|
r[i] ^= datasetLine[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void InterpretedVm<softAes>::datasetPrefetch(uint64_t address) {
|
void InterpretedVm<softAes>::datasetPrefetch(uint64_t address) {
|
||||||
rx_prefetch_nta(mem.memory + address);
|
rx_prefetch_nta(mem.memory + address);
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
class InterpretedVm : public VmBase<softAes>, public BytecodeMachine {
|
class InterpretedVm : public VmBase<softAes>, public BytecodeMachine {
|
||||||
public:
|
public:
|
||||||
using VmBase<softAes>::mem;
|
using VmBase<softAes>::mem;
|
||||||
|
@ -65,6 +65,6 @@ namespace randomx {
|
||||||
InstructionByteCode bytecode[RANDOMX_PROGRAM_MAX_SIZE];
|
InstructionByteCode bytecode[RANDOMX_PROGRAM_MAX_SIZE];
|
||||||
};
|
};
|
||||||
|
|
||||||
using InterpretedVmDefault = InterpretedVm<true>;
|
using InterpretedVmDefault = InterpretedVm<1>;
|
||||||
using InterpretedVmHardAes = InterpretedVm<false>;
|
using InterpretedVmHardAes = InterpretedVm<0>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,13 +31,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void InterpretedLightVm<softAes>::setCache(randomx_cache* cache) {
|
void InterpretedLightVm<softAes>::setCache(randomx_cache* cache) {
|
||||||
cachePtr = cache;
|
cachePtr = cache;
|
||||||
mem.memory = cache->memory;
|
mem.memory = cache->memory;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
void InterpretedLightVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[8]) {
|
void InterpretedLightVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[8]) {
|
||||||
uint32_t itemNumber = address / CacheLineSize;
|
uint32_t itemNumber = address / CacheLineSize;
|
||||||
int_reg_t rl[8];
|
int_reg_t rl[8];
|
||||||
|
|
|
@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
||||||
template<bool softAes>
|
template<int softAes>
|
||||||
class InterpretedLightVm : public InterpretedVm<softAes> {
|
class InterpretedLightVm : public InterpretedVm<softAes> {
|
||||||
public:
|
public:
|
||||||
using VmBase<softAes>::mem;
|
using VmBase<softAes>::mem;
|
||||||
|
@ -50,6 +50,6 @@ namespace randomx {
|
||||||
void datasetPrefetch(uint64_t address) override { }
|
void datasetPrefetch(uint64_t address) override { }
|
||||||
};
|
};
|
||||||
|
|
||||||
using InterpretedLightVmDefault = InterpretedLightVm<true>;
|
using InterpretedLightVmDefault = InterpretedLightVm<1>;
|
||||||
using InterpretedLightVmHardAes = InterpretedLightVm<false>;
|
using InterpretedLightVmHardAes = InterpretedLightVm<0>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,8 @@
|
||||||
#include "base/io/log/Log.h"
|
#include "base/io/log/Log.h"
|
||||||
#include "crypto/rx/RxConfig.h"
|
#include "crypto/rx/RxConfig.h"
|
||||||
#include "crypto/rx/RxQueue.h"
|
#include "crypto/rx/RxQueue.h"
|
||||||
|
#include "crypto/randomx/randomx.h"
|
||||||
|
#include "crypto/randomx/soft_aes.h"
|
||||||
|
|
||||||
|
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
@ -99,6 +101,8 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
|
||||||
|
|
||||||
if (isReady(seed)) {
|
if (isReady(seed)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -110,6 +114,9 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
|
||||||
|
|
||||||
if (!osInitialized) {
|
if (!osInitialized) {
|
||||||
setupMainLoopExceptionFrame();
|
setupMainLoopExceptionFrame();
|
||||||
|
if (!cpu.isHwAES()) {
|
||||||
|
SelectSoftAESImpl();
|
||||||
|
}
|
||||||
osInitialized = true;
|
osInitialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,8 @@ static const char *kCacheQoS = "cache_qos";
|
||||||
static const char *kNUMA = "numa";
|
static const char *kNUMA = "numa";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static const char *kScratchpadPrefetchMode = "scratchpad_prefetch_mode";
|
||||||
|
|
||||||
static const std::array<const char *, RxConfig::ModeMax> modeNames = { "auto", "fast", "light" };
|
static const std::array<const char *, RxConfig::ModeMax> modeNames = { "auto", "fast", "light" };
|
||||||
|
|
||||||
|
|
||||||
|
@ -118,6 +120,11 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
const uint32_t mode = static_cast<uint32_t>(Json::getInt(value, kScratchpadPrefetchMode, static_cast<int>(m_scratchpadPrefetchMode)));
|
||||||
|
if (mode < ScratchpadPrefetchMax) {
|
||||||
|
m_scratchpadPrefetchMode = static_cast<ScratchpadPrefetchMode>(mode);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,6 +178,8 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
obj.AddMember(StringRef(kScratchpadPrefetchMode), static_cast<int>(m_scratchpadPrefetchMode), allocator);
|
||||||
|
|
||||||
return obj;
|
return obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,6 +50,14 @@ public:
|
||||||
ModeMax
|
ModeMax
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum ScratchpadPrefetchMode : uint32_t {
|
||||||
|
ScratchpadPrefetchOff,
|
||||||
|
ScratchpadPrefetchT0,
|
||||||
|
ScratchpadPrefetchNTA,
|
||||||
|
ScratchpadPrefetchMov,
|
||||||
|
ScratchpadPrefetchMax,
|
||||||
|
};
|
||||||
|
|
||||||
bool read(const rapidjson::Value &value);
|
bool read(const rapidjson::Value &value);
|
||||||
rapidjson::Value toJSON(rapidjson::Document &doc) const;
|
rapidjson::Value toJSON(rapidjson::Document &doc) const;
|
||||||
|
|
||||||
|
@ -68,6 +76,8 @@ public:
|
||||||
inline bool cacheQoS() const { return m_cacheQoS; }
|
inline bool cacheQoS() const { return m_cacheQoS; }
|
||||||
inline Mode mode() const { return m_mode; }
|
inline Mode mode() const { return m_mode; }
|
||||||
|
|
||||||
|
inline ScratchpadPrefetchMode scratchpadPrefetchMode() const { return m_scratchpadPrefetchMode; }
|
||||||
|
|
||||||
# ifdef XMRIG_FEATURE_MSR
|
# ifdef XMRIG_FEATURE_MSR
|
||||||
const char *msrPresetName() const;
|
const char *msrPresetName() const;
|
||||||
const MsrItems &msrPreset() const;
|
const MsrItems &msrPreset() const;
|
||||||
|
@ -94,6 +104,8 @@ private:
|
||||||
int m_threads = -1;
|
int m_threads = -1;
|
||||||
Mode m_mode = AutoMode;
|
Mode m_mode = AutoMode;
|
||||||
|
|
||||||
|
ScratchpadPrefetchMode m_scratchpadPrefetchMode = ScratchpadPrefetchT0;
|
||||||
|
|
||||||
# ifdef XMRIG_FEATURE_HWLOC
|
# ifdef XMRIG_FEATURE_HWLOC
|
||||||
std::vector<uint32_t> m_nodeset;
|
std::vector<uint32_t> m_nodeset;
|
||||||
# endif
|
# endif
|
||||||
|
|
|
@ -31,6 +31,11 @@
|
||||||
#include "crypto/rx/RxVm.h"
|
#include "crypto/rx/RxVm.h"
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
|
extern "C" uint32_t rx_blake2b_use_sse41;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
randomx_vm* xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node)
|
randomx_vm* xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node)
|
||||||
{
|
{
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
|
@ -55,6 +60,10 @@ randomx_vm* xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
|
||||||
flags |= RANDOMX_FLAG_AMD;
|
flags |= RANDOMX_FLAG_AMD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# if defined(_M_X64) || defined(__x86_64__)
|
||||||
|
rx_blake2b_use_sse41 = Cpu::info()->has(ICpuInfo::FLAG_SSE41) ? 1 : 0;
|
||||||
|
# endif
|
||||||
|
|
||||||
return randomx_create_vm(static_cast<randomx_flags>(flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
|
return randomx_create_vm(static_cast<randomx_flags>(flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
#define APP_ID "xmrig"
|
#define APP_ID "xmrig"
|
||||||
#define APP_NAME "XMRig"
|
#define APP_NAME "XMRig"
|
||||||
#define APP_DESC "XMRig miner"
|
#define APP_DESC "XMRig miner"
|
||||||
#define APP_VERSION "6.3.3"
|
#define APP_VERSION "6.3.4-dev"
|
||||||
#define APP_DOMAIN "xmrig.com"
|
#define APP_DOMAIN "xmrig.com"
|
||||||
#define APP_SITE "www.xmrig.com"
|
#define APP_SITE "www.xmrig.com"
|
||||||
#define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com"
|
#define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com"
|
||||||
|
@ -36,7 +36,7 @@
|
||||||
|
|
||||||
#define APP_VER_MAJOR 6
|
#define APP_VER_MAJOR 6
|
||||||
#define APP_VER_MINOR 3
|
#define APP_VER_MINOR 3
|
||||||
#define APP_VER_PATCH 3
|
#define APP_VER_PATCH 4
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# if (_MSC_VER >= 1920)
|
# if (_MSC_VER >= 1920)
|
||||||
|
|
Loading…
Reference in a new issue