Merge branch 'dev'

This commit is contained in:
XMRig 2020-09-23 05:59:35 +07:00
commit cb0bba7e10
No known key found for this signature in database
GPG key ID: 446A53638BE94409
51 changed files with 1184 additions and 515 deletions

View file

@ -1,3 +1,14 @@
# v6.3.4
- [#1823](https://github.com/xmrig/xmrig/pull/1823) RandomX: added new option `scratchpad_prefetch_mode`.
- [#1827](https://github.com/xmrig/xmrig/pull/1827) [#1831](https://github.com/xmrig/xmrig/pull/1831) Improved nonce iteration performance.
- [#1828](https://github.com/xmrig/xmrig/pull/1828) RandomX: added SSE4.1-optimized Blake2b.
- [#1830](https://github.com/xmrig/xmrig/pull/1830) RandomX: added performance profiler (for developers).
- [#1835](https://github.com/xmrig/xmrig/pull/1835) RandomX: returned old soft AES implementation and added auto-select between the two.
- [#1840](https://github.com/xmrig/xmrig/pull/1840) RandomX: moved more stuff to compile time, small x86 JIT compiler speedup.
- [#1841](https://github.com/xmrig/xmrig/pull/1841) Fixed Cryptonight OpenCL for AMD 20.7.2 drivers.
- [#1842](https://github.com/xmrig/xmrig/pull/1842) RandomX: AES improvements, a bit faster hardware AES code when compiled with MSVC.
- [#1843](https://github.com/xmrig/xmrig/pull/1843) RandomX: improved performance of GCC compiled binaries.
# v6.3.3 # v6.3.3
- [#1817](https://github.com/xmrig/xmrig/pull/1817) Fixed self-select login sequence. - [#1817](https://github.com/xmrig/xmrig/pull/1817) Fixed self-select login sequence.
- Added brand new [build from source](https://xmrig.com/docs/miner/build) documentation. - Added brand new [build from source](https://xmrig.com/docs/miner/build) documentation.

View file

@ -23,6 +23,7 @@ option(WITH_NVML "Enable NVML (NVIDIA Management Library) support (on
option(WITH_ADL "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON) option(WITH_ADL "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON)
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON) option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF) option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
option(WITH_PROFILING "Enable profiling for developers" OFF)
option(BUILD_STATIC "Build static binary" OFF) option(BUILD_STATIC "Build static binary" OFF)
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)

View file

@ -29,8 +29,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -flax-vector-conversions") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -flax-vector-conversions")
else() else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -msse4.1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -msse4.1")
add_definitions(/DHAVE_ROTR) add_definitions(/DHAVE_ROTR)
endif() endif()
@ -87,8 +87,8 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
else() else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -msse4.1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -msse4.1")
check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR) check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR)
if (HAVE_ROTR) if (HAVE_ROTR)

View file

@ -66,14 +66,12 @@ public:
inline bool nextRound(uint32_t rounds, uint32_t roundSize) inline bool nextRound(uint32_t rounds, uint32_t roundSize)
{ {
bool ok = true;
m_rounds[index()]++; m_rounds[index()]++;
if ((m_rounds[index()] % rounds) == 0) { if ((m_rounds[index()] % rounds) == 0) {
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
*nonce(i) = Nonce::next(index(), *nonce(i), rounds * roundSize, currentJob().isNicehash(), &ok); if (!Nonce::next(index(), nonce(i), rounds * roundSize, nonceMask())) {
if (!ok) { return false;
break;
} }
} }
} }
@ -83,13 +81,14 @@ public:
} }
} }
return ok; return true;
} }
private: private:
inline int32_t nonceOffset() const { return currentJob().nonceOffset(); } inline int32_t nonceOffset() const { return currentJob().nonceOffset(); }
inline size_t nonceSize() const { return currentJob().nonceSize(); } inline size_t nonceSize() const { return currentJob().nonceSize(); }
inline uint64_t nonceMask() const { return m_nonce_mask[index()]; }
inline void save(const Job &job, uint32_t reserveCount, Nonce::Backend backend) inline void save(const Job &job, uint32_t reserveCount, Nonce::Backend backend)
{ {
@ -97,12 +96,13 @@ private:
const size_t size = job.size(); const size_t size = job.size();
m_jobs[index()] = job; m_jobs[index()] = job;
m_rounds[index()] = 0; m_rounds[index()] = 0;
m_nonce_mask[index()] = job.nonceMask();
m_jobs[index()].setBackend(backend); m_jobs[index()].setBackend(backend);
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
memcpy(m_blobs[index()] + (i * size), job.blob(), size); memcpy(m_blobs[index()] + (i * size), job.blob(), size);
*nonce(i) = Nonce::next(index(), *nonce(i), reserveCount, job.isNicehash()); Nonce::next(index(), nonce(i), reserveCount, nonceMask());
} }
} }
@ -110,6 +110,7 @@ private:
alignas(16) uint8_t m_blobs[2][Job::kMaxBlobSize * N]{}; alignas(16) uint8_t m_blobs[2][Job::kMaxBlobSize * N]{};
Job m_jobs[2]; Job m_jobs[2];
uint32_t m_rounds[2] = { 0, 0 }; uint32_t m_rounds[2] = { 0, 0 };
uint64_t m_nonce_mask[2];
uint64_t m_sequence = 0; uint64_t m_sequence = 0;
uint8_t m_index = 0; uint8_t m_index = 0;
}; };
@ -125,41 +126,23 @@ inline uint32_t *xmrig::WorkerJob<1>::nonce(size_t)
template<> template<>
inline bool xmrig::WorkerJob<1>::nextRound(uint32_t rounds, uint32_t roundSize) inline bool xmrig::WorkerJob<1>::nextRound(uint32_t rounds, uint32_t roundSize)
{ {
bool ok = true;
m_rounds[index()]++; m_rounds[index()]++;
uint32_t* n = nonce(); uint32_t* n = nonce();
const uint32_t prev_nonce = *n;
if ((m_rounds[index()] % rounds) == 0) { if ((m_rounds[index()] % rounds) == 0) {
*n = Nonce::next(index(), *n, rounds * roundSize, currentJob().isNicehash(), &ok); if (!Nonce::next(index(), n, rounds * roundSize, nonceMask())) {
return false;
}
if (nonceSize() == sizeof(uint64_t)) {
m_jobs[index()].nonce()[1] = n[1];
}
} }
else { else {
*n += roundSize; *n += roundSize;
} }
// Increment higher 32 bits of a 64-bit nonce when lower 32 bits overflow return true;
if (!currentJob().isNicehash() && (nonceSize() == sizeof(uint64_t))) {
const bool wrapped = (*n < prev_nonce);
const bool wraps_this_round = (static_cast<uint64_t>(*n) + roundSize > (1ULL << 32));
// Account for the case when starting nonce hasn't wrapped yet, but some nonces in the current round will wrap
if (wrapped || wraps_this_round) {
// Set lower 32 bits to 0 when higher 32 bits change
Nonce::reset(index());
// Sets *n to 0 and Nonce::m_nonce[index] to the correct next value
*n = 0;
Nonce::next(index(), *n, rounds * roundSize, currentJob().isNicehash(), &ok);
++n[1];
Job& job = m_jobs[index()];
memcpy(job.blob(), blob(), job.size());
}
}
return ok;
} }
@ -169,11 +152,12 @@ inline void xmrig::WorkerJob<1>::save(const Job &job, uint32_t reserveCount, Non
m_index = job.index(); m_index = job.index();
m_jobs[index()] = job; m_jobs[index()] = job;
m_rounds[index()] = 0; m_rounds[index()] = 0;
m_nonce_mask[index()] = job.nonceMask();
m_jobs[index()].setBackend(backend); m_jobs[index()].setBackend(backend);
memcpy(blob(), job.blob(), job.size()); memcpy(blob(), job.blob(), job.size());
*nonce() = Nonce::next(index(), *nonce(), reserveCount, currentJob().isNicehash()); Nonce::next(index(), nonce(), reserveCount, nonceMask());
} }

View file

@ -63,6 +63,7 @@ public:
FLAG_PDPE1GB, FLAG_PDPE1GB,
FLAG_SSE2, FLAG_SSE2,
FLAG_SSSE3, FLAG_SSSE3,
FLAG_SSE41,
FLAG_XOP, FLAG_XOP,
FLAG_POPCNT, FLAG_POPCNT,
FLAG_CAT_L3, FLAG_CAT_L3,
@ -97,6 +98,7 @@ public:
virtual size_t packages() const = 0; virtual size_t packages() const = 0;
virtual size_t threads() const = 0; virtual size_t threads() const = 0;
virtual Vendor vendor() const = 0; virtual Vendor vendor() const = 0;
virtual bool jccErratum() const = 0;
}; };

View file

@ -57,7 +57,7 @@
namespace xmrig { namespace xmrig {
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt", "cat_l3" }; static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3" };
static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" }; static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" };
@ -141,6 +141,7 @@ static inline bool has_bmi2() { return has_feature(EXTENDED_FEATURES,
static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, 1 << 26); } static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, 1 << 26); }
static inline bool has_sse2() { return has_feature(PROCESSOR_INFO, EDX_Reg, 1 << 26); } static inline bool has_sse2() { return has_feature(PROCESSOR_INFO, EDX_Reg, 1 << 26); }
static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); } static inline bool has_ssse3() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 9); }
static inline bool has_sse41() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 19); }
static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); } static inline bool has_xop() { return has_feature(0x80000001, ECX_Reg, 1 << 11); }
static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); } static inline bool has_popcnt() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 23); }
static inline bool has_cat_l3() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); } static inline bool has_cat_l3() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15) && has_feature(0x10, EBX_Reg, 1 << 1); }
@ -177,6 +178,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
m_flags.set(FLAG_PDPE1GB, has_pdpe1gb()); m_flags.set(FLAG_PDPE1GB, has_pdpe1gb());
m_flags.set(FLAG_SSE2, has_sse2()); m_flags.set(FLAG_SSE2, has_sse2());
m_flags.set(FLAG_SSSE3, has_ssse3()); m_flags.set(FLAG_SSSE3, has_ssse3());
m_flags.set(FLAG_SSE41, has_sse41());
m_flags.set(FLAG_XOP, has_xop()); m_flags.set(FLAG_XOP, has_xop());
m_flags.set(FLAG_POPCNT, has_popcnt()); m_flags.set(FLAG_POPCNT, has_popcnt());
m_flags.set(FLAG_CAT_L3, has_cat_l3()); m_flags.set(FLAG_CAT_L3, has_cat_l3());
@ -210,6 +212,37 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
m_vendor = VENDOR_INTEL; m_vendor = VENDOR_INTEL;
m_assembly = Assembly::INTEL; m_assembly = Assembly::INTEL;
m_msrMod = MSR_MOD_INTEL; m_msrMod = MSR_MOD_INTEL;
struct
{
unsigned int stepping : 4;
unsigned int model : 4;
unsigned int family : 4;
unsigned int processor_type : 2;
unsigned int reserved1 : 2;
unsigned int ext_model : 4;
unsigned int ext_family : 8;
unsigned int reserved2 : 4;
} processor_info;
cpuid(1, data);
memcpy(&processor_info, data, sizeof(processor_info));
// Intel JCC erratum mitigation
if (processor_info.family == 6) {
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
const uint32_t stepping = processor_info.stepping;
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
m_jccErratum =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
((model == 0xA6) && (stepping == 0x0)) ||
((model == 0xAE) && (stepping == 0xA));
}
} }
} }
# endif # endif

View file

@ -61,11 +61,13 @@ protected:
inline size_t packages() const override { return 1; } inline size_t packages() const override { return 1; }
inline size_t threads() const override { return m_threads; } inline size_t threads() const override { return m_threads; }
inline Vendor vendor() const override { return m_vendor; } inline Vendor vendor() const override { return m_vendor; }
inline bool jccErratum() const override { return m_jccErratum; }
protected: protected:
char m_brand[64 + 6]{}; char m_brand[64 + 6]{};
size_t m_threads; size_t m_threads;
Vendor m_vendor = VENDOR_UNKNOWN; Vendor m_vendor = VENDOR_UNKNOWN;
bool m_jccErratum = false;
private: private:
Assembly m_assembly = Assembly::NONE; Assembly m_assembly = Assembly::NONE;

View file

@ -899,7 +899,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
((uint8 *)h)[0] = vload8(0U, c_IV256); ((uint8 *)h)[0] = vload8(0U, c_IV256);
for (uint i = 0; i < 3; ++i) { for (volatile uint i = 0; i < 3; ++i) {
((uint16 *)m)[0] = vload16(i, (__global uint *)states); ((uint16 *)m)[0] = vload16(i, (__global uint *)states);
for (uint x = 0; x < 16; ++x) { for (uint x = 0; x < 16; ++x) {
m[x] = SWAP4(m[x]); m[x] = SWAP4(m[x]);

View file

@ -2,7 +2,7 @@
namespace xmrig { namespace xmrig {
static const char cryptonight_cl[60954] = { static const char cryptonight_cl[60963] = {
0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70, 0x23,0x69,0x66,0x64,0x65,0x66,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,
0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f, 0x65,0x63,0x69,0x66,0x69,0x65,0x72,0x73,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x4f,0x50,0x45,0x4e,0x43,0x4c,0x20,0x45,0x58,0x54,0x45,0x4e,0x53,0x49,0x4f,
0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69, 0x4e,0x20,0x63,0x6c,0x5f,0x63,0x6c,0x61,0x6e,0x67,0x5f,0x73,0x74,0x6f,0x72,0x61,0x67,0x65,0x5f,0x63,0x6c,0x61,0x73,0x73,0x5f,0x73,0x70,0x65,0x63,0x69,0x66,0x69,
@ -1823,91 +1823,92 @@ static const char cryptonight_cl[60954] = {
0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x6d,0x5b, 0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x6d,0x5b,
0x31,0x36,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x76,0x5b,0x31,0x36,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x68,0x5b, 0x31,0x36,0x5d,0x3b,0x0a,0x75,0x6e,0x73,0x69,0x67,0x6e,0x65,0x64,0x20,0x69,0x6e,0x74,0x20,0x76,0x5b,0x31,0x36,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x68,0x5b,
0x38,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3d,0x30,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b, 0x38,0x5d,0x3b,0x0a,0x75,0x69,0x6e,0x74,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3d,0x30,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x49,0x56,0x32,0x35,0x36,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74, 0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x49,0x56,0x32,0x35,0x36,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x76,0x6f,0x6c,0x61,
0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x33,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x6d,0x29,0x5b, 0x74,0x69,0x6c,0x65,0x20,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x33,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x28,0x28,0x75,0x69,0x6e,
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x31,0x36,0x28,0x69,0x2c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74, 0x74,0x31,0x36,0x20,0x2a,0x29,0x6d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x31,0x36,0x28,0x69,0x2c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x31,0x36,0x3b,0x20,0x2b,0x2b,0x78,0x29, 0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,
0x20,0x7b,0x0a,0x6d,0x5b,0x78,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x6d,0x5b,0x78,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x62,0x69,0x74,0x6c,0x65,0x6e,0x2b,0x3d,0x35, 0x3c,0x31,0x36,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x6d,0x5b,0x78,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x6d,0x5b,0x78,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,
0x31,0x32,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20, 0x62,0x69,0x74,0x6c,0x65,0x6e,0x2b,0x3d,0x35,0x31,0x32,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,
0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x68,0x69,0x3d,0x76,0x6c,0x6f, 0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,
0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b, 0x30,0x5d,0x2e,0x68,0x69,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,
0x0a,0x76,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b, 0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x76,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,
0x20,0x72,0x3c,0x31,0x34,0x3b,0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x30, 0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b,0x20,0x72,0x3c,0x31,0x34,0x3b,0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,
0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x36,0x2c,0x30, 0x38,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x30,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,
0x78,0x41,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x36,0x29, 0x0a,0x47,0x53,0x28,0x32,0x2c,0x36,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,
0x3b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x36,0x2c,0x30,0x78, 0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x36,0x29,0x3b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,
0x42,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x43,0x29,0x3b, 0x47,0x53,0x28,0x31,0x2c,0x36,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,
0x0a,0x47,0x53,0x28,0x33,0x2c,0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20, 0x30,0x78,0x44,0x2c,0x30,0x78,0x43,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,
0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x5e,0x28,0x28,0x75,0x69,0x6e, 0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,
0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x6d,0x5b,0x30,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f, 0x5b,0x30,0x5d,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x7d,0x0a,0x6d,0x5b,0x30,0x5d,0x3d,0x53,0x57,0x41,0x50,
0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x38,0x5d,0x29,0x3b,0x0a,0x6d,0x5b,0x31,0x5d,0x3d,0x53,0x57, 0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x38,0x5d,0x29,
0x41,0x50,0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x39, 0x3b,0x0a,0x6d,0x5b,0x31,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x28,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x29,0x73,
0x5d,0x29,0x3b,0x0a,0x6d,0x5b,0x32,0x5d,0x3d,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x33,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55, 0x74,0x61,0x74,0x65,0x73,0x29,0x5b,0x34,0x39,0x5d,0x29,0x3b,0x0a,0x6d,0x5b,0x32,0x5d,0x3d,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x3b,0x0a,0x6d,
0x3b,0x0a,0x6d,0x5b,0x34,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x35,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x36,0x5d,0x3d,0x30, 0x5b,0x33,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x34,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x35,0x5d,0x3d,0x30,0x78,0x30,0x30,
0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x38,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b, 0x55,0x3b,0x0a,0x6d,0x5b,0x36,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x38,0x5d,0x3d,
0x39,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x30,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x31,0x5d,0x3d,0x30,0x78,0x30, 0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x39,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x30,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,
0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x32,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x33,0x5d,0x3d,0x31,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x34,0x5d, 0x6d,0x5b,0x31,0x31,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x32,0x5d,0x3d,0x30,0x78,0x30,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x33,0x5d,0x3d,
0x3d,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x35,0x5d,0x3d,0x30,0x78,0x36,0x34,0x30,0x3b,0x0a,0x62,0x69,0x74,0x6c,0x65,0x6e,0x2b,0x3d,0x36,0x34,0x3b,0x0a,0x28,0x28, 0x31,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x34,0x5d,0x3d,0x30,0x55,0x3b,0x0a,0x6d,0x5b,0x31,0x35,0x5d,0x3d,0x30,0x78,0x36,0x34,0x30,0x3b,0x0a,0x62,0x69,0x74,0x6c,0x65,
0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30, 0x6e,0x2b,0x3d,0x36,0x34,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x6c,0x6f,0x3d,0x28,0x28,0x75,0x69,0x6e,
0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x68,0x69,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55, 0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x3b,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x31,0x36,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x2e,0x68,0x69,0x3d,
0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x76,0x5b,0x31,0x33,0x5d, 0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x55,0x2c,0x63,0x5f,0x75,0x32,0x35,0x36,0x29,0x3b,0x0a,0x76,0x5b,0x31,0x32,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,
0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,0x3d,0x30,0x3b,0x20,0x72,0x3c,0x31,0x34,0x3b, 0x65,0x6e,0x3b,0x0a,0x76,0x5b,0x31,0x33,0x5d,0x20,0x5e,0x3d,0x20,0x62,0x69,0x74,0x6c,0x65,0x6e,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x72,
0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x30,0x29,0x3b,0x0a,0x47,0x53,0x28, 0x3d,0x30,0x3b,0x20,0x72,0x3c,0x31,0x34,0x3b,0x20,0x72,0x2b,0x2b,0x29,0x20,0x7b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x34,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x43,0x2c,
0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x36,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x45, 0x30,0x78,0x30,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x35,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x32,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,
0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x36,0x29,0x3b,0x0a,0x47,0x53,0x28,0x30, 0x36,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x34,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x37,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x46,0x2c,0x30,
0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x36,0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x43,0x2c, 0x78,0x36,0x29,0x3b,0x0a,0x47,0x53,0x28,0x30,0x2c,0x35,0x2c,0x30,0x78,0x41,0x2c,0x30,0x78,0x46,0x2c,0x30,0x78,0x38,0x29,0x3b,0x0a,0x47,0x53,0x28,0x31,0x2c,0x36,
0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,0x43,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c, 0x2c,0x30,0x78,0x42,0x2c,0x30,0x78,0x43,0x2c,0x30,0x78,0x41,0x29,0x3b,0x0a,0x47,0x53,0x28,0x32,0x2c,0x37,0x2c,0x30,0x78,0x38,0x2c,0x30,0x78,0x44,0x2c,0x30,0x78,
0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,0x0a,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30, 0x43,0x29,0x3b,0x0a,0x47,0x53,0x28,0x33,0x2c,0x34,0x2c,0x30,0x78,0x39,0x2c,0x30,0x78,0x45,0x2c,0x30,0x78,0x45,0x29,0x3b,0x0a,0x7d,0x0a,0x28,0x28,0x75,0x69,0x6e,
0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76, 0x74,0x38,0x20,0x2a,0x29,0x68,0x29,0x5b,0x30,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x28,0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x30,0x5d,0x5e,0x28,0x28,
0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b, 0x75,0x69,0x6e,0x74,0x38,0x20,0x2a,0x29,0x76,0x29,0x5b,0x31,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,
0x0a,0x68,0x5b,0x69,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x68,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x32,0x20,0x74,0x3d,0x28,0x75,0x69, 0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x68,0x5b,0x69,0x5d,0x3d,0x53,0x57,0x41,0x50,0x34,0x28,0x68,0x5b,0x69,0x5d,0x29,0x3b,0x0a,0x7d,0x0a,0x75,0x69,
0x6e,0x74,0x32,0x29,0x28,0x68,0x5b,0x36,0x5d,0x2c,0x68,0x5b,0x37,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x61,0x73,0x5f,0x75,0x6c,0x6f,0x6e,0x67,0x28,0x74,0x29,0x3c, 0x6e,0x74,0x32,0x20,0x74,0x3d,0x28,0x75,0x69,0x6e,0x74,0x32,0x29,0x28,0x68,0x5b,0x36,0x5d,0x2c,0x68,0x5b,0x37,0x5d,0x29,0x3b,0x0a,0x69,0x66,0x28,0x61,0x73,0x5f,
0x3d,0x54,0x61,0x72,0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69, 0x75,0x6c,0x6f,0x6e,0x67,0x28,0x74,0x29,0x3c,0x3d,0x54,0x61,0x72,0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,
0x6e,0x63,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29, 0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,
0x20,0x7b,0x0a,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d, 0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,0x20,0x7b,0x0a,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,
0x2b,0x28,0x75,0x69,0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a, 0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x2b,0x28,0x75,0x69,0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,
0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x75,0x6e,0x64,0x65,0x66,0x20,0x53,0x57,0x41,0x50,0x34,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64, 0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x23,0x75,0x6e,0x64,0x65,0x66,0x20,0x53,0x57,0x41,0x50,0x34,0x0a,0x5f,0x5f,0x6b,0x65,
0x20,0x47,0x72,0x6f,0x65,0x73,0x74,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x2a,0x73,0x74,0x61,0x74,0x65,0x73,0x2c, 0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x47,0x72,0x6f,0x65,0x73,0x74,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x6c,0x6f,0x6e,0x67,
0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61, 0x20,0x2a,0x73,0x74,0x61,0x74,0x65,0x73,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,
0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x54,0x61,0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74, 0x66,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x20,0x2a,0x6f,0x75,0x74,0x70,0x75,0x74,0x2c,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x54,0x61,
0x20,0x54,0x68,0x72,0x65,0x61,0x64,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,0x69,0x64,0x78,0x3d,0x67,0x65,0x74,0x5f,0x67, 0x72,0x67,0x65,0x74,0x2c,0x75,0x69,0x6e,0x74,0x20,0x54,0x68,0x72,0x65,0x61,0x64,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x20,
0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29, 0x69,0x64,0x78,0x3d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x2d,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,
0x3b,0x0a,0x69,0x66,0x28,0x69,0x64,0x78,0x3c,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x54,0x68,0x72,0x65,0x61,0x64,0x73,0x5d,0x29,0x20,0x7b,0x0a,0x73, 0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x69,0x64,0x78,0x3c,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x54,0x68,0x72,0x65,
0x74,0x61,0x74,0x65,0x73,0x2b,0x3d,0x32,0x35,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20, 0x61,0x64,0x73,0x5d,0x29,0x20,0x7b,0x0a,0x73,0x74,0x61,0x74,0x65,0x73,0x2b,0x3d,0x32,0x35,0x2a,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,
0x53,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x7b,0x20,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30, 0x5d,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x53,0x74,0x61,0x74,0x65,0x5b,0x38,0x5d,0x3d,0x7b,0x20,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,
0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x31,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x20,0x7d,0x3b,0x0a,0x75, 0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x31,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x6c,0x6f,0x6e,0x67,0x20,0x48,0x5b,0x38,0x5d,0x2c,0x4d,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b, 0x30,0x30,0x55,0x4c,0x20,0x7d,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x48,0x5b,0x38,0x5d,0x2c,0x4d,0x5b,0x38,0x5d,0x3b,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,
0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78, 0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x30,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,
0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b, 0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,
0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c, 0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,
0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29, 0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,
0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a, 0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,
0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x31,0x2c,0x73,0x74,0x61,0x74,0x65,0x73, 0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,
0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b, 0x28,0x31,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,
0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50, 0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,
0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20, 0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,
0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78, 0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,
0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76, 0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7b,0x0a,0x28,0x28,0x75,0x6c,0x6f,0x6e,0x67,0x38,0x20,
0x6c,0x6f,0x61,0x64,0x38,0x28,0x32,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20, 0x2a,0x29,0x4d,0x29,0x5b,0x30,0x5d,0x3d,0x76,0x6c,0x6f,0x61,0x64,0x38,0x28,0x32,0x2c,0x73,0x74,0x61,0x74,0x65,0x73,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,
0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a, 0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,
0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28, 0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,
0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53, 0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,
0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x4d,0x5b,0x30,0x5d,0x3d,0x73, 0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x78,0x5d,0x5e,0x4d,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,
0x74,0x61,0x74,0x65,0x73,0x5b,0x32,0x34,0x5d,0x3b,0x0a,0x4d,0x5b,0x31,0x5d,0x3d,0x30,0x78,0x38,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x32,0x5d,0x3d,0x30,0x55,0x4c, 0x0a,0x7d,0x0a,0x4d,0x5b,0x30,0x5d,0x3d,0x73,0x74,0x61,0x74,0x65,0x73,0x5b,0x32,0x34,0x5d,0x3b,0x0a,0x4d,0x5b,0x31,0x5d,0x3d,0x30,0x78,0x38,0x30,0x55,0x4c,0x3b,
0x3b,0x0a,0x4d,0x5b,0x33,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x34,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x35,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a, 0x0a,0x4d,0x5b,0x32,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x33,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x34,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,
0x4d,0x5b,0x36,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x34,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30, 0x5b,0x35,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x36,0x5d,0x3d,0x30,0x55,0x4c,0x3b,0x0a,0x4d,0x5b,0x37,0x5d,0x3d,0x30,0x78,0x30,0x34,0x30,0x30,0x30,0x30,
0x30,0x55,0x4c,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a, 0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x78,0x3d,0x30,0x3b,0x20,0x78,0x3c,0x38,
0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c, 0x3b,0x20,0x2b,0x2b,0x78,0x29,0x20,0x7b,0x0a,0x48,0x5b,0x78,0x5d,0x3d,0x4d,0x5b,0x78,0x5d,0x5e,0x53,0x74,0x61,0x74,0x65,0x5b,0x78,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,
0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x74,0x6d, 0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x48,0x29,0x3b,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x51,0x28,0x4d,0x29,0x3b,
0x70,0x5b,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b, 0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x74,0x6d,0x70,0x5b,0x38,0x5d,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,
0x0a,0x74,0x6d,0x70,0x5b,0x69,0x5d,0x3d,0x53,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x69,0x5d,0x5e,0x4d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d, 0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x74,0x6d,0x70,0x5b,0x69,0x5d,0x3d,0x53,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x20,0x5e,0x3d,0x20,0x48,0x5b,0x69,
0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x53,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20, 0x5d,0x5e,0x4d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x50,0x45,0x52,0x4d,0x5f,0x53,0x4d,0x41,0x4c,0x4c,0x5f,0x50,0x28,0x53,0x74,0x61,0x74,0x65,0x29,0x3b,0x0a,0x66,
0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,0x69,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x6d,0x70, 0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x38,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x20,0x7b,0x0a,0x53,0x74,0x61,0x74,0x65,0x5b,
0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x53,0x74,0x61,0x74,0x65,0x5b,0x37,0x5d,0x3c,0x3d,0x54,0x61,0x72,0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c, 0x69,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x6d,0x70,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x53,0x74,0x61,0x74,0x65,0x5b,0x37,0x5d,0x3c,0x3d,0x54,0x61,0x72,
0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x6f,0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46, 0x67,0x65,0x74,0x29,0x20,0x7b,0x0a,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x6f,0x75,0x74,0x49,0x64,0x78,0x3d,0x61,0x74,0x6f,0x6d,0x69,0x63,0x5f,0x69,0x6e,0x63,0x28,0x6f,
0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,0x20,0x7b,0x0a,0x6f,0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74, 0x75,0x74,0x70,0x75,0x74,0x2b,0x30,0x78,0x46,0x46,0x29,0x3b,0x0a,0x69,0x66,0x28,0x6f,0x75,0x74,0x49,0x64,0x78,0x3c,0x30,0x78,0x46,0x46,0x29,0x20,0x7b,0x0a,0x6f,
0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x2b,0x28,0x75,0x69,0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67, 0x75,0x74,0x70,0x75,0x74,0x5b,0x6f,0x75,0x74,0x49,0x64,0x78,0x5d,0x3d,0x42,0x72,0x61,0x6e,0x63,0x68,0x42,0x75,0x66,0x5b,0x69,0x64,0x78,0x5d,0x2b,0x28,0x75,0x69,
0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,0x00 0x6e,0x74,0x29,0x20,0x67,0x65,0x74,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x28,0x30,0x29,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x7d,0x0a,
0x7d,0x0a,0x00
}; };
} // namespace xmrig } // namespace xmrig

View file

@ -222,3 +222,15 @@ if (WITH_KAWPOW)
src/base/net/stratum/EthStratumClient.cpp src/base/net/stratum/EthStratumClient.cpp
) )
endif() endif()
if (WITH_PROFILING)
add_definitions(/DXMRIG_FEATURE_PROFILING)
list(APPEND HEADERS_BASE
src/base/tools/Profiler.h
)
list(APPEND SOURCES_BASE
src/base/tools/Profiler.cpp
)
endif()

View file

@ -101,3 +101,13 @@ const char *xmrig::Tags::opencl()
return tag; return tag;
} }
#endif #endif
#ifdef XMRIG_FEATURE_PROFILING
const char* xmrig::Tags::profiler()
{
static const char* tag = CYAN_BG_BOLD(WHITE_BOLD_S " profile ");
return tag;
}
#endif

View file

@ -53,6 +53,10 @@ public:
# ifdef XMRIG_FEATURE_OPENCL # ifdef XMRIG_FEATURE_OPENCL
static const char *opencl(); static const char *opencl();
# endif # endif
# ifdef XMRIG_FEATURE_PROFILING
static const char* profiler();
# endif
}; };

View file

@ -82,6 +82,7 @@ public:
inline uint32_t backend() const { return m_backend; } inline uint32_t backend() const { return m_backend; }
inline uint64_t diff() const { return m_diff; } inline uint64_t diff() const { return m_diff; }
inline uint64_t height() const { return m_height; } inline uint64_t height() const { return m_height; }
inline uint64_t nonceMask() const { return isNicehash() ? 0xFFFFFFULL : (nonceSize() == sizeof(uint64_t) ? (-1ull >> (extraNonce().size() * 4)): 0xFFFFFFFFULL); }
inline uint64_t target() const { return m_target; } inline uint64_t target() const { return m_target; }
inline uint8_t *blob() { return m_blob; } inline uint8_t *blob() { return m_blob; }
inline uint8_t fixedByte() const { return *(m_blob + 42); } inline uint8_t fixedByte() const { return *(m_blob + 42); }

101
src/base/tools/Profiler.cpp Normal file
View file

@ -0,0 +1,101 @@
/* XMRig
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "base/tools/Profiler.h"
#include "base/io/log/Log.h"
#include "base/io/log/Tags.h"
#include <cstring>
#include <sstream>
#include <thread>
#include <chrono>
#include <algorithm>
#ifdef XMRIG_FEATURE_PROFILING
ProfileScopeData* ProfileScopeData::s_data[MAX_DATA_COUNT] = {};
volatile long ProfileScopeData::s_dataCount = 0;
double ProfileScopeData::s_tscSpeed = 0.0;
#ifndef NOINLINE
#ifdef __GNUC__
#define NOINLINE __attribute__ ((noinline))
#elif _MSC_VER
#define NOINLINE __declspec(noinline)
#else
#define NOINLINE
#endif
#endif
static std::string get_thread_id()
{
std::stringstream ss;
ss << std::this_thread::get_id();
std::string s = ss.str();
if (s.length() > ProfileScopeData::MAX_THREAD_ID_LENGTH) {
s.resize(ProfileScopeData::MAX_THREAD_ID_LENGTH);
}
return s;
}
NOINLINE void ProfileScopeData::Register(ProfileScopeData* data)
{
#ifdef _MSC_VER
const long id = _InterlockedIncrement(&s_dataCount) - 1;
#else
const long id = __sync_fetch_and_add(&s_dataCount, 1);
#endif
if (static_cast<unsigned long>(id) < MAX_DATA_COUNT) {
s_data[id] = data;
const std::string s = get_thread_id();
memcpy(data->m_threadId, s.c_str(), s.length() + 1);
}
}
NOINLINE void ProfileScopeData::Init()
{
using namespace std::chrono;
const uint64_t t1 = static_cast<uint64_t>(time_point_cast<nanoseconds>(high_resolution_clock::now()).time_since_epoch().count());
const uint64_t count1 = ReadTSC();
for (;;)
{
const uint64_t t2 = static_cast<uint64_t>(time_point_cast<nanoseconds>(high_resolution_clock::now()).time_since_epoch().count());
const uint64_t count2 = ReadTSC();
if (t2 - t1 > 1000000000) {
s_tscSpeed = (count2 - count1) * 1e9 / (t2 - t1);
LOG_INFO("%s TSC speed = %.3f GHz", xmrig::Tags::profiler(), s_tscSpeed / 1e9);
return;
}
}
}
#endif /* XMRIG_FEATURE_PROFILING */

133
src/base/tools/Profiler.h Normal file
View file

@ -0,0 +1,133 @@
/* XMRig
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_PROFILER_H
#define XMRIG_PROFILER_H
#ifndef FORCE_INLINE
#if defined(_MSC_VER)
#define FORCE_INLINE __forceinline
#elif defined(__GNUC__)
#define FORCE_INLINE __attribute__((always_inline)) inline
#elif defined(__clang__)
#define FORCE_INLINE __inline__
#else
#define FORCE_INLINE
#endif
#endif
#ifdef XMRIG_FEATURE_PROFILING
#include <cstdint>
#include <cstddef>
#include <type_traits>
#if defined(_MSC_VER)
#include <intrin.h>
#endif
static FORCE_INLINE uint64_t ReadTSC()
{
#ifdef _MSC_VER
return __rdtsc();
#else
uint32_t hi, lo;
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
return (((uint64_t)hi) << 32) | lo;
#endif
}
struct ProfileScopeData
{
const char* m_name;
uint64_t m_totalCycles;
uint32_t m_totalSamples;
enum
{
MAX_THREAD_ID_LENGTH = 11,
MAX_SAMPLE_COUNT = 128,
MAX_DATA_COUNT = 1024
};
char m_threadId[MAX_THREAD_ID_LENGTH + 1];
static ProfileScopeData* s_data[MAX_DATA_COUNT];
static volatile long s_dataCount;
static double s_tscSpeed;
static void Register(ProfileScopeData* data);
static void Init();
};
static_assert(std::is_trivial<ProfileScopeData>::value, "ProfileScopeData must be a trivial struct");
static_assert(sizeof(ProfileScopeData) <= 32, "ProfileScopeData struct is too big");
class ProfileScope
{
public:
FORCE_INLINE ProfileScope(ProfileScopeData& data)
: m_data(data)
{
if (m_data.m_totalCycles == 0) {
ProfileScopeData::Register(&data);
}
m_startCounter = ReadTSC();
}
FORCE_INLINE ~ProfileScope()
{
m_data.m_totalCycles += ReadTSC() - m_startCounter;
++m_data.m_totalSamples;
}
private:
ProfileScopeData& m_data;
uint64_t m_startCounter;
};
#define PROFILE_SCOPE(x) static thread_local ProfileScopeData x##_data{#x}; ProfileScope x(x##_data);
#else /* XMRIG_FEATURE_PROFILING */
#define PROFILE_SCOPE(x)
#endif /* XMRIG_FEATURE_PROFILING */
#include "crypto/randomx/blake2/blake2.h"
struct rx_blake2b_wrapper
{
FORCE_INLINE static void run(void* out, size_t outlen, const void* in, size_t inlen)
{
PROFILE_SCOPE(RandomX_Blake2b);
rx_blake2b(out, outlen, in, inlen);
}
};
#endif /* XMRIG_PROFILER_H */

View file

@ -21,7 +21,8 @@
"rdmsr": true, "rdmsr": true,
"wrmsr": true, "wrmsr": true,
"cache_qos": false, "cache_qos": false,
"numa": true "numa": true,
"scratchpad_prefetch_mode": 1
}, },
"cpu": { "cpu": {
"enabled": true, "enabled": true,

View file

@ -38,6 +38,7 @@
#include "base/kernel/Platform.h" #include "base/kernel/Platform.h"
#include "base/net/stratum/Job.h" #include "base/net/stratum/Job.h"
#include "base/tools/Object.h" #include "base/tools/Object.h"
#include "base/tools/Profiler.h"
#include "base/tools/Timer.h" #include "base/tools/Timer.h"
#include "core/config/Config.h" #include "core/config/Config.h"
#include "core/Controller.h" #include "core/Controller.h"
@ -267,6 +268,44 @@ public:
h = "MH/s"; h = "MH/s";
} }
# ifdef XMRIG_FEATURE_PROFILING
ProfileScopeData* data[ProfileScopeData::MAX_DATA_COUNT];
const uint32_t n = std::min<uint32_t>(ProfileScopeData::s_dataCount, ProfileScopeData::MAX_DATA_COUNT);
memcpy(data, ProfileScopeData::s_data, n * sizeof(ProfileScopeData*));
std::sort(data, data + n, [](ProfileScopeData* a, ProfileScopeData* b) {
return strcmp(a->m_threadId, b->m_threadId) < 0;
});
for (uint32_t i = 0; i < n;)
{
uint32_t n1 = i;
while ((n1 < n) && (strcmp(data[i]->m_threadId, data[n1]->m_threadId) == 0)) {
++n1;
}
std::sort(data + i, data + n1, [](ProfileScopeData* a, ProfileScopeData* b) {
return a->m_totalCycles > b->m_totalCycles;
});
for (uint32_t j = i; j < n1; ++j) {
ProfileScopeData* p = data[j];
LOG_INFO("%s Thread %6s | %-30s | %7.3f%% | %9.0f ns",
Tags::profiler(),
p->m_threadId,
p->m_name,
p->m_totalCycles * 100.0 / data[i]->m_totalCycles,
p->m_totalCycles / p->m_totalSamples * 1e9 / ProfileScopeData::s_tscSpeed
);
}
LOG_INFO("%s --------------|--------------------------------|----------|-------------", Tags::profiler());
i = n1;
}
# endif
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s"), LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s"),
Tags::miner(), Tags::miner(),
Hashrate::format(speed[0] * scale, num, sizeof(num) / 4), Hashrate::format(speed[0] * scale, num, sizeof(num) / 4),
@ -311,6 +350,10 @@ xmrig::Miner::Miner(Controller *controller)
Platform::setThreadPriority(std::min(priority + 1, 5)); Platform::setThreadPriority(std::min(priority + 1, 5));
} }
# ifdef XMRIG_FEATURE_PROFILING
ProfileScopeData::Init();
# endif
# ifdef XMRIG_ALGO_RANDOMX # ifdef XMRIG_ALGO_RANDOMX
Rx::init(this); Rx::init(this);
# endif # endif

View file

@ -51,7 +51,12 @@ R"===(
"randomx": { "randomx": {
"init": -1, "init": -1,
"mode": "auto", "mode": "auto",
"numa": true "1gb-pages": false,
"rdmsr": true,
"wrmsr": true,
"cache_qos": false,
"numa": true,
"scratchpad_prefetch_mode": 1
}, },
"cpu": { "cpu": {
"enabled": true, "enabled": true,

View file

@ -26,18 +26,14 @@
#include "crypto/common/Nonce.h" #include "crypto/common/Nonce.h"
#include <mutex>
namespace xmrig { namespace xmrig {
std::atomic<bool> Nonce::m_paused; std::atomic<bool> Nonce::m_paused;
std::atomic<uint64_t> Nonce::m_sequence[Nonce::MAX]; std::atomic<uint64_t> Nonce::m_sequence[Nonce::MAX];
uint32_t Nonce::m_nonces[2] = { 0, 0 }; std::atomic<uint64_t> Nonce::m_nonces[2] = { {0}, {0} };
static std::mutex mutex;
static Nonce nonce; static Nonce nonce;
@ -54,40 +50,34 @@ xmrig::Nonce::Nonce()
} }
uint32_t xmrig::Nonce::next(uint8_t index, uint32_t nonce, uint32_t reserveCount, bool nicehash, bool *ok) bool xmrig::Nonce::next(uint8_t index, uint32_t *nonce, uint32_t reserveCount, uint64_t mask)
{ {
uint32_t next; mask &= 0x7FFFFFFFFFFFFFFFULL;
if (reserveCount == 0 || mask < reserveCount - 1) {
return false;
}
std::lock_guard<std::mutex> lock(mutex); uint64_t counter = m_nonces[index].fetch_add(reserveCount, std::memory_order_relaxed);
while (true) {
if (nicehash) { if (mask < counter) {
if ((m_nonces[index] + reserveCount) > 0x1000000) { return false;
if (ok) {
*ok = false;
}
pause(true);
return 0;
} }
else if (mask - counter <= reserveCount - 1) {
next = (nonce & 0xFF000000) | m_nonces[index]; pause(true);
if (mask - counter < reserveCount - 1) {
return false;
}
}
else if (0xFFFFFFFFUL - (uint32_t)counter < reserveCount - 1) {
counter = m_nonces[index].fetch_add(reserveCount, std::memory_order_relaxed);
continue;
}
*nonce = (nonce[0] & ~mask) | counter;
if (mask > 0xFFFFFFFFULL) {
nonce[1] = (nonce[1] & (~mask >> 32)) | (counter >> 32);
}
return true;
} }
else {
next = m_nonces[index];
}
m_nonces[index] += reserveCount;
return next;
}
void xmrig::Nonce::reset(uint8_t index)
{
std::lock_guard<std::mutex> lock(mutex);
m_nonces[index] = 0;
} }

View file

@ -49,18 +49,18 @@ public:
static inline bool isPaused() { return m_paused.load(std::memory_order_relaxed); } static inline bool isPaused() { return m_paused.load(std::memory_order_relaxed); }
static inline uint64_t sequence(Backend backend) { return m_sequence[backend].load(std::memory_order_relaxed); } static inline uint64_t sequence(Backend backend) { return m_sequence[backend].load(std::memory_order_relaxed); }
static inline void pause(bool paused) { m_paused = paused; } static inline void pause(bool paused) { m_paused = paused; }
static inline void reset(uint8_t index) { m_nonces[index] = 0; }
static inline void stop(Backend backend) { m_sequence[backend] = 0; } static inline void stop(Backend backend) { m_sequence[backend] = 0; }
static inline void touch(Backend backend) { m_sequence[backend]++; } static inline void touch(Backend backend) { m_sequence[backend]++; }
static uint32_t next(uint8_t index, uint32_t nonce, uint32_t reserveCount, bool nicehash, bool *ok = nullptr); static bool next(uint8_t index, uint32_t *nonce, uint32_t reserveCount, uint64_t mask);
static void reset(uint8_t index);
static void stop(); static void stop();
static void touch(); static void touch();
private: private:
static std::atomic<bool> m_paused; static std::atomic<bool> m_paused;
static std::atomic<uint64_t> m_sequence[MAX]; static std::atomic<uint64_t> m_sequence[MAX];
static uint32_t m_nonces[2]; static std::atomic<uint64_t> m_nonces[2];
}; };

View file

@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/soft_aes.h" #include "crypto/randomx/soft_aes.h"
#include "crypto/randomx/randomx.h" #include "crypto/randomx/randomx.h"
#include "base/tools/Profiler.h"
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d #define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e #define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
@ -49,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Hashing throughput: >20 GiB/s per CPU core with hardware AES Hashing throughput: >20 GiB/s per CPU core with hardware AES
*/ */
template<bool softAes> template<int softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
const uint8_t* inptr = (uint8_t*)input; const uint8_t* inptr = (uint8_t*)input;
const uint8_t* inputEnd = inptr + inputSize; const uint8_t* inputEnd = inptr + inputSize;
@ -117,7 +118,7 @@ template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash)
The modified state is written back to 'state' to allow multiple The modified state is written back to 'state' to allow multiple
calls to this function. calls to this function.
*/ */
template<bool softAes> template<int softAes>
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer; const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize; const uint8_t* outputEnd = outptr + outputSize;
@ -158,7 +159,7 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer); template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer); template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
template<bool softAes> template<int softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer; const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize; const uint8_t* outputEnd = outptr + outputSize;
@ -213,8 +214,10 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer); template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
template<bool softAes> template<int softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) { void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
PROFILE_SCOPE(RandomX_AES);
uint8_t* scratchpadPtr = (uint8_t*)scratchpad; uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize; const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
@ -241,38 +244,29 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
//process 64 bytes at a time in 4 lanes //process 64 bytes at a time in 4 lanes
while (scratchpadPtr < scratchpadEnd) { while (scratchpadPtr < scratchpadEnd) {
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0)); #define HASH_STATE(k) \
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1)); hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0)); \
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2)); hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1)); \
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3)); hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2)); \
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3));
fill_state0 = aesdec<softAes>(fill_state0, key0); #define FILL_STATE(k) \
fill_state1 = aesenc<softAes>(fill_state1, key1); fill_state0 = aesdec<softAes>(fill_state0, key0); \
fill_state2 = aesdec<softAes>(fill_state2, key2); fill_state1 = aesenc<softAes>(fill_state1, key1); \
fill_state3 = aesenc<softAes>(fill_state3, key3); fill_state2 = aesdec<softAes>(fill_state2, key2); \
fill_state3 = aesenc<softAes>(fill_state3, key3); \
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 0, fill_state0); \
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 1, fill_state1); \
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0); HASH_STATE(0);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1); HASH_STATE(1);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3); FILL_STATE(0);
FILL_STATE(1);
rx_prefetch_t0(prefetchPtr); rx_prefetch_t0(prefetchPtr);
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4));
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5));
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6));
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7));
fill_state0 = aesdec<softAes>(fill_state0, key0);
fill_state1 = aesenc<softAes>(fill_state1, key1);
fill_state2 = aesdec<softAes>(fill_state2, key2);
fill_state3 = aesenc<softAes>(fill_state3, key3);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3);
rx_prefetch_t0(prefetchPtr + 64); rx_prefetch_t0(prefetchPtr + 64);
scratchpadPtr += 128; scratchpadPtr += 128;
@ -308,5 +302,6 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3); rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
} }
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); template void hashAndFillAes1Rx4<0>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); template void hashAndFillAes1Rx4<1>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<2>(void* scratchpad, size_t scratchpadSize, void* hash, void* fill_state);

View file

@ -30,14 +30,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstddef> #include <cstddef>
template<bool softAes> template<int softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash); void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
template<bool softAes> template<int softAes>
void fillAes1Rx4(void *state, size_t outputSize, void *buffer); void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes> template<int softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer); void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes> template<int softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);

View file

@ -92,7 +92,7 @@ extern "C" {
int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen); int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen);
/* Simple API */ /* Simple API */
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen); int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen);
/* Argon2 Team - Begin Code */ /* Argon2 Team - Begin Code */
int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); int rxa2_blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);

View file

@ -0,0 +1,123 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2B_ROUND_H
#define BLAKE2B_ROUND_H
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))
#define LIKELY(x) __builtin_expect((x),1)
/* Microarchitecture-specific macros */
#define _mm_roti_epi64(x, c) \
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -32); \
row4h = _mm_roti_epi64(row4h, -32); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -24); \
row2h = _mm_roti_epi64(row2h, -24); \
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -16); \
row4h = _mm_roti_epi64(row4h, -16); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -63); \
row2h = _mm_roti_epi64(row2h, -63); \
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
row4l = t1; \
row4h = t0;
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
row4l = t1; \
row4h = t0;
#define LOAD_MSG(r, i, b0, b1) \
do { \
b0 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 1]], m[blake2b_sigma_sse41[r][i * 4 + 0]]); \
b1 = _mm_set_epi64x(m[blake2b_sigma_sse41[r][i * 4 + 3]], m[blake2b_sigma_sse41[r][i * 4 + 2]]); \
} while(0)
#define ROUND(r) \
LOAD_MSG(r, 0, b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG(r, 1, b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
LOAD_MSG(r, 2, b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG(r, 3, b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
#endif

View file

@ -39,12 +39,40 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/blake2/blake2.h" #include "crypto/randomx/blake2/blake2.h"
#include "crypto/randomx/blake2/blake2-impl.h" #include "crypto/randomx/blake2/blake2-impl.h"
#if defined(_M_X64) || defined(__x86_64__)
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include <smmintrin.h>
#include "blake2b-round.h"
#endif
static const uint64_t blake2b_IV[8] = { static const uint64_t blake2b_IV[8] = {
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) }; UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
#if defined(_M_X64) || defined(__x86_64__)
static const uint8_t blake2b_sigma_sse41[12][16] = {
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
};
#endif
static const uint8_t blake2b_sigma[12][16] = { static const uint8_t blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
@ -179,7 +207,47 @@ int rx_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t
return 0; return 0;
} }
static void rx_blake2b_compress(blake2b_state *S, const uint8_t *block) { #if defined(_M_X64) || defined(__x86_64__)
static void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t *block)
{
__m128i row1l, row1h;
__m128i row2l, row2h;
__m128i row3l, row3h;
__m128i row4l, row4h;
__m128i b0, b1;
__m128i t0, t1;
const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
row1l = LOADU(&S->h[0]);
row1h = LOADU(&S->h[2]);
row2l = LOADU(&S->h[4]);
row2h = LOADU(&S->h[6]);
row3l = LOADU(&blake2b_IV[0]);
row3h = LOADU(&blake2b_IV[2]);
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
const uint64_t* m = (const uint64_t*)(block);
for (uint32_t r = 0; r < 12; ++r) {
ROUND(r);
}
row1l = _mm_xor_si128(row3l, row1l);
row1h = _mm_xor_si128(row3h, row1h);
STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
row2l = _mm_xor_si128(row4l, row2l);
row2h = _mm_xor_si128(row4h, row2h);
STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
}
#undef ROUND
#endif
static void rx_blake2b_compress_integer(blake2b_state *S, const uint8_t *block) {
uint64_t m[16]; uint64_t m[16];
uint64_t v[16]; uint64_t v[16];
unsigned int i, r; unsigned int i, r;
@ -237,6 +305,20 @@ static void rx_blake2b_compress(blake2b_state *S, const uint8_t *block) {
#undef ROUND #undef ROUND
} }
#if defined(_M_X64) || defined(__x86_64__)
uint32_t rx_blake2b_use_sse41 = 0;
#define rx_blake2b_compress(S, block) \
if (rx_blake2b_use_sse41) \
rx_blake2b_compress_sse41(S, block); \
else \
rx_blake2b_compress_integer(S, block);
#else
#define rx_blake2b_compress(S, block) rx_blake2b_compress_integer(S, block);
#endif
int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) { int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
const uint8_t *pin = (const uint8_t *)in; const uint8_t *pin = (const uint8_t *)in;
@ -260,14 +342,14 @@ int rx_blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
size_t fill = BLAKE2B_BLOCKBYTES - left; size_t fill = BLAKE2B_BLOCKBYTES - left;
memcpy(&S->buf[left], pin, fill); memcpy(&S->buf[left], pin, fill);
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
rx_blake2b_compress(S, S->buf); rx_blake2b_compress(S, S->buf);
S->buflen = 0; S->buflen = 0;
inlen -= fill; inlen -= fill;
pin += fill; pin += fill;
/* Avoid buffer copies when possible */ /* Avoid buffer copies when possible */
while (inlen > BLAKE2B_BLOCKBYTES) { while (inlen > BLAKE2B_BLOCKBYTES) {
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
rx_blake2b_compress(S, pin); rx_blake2b_compress(S, pin);
inlen -= BLAKE2B_BLOCKBYTES; inlen -= BLAKE2B_BLOCKBYTES;
pin += BLAKE2B_BLOCKBYTES; pin += BLAKE2B_BLOCKBYTES;
} }
@ -294,7 +376,7 @@ int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen) {
blake2b_increment_counter(S, S->buflen); blake2b_increment_counter(S, S->buflen);
blake2b_set_lastblock(S); blake2b_set_lastblock(S);
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
rx_blake2b_compress(S, S->buf); rx_blake2b_compress(S, S->buf);
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
store64(buffer + sizeof(S->h[i]) * i, S->h[i]); store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
@ -307,8 +389,7 @@ int rx_blake2b_final(blake2b_state *S, void *out, size_t outlen) {
return 0; return 0;
} }
int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen, int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen) {
const void *key, size_t keylen) {
blake2b_state S; blake2b_state S;
int ret = -1; int ret = -1;
@ -321,25 +402,14 @@ int rx_blake2b(void *out, size_t outlen, const void *in, size_t inlen,
goto fail; goto fail;
} }
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) { if (rx_blake2b_init(&S, outlen) < 0) {
goto fail; goto fail;
} }
if (keylen > 0) { if (rx_blake2b_update(&S, in, inlen) < 0) {
if (rx_blake2b_init_key(&S, outlen, key, keylen) < 0) {
goto fail;
}
}
else {
if (rx_blake2b_init(&S, outlen) < 0) {
goto fail;
}
}
if (rx_blake2b_update(&S, in, inlen) < 0) {
goto fail; goto fail;
} }
ret = rx_blake2b_final(&S, out, outlen); ret = rx_blake2b_final(&S, out, outlen);
fail: fail:
//clear_internal_memory(&S, sizeof(S)); //clear_internal_memory(&S, sizeof(S));
@ -361,43 +431,42 @@ int rxa2_blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
store32(outlen_bytes, (uint32_t)outlen); store32(outlen_bytes, (uint32_t)outlen);
#define TRY(statement) \ #define TRY(statement) \
do { \ do { \
ret = statement; \ ret = statement; \
if (ret < 0) { \ if (ret < 0) { \
goto fail; \ goto fail; \
} \ } \
} while ((void)0, 0) } while ((void)0, 0)
if (outlen <= BLAKE2B_OUTBYTES) { if (outlen <= BLAKE2B_OUTBYTES) {
TRY(rx_blake2b_init(&blake_state, outlen)); TRY(rx_blake2b_init(&blake_state, outlen));
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(rx_blake2b_update(&blake_state, in, inlen)); TRY(rx_blake2b_update(&blake_state, in, inlen));
TRY(rx_blake2b_final(&blake_state, out, outlen)); TRY(rx_blake2b_final(&blake_state, out, outlen));
} }
else { else {
uint32_t toproduce; uint32_t toproduce;
uint8_t out_buffer[BLAKE2B_OUTBYTES]; uint8_t out_buffer[BLAKE2B_OUTBYTES];
uint8_t in_buffer[BLAKE2B_OUTBYTES]; uint8_t in_buffer[BLAKE2B_OUTBYTES];
TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES)); TRY(rx_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); TRY(rx_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(rx_blake2b_update(&blake_state, in, inlen)); TRY(rx_blake2b_update(&blake_state, in, inlen));
TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES)); TRY(rx_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2; out += BLAKE2B_OUTBYTES / 2;
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2; toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
while (toproduce > BLAKE2B_OUTBYTES) { while (toproduce > BLAKE2B_OUTBYTES) {
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer, TRY(rx_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
BLAKE2B_OUTBYTES, NULL, 0)); BLAKE2B_OUTBYTES));
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2; out += BLAKE2B_OUTBYTES / 2;
toproduce -= BLAKE2B_OUTBYTES / 2; toproduce -= BLAKE2B_OUTBYTES / 2;
} }
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL, TRY(rx_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES));
0));
memcpy(out, out_buffer, toproduce); memcpy(out, out_buffer, toproduce);
} }
fail: fail:

View file

@ -55,7 +55,7 @@ namespace randomx {
void Blake2Generator::checkData(const size_t bytesNeeded) { void Blake2Generator::checkData(const size_t bytesNeeded) {
if (dataIndex + bytesNeeded > sizeof(data)) { if (dataIndex + bytesNeeded > sizeof(data)) {
rx_blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0); rx_blake2b(data, sizeof(data), data, sizeof(data));
dataIndex = 0; dataIndex = 0;
} }
} }

View file

@ -79,9 +79,9 @@ namespace randomx {
} }
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) { void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
int opcode = instr.opcode; uint32_t opcode = instr.opcode;
if (opcode < RandomX_CurrentConfig.CEIL_IADD_RS) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_RS; ibc.type = InstructionType::IADD_RS;
@ -99,8 +99,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_RS;
if (opcode < RandomX_CurrentConfig.CEIL_IADD_M) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_M; ibc.type = InstructionType::IADD_M;
@ -117,8 +118,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IADD_M;
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_R; ibc.type = InstructionType::ISUB_R;
@ -133,8 +135,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_R;
if (opcode < RandomX_CurrentConfig.CEIL_ISUB_M) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_M; ibc.type = InstructionType::ISUB_M;
@ -151,8 +154,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISUB_M;
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_R; ibc.type = InstructionType::IMUL_R;
@ -167,8 +171,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_R;
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_M) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_M; ibc.type = InstructionType::IMUL_M;
@ -185,8 +190,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_M;
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_R; ibc.type = InstructionType::IMULH_R;
@ -195,8 +201,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_R;
if (opcode < RandomX_CurrentConfig.CEIL_IMULH_M) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_M; ibc.type = InstructionType::IMULH_M;
@ -213,8 +220,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMULH_M;
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_R; ibc.type = InstructionType::ISMULH_R;
@ -223,8 +231,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_R;
if (opcode < RandomX_CurrentConfig.CEIL_ISMULH_M) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_M; ibc.type = InstructionType::ISMULH_M;
@ -241,8 +250,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISMULH_M;
if (opcode < RandomX_CurrentConfig.CEIL_IMUL_RCP) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP) {
uint64_t divisor = instr.getImm32(); uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) { if (!isZeroOrPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
@ -257,16 +267,18 @@ namespace randomx {
} }
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IMUL_RCP;
if (opcode < RandomX_CurrentConfig.CEIL_INEG_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::INEG_R; ibc.type = InstructionType::INEG_R;
ibc.idst = &nreg->r[dst]; ibc.idst = &nreg->r[dst];
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_INEG_R;
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_R; ibc.type = InstructionType::IXOR_R;
@ -281,8 +293,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_R;
if (opcode < RandomX_CurrentConfig.CEIL_IXOR_M) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_M; ibc.type = InstructionType::IXOR_M;
@ -299,8 +312,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IXOR_M;
if (opcode < RandomX_CurrentConfig.CEIL_IROR_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROR_R; ibc.type = InstructionType::IROR_R;
@ -315,8 +329,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROR_R;
if (opcode < RandomX_CurrentConfig.CEIL_IROL_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IROL_R; ibc.type = InstructionType::IROL_R;
@ -331,8 +346,9 @@ namespace randomx {
registerUsage[dst] = i; registerUsage[dst] = i;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_IROL_R;
if (opcode < RandomX_CurrentConfig.CEIL_ISWAP_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
if (src != dst) { if (src != dst) {
@ -347,8 +363,9 @@ namespace randomx {
} }
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISWAP_R;
if (opcode < RandomX_CurrentConfig.CEIL_FSWAP_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::FSWAP_R; ibc.type = InstructionType::FSWAP_R;
if (dst < RegisterCountFlt) if (dst < RegisterCountFlt)
@ -357,8 +374,9 @@ namespace randomx {
ibc.fdst = &nreg->e[dst - RegisterCountFlt]; ibc.fdst = &nreg->e[dst - RegisterCountFlt];
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSWAP_R;
if (opcode < RandomX_CurrentConfig.CEIL_FADD_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R) {
auto dst = instr.dst % RegisterCountFlt; auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt; auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FADD_R; ibc.type = InstructionType::FADD_R;
@ -366,8 +384,9 @@ namespace randomx {
ibc.fsrc = &nreg->a[src]; ibc.fsrc = &nreg->a[src];
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_R;
if (opcode < RandomX_CurrentConfig.CEIL_FADD_M) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M) {
auto dst = instr.dst % RegisterCountFlt; auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FADD_M; ibc.type = InstructionType::FADD_M;
@ -377,8 +396,9 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32()); ibc.imm = signExtend2sCompl(instr.getImm32());
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FADD_M;
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R) {
auto dst = instr.dst % RegisterCountFlt; auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt; auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FSUB_R; ibc.type = InstructionType::FSUB_R;
@ -386,8 +406,9 @@ namespace randomx {
ibc.fsrc = &nreg->a[src]; ibc.fsrc = &nreg->a[src];
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_R;
if (opcode < RandomX_CurrentConfig.CEIL_FSUB_M) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M) {
auto dst = instr.dst % RegisterCountFlt; auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FSUB_M; ibc.type = InstructionType::FSUB_M;
@ -397,15 +418,17 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32()); ibc.imm = signExtend2sCompl(instr.getImm32());
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSUB_M;
if (opcode < RandomX_CurrentConfig.CEIL_FSCAL_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R) {
auto dst = instr.dst % RegisterCountFlt; auto dst = instr.dst % RegisterCountFlt;
ibc.fdst = &nreg->f[dst]; ibc.fdst = &nreg->f[dst];
ibc.type = InstructionType::FSCAL_R; ibc.type = InstructionType::FSCAL_R;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSCAL_R;
if (opcode < RandomX_CurrentConfig.CEIL_FMUL_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R) {
auto dst = instr.dst % RegisterCountFlt; auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegisterCountFlt; auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FMUL_R; ibc.type = InstructionType::FMUL_R;
@ -413,8 +436,9 @@ namespace randomx {
ibc.fsrc = &nreg->a[src]; ibc.fsrc = &nreg->a[src];
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FMUL_R;
if (opcode < RandomX_CurrentConfig.CEIL_FDIV_M) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M) {
auto dst = instr.dst % RegisterCountFlt; auto dst = instr.dst % RegisterCountFlt;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FDIV_M; ibc.type = InstructionType::FDIV_M;
@ -424,41 +448,44 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32()); ibc.imm = signExtend2sCompl(instr.getImm32());
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FDIV_M;
if (opcode < RandomX_CurrentConfig.CEIL_FSQRT_R) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R) {
auto dst = instr.dst % RegisterCountFlt; auto dst = instr.dst % RegisterCountFlt;
ibc.type = InstructionType::FSQRT_R; ibc.type = InstructionType::FSQRT_R;
ibc.fdst = &nreg->e[dst]; ibc.fdst = &nreg->e[dst];
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_FSQRT_R;
if (opcode < RandomX_CurrentConfig.CEIL_CBRANCH) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH) {
ibc.type = InstructionType::CBRANCH; ibc.type = InstructionType::CBRANCH;
//jump condition //jump condition
int creg = instr.dst % RegistersCount; int creg = instr.dst % RegistersCount;
ibc.idst = &nreg->r[creg]; ibc.idst = &nreg->r[creg];
ibc.target = registerUsage[creg]; ibc.target = registerUsage[creg];
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset; const int shift = instr.getModCond();
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); ibc.imm = signExtend2sCompl(instr.getImm32()) | ((1ULL << RandomX_ConfigurationBase::JumpOffset) << shift);
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2 ibc.imm &= ~((1ULL << (RandomX_ConfigurationBase::JumpOffset - 1)) << shift);
ibc.imm &= ~(1ULL << (shift - 1)); ibc.memMask = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;
ibc.memMask = RandomX_CurrentConfig.ConditionMask_Calculated << shift;
//mark all registers as used //mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) { for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i; registerUsage[j] = i;
} }
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CBRANCH;
if (opcode < RandomX_CurrentConfig.CEIL_CFROUND) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND) {
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.isrc = &nreg->r[src]; ibc.isrc = &nreg->r[src];
ibc.type = InstructionType::CFROUND; ibc.type = InstructionType::CFROUND;
ibc.imm = instr.getImm32() & 63; ibc.imm = instr.getImm32() & 63;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_CFROUND;
if (opcode < RandomX_CurrentConfig.CEIL_ISTORE) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE) {
auto dst = instr.dst % RegistersCount; auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount; auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISTORE; ibc.type = InstructionType::ISTORE;
@ -471,8 +498,9 @@ namespace randomx {
ibc.memMask = ScratchpadL3Mask; ibc.memMask = ScratchpadL3Mask;
return; return;
} }
opcode -= RandomX_CurrentConfig.RANDOMX_FREQ_ISTORE;
if (opcode < RandomX_CurrentConfig.CEIL_NOP) { if (opcode < RandomX_CurrentConfig.RANDOMX_FREQ_NOP) {
ibc.type = InstructionType::NOP; ibc.type = InstructionType::NOP;
return; return;
} }

View file

@ -225,7 +225,7 @@ namespace randomx {
} }
static void exe_CFROUND(RANDOMX_EXE_ARGS) { static void exe_CFROUND(RANDOMX_EXE_ARGS) {
rx_set_rounding_mode(rotr64(*ibc.isrc, ibc.imm) % 4); rx_set_rounding_mode(rotr64(*ibc.isrc, static_cast<uint32_t>(ibc.imm)) % 4);
} }
static void exe_ISTORE(RANDOMX_EXE_ARGS) { static void exe_ISTORE(RANDOMX_EXE_ARGS) {

View file

@ -74,8 +74,8 @@ namespace randomx {
constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_MAX_LATENCY + 2; constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_MAX_LATENCY + 2;
constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE; constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
#define ScratchpadSize RandomX_CurrentConfig.ScratchpadL3_Size #define ScratchpadSize RandomX_CurrentConfig.ScratchpadL3_Size
#define CacheLineAlignMask RandomX_CurrentConfig.CacheLineAlignMask_Calculated #define CacheLineAlignMask RandomX_ConfigurationBase::CacheLineAlignMask_Calculated
#define DatasetExtraItems RandomX_CurrentConfig.DatasetExtraItems_Calculated #define DatasetExtraItems RandomX_ConfigurationBase::DatasetExtraItems_Calculated
constexpr int StoreL3Condition = 14; constexpr int StoreL3Condition = 14;
//Prevent some unsafe configurations. //Prevent some unsafe configurations.

View file

@ -75,11 +75,11 @@ static size_t CalcDatasetItemSize()
// Prologue // Prologue
((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) + ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) +
// Main loop // Main loop
RandomX_CurrentConfig.CacheAccesses * ( RandomX_ConfigurationBase::CacheAccesses * (
// Main loop prologue // Main loop prologue
((uint8_t*)randomx_calc_dataset_item_aarch64_mix - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch)) + 4 + ((uint8_t*)randomx_calc_dataset_item_aarch64_mix - ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch)) + 4 +
// Inner main loop (instructions) // Inner main loop (instructions)
((RandomX_CurrentConfig.SuperscalarLatency * 3) + 2) * 16 + ((RandomX_ConfigurationBase::SuperscalarLatency * 3) + 2) * 16 +
// Main loop epilogue // Main loop epilogue
((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4 ((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4
) + ) +
@ -235,7 +235,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s
num32bitLiterals = 64; num32bitLiterals = 64;
constexpr uint32_t tmp_reg = 12; constexpr uint32_t tmp_reg = 12;
for (size_t i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) for (size_t i = 0; i < RandomX_ConfigurationBase::CacheAccesses; ++i)
{ {
// and x11, x10, CacheSize / CacheLineSize - 1 // and x11, x10, CacheSize / CacheLineSize - 1
emit32(0x92400000 | 11 | (10 << 5) | ((RandomX_CurrentConfig.Log2_CacheSize - 1) << 10), code, codePos); emit32(0x92400000 | 11 | (10 << 5) | ((RandomX_CurrentConfig.Log2_CacheSize - 1) << 10), code, codePos);
@ -946,7 +946,7 @@ void JitCompilerA64::h_CBRANCH(Instruction& instr, uint32_t& codePos)
const uint32_t dst = IntRegMap[instr.dst]; const uint32_t dst = IntRegMap[instr.dst];
const uint32_t modCond = instr.getModCond(); const uint32_t modCond = instr.getModCond();
const uint32_t shift = modCond + RandomX_CurrentConfig.JumpOffset; const uint32_t shift = modCond + RandomX_ConfigurationBase::JumpOffset;
const uint32_t imm = (instr.getImm32() | (1U << shift)) & ~(1U << (shift - 1)); const uint32_t imm = (instr.getImm32() | (1U << shift)) & ~(1U << (shift - 1));
emitAddImmediate(dst, dst, imm, code, k); emitAddImmediate(dst, dst, imm, code, k);

View file

@ -36,6 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/program.hpp" #include "crypto/randomx/program.hpp"
#include "crypto/randomx/reciprocal.h" #include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/virtual_memory.hpp" #include "crypto/randomx/virtual_memory.hpp"
#include "base/tools/Profiler.h"
#include "backend/cpu/Cpu.h"
#ifdef XMRIG_FIX_RYZEN #ifdef XMRIG_FIX_RYZEN
# include "crypto/rx/Rx.h" # include "crypto/rx/Rx.h"
@ -166,55 +168,16 @@ namespace randomx {
# endif # endif
} }
// CPU-specific tweaks # ifdef _MSC_VER
void JitCompilerX86::applyTweaks() { static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return _rotl(a, shift); }
int32_t info[4]; # else
cpuid(0, info); static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return (a << shift) | (a >> (-shift & 31)); }
# endif
int32_t manufacturer[4];
manufacturer[0] = info[1];
manufacturer[1] = info[3];
manufacturer[2] = info[2];
manufacturer[3] = 0;
if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) {
struct
{
unsigned int stepping : 4;
unsigned int model : 4;
unsigned int family : 4;
unsigned int processor_type : 2;
unsigned int reserved1 : 2;
unsigned int ext_model : 4;
unsigned int ext_family : 8;
unsigned int reserved2 : 4;
} processor_info;
cpuid(1, info);
memcpy(&processor_info, info, sizeof(processor_info));
// Intel JCC erratum mitigation
if (processor_info.family == 6) {
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
const uint32_t stepping = processor_info.stepping;
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
BranchesWithin32B =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
((model == 0xA6) && (stepping == 0x0)) ||
((model == 0xAE) && (stepping == 0xA));
}
}
}
static std::atomic<size_t> codeOffset; static std::atomic<size_t> codeOffset;
JitCompilerX86::JitCompilerX86() { JitCompilerX86::JitCompilerX86() {
applyTweaks(); BranchesWithin32B = xmrig::Cpu::info()->jccErratum();
int32_t info[4]; int32_t info[4];
cpuid(1, info); cpuid(1, info);
@ -255,6 +218,8 @@ namespace randomx {
} }
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) { void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
PROFILE_SCOPE(RandomX_JIT_compile);
vm_flags = flags; vm_flags = flags;
generateProgramPrologue(prog, pcfg); generateProgramPrologue(prog, pcfg);
@ -340,7 +305,6 @@ namespace randomx {
r[j] = k; r[j] = k;
} }
constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) { for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) {
Instruction& instr1 = prog(i); Instruction& instr1 = prog(i);
Instruction& instr2 = prog(i + 1); Instruction& instr2 = prog(i + 1);
@ -352,17 +316,10 @@ namespace randomx {
InstructionGeneratorX86 gen3 = engine[instr3.opcode]; InstructionGeneratorX86 gen3 = engine[instr3.opcode];
InstructionGeneratorX86 gen4 = engine[instr4.opcode]; InstructionGeneratorX86 gen4 = engine[instr4.opcode];
*((uint64_t*)&instr1) &= instr_mask; (*gen1)(this, instr1);
(this->*gen1)(instr1); (*gen2)(this, instr2);
(*gen3)(this, instr3);
*((uint64_t*)&instr2) &= instr_mask; (*gen4)(this, instr4);
(this->*gen2)(instr2);
*((uint64_t*)&instr3) &= instr_mask;
(this->*gen3)(instr3);
*((uint64_t*)&instr4) &= instr_mask;
(this->*gen4)(instr4);
} }
*(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast<uint64_t>(pcfg.readReg2) << 16) + (static_cast<uint64_t>(pcfg.readReg3) << 40); *(uint64_t*)(code + codePos) = 0xc03341c08b41ull + (static_cast<uint64_t>(pcfg.readReg2) << 16) + (static_cast<uint64_t>(pcfg.readReg3) << 40);
@ -515,7 +472,7 @@ namespace randomx {
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos); template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) { FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16; const uint32_t dst = static_cast<uint32_t>(instr.dst % RegistersCount) << 16;
*(uint32_t*)(code + codePos) = 0x24808d41 + dst; *(uint32_t*)(code + codePos) = 0x24808d41 + dst;
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3; codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
@ -537,8 +494,8 @@ namespace randomx {
uint32_t pos = codePos; uint32_t pos = codePos;
uint8_t* const p = code + pos; uint8_t* const p = code + pos;
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst; const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst;
uint32_t k = 0x048d4f + (dst << 19); uint32_t k = 0x048d4f + (dst << 19);
if (dst == RegisterNeedsDisplacement) if (dst == RegisterNeedsDisplacement)
@ -557,8 +514,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
genAddressReg<true>(instr, src, p, pos); genAddressReg<true>(instr, src, p, pos);
@ -582,8 +539,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16); *(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
@ -603,8 +560,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
genAddressReg<true>(instr, src, p, pos); genAddressReg<true>(instr, src, p, pos);
@ -624,8 +581,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos); emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
@ -644,8 +601,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
genAddressReg<true>(instr, src, p, pos); genAddressReg<true>(instr, src, p, pos);
@ -665,8 +622,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16); *(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16); *(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
@ -681,8 +638,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16); *(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24); *(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
@ -696,8 +653,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
genAddressReg<false>(instr, src, p, pos); genAddressReg<false>(instr, src, p, pos);
@ -720,8 +677,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
genAddressReg<false>(instr, src, p, pos); genAddressReg<false>(instr, src, p, pos);
@ -743,8 +700,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40); *(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
pos += 8; pos += 8;
@ -758,8 +715,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
genAddressReg<false>(instr, src, p, pos); genAddressReg<false>(instr, src, p, pos);
@ -789,7 +746,7 @@ namespace randomx {
emit64(randomx_reciprocal_fast(divisor), p, pos); emit64(randomx_reciprocal_fast(divisor), p, pos);
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
emit32(0xc0af0f4c + (dst << 27), p, pos); emit32(0xc0af0f4c + (dst << 27), p, pos);
registerUsage[dst] = pos; registerUsage[dst] = pos;
@ -802,7 +759,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16); *(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
pos += 3; pos += 3;
@ -814,8 +771,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16); *(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
@ -835,8 +792,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
genAddressReg<true>(instr, src, p, pos); genAddressReg<true>(instr, src, p, pos);
@ -856,8 +813,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40); *(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
@ -877,8 +834,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40); *(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
@ -898,8 +855,8 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst; const uint32_t dst = instr.dst % RegistersCount;
if (src != dst) { if (src != dst) {
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16); *(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
@ -915,7 +872,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t dst = instr.dst; const uint64_t dst = instr.dst % RegistersCount;
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24); *(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
pos += 5; pos += 5;
@ -940,7 +897,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos); genAddressReg<true>(instr, src, p, pos);
@ -968,7 +925,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegisterCountFlt; const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos); genAddressReg<true>(instr, src, p, pos);
@ -1007,7 +964,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegisterCountFlt; const uint64_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos); genAddressReg<true>(instr, src, p, pos);
@ -1043,7 +1000,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint32_t src = instr.src; const uint32_t src = instr.src % RegistersCount;
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16); *(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63; const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
@ -1067,7 +1024,7 @@ namespace randomx {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const uint64_t src = instr.src; const uint64_t src = instr.src % RegistersCount;
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63; const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40); *(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
@ -1086,14 +1043,15 @@ namespace randomx {
codePos = pos; codePos = pos;
} }
template<bool jccErratum>
void JitCompilerX86::h_CBRANCH(const Instruction& instr) { void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
const int reg = instr.dst; const int reg = instr.dst % RegistersCount;
int32_t jmp_offset = registerUsage[reg] - (pos + 16); int32_t jmp_offset = registerUsage[reg] - (pos + 16);
if (BranchesWithin32B) { if (jccErratum) {
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7); const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13)); const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
@ -1106,10 +1064,12 @@ namespace randomx {
} }
*(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16); *(uint32_t*)(p + pos) = 0x00c08149 + (reg << 16);
const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset; const int shift = instr.getModCond();
*(uint32_t*)(p + pos + 3) = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1)); const uint32_t or_mask = (1UL << RandomX_ConfigurationBase::JumpOffset) << shift;
const uint32_t and_mask = rotl32(~static_cast<uint32_t>(1UL << (RandomX_ConfigurationBase::JumpOffset - 1)), shift);
*(uint32_t*)(p + pos + 3) = (instr.getImm32() | or_mask) & and_mask;
*(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16); *(uint32_t*)(p + pos + 7) = 0x00c0f749 + (reg << 16);
*(uint32_t*)(p + pos + 10) = RandomX_CurrentConfig.ConditionMask_Calculated << shift; *(uint32_t*)(p + pos + 10) = RandomX_ConfigurationBase::ConditionMask_Calculated << shift;
pos += 14; pos += 14;
if (jmp_offset >= -128) { if (jmp_offset >= -128) {
@ -1132,12 +1092,15 @@ namespace randomx {
codePos = pos; codePos = pos;
} }
template void JitCompilerX86::h_CBRANCH<false>(const Instruction&);
template void JitCompilerX86::h_CBRANCH<true>(const Instruction&);
void JitCompilerX86::h_ISTORE(const Instruction& instr) { void JitCompilerX86::h_ISTORE(const Instruction& instr) {
uint8_t* const p = code; uint8_t* const p = code;
uint32_t pos = codePos; uint32_t pos = codePos;
genAddressRegDst(instr, p, pos); genAddressRegDst(instr, p, pos);
emit32(0x0604894c + (static_cast<uint32_t>(instr.src) << 19), p, pos); emit32(0x0604894c + (static_cast<uint32_t>(instr.src % RegistersCount) << 19), p, pos);
codePos = pos; codePos = pos;
} }

View file

@ -41,7 +41,7 @@ namespace randomx {
class JitCompilerX86; class JitCompilerX86;
class Instruction; class Instruction;
typedef void(JitCompilerX86::*InstructionGeneratorX86)(const Instruction&); typedef void(*InstructionGeneratorX86)(JitCompilerX86*, const Instruction&);
constexpr uint32_t CodeSize = 64 * 1024; constexpr uint32_t CodeSize = 64 * 1024;
@ -84,7 +84,6 @@ namespace randomx {
uint8_t* allocatedCode; uint8_t* allocatedCode;
void applyTweaks();
void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&, ProgramConfiguration&);
template<bool rax> template<bool rax>
@ -148,11 +147,13 @@ namespace randomx {
void h_FMUL_R(const Instruction&); void h_FMUL_R(const Instruction&);
void h_FDIV_M(const Instruction&); void h_FDIV_M(const Instruction&);
void h_FSQRT_R(const Instruction&); void h_FSQRT_R(const Instruction&);
template<bool jccErratum>
void h_CBRANCH(const Instruction&); void h_CBRANCH(const Instruction&);
void h_CFROUND(const Instruction&); void h_CFROUND(const Instruction&);
void h_CFROUND_BMI2(const Instruction&); void h_CFROUND_BMI2(const Instruction&);
void h_ISTORE(const Instruction&); void h_ISTORE(const Instruction&);
void h_NOP(const Instruction&); void h_NOP(const Instruction&);
}; };
} }

View file

@ -47,6 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cassert> #include <cassert>
#include "base/tools/Profiler.h"
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero() RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
{ {
ArgonSalt = "RandomWOW\x01"; ArgonSalt = "RandomWOW\x01";
@ -109,22 +111,15 @@ RandomX_ConfigurationKeva::RandomX_ConfigurationKeva()
} }
RandomX_ConfigurationBase::RandomX_ConfigurationBase() RandomX_ConfigurationBase::RandomX_ConfigurationBase()
: ArgonMemory(262144) : ArgonIterations(3)
, ArgonIterations(3)
, ArgonLanes(1) , ArgonLanes(1)
, ArgonSalt("RandomX\x03") , ArgonSalt("RandomX\x03")
, CacheAccesses(8)
, SuperscalarLatency(170)
, DatasetBaseSize(2147483648)
, DatasetExtraSize(33554368)
, ScratchpadL1_Size(16384) , ScratchpadL1_Size(16384)
, ScratchpadL2_Size(262144) , ScratchpadL2_Size(262144)
, ScratchpadL3_Size(2097152) , ScratchpadL3_Size(2097152)
, ProgramSize(256) , ProgramSize(256)
, ProgramIterations(2048) , ProgramIterations(2048)
, ProgramCount(8) , ProgramCount(8)
, JumpBits(8)
, JumpOffset(8)
, RANDOMX_FREQ_IADD_RS(16) , RANDOMX_FREQ_IADD_RS(16)
, RANDOMX_FREQ_IADD_M(7) , RANDOMX_FREQ_IADD_M(7)
, RANDOMX_FREQ_ISUB_R(16) , RANDOMX_FREQ_ISUB_R(16)
@ -211,6 +206,13 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; } static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
#endif #endif
static int scratchpadPrefetchMode = 1;
void randomx_set_scratchpad_prefetch_mode(int mode)
{
scratchpadPrefetchMode = mode;
}
void RandomX_ConfigurationBase::Apply() void RandomX_ConfigurationBase::Apply()
{ {
const uint32_t ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8; const uint32_t ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
@ -224,11 +226,6 @@ void RandomX_ConfigurationBase::Apply()
ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8); ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64; ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
ConditionMask_Calculated = (1 << JumpBits) - 1;
#if defined(_M_X64) || defined(__x86_64__) #if defined(_M_X64) || defined(__x86_64__)
*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1; *(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
// Not needed right now because all variants use default dataset base size // Not needed right now because all variants use default dataset base size
@ -240,7 +237,42 @@ void RandomX_ConfigurationBase::Apply()
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated; *(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated; *(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
#define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x // Apply scratchpad prefetch mode
{
uint32_t* a = (uint32_t*)(codePrefetchScratchpadTweaked + 8);
uint32_t* b = (uint32_t*)(codePrefetchScratchpadTweaked + 22);
switch (scratchpadPrefetchMode)
{
case 0:
*a = 0x00401F0FUL; // 4-byte nop
*b = 0x00401F0FUL; // 4-byte nop
break;
case 1:
default:
*a = 0x060C180FUL; // prefetcht0 [rsi+rax]
*b = 0x160C180FUL; // prefetcht0 [rsi+rdx]
break;
case 2:
*a = 0x0604180FUL; // prefetchnta [rsi+rax]
*b = 0x1604180FUL; // prefetchnta [rsi+rdx]
break;
case 3:
*a = 0x060C8B48UL; // mov rcx, [rsi+rax]
*b = 0x160C8B48UL; // mov rcx, [rsi+rdx]
break;
}
}
typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx::Instruction&);
#define JIT_HANDLE(x, prev) do { \
const InstructionGeneratorX86_2 p = &randomx::JitCompilerX86::h_##x; \
memcpy(randomx::JitCompilerX86::engine + k, &p, sizeof(p)); \
} while (0)
#elif defined(XMRIG_ARMv8) #elif defined(XMRIG_ARMv8)
@ -256,16 +288,16 @@ void RandomX_ConfigurationBase::Apply()
#define JIT_HANDLE(x, prev) #define JIT_HANDLE(x, prev)
#endif #endif
constexpr int CEIL_NULL = 0; uint32_t k = 0;
int k = 0; uint32_t freq_sum = 0;
#define INST_HANDLE(x, prev) \ #define INST_HANDLE(x, prev) \
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ freq_sum += RANDOMX_FREQ_##x; \
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); } for (; k < freq_sum; ++k) { JIT_HANDLE(x, prev); }
#define INST_HANDLE2(x, func_name, prev) \ #define INST_HANDLE2(x, func_name, prev) \
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ freq_sum += RANDOMX_FREQ_##x; \
for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); } for (; k < freq_sum; ++k) { JIT_HANDLE(func_name, prev); }
INST_HANDLE(IADD_RS, NULL); INST_HANDLE(IADD_RS, NULL);
INST_HANDLE(IADD_M, IADD_RS); INST_HANDLE(IADD_M, IADD_RS);
@ -304,7 +336,13 @@ void RandomX_ConfigurationBase::Apply()
INST_HANDLE(FMUL_R, FSCAL_R); INST_HANDLE(FMUL_R, FSCAL_R);
INST_HANDLE(FDIV_M, FMUL_R); INST_HANDLE(FDIV_M, FMUL_R);
INST_HANDLE(FSQRT_R, FDIV_M); INST_HANDLE(FSQRT_R, FDIV_M);
INST_HANDLE(CBRANCH, FSQRT_R);
if (xmrig::Cpu::info()->jccErratum()) {
INST_HANDLE2(CBRANCH, CBRANCH<true>, FSQRT_R);
}
else {
INST_HANDLE2(CBRANCH, CBRANCH<false>, FSQRT_R);
}
#if defined(_M_X64) || defined(__x86_64__) #if defined(_M_X64) || defined(__x86_64__)
if (xmrig::Cpu::info()->hasBMI2()) { if (xmrig::Cpu::info()->hasBMI2()) {
@ -537,33 +575,35 @@ extern "C" {
assert(inputSize == 0 || input != nullptr); assert(inputSize == 0 || input != nullptr);
assert(output != nullptr); assert(output != nullptr);
alignas(16) uint64_t tempHash[8]; alignas(16) uint64_t tempHash[8];
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0); rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
machine->initScratchpad(&tempHash); machine->initScratchpad(&tempHash);
machine->resetRoundingMode(); machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) { for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash); machine->run(&tempHash);
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
} }
machine->run(&tempHash); machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE); machine->getFinalResult(output);
} }
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) { void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0); rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), input, inputSize);
machine->initScratchpad(tempHash); machine->initScratchpad(tempHash);
} }
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) { void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
PROFILE_SCOPE(RandomX_hash);
machine->resetRoundingMode(); machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) { for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash); machine->run(&tempHash);
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0); rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
} }
machine->run(&tempHash); machine->run(&tempHash);
// Finish current hash and fill the scratchpad for the next hash at the same time // Finish current hash and fill the scratchpad for the next hash at the same time
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0); rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), nextInput, nextInputSize);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash); machine->hashAndFill(output, tempHash);
} }
} }

View file

@ -64,15 +64,24 @@ struct RandomX_ConfigurationBase
void Apply(); void Apply();
uint32_t ArgonMemory; // Common parameters for all RandomX variants
enum Params : uint64_t
{
ArgonMemory = 262144,
CacheAccesses = 8,
SuperscalarLatency = 170,
DatasetBaseSize = 2147483648,
DatasetExtraSize = 33554368,
JumpBits = 8,
JumpOffset = 8,
CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1),
DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE,
ConditionMask_Calculated = ((1 << JumpBits) - 1) << JumpOffset,
};
uint32_t ArgonIterations; uint32_t ArgonIterations;
uint32_t ArgonLanes; uint32_t ArgonLanes;
const char* ArgonSalt; const char* ArgonSalt;
uint32_t CacheAccesses;
uint32_t SuperscalarLatency;
uint32_t DatasetBaseSize;
uint32_t DatasetExtraSize;
uint32_t ScratchpadL1_Size; uint32_t ScratchpadL1_Size;
uint32_t ScratchpadL2_Size; uint32_t ScratchpadL2_Size;
@ -82,9 +91,6 @@ struct RandomX_ConfigurationBase
uint32_t ProgramIterations; uint32_t ProgramIterations;
uint32_t ProgramCount; uint32_t ProgramCount;
uint32_t JumpBits;
uint32_t JumpOffset;
uint32_t RANDOMX_FREQ_IADD_RS; uint32_t RANDOMX_FREQ_IADD_RS;
uint32_t RANDOMX_FREQ_IADD_M; uint32_t RANDOMX_FREQ_IADD_M;
uint32_t RANDOMX_FREQ_ISUB_R; uint32_t RANDOMX_FREQ_ISUB_R;
@ -126,15 +132,10 @@ struct RandomX_ConfigurationBase
uint8_t codeReadDatasetLightSshInitTweaked[68]; uint8_t codeReadDatasetLightSshInitTweaked[68];
uint8_t codePrefetchScratchpadTweaked[32]; uint8_t codePrefetchScratchpadTweaked[32];
uint32_t CacheLineAlignMask_Calculated;
uint32_t DatasetExtraItems_Calculated;
uint32_t AddressMask_Calculated[4]; uint32_t AddressMask_Calculated[4];
uint32_t ScratchpadL3Mask_Calculated; uint32_t ScratchpadL3Mask_Calculated;
uint32_t ScratchpadL3Mask64_Calculated; uint32_t ScratchpadL3Mask64_Calculated;
uint32_t ConditionMask_Calculated;
#if defined(XMRIG_ARMv8) #if defined(XMRIG_ARMv8)
uint32_t Log2_ScratchpadL1; uint32_t Log2_ScratchpadL1;
uint32_t Log2_ScratchpadL2; uint32_t Log2_ScratchpadL2;
@ -142,37 +143,6 @@ struct RandomX_ConfigurationBase
uint32_t Log2_DatasetBaseSize; uint32_t Log2_DatasetBaseSize;
uint32_t Log2_CacheSize; uint32_t Log2_CacheSize;
#endif #endif
int CEIL_IADD_RS;
int CEIL_IADD_M;
int CEIL_ISUB_R;
int CEIL_ISUB_M;
int CEIL_IMUL_R;
int CEIL_IMUL_M;
int CEIL_IMULH_R;
int CEIL_IMULH_M;
int CEIL_ISMULH_R;
int CEIL_ISMULH_M;
int CEIL_IMUL_RCP;
int CEIL_INEG_R;
int CEIL_IXOR_R;
int CEIL_IXOR_M;
int CEIL_IROR_R;
int CEIL_IROL_R;
int CEIL_ISWAP_R;
int CEIL_FSWAP_R;
int CEIL_FADD_R;
int CEIL_FADD_M;
int CEIL_FSUB_R;
int CEIL_FSUB_M;
int CEIL_FSCAL_R;
int CEIL_FMUL_R;
int CEIL_FDIV_M;
int CEIL_FSQRT_R;
int CEIL_CBRANCH;
int CEIL_CFROUND;
int CEIL_ISTORE;
int CEIL_NOP;
}; };
struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {}; struct RandomX_ConfigurationMonero : public RandomX_ConfigurationBase {};
@ -200,6 +170,8 @@ void randomx_apply_config(const T& config)
RandomX_CurrentConfig.Apply(); RandomX_CurrentConfig.Apply();
} }
void randomx_set_scratchpad_prefetch_mode(int mode);
#if defined(__cplusplus) #if defined(__cplusplus)
extern "C" { extern "C" {
#endif #endif

View file

@ -28,6 +28,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "crypto/randomx/soft_aes.h" #include "crypto/randomx/soft_aes.h"
#include "crypto/randomx/aes_hash.hpp"
#include "base/tools/Chrono.h"
#include <vector>
alignas(64) uint32_t lutEnc0[256]; alignas(64) uint32_t lutEnc0[256];
alignas(64) uint32_t lutEnc1[256]; alignas(64) uint32_t lutEnc1[256];
@ -117,3 +120,47 @@ static struct SAESInitializer
} }
} }
} aes_initializer; } aes_initializer;
static uint32_t softAESImpl = 1;
uint32_t GetSoftAESImpl()
{
return softAESImpl;
}
void SelectSoftAESImpl()
{
constexpr int test_length_ms = 100;
double speed[2] = {};
for (int run = 0; run < 3; ++run) {
for (int i = 0; i < 2; ++i) {
std::vector<uint8_t> scratchpad(10 * 1024);
uint8_t hash[64] = {};
uint8_t state[64] = {};
uint64_t t1, t2;
uint32_t count = 0;
t1 = xmrig::Chrono::highResolutionMSecs();
do {
if (i == 0) {
hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
}
else {
hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
}
++count;
t2 = xmrig::Chrono::highResolutionMSecs();
} while (t2 - t1 < test_length_ms);
const double x = count * 1e3 / (t2 - t1);
if (x > speed[i]) {
speed[i] = x;
}
}
}
softAESImpl = (speed[0] > speed[1]) ? 1 : 2;
}

View file

@ -41,11 +41,14 @@ extern uint32_t lutDec1[256];
extern uint32_t lutDec2[256]; extern uint32_t lutDec2[256];
extern uint32_t lutDec3[256]; extern uint32_t lutDec3[256];
template<bool soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key); uint32_t GetSoftAESImpl();
template<bool soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key); void SelectSoftAESImpl();
template<int soft> rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key);
template<int soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
template<> template<>
FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) { FORCE_INLINE rx_vec_i128 aesenc<1>(rx_vec_i128 in, rx_vec_i128 key) {
volatile uint8_t s[16]; volatile uint8_t s[16];
memcpy((void*) s, &in, 16); memcpy((void*) s, &in, 16);
@ -73,7 +76,7 @@ FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
} }
template<> template<>
FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) { FORCE_INLINE rx_vec_i128 aesdec<1>(rx_vec_i128 in, rx_vec_i128 key) {
volatile uint8_t s[16]; volatile uint8_t s[16];
memcpy((void*) s, &in, 16); memcpy((void*) s, &in, 16);
@ -101,11 +104,49 @@ FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
} }
template<> template<>
FORCE_INLINE rx_vec_i128 aesenc<false>(rx_vec_i128 in, rx_vec_i128 key) { FORCE_INLINE rx_vec_i128 aesenc<2>(rx_vec_i128 in, rx_vec_i128 key) {
uint32_t s0, s1, s2, s3;
s0 = rx_vec_i128_w(in);
s1 = rx_vec_i128_z(in);
s2 = rx_vec_i128_y(in);
s3 = rx_vec_i128_x(in);
rx_vec_i128 out = rx_set_int_vec_i128(
(lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]),
(lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]),
(lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]),
(lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24])
);
return rx_xor_vec_i128(out, key);
}
template<>
FORCE_INLINE rx_vec_i128 aesdec<2>(rx_vec_i128 in, rx_vec_i128 key) {
uint32_t s0, s1, s2, s3;
s0 = rx_vec_i128_w(in);
s1 = rx_vec_i128_z(in);
s2 = rx_vec_i128_y(in);
s3 = rx_vec_i128_x(in);
rx_vec_i128 out = rx_set_int_vec_i128(
(lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]),
(lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]),
(lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]),
(lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24])
);
return rx_xor_vec_i128(out, key);
}
template<>
FORCE_INLINE rx_vec_i128 aesenc<0>(rx_vec_i128 in, rx_vec_i128 key) {
return rx_aesenc_vec_i128(in, key); return rx_aesenc_vec_i128(in, key);
} }
template<> template<>
FORCE_INLINE rx_vec_i128 aesdec<false>(rx_vec_i128 in, rx_vec_i128 key) { FORCE_INLINE rx_vec_i128 aesdec<0>(rx_vec_i128 in, rx_vec_i128 key) {
return rx_aesdec_vec_i128(in, key); return rx_aesdec_vec_i128(in, key);
} }

View file

@ -35,6 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/blake2/blake2.h" #include "crypto/randomx/blake2/blake2.h"
#include "crypto/randomx/intrin_portable.h" #include "crypto/randomx/intrin_portable.h"
#include "crypto/randomx/allocator.hpp" #include "crypto/randomx/allocator.hpp"
#include "crypto/randomx/soft_aes.h"
#include "base/tools/Profiler.h"
randomx_vm::~randomx_vm() { randomx_vm::~randomx_vm() {
@ -95,11 +97,11 @@ void randomx_vm::initialize() {
namespace randomx { namespace randomx {
template<bool softAes> template<int softAes>
VmBase<softAes>::~VmBase() { VmBase<softAes>::~VmBase() {
} }
template<bool softAes> template<int softAes>
void VmBase<softAes>::setScratchpad(uint8_t *scratchpad) { void VmBase<softAes>::setScratchpad(uint8_t *scratchpad) {
if (datasetPtr == nullptr) { if (datasetPtr == nullptr) {
throw std::invalid_argument("Cache/Dataset not set"); throw std::invalid_argument("Cache/Dataset not set");
@ -108,25 +110,37 @@ namespace randomx {
this->scratchpad = scratchpad; this->scratchpad = scratchpad;
} }
template<bool softAes> template<int softAes>
void VmBase<softAes>::getFinalResult(void* out, size_t outSize) { void VmBase<softAes>::getFinalResult(void* out) {
hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a); hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a);
rx_blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0); rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile));
} }
template<bool softAes> template<int softAes>
void VmBase<softAes>::hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) { void VmBase<softAes>::hashAndFill(void* out, uint64_t (&fill_state)[8]) {
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a, fill_state); if (!softAes) {
rx_blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0); hashAndFillAes1Rx4<0>(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
else {
if (GetSoftAESImpl() == 1) {
hashAndFillAes1Rx4<1>(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
else {
hashAndFillAes1Rx4<2>(scratchpad, ScratchpadSize, &reg.a, fill_state);
}
}
rx_blake2b_wrapper::run(out, RANDOMX_HASH_SIZE, &reg, sizeof(RegisterFile));
} }
template<bool softAes> template<int softAes>
void VmBase<softAes>::initScratchpad(void* seed) { void VmBase<softAes>::initScratchpad(void* seed) {
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad); fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
} }
template<bool softAes> template<int softAes>
void VmBase<softAes>::generateProgram(void* seed) { void VmBase<softAes>::generateProgram(void* seed) {
PROFILE_SCOPE(RandomX_generate_program);
fillAes4Rx4<softAes>(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program); fillAes4Rx4<softAes>(seed, 128 + RandomX_CurrentConfig.ProgramSize * 8, &program);
} }

View file

@ -38,8 +38,8 @@ class randomx_vm
public: public:
virtual ~randomx_vm() = 0; virtual ~randomx_vm() = 0;
virtual void setScratchpad(uint8_t *scratchpad) = 0; virtual void setScratchpad(uint8_t *scratchpad) = 0;
virtual void getFinalResult(void* out, size_t outSize) = 0; virtual void getFinalResult(void* out) = 0;
virtual void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) = 0; virtual void hashAndFill(void* out, uint64_t (&fill_state)[8]) = 0;
virtual void setDataset(randomx_dataset* dataset) { } virtual void setDataset(randomx_dataset* dataset) { }
virtual void setCache(randomx_cache* cache) { } virtual void setCache(randomx_cache* cache) { }
virtual void initScratchpad(void* seed) = 0; virtual void initScratchpad(void* seed) = 0;
@ -79,15 +79,15 @@ protected:
namespace randomx { namespace randomx {
template<bool softAes> template<int softAes>
class VmBase : public randomx_vm class VmBase : public randomx_vm
{ {
public: public:
~VmBase() override; ~VmBase() override;
void setScratchpad(uint8_t *scratchpad) override; void setScratchpad(uint8_t *scratchpad) override;
void initScratchpad(void* seed) override; void initScratchpad(void* seed) override;
void getFinalResult(void* out, size_t outSize) override; void getFinalResult(void* out) override;
void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) override; void hashAndFill(void* out, uint64_t (&fill_state)[8]) override;
protected: protected:
void generateProgram(void* seed); void generateProgram(void* seed);

View file

@ -28,19 +28,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/vm_compiled.hpp" #include "crypto/randomx/vm_compiled.hpp"
#include "crypto/randomx/common.hpp" #include "crypto/randomx/common.hpp"
#include "base/tools/Profiler.h"
namespace randomx { namespace randomx {
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters"); static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile"); static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
template<bool softAes> template<int softAes>
void CompiledVm<softAes>::setDataset(randomx_dataset* dataset) { void CompiledVm<softAes>::setDataset(randomx_dataset* dataset) {
datasetPtr = dataset; datasetPtr = dataset;
} }
template<bool softAes> template<int softAes>
void CompiledVm<softAes>::run(void* seed) { void CompiledVm<softAes>::run(void* seed) {
PROFILE_SCOPE(RandomX_run);
compiler.prepare(); compiler.prepare();
VmBase<softAes>::generateProgram(seed); VmBase<softAes>::generateProgram(seed);
randomx_vm::initialize(); randomx_vm::initialize();
@ -49,8 +52,10 @@ namespace randomx {
execute(); execute();
} }
template<bool softAes> template<int softAes>
void CompiledVm<softAes>::execute() { void CompiledVm<softAes>::execute() {
PROFILE_SCOPE(RandomX_JIT_execute);
#ifdef XMRIG_ARM #ifdef XMRIG_ARM
memcpy(reg.f, config.eMask, sizeof(config.eMask)); memcpy(reg.f, config.eMask, sizeof(config.eMask));
#endif #endif

View file

@ -37,7 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx { namespace randomx {
template<bool softAes> template<int softAes>
class CompiledVm : public VmBase<softAes> class CompiledVm : public VmBase<softAes>
{ {
public: public:
@ -61,6 +61,6 @@ namespace randomx {
JitCompiler compiler; JitCompiler compiler;
}; };
using CompiledVmDefault = CompiledVm<true>; using CompiledVmDefault = CompiledVm<1>;
using CompiledVmHardAes = CompiledVm<false>; using CompiledVmHardAes = CompiledVm<0>;
} }

View file

@ -32,14 +32,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx { namespace randomx {
template<bool softAes> template<int softAes>
void CompiledLightVm<softAes>::setCache(randomx_cache* cache) { void CompiledLightVm<softAes>::setCache(randomx_cache* cache) {
cachePtr = cache; cachePtr = cache;
mem.memory = cache->memory; mem.memory = cache->memory;
compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache); compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
} }
template<bool softAes> template<int softAes>
void CompiledLightVm<softAes>::run(void* seed) { void CompiledLightVm<softAes>::run(void* seed) {
VmBase<softAes>::generateProgram(seed); VmBase<softAes>::generateProgram(seed);
randomx_vm::initialize(); randomx_vm::initialize();

View file

@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx { namespace randomx {
template<bool softAes> template<int softAes>
class CompiledLightVm : public CompiledVm<softAes> class CompiledLightVm : public CompiledVm<softAes>
{ {
public: public:
@ -52,6 +52,6 @@ namespace randomx {
using CompiledVm<softAes>::datasetOffset; using CompiledVm<softAes>::datasetOffset;
}; };
using CompiledLightVmDefault = CompiledLightVm<true>; using CompiledLightVmDefault = CompiledLightVm<1>;
using CompiledLightVmHardAes = CompiledLightVm<false>; using CompiledLightVmHardAes = CompiledLightVm<0>;
} }

View file

@ -33,20 +33,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx { namespace randomx {
template<bool softAes> template<int softAes>
void InterpretedVm<softAes>::setDataset(randomx_dataset* dataset) { void InterpretedVm<softAes>::setDataset(randomx_dataset* dataset) {
datasetPtr = dataset; datasetPtr = dataset;
mem.memory = dataset->memory; mem.memory = dataset->memory;
} }
template<bool softAes> template<int softAes>
void InterpretedVm<softAes>::run(void* seed) { void InterpretedVm<softAes>::run(void* seed) {
VmBase<softAes>::generateProgram(seed); VmBase<softAes>::generateProgram(seed);
randomx_vm::initialize(); randomx_vm::initialize();
execute(); execute();
} }
template<bool softAes> template<int softAes>
void InterpretedVm<softAes>::execute() { void InterpretedVm<softAes>::execute() {
NativeRegisterFile nreg; NativeRegisterFile nreg;
@ -106,14 +106,14 @@ namespace randomx {
rx_store_vec_f128(&reg.e[i].lo, nreg.e[i]); rx_store_vec_f128(&reg.e[i].lo, nreg.e[i]);
} }
template<bool softAes> template<int softAes>
void InterpretedVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) { void InterpretedVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
uint64_t* datasetLine = (uint64_t*)(mem.memory + address); uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
for (int i = 0; i < RegistersCount; ++i) for (int i = 0; i < RegistersCount; ++i)
r[i] ^= datasetLine[i]; r[i] ^= datasetLine[i];
} }
template<bool softAes> template<int softAes>
void InterpretedVm<softAes>::datasetPrefetch(uint64_t address) { void InterpretedVm<softAes>::datasetPrefetch(uint64_t address) {
rx_prefetch_nta(mem.memory + address); rx_prefetch_nta(mem.memory + address);
} }

View file

@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx { namespace randomx {
template<bool softAes> template<int softAes>
class InterpretedVm : public VmBase<softAes>, public BytecodeMachine { class InterpretedVm : public VmBase<softAes>, public BytecodeMachine {
public: public:
using VmBase<softAes>::mem; using VmBase<softAes>::mem;
@ -65,6 +65,6 @@ namespace randomx {
InstructionByteCode bytecode[RANDOMX_PROGRAM_MAX_SIZE]; InstructionByteCode bytecode[RANDOMX_PROGRAM_MAX_SIZE];
}; };
using InterpretedVmDefault = InterpretedVm<true>; using InterpretedVmDefault = InterpretedVm<1>;
using InterpretedVmHardAes = InterpretedVm<false>; using InterpretedVmHardAes = InterpretedVm<0>;
} }

View file

@ -31,13 +31,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx { namespace randomx {
template<bool softAes> template<int softAes>
void InterpretedLightVm<softAes>::setCache(randomx_cache* cache) { void InterpretedLightVm<softAes>::setCache(randomx_cache* cache) {
cachePtr = cache; cachePtr = cache;
mem.memory = cache->memory; mem.memory = cache->memory;
} }
template<bool softAes> template<int softAes>
void InterpretedLightVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[8]) { void InterpretedLightVm<softAes>::datasetRead(uint64_t address, int_reg_t(&r)[8]) {
uint32_t itemNumber = address / CacheLineSize; uint32_t itemNumber = address / CacheLineSize;
int_reg_t rl[8]; int_reg_t rl[8];

View file

@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx { namespace randomx {
template<bool softAes> template<int softAes>
class InterpretedLightVm : public InterpretedVm<softAes> { class InterpretedLightVm : public InterpretedVm<softAes> {
public: public:
using VmBase<softAes>::mem; using VmBase<softAes>::mem;
@ -50,6 +50,6 @@ namespace randomx {
void datasetPrefetch(uint64_t address) override { } void datasetPrefetch(uint64_t address) override { }
}; };
using InterpretedLightVmDefault = InterpretedLightVm<true>; using InterpretedLightVmDefault = InterpretedLightVm<1>;
using InterpretedLightVmHardAes = InterpretedLightVm<false>; using InterpretedLightVmHardAes = InterpretedLightVm<0>;
} }

View file

@ -32,6 +32,8 @@
#include "base/io/log/Log.h" #include "base/io/log/Log.h"
#include "crypto/rx/RxConfig.h" #include "crypto/rx/RxConfig.h"
#include "crypto/rx/RxQueue.h" #include "crypto/rx/RxQueue.h"
#include "crypto/randomx/randomx.h"
#include "crypto/randomx/soft_aes.h"
namespace xmrig { namespace xmrig {
@ -99,6 +101,8 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
return true; return true;
} }
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
if (isReady(seed)) { if (isReady(seed)) {
return true; return true;
} }
@ -110,6 +114,9 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
if (!osInitialized) { if (!osInitialized) {
setupMainLoopExceptionFrame(); setupMainLoopExceptionFrame();
if (!cpu.isHwAES()) {
SelectSoftAESImpl();
}
osInitialized = true; osInitialized = true;
} }

View file

@ -57,6 +57,8 @@ static const char *kCacheQoS = "cache_qos";
static const char *kNUMA = "numa"; static const char *kNUMA = "numa";
#endif #endif
static const char *kScratchpadPrefetchMode = "scratchpad_prefetch_mode";
static const std::array<const char *, RxConfig::ModeMax> modeNames = { "auto", "fast", "light" }; static const std::array<const char *, RxConfig::ModeMax> modeNames = { "auto", "fast", "light" };
@ -118,6 +120,11 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
} }
# endif # endif
const uint32_t mode = static_cast<uint32_t>(Json::getInt(value, kScratchpadPrefetchMode, static_cast<int>(m_scratchpadPrefetchMode)));
if (mode < ScratchpadPrefetchMax) {
m_scratchpadPrefetchMode = static_cast<ScratchpadPrefetchMode>(mode);
}
return true; return true;
} }
@ -171,6 +178,8 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
} }
# endif # endif
obj.AddMember(StringRef(kScratchpadPrefetchMode), static_cast<int>(m_scratchpadPrefetchMode), allocator);
return obj; return obj;
} }

View file

@ -50,6 +50,14 @@ public:
ModeMax ModeMax
}; };
enum ScratchpadPrefetchMode : uint32_t {
ScratchpadPrefetchOff,
ScratchpadPrefetchT0,
ScratchpadPrefetchNTA,
ScratchpadPrefetchMov,
ScratchpadPrefetchMax,
};
bool read(const rapidjson::Value &value); bool read(const rapidjson::Value &value);
rapidjson::Value toJSON(rapidjson::Document &doc) const; rapidjson::Value toJSON(rapidjson::Document &doc) const;
@ -68,6 +76,8 @@ public:
inline bool cacheQoS() const { return m_cacheQoS; } inline bool cacheQoS() const { return m_cacheQoS; }
inline Mode mode() const { return m_mode; } inline Mode mode() const { return m_mode; }
inline ScratchpadPrefetchMode scratchpadPrefetchMode() const { return m_scratchpadPrefetchMode; }
# ifdef XMRIG_FEATURE_MSR # ifdef XMRIG_FEATURE_MSR
const char *msrPresetName() const; const char *msrPresetName() const;
const MsrItems &msrPreset() const; const MsrItems &msrPreset() const;
@ -94,6 +104,8 @@ private:
int m_threads = -1; int m_threads = -1;
Mode m_mode = AutoMode; Mode m_mode = AutoMode;
ScratchpadPrefetchMode m_scratchpadPrefetchMode = ScratchpadPrefetchT0;
# ifdef XMRIG_FEATURE_HWLOC # ifdef XMRIG_FEATURE_HWLOC
std::vector<uint32_t> m_nodeset; std::vector<uint32_t> m_nodeset;
# endif # endif

View file

@ -31,6 +31,11 @@
#include "crypto/rx/RxVm.h" #include "crypto/rx/RxVm.h"
#if defined(_M_X64) || defined(__x86_64__)
extern "C" uint32_t rx_blake2b_use_sse41;
#endif
randomx_vm* xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node) randomx_vm* xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::Assembly assembly, uint32_t node)
{ {
int flags = 0; int flags = 0;
@ -55,6 +60,10 @@ randomx_vm* xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
flags |= RANDOMX_FLAG_AMD; flags |= RANDOMX_FLAG_AMD;
} }
# if defined(_M_X64) || defined(__x86_64__)
rx_blake2b_use_sse41 = Cpu::info()->has(ICpuInfo::FLAG_SSE41) ? 1 : 0;
# endif
return randomx_create_vm(static_cast<randomx_flags>(flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node); return randomx_create_vm(static_cast<randomx_flags>(flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
} }

View file

@ -28,7 +28,7 @@
#define APP_ID "xmrig" #define APP_ID "xmrig"
#define APP_NAME "XMRig" #define APP_NAME "XMRig"
#define APP_DESC "XMRig miner" #define APP_DESC "XMRig miner"
#define APP_VERSION "6.3.3" #define APP_VERSION "6.3.4-dev"
#define APP_DOMAIN "xmrig.com" #define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com" #define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com"
@ -36,7 +36,7 @@
#define APP_VER_MAJOR 6 #define APP_VER_MAJOR 6
#define APP_VER_MINOR 3 #define APP_VER_MINOR 3
#define APP_VER_PATCH 3 #define APP_VER_PATCH 4
#ifdef _MSC_VER #ifdef _MSC_VER
# if (_MSC_VER >= 1920) # if (_MSC_VER >= 1920)