diff --git a/src/backend/cpu/CpuBackend.cpp b/src/backend/cpu/CpuBackend.cpp index 16db4e5e2..529b14e23 100644 --- a/src/backend/cpu/CpuBackend.cpp +++ b/src/backend/cpu/CpuBackend.cpp @@ -436,10 +436,6 @@ rapidjson::Value xmrig::CpuBackend::toJSON(rapidjson::Document &doc) const out.AddMember("argon2-impl", argon2::Impl::name().toJSON(), allocator); # endif -# ifdef XMRIG_ALGO_ASTROBWT - out.AddMember("astrobwt-max-size", cpu.astrobwtMaxSize(), allocator); -# endif - out.AddMember("hugepages", d_ptr->hugePages(2, doc), allocator); out.AddMember("memory", static_cast(d_ptr->algo.isValid() ? (d_ptr->ways() * d_ptr->algo.l3()) : 0), allocator); diff --git a/src/backend/cpu/CpuConfig.cpp b/src/backend/cpu/CpuConfig.cpp index e20c8e9af..c186dbeb5 100644 --- a/src/backend/cpu/CpuConfig.cpp +++ b/src/backend/cpu/CpuConfig.cpp @@ -45,11 +45,6 @@ const char *CpuConfig::kAsm = "asm"; const char *CpuConfig::kArgon2Impl = "argon2-impl"; #endif -#ifdef XMRIG_ALGO_ASTROBWT -const char *CpuConfig::kAstroBWTMaxSize = "astrobwt-max-size"; -const char *CpuConfig::kAstroBWTAVX2 = "astrobwt-avx2"; -#endif - extern template class Threads; @@ -89,11 +84,6 @@ rapidjson::Value xmrig::CpuConfig::toJSON(rapidjson::Document &doc) const obj.AddMember(StringRef(kArgon2Impl), m_argon2Impl.toJSON(), allocator); # endif -# ifdef XMRIG_ALGO_ASTROBWT - obj.AddMember(StringRef(kAstroBWTMaxSize), m_astrobwtMaxSize, allocator); - obj.AddMember(StringRef(kAstroBWTAVX2), m_astrobwtAVX2, allocator); -# endif - m_threads.toJSON(obj, doc); return obj; @@ -158,24 +148,6 @@ void xmrig::CpuConfig::read(const rapidjson::Value &value) m_argon2Impl = Json::getString(value, kArgon2Impl); # endif -# ifdef XMRIG_ALGO_ASTROBWT - const auto& astroBWTMaxSize = Json::getValue(value, kAstroBWTMaxSize); - if (astroBWTMaxSize.IsNull() || !astroBWTMaxSize.IsInt()) { - m_shouldSave = true; - } - else { - m_astrobwtMaxSize = std::min(std::max(astroBWTMaxSize.GetInt(), 400), 1200); - } - - const auto& astroBWTAVX2 = Json::getValue(value, kAstroBWTAVX2); - if (astroBWTAVX2.IsNull() || !astroBWTAVX2.IsBool()) { - m_shouldSave = true; - } - else { - m_astrobwtAVX2 = astroBWTAVX2.GetBool(); - } -# endif - m_threads.read(value); generate(); diff --git a/src/backend/cpu/CpuConfig.h b/src/backend/cpu/CpuConfig.h index 536c221e5..4342ad89d 100644 --- a/src/backend/cpu/CpuConfig.h +++ b/src/backend/cpu/CpuConfig.h @@ -56,11 +56,6 @@ public: static const char *kArgon2Impl; # endif -# ifdef XMRIG_ALGO_ASTROBWT - static const char *kAstroBWTMaxSize; - static const char *kAstroBWTAVX2; -# endif - CpuConfig() = default; bool isHwAES() const; @@ -69,7 +64,6 @@ public: std::vector get(const Miner *miner, const Algorithm &algorithm) const; void read(const rapidjson::Value &value); - inline bool astrobwtAVX2() const { return m_astrobwtAVX2; } inline bool isEnabled() const { return m_enabled; } inline bool isHugePages() const { return m_hugePageSize > 0; } inline bool isHugePagesJit() const { return m_hugePagesJit; } @@ -78,7 +72,6 @@ public: inline const Assembly &assembly() const { return m_assembly; } inline const String &argon2Impl() const { return m_argon2Impl; } inline const Threads &threads() const { return m_threads; } - inline int astrobwtMaxSize() const { return m_astrobwtMaxSize; } inline int priority() const { return m_priority; } inline size_t hugePageSize() const { return m_hugePageSize * 1024U; } inline uint32_t limit() const { return m_limit; } @@ -96,12 +89,10 @@ private: AesMode m_aes = AES_AUTO; Assembly m_assembly; - bool m_astrobwtAVX2 = false; bool m_enabled = true; bool m_hugePagesJit = false; bool m_shouldSave = false; bool m_yield = true; - int m_astrobwtMaxSize = 550; int m_memoryPool = 0; int m_priority = -1; size_t m_hugePageSize = kDefaultHugePageSizeKb; diff --git a/src/backend/cpu/CpuConfig_gen.h b/src/backend/cpu/CpuConfig_gen.h index 6698861af..b0ccd24c7 100644 --- a/src/backend/cpu/CpuConfig_gen.h +++ b/src/backend/cpu/CpuConfig_gen.h @@ -164,7 +164,6 @@ size_t inline generate(Threads& threads, uint32 count += threads.move(Algorithm::kASTROBWT_DERO_2, std::move(v2)); } - count += generate(Algorithm::kASTROBWT, threads, Algorithm::ASTROBWT_DERO, limit); return count; } #endif diff --git a/src/backend/cpu/CpuLaunchData.cpp b/src/backend/cpu/CpuLaunchData.cpp index 9d71ad837..7d3b08f4d 100644 --- a/src/backend/cpu/CpuLaunchData.cpp +++ b/src/backend/cpu/CpuLaunchData.cpp @@ -35,11 +35,9 @@ xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads, const std::vector& affinities) : algorithm(algorithm), assembly(config.assembly()), - astrobwtAVX2(config.astrobwtAVX2()), hugePages(config.isHugePages()), hwAES(config.isHwAES()), yield(config.isYield()), - astrobwtMaxSize(config.astrobwtMaxSize()), priority(config.priority()), affinity(thread.affinity()), miner(miner), diff --git a/src/backend/cpu/CpuLaunchData.h b/src/backend/cpu/CpuLaunchData.h index b44655027..9742a1b4e 100644 --- a/src/backend/cpu/CpuLaunchData.h +++ b/src/backend/cpu/CpuLaunchData.h @@ -58,11 +58,9 @@ public: const Algorithm algorithm; const Assembly assembly; - const bool astrobwtAVX2; const bool hugePages; const bool hwAES; const bool yield; - const int astrobwtMaxSize; const int priority; const int64_t affinity; const Miner *miner; diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 4109a6cf2..84e134d8f 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -73,11 +73,9 @@ xmrig::CpuWorker::CpuWorker(size_t id, const CpuLaunchData &data) : Worker(id, data.affinity, data.priority), m_algorithm(data.algorithm), m_assembly(data.assembly), - m_astrobwtAVX2(data.astrobwtAVX2), m_hwAES(data.hwAES), m_yield(data.yield), m_av(data.av()), - m_astrobwtMaxSize(data.astrobwtMaxSize * 1000), m_miner(data.miner), m_threads(data.threads), m_ctx() @@ -224,7 +222,6 @@ bool xmrig::CpuWorker::selfTest() # endif # ifdef XMRIG_ALGO_ASTROBWT - if (m_algorithm.id() == Algorithm::ASTROBWT_DERO) return verify(Algorithm::ASTROBWT_DERO, astrobwt_dero_test_out); if (m_algorithm.id() == Algorithm::ASTROBWT_DERO_2) return verify(Algorithm::ASTROBWT_DERO_2, astrobwt_dero_2_test_out); # endif @@ -319,15 +316,8 @@ void xmrig::CpuWorker::start() # ifdef XMRIG_ALGO_ASTROBWT case Algorithm::ASTROBWT: - if (job.algorithm().id() == Algorithm::ASTROBWT_DERO) { - if (!astrobwt::astrobwt_dero(m_job.blob(), job.size(), m_ctx[0]->memory, m_hash, m_astrobwtMaxSize, m_astrobwtAVX2)) { - valid = false; - } - } - else { - if (!astrobwt::astrobwt_dero_v2(m_job.blob(), job.size(), m_ctx[0]->memory, m_hash)) { - valid = false; - } + if (!astrobwt::astrobwt_dero_v2(m_job.blob(), job.size(), m_ctx[0]->memory, m_hash)) { + valid = false; } break; # endif diff --git a/src/backend/cpu/CpuWorker.h b/src/backend/cpu/CpuWorker.h index ade256ec5..197beee2d 100644 --- a/src/backend/cpu/CpuWorker.h +++ b/src/backend/cpu/CpuWorker.h @@ -86,11 +86,9 @@ private: alignas(16) uint8_t m_hash[N * 32]{ 0 }; const Algorithm m_algorithm; const Assembly m_assembly; - const bool m_astrobwtAVX2; const bool m_hwAES; const bool m_yield; const CnHash::AlgoVariant m_av; - const int m_astrobwtMaxSize; const Miner *m_miner; const size_t m_threads; cryptonight_ctx *m_ctx[N]; diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp index 9a195db24..ee2cfca0f 100644 --- a/src/backend/cpu/platform/HwlocCpuInfo.cpp +++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp @@ -318,13 +318,6 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith size_t scratchpad = algorithm.l3(); uint32_t intensity = algorithm.maxIntensity() == 1 ? 0 : 1; -# ifdef XMRIG_ALGO_ASTROBWT - if (algorithm == Algorithm::ASTROBWT_DERO) { - // Use fake low value to force usage of all available cores for AstroBWT (taking 'limit' into account) - scratchpad = 16 * 1024; - } -# endif - if (cache->attr->cache.depth == 3) { for (size_t i = 0; i < cache->arity; ++i) { hwloc_obj_t l2 = cache->children[i]; diff --git a/src/backend/cuda/CudaBackend.cpp b/src/backend/cuda/CudaBackend.cpp index fd6ec3d95..e779cd606 100644 --- a/src/backend/cuda/CudaBackend.cpp +++ b/src/backend/cuda/CudaBackend.cpp @@ -220,12 +220,6 @@ public: size_t algo_l3 = algo.l3(); -# ifdef XMRIG_ALGO_ASTROBWT - if (algo.id() == Algorithm::ASTROBWT_DERO) { - algo_l3 = CudaAstroBWTRunner::BWT_DATA_STRIDE * 17 + 1024; - } -# endif - size_t i = 0; for (const auto &data : threads) { size_t mem_used = (data.thread.threads() * data.thread.blocks()) * algo_l3 / oneMiB; diff --git a/src/backend/cuda/CudaConfig_gen.h b/src/backend/cuda/CudaConfig_gen.h index 73c77e48d..1f3edc2dc 100644 --- a/src/backend/cuda/CudaConfig_gen.h +++ b/src/backend/cuda/CudaConfig_gen.h @@ -145,7 +145,6 @@ size_t inline generate(Threads &threads, const count += threads.move(Algorithm::kASTROBWT_DERO_2, CudaThreads(devices, Algorithm::ASTROBWT_DERO_2)); } - count += generate(Algorithm::kASTROBWT, threads, Algorithm::ASTROBWT_DERO, devices); return count; } #endif diff --git a/src/backend/opencl/OclBackend.cpp b/src/backend/opencl/OclBackend.cpp index 6f9543b81..068ed9e1c 100644 --- a/src/backend/opencl/OclBackend.cpp +++ b/src/backend/opencl/OclBackend.cpp @@ -204,12 +204,6 @@ public: size_t algo_l3 = algo.l3(); -# ifdef XMRIG_ALGO_ASTROBWT - if (algo.id() == Algorithm::ASTROBWT_DERO) { - algo_l3 = OclAstroBWTRunner::BWT_DATA_STRIDE * 17 + 324; - } -# endif - size_t i = 0; for (const auto &data : threads) { size_t mem_used = data.thread.intensity() * algo_l3 / oneMiB; diff --git a/src/backend/opencl/OclConfig_gen.h b/src/backend/opencl/OclConfig_gen.h index aedd3983f..5b4d64237 100644 --- a/src/backend/opencl/OclConfig_gen.h +++ b/src/backend/opencl/OclConfig_gen.h @@ -139,7 +139,6 @@ size_t inline generate(Threads& threads, const count += threads.move(Algorithm::kASTROBWT_DERO_2, OclThreads(devices, Algorithm::ASTROBWT_DERO_2)); } - count += generate(Algorithm::kASTROBWT, threads, Algorithm::ASTROBWT_DERO, devices); return count; } #endif diff --git a/src/base/crypto/Algorithm.cpp b/src/base/crypto/Algorithm.cpp index 6a20a5b51..5a2bca8e3 100644 --- a/src/base/crypto/Algorithm.cpp +++ b/src/base/crypto/Algorithm.cpp @@ -163,7 +163,6 @@ static const std::map kAlgorithmNames = { # endif # ifdef XMRIG_ALGO_ASTROBWT - ALGO_NAME(ASTROBWT_DERO), ALGO_NAME(ASTROBWT_DERO_2), # endif @@ -283,7 +282,6 @@ static const std::map kAlgorithmAlias # endif # ifdef XMRIG_ALGO_ASTROBWT - ALGO_ALIAS_AUTO(ASTROBWT_DERO), ALGO_ALIAS(ASTROBWT_DERO, "astrobwt/dero"), ALGO_ALIAS_AUTO(ASTROBWT_DERO_2), ALGO_ALIAS(ASTROBWT_DERO_2, "astrobwt/v2"), ALGO_ALIAS_AUTO(ASTROBWT_DERO_2), ALGO_ALIAS(ASTROBWT_DERO_2, "astrobwt/dero_he"), ALGO_ALIAS_AUTO(ASTROBWT_DERO_2), ALGO_ALIAS(ASTROBWT_DERO_2, "astrobwt/derohe"), @@ -370,7 +368,7 @@ std::vector xmrig::Algorithm::all(const std::function(strtol(arg, nullptr, 10))); - - case IConfig::AstroBWTAVX2Key: /* --astrobwt-avx2 */ - return set(doc, CpuConfig::kField, CpuConfig::kAstroBWTAVX2, true); -# endif - # ifdef XMRIG_ALGO_RANDOMX case IConfig::RandomXInitKey: /* --randomx-init */ return set(doc, RxConfig::kField, RxConfig::kInit, static_cast(strtol(arg, nullptr, 10))); diff --git a/src/core/config/Config_platform.h b/src/core/config/Config_platform.h index b3e768dc9..d3d3157c3 100644 --- a/src/core/config/Config_platform.h +++ b/src/core/config/Config_platform.h @@ -140,10 +140,6 @@ static const option options[] = { { "randomx-cache-qos", 0, nullptr, IConfig::RandomXCacheQoSKey }, { "cache-qos", 0, nullptr, IConfig::RandomXCacheQoSKey }, # endif - #ifdef XMRIG_ALGO_ASTROBWT - { "astrobwt-max-size", 1, nullptr, IConfig::AstroBWTMaxSizeKey }, - { "astrobwt-avx2", 0, nullptr, IConfig::AstroBWTAVX2Key }, - #endif # ifdef XMRIG_FEATURE_OPENCL { "opencl", 0, nullptr, IConfig::OclKey }, { "opencl-devices", 1, nullptr, IConfig::OclDevicesKey }, diff --git a/src/core/config/usage.h b/src/core/config/usage.h index 6cd599c96..6f74b21e5 100644 --- a/src/core/config/usage.h +++ b/src/core/config/usage.h @@ -108,11 +108,6 @@ static inline const std::string &usage() u += " --randomx-cache-qos enable Cache QoS\n"; # endif -# ifdef XMRIG_ALGO_ASTROBWT - u += " --astrobwt-max-size=N skip hashes with large stage 2 size, default: 550, min: 400, max: 1200\n"; - u += " --astrobwt-avx2 enable AVX2 optimizations for AstroBWT algorithm"; -# endif - # ifdef XMRIG_FEATURE_OPENCL u += "\nOpenCL backend:\n"; u += " --opencl enable OpenCL mining backend\n"; diff --git a/src/crypto/astrobwt/AstroBWT.cpp b/src/crypto/astrobwt/AstroBWT.cpp index 2ae649b2f..b1dd53164 100644 --- a/src/crypto/astrobwt/AstroBWT.cpp +++ b/src/crypto/astrobwt/AstroBWT.cpp @@ -31,12 +31,6 @@ #include -constexpr int STAGE1_SIZE = 147253; -constexpr int ALLOCATION_SIZE = (STAGE1_SIZE + 1048576) + (128 - (STAGE1_SIZE & 63)); - -constexpr int COUNTING_SORT_BITS = 10; -constexpr int COUNTING_SORT_SIZE = 1 << COUNTING_SORT_BITS; - static bool astrobwtInitialized = false; #ifdef ASTROBWT_AVX2 @@ -87,353 +81,6 @@ static void Salsa20_XORKeyStream_AVX256(const void* key, void* output, size_t si } #endif -static inline bool smaller(const uint8_t* v, uint64_t a, uint64_t b) -{ - const uint64_t value_a = a >> 21; - const uint64_t value_b = b >> 21; - - if (value_a < value_b) { - return true; - } - - if (value_a > value_b) { - return false; - } - - a &= (1 << 21) - 1; - b &= (1 << 21) - 1; - - if (a == b) { - return false; - } - - const uint64_t data_a = bswap_64(*reinterpret_cast(v + a + 5)); - const uint64_t data_b = bswap_64(*reinterpret_cast(v + b + 5)); - return (data_a < data_b); -} - -void sort_indices(uint32_t N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indices) -{ - uint32_t counters[2][COUNTING_SORT_SIZE] = {}; - - { -#define ITER(X) \ - do { \ - const uint64_t k = bswap_64(*reinterpret_cast(v + i + X)); \ - ++counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]; \ - ++counters[1][k >> (64 - COUNTING_SORT_BITS)]; \ - } while (0) - - uint32_t i = 0; - const uint32_t n = N - 15; - for (; i < n; i += 16) { - ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); - ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15); - } - for (; i < N; ++i) { - ITER(0); - } - -#undef ITER - } - - uint32_t prev[2] = { counters[0][0], counters[1][0] }; - counters[0][0] = prev[0] - 1; - counters[1][0] = prev[1] - 1; - for (int i = 1; i < COUNTING_SORT_SIZE; ++i) - { - const uint32_t cur[2] = { counters[0][i] + prev[0], counters[1][i] + prev[1] }; - counters[0][i] = cur[0] - 1; - counters[1][i] = cur[1] - 1; - prev[0] = cur[0]; - prev[1] = cur[1]; - } - - { -#define ITER(X) \ - do { \ - const uint64_t k = bswap_64(*reinterpret_cast(v + (i - X))); \ - tmp_indices[counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]--] = (k & (static_cast(-1) << 21)) | (i - X); \ - } while (0) - - uint32_t i = N; - for (; i >= 8; i -= 8) { - ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8); - } - for (; i > 0; --i) { - ITER(1); - } - -#undef ITER - } - - { -#define ITER(X) \ - do { \ - const uint64_t data = tmp_indices[i - X]; \ - indices[counters[1][data >> (64 - COUNTING_SORT_BITS)]--] = data; \ - } while (0) - - uint32_t i = N; - for (; i >= 8; i -= 8) { - ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8); - } - for (; i > 0; --i) { - ITER(1); - } - -#undef ITER - } - - uint64_t prev_t = indices[0]; - for (uint32_t i = 1; i < N; ++i) - { - uint64_t t = indices[i]; - if (smaller(v, t, prev_t)) - { - const uint64_t t2 = prev_t; - int j = i - 1; - do - { - indices[j + 1] = prev_t; - --j; - - if (j < 0) { - break; - } - - prev_t = indices[j]; - } while (smaller(v, t, prev_t)); - indices[j + 1] = t; - t = t2; - } - prev_t = t; - } -} - -void sort_indices2(uint32_t N, const uint8_t* v, uint64_t* indices, uint64_t* tmp_indices) -{ - alignas(16) uint32_t counters[1 << COUNTING_SORT_BITS] = {}; - alignas(16) uint32_t counters2[1 << COUNTING_SORT_BITS]; - - { -#define ITER(X) { \ - const uint64_t k = bswap_64(*reinterpret_cast(v + i + X)); \ - ++counters[k >> (64 - COUNTING_SORT_BITS)]; \ - } - - uint32_t i = 0; - const uint32_t n = (N / 32) * 32; - for (; i < n; i += 32) { - ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); - ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15); - ITER(16); ITER(17); ITER(18); ITER(19); ITER(20); ITER(21); ITER(22); ITER(23); - ITER(24); ITER(25); ITER(26); ITER(27); ITER(28); ITER(29); ITER(30); ITER(31); - } - for (; i < N; ++i) { - ITER(0); - } - -#undef ITER - } - - uint32_t prev = static_cast(-1); - for (uint32_t i = 0; i < (1 << COUNTING_SORT_BITS); i += 16) - { -#define ITER(X) { \ - const uint32_t cur = counters[i + X] + prev; \ - counters[i + X] = cur; \ - counters2[i + X] = cur; \ - prev = cur; \ - } - ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); - ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15); -#undef ITER - } - - { -#define ITER(X) \ - do { \ - const uint64_t k = bswap_64(*reinterpret_cast(v + (i - X))); \ - indices[counters[k >> (64 - COUNTING_SORT_BITS)]--] = (k & (static_cast(-1) << 21)) | (i - X); \ - } while (0) - - uint32_t i = N; - for (; i >= 8; i -= 8) { - ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8); - } - for (; i > 0; --i) { - ITER(1); - } - -#undef ITER - } - - uint32_t prev_i = 0; - for (uint32_t i0 = 0; i0 < (1 << COUNTING_SORT_BITS); ++i0) { - const uint32_t i = counters2[i0] + 1; - const uint32_t n = i - prev_i; - if (n > 1) { - memset(counters, 0, sizeof(uint32_t) * (1 << COUNTING_SORT_BITS)); - - const uint32_t n8 = (n / 8) * 8; - uint32_t j = 0; - -#define ITER(X) { \ - const uint64_t k = indices[prev_i + j + X]; \ - ++counters[(k >> (64 - COUNTING_SORT_BITS * 2)) & ((1 << COUNTING_SORT_BITS) - 1)]; \ - tmp_indices[j + X] = k; \ - } - for (; j < n8; j += 8) { - ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); - } - for (; j < n; ++j) { - ITER(0); - } -#undef ITER - - uint32_t prev = static_cast(-1); - for (uint32_t j = 0; j < (1 << COUNTING_SORT_BITS); j += 32) - { -#define ITER(X) { \ - const uint32_t cur = counters[j + X] + prev; \ - counters[j + X] = cur; \ - prev = cur; \ - } - ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); - ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15); - ITER(16); ITER(17); ITER(18); ITER(19); ITER(20); ITER(21); ITER(22); ITER(23); - ITER(24); ITER(25); ITER(26); ITER(27); ITER(28); ITER(29); ITER(30); ITER(31); -#undef ITER - } - -#define ITER(X) { \ - const uint64_t k = tmp_indices[j - X]; \ - const uint32_t index = counters[(k >> (64 - COUNTING_SORT_BITS * 2)) & ((1 << COUNTING_SORT_BITS) - 1)]--; \ - indices[prev_i + index] = k; \ - } - for (j = n; j >= 8; j -= 8) { - ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8); - } - for (; j > 0; --j) { - ITER(1); - } -#undef ITER - - uint64_t prev_t = indices[prev_i]; - for (uint64_t* p = indices + prev_i + 1, *e = indices + i; p != e; ++p) - { - uint64_t t = *p; - if (smaller(v, t, prev_t)) - { - const uint64_t t2 = prev_t; - uint64_t* p1 = p; - do - { - *p1 = prev_t; - --p1; - - if (p1 <= indices + prev_i) { - break; - } - - prev_t = *(p1 - 1); - } while (smaller(v, t, prev_t)); - *p1 = t; - t = t2; - } - prev_t = t; - } - } - prev_i = i; - } -} - -bool xmrig::astrobwt::astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash, int stage2_max_size, bool avx2) -{ - alignas(8) uint8_t key[32]; - uint8_t* scratchpad_ptr = (uint8_t*)(scratchpad) + 64; - uint8_t* stage1_output = scratchpad_ptr; - uint8_t* stage2_output = scratchpad_ptr; - uint64_t* indices = (uint64_t*)(scratchpad_ptr + ALLOCATION_SIZE); - uint64_t* tmp_indices = (uint64_t*)(scratchpad_ptr + ALLOCATION_SIZE * 9); - uint8_t* stage1_result = (uint8_t*)(tmp_indices); - uint8_t* stage2_result = (uint8_t*)(tmp_indices); - -#ifdef ASTROBWT_AVX2 - if (hasAVX2 && avx2) { - SHA3_256_AVX2_ASM(input_data, input_size, key); - Salsa20_XORKeyStream_AVX256(key, stage1_output, STAGE1_SIZE); - } - else -#endif - { - sha3_HashBuffer(256, SHA3_FLAGS_NONE, input_data, input_size, key, sizeof(key)); - Salsa20_XORKeyStream(key, stage1_output, STAGE1_SIZE); - } - - sort_indices(STAGE1_SIZE + 1, stage1_output, indices, tmp_indices); - - { - const uint8_t* tmp = stage1_output - 1; - for (int i = 0; i <= STAGE1_SIZE; ++i) { - stage1_result[i] = tmp[indices[i] & ((1 << 21) - 1)]; - } - } - -#ifdef ASTROBWT_AVX2 - if (hasAVX2 && avx2) - SHA3_256_AVX2_ASM(stage1_result, STAGE1_SIZE + 1, key); - else -#endif - sha3_HashBuffer(256, SHA3_FLAGS_NONE, stage1_result, STAGE1_SIZE + 1, key, sizeof(key)); - - const int stage2_size = STAGE1_SIZE + (*(uint32_t*)(key) & 0xfffff); - if (stage2_size > stage2_max_size) { - return false; - } - -#ifdef ASTROBWT_AVX2 - if (hasAVX2 && avx2) { - Salsa20_XORKeyStream_AVX256(key, stage2_output, stage2_size); - } - else -#endif - { - Salsa20_XORKeyStream(key, stage2_output, stage2_size); - } - - sort_indices2(stage2_size + 1, stage2_output, indices, tmp_indices); - - { - const uint8_t* tmp = stage2_output - 1; - int i = 0; - const int n = ((stage2_size + 1) / 4) * 4; - - for (; i < n; i += 4) - { - stage2_result[i + 0] = tmp[indices[i + 0] & ((1 << 21) - 1)]; - stage2_result[i + 1] = tmp[indices[i + 1] & ((1 << 21) - 1)]; - stage2_result[i + 2] = tmp[indices[i + 2] & ((1 << 21) - 1)]; - stage2_result[i + 3] = tmp[indices[i + 3] & ((1 << 21) - 1)]; - } - - for (; i <= stage2_size; ++i) { - stage2_result[i] = tmp[indices[i] & ((1 << 21) - 1)]; - } - } - -#ifdef ASTROBWT_AVX2 - if (hasAVX2 && avx2) - SHA3_256_AVX2_ASM(stage2_result, stage2_size + 1, output_hash); - else -#endif - sha3_HashBuffer(256, SHA3_FLAGS_NONE, stage2_result, stage2_size + 1, output_hash, 32); - - return true; -} - - bool xmrig::astrobwt::astrobwt_dero_v2(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash) { constexpr size_t N = 9973; @@ -485,13 +132,6 @@ void xmrig::astrobwt::init() } -template<> -void xmrig::astrobwt::single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t) -{ - astrobwt_dero(input, static_cast(size), ctx[0]->memory, output, std::numeric_limits::max(), true); -} - - template<> void xmrig::astrobwt::single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t) { diff --git a/src/crypto/astrobwt/AstroBWT.h b/src/crypto/astrobwt/AstroBWT.h index 15dfa361e..2a3564f51 100644 --- a/src/crypto/astrobwt/AstroBWT.h +++ b/src/crypto/astrobwt/AstroBWT.h @@ -31,16 +31,12 @@ namespace xmrig { namespace astrobwt { -bool astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash, int stage2_max_size, bool avx2); bool astrobwt_dero_v2(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash); void init(); template void single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t); -template<> -void single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t); - template<> void single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t); diff --git a/src/crypto/cn/CnHash.cpp b/src/crypto/cn/CnHash.cpp index 095b0eb0e..665ed49ff 100644 --- a/src/crypto/cn/CnHash.cpp +++ b/src/crypto/cn/CnHash.cpp @@ -376,10 +376,6 @@ xmrig::CnHash::CnHash() # endif # ifdef XMRIG_ALGO_ASTROBWT - m_map[Algorithm::ASTROBWT_DERO] = new cn_hash_fun_array{}; - m_map[Algorithm::ASTROBWT_DERO]->data[AV_SINGLE][Assembly::NONE] = astrobwt::single_hash; - m_map[Algorithm::ASTROBWT_DERO]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash; - m_map[Algorithm::ASTROBWT_DERO_2] = new cn_hash_fun_array{}; m_map[Algorithm::ASTROBWT_DERO_2]->data[AV_SINGLE][Assembly::NONE] = astrobwt::single_hash; m_map[Algorithm::ASTROBWT_DERO_2]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash; diff --git a/src/crypto/cn/CryptoNight_test.h b/src/crypto/cn/CryptoNight_test.h index 9bec3cd82..3025e32d0 100644 --- a/src/crypto/cn/CryptoNight_test.h +++ b/src/crypto/cn/CryptoNight_test.h @@ -433,20 +433,6 @@ const static uint8_t argon2_wrkz_test_out[256] = { #ifdef XMRIG_ALGO_ASTROBWT -// "astrobwt" -const static uint8_t astrobwt_dero_test_out[256] = { - 0x7E, 0x88, 0x44, 0xF2, 0xD6, 0xB7, 0xA4, 0x34, 0x98, 0xFE, 0x6D, 0x22, 0x65, 0x27, 0x68, 0x90, - 0x23, 0xDA, 0x8A, 0x52, 0xF9, 0xFC, 0x4E, 0xC6, 0x9E, 0x5A, 0xAA, 0xA6, 0x3E, 0xDC, 0xE1, 0xC1, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - // "astrobwt/v2" const static uint8_t astrobwt_dero_2_test_out[256] = { 0x48, 0x9E, 0xD2, 0x66, 0x14, 0x27, 0x98, 0x65, 0x03, 0xFB, 0x87, 0x25, 0xE1, 0xD3, 0x98, 0xDA,