mirror of
https://github.com/xmrig/xmrig.git
synced 2024-12-23 03:59:41 +00:00
Added x3 x4 x5 hashing modes.
This commit is contained in:
parent
dba1acd302
commit
9e3f2ae9f9
6 changed files with 442 additions and 20 deletions
|
@ -13,10 +13,10 @@ endif()
|
||||||
if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
||||||
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-strict-aliasing")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-strict-aliasing")
|
||||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast")
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fno-exceptions -fno-rtti")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fno-exceptions -fno-rtti")
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -s -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -s")
|
||||||
|
|
||||||
if (XMRIG_ARMv8)
|
if (XMRIG_ARMv8)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crypto")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crypto")
|
||||||
|
|
|
@ -298,13 +298,17 @@ void ApiRouter::getResults(rapidjson::Document &doc) const
|
||||||
|
|
||||||
void ApiRouter::getThreads(rapidjson::Document &doc) const
|
void ApiRouter::getThreads(rapidjson::Document &doc) const
|
||||||
{
|
{
|
||||||
doc.SetArray();
|
doc.SetObject();
|
||||||
|
auto &allocator = doc.GetAllocator();
|
||||||
|
|
||||||
const std::vector<xmrig::IThread *> &threads = m_controller->config()->threads();
|
const std::vector<xmrig::IThread *> &threads = m_controller->config()->threads();
|
||||||
|
rapidjson::Value list(rapidjson::kArrayType);
|
||||||
|
|
||||||
for (const xmrig::IThread *thread : threads) {
|
for (const xmrig::IThread *thread : threads) {
|
||||||
doc.PushBack(thread->toAPI(doc), doc.GetAllocator());
|
list.PushBack(thread->toAPI(doc), allocator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
doc.AddMember("threads", list, allocator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
#define __CRYPTONIGHT_TEST_H__
|
#define __CRYPTONIGHT_TEST_H__
|
||||||
|
|
||||||
|
|
||||||
const static uint8_t test_input[152] = {
|
const static uint8_t test_input[380] = {
|
||||||
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
|
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
|
||||||
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
|
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
|
||||||
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
|
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
|
||||||
|
@ -36,52 +36,97 @@ const static uint8_t test_input[152] = {
|
||||||
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
|
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
|
||||||
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
|
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
|
||||||
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
|
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
|
||||||
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01
|
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
|
||||||
|
0x07, 0x07, 0xB4, 0x87, 0xD0, 0xD6, 0x05, 0x26, 0xE0, 0xC6, 0xDD, 0x9B, 0xC7, 0x18, 0xC3, 0xCF,
|
||||||
|
0x52, 0x04, 0xBD, 0x4F, 0x9B, 0x27, 0xF6, 0x73, 0xB9, 0x3F, 0xEF, 0x7B, 0xB2, 0xF7, 0x2B, 0xBB,
|
||||||
|
0x3F, 0x3E, 0x9C, 0x3E, 0x9D, 0x33, 0x1E, 0xDE, 0xAD, 0xBE, 0xEF, 0x4E, 0x00, 0x91, 0x81, 0x29,
|
||||||
|
0x74, 0xB2, 0x70, 0xE7, 0x6D, 0xD2, 0x2A, 0x5F, 0x52, 0x04, 0x93, 0xE6, 0x18, 0x89, 0x40, 0xD8,
|
||||||
|
0xC6, 0xE3, 0x90, 0x6E, 0xAA, 0x6A, 0xB7, 0xE2, 0x08, 0x7E, 0x78, 0x0E,
|
||||||
|
0x01, 0x00, 0xEE, 0xB2, 0xD1, 0xD6, 0x05, 0xFF, 0x27, 0x7F, 0x26, 0xDB, 0xAA, 0xB2, 0xC9, 0x26,
|
||||||
|
0x30, 0xC6, 0xCF, 0x11, 0x64, 0xEA, 0x6C, 0x8A, 0xE0, 0x98, 0x01, 0xF8, 0x75, 0x4B, 0x49, 0xAF,
|
||||||
|
0x79, 0x70, 0xAE, 0xEE, 0xA7, 0x62, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x47, 0x8C, 0x63, 0xE7, 0xD8,
|
||||||
|
0x40, 0x02, 0x3C, 0xDA, 0xEA, 0x92, 0x52, 0x53, 0xAC, 0xFD, 0xC7, 0x8A, 0x4C, 0x31, 0xB2, 0xF2,
|
||||||
|
0xEC, 0x72, 0x7B, 0xFF, 0xCE, 0xC0, 0xE7, 0x12, 0xD4, 0xE9, 0x2A, 0x01,
|
||||||
|
0x07, 0x07, 0xA9, 0xB7, 0xD1, 0xD6, 0x05, 0x3F, 0x0D, 0x5E, 0xFD, 0xC7, 0x03, 0xFC, 0xFC, 0xD2,
|
||||||
|
0xCE, 0xBC, 0x44, 0xD8, 0xAB, 0x44, 0xA6, 0xA0, 0x3A, 0xE4, 0x4D, 0x8F, 0x15, 0xAF, 0x62, 0x17,
|
||||||
|
0xD1, 0xE0, 0x92, 0x85, 0xE4, 0x73, 0xF9, 0x00, 0x00, 0x00, 0xA0, 0xFC, 0x09, 0xDE, 0xAB, 0xF5,
|
||||||
|
0x8B, 0x6F, 0x1D, 0xCA, 0xA8, 0xBA, 0xAC, 0x74, 0xDD, 0x74, 0x19, 0xD5, 0xD6, 0x10, 0xEC, 0x38,
|
||||||
|
0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
const static uint8_t test_output_v0[64] = {
|
const static uint8_t test_output_v0[160] = {
|
||||||
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
|
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
|
||||||
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
|
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
|
||||||
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
|
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
|
||||||
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00
|
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
|
||||||
|
0xA1, 0xB4, 0xFA, 0xE3, 0xE5, 0x76, 0xCE, 0xCF, 0xB7, 0x9C, 0xAF, 0x3E, 0x29, 0x92, 0xE4, 0xE0,
|
||||||
|
0x31, 0x24, 0x05, 0x48, 0xBF, 0x8D, 0x5F, 0x7B, 0x11, 0x03, 0x60, 0xAA, 0xD7, 0x50, 0x3F, 0x0C,
|
||||||
|
0x2D, 0x30, 0xF3, 0x87, 0x4F, 0x86, 0xA1, 0x4A, 0xB5, 0xA2, 0x1A, 0x08, 0xD0, 0x44, 0x2C, 0x9D,
|
||||||
|
0x16, 0xE9, 0x28, 0x49, 0xA1, 0xFF, 0x85, 0x6F, 0x12, 0xBB, 0x7D, 0xAB, 0x11, 0x1C, 0xE7, 0xF7,
|
||||||
|
0x2D, 0x9D, 0x19, 0xE4, 0xD2, 0x26, 0x44, 0x1E, 0xCD, 0x22, 0x08, 0x24, 0xA8, 0x97, 0x46, 0x62,
|
||||||
|
0x04, 0x84, 0x90, 0x4A, 0xEE, 0x99, 0x14, 0xED, 0xB8, 0xC6, 0x0D, 0x37, 0xA1, 0x66, 0x17, 0xB0
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// Monero v7
|
// Monero v7
|
||||||
const static uint8_t test_output_v1[64] = {
|
const static uint8_t test_output_v1[160] = {
|
||||||
0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
|
0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
|
||||||
0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
|
0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
|
||||||
0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
|
0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
|
||||||
0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9
|
0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
|
||||||
|
0xE7, 0x8C, 0x5A, 0x6E, 0x38, 0x30, 0x68, 0x4A, 0x73, 0xFC, 0x1B, 0xC6, 0x6D, 0xFC, 0x8D, 0x98,
|
||||||
|
0xB4, 0xC2, 0x23, 0x39, 0xAD, 0xE0, 0x9D, 0xF6, 0x6D, 0x8C, 0x6A, 0xAA, 0xF9, 0xB2, 0xE3, 0x4C,
|
||||||
|
0xB6, 0x90, 0x6C, 0xE6, 0x15, 0x5E, 0x46, 0x07, 0x9C, 0xB2, 0x6B, 0xAC, 0x3B, 0xAC, 0x1A, 0xDE,
|
||||||
|
0x92, 0x2C, 0xD6, 0x0C, 0x46, 0x9D, 0x9B, 0xC2, 0x84, 0x52, 0x65, 0xF6, 0xBD, 0xFA, 0x0D, 0x74,
|
||||||
|
0x00, 0x66, 0x10, 0x07, 0xF1, 0x19, 0x06, 0x3A, 0x6C, 0xFF, 0xEE, 0xB2, 0x40, 0xE5, 0x88, 0x2B,
|
||||||
|
0x6C, 0xAB, 0x6B, 0x1D, 0x88, 0xB8, 0x44, 0x25, 0xF4, 0xEA, 0xB7, 0xEC, 0xBA, 0x12, 0x8A, 0x24
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_NO_AEON
|
#ifndef XMRIG_NO_AEON
|
||||||
const static uint8_t test_output_v0_lite[64] = {
|
const static uint8_t test_output_v0_lite[160] = {
|
||||||
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
|
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
|
||||||
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
|
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
|
||||||
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
|
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
|
||||||
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88
|
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
|
||||||
|
0x38, 0x08, 0xE1, 0x17, 0x0B, 0x99, 0x8D, 0x1A, 0x3C, 0xCE, 0x35, 0xC5, 0xC7, 0x3A, 0x00, 0x2E,
|
||||||
|
0xCB, 0x54, 0xF0, 0x78, 0x2E, 0x9E, 0xDB, 0xC7, 0xDF, 0x2E, 0x71, 0x9A, 0x16, 0x97, 0xC4, 0x18,
|
||||||
|
0x4B, 0x97, 0x07, 0xFE, 0x5D, 0x98, 0x9A, 0xD6, 0xD8, 0xE5, 0x92, 0x66, 0x87, 0x7F, 0x19, 0x37,
|
||||||
|
0xA2, 0x5E, 0xE6, 0x96, 0xB5, 0x97, 0x33, 0x89, 0xE0, 0xA7, 0xC9, 0xDD, 0x4A, 0x7E, 0x9E, 0x53,
|
||||||
|
0xBE, 0x91, 0x2B, 0xF5, 0xF5, 0xAF, 0xDD, 0x09, 0xA2, 0xF4, 0xA4, 0x56, 0xEB, 0x96, 0x22, 0xC9,
|
||||||
|
0x94, 0xFB, 0x7B, 0x28, 0xC9, 0x97, 0x65, 0x04, 0xAC, 0x4F, 0x84, 0x71, 0xDA, 0x6E, 0xD8, 0xC5
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// AEON v7
|
// AEON v7
|
||||||
const static uint8_t test_output_v1_lite[64] = {
|
const static uint8_t test_output_v1_lite[160] = {
|
||||||
0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
|
0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
|
||||||
0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F,
|
0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F,
|
||||||
0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
|
0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
|
||||||
0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41
|
0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
|
||||||
|
0x16, 0x08, 0x74, 0xC7, 0xA2, 0xD2, 0xA3, 0x97, 0x95, 0x76, 0xCA, 0x4D, 0x06, 0x39, 0x7A, 0xAB,
|
||||||
|
0x6C, 0x87, 0x58, 0x33, 0x4D, 0xC8, 0x5A, 0xAB, 0x04, 0x27, 0xFE, 0x8B, 0x1C, 0x23, 0x2F, 0x32,
|
||||||
|
0xC0, 0x44, 0xFF, 0x0D, 0xB5, 0x3B, 0x27, 0x96, 0x06, 0x89, 0x7B, 0xA3, 0x0B, 0xD0, 0xCE, 0x9E,
|
||||||
|
0x90, 0x22, 0x77, 0x5A, 0xAD, 0xA1, 0xE5, 0xB6, 0xFC, 0xCB, 0x39, 0x7E, 0x2B, 0x10, 0xEE, 0xB4,
|
||||||
|
0x8C, 0x2B, 0xA4, 0x1F, 0x60, 0x76, 0x39, 0xD7, 0xF6, 0x46, 0x77, 0x18, 0x20, 0xAD, 0xD4, 0xC9,
|
||||||
|
0x87, 0xF7, 0x37, 0xDA, 0xFD, 0xBA, 0xBA, 0xD2, 0xF2, 0x68, 0xDC, 0x26, 0x8D, 0x1B, 0x08, 0xC6
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_NO_SUMO
|
#ifndef XMRIG_NO_SUMO
|
||||||
const static uint8_t test_output_heavy[64] = {
|
const static uint8_t test_output_heavy[160] = {
|
||||||
0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A,
|
0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A,
|
||||||
0x1F, 0x1A, 0xA2, 0x5B, 0xFC, 0x0A, 0xAD, 0x82, 0xDE, 0xA8, 0x99, 0x96, 0x88, 0x52, 0xD2, 0x7D,
|
0x1F, 0x1A, 0xA2, 0x5B, 0xFC, 0x0A, 0xAD, 0x82, 0xDE, 0xA8, 0x99, 0x96, 0x88, 0x52, 0xD2, 0x7D,
|
||||||
0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
|
0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
|
||||||
0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2
|
0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2,
|
||||||
|
0x3E, 0xE1, 0x23, 0x03, 0x5A, 0x63, 0x7B, 0x66, 0xF6, 0xD7, 0xC2, 0x2A, 0x34, 0x5E, 0x88, 0xE7,
|
||||||
|
0xFA, 0xC4, 0x25, 0x36, 0x54, 0xCB, 0xD2, 0x5C, 0x2F, 0x80, 0x2A, 0xF9, 0xCC, 0x43, 0xF7, 0xCD,
|
||||||
|
0xE5, 0x18, 0xA8, 0x05, 0x60, 0x18, 0xA5, 0x73, 0x72, 0x9B, 0x32, 0xDC, 0x69, 0x83, 0xC1, 0xE1,
|
||||||
|
0x1F, 0xDB, 0xDA, 0x6B, 0xAC, 0xEC, 0x9F, 0x67, 0xF8, 0x27, 0x1D, 0xC7, 0xE6, 0x46, 0x42, 0xF9,
|
||||||
|
0x53, 0x62, 0x0A, 0x54, 0x7D, 0x43, 0xEA, 0x18, 0x94, 0xED, 0xD8, 0x92, 0x06, 0x6A, 0xA1, 0x51,
|
||||||
|
0xAD, 0xB1, 0xFD, 0x89, 0xFB, 0x5C, 0xB4, 0x25, 0x6A, 0xDD, 0xB0, 0x09, 0xC5, 0x72, 0x87, 0xEB
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -427,7 +427,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
uint64_t ah0 = h0[1] ^ h0[5];
|
uint64_t ah0 = h0[1] ^ h0[5];
|
||||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
|
|
||||||
uint64_t idx0 = h0[0] ^ h0[4];
|
uint64_t idx0 = al0;
|
||||||
|
|
||||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||||
__m128i cx;
|
__m128i cx;
|
||||||
|
@ -517,8 +517,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||||
|
|
||||||
uint64_t idx0 = h0[0] ^ h0[4];
|
uint64_t idx0 = al0;
|
||||||
uint64_t idx1 = h1[0] ^ h1[4];
|
uint64_t idx1 = al1;
|
||||||
|
|
||||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||||
__m128i cx0, cx1;
|
__m128i cx0, cx1;
|
||||||
|
@ -611,21 +611,377 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define CN_STEP1(a, b, c, l, ptr, idx) \
|
||||||
|
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
|
||||||
|
c = _mm_load_si128(ptr);
|
||||||
|
|
||||||
|
|
||||||
|
#define CN_STEP2(a, b, c, l, ptr, idx) \
|
||||||
|
if (SOFT_AES) { \
|
||||||
|
c = soft_aesenc(c, a); \
|
||||||
|
} else { \
|
||||||
|
c = _mm_aesenc_si128(c, a); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
b = _mm_xor_si128(b, c); \
|
||||||
|
\
|
||||||
|
if (VARIANT > 0) { \
|
||||||
|
cryptonight_monero_tweak(reinterpret_cast<uint64_t*>(ptr), b); \
|
||||||
|
} else { \
|
||||||
|
_mm_store_si128(ptr, b); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define CN_STEP3(a, b, c, l, ptr, idx) \
|
||||||
|
idx = EXTRACT64(c); \
|
||||||
|
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
|
||||||
|
b = _mm_load_si128(ptr);
|
||||||
|
|
||||||
|
|
||||||
|
#define CN_STEP4(a, b, c, l, mc, ptr, idx) \
|
||||||
|
lo = __umul128(idx, EXTRACT64(b), &hi); \
|
||||||
|
a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
|
||||||
|
\
|
||||||
|
if (VARIANT > 0) { \
|
||||||
|
_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
|
||||||
|
} else { \
|
||||||
|
_mm_store_si128(ptr, a); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
a = _mm_xor_si128(a, b); \
|
||||||
|
idx = EXTRACT64(a); \
|
||||||
|
\
|
||||||
|
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { \
|
||||||
|
int64_t n = ((int64_t*)&l[idx & MASK])[0]; \
|
||||||
|
int32_t d = ((int32_t*)&l[idx & MASK])[2]; \
|
||||||
|
int64_t q = n / (d | 0x5); \
|
||||||
|
((int64_t*)&l[idx & MASK])[0] = n ^ q; \
|
||||||
|
idx = d ^ q; \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define CONST_INIT(ctx, n) \
|
||||||
|
__m128i mc##n; \
|
||||||
|
if (VARIANT > 0) { \
|
||||||
|
mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \
|
||||||
|
*(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
|
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
|
||||||
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
||||||
{
|
{
|
||||||
|
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
|
||||||
|
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
|
||||||
|
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
|
||||||
|
|
||||||
|
if (VARIANT > 0 && size < 43) {
|
||||||
|
memset(output, 0, 32 * 3);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 3; i++) {
|
||||||
|
keccak(input + size * i, static_cast<int>(size), ctx[i]->state, 200);
|
||||||
|
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||||
|
}
|
||||||
|
|
||||||
|
CONST_INIT(ctx[0], 0);
|
||||||
|
CONST_INIT(ctx[1], 1);
|
||||||
|
CONST_INIT(ctx[2], 2);
|
||||||
|
|
||||||
|
uint8_t* l0 = ctx[0]->memory;
|
||||||
|
uint8_t* l1 = ctx[1]->memory;
|
||||||
|
uint8_t* l2 = ctx[2]->memory;
|
||||||
|
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||||
|
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
|
||||||
|
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
|
||||||
|
|
||||||
|
__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
|
||||||
|
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
|
__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
|
||||||
|
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||||
|
__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
|
||||||
|
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
|
||||||
|
__m128i cx0 = _mm_set_epi64x(0, 0);
|
||||||
|
__m128i cx1 = _mm_set_epi64x(0, 0);
|
||||||
|
__m128i cx2 = _mm_set_epi64x(0, 0);
|
||||||
|
|
||||||
|
uint64_t idx0, idx1, idx2;
|
||||||
|
idx0 = EXTRACT64(ax0);
|
||||||
|
idx1 = EXTRACT64(ax1);
|
||||||
|
idx2 = EXTRACT64(ax2);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < ITERATIONS / 2; i++) {
|
||||||
|
uint64_t hi, lo;
|
||||||
|
__m128i *ptr0, *ptr1, *ptr2;
|
||||||
|
|
||||||
|
// EVEN ROUND
|
||||||
|
CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
|
||||||
|
|
||||||
|
CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
|
||||||
|
|
||||||
|
CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
|
||||||
|
|
||||||
|
CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
|
||||||
|
CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
|
||||||
|
CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
|
||||||
|
|
||||||
|
// ODD ROUND
|
||||||
|
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
|
||||||
|
|
||||||
|
CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
|
||||||
|
|
||||||
|
CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
|
||||||
|
|
||||||
|
CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
|
||||||
|
CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
|
||||||
|
CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 3; i++) {
|
||||||
|
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||||
|
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||||
|
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
|
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
|
||||||
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
||||||
{
|
{
|
||||||
|
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
|
||||||
|
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
|
||||||
|
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
|
||||||
|
|
||||||
|
if (VARIANT > 0 && size < 43) {
|
||||||
|
memset(output, 0, 32 * 4);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 4; i++) {
|
||||||
|
keccak(input + size * i, static_cast<int>(size), ctx[i]->state, 200);
|
||||||
|
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||||
|
}
|
||||||
|
|
||||||
|
CONST_INIT(ctx[0], 0);
|
||||||
|
CONST_INIT(ctx[1], 1);
|
||||||
|
CONST_INIT(ctx[2], 2);
|
||||||
|
CONST_INIT(ctx[3], 3);
|
||||||
|
|
||||||
|
uint8_t* l0 = ctx[0]->memory;
|
||||||
|
uint8_t* l1 = ctx[1]->memory;
|
||||||
|
uint8_t* l2 = ctx[2]->memory;
|
||||||
|
uint8_t* l3 = ctx[3]->memory;
|
||||||
|
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||||
|
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
|
||||||
|
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
|
||||||
|
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
|
||||||
|
|
||||||
|
__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
|
||||||
|
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
|
__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
|
||||||
|
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||||
|
__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
|
||||||
|
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
|
||||||
|
__m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
|
||||||
|
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
|
||||||
|
__m128i cx0 = _mm_set_epi64x(0, 0);
|
||||||
|
__m128i cx1 = _mm_set_epi64x(0, 0);
|
||||||
|
__m128i cx2 = _mm_set_epi64x(0, 0);
|
||||||
|
__m128i cx3 = _mm_set_epi64x(0, 0);
|
||||||
|
|
||||||
|
uint64_t idx0, idx1, idx2, idx3;
|
||||||
|
idx0 = _mm_cvtsi128_si64(ax0);
|
||||||
|
idx1 = _mm_cvtsi128_si64(ax1);
|
||||||
|
idx2 = _mm_cvtsi128_si64(ax2);
|
||||||
|
idx3 = _mm_cvtsi128_si64(ax3);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < ITERATIONS / 2; i++)
|
||||||
|
{
|
||||||
|
uint64_t hi, lo;
|
||||||
|
__m128i *ptr0, *ptr1, *ptr2, *ptr3;
|
||||||
|
|
||||||
|
// EVEN ROUND
|
||||||
|
CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
|
||||||
|
|
||||||
|
CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);
|
||||||
|
|
||||||
|
CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
|
||||||
|
|
||||||
|
CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
|
||||||
|
CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
|
||||||
|
CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
|
||||||
|
CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
|
||||||
|
|
||||||
|
// ODD ROUND
|
||||||
|
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
|
||||||
|
|
||||||
|
CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
|
||||||
|
|
||||||
|
CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
|
||||||
|
|
||||||
|
CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
|
||||||
|
CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
|
||||||
|
CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
|
||||||
|
CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 4; i++) {
|
||||||
|
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||||
|
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||||
|
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
|
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
|
||||||
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
||||||
{
|
{
|
||||||
|
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
|
||||||
|
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
|
||||||
|
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
|
||||||
|
|
||||||
|
if (VARIANT > 0 && size < 43) {
|
||||||
|
memset(output, 0, 32 * 5);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 5; i++) {
|
||||||
|
keccak(input + size * i, static_cast<int>(size), ctx[i]->state, 200);
|
||||||
|
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||||
|
}
|
||||||
|
|
||||||
|
CONST_INIT(ctx[0], 0);
|
||||||
|
CONST_INIT(ctx[1], 1);
|
||||||
|
CONST_INIT(ctx[2], 2);
|
||||||
|
CONST_INIT(ctx[3], 3);
|
||||||
|
CONST_INIT(ctx[4], 4);
|
||||||
|
|
||||||
|
uint8_t* l0 = ctx[0]->memory;
|
||||||
|
uint8_t* l1 = ctx[1]->memory;
|
||||||
|
uint8_t* l2 = ctx[2]->memory;
|
||||||
|
uint8_t* l3 = ctx[3]->memory;
|
||||||
|
uint8_t* l4 = ctx[4]->memory;
|
||||||
|
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||||
|
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
|
||||||
|
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
|
||||||
|
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
|
||||||
|
uint64_t* h4 = reinterpret_cast<uint64_t*>(ctx[4]->state);
|
||||||
|
|
||||||
|
__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
|
||||||
|
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
|
__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
|
||||||
|
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||||
|
__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
|
||||||
|
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
|
||||||
|
__m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
|
||||||
|
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
|
||||||
|
__m128i ax4 = _mm_set_epi64x(h4[1] ^ h4[5], h4[0] ^ h4[4]);
|
||||||
|
__m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
|
||||||
|
__m128i cx0 = _mm_set_epi64x(0, 0);
|
||||||
|
__m128i cx1 = _mm_set_epi64x(0, 0);
|
||||||
|
__m128i cx2 = _mm_set_epi64x(0, 0);
|
||||||
|
__m128i cx3 = _mm_set_epi64x(0, 0);
|
||||||
|
__m128i cx4 = _mm_set_epi64x(0, 0);
|
||||||
|
|
||||||
|
uint64_t idx0, idx1, idx2, idx3, idx4;
|
||||||
|
idx0 = _mm_cvtsi128_si64(ax0);
|
||||||
|
idx1 = _mm_cvtsi128_si64(ax1);
|
||||||
|
idx2 = _mm_cvtsi128_si64(ax2);
|
||||||
|
idx3 = _mm_cvtsi128_si64(ax3);
|
||||||
|
idx4 = _mm_cvtsi128_si64(ax4);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < ITERATIONS / 2; i++)
|
||||||
|
{
|
||||||
|
uint64_t hi, lo;
|
||||||
|
__m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
|
||||||
|
|
||||||
|
// EVEN ROUND
|
||||||
|
CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
|
||||||
|
CN_STEP1(ax4, bx4, cx4, l4, ptr4, idx4);
|
||||||
|
|
||||||
|
CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);
|
||||||
|
CN_STEP2(ax4, bx4, cx4, l4, ptr4, idx4);
|
||||||
|
|
||||||
|
CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
|
||||||
|
CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4);
|
||||||
|
|
||||||
|
CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
|
||||||
|
CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
|
||||||
|
CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
|
||||||
|
CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
|
||||||
|
CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
|
||||||
|
|
||||||
|
// ODD ROUND
|
||||||
|
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
|
||||||
|
CN_STEP1(ax4, cx4, bx4, l4, ptr4, idx4);
|
||||||
|
|
||||||
|
CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
|
||||||
|
CN_STEP2(ax4, cx4, bx4, l4, ptr4, idx4);
|
||||||
|
|
||||||
|
CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
|
||||||
|
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
|
||||||
|
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
|
||||||
|
CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
|
||||||
|
CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4);
|
||||||
|
|
||||||
|
CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
|
||||||
|
CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
|
||||||
|
CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
|
||||||
|
CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
|
||||||
|
CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 5; i++) {
|
||||||
|
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||||
|
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||||
|
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __CRYPTONIGHT_X86_H__ */
|
#endif /* __CRYPTONIGHT_X86_H__ */
|
||||||
|
|
|
@ -105,6 +105,23 @@ static inline __m128i soft_aesenc(const uint32_t* in, __m128i key)
|
||||||
return _mm_xor_si128(out, key);
|
return _mm_xor_si128(out, key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline __m128i soft_aesenc(__m128i in, __m128i key)
|
||||||
|
{
|
||||||
|
uint32_t x0, x1, x2, x3;
|
||||||
|
x0 = _mm_cvtsi128_si32(in);
|
||||||
|
x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
|
||||||
|
x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
|
||||||
|
x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
|
||||||
|
|
||||||
|
__m128i out = _mm_set_epi32(
|
||||||
|
(saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
|
||||||
|
(saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
|
||||||
|
(saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
|
||||||
|
(saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
|
||||||
|
|
||||||
|
return _mm_xor_si128(out, key);
|
||||||
|
}
|
||||||
|
|
||||||
static inline uint32_t sub_word(uint32_t key)
|
static inline uint32_t sub_word(uint32_t key)
|
||||||
{
|
{
|
||||||
return (saes_sbox[key >> 24 ] << 24) |
|
return (saes_sbox[key >> 24 ] << 24) |
|
||||||
|
|
|
@ -104,7 +104,7 @@ void MultiWorker<N>::start()
|
||||||
|
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < m_state.job.target()) {
|
if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < m_state.job.target()) {
|
||||||
Workers::submit(JobResult(m_state.job.poolId(), m_state.job.id(), *nonce(i), m_hash, m_state.job.diff()));
|
Workers::submit(JobResult(m_state.job.poolId(), m_state.job.id(), *nonce(i), m_hash + (i * 32), m_state.job.diff()));
|
||||||
}
|
}
|
||||||
|
|
||||||
*nonce(i) += 1;
|
*nonce(i) += 1;
|
||||||
|
|
Loading…
Reference in a new issue