Merge branch 'dev'

This commit is contained in:
XMRig 2020-02-02 13:29:13 +07:00
commit f864687a96
No known key found for this signature in database
GPG key ID: 446A53638BE94409
30 changed files with 264 additions and 88 deletions

View file

@ -1,3 +1,10 @@
# v5.5.2
- [#1500](https://github.com/xmrig/xmrig/pull/1500) Removed unnecessary code from RandomX JIT compiler.
- [#1502](https://github.com/xmrig/xmrig/pull/1502) Optimizations for AMD Bulldozer.
- [#1508](https://github.com/xmrig/xmrig/pull/1508) Added support for BMI2 instructions.
- [#1510](https://github.com/xmrig/xmrig/pull/1510) Optimized `CFROUND` instruction for RandomX.
- [#1520](https://github.com/xmrig/xmrig/pull/1520) Fixed thread affinity.
# v5.5.1
- [#1469](https://github.com/xmrig/xmrig/issues/1469) Fixed build with gcc 4.8.
- [#1473](https://github.com/xmrig/xmrig/pull/1473) Added RandomX auto-config for mobile Ryzen APUs.

View file

@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -62,6 +62,7 @@ public:
virtual Assembly::Id assembly() const = 0;
virtual bool hasAES() const = 0;
virtual bool hasAVX2() const = 0;
virtual bool hasBMI2() const = 0;
virtual bool hasOneGbPages() const = 0;
virtual const char *backend() const = 0;
virtual const char *brand() const = 0;

View file

@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -153,6 +153,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
}
m_avx2 = data.flags[CPU_FEATURE_AVX2] && data.flags[CPU_FEATURE_OSXSAVE];
m_bmi2 = data.flags[CPU_FEATURE_BMI2];
}

View file

@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -43,6 +43,7 @@ protected:
inline Assembly::Id assembly() const override { return m_assembly; }
inline bool hasAES() const override { return m_aes; }
inline bool hasAVX2() const override { return m_avx2; }
inline bool hasBMI2() const override { return m_bmi2; }
inline bool hasOneGbPages() const override { return m_pdpe1gb; }
inline const char *backend() const override { return m_backend; }
inline const char *brand() const override { return m_brand; }
@ -59,6 +60,7 @@ private:
Assembly m_assembly;
bool m_aes = false;
bool m_avx2 = false;
bool m_bmi2 = false;
bool m_L2_exclusive = false;
char m_backend[32]{};
char m_brand[64 + 5]{};

View file

@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -45,6 +45,10 @@
# define bit_AVX2 (1 << 5)
#endif
#ifndef bit_BMI2
# define bit_BMI2 (1 << 8)
#endif
#ifndef bit_PDPE1GB
# define bit_PDPE1GB (1 << 26)
#endif
@ -141,6 +145,12 @@ static inline bool has_avx2()
}
static inline bool has_bmi2()
{
return has_feature(EXTENDED_FEATURES, EBX_Reg, bit_BMI2);
}
static inline bool has_pdpe1gb()
{
return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, bit_PDPE1GB);
@ -154,6 +164,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
m_threads(std::thread::hardware_concurrency()),
m_aes(has_aes_ni()),
m_avx2(has_avx2()),
m_bmi2(has_bmi2()),
m_pdpe1gb(has_pdpe1gb())
{
cpu_brand_string(m_brand);

View file

@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -44,6 +44,7 @@ protected:
inline Assembly::Id assembly() const override { return m_assembly; }
inline bool hasAES() const override { return m_aes; }
inline bool hasAVX2() const override { return m_avx2; }
inline bool hasBMI2() const override { return m_bmi2; }
inline bool hasOneGbPages() const override { return m_pdpe1gb; }
inline const char *brand() const override { return m_brand; }
inline MsrMod msrMod() const override { return m_msrMod; }
@ -63,6 +64,7 @@ private:
Assembly m_assembly = Assembly::NONE;
bool m_aes = false;
const bool m_avx2 = false;
const bool m_bmi2 = false;
const bool m_pdpe1gb = false;
MsrMod m_msrMod = MSR_MOD_NONE;
Vendor m_vendor = VENDOR_UNKNOWN;

View file

@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by

View file

@ -6,8 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -30,6 +30,7 @@
#include <hwloc.h>
#include <thread>
bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
@ -42,8 +43,11 @@ bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
}
if (hwloc_set_cpubind(cpu->topology(), pu->cpuset, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT) >= 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
return true;
}
return hwloc_set_cpubind(cpu->topology(), pu->cpuset, HWLOC_CPUBIND_THREAD) >= 0;
const bool result = (hwloc_set_cpubind(cpu->topology(), pu->cpuset, HWLOC_CPUBIND_THREAD) >= 0);
std::this_thread::sleep_for(std::chrono::milliseconds(1));
return result;
}

View file

@ -4,8 +4,8 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -28,6 +28,7 @@
#include <stdlib.h>
#include <sys/resource.h>
#include <uv.h>
#include <thread>
#include "base/kernel/Platform.h"
@ -67,7 +68,9 @@ bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
thread_affinity_policy_data_t policy = { static_cast<integer_t>(cpu_id) };
mach_thread = pthread_mach_thread_np(pthread_self());
return thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, 1) == KERN_SUCCESS;
const bool result = (thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, 1) == KERN_SUCCESS);
std::this_thread::sleep_for(std::chrono::milliseconds(1));
return result;
}
#endif

View file

@ -4,8 +4,8 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -37,6 +37,7 @@
#include <sys/resource.h>
#include <unistd.h>
#include <uv.h>
#include <thread>
#include "base/kernel/Platform.h"
@ -92,10 +93,13 @@ bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
CPU_SET(cpu_id, &mn);
# ifndef __ANDROID__
return pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mn) == 0;
const bool result = (pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mn) == 0);
# else
return sched_setaffinity(gettid(), sizeof(cpu_set_t), &mn) == 0;
const bool result = (sched_setaffinity(gettid(), sizeof(cpu_set_t), &mn) == 0);
# endif
std::this_thread::sleep_for(std::chrono::milliseconds(1));
return result;
}
#endif

View file

@ -4,9 +4,8 @@
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -98,7 +97,9 @@ bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
LOG_ERR("Unable to set affinity. Windows supports only affinity up to 63.");
}
return SetThreadAffinityMask(GetCurrentThread(), 1ULL << cpu_id) != 0;
const bool result = (SetThreadAffinityMask(GetCurrentThread(), 1ULL << cpu_id) != 0);
Sleep(1);
return result;
}
#endif

View file

@ -1,5 +1,5 @@
;# save VM register values
add rsp, 24
add rsp, 40
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9

View file

@ -1,5 +1,5 @@
lea rcx, [rsi+rax]
mov [rsp+8], rcx
mov [rsp+16], rcx
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
@ -9,7 +9,7 @@
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
lea rcx, [rsi+rdx]
mov [rsp+16], rcx
mov [rsp+24], rcx
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
@ -18,11 +18,11 @@
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm13
andps xmm5, xmm13
andps xmm6, xmm13
andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14
andpd xmm4, xmm13
andpd xmm5, xmm13
andpd xmm6, xmm13
andpd xmm7, xmm13
orpd xmm4, xmm14
orpd xmm5, xmm14
orpd xmm6, xmm14
orpd xmm7, xmm14

View file

@ -0,0 +1,24 @@
lea rcx, [rsi+rax]
mov [rsp+8], rcx
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
lea rcx, [rsi+rdx]
mov [rsp+16], rcx
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
cvtdq2pd xmm3, qword ptr [rcx+24]
cvtdq2pd xmm4, qword ptr [rcx+32]
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
vpcmov xmm4, xmm4, xmm14, xmm13
vpcmov xmm5, xmm5, xmm14, xmm13
vpcmov xmm6, xmm6, xmm14, xmm13
vpcmov xmm7, xmm7, xmm14, xmm13

View file

@ -1,4 +1,4 @@
mov rcx, [rsp+16]
mov rcx, [rsp+24]
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
@ -7,7 +7,7 @@
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
mov rcx, [rsp+8]
mov rcx, [rsp+16]
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6

View file

@ -1,5 +1,5 @@
mantissaMask:
db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
db 0, 0, 192, 255, 255, 255, 255, 0, 0, 0, 192, 255, 255, 255, 255, 0
exp240:
db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
scaleMask:

View file

@ -89,6 +89,7 @@ namespace randomx {
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
const uint8_t* codeLoopLoadXOP = (uint8_t*)&randomx_program_loop_load_xop;
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
@ -104,7 +105,8 @@ namespace randomx {
const int32_t prefetchScratchpadSize = codePrefetchScratchpadEnd - codePrefetchScratchpad;
const int32_t prologueSize = codeLoopBegin - codePrologue;
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
const int32_t loopLoadSize = codeLoopLoadXOP - codeLoopLoad;
const int32_t loopLoadXOPSize = codeProgamStart - codeLoopLoadXOP;
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
@ -184,6 +186,7 @@ namespace randomx {
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
static const uint8_t REX_VPCMOV_XMM12[] = { 0x8F, 0x48, 0x18, 0xA2, 0xE6, 0xD0 };
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
static const uint8_t CALL = 0xe8;
@ -295,11 +298,23 @@ namespace randomx {
cpuid(1, info);
hasAVX = ((info[2] & (1 << 27)) != 0) && ((info[2] & (1 << 28)) != 0);
cpuid(0x80000001, info);
hasXOP = ((info[2] & (1 << 11)) != 0);
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2);
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize);
memcpy(code, codePrologue, prologueSize);
if (hasXOP) {
memcpy(code + prologueSize, codeLoopLoadXOP, loopLoadXOPSize);
}
else {
memcpy(code + prologueSize, codeLoopLoad, loopLoadSize);
}
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
codePosFirst = prologueSize + (hasXOP ? loopLoadXOPSize : loopLoadSize);
# ifdef XMRIG_FIX_RYZEN
mainLoopBounds.first = code + prologueSize;
mainLoopBounds.second = code + epilogueOffset;
@ -317,7 +332,7 @@ namespace randomx {
uint8_t* p;
uint32_t n;
if (flags & RANDOMX_FLAG_RYZEN) {
if (flags & RANDOMX_FLAG_AMD) {
p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked;
n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize;
}
@ -385,7 +400,7 @@ namespace randomx {
*(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
*(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
if (hasAVX) {
uint32_t* p = (uint32_t*)(code + codePos + 32);
uint32_t* p = (uint32_t*)(code + codePos + 67);
*p = (*p & 0xFF000000U) | 0x0077F8C5U;
}
@ -393,10 +408,8 @@ namespace randomx {
xmrig::Rx::setMainLoopBounds(mainLoopBounds);
# endif
codePos = prologueSize;
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
codePos += loopLoadSize;
memcpy(code + prologueSize - 48, &pcfg.eMask, sizeof(pcfg.eMask));
codePos = codePosFirst;
//mark all registers as used
uint64_t* r = (uint64_t*)registerUsage;
@ -708,14 +721,31 @@ namespace randomx {
uint8_t* const p = code;
int pos = codePos;
const uint32_t dst = instr.dst;
emit(REX_MOV_RR64, p, pos);
emitByte(0xc0 + instr.dst, p, pos);
emitByte(0xc0 + dst, p, pos);
emit(REX_MUL_R, p, pos);
emitByte(0xe0 + instr.src, p, pos);
emit(REX_MOV_R64R, p, pos);
emitByte(0xc2 + 8 * instr.dst, p, pos);
emitByte(0xc2 + 8 * dst, p, pos);
registerUsage[instr.dst] = pos;
registerUsage[dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_IMULH_R_BMI2(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
pos += 8;
registerUsage[dst] = pos;
codePos = pos;
}
@ -743,6 +773,29 @@ namespace randomx {
codePos = pos;
}
void JitCompilerX86::h_IMULH_M_BMI2(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<false>(instr, p, pos);
*(uint32_t*)(p + pos) = static_cast<uint32_t>(0xC4D08B49 + (dst << 16));
*(uint64_t*)(p + pos + 4) = 0x0E04F6FB62ULL + (dst << 27);
pos += 9;
}
else {
*(uint64_t*)(p + pos) = 0x86F6FB62C4D08B49ULL + (dst << 16) + (dst << 59);
*(uint32_t*)(p + pos + 8) = instr.getImm32() & ScratchpadL3Mask;
pos += 12;
}
registerUsage[dst] = pos;
codePos = pos;
}
void JitCompilerX86::h_ISMULH_R(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
@ -992,7 +1045,12 @@ namespace randomx {
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, p, pos);
emit(REX_CVTDQ2PD_XMM12, p, pos);
if (hasXOP) {
emit(REX_VPCMOV_XMM12, p, pos);
}
else {
emit(REX_ANDPS_XMM12, p, pos);
}
emit(REX_DIVPD, p, pos);
emitByte(0xe4 + 8 * dst, p, pos);
@ -1014,18 +1072,21 @@ namespace randomx {
uint8_t* const p = code;
int pos = codePos;
emit(REX_MOV_RR64, p, pos);
emitByte(0xc0 + instr.src, p, pos);
int rotate = (13 - (instr.getImm32() & 63)) & 63;
if (rotate != 0) {
emit(ROL_RAX, p, pos);
emitByte(rotate, p, pos);
}
if (vm_flags & RANDOMX_FLAG_RYZEN) {
emit(AND_OR_MOV_LDMXCSR_RYZEN, p, pos);
const uint32_t src = instr.src;
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
*(uint32_t*)(p + pos + 3) = 0x00C8C148 + (rotate << 24);
if (vm_flags & RANDOMX_FLAG_AMD) {
*(uint64_t*)(p + pos + 7) = 0x742024443B0CE083ULL;
*(uint8_t*)(p + pos + 15) = 8;
*(uint64_t*)(p + pos + 16) = 0x202444890414AE0FULL;
pos += 24;
}
else {
emit(AND_OR_MOV_LDMXCSR, p, pos);
*(uint64_t*)(p + pos + 7) = 0x0414AE0F0CE083ULL;
pos += 14;
}
codePos = pos;

View file

@ -73,10 +73,12 @@ namespace randomx {
std::pair<const void*, const void*> mainLoopBounds;
# endif
int32_t codePos;
int32_t codePosFirst;
uint32_t vm_flags;
static bool BranchesWithin32B;
bool hasAVX;
bool hasXOP;
static void applyTweaks();
void generateProgramPrologue(Program&, ProgramConfiguration&);
@ -121,7 +123,9 @@ namespace randomx {
void h_IMUL_R(const Instruction&);
void h_IMUL_M(const Instruction&);
void h_IMULH_R(const Instruction&);
void h_IMULH_R_BMI2(const Instruction&);
void h_IMULH_M(const Instruction&);
void h_IMULH_M_BMI2(const Instruction&);
void h_ISMULH_R(const Instruction&);
void h_ISMULH_M(const Instruction&);
void h_IMUL_RCP(const Instruction&);

View file

@ -43,6 +43,7 @@
.global DECL(randomx_program_prologue_first_load)
.global DECL(randomx_program_loop_begin)
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_loop_load_xop)
.global DECL(randomx_program_start)
.global DECL(randomx_program_read_dataset)
.global DECL(randomx_program_read_dataset_ryzen)
@ -93,8 +94,12 @@ DECL(randomx_program_prologue_first_load):
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
sub rsp, 24
stmxcsr dword ptr [rsp]
sub rsp, 40
mov dword ptr [rsp], 0x9FC0
mov dword ptr [rsp+4], 0xBFC0
mov dword ptr [rsp+8], 0xDFC0
mov dword ptr [rsp+12], 0xFFC0
mov dword ptr [rsp+32], -1
nop
nop
nop
@ -110,6 +115,9 @@ DECL(randomx_program_loop_begin):
DECL(randomx_program_loop_load):
#include "asm/program_loop_load.inc"
DECL(randomx_program_loop_load_xop):
#include "asm/program_loop_load_xop.inc"
DECL(randomx_program_start):
nop

View file

@ -34,6 +34,7 @@ PUBLIC randomx_program_prologue
PUBLIC randomx_program_prologue_first_load
PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_loop_load_xop
PUBLIC randomx_program_start
PUBLIC randomx_program_read_dataset
PUBLIC randomx_program_read_dataset_ryzen
@ -81,8 +82,12 @@ randomx_program_prologue_first_load PROC
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
sub rsp, 24
stmxcsr dword ptr [rsp]
sub rsp, 40
mov dword ptr [rsp], 9FC0h
mov dword ptr [rsp+4], 0BFC0h
mov dword ptr [rsp+8], 0DFC0h
mov dword ptr [rsp+12], 0FFC0h
mov dword ptr [rsp+32], -1
nop
nop
nop
@ -101,6 +106,10 @@ randomx_program_loop_load PROC
include asm/program_loop_load.inc
randomx_program_loop_load ENDP
randomx_program_loop_load_xop PROC
include asm/program_loop_load_xop.inc
randomx_program_loop_load_xop ENDP
randomx_program_start PROC
nop
randomx_program_start ENDP

View file

@ -35,6 +35,7 @@ extern "C" {
void randomx_program_prologue_first_load();
void randomx_program_loop_begin();
void randomx_program_loop_load();
void randomx_program_loop_load_xop();
void randomx_program_start();
void randomx_program_read_dataset();
void randomx_program_read_dataset_ryzen();

View file

@ -41,6 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/jit_compiler_a64_static.hpp"
#endif
#include "backend/cpu/Cpu.h"
#include <cassert>
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
@ -235,14 +237,29 @@ void RandomX_ConfigurationBase::Apply()
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
#define INST_HANDLE2(x, func_name, prev) \
CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); }
INST_HANDLE(IADD_RS, NULL);
INST_HANDLE(IADD_M, IADD_RS);
INST_HANDLE(ISUB_R, IADD_M);
INST_HANDLE(ISUB_M, ISUB_R);
INST_HANDLE(IMUL_R, ISUB_M);
INST_HANDLE(IMUL_M, IMUL_R);
#if defined(_M_X64) || defined(__x86_64__)
if (xmrig::Cpu::info()->hasBMI2()) {
INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M);
INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R);
}
else
#endif
{
INST_HANDLE(IMULH_R, IMUL_M);
INST_HANDLE(IMULH_M, IMULH_R);
}
INST_HANDLE(ISMULH_R, IMULH_M);
INST_HANDLE(ISMULH_M, ISMULH_R);
INST_HANDLE(IMUL_RCP, ISMULH_M);

View file

@ -49,7 +49,7 @@ enum randomx_flags {
RANDOMX_FLAG_FULL_MEM = 4,
RANDOMX_FLAG_JIT = 8,
RANDOMX_FLAG_1GB_PAGES = 16,
RANDOMX_FLAG_RYZEN = 64,
RANDOMX_FLAG_AMD = 64,
};

View file

@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by

View file

@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by

View file

@ -7,8 +7,8 @@
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 tevador <tevador@gmail.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -45,8 +45,12 @@ xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes, xmrig::
m_flags |= RANDOMX_FLAG_JIT;
}
if ((assembly == Assembly::RYZEN) || ((assembly == Assembly::AUTO) && (Cpu::info()->assembly() == Assembly::RYZEN))) {
m_flags |= RANDOMX_FLAG_RYZEN;
if (assembly == Assembly::AUTO) {
assembly = Cpu::info()->assembly();
}
if ((assembly == Assembly::RYZEN) || (assembly == Assembly::BULLDOZER)) {
m_flags |= RANDOMX_FLAG_AMD;
}
m_vm = randomx_create_vm(static_cast<randomx_flags>(m_flags), dataset->cache() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad);

View file

@ -7,8 +7,8 @@
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 tevador <tevador@gmail.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by

View file

@ -7,10 +7,10 @@
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 tevador <tevador@gmail.com>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
* Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -91,13 +91,19 @@ static MsrItem rdmsr(uint32_t reg)
}
static uint64_t get_masked_value(uint64_t old_value, uint64_t new_value, uint64_t mask)
{
return (new_value & mask) | (old_value & ~mask);
}
static bool wrmsr_on_cpu(uint32_t reg, uint32_t cpu, uint64_t value, uint64_t mask)
{
// If a bit in mask is set to 1, use new value, otherwise use old value
if (mask != MsrItem::kNoMask) {
uint64_t old_value;
if (rdmsr_on_cpu(reg, cpu, old_value)) {
value = (value & mask) | (old_value & ~mask);
value = get_masked_value(old_value, value, mask);
}
}
@ -162,7 +168,7 @@ static bool wrmsr(const MsrItems &preset, bool save)
if (save) {
for (const auto &i : preset) {
auto item = rdmsr(i.reg());
LOG_VERBOSE(CLEAR "%s" CYAN_BOLD("0x%08" PRIx32) CYAN(":0x%016" PRIx64) CYAN_BOLD(" -> 0x%016" PRIx64), tag, i.reg(), item.value(), i.value());
LOG_VERBOSE(CLEAR "%s" CYAN_BOLD("0x%08" PRIx32) CYAN(":0x%016" PRIx64) CYAN_BOLD(" -> 0x%016" PRIx64), tag, i.reg(), item.value(), get_masked_value(item.value(), i.value(), i.mask()));
if (item.isValid()) {
savedState.emplace_back(item);

View file

@ -10,8 +10,8 @@
* Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
* Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
* Copyright 2007-2009 hiyohiyo <https://openlibsys.org>, <hiyohiyo@crystalmark.info>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -217,6 +217,12 @@ static MsrItem rdmsr(HANDLE driver, uint32_t reg)
}
static uint64_t get_masked_value(uint64_t old_value, uint64_t new_value, uint64_t mask)
{
return (new_value & mask) | (old_value & ~mask);
}
static bool wrmsr(HANDLE driver, uint32_t reg, uint64_t value, uint64_t mask)
{
struct {
@ -230,7 +236,7 @@ static bool wrmsr(HANDLE driver, uint32_t reg, uint64_t value, uint64_t mask)
if (mask != MsrItem::kNoMask) {
uint64_t old_value;
if (rdmsr(driver, reg, old_value)) {
value = (value & mask) | (old_value & ~mask);
value = get_masked_value(old_value, value, mask);
}
}
@ -268,7 +274,7 @@ static bool wrmsr(const MsrItems &preset, bool save)
if (save) {
for (const auto &i : preset) {
auto item = rdmsr(driver, i.reg());
LOG_VERBOSE(CLEAR "%s" CYAN_BOLD("0x%08" PRIx32) CYAN(":0x%016" PRIx64) CYAN_BOLD(" -> 0x%016" PRIx64), tag, i.reg(), item.value(), i.value());
LOG_VERBOSE(CLEAR "%s" CYAN_BOLD("0x%08" PRIx32) CYAN(":0x%016" PRIx64) CYAN_BOLD(" -> 0x%016" PRIx64), tag, i.reg(), item.value(), get_masked_value(item.value(), i.value(), i.mask()));
if (item.isValid()) {
savedState.emplace_back(item);

View file

@ -28,7 +28,7 @@
#define APP_ID "xmrig"
#define APP_NAME "XMRig"
#define APP_DESC "XMRig miner"
#define APP_VERSION "5.5.1"
#define APP_VERSION "5.5.2-dev"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com"
@ -36,7 +36,7 @@
#define APP_VER_MAJOR 5
#define APP_VER_MINOR 5
#define APP_VER_PATCH 1
#define APP_VER_PATCH 2
#ifdef _MSC_VER
# if (_MSC_VER >= 1920)