Mirror of https://github.com/xmrig/xmrig.git (synced 2025-01-11 05:14:40 +00:00)
RandomX: added BMI2 version for scratchpad prefetch
The BMI2 version saves 1 instruction and 1 byte in the main loop.
This commit is contained in:
parent 3ac8f6b23a
commit d443dd86f1
6 changed files with 38 additions and 13 deletions
|
@ -110,8 +110,6 @@ namespace randomx {
|
||||||
#define ADDR(x) ((uint8_t*)&x)
|
#define ADDR(x) ((uint8_t*)&x)
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
#define codePrefetchScratchpad ADDR(randomx_prefetch_scratchpad)
|
|
||||||
#define codePrefetchScratchpadEnd ADDR(randomx_prefetch_scratchpad_end)
|
|
||||||
#define codePrologue ADDR(randomx_program_prologue)
|
#define codePrologue ADDR(randomx_program_prologue)
|
||||||
#define codeLoopBegin ADDR(randomx_program_loop_begin)
|
#define codeLoopBegin ADDR(randomx_program_loop_begin)
|
||||||
#define codeLoopLoad ADDR(randomx_program_loop_load)
|
#define codeLoopLoad ADDR(randomx_program_loop_load)
|
||||||
|
@ -134,7 +132,6 @@ namespace randomx {
|
||||||
#define codeShhEnd ADDR(randomx_sshash_end)
|
#define codeShhEnd ADDR(randomx_sshash_end)
|
||||||
#define codeShhInit ADDR(randomx_sshash_init)
|
#define codeShhInit ADDR(randomx_sshash_init)
|
||||||
|
|
||||||
#define prefetchScratchpadSize (codePrefetchScratchpadEnd - codePrefetchScratchpad)
|
|
||||||
#define prologueSize (codeLoopBegin - codePrologue)
|
#define prologueSize (codeLoopBegin - codePrologue)
|
||||||
#define loopLoadSize (codeLoopLoadXOP - codeLoopLoad)
|
#define loopLoadSize (codeLoopLoadXOP - codeLoopLoad)
|
||||||
#define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP)
|
#define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP)
|
||||||
|
@ -467,7 +464,7 @@ namespace randomx {
|
||||||
void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) {
|
void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) {
|
||||||
*(uint64_t*)(code + codePos) = 0xc03349c08b49ull + (static_cast<uint64_t>(pcfg.readReg0) << 16) + (static_cast<uint64_t>(pcfg.readReg1) << 40);
|
*(uint64_t*)(code + codePos) = 0xc03349c08b49ull + (static_cast<uint64_t>(pcfg.readReg0) << 16) + (static_cast<uint64_t>(pcfg.readReg1) << 40);
|
||||||
codePos += 6;
|
codePos += 6;
|
||||||
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos);
|
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, RandomX_CurrentConfig.codePrefetchScratchpadTweakedSize, code, codePos);
|
||||||
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||||
codePos += loopStoreSize;
|
codePos += loopStoreSize;
|
||||||
|
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.global DECL(randomx_prefetch_scratchpad)
|
.global DECL(randomx_prefetch_scratchpad)
|
||||||
|
.global DECL(randomx_prefetch_scratchpad_bmi2)
|
||||||
.global DECL(randomx_prefetch_scratchpad_end)
|
.global DECL(randomx_prefetch_scratchpad_end)
|
||||||
.global DECL(randomx_program_prologue)
|
.global DECL(randomx_program_prologue)
|
||||||
.global DECL(randomx_program_prologue_first_load)
|
.global DECL(randomx_program_prologue_first_load)
|
||||||
|
@ -80,6 +81,13 @@ DECL(randomx_prefetch_scratchpad):
|
||||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||||
prefetcht0 [rsi+rdx]
|
prefetcht0 [rsi+rdx]
|
||||||
|
|
||||||
|
DECL(randomx_prefetch_scratchpad_bmi2):
|
||||||
|
rorx rdx, rax, 32
|
||||||
|
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
prefetcht0 [rsi+rax]
|
||||||
|
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
prefetcht0 [rsi+rdx]
|
||||||
|
|
||||||
DECL(randomx_prefetch_scratchpad_end):
|
DECL(randomx_prefetch_scratchpad_end):
|
||||||
|
|
||||||
.balign 64
|
.balign 64
|
||||||
|
|
|
@ -29,6 +29,7 @@ IFDEF RAX
|
||||||
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||||
|
|
||||||
PUBLIC randomx_prefetch_scratchpad
|
PUBLIC randomx_prefetch_scratchpad
|
||||||
|
PUBLIC randomx_prefetch_scratchpad_bmi2
|
||||||
PUBLIC randomx_prefetch_scratchpad_end
|
PUBLIC randomx_prefetch_scratchpad_end
|
||||||
PUBLIC randomx_program_prologue
|
PUBLIC randomx_program_prologue
|
||||||
PUBLIC randomx_program_prologue_first_load
|
PUBLIC randomx_program_prologue_first_load
|
||||||
|
@ -70,6 +71,14 @@ randomx_prefetch_scratchpad PROC
|
||||||
prefetcht0 [rsi+rdx]
|
prefetcht0 [rsi+rdx]
|
||||||
randomx_prefetch_scratchpad ENDP
|
randomx_prefetch_scratchpad ENDP
|
||||||
|
|
||||||
|
randomx_prefetch_scratchpad_bmi2 PROC
|
||||||
|
rorx rdx, rax, 32
|
||||||
|
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
prefetcht0 [rsi+rax]
|
||||||
|
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
prefetcht0 [rsi+rdx]
|
||||||
|
randomx_prefetch_scratchpad_bmi2 ENDP
|
||||||
|
|
||||||
randomx_prefetch_scratchpad_end PROC
|
randomx_prefetch_scratchpad_end PROC
|
||||||
randomx_prefetch_scratchpad_end ENDP
|
randomx_prefetch_scratchpad_end ENDP
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void randomx_prefetch_scratchpad();
|
void randomx_prefetch_scratchpad();
|
||||||
|
void randomx_prefetch_scratchpad_bmi2();
|
||||||
void randomx_prefetch_scratchpad_end();
|
void randomx_prefetch_scratchpad_end();
|
||||||
void randomx_program_prologue();
|
void randomx_program_prologue();
|
||||||
void randomx_program_prologue_first_load();
|
void randomx_program_prologue_first_load();
|
||||||
|
|
|
@ -177,10 +177,17 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||||
memcpy(codeReadDatasetRyzenTweaked, a, b - a);
|
memcpy(codeReadDatasetRyzenTweaked, a, b - a);
|
||||||
codeReadDatasetRyzenTweakedSize = b - a;
|
codeReadDatasetRyzenTweakedSize = b - a;
|
||||||
}
|
}
|
||||||
{
|
if (xmrig::Cpu::info()->hasBMI2()) {
|
||||||
const uint8_t* a = addr(randomx_prefetch_scratchpad);
|
const uint8_t* a = addr(randomx_prefetch_scratchpad_bmi2);
|
||||||
const uint8_t* b = addr(randomx_prefetch_scratchpad_end);
|
const uint8_t* b = addr(randomx_prefetch_scratchpad_end);
|
||||||
memcpy(codePrefetchScratchpadTweaked, a, b - a);
|
memcpy(codePrefetchScratchpadTweaked, a, b - a);
|
||||||
|
codePrefetchScratchpadTweakedSize = b - a;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
const uint8_t* a = addr(randomx_prefetch_scratchpad);
|
||||||
|
const uint8_t* b = addr(randomx_prefetch_scratchpad_bmi2);
|
||||||
|
memcpy(codePrefetchScratchpadTweaked, a, b - a);
|
||||||
|
codePrefetchScratchpadTweakedSize = b - a;
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
@ -217,13 +224,15 @@ void RandomX_ConfigurationBase::Apply()
|
||||||
//*(uint32_t*)(codeReadDatasetTweaked + 24) = DatasetBaseMask;
|
//*(uint32_t*)(codeReadDatasetTweaked + 24) = DatasetBaseMask;
|
||||||
//*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
//*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
||||||
|
|
||||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
const bool hasBMI2 = xmrig::Cpu::info()->hasBMI2();
|
||||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
|
||||||
|
*(uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 7 : 4)) = ScratchpadL3Mask64_Calculated;
|
||||||
|
*(uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 17 : 18)) = ScratchpadL3Mask64_Calculated;
|
||||||
|
|
||||||
// Apply scratchpad prefetch mode
|
// Apply scratchpad prefetch mode
|
||||||
{
|
{
|
||||||
uint32_t* a = (uint32_t*)(codePrefetchScratchpadTweaked + 8);
|
uint32_t* a = (uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 11 : 8));
|
||||||
uint32_t* b = (uint32_t*)(codePrefetchScratchpadTweaked + 22);
|
uint32_t* b = (uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 21 : 22));
|
||||||
|
|
||||||
switch (scratchpadPrefetchMode)
|
switch (scratchpadPrefetchMode)
|
||||||
{
|
{
|
||||||
|
@ -290,7 +299,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
|
||||||
INST_HANDLE(IMUL_M, IMUL_R);
|
INST_HANDLE(IMUL_M, IMUL_R);
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
if (hasBMI2) {
|
||||||
INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M);
|
INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M);
|
||||||
INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R);
|
INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R);
|
||||||
}
|
}
|
||||||
|
@ -332,7 +341,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
if (hasBMI2) {
|
||||||
INST_HANDLE2(CFROUND, CFROUND_BMI2, CBRANCH);
|
INST_HANDLE2(CFROUND, CFROUND_BMI2, CBRANCH);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -129,7 +129,8 @@ struct RandomX_ConfigurationBase
|
||||||
uint32_t codeReadDatasetTweakedSize;
|
uint32_t codeReadDatasetTweakedSize;
|
||||||
uint8_t codeReadDatasetRyzenTweaked[72];
|
uint8_t codeReadDatasetRyzenTweaked[72];
|
||||||
uint32_t codeReadDatasetRyzenTweakedSize;
|
uint32_t codeReadDatasetRyzenTweakedSize;
|
||||||
uint8_t codePrefetchScratchpadTweaked[32];
|
uint8_t codePrefetchScratchpadTweaked[28];
|
||||||
|
uint32_t codePrefetchScratchpadTweakedSize;
|
||||||
|
|
||||||
uint32_t AddressMask_Calculated[4];
|
uint32_t AddressMask_Calculated[4];
|
||||||
uint32_t ScratchpadL3Mask_Calculated;
|
uint32_t ScratchpadL3Mask_Calculated;
|
||||||
|
|
Loading…
Reference in a new issue