Merge pull request #1248 from SChernykh/evo

Fixed code cache cleanup on iOS/Darwin
This commit is contained in:
xmrig 2019-10-18 23:27:39 +07:00 committed by GitHub
commit 128c5f67ad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 14 deletions

View file

@ -54,6 +54,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -gdwarf-2") #set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -gdwarf-2")
add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE)
elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL")

View file

@ -73,7 +73,7 @@ static size_t CalcDatasetItemSize()
{ {
return return
// Prologue // Prologue
((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) + ((uint8_t*)randomx_calc_dataset_item_aarch64_prefetch - (uint8_t*)randomx_calc_dataset_item_aarch64) +
// Main loop // Main loop
RandomX_CurrentConfig.CacheAccesses * ( RandomX_CurrentConfig.CacheAccesses * (
// Main loop prologue // Main loop prologue
@ -82,7 +82,7 @@ static size_t CalcDatasetItemSize()
((RandomX_CurrentConfig.SuperscalarLatency * 3) + 2) * 16 + ((RandomX_CurrentConfig.SuperscalarLatency * 3) + 2) * 16 +
// Main loop epilogue // Main loop epilogue
((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4 ((uint8_t*)randomx_calc_dataset_item_aarch64_store_result - (uint8_t*)randomx_calc_dataset_item_aarch64_mix) + 4
) + ) +
// Epilogue // Epilogue
((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result); ((uint8_t*)randomx_calc_dataset_item_aarch64_end - (uint8_t*)randomx_calc_dataset_item_aarch64_store_result);
} }
@ -103,6 +103,21 @@ JitCompilerA64::~JitCompilerA64()
freePagedMemory(code, CodeSize + CalcDatasetItemSize()); freePagedMemory(code, CodeSize + CalcDatasetItemSize());
} }
#if defined(ios_HOST_OS) || defined (darwin_HOST_OS)
void sys_icache_invalidate(void *start, size_t len);
#endif
static void clear_code_cache(char* p1, char* p2)
{
# if defined(ios_HOST_OS) || defined (darwin_HOST_OS)
sys_icache_invalidate(p1, static_cast<size_t>(p2 - p1));
# elif defined (HAVE_BUILTIN_CLEAR_CACHE) || defined (__GNUC__)
__builtin___clear_cache(p1, p2);
# else
# error "No clear code cache function found"
# endif
}
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config) void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config)
{ {
uint32_t codePos = MainLoopBegin + 4; uint32_t codePos = MainLoopBegin + 4;
@ -149,9 +164,7 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64); codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos); emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
# ifdef HAVE_BUILTIN_CLEAR_CACHE clear_code_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
__builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
# endif
} }
void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset) void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset)
@ -206,9 +219,7 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos); emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos);
emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos); emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos);
# ifdef HAVE_BUILTIN_CLEAR_CACHE clear_code_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
__builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
# endif
} }
template<size_t N> template<size_t N>
@ -324,9 +335,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s
memcpy(code + codePos, p1, p2 - p1); memcpy(code + codePos, p1, p2 - p1);
codePos += p2 - p1; codePos += p2 - p1;
# ifdef HAVE_BUILTIN_CLEAR_CACHE clear_code_cache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
__builtin___clear_cache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
# endif
} }
template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES], std::vector<uint64_t> &reciprocalCache); template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES], std::vector<uint64_t> &reciprocalCache);
@ -858,7 +867,7 @@ void JitCompilerA64::h_FADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t dst = (instr.dst % 4) + 16; const uint32_t dst = (instr.dst % 4) + 16;
constexpr uint32_t tmp_reg_fp = 28; constexpr uint32_t tmp_reg_fp = 28;
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k); emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
emit32(ARMV8A::FADD | dst | (dst << 5) | (tmp_reg_fp << 16), code, k); emit32(ARMV8A::FADD | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);
@ -881,7 +890,7 @@ void JitCompilerA64::h_FSUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t dst = (instr.dst % 4) + 16; const uint32_t dst = (instr.dst % 4) + 16;
constexpr uint32_t tmp_reg_fp = 28; constexpr uint32_t tmp_reg_fp = 28;
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k); emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
emit32(ARMV8A::FSUB | dst | (dst << 5) | (tmp_reg_fp << 16), code, k); emit32(ARMV8A::FSUB | dst | (dst << 5) | (tmp_reg_fp << 16), code, k);
@ -911,7 +920,7 @@ void JitCompilerA64::h_FDIV_M(Instruction& instr, uint32_t& codePos)
const uint32_t dst = (instr.dst % 4) + 20; const uint32_t dst = (instr.dst % 4) + 20;
constexpr uint32_t tmp_reg_fp = 28; constexpr uint32_t tmp_reg_fp = 28;
emitMemLoadFP<tmp_reg_fp>(src, instr, code, k); emitMemLoadFP<tmp_reg_fp>(src, instr, code, k);
// and tmp_reg_fp, tmp_reg_fp, and_mask_reg // and tmp_reg_fp, tmp_reg_fp, and_mask_reg
emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k); emit32(0x4E201C00 | tmp_reg_fp | (tmp_reg_fp << 5) | (29 << 16), code, k);