Merge pull request #5184

0ec360b4 CryptonightR: define out i386/x86_64 specific code on other archs (moneromooo-monero)
773509dd slow-hash: fix build on arm (moneromooo-monero)
0cb6a763 cmake: ARCH_ID fixes for cross compilation (TheCharlatan)
This commit is contained in:
Riccardo Spagni 2019-02-27 14:44:59 +02:00
commit 7c863a9fa5
No known key found for this signature in database
GPG key ID: 55432DF31CCD4FCD
7 changed files with 60 additions and 27 deletions

View file

@ -115,6 +115,9 @@ string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER)
# to identify the target architecture, to direct logic in this cmake script.
# Since ARCH is a cached variable, it will not be set on first cmake invocation.
if (NOT ARCH OR ARCH STREQUAL "" OR ARCH STREQUAL "native" OR ARCH STREQUAL "default") if (NOT ARCH OR ARCH STREQUAL "" OR ARCH STREQUAL "native" OR ARCH STREQUAL "default")
# A cross-compilation setup may leave CMAKE_SYSTEM_PROCESSOR empty or not
# defined at all. Fall back to the host processor so that ARCH_ID, which is
# derived from this variable right after, receives a usable value.
# The variable is expanded inside quotes on purpose: an unquoted, undefined
# name in if() is compared as the literal text "CMAKE_SYSTEM_PROCESSOR", which
# would never equal "" and would silently skip the fallback.
if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "")
  set(CMAKE_SYSTEM_PROCESSOR "${CMAKE_HOST_SYSTEM_PROCESSOR}")
endif()
set(ARCH_ID "${CMAKE_SYSTEM_PROCESSOR}") set(ARCH_ID "${CMAKE_SYSTEM_PROCESSOR}")
else() else()
set(ARCH_ID "${ARCH}") set(ARCH_ID "${ARCH}")

View file

@ -41,6 +41,8 @@ set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) # Find programs on host
set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) # Find libs in target set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) # Find libs in target
set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) # Find includes in target set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) # Find includes in target
# Seed the target processor cache entry from the machine running CMake.
# FORCE overwrites any value already stored in the cache.
# NOTE(review): in a cross build the host CPU is not necessarily the target
# CPU - confirm that consumers rely on ARCH_ID rather than on this value.
set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR} CACHE STRING "" FORCE)
# specify the cross compiler to be used. Darwin uses clang provided by the SDK.
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
SET(CMAKE_C_COMPILER @prefix@/native/bin/clang) SET(CMAKE_C_COMPILER @prefix@/native/bin/clang)
@ -85,6 +87,11 @@ endif()
if(ARCHITECTURE STREQUAL "i686" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") if(ARCHITECTURE STREQUAL "i686" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
SET(LINUX_32 ON) SET(LINUX_32 ON)
SET(ARCH_ID "i386")
endif()
if(ARCHITECTURE STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
SET(ARCH_ID "x86_64")
endif() endif()
# Create a new global cmake flag that indicates building with depends

View file

@ -46,9 +46,12 @@ set(crypto_sources
skein.c skein.c
slow-hash.c slow-hash.c
CryptonightR_JIT.c CryptonightR_JIT.c
CryptonightR_template.S
tree-hash.c) tree-hash.c)
# CryptonightR_template.S is only compiled for x86-family targets; every other
# ARCH_ID leaves crypto_sources untouched. At most one spelling can match, so
# the file is appended at most once.
foreach(x86_arch_id IN ITEMS "i386" "x86_64" "x86-64")
  if(ARCH_ID STREQUAL "${x86_arch_id}")
    list(APPEND crypto_sources CryptonightR_template.S)
  endif()
endforeach()
set(crypto_headers) set(crypto_headers)
set(crypto_private_headers set(crypto_private_headers

View file

@ -12,6 +12,7 @@
#include "CryptonightR_template.h" #include "CryptonightR_template.h"
static const uint8_t prologue[] = { static const uint8_t prologue[] = {
#if defined __i386 || defined __x86_64__
0x4C, 0x8B, 0xD7, // mov r10, rdi 0x4C, 0x8B, 0xD7, // mov r10, rdi
0x53, // push rbx 0x53, // push rbx
0x55, // push rbp 0x55, // push rbp
@ -26,9 +27,11 @@ static const uint8_t prologue[] = {
0x41, 0x8B, 0x42, 0x18, // mov eax, DWORD PTR [r10+24] 0x41, 0x8B, 0x42, 0x18, // mov eax, DWORD PTR [r10+24]
0x41, 0x8B, 0x52, 0x1C, // mov edx, DWORD PTR [r10+28] 0x41, 0x8B, 0x52, 0x1C, // mov edx, DWORD PTR [r10+28]
0x45, 0x8B, 0x4A, 0x20, // mov r9d, DWORD PTR [r10+32] 0x45, 0x8B, 0x4A, 0x20, // mov r9d, DWORD PTR [r10+32]
#endif
}; };
static const uint8_t epilogue[] = { static const uint8_t epilogue[] = {
#if defined __i386 || defined __x86_64__
0x49, 0x8B, 0xE3, // mov rsp, r11 0x49, 0x8B, 0xE3, // mov rsp, r11
0x41, 0x89, 0x1A, // mov DWORD PTR [r10], ebx 0x41, 0x89, 0x1A, // mov DWORD PTR [r10], ebx
0x41, 0x89, 0x72, 0x04, // mov DWORD PTR [r10+4], esi 0x41, 0x89, 0x72, 0x04, // mov DWORD PTR [r10+4], esi
@ -38,6 +41,7 @@ static const uint8_t epilogue[] = {
0x5D, // pop rbp 0x5D, // pop rbp
0x5B, // pop rbx 0x5B, // pop rbx
0xC3, // ret 0xC3, // ret
#endif
}; };
#define APPEND_CODE(src, size) \ #define APPEND_CODE(src, size) \
@ -50,6 +54,7 @@ static const uint8_t epilogue[] = {
int v4_generate_JIT_code(const struct V4_Instruction* code, v4_random_math_JIT_func buf, const size_t buf_size) int v4_generate_JIT_code(const struct V4_Instruction* code, v4_random_math_JIT_func buf, const size_t buf_size)
{ {
#if defined __i386 || defined __x86_64__
uint8_t* JIT_code = (uint8_t*) buf; uint8_t* JIT_code = (uint8_t*) buf;
const uint8_t* JIT_code_end = JIT_code + buf_size; const uint8_t* JIT_code_end = JIT_code + buf_size;
@ -99,4 +104,7 @@ int v4_generate_JIT_code(const struct V4_Instruction* code, v4_random_math_JIT_f
__builtin___clear_cache((char*)buf, (char*)JIT_code); __builtin___clear_cache((char*)buf, (char*)JIT_code);
return 0; return 0;
#else
return 1;
#endif
} }

View file

@ -8,7 +8,11 @@
// - Call v4_generate_JIT_code with "buf" pointed to memory allocated on previous step
// - Call the generated code instead of "v4_random_math(code, r)", omit the "code" parameter
typedef void (*v4_random_math_JIT_func)(uint32_t* r) __attribute__((sysv_abi)); typedef void (*v4_random_math_JIT_func)(uint32_t* r)
#if defined __i386 || defined __x86_64__
__attribute__((sysv_abi))
#endif
;
// Given the random math sequence, generates machine code (x86-64) for it
// Returns 0 if code was generated successfully

View file

@ -1,6 +1,8 @@
#ifndef CRYPTONIGHTR_TEMPLATE_H #ifndef CRYPTONIGHTR_TEMPLATE_H
#define CRYPTONIGHTR_TEMPLATE_H #define CRYPTONIGHTR_TEMPLATE_H
#if defined __i386 || defined __x86_64__
void CryptonightR_instruction0(void); void CryptonightR_instruction0(void);
void CryptonightR_instruction1(void); void CryptonightR_instruction1(void);
void CryptonightR_instruction2(void); void CryptonightR_instruction2(void);
@ -1036,4 +1038,6 @@ const void* instructions_mov[257] = {
CryptonightR_instruction_mov256, CryptonightR_instruction_mov256,
}; };
#endif
#endif // CRYPTONIGHTR_TEMPLATE_H #endif // CRYPTONIGHTR_TEMPLATE_H

View file

@ -65,6 +65,31 @@ static void local_abort(const char *msg)
#endif #endif
} }
// Cached decision on whether the v4 JIT is used: -1 means "not decided yet",
// otherwise 0 (disabled) or 1 (enabled).
volatile int use_v4_jit_flag = -1;

// Reports whether the v4 JIT should be used.
//
// On x86_64 the MONERO_USE_CNV4_JIT environment variable is read the first
// time through and the verdict is stored in use_v4_jit_flag: an unset
// variable, "0" or "no" disables the JIT, any other value enables it.
// Later calls return the stored verdict without reading the environment.
// On every other architecture the result is always 0.
static inline int use_v4_jit(void)
{
#if defined(__x86_64__)
  if (use_v4_jit_flag == -1)
  {
    const char *setting = getenv("MONERO_USE_CNV4_JIT");
    const int disabled = !setting || strcmp(setting, "0") == 0 || strcmp(setting, "no") == 0;

    use_v4_jit_flag = disabled ? 0 : 1;
  }
  return use_v4_jit_flag;
#else
  return 0;
#endif
}
#define VARIANT1_1(p) \ #define VARIANT1_1(p) \
do if (variant == 1) \ do if (variant == 1) \
{ \ { \
@ -494,31 +519,6 @@ STATIC INLINE int force_software_aes(void)
return use; return use;
} }
// Cached decision on whether the v4 JIT is used: -1 means "not decided yet",
// otherwise 0 (disabled) or 1 (enabled).
volatile int use_v4_jit_flag = -1;

// Reports whether the v4 JIT should be used.
//
// On x86_64 the MONERO_USE_CNV4_JIT environment variable is read the first
// time through and the verdict is stored in use_v4_jit_flag: an unset
// variable, "0" or "no" disables the JIT, any other value enables it.
// Later calls return the stored verdict without reading the environment.
// On every other architecture the result is always 0.
STATIC INLINE int use_v4_jit(void)
{
#if defined(__x86_64__)
  if (use_v4_jit_flag == -1)
  {
    const char *setting = getenv("MONERO_USE_CNV4_JIT");
    const int disabled = !setting || strcmp(setting, "0") == 0 || strcmp(setting, "no") == 0;

    use_v4_jit_flag = disabled ? 0 : 1;
  }
  return use_v4_jit_flag;
#else
  return 0;
#endif
}
STATIC INLINE int check_aes_hw(void) STATIC INLINE int check_aes_hw(void)
{ {
int cpuid_results[4]; int cpuid_results[4];
@ -1029,6 +1029,8 @@ void slow_hash_free_state(void)
#define U64(x) ((uint64_t *) (x)) #define U64(x) ((uint64_t *) (x))
// hp_jitfunc expands to a NULL pointer cast to v4_random_math_JIT_func.
// NOTE(review): presumably this lets code that mentions hp_jitfunc compile in
// a build that has no JIT buffer - confirm against the users of hp_jitfunc.
#define hp_jitfunc ((v4_random_math_JIT_func)NULL)
STATIC INLINE void xor64(uint64_t *a, const uint64_t b) STATIC INLINE void xor64(uint64_t *a, const uint64_t b)
{ {
*a ^= b; *a ^= b;
@ -1574,6 +1576,8 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
#else #else
// Portable implementation as a fallback
// In this portable fallback branch hp_jitfunc is a NULL pointer cast to
// v4_random_math_JIT_func.
// NOTE(review): presumably callers treat NULL as "no generated code" - verify
// where hp_jitfunc is consumed.
#define hp_jitfunc ((v4_random_math_JIT_func)NULL)
void slow_hash_allocate_state(void) void slow_hash_allocate_state(void)
{ {
// Do nothing, this is just to maintain compatibility with the upgraded slow-hash.c