From 1f609c7ebdb48acb5372417e4af07e57c9bc908e Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 23 Sep 2018 13:05:03 +0300 Subject: [PATCH 01/10] Update libcpuid to recent git version. --- src/3rdparty/libcpuid/asm-bits.c | 1661 +++++++++++---------- src/3rdparty/libcpuid/asm-bits.h | 124 +- src/3rdparty/libcpuid/cpuid_main.c | 56 +- src/3rdparty/libcpuid/libcpuid.h | 39 +- src/3rdparty/libcpuid/libcpuid_internal.h | 5 +- src/3rdparty/libcpuid/libcpuid_types.h | 28 +- src/3rdparty/libcpuid/recog_amd.c | 4 + src/3rdparty/libcpuid/recog_intel.c | 3 +- 8 files changed, 991 insertions(+), 929 deletions(-) diff --git a/src/3rdparty/libcpuid/asm-bits.c b/src/3rdparty/libcpuid/asm-bits.c index b8e32284f..bfabd404e 100644 --- a/src/3rdparty/libcpuid/asm-bits.c +++ b/src/3rdparty/libcpuid/asm-bits.c @@ -1,825 +1,836 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "libcpuid.h" -#include "asm-bits.h" - -int cpuid_exists_by_eflags(void) -{ -#if defined(PLATFORM_X64) - return 1; /* CPUID is always present on the x86_64 */ -#elif defined(PLATFORM_X86) -# if defined(COMPILER_GCC) - int result; - __asm __volatile( - " pushfl\n" - " pop %%eax\n" - " mov %%eax, %%ecx\n" - " xor $0x200000, %%eax\n" - " push %%eax\n" - " popfl\n" - " pushfl\n" - " pop %%eax\n" - " xor %%ecx, %%eax\n" - " mov %%eax, %0\n" - " push %%ecx\n" - " popfl\n" - : "=m"(result) - : :"eax", "ecx", "memory"); - return (result != 0); -# elif defined(COMPILER_MICROSOFT) - int result; - __asm { - pushfd - pop eax - mov ecx, eax - xor eax, 0x200000 - push eax - popfd - pushfd - pop eax - xor eax, ecx - mov result, eax - push ecx - popfd - }; - return (result != 0); -# else - return 0; -# endif /* COMPILER_MICROSOFT */ -#else - return 0; -#endif /* PLATFORM_X86 */ -} - -#ifdef INLINE_ASM_SUPPORTED -/* - * with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions - * are implemented in separate .asm files. Otherwise, use inline assembly - */ -void exec_cpuid(uint32_t *regs) -{ -#ifdef COMPILER_GCC -# ifdef PLATFORM_X64 - __asm __volatile( - " mov %0, %%rdi\n" - - " push %%rbx\n" - " push %%rcx\n" - " push %%rdx\n" - - " mov (%%rdi), %%eax\n" - " mov 4(%%rdi), %%ebx\n" - " mov 8(%%rdi), %%ecx\n" - " mov 12(%%rdi), %%edx\n" - - " cpuid\n" - - " movl %%eax, (%%rdi)\n" - " movl %%ebx, 4(%%rdi)\n" - " movl %%ecx, 8(%%rdi)\n" - " movl %%edx, 12(%%rdi)\n" - " pop %%rdx\n" - " pop %%rcx\n" - " pop %%rbx\n" - : - :"m"(regs) - :"memory", "eax", "rdi" - ); -# else - __asm __volatile( - " mov %0, %%edi\n" - - " push %%ebx\n" - " push %%ecx\n" - " push %%edx\n" - - " mov (%%edi), %%eax\n" - " mov 4(%%edi), %%ebx\n" - " mov 8(%%edi), %%ecx\n" - " mov 12(%%edi), %%edx\n" - - " cpuid\n" - - " mov %%eax, (%%edi)\n" - " mov %%ebx, 4(%%edi)\n" - " mov %%ecx, 8(%%edi)\n" - " mov %%edx, 12(%%edi)\n" - " pop %%edx\n" - " pop %%ecx\n" - " pop %%ebx\n" - : - :"m"(regs) - :"memory", "eax", "edi" - ); -# endif /* COMPILER_GCC */ -#else -# ifdef COMPILER_MICROSOFT - __asm { - push ebx - push ecx - push edx - push edi - mov edi, regs - - mov eax, [edi] - mov ebx, [edi+4] - mov ecx, [edi+8] - mov edx, [edi+12] - - cpuid - - mov [edi], eax - mov [edi+4], ebx - mov [edi+8], ecx - mov [edi+12], edx - - pop edi - pop edx - pop ecx - pop ebx - } -# else -# error "Unsupported compiler" -# endif /* COMPILER_MICROSOFT */ -#endif -} -#endif /* INLINE_ASSEMBLY_SUPPORTED */ - -#ifdef INLINE_ASM_SUPPORTED -void cpu_rdtsc(uint64_t* result) -{ - uint32_t low_part, hi_part; -#ifdef COMPILER_GCC - __asm __volatile ( - " rdtsc\n" - " mov %%eax, %0\n" - " mov %%edx, %1\n" - :"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx" - ); -#else -# ifdef COMPILER_MICROSOFT - __asm { - rdtsc - mov low_part, eax - mov hi_part, edx - }; -# else -# error "Unsupported compiler" -# endif /* COMPILER_MICROSOFT */ -#endif /* COMPILER_GCC */ - *result = (uint64_t)low_part + (((uint64_t) hi_part) << 32); -} -#endif /* INLINE_ASM_SUPPORTED */ - -#ifdef INLINE_ASM_SUPPORTED -void busy_sse_loop(int cycles) -{ -#ifdef COMPILER_GCC -#ifndef __APPLE__ -# define XALIGN ".balign 16\n" -#else -# define XALIGN ".align 4\n" -#endif - __asm __volatile ( - " xorps %%xmm0, %%xmm0\n" - " xorps %%xmm1, %%xmm1\n" - " xorps %%xmm2, %%xmm2\n" - " xorps %%xmm3, %%xmm3\n" - " xorps %%xmm4, %%xmm4\n" - " xorps %%xmm5, %%xmm5\n" - " xorps %%xmm6, %%xmm6\n" - " xorps %%xmm7, %%xmm7\n" - XALIGN - /* ".bsLoop:\n" */ - "1:\n" - // 0: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 1: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 2: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 3: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 4: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 5: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 6: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 7: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 8: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 9: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //10: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //11: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //12: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //13: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //14: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //15: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //16: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //17: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //18: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //19: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //20: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //21: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //22: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //23: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //24: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //25: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //26: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //27: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //28: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //29: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //30: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //31: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - - " dec %%eax\n" - /* "jnz .bsLoop\n" */ - " jnz 1b\n" - ::"a"(cycles) - ); -#else -# ifdef COMPILER_MICROSOFT - __asm { - mov eax, cycles - xorps xmm0, xmm0 - xorps xmm1, xmm1 - xorps xmm2, xmm2 - xorps xmm3, xmm3 - xorps xmm4, xmm4 - xorps xmm5, xmm5 - xorps xmm6, xmm6 - xorps xmm7, xmm7 - //-- - align 16 -bsLoop: - // 0: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 1: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 2: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 3: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 4: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 5: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 6: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 7: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 8: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 9: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 10: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 11: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 12: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 13: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 14: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 15: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 16: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 17: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 18: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 19: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 20: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 21: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 22: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 23: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 24: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 25: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 26: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 27: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 28: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 29: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 30: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 31: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - //---------------------- - dec eax - jnz bsLoop - } -# else -# error "Unsupported compiler" -# endif /* COMPILER_MICROSOFT */ -#endif /* COMPILER_GCC */ -} -#endif /* INLINE_ASSEMBLY_SUPPORTED */ +/* + * Copyright 2008 Veselin Georgiev, + * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "libcpuid.h" +#include "asm-bits.h" + +int cpuid_exists_by_eflags(void) +{ +#if defined(PLATFORM_X64) + return 1; /* CPUID is always present on the x86_64 */ +#elif defined(PLATFORM_X86) +# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) + int result; + __asm __volatile( + " pushfl\n" + " pop %%eax\n" + " mov %%eax, %%ecx\n" + " xor $0x200000, %%eax\n" + " push %%eax\n" + " popfl\n" + " pushfl\n" + " pop %%eax\n" + " xor %%ecx, %%eax\n" + " mov %%eax, %0\n" + " push %%ecx\n" + " popfl\n" + : "=m"(result) + : :"eax", "ecx", "memory"); + return (result != 0); +# elif defined(COMPILER_MICROSOFT) + int result; + __asm { + pushfd + pop eax + mov ecx, eax + xor eax, 0x200000 + push eax + popfd + pushfd + pop eax + xor eax, ecx + mov result, eax + push ecx + popfd + }; + return (result != 0); +# else + return 0; +# endif /* COMPILER_MICROSOFT */ +#elif defined(PLATFORM_ARM) + return 0; +#else + return 0; +#endif /* PLATFORM_X86 */ +} + +#ifdef INLINE_ASM_SUPPORTED +/* + * with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions + * are implemented in separate .asm files. Otherwise, use inline assembly + */ +void exec_cpuid(uint32_t *regs) +{ +# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) +# ifdef PLATFORM_X64 + __asm __volatile( + " mov %0, %%rdi\n" + + " push %%rbx\n" + " push %%rcx\n" + " push %%rdx\n" + + " mov (%%rdi), %%eax\n" + " mov 4(%%rdi), %%ebx\n" + " mov 8(%%rdi), %%ecx\n" + " mov 12(%%rdi), %%edx\n" + + " cpuid\n" + + " movl %%eax, (%%rdi)\n" + " movl %%ebx, 4(%%rdi)\n" + " movl %%ecx, 8(%%rdi)\n" + " movl %%edx, 12(%%rdi)\n" + " pop %%rdx\n" + " pop %%rcx\n" + " pop %%rbx\n" + : + :"m"(regs) + :"memory", "eax", "rdi" + ); +# elif defined(PLATFORM_X86) + __asm __volatile( + " mov %0, %%edi\n" + + " push %%ebx\n" + " push %%ecx\n" + " push %%edx\n" + + " mov (%%edi), %%eax\n" + " mov 4(%%edi), %%ebx\n" + " mov 8(%%edi), %%ecx\n" + " mov 12(%%edi), %%edx\n" + + " cpuid\n" + + " mov %%eax, (%%edi)\n" + " mov %%ebx, 4(%%edi)\n" + " mov %%ecx, 8(%%edi)\n" + " mov %%edx, 12(%%edi)\n" + " pop %%edx\n" + " pop %%ecx\n" + " pop %%ebx\n" + : + :"m"(regs) + :"memory", "eax", "edi" + ); +# elif defined(PLATFORM_ARM) +# endif /* COMPILER_GCC */ +#else +# ifdef COMPILER_MICROSOFT + __asm { + push ebx + push ecx + push edx + push edi + mov edi, regs + + mov eax, [edi] + mov ebx, [edi+4] + mov ecx, [edi+8] + mov edx, [edi+12] + + cpuid + + mov [edi], eax + mov [edi+4], ebx + mov [edi+8], ecx + mov [edi+12], edx + + pop edi + pop edx + pop ecx + pop ebx + } +# else +# error "Unsupported compiler" +# endif /* COMPILER_MICROSOFT */ +#endif +} +#endif /* INLINE_ASSEMBLY_SUPPORTED */ + +#ifdef INLINE_ASM_SUPPORTED +void cpu_rdtsc(uint64_t* result) +{ + uint32_t low_part, hi_part; +#if defined(COMPILER_GCC) || defined(COMPILER_CLANG) +#ifdef PLATFORM_ARM + low_part = 0; + hi_part = 0; +#else + __asm __volatile ( + " rdtsc\n" + " mov %%eax, %0\n" + " mov %%edx, %1\n" + :"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx" + ); +#endif +#else +# ifdef COMPILER_MICROSOFT + __asm { + rdtsc + mov low_part, eax + mov hi_part, edx + }; +# else +# error "Unsupported compiler" +# endif /* COMPILER_MICROSOFT */ +#endif /* COMPILER_GCC */ + *result = (uint64_t)low_part + (((uint64_t) hi_part) << 32); +} +#endif /* INLINE_ASM_SUPPORTED */ + +#ifdef INLINE_ASM_SUPPORTED +void busy_sse_loop(int cycles) +{ +# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) +#ifndef __APPLE__ +# define XALIGN ".balign 16\n" +#else +# define XALIGN ".align 4\n" +#endif +#ifdef PLATFORM_ARM +#else + __asm __volatile ( + " xorps %%xmm0, %%xmm0\n" + " xorps %%xmm1, %%xmm1\n" + " xorps %%xmm2, %%xmm2\n" + " xorps %%xmm3, %%xmm3\n" + " xorps %%xmm4, %%xmm4\n" + " xorps %%xmm5, %%xmm5\n" + " xorps %%xmm6, %%xmm6\n" + " xorps %%xmm7, %%xmm7\n" + XALIGN + /* ".bsLoop:\n" */ + "1:\n" + // 0: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 1: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 2: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 3: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 4: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 5: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 6: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 7: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 8: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 9: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //10: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //11: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //12: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //13: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //14: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //15: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //16: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //17: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //18: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //19: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //20: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //21: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //22: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //23: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //24: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //25: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //26: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //27: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //28: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //29: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //30: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //31: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + + " dec %%eax\n" + /* "jnz .bsLoop\n" */ + " jnz 1b\n" + ::"a"(cycles) + ); +#endif +#else +# ifdef COMPILER_MICROSOFT + __asm { + mov eax, cycles + xorps xmm0, xmm0 + xorps xmm1, xmm1 + xorps xmm2, xmm2 + xorps xmm3, xmm3 + xorps xmm4, xmm4 + xorps xmm5, xmm5 + xorps xmm6, xmm6 + xorps xmm7, xmm7 + //-- + align 16 +bsLoop: + // 0: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 1: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 2: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 3: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 4: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 5: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 6: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 7: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 8: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 9: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 10: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 11: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 12: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 13: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 14: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 15: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 16: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 17: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 18: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 19: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 20: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 21: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 22: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 23: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 24: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 25: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 26: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 27: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 28: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 29: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 30: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 31: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + //---------------------- + dec eax + jnz bsLoop + } +# else +# error "Unsupported compiler" +# endif /* COMPILER_MICROSOFT */ +#endif /* COMPILER_GCC */ +} +#endif /* INLINE_ASSEMBLY_SUPPORTED */ \ No newline at end of file diff --git a/src/3rdparty/libcpuid/asm-bits.h b/src/3rdparty/libcpuid/asm-bits.h index 3a03e11ce..9049e2fe9 100644 --- a/src/3rdparty/libcpuid/asm-bits.h +++ b/src/3rdparty/libcpuid/asm-bits.h @@ -1,53 +1,71 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef __ASM_BITS_H__ -#define __ASM_BITS_H__ -#include "libcpuid.h" - -/* Determine Compiler: */ -#if defined(_MSC_VER) -# define COMPILER_MICROSOFT -#elif defined(__GNUC__) -# define COMPILER_GCC -#endif - -/* Determine Platform */ -#if defined(__x86_64__) || defined(_M_AMD64) -# define PLATFORM_X64 -#elif defined(__i386__) || defined(_M_IX86) -# define PLATFORM_X86 -#endif - -/* Under Windows/AMD64 with MSVC, inline assembly isn't supported */ -#if (defined(COMPILER_GCC) && defined(PLATFORM_X64)) || defined(PLATFORM_X86) -# define INLINE_ASM_SUPPORTED -#endif - -int cpuid_exists_by_eflags(void); -void exec_cpuid(uint32_t *regs); -void busy_sse_loop(int cycles); - -#endif /* __ASM_BITS_H__ */ +/* + * Copyright 2008 Veselin Georgiev, + * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __ASM_BITS_H__ +#define __ASM_BITS_H__ +#include "libcpuid.h" + +/* Determine Compiler: */ +#if defined(_MSC_VER) +#if !defined(COMPILER_MICROSOFT) +# define COMPILER_MICROSOFT +#endif +#elif defined(__GNUC__) +#if !defined(COMPILER_GCC) +# define COMPILER_GCC +#endif +#elif defined(__clang__) +#if !defined(COMPILER_CLANG) +# define COMPILER_CLANG +#endif +#endif + +/* Determine Platform */ +#if defined(__x86_64__) || defined(_M_AMD64) +#if !defined(PLATFORM_X64) +# define PLATFORM_X64 +#endif +#elif defined(__i386__) || defined(_M_IX86) +#if !defined(PLATFORM_X86) +# define PLATFORM_X86 +#endif +#elif defined(__ARMEL__) +#if !defined(PLATFORM_ARM) +# define PLATFORM_ARM +#endif +#endif + +/* Under Windows/AMD64 with MSVC, inline assembly isn't supported */ +#if (((defined(COMPILER_GCC) || defined(COMPILER_CLANG))) && \ + (defined(PLATFORM_X64) || defined(PLATFORM_X86) || defined(PLATFORM_ARM))) || \ + (defined(COMPILER_MICROSOFT) && defined(PLATFORM_X86)) +# define INLINE_ASM_SUPPORTED +#endif + +int cpuid_exists_by_eflags(void); +void exec_cpuid(uint32_t *regs); +void busy_sse_loop(int cycles); + +#endif /* __ASM_BITS_H__ */ diff --git a/src/3rdparty/libcpuid/cpuid_main.c b/src/3rdparty/libcpuid/cpuid_main.c index f22c7dd69..61cb638db 100644 --- a/src/3rdparty/libcpuid/cpuid_main.c +++ b/src/3rdparty/libcpuid/cpuid_main.c @@ -221,42 +221,42 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da static cpu_vendor_t cpuid_vendor_identify(const uint32_t *raw_vendor, char *vendor_str) { - int i; - cpu_vendor_t vendor = VENDOR_UNKNOWN; - const struct { cpu_vendor_t vendor; char match[16]; } - matchtable[NUM_CPU_VENDORS] = { - /* source: http://www.sandpile.org/ia32/cpuid.htm */ - { VENDOR_INTEL , "GenuineIntel" }, - { VENDOR_AMD , "AuthenticAMD" }, - { VENDOR_CYRIX , "CyrixInstead" }, - { VENDOR_NEXGEN , "NexGenDriven" }, - { VENDOR_TRANSMETA , "GenuineTMx86" }, - { VENDOR_UMC , "UMC UMC UMC " }, - { VENDOR_CENTAUR , "CentaurHauls" }, - { VENDOR_RISE , "RiseRiseRise" }, - { VENDOR_SIS , "SiS SiS SiS " }, - { VENDOR_NSC , "Geode by NSC" }, - }; + int i; + cpu_vendor_t vendor = VENDOR_UNKNOWN; + const struct { cpu_vendor_t vendor; char match[16]; } + matchtable[NUM_CPU_VENDORS] = { + /* source: http://www.sandpile.org/ia32/cpuid.htm */ + { VENDOR_INTEL , "GenuineIntel" }, + { VENDOR_AMD , "AuthenticAMD" }, + { VENDOR_CYRIX , "CyrixInstead" }, + { VENDOR_NEXGEN , "NexGenDriven" }, + { VENDOR_TRANSMETA , "GenuineTMx86" }, + { VENDOR_UMC , "UMC UMC UMC " }, + { VENDOR_CENTAUR , "CentaurHauls" }, + { VENDOR_RISE , "RiseRiseRise" }, + { VENDOR_SIS , "SiS SiS SiS " }, + { VENDOR_NSC , "Geode by NSC" }, + }; - memcpy(vendor_str + 0, &raw_vendor[1], 4); - memcpy(vendor_str + 4, &raw_vendor[3], 4); - memcpy(vendor_str + 8, &raw_vendor[2], 4); - vendor_str[12] = 0; + memcpy(vendor_str + 0, &raw_vendor[1], 4); + memcpy(vendor_str + 4, &raw_vendor[3], 4); + memcpy(vendor_str + 8, &raw_vendor[2], 4); + vendor_str[12] = 0; - /* Determine vendor: */ - for (i = 0; i < NUM_CPU_VENDORS; i++) - if (!strcmp(vendor_str, matchtable[i].match)) { - vendor = matchtable[i].vendor; - break; - } - return vendor; + /* Determine vendor: */ + for (i = 0; i < NUM_CPU_VENDORS; i++) + if (!strcmp(vendor_str, matchtable[i].match)) { + vendor = matchtable[i].vendor; + break; + } + return vendor; } static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) { int i, j, basic, xmodel, xfamily, ext; char brandstr[64] = {0}; - data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str); + data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str); if (data->vendor == VENDOR_UNKNOWN) return set_error(ERR_CPU_UNKN); diff --git a/src/3rdparty/libcpuid/libcpuid.h b/src/3rdparty/libcpuid/libcpuid.h index c44990c31..847e5a4a8 100644 --- a/src/3rdparty/libcpuid/libcpuid.h +++ b/src/3rdparty/libcpuid/libcpuid.h @@ -60,7 +60,7 @@ */ /** @mainpage A simple libcpuid introduction - * + * * LibCPUID provides CPU identification and access to the CPUID and RDTSC * instructions on the x86. *

@@ -82,6 +82,7 @@ */ /** @defgroup libcpuid LibCPUID + * @brief LibCPUID provides CPU identification @{ */ /* Include some integer type specifications: */ @@ -535,23 +536,23 @@ typedef enum { * @brief Describes common library error codes */ typedef enum { - ERR_OK = 0, /*!< "No error" */ - ERR_NO_CPUID = -1, /*!< "CPUID instruction is not supported" */ - ERR_NO_RDTSC = -2, /*!< "RDTSC instruction is not supported" */ - ERR_NO_MEM = -3, /*!< "Memory allocation failed" */ - ERR_OPEN = -4, /*!< "File open operation failed" */ - ERR_BADFMT = -5, /*!< "Bad file format" */ - ERR_NOT_IMP = -6, /*!< "Not implemented" */ - ERR_CPU_UNKN = -7, /*!< "Unsupported processor" */ - ERR_NO_RDMSR = -8, /*!< "RDMSR instruction is not supported" */ - ERR_NO_DRIVER= -9, /*!< "RDMSR driver error (generic)" */ - ERR_NO_PERMS = -10, /*!< "No permissions to install RDMSR driver" */ - ERR_EXTRACT = -11, /*!< "Cannot extract RDMSR driver (read only media?)" */ - ERR_HANDLE = -12, /*!< "Bad handle" */ - ERR_INVMSR = -13, /*!< "Invalid MSR" */ - ERR_INVCNB = -14, /*!< "Invalid core number" */ - ERR_HANDLE_R = -15, /*!< "Error on handle read" */ - ERR_INVRANGE = -16, /*!< "Invalid given range" */ + ERR_OK = 0, /*!< No error */ + ERR_NO_CPUID = -1, /*!< CPUID instruction is not supported */ + ERR_NO_RDTSC = -2, /*!< RDTSC instruction is not supported */ + ERR_NO_MEM = -3, /*!< Memory allocation failed */ + ERR_OPEN = -4, /*!< File open operation failed */ + ERR_BADFMT = -5, /*!< Bad file format */ + ERR_NOT_IMP = -6, /*!< Not implemented */ + ERR_CPU_UNKN = -7, /*!< Unsupported processor */ + ERR_NO_RDMSR = -8, /*!< RDMSR instruction is not supported */ + ERR_NO_DRIVER= -9, /*!< RDMSR driver error (generic) */ + ERR_NO_PERMS = -10, /*!< No permissions to install RDMSR driver */ + ERR_EXTRACT = -11, /*!< Cannot extract RDMSR driver (read only media?) */ + ERR_HANDLE = -12, /*!< Bad handle */ + ERR_INVMSR = -13, /*!< Invalid MSR */ + ERR_INVCNB = -14, /*!< Invalid core number */ + ERR_HANDLE_R = -15, /*!< Error on handle read */ + ERR_INVRANGE = -16, /*!< Invalid given range */ } cpu_error_t; /** @@ -668,7 +669,7 @@ struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw); const char* cpuid_lib_version(void); #ifdef __cplusplus -}; /* extern "C" */ +} /* extern "C" */ #endif diff --git a/src/3rdparty/libcpuid/libcpuid_internal.h b/src/3rdparty/libcpuid/libcpuid_internal.h index 038aa2092..648046162 100644 --- a/src/3rdparty/libcpuid/libcpuid_internal.h +++ b/src/3rdparty/libcpuid/libcpuid_internal.h @@ -75,8 +75,9 @@ enum _intel_bits_t { _3 = LBIT( 14 ), _5 = LBIT( 15 ), _7 = LBIT( 16 ), - XEON_ = LBIT( 17 ), - ATOM_ = LBIT( 18 ), + _9 = LBIT( 17 ), + XEON_ = LBIT( 18 ), + ATOM_ = LBIT( 19 ), }; typedef enum _intel_bits_t intel_bits_t; diff --git a/src/3rdparty/libcpuid/libcpuid_types.h b/src/3rdparty/libcpuid/libcpuid_types.h index 9e897275f..0e667aa67 100644 --- a/src/3rdparty/libcpuid/libcpuid_types.h +++ b/src/3rdparty/libcpuid/libcpuid_types.h @@ -32,6 +32,32 @@ #ifndef __LIBCPUID_TYPES_H__ #define __LIBCPUID_TYPES_H__ -#include +#if !defined(_MSC_VER) || _MSC_VER >= 1600 +# include +#else +/* we have to provide our own: */ +# if !defined(__int32_t_defined) +typedef int int32_t; +# endif + +# if !defined(__uint32_t_defined) +typedef unsigned uint32_t; +# endif + +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef signed short int16_t; +typedef unsigned short uint16_t; +#if (defined _MSC_VER) && (_MSC_VER <= 1300) + /* MSVC 6.0: no long longs ... */ + typedef signed __int64 int64_t; + typedef unsigned __int64 uint64_t; +#else + /* all other sane compilers: */ + typedef signed long long int64_t; + typedef unsigned long long uint64_t; +#endif + +#endif #endif /* __LIBCPUID_TYPES_H__ */ diff --git a/src/3rdparty/libcpuid/recog_amd.c b/src/3rdparty/libcpuid/recog_amd.c index 352d733b8..4726f6339 100644 --- a/src/3rdparty/libcpuid/recog_amd.c +++ b/src/3rdparty/libcpuid/recog_amd.c @@ -49,6 +49,10 @@ enum _amd_model_codes_t { _1400, _1500, _1600, + _1900, + _2400, + _2500, + _2700, }; static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data) diff --git a/src/3rdparty/libcpuid/recog_intel.c b/src/3rdparty/libcpuid/recog_intel.c index 5467c19f0..397d750e1 100644 --- a/src/3rdparty/libcpuid/recog_intel.c +++ b/src/3rdparty/libcpuid/recog_intel.c @@ -376,7 +376,7 @@ static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data) bits |= bit_matchtable[i].bit; } - if ((i = match_pattern(bs, "Core(TM) [im][357]")) != 0) { + if ((i = match_pattern(bs, "Core(TM) [im][3579]")) != 0) { bits |= CORE_; i--; switch (bs[i + 9]) { @@ -387,6 +387,7 @@ static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data) case '3': bits |= _3; break; case '5': bits |= _5; break; case '7': bits |= _7; break; + case '9': bits |= _9; break; } } for (i = 0; i < COUNT_OF(matchtable); i++) From ee4d980955f749e82e6b0bc944b155c7f00ab131 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 23 Sep 2018 17:51:56 +0300 Subject: [PATCH 02/10] Old static class Cpu replaced to interface ICpuInfo. --- CMakeLists.txt | 13 +- src/App.cpp | 2 +- src/Cpu.h | 66 --------- src/Summary.cpp | 18 +-- src/api/ApiRouter.cpp | 11 +- .../cpu/BasicCpuInfo.cpp} | 60 +++----- src/common/cpu/BasicCpuInfo.h | 70 ++++++++++ .../cpu/BasicCpuInfo_arm.cpp} | 34 ++--- src/{Cpu_unix.cpp => common/cpu/Cpu.cpp} | 45 +++--- src/{Cpu_mac.cpp => common/cpu/Cpu.h} | 30 ++-- src/common/interfaces/ICpuInfo.h | 59 ++++++++ src/common/xmrig.h | 8 ++ src/core/Config.cpp | 18 +-- src/core/Controller.cpp | 2 +- src/{Cpu.cpp => core/cpu/AdvancedCpuInfo.cpp} | 132 ++++++++---------- src/core/cpu/AdvancedCpuInfo.h | 75 ++++++++++ src/{Cpu_win.cpp => core/cpu/Cpu.cpp} | 40 ++++-- src/workers/Worker.cpp | 4 +- 18 files changed, 406 insertions(+), 281 deletions(-) delete mode 100644 src/Cpu.h rename src/{Cpu_stub.cpp => common/cpu/BasicCpuInfo.cpp} (77%) create mode 100644 src/common/cpu/BasicCpuInfo.h rename src/{Cpu_arm.cpp => common/cpu/BasicCpuInfo_arm.cpp} (74%) rename src/{Cpu_unix.cpp => common/cpu/Cpu.cpp} (71%) rename src/{Cpu_mac.cpp => common/cpu/Cpu.h} (75%) create mode 100644 src/common/interfaces/ICpuInfo.h rename src/{Cpu.cpp => core/cpu/AdvancedCpuInfo.cpp} (57%) create mode 100644 src/core/cpu/AdvancedCpuInfo.h rename src/{Cpu_win.cpp => core/cpu/Cpu.cpp} (69%) diff --git a/CMakeLists.txt b/CMakeLists.txt index b779b74d4..6f7d62910 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,7 @@ set(HEADERS src/common/config/ConfigLoader.h src/common/config/ConfigWatcher.h src/common/Console.h + src/common/cpu/Cpu.h src/common/crypto/Algorithm.h src/common/crypto/keccak.h src/common/interfaces/IClientListener.h @@ -27,6 +28,7 @@ set(HEADERS src/common/interfaces/IConfigCreator.h src/common/interfaces/IConsoleListener.h src/common/interfaces/IControllerListener.h + src/common/interfaces/ICpuInfo.h src/common/interfaces/ILogBackend.h src/common/interfaces/IStrategy.h src/common/interfaces/IStrategyListener.h @@ -49,7 +51,6 @@ set(HEADERS src/common/xmrig.h src/core/ConfigLoader_platform.h src/core/Controller.h - src/Cpu.h src/interfaces/IJobResultListener.h src/interfaces/IThread.h src/interfaces/IWorker.h @@ -135,7 +136,6 @@ if (WIN32) res/app.rc src/App_win.cpp src/common/Platform_win.cpp - src/Cpu_win.cpp src/Mem_win.cpp ) @@ -145,14 +145,12 @@ elseif (APPLE) set(SOURCES_OS src/App_unix.cpp src/common/Platform_mac.cpp - src/Cpu_mac.cpp src/Mem_unix.cpp ) else() set(SOURCES_OS src/App_unix.cpp src/common/Platform_unix.cpp - src/Cpu_unix.cpp src/Mem_unix.cpp ) @@ -184,14 +182,15 @@ if (WITH_LIBCPUID) include_directories(src/3rdparty/libcpuid) set(CPUID_LIB cpuid) - set(SOURCES_CPUID src/Cpu.cpp) + set(SOURCES_CPUID src/core/cpu/AdvancedCpuInfo.h src/core/cpu/AdvancedCpuInfo.cpp src/core/cpu/Cpu.cpp) else() add_definitions(/DXMRIG_NO_LIBCPUID) + set(SOURCES_CPUID src/common/cpu/BasicCpuInfo.h src/common/cpu/Cpu.cpp) if (XMRIG_ARM) - set(SOURCES_CPUID src/Cpu_arm.cpp) + set(SOURCES_CPUID ${SOURCES_CPUID} src/common/cpu/BasicCpuInfo_arm.cpp) else() - set(SOURCES_CPUID src/Cpu_stub.cpp) + set(SOURCES_CPUID ${SOURCES_CPUID} src/common/cpu/BasicCpuInfo.cpp) endif() endif() diff --git a/src/App.cpp b/src/App.cpp index adcc57524..134e4ef56 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -29,11 +29,11 @@ #include "api/Api.h" #include "App.h" #include "common/Console.h" +#include "common/cpu/Cpu.h" #include "common/log/Log.h" #include "common/Platform.h" #include "core/Config.h" #include "core/Controller.h" -#include "Cpu.h" #include "crypto/CryptoNight.h" #include "Mem.h" #include "net/Network.h" diff --git a/src/Cpu.h b/src/Cpu.h deleted file mode 100644 index a125bae80..000000000 --- a/src/Cpu.h +++ /dev/null @@ -1,66 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __CPU_H__ -#define __CPU_H__ - - -#include - - -class Cpu -{ -public: - enum Flags { - X86_64 = 1, - AES = 2, - BMI2 = 4 - }; - - static size_t optimalThreadsCount(size_t size, int maxCpuUsage); - static void init(); - - static inline bool hasAES() { return (m_flags & AES) != 0; } - static inline bool isX64() { return (m_flags & X86_64) != 0; } - static inline const char *brand() { return m_brand; } - static inline int cores() { return m_totalCores; } - static inline int l2() { return m_l2_cache; } - static inline int l3() { return m_l3_cache; } - static inline int sockets() { return m_sockets; } - static inline int threads() { return m_totalThreads; } - -private: - static void initCommon(); - - static bool m_l2_exclusive; - static char m_brand[64]; - static int m_flags; - static int m_l2_cache; - static int m_l3_cache; - static int m_sockets; - static int m_totalCores; - static size_t m_totalThreads; -}; - - -#endif /* __CPU_H__ */ diff --git a/src/Summary.cpp b/src/Summary.cpp index de6b1234c..ba220e5b1 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -27,11 +27,11 @@ #include +#include "common/cpu/Cpu.h" #include "common/log/Log.h" #include "common/net/Pool.h" #include "core/Config.h" #include "core/Controller.h" -#include "Cpu.h" #include "Mem.h" #include "Summary.h" #include "version.h" @@ -52,21 +52,23 @@ static void print_memory(xmrig::Config *config) { static void print_cpu(xmrig::Config *config) { + using namespace xmrig; + if (config->isColors()) { Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%d)") " %sx64 %sAES", "CPU", - Cpu::brand(), - Cpu::sockets(), - Cpu::isX64() ? "\x1B[1;32m" : "\x1B[1;31m-", - Cpu::hasAES() ? "\x1B[1;32m" : "\x1B[1;31m-"); + Cpu::info()->brand(), + Cpu::info()->sockets(), + Cpu::info()->isX64() ? "\x1B[1;32m" : "\x1B[1;31m-", + Cpu::info()->hasAES() ? "\x1B[1;32m" : "\x1B[1;31m-"); # ifndef XMRIG_NO_LIBCPUID - Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%.1f MB/%.1f MB"), "CPU L2/L3", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0); + Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%.1f MB/%.1f MB"), "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0); # endif } else { - Log::i()->text(" * %-13s%s (%d) %sx64 %sAES", "CPU", Cpu::brand(), Cpu::sockets(), Cpu::isX64() ? "" : "-", Cpu::hasAES() ? "" : "-"); + Log::i()->text(" * %-13s%s (%d) %sx64 %sAES", "CPU", Cpu::info()->brand(), Cpu::info()->sockets(), Cpu::info()->isX64() ? "" : "-", Cpu::info()->hasAES() ? "" : "-"); # ifndef XMRIG_NO_LIBCPUID - Log::i()->text(" * %-13s%.1f MB/%.1f MB", "CPU L2/L3", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0); + Log::i()->text(" * %-13s%.1f MB/%.1f MB", "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0); # endif } } diff --git a/src/api/ApiRouter.cpp b/src/api/ApiRouter.cpp index 2c62696ac..dd7accf69 100644 --- a/src/api/ApiRouter.cpp +++ b/src/api/ApiRouter.cpp @@ -35,12 +35,12 @@ #include "api/ApiRouter.h" #include "common/api/HttpReply.h" #include "common/api/HttpRequest.h" +#include "common/cpu/Cpu.h" #include "common/crypto/keccak.h" #include "common/net/Job.h" #include "common/Platform.h" #include "core/Config.h" #include "core/Controller.h" -#include "Cpu.h" #include "interfaces/IThread.h" #include "rapidjson/document.h" #include "rapidjson/prettywriter.h" @@ -238,13 +238,14 @@ void ApiRouter::getIdentify(rapidjson::Document &doc) const void ApiRouter::getMiner(rapidjson::Document &doc) const { + using namespace xmrig; auto &allocator = doc.GetAllocator(); rapidjson::Value cpu(rapidjson::kObjectType); - cpu.AddMember("brand", rapidjson::StringRef(Cpu::brand()), allocator); - cpu.AddMember("aes", Cpu::hasAES(), allocator); - cpu.AddMember("x64", Cpu::isX64(), allocator); - cpu.AddMember("sockets", Cpu::sockets(), allocator); + cpu.AddMember("brand", rapidjson::StringRef(Cpu::info()->brand()), allocator); + cpu.AddMember("aes", Cpu::info()->hasAES(), allocator); + cpu.AddMember("x64", Cpu::info()->isX64(), allocator); + cpu.AddMember("sockets", Cpu::info()->sockets(), allocator); doc.AddMember("version", APP_VERSION, allocator); doc.AddMember("kind", APP_KIND, allocator); diff --git a/src/Cpu_stub.cpp b/src/common/cpu/BasicCpuInfo.cpp similarity index 77% rename from src/Cpu_stub.cpp rename to src/common/cpu/BasicCpuInfo.cpp index 8b27ad657..66af53ccf 100644 --- a/src/Cpu_stub.cpp +++ b/src/common/cpu/BasicCpuInfo.cpp @@ -21,6 +21,9 @@ * along with this program. If not, see . */ +#include +#include + #ifdef _MSC_VER # include @@ -32,14 +35,8 @@ # define bit_AES (1 << 25) #endif -#ifndef bit_BMI2 -# define bit_BMI2 (1 << 8) -#endif -#include - - -#include "Cpu.h" +#include "common/cpu/BasicCpuInfo.h" #define VENDOR_ID (0) @@ -96,43 +93,18 @@ static inline bool has_aes_ni() } -static inline bool has_bmi2() { - int cpu_info[4] = { 0 }; - cpuid(EXTENDED_FEATURES, cpu_info); - - return (cpu_info[EBX_Reg] & bit_BMI2) != 0; -} - - -char Cpu::m_brand[64] = { 0 }; -int Cpu::m_flags = 0; -int Cpu::m_l2_cache = 0; -int Cpu::m_l3_cache = 0; -int Cpu::m_sockets = 1; -int Cpu::m_totalCores = 0; -size_t Cpu::m_totalThreads = 0; - - -size_t Cpu::optimalThreadsCount(size_t size, int maxCpuUsage) -{ - const size_t count = m_totalThreads / 2; - return count < 1 ? 1 : count; -} - - -void Cpu::initCommon() +xmrig::BasicCpuInfo::BasicCpuInfo() : + m_aes(has_aes_ni()), + m_brand(), + m_threads(std::thread::hardware_concurrency()) { cpu_brand_string(m_brand); - -# if defined(__x86_64__) || defined(_M_AMD64) - m_flags |= X86_64; -# endif - - if (has_aes_ni()) { - m_flags |= AES; - } - - if (has_bmi2()) { - m_flags |= BMI2; - } +} + + +size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const +{ + const size_t count = threads() / 2; + + return count < 1 ? 1 : count; } diff --git a/src/common/cpu/BasicCpuInfo.h b/src/common/cpu/BasicCpuInfo.h new file mode 100644 index 000000000..f9d710d6f --- /dev/null +++ b/src/common/cpu/BasicCpuInfo.h @@ -0,0 +1,70 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2016-2018 XMRig , + * + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef XMRIG_BASICCPUINFO_H +#define XMRIG_BASICCPUINFO_H + + +#include "common/interfaces/ICpuInfo.h" + + +namespace xmrig { + + +class BasicCpuInfo : public ICpuInfo +{ +public: + BasicCpuInfo(); + +protected: + size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override; + + inline Assembly assembly() const override { return ASM_NONE; } + inline bool hasAES() const override { return m_aes; } + inline bool isSupported() const override { return true; } + inline const char *brand() const override { return m_brand; } + inline int32_t cores() const override { return -1; } + inline int32_t L2() const override { return -1; } + inline int32_t L3() const override { return -1; } + inline int32_t nodes() const override { return -1; } + inline int32_t sockets() const override { return 1; } + inline int32_t threads() const override { return m_threads; } + +# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) + inline bool isX64() const override { return true; } +# else + inline bool isX64() const override { return false; } +# endif + +private: + bool m_aes; + char m_brand[64]; + int32_t m_threads; +}; + + +} /* namespace xmrig */ + + +#endif /* XMRIG_BASICCPUINFO_H */ diff --git a/src/Cpu_arm.cpp b/src/common/cpu/BasicCpuInfo_arm.cpp similarity index 74% rename from src/Cpu_arm.cpp rename to src/common/cpu/BasicCpuInfo_arm.cpp index 3e259d0d8..34eb73882 100644 --- a/src/Cpu_arm.cpp +++ b/src/common/cpu/BasicCpuInfo_arm.cpp @@ -21,37 +21,27 @@ * along with this program. If not, see . */ - #include +#include -#include "Cpu.h" +#include "common/cpu/BasicCpuInfo.h" -char Cpu::m_brand[64] = { 0 }; -int Cpu::m_flags = 0; -int Cpu::m_l2_cache = 0; -int Cpu::m_l3_cache = 0; -int Cpu::m_sockets = 1; -int Cpu::m_totalCores = 0; -size_t Cpu::m_totalThreads = 0; - - -size_t Cpu::optimalThreadsCount(size_t size, int maxCpuUsage) -{ - return m_totalThreads; -} - - -void Cpu::initCommon() +xmrig::BasicCpuInfo::BasicCpuInfo() : + m_aes(false), + m_brand(), + m_threads(std::thread::hardware_concurrency()) { memcpy(m_brand, "Unknown", 7); -# if defined (__arm64__) || defined (__aarch64__) - m_flags |= X86_64; -# endif - # if __ARM_FEATURE_CRYPTO m_flags |= AES; # endif } + + +size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const +{ + return threads(); +} diff --git a/src/Cpu_unix.cpp b/src/common/cpu/Cpu.cpp similarity index 71% rename from src/Cpu_unix.cpp rename to src/common/cpu/Cpu.cpp index b895c8979..b1bb28ac7 100644 --- a/src/Cpu_unix.cpp +++ b/src/common/cpu/Cpu.cpp @@ -22,33 +22,36 @@ */ -#ifdef __FreeBSD__ -# include -# include -# include -# include -#endif +#include -#include -#include -#include -#include +#include "common/cpu/BasicCpuInfo.h" +#include "common/cpu/Cpu.h" -#include "Cpu.h" +static xmrig::ICpuInfo *cpuInfo = nullptr; -#ifdef __FreeBSD__ -typedef cpuset_t cpu_set_t; -#endif - - -void Cpu::init() +xmrig::ICpuInfo *xmrig::Cpu::info() { -# ifdef XMRIG_NO_LIBCPUID - m_totalThreads = sysconf(_SC_NPROCESSORS_CONF); -# endif + assert(cpuInfo != nullptr); - initCommon(); + return cpuInfo; +} + + +void xmrig::Cpu::init() +{ + assert(cpuInfo == nullptr); + + cpuInfo = new BasicCpuInfo(); +} + + +void xmrig::Cpu::release() +{ + assert(cpuInfo != nullptr); + + delete cpuInfo; + cpuInfo = nullptr; } diff --git a/src/Cpu_mac.cpp b/src/common/cpu/Cpu.h similarity index 75% rename from src/Cpu_mac.cpp rename to src/common/cpu/Cpu.h index 085148bc4..1d5a9fb1d 100644 --- a/src/Cpu_mac.cpp +++ b/src/common/cpu/Cpu.h @@ -4,7 +4,7 @@ * Copyright 2014 Lucas Jones * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee - * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , * * This program is free software: you can redistribute it and/or modify @@ -21,20 +21,26 @@ * along with this program. If not, see . */ - -#include -#include -#include +#ifndef XMRIG_CPU_H +#define XMRIG_CPU_H -#include "Cpu.h" +#include "common/interfaces/ICpuInfo.h" -void Cpu::init() +namespace xmrig { + + +class Cpu { -# ifdef XMRIG_NO_LIBCPUID - m_totalThreads = sysconf(_SC_NPROCESSORS_CONF); -# endif +public: + static ICpuInfo *info(); + static void init(); + static void release(); +}; - initCommon(); -} + +} /* namespace xmrig */ + + +#endif /* XMRIG_CPU_H */ diff --git a/src/common/interfaces/ICpuInfo.h b/src/common/interfaces/ICpuInfo.h new file mode 100644 index 000000000..c959bf52a --- /dev/null +++ b/src/common/interfaces/ICpuInfo.h @@ -0,0 +1,59 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2016-2018 XMRig + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef XMRIG_CPUINFO_H +#define XMRIG_CPUINFO_H + + +#include + + +#include "common/xmrig.h" + + +namespace xmrig { + + +class ICpuInfo +{ +public: + virtual ~ICpuInfo() {} + + virtual bool hasAES() const = 0; + virtual bool isSupported() const = 0; + virtual bool isX64() const = 0; + virtual const char *brand() const = 0; + virtual int32_t cores() const = 0; + virtual int32_t L2() const = 0; + virtual int32_t L3() const = 0; + virtual int32_t nodes() const = 0; + virtual int32_t sockets() const = 0; + virtual int32_t threads() const = 0; + virtual size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const = 0; + virtual xmrig::Assembly assembly() const = 0; +}; + + +} /* namespace xmrig */ + + +#endif // XMRIG_CPUINFO_H diff --git a/src/common/xmrig.h b/src/common/xmrig.h index e1c7702e1..820bc4fbe 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -94,6 +94,14 @@ enum OclVendor { }; +enum Assembly { + ASM_NONE, + ASM_AUTO, + ASM_INTEL, + ASM_RYZEN +}; + + } /* namespace xmrig */ diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 0380c26d2..d99bfb095 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -27,9 +27,9 @@ #include "common/config/ConfigLoader.h" +#include "common/cpu/Cpu.h" #include "core/Config.h" #include "core/ConfigCreator.h" -#include "Cpu.h" #include "crypto/CryptoNight_constants.h" #include "rapidjson/document.h" #include "rapidjson/filewritestream.h" @@ -153,7 +153,7 @@ bool xmrig::Config::finalize() if (!m_threads.cpu.empty()) { m_threads.mode = Advanced; - const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; + const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::info()->hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; for (size_t i = 0; i < m_threads.cpu.size(); ++i) { m_threads.list.push_back(CpuThread::createFromData(i, m_algorithm.algo(), m_threads.cpu[i], m_priority, softAES)); @@ -168,10 +168,10 @@ bool xmrig::Config::finalize() const size_t size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo()) / 1024; if (!m_threads.count) { - m_threads.count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); + m_threads.count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); } else if (m_safe) { - const size_t count = Cpu::optimalThreadsCount(size, m_maxCpuUsage); + const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); if (m_threads.count > count) { m_threads.count = count; } @@ -232,7 +232,7 @@ bool xmrig::Config::parseString(int key, const char *arg) case ThreadsKey: /* --threads */ if (strncmp(arg, "all", 3) == 0) { - m_threads.count = Cpu::threads(); + m_threads.count = Cpu::info()->threads(); return true; } @@ -339,10 +339,10 @@ xmrig::AlgoVariant xmrig::Config::getAlgoVariant() const # endif if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::hasAES() ? AV_SINGLE : AV_SINGLE_SOFT; + return Cpu::info()->hasAES() ? AV_SINGLE : AV_SINGLE_SOFT; } - if (m_safe && !Cpu::hasAES() && m_algoVariant <= AV_DOUBLE) { + if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { return static_cast(m_algoVariant + 2); } @@ -354,10 +354,10 @@ xmrig::AlgoVariant xmrig::Config::getAlgoVariant() const xmrig::AlgoVariant xmrig::Config::getAlgoVariantLite() const { if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::hasAES() ? AV_DOUBLE : AV_DOUBLE_SOFT; + return Cpu::info()->hasAES() ? AV_DOUBLE : AV_DOUBLE_SOFT; } - if (m_safe && !Cpu::hasAES() && m_algoVariant <= AV_DOUBLE) { + if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { return static_cast(m_algoVariant + 2); } diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp index ce73f0374..792ac9399 100644 --- a/src/core/Controller.cpp +++ b/src/core/Controller.cpp @@ -26,6 +26,7 @@ #include "common/config/ConfigLoader.h" +#include "common/cpu/Cpu.h" #include "common/interfaces/IControllerListener.h" #include "common/log/ConsoleLog.h" #include "common/log/FileLog.h" @@ -33,7 +34,6 @@ #include "common/Platform.h" #include "core/Config.h" #include "core/Controller.h" -#include "Cpu.h" #include "net/Network.h" diff --git a/src/Cpu.cpp b/src/core/cpu/AdvancedCpuInfo.cpp similarity index 57% rename from src/Cpu.cpp rename to src/core/cpu/AdvancedCpuInfo.cpp index eebe585d8..009a2bcbd 100644 --- a/src/Cpu.cpp +++ b/src/core/cpu/AdvancedCpuInfo.cpp @@ -21,65 +21,24 @@ * along with this program. If not, see . */ - #include #include #include +#include -#include "Cpu.h" +#include "core/cpu/AdvancedCpuInfo.h" -bool Cpu::m_l2_exclusive = false; -char Cpu::m_brand[64] = { 0 }; -int Cpu::m_flags = 0; -int Cpu::m_l2_cache = 0; -int Cpu::m_l3_cache = 0; -int Cpu::m_sockets = 1; -int Cpu::m_totalCores = 0; -size_t Cpu::m_totalThreads = 0; - - -size_t Cpu::optimalThreadsCount(size_t size, int maxCpuUsage) -{ - if (m_totalThreads == 1) { - return 1; - } - - size_t cache = 0; - if (m_l3_cache) { - cache = m_l2_exclusive ? (m_l2_cache + m_l3_cache) : m_l3_cache; - } - else { - cache = m_l2_cache; - } - - size_t count = 0; - - if (cache) { - count = cache / size; - - if (cache % size >= size / 2) { - count++; - } - } - else { - count = m_totalThreads / 2; - } - - if (count > m_totalThreads) { - count = m_totalThreads; - } - - if (((float) count / m_totalThreads * 100) > maxCpuUsage) { - count = (int) ceil((float) m_totalThreads * (maxCpuUsage / 100.0)); - } - - return count < 1 ? 1 : count; -} - - -void Cpu::initCommon() +xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : + m_aes(false), + m_L2_exclusive(false), + m_brand(), + m_cores(0), + m_L2(0), + m_L3(0), + m_sockets(1), + m_threads(std::thread::hardware_concurrency()) { struct cpu_raw_data_t raw = { 0 }; struct cpu_id_t data = { 0 }; @@ -89,40 +48,67 @@ void Cpu::initCommon() strncpy(m_brand, data.brand_str, sizeof(m_brand) - 1); - m_totalThreads = data.total_logical_cpus; - m_sockets = m_totalThreads / data.num_logical_cpus; - + m_sockets = threads() / data.num_logical_cpus; if (m_sockets == 0) { m_sockets = 1; } - m_totalCores = data.num_cores * m_sockets; - m_l3_cache = data.l3_cache > 0 ? data.l3_cache * m_sockets : 0; + m_cores = data.num_cores * m_sockets; + m_L3 = data.l3_cache > 0 ? data.l3_cache * m_sockets : 0; // Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97 if (data.vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) { - m_l2_cache = data.l2_cache * (m_totalCores / 2) * m_sockets; - m_l2_exclusive = true; + m_L2 = data.l2_cache * (cores() / 2) * m_sockets; + m_L2_exclusive = true; } // Workaround for Intel Pentium Dual-Core, Core Duo, Core 2 Duo, Core 2 Quad and their Xeon homologue // These processors have L2 cache shared by 2 cores. else if (data.vendor == VENDOR_INTEL && data.ext_family == 0x06 && (data.ext_model == 0x0E || data.ext_model == 0x0F || data.ext_model == 0x17)) { - int l2_count_per_socket = m_totalCores > 1 ? m_totalCores / 2 : 1; - m_l2_cache = data.l2_cache > 0 ? data.l2_cache * l2_count_per_socket * m_sockets : 0; + int l2_count_per_socket = cores() > 1 ? cores() / 2 : 1; + m_L2 = data.l2_cache > 0 ? data.l2_cache * l2_count_per_socket * m_sockets : 0; } else{ - m_l2_cache = data.l2_cache > 0 ? data.l2_cache * m_totalCores * m_sockets : 0; + m_L2 = data.l2_cache > 0 ? data.l2_cache * cores() * m_sockets : 0; } -# if defined(__x86_64__) || defined(_M_AMD64) - m_flags |= X86_64; -# endif - - if (data.flags[CPU_FEATURE_AES]) { - m_flags |= AES; - } - - if (data.flags[CPU_FEATURE_BMI2]) { - m_flags |= BMI2; - } + m_aes = data.flags[CPU_FEATURE_AES]; +} + + +size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const +{ + if (threads() == 1) { + return 1; + } + + size_t cache = 0; + if (m_L3) { + cache = m_L2_exclusive ? (m_L2 + m_L3) : m_L3; + } + else { + cache = m_L2; + } + + size_t count = 0; + + if (cache) { + count = cache / memSize; + + if (cache % memSize >= memSize / 2) { + count++; + } + } + else { + count = threads() / 2; + } + + if (count > (size_t) threads()) { + count = threads(); + } + + if (((float) count / threads() * 100) > maxCpuUsage) { + count = (int) ceil((float) threads() * (maxCpuUsage / 100.0)); + } + + return count < 1 ? 1 : count; } diff --git a/src/core/cpu/AdvancedCpuInfo.h b/src/core/cpu/AdvancedCpuInfo.h new file mode 100644 index 000000000..96fd329a2 --- /dev/null +++ b/src/core/cpu/AdvancedCpuInfo.h @@ -0,0 +1,75 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2016-2018 XMRig , + * + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef XMRIG_ADVANCEDCPUINFO_H +#define XMRIG_ADVANCEDCPUINFO_H + + +#include "common/interfaces/ICpuInfo.h" + + +namespace xmrig { + + +class AdvancedCpuInfo : public ICpuInfo +{ +public: + AdvancedCpuInfo(); + +protected: + size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override; + + inline Assembly assembly() const override { return ASM_NONE; } + inline bool hasAES() const override { return m_aes; } + inline bool isSupported() const override { return true; } + inline const char *brand() const override { return m_brand; } + inline int32_t cores() const override { return m_cores; } + inline int32_t L2() const override { return m_L2; } + inline int32_t L3() const override { return m_L3; } + inline int32_t nodes() const override { return -1; } + inline int32_t sockets() const override { return m_sockets; } + inline int32_t threads() const override { return m_threads; } + +# if defined(__x86_64__) || defined(_M_AMD64) + inline bool isX64() const override { return true; } +# else + inline bool isX64() const override { return false; } +# endif + +private: + bool m_aes; + bool m_L2_exclusive; + char m_brand[64]; + int32_t m_cores; + int32_t m_L2; + int32_t m_L3; + int32_t m_sockets; + int32_t m_threads; +}; + + +} /* namespace xmrig */ + + +#endif /* XMRIG_ADVANCEDCPUINFO_H */ diff --git a/src/Cpu_win.cpp b/src/core/cpu/Cpu.cpp similarity index 69% rename from src/Cpu_win.cpp rename to src/core/cpu/Cpu.cpp index 7258f7266..773255d27 100644 --- a/src/Cpu_win.cpp +++ b/src/core/cpu/Cpu.cpp @@ -22,20 +22,40 @@ */ -#include +#include -#include "Cpu.h" +#include "common/cpu/Cpu.h" -void Cpu::init() +#ifndef XMRIG_NO_LIBCPUID +# include "core/cpu/AdvancedCpuInfo.h" +#endif + + +static xmrig::ICpuInfo *cpuInfo = nullptr; + + +xmrig::ICpuInfo *xmrig::Cpu::info() { -# ifdef XMRIG_NO_LIBCPUID - SYSTEM_INFO sysinfo; - GetSystemInfo(&sysinfo); + assert(cpuInfo != nullptr); - m_totalThreads = sysinfo.dwNumberOfProcessors; -# endif - - initCommon(); + return cpuInfo; +} + + +void xmrig::Cpu::init() +{ + assert(cpuInfo == nullptr); + + cpuInfo = new AdvancedCpuInfo(); +} + + +void xmrig::Cpu::release() +{ + assert(cpuInfo != nullptr); + + delete cpuInfo; + cpuInfo = nullptr; } diff --git a/src/workers/Worker.cpp b/src/workers/Worker.cpp index 567b3e085..c569908c6 100644 --- a/src/workers/Worker.cpp +++ b/src/workers/Worker.cpp @@ -24,8 +24,8 @@ #include +#include "common/cpu/Cpu.h" #include "common/Platform.h" -#include "Cpu.h" #include "workers/CpuThread.h" #include "workers/Handle.h" #include "workers/Worker.h" @@ -41,7 +41,7 @@ Worker::Worker(Handle *handle) : m_sequence(0), m_thread(static_cast(handle->config())) { - if (Cpu::threads() > 1 && m_thread->affinity() != -1L) { + if (xmrig::Cpu::info()->threads() > 1 && m_thread->affinity() != -1L) { Platform::setThreadAffinity(m_thread->affinity()); } From c9fd8061c27ed9df8cdca08a843d395f464e5227 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 23 Sep 2018 18:07:44 +0300 Subject: [PATCH 03/10] Fix Linux build. --- src/common/interfaces/ICpuInfo.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/interfaces/ICpuInfo.h b/src/common/interfaces/ICpuInfo.h index c959bf52a..267616d0a 100644 --- a/src/common/interfaces/ICpuInfo.h +++ b/src/common/interfaces/ICpuInfo.h @@ -24,6 +24,7 @@ #define XMRIG_CPUINFO_H +#include #include From 0c20d7a125f2cb823b6e4b6aa4577e951126f17a Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 23 Sep 2018 19:09:05 +0300 Subject: [PATCH 04/10] Auto-detect proper asm variant. --- src/common/cpu/BasicCpuInfo_arm.cpp | 2 +- src/core/cpu/AdvancedCpuInfo.cpp | 12 +++++++++++- src/core/cpu/AdvancedCpuInfo.h | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/common/cpu/BasicCpuInfo_arm.cpp b/src/common/cpu/BasicCpuInfo_arm.cpp index 34eb73882..c1c127db6 100644 --- a/src/common/cpu/BasicCpuInfo_arm.cpp +++ b/src/common/cpu/BasicCpuInfo_arm.cpp @@ -36,7 +36,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : memcpy(m_brand, "Unknown", 7); # if __ARM_FEATURE_CRYPTO - m_flags |= AES; + m_aes = true; # endif } diff --git a/src/core/cpu/AdvancedCpuInfo.cpp b/src/core/cpu/AdvancedCpuInfo.cpp index 009a2bcbd..ac5508c38 100644 --- a/src/core/cpu/AdvancedCpuInfo.cpp +++ b/src/core/cpu/AdvancedCpuInfo.cpp @@ -31,6 +31,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : + m_assembly(ASM_NONE), m_aes(false), m_L2_exclusive(false), m_brand(), @@ -71,7 +72,16 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : m_L2 = data.l2_cache > 0 ? data.l2_cache * cores() * m_sockets : 0; } - m_aes = data.flags[CPU_FEATURE_AES]; + if (data.flags[CPU_FEATURE_AES]) { + m_aes = true; + + if (data.vendor == VENDOR_AMD && data.ext_family >= 23) { + m_assembly = ASM_RYZEN; + } + else if (data.vendor == VENDOR_INTEL && data.ext_model >= 42) { + m_assembly = ASM_INTEL; + } + } } diff --git a/src/core/cpu/AdvancedCpuInfo.h b/src/core/cpu/AdvancedCpuInfo.h index 96fd329a2..5e8967ad2 100644 --- a/src/core/cpu/AdvancedCpuInfo.h +++ b/src/core/cpu/AdvancedCpuInfo.h @@ -7,7 +7,6 @@ * Copyright 2017-2018 XMR-Stak , * Copyright 2016-2018 XMRig , * - * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or @@ -40,7 +39,7 @@ public: protected: size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override; - inline Assembly assembly() const override { return ASM_NONE; } + inline Assembly assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool isSupported() const override { return true; } inline const char *brand() const override { return m_brand; } @@ -58,6 +57,7 @@ protected: # endif private: + Assembly m_assembly; bool m_aes; bool m_L2_exclusive; char m_brand[64]; From dd27c422932c797021221403cb95984a2013e94b Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 23 Sep 2018 20:16:33 +0300 Subject: [PATCH 05/10] Initial compile with ASM. --- CMakeLists.txt | 4 +- cmake/asm.cmake | 23 +++ src/crypto/asm/cnv2_main_loop.S | 21 +++ src/crypto/asm/cnv2_main_loop.asm | 18 ++ src/crypto/asm/cnv2_main_loop_ivybridge.inc | 183 ++++++++++++++++++++ src/crypto/asm/cnv2_main_loop_ryzen.inc | 179 +++++++++++++++++++ 6 files changed, 427 insertions(+), 1 deletion(-) create mode 100644 cmake/asm.cmake create mode 100644 src/crypto/asm/cnv2_main_loop.S create mode 100644 src/crypto/asm/cnv2_main_loop.asm create mode 100644 src/crypto/asm/cnv2_main_loop_ivybridge.inc create mode 100644 src/crypto/asm/cnv2_main_loop_ryzen.inc diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f7d62910..22bf9a78d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,7 @@ option(WITH_SUMO "CryptoNight-Heavy support" ON) option(WITH_HTTPD "HTTP REST API" ON) option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_TLS "Enable OpenSSL support" ON) +option(WITH_ASM "Enable ASM PoW implementations" ON) option(BUILD_STATIC "Build static binary" OFF) include (CheckIncludeFile) @@ -195,6 +196,7 @@ else() endif() include(cmake/OpenSSL.cmake) +include(cmake/asm.cmake) CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H) if (HAVE_SYSLOG_H) @@ -254,4 +256,4 @@ if (WITH_DEBUG_LOG) endif() add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES}) -target_link_libraries(${PROJECT_NAME} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) +target_link_libraries(${PROJECT_NAME} ${${XMRIG_ASM_LIBRARY}} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) diff --git a/cmake/asm.cmake b/cmake/asm.cmake new file mode 100644 index 000000000..02093810a --- /dev/null +++ b/cmake/asm.cmake @@ -0,0 +1,23 @@ +if (WITH_ASM AND NOT XMRIG_ARM) + set(XMRIG_ASM_LIBRARY "xmrig-asm") + + if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) + enable_language(ASM_MASM) + set_property(SOURCE "src/crypto/asm/cnv2_main_loop.asm" PROPERTY ASM_MASM) + add_library(${XMRIG_ASM_LIBRARY} STATIC + "src/crypto/asm/cnv2_main_loop.asm" + ) + else() + enable_language(ASM) + set_property(SOURCE "src/crypto/asm/cnv2_main_loop.S" PROPERTY C) + add_library(${XMRIG_ASM_LIBRARY} STATIC + "src/crypto/asm/cnv2_main_loop.S" + ) + endif() + + set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) +else() +# set(XMRIG_ASM_SOURCES "") + set(XMRIG_ASM_LIBRARY "") + add_definitions(/DXMRIG_NO_ASM) +endif() diff --git a/src/crypto/asm/cnv2_main_loop.S b/src/crypto/asm/cnv2_main_loop.S new file mode 100644 index 000000000..dc5a82f58 --- /dev/null +++ b/src/crypto/asm/cnv2_main_loop.S @@ -0,0 +1,21 @@ +#define ALIGN .align +.intel_syntax noprefix +.section .text +.global cnv2_mainloop_ivybridge_asm +.global cnv2_mainloop_ryzen_asm + +ALIGN 64 +cnv2_mainloop_ivybridge_asm: + sub rsp, 48 + mov rcx, rdi + #include "cnv2_main_loop_ivybridge.inc" + add rsp, 48 + ret 0 + +ALIGN 64 +cnv2_mainloop_ryzen_asm: + sub rsp, 48 + mov rcx, rdi + #include "cnv2_main_loop_ryzen.inc" + add rsp, 48 + ret 0 diff --git a/src/crypto/asm/cnv2_main_loop.asm b/src/crypto/asm/cnv2_main_loop.asm new file mode 100644 index 000000000..7ec895c49 --- /dev/null +++ b/src/crypto/asm/cnv2_main_loop.asm @@ -0,0 +1,18 @@ +_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE +PUBLIC cnv2_mainloop_ivybridge_asm +PUBLIC cnv2_mainloop_ryzen_asm + +ALIGN 64 +cnv2_mainloop_ivybridge_asm PROC + INCLUDE cnv2_main_loop_ivybridge.inc + ret 0 +cnv2_mainloop_ivybridge_asm ENDP + +ALIGN 64 +cnv2_mainloop_ryzen_asm PROC + INCLUDE cnv2_main_loop_ryzen.inc + ret 0 +cnv2_mainloop_ryzen_asm ENDP + +_TEXT_CNV2_MAINLOOP ENDS +END diff --git a/src/crypto/asm/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cnv2_main_loop_ivybridge.inc new file mode 100644 index 000000000..0985d1bd1 --- /dev/null +++ b/src/crypto/asm/cnv2_main_loop_ivybridge.inc @@ -0,0 +1,183 @@ + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 80 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov esi, 524288 + mov r8, QWORD PTR [rcx+32] + mov r13d, -2147483647 + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movq xmm4, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movq xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + movq xmm3, QWORD PTR [r9+104] + movaps XMMWORD PTR [rsp+64], xmm6 + movaps XMMWORD PTR [rsp+48], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + and r10d, 2097136 + movq xmm5, rax + + xor eax, eax + mov QWORD PTR [rsp+16], rax + + mov ax, 1023 + shl rax, 52 + movq xmm8, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movq xmm0, rcx + punpcklqdq xmm5, xmm0 + + ALIGN 64 +$main_loop_ivybridge: + movdqu xmm6, XMMWORD PTR [r10+rbx] + lea rdx, QWORD PTR [r10+rbx] + mov ecx, r10d + mov eax, r10d + mov rdi, r15 + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + movq xmm0, r11 + movq xmm7, r8 + punpcklqdq xmm7, xmm0 + aesenc xmm6, xmm7 + movdqu xmm1, XMMWORD PTR [rax+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm1, xmm7 + movdqu xmm2, XMMWORD PTR [rcx+rbx] + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [rcx+rbx], xmm0 + movq rcx, xmm3 + movdqu XMMWORD PTR [rax+rbx], xmm2 + mov rax, rcx + movdqu XMMWORD PTR [r10+rbx], xmm1 + shl rax, 32 + xor rdi, rax + movq rbp, xmm6 + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + mov r10, rbp + and r10d, 2097136 + movdqu XMMWORD PTR [rdx], xmm0 + xor rdi, QWORD PTR [r10+rbx] + lea r14, QWORD PTR [r10+rbx] + xor r10d, 32 + mov r12, QWORD PTR [r14+8] + xor edx, edx + lea r9d, DWORD PTR [ecx+ecx] + add r9d, ebp + movdqa xmm0, xmm6 + psrldq xmm0, 8 + or r9d, r13d + movq rax, xmm0 + div r9 + xorps xmm3, xmm3 + mov eax, eax + shl rdx, 32 + add rdx, rax + lea r9, QWORD PTR [rdx+rbp] + mov r15, rdx + mov rax, r9 + shr rax, 12 + movq xmm0, rax + paddq xmm0, xmm8 + sqrtsd xmm3, xmm0 + psubq xmm3, XMMWORD PTR [rsp+16] + movq rdx, xmm3 + test edx, 524287 + je $sqrt_fixup_ivybridge + psrlq xmm3, 19 +$sqrt_fixup_ivybridge_ret: + + mov ecx, r10d + mov rax, rdi + mul rbp + movq xmm2, rdx + xor rdx, [rcx+rbx] + movq xmm0, rax + xor rax, [rcx+rbx+8] + punpcklqdq xmm2, xmm0 + + mov r9d, r10d + xor r9d, 48 + xor r10d, 16 + pxor xmm2, XMMWORD PTR [r9+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm0, xmm5 + movdqu xmm1, XMMWORD PTR [rcx+rbx] + paddq xmm2, xmm4 + paddq xmm1, xmm7 + movdqa xmm5, xmm4 + movdqu XMMWORD PTR [r9+rbx], xmm0 + movdqa xmm4, xmm6 + movdqu XMMWORD PTR [rcx+rbx], xmm2 + movdqu XMMWORD PTR [r10+rbx], xmm1 + add r8, rdx + mov QWORD PTR [r14], r8 + xor r8, rdi + mov r10, r8 + add r11, rax + mov QWORD PTR [r14+8], r11 + and r10d, 2097136 + xor r11, r12 + dec rsi + jne $main_loop_ivybridge + + ldmxcsr DWORD PTR [rsp] + mov rbx, QWORD PTR [rsp+160] + movaps xmm6, XMMWORD PTR [rsp+64] + movaps xmm7, XMMWORD PTR [rsp+48] + movaps xmm8, XMMWORD PTR [rsp+32] + add rsp, 80 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + jmp $cnv2_main_loop_ivybridge_endp + +$sqrt_fixup_ivybridge: + dec rdx + mov r13d, -1022 + shl r13, 32 + mov rax, rdx + shr rdx, 19 + shr rax, 20 + mov rcx, rdx + sub rcx, rax + add rax, r13 + not r13 + sub rcx, r13 + mov r13d, -2147483647 + imul rcx, rax + sub rcx, r9 + adc rdx, 0 + movq xmm3, rdx + jmp $sqrt_fixup_ivybridge_ret + +$cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cnv2_main_loop_ryzen.inc new file mode 100644 index 000000000..3294548ed --- /dev/null +++ b/src/crypto/asm/cnv2_main_loop_ryzen.inc @@ -0,0 +1,179 @@ + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 64 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov ebp, 524288 + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movq xmm3, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movq xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + mov rdi, QWORD PTR [r9+104] + and r10d, 2097136 + movaps XMMWORD PTR [rsp+48], xmm6 + movq xmm4, rax + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + xorps xmm8, xmm8 + mov ax, 1023 + shl rax, 52 + movq xmm7, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm3, xmm0 + movq xmm0, rcx + punpcklqdq xmm4, xmm0 + + ALIGN 64 +$main_loop_ryzen: + movdqa xmm5, XMMWORD PTR [r10+rbx] + movq xmm0, r11 + movq xmm6, r8 + punpcklqdq xmm6, xmm0 + lea rdx, QWORD PTR [r10+rbx] + lea r9, QWORD PTR [rdi+rdi] + shl rdi, 32 + + mov ecx, r10d + mov eax, r10d + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + aesenc xmm5, xmm6 + movdqa xmm2, XMMWORD PTR [rcx+rbx] + movdqa xmm1, XMMWORD PTR [rax+rbx] + movdqa xmm0, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + paddq xmm0, xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm0 + movdqa XMMWORD PTR [rax+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movaps xmm1, xmm8 + mov rsi, r15 + xor rsi, rdi + movq r14, xmm5 + movdqa xmm0, xmm5 + pxor xmm0, xmm3 + mov r10, r14 + and r10d, 2097136 + movdqa XMMWORD PTR [rdx], xmm0 + xor rsi, QWORD PTR [r10+rbx] + lea r12, QWORD PTR [r10+rbx] + mov r13, QWORD PTR [r10+rbx+8] + + add r9d, r14d + or r9d, -2147483647 + xor edx, edx + movdqa xmm0, xmm5 + psrldq xmm0, 8 + movq rax, xmm0 + + div r9 + movq xmm0, rax + movq xmm1, rdx + punpckldq xmm0, xmm1 + movq r15, xmm0 + paddq xmm0, xmm5 + movdqa xmm2, xmm0 + psrlq xmm0, 12 + paddq xmm0, xmm7 + sqrtsd xmm1, xmm0 + movq rdi, xmm1 + test rdi, 524287 + je $sqrt_fixup_ryzen + shr rdi, 19 + +$sqrt_fixup_ryzen_ret: + mov rax, rsi + mul r14 + movq xmm1, rax + movq xmm0, rdx + punpcklqdq xmm0, xmm1 + + mov r9d, r10d + mov ecx, r10d + xor r9d, 16 + xor ecx, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [rcx+rbx] + xor rdx, [rcx+rbx] + xor rax, [rcx+rbx+8] + movdqa xmm2, XMMWORD PTR [r9+rbx] + pxor xmm2, xmm0 + paddq xmm4, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + movdqa XMMWORD PTR [r9+rbx], xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movdqa xmm4, xmm3 + add r8, rdx + add r11, rax + mov QWORD PTR [r12], r8 + xor r8, rsi + mov QWORD PTR [r12+8], r11 + mov r10, r8 + xor r11, r13 + and r10d, 2097136 + movdqa xmm3, xmm5 + dec ebp + jne $main_loop_ryzen + + ldmxcsr DWORD PTR [rsp] + movaps xmm6, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+64] + mov rbx, QWORD PTR [r11+56] + mov rbp, QWORD PTR [r11+64] + mov rsi, QWORD PTR [r11+72] + movaps xmm8, XMMWORD PTR [r11-48] + movaps xmm7, XMMWORD PTR [rsp+32] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + jmp $cnv2_main_loop_ryzen_endp + +$sqrt_fixup_ryzen: + movq r9, xmm2 + dec rdi + mov edx, -1022 + shl rdx, 32 + mov rax, rdi + shr rdi, 19 + shr rax, 20 + mov rcx, rdi + sub rcx, rax + lea rcx, [rcx+rdx+1] + add rax, rdx + imul rcx, rax + sub rcx, r9 + adc rdi, 0 + jmp $sqrt_fixup_ryzen_ret + +$cnv2_main_loop_ryzen_endp: From f163aad38c27190759297b73a80603e0601c3e5a Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 23 Sep 2018 20:45:10 +0300 Subject: [PATCH 06/10] Fix macOS/Clang build. --- src/crypto/asm/cnv2_main_loop.S | 18 ++++++++++++------ src/crypto/asm/cnv2_main_loop_ivybridge.inc | 18 +++++++++--------- src/crypto/asm/cnv2_main_loop_ryzen.inc | 18 +++++++++--------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/crypto/asm/cnv2_main_loop.S b/src/crypto/asm/cnv2_main_loop.S index dc5a82f58..580a45880 100644 --- a/src/crypto/asm/cnv2_main_loop.S +++ b/src/crypto/asm/cnv2_main_loop.S @@ -1,19 +1,25 @@ #define ALIGN .align .intel_syntax noprefix +#ifdef __APPLE__ +# define FN_PREFIX(fn) _ ## fn +.text +#else +# define FN_PREFIX(fn) fn .section .text -.global cnv2_mainloop_ivybridge_asm -.global cnv2_mainloop_ryzen_asm +#endif +.global FN_PREFIX(cnv2_mainloop_ivybridge_asm) +.global FN_PREFIX(cnv2_mainloop_ryzen_asm) -ALIGN 64 -cnv2_mainloop_ivybridge_asm: +ALIGN 16 +FN_PREFIX(cnv2_mainloop_ivybridge_asm): sub rsp, 48 mov rcx, rdi #include "cnv2_main_loop_ivybridge.inc" add rsp, 48 ret 0 -ALIGN 64 -cnv2_mainloop_ryzen_asm: +ALIGN 16 +FN_PREFIX(cnv2_mainloop_ryzen_asm): sub rsp, 48 mov rcx, rdi #include "cnv2_main_loop_ryzen.inc" diff --git a/src/crypto/asm/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cnv2_main_loop_ivybridge.inc index 0985d1bd1..a253a5495 100644 --- a/src/crypto/asm/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/cnv2_main_loop_ivybridge.inc @@ -49,8 +49,8 @@ movq xmm0, rcx punpcklqdq xmm5, xmm0 - ALIGN 64 -$main_loop_ivybridge: + ALIGN 16 +main_loop_ivybridge: movdqu xmm6, XMMWORD PTR [r10+rbx] lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d @@ -108,9 +108,9 @@ $main_loop_ivybridge: psubq xmm3, XMMWORD PTR [rsp+16] movq rdx, xmm3 test edx, 524287 - je $sqrt_fixup_ivybridge + je sqrt_fixup_ivybridge psrlq xmm3, 19 -$sqrt_fixup_ivybridge_ret: +sqrt_fixup_ivybridge_ret: mov ecx, r10d mov rax, rdi @@ -144,7 +144,7 @@ $sqrt_fixup_ivybridge_ret: and r10d, 2097136 xor r11, r12 dec rsi - jne $main_loop_ivybridge + jne main_loop_ivybridge ldmxcsr DWORD PTR [rsp] mov rbx, QWORD PTR [rsp+160] @@ -159,9 +159,9 @@ $sqrt_fixup_ivybridge_ret: pop rdi pop rsi pop rbp - jmp $cnv2_main_loop_ivybridge_endp + jmp cnv2_main_loop_ivybridge_endp -$sqrt_fixup_ivybridge: +sqrt_fixup_ivybridge: dec rdx mov r13d, -1022 shl r13, 32 @@ -178,6 +178,6 @@ $sqrt_fixup_ivybridge: sub rcx, r9 adc rdx, 0 movq xmm3, rdx - jmp $sqrt_fixup_ivybridge_ret + jmp sqrt_fixup_ivybridge_ret -$cnv2_main_loop_ivybridge_endp: +cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cnv2_main_loop_ryzen.inc index 3294548ed..d386aa2df 100644 --- a/src/crypto/asm/cnv2_main_loop_ryzen.inc +++ b/src/crypto/asm/cnv2_main_loop_ryzen.inc @@ -45,8 +45,8 @@ movq xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 64 -$main_loop_ryzen: + ALIGN 16 +main_loop_ryzen: movdqa xmm5, XMMWORD PTR [r10+rbx] movq xmm0, r11 movq xmm6, r8 @@ -103,10 +103,10 @@ $main_loop_ryzen: sqrtsd xmm1, xmm0 movq rdi, xmm1 test rdi, 524287 - je $sqrt_fixup_ryzen + je sqrt_fixup_ryzen shr rdi, 19 -$sqrt_fixup_ryzen_ret: +sqrt_fixup_ryzen_ret: mov rax, rsi mul r14 movq xmm1, rax @@ -141,7 +141,7 @@ $sqrt_fixup_ryzen_ret: and r10d, 2097136 movdqa xmm3, xmm5 dec ebp - jne $main_loop_ryzen + jne main_loop_ryzen ldmxcsr DWORD PTR [rsp] movaps xmm6, XMMWORD PTR [rsp+48] @@ -157,9 +157,9 @@ $sqrt_fixup_ryzen_ret: pop r13 pop r12 pop rdi - jmp $cnv2_main_loop_ryzen_endp + jmp cnv2_main_loop_ryzen_endp -$sqrt_fixup_ryzen: +sqrt_fixup_ryzen: movq r9, xmm2 dec rdi mov edx, -1022 @@ -174,6 +174,6 @@ $sqrt_fixup_ryzen: imul rcx, rax sub rcx, r9 adc rdi, 0 - jmp $sqrt_fixup_ryzen_ret + jmp sqrt_fixup_ryzen_ret -$cnv2_main_loop_ryzen_endp: +cnv2_main_loop_ryzen_endp: From ba65a34a017e6009ea8960aca3ae23df7443e0ff Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 24 Sep 2018 09:51:21 +0300 Subject: [PATCH 07/10] Initial ASM wrapper. --- CMakeLists.txt | 2 +- cmake/asm.cmake | 1 - src/crypto/CryptoNight.h | 10 +++--- src/crypto/CryptoNight_x86.h | 25 ++++++++++++++ src/crypto/asm/cnv2_main_loop_ivybridge.inc | 37 +++++++++++---------- src/workers/CpuThread.cpp | 5 ++- src/workers/Worker.h | 7 ++-- 7 files changed, 58 insertions(+), 29 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 22bf9a78d..1b3fe6f2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -256,4 +256,4 @@ if (WITH_DEBUG_LOG) endif() add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES}) -target_link_libraries(${PROJECT_NAME} ${${XMRIG_ASM_LIBRARY}} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) +target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) diff --git a/cmake/asm.cmake b/cmake/asm.cmake index 02093810a..3a0bc8945 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -17,7 +17,6 @@ if (WITH_ASM AND NOT XMRIG_ARM) set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) else() -# set(XMRIG_ASM_SOURCES "") set(XMRIG_ASM_LIBRARY "") add_definitions(/DXMRIG_NO_ASM) endif() diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h index e8e86dc42..680f1740e 100644 --- a/src/crypto/CryptoNight.h +++ b/src/crypto/CryptoNight.h @@ -22,8 +22,8 @@ * along with this program. If not, see . */ -#ifndef __CRYPTONIGHT_H__ -#define __CRYPTONIGHT_H__ +#ifndef XMRIG_CRYPTONIGHT_H +#define XMRIG_CRYPTONIGHT_H #include @@ -31,9 +31,9 @@ struct cryptonight_ctx { - alignas(16) uint8_t state[200]; - alignas(16) uint8_t* memory; + alignas(16) uint8_t state[224]; + alignas(16) uint8_t *memory; }; -#endif /* __CRYPTONIGHT_H__ */ +#endif /* XMRIG_CRYPTONIGHT_H */ diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 1cb066873..064dbdc28 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -561,6 +561,31 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si } +extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); +extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); + + +template +inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) +{ + constexpr size_t MEM = xmrig::cn_select_memory(); + + xmrig::keccak(input, size, ctx[0]->state); + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); + + if (ASM == xmrig::ASM_INTEL) { + cnv2_mainloop_ivybridge_asm(ctx[0]); + } + else { + cnv2_mainloop_ryzen_asm(ctx[0]); + } + + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); + xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} + + template inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx) { diff --git a/src/crypto/asm/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cnv2_main_loop_ivybridge.inc index a253a5495..8c2c2d3b0 100644 --- a/src/crypto/asm/cnv2_main_loop_ivybridge.inc +++ b/src/crypto/asm/cnv2_main_loop_ivybridge.inc @@ -48,10 +48,10 @@ punpcklqdq xmm4, xmm0 movq xmm0, rcx punpcklqdq xmm5, xmm0 + movdqu xmm6, XMMWORD PTR [r10+rbx] ALIGN 16 main_loop_ivybridge: - movdqu xmm6, XMMWORD PTR [r10+rbx] lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d mov eax, r10d @@ -63,28 +63,29 @@ main_loop_ivybridge: movq xmm7, r8 punpcklqdq xmm7, xmm0 aesenc xmm6, xmm7 + movq rbp, xmm6 + mov r9, rbp + and r9d, 2097136 + movdqu xmm2, XMMWORD PTR [rcx+rbx] movdqu xmm1, XMMWORD PTR [rax+rbx] movdqu xmm0, XMMWORD PTR [r10+rbx] paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rcx+rbx] paddq xmm0, xmm5 paddq xmm2, xmm4 movdqu XMMWORD PTR [rcx+rbx], xmm0 - movq rcx, xmm3 movdqu XMMWORD PTR [rax+rbx], xmm2 - mov rax, rcx movdqu XMMWORD PTR [r10+rbx], xmm1 + mov r10, r9 + xor r10d, 32 + movq rcx, xmm3 + mov rax, rcx shl rax, 32 xor rdi, rax - movq rbp, xmm6 movdqa xmm0, xmm6 pxor xmm0, xmm4 - mov r10, rbp - and r10d, 2097136 movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r10+rbx] - lea r14, QWORD PTR [r10+rbx] - xor r10d, 32 + xor rdi, QWORD PTR [r9+rbx] + lea r14, QWORD PTR [r9+rbx] mov r12, QWORD PTR [r14+8] xor edx, edx lea r9d, DWORD PTR [ecx+ecx] @@ -117,8 +118,15 @@ sqrt_fixup_ivybridge_ret: mul rbp movq xmm2, rdx xor rdx, [rcx+rbx] + add r8, rdx + mov QWORD PTR [r14], r8 + xor r8, rdi + mov edi, r8d + and edi, 2097136 movq xmm0, rax xor rax, [rcx+rbx+8] + add r11, rax + mov QWORD PTR [r14+8], r11 punpcklqdq xmm2, xmm0 mov r9d, r10d @@ -135,13 +143,8 @@ sqrt_fixup_ivybridge_ret: movdqa xmm4, xmm6 movdqu XMMWORD PTR [rcx+rbx], xmm2 movdqu XMMWORD PTR [r10+rbx], xmm1 - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov r10, r8 - add r11, rax - mov QWORD PTR [r14+8], r11 - and r10d, 2097136 + movdqu xmm6, [rdi+rbx] + mov r10d, edi xor r11, r12 dec rsi jne main_loop_ivybridge diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp index ca7681f06..d9d60f51c 100644 --- a/src/workers/CpuThread.cpp +++ b/src/workers/CpuThread.cpp @@ -65,7 +65,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a { assert(variant >= VARIANT_0 && variant < VARIANT_MAX); - static const cn_hash_fun func_table[VARIANT_MAX * 10 * 3] = { + constexpr const size_t count = VARIANT_MAX * 10 * 3; + static const cn_hash_fun func_table[count + 2] = { cryptonight_single_hash, cryptonight_double_hash, cryptonight_single_hash, @@ -242,6 +243,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, # endif + cryptonight_single_hash_asm, + cryptonight_single_hash_asm }; const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1; diff --git a/src/workers/Worker.h b/src/workers/Worker.h index aad9e3c5e..73e250330 100644 --- a/src/workers/Worker.h +++ b/src/workers/Worker.h @@ -21,8 +21,8 @@ * along with this program. If not, see . */ -#ifndef __WORKER_H__ -#define __WORKER_H__ +#ifndef XMRIG_WORKER_H +#define XMRIG_WORKER_H #include @@ -33,7 +33,6 @@ #include "Mem.h" -struct cryptonight_ctx; class Handle; @@ -67,4 +66,4 @@ protected: }; -#endif /* __WORKER_H__ */ +#endif /* XMRIG_WORKER_H */ From c2fcf2385526223e804dc729847d7534223fd293 Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 24 Sep 2018 14:19:26 +0300 Subject: [PATCH 08/10] Implemented "asm" option. --- CMakeLists.txt | 2 +- cmake/asm.cmake | 2 + src/Summary.cpp | 29 +++++++++ src/common/interfaces/IConfig.h | 1 + src/common/xmrig.h | 3 +- src/core/Config.cpp | 21 +++++-- src/core/Config.h | 9 +-- src/core/ConfigLoader_platform.h | 2 + src/core/cpu/AdvancedCpuInfo.cpp | 2 +- src/crypto/Asm.cpp | 100 +++++++++++++++++++++++++++++++ src/crypto/Asm.h | 50 ++++++++++++++++ src/crypto/CryptoNight_x86.h | 2 + src/workers/CpuThread.cpp | 71 +++++++++++++++++----- src/workers/CpuThread.h | 15 +++-- 14 files changed, 275 insertions(+), 34 deletions(-) create mode 100644 src/crypto/Asm.cpp create mode 100644 src/crypto/Asm.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b3fe6f2c..1becac5ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -255,5 +255,5 @@ if (WITH_DEBUG_LOG) add_definitions(/DAPP_DEBUG) endif() -add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES}) +add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES}) target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) diff --git a/cmake/asm.cmake b/cmake/asm.cmake index 3a0bc8945..a9b76ffda 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -15,8 +15,10 @@ if (WITH_ASM AND NOT XMRIG_ARM) ) endif() + set(XMRIG_ASM_SOURCES src/crypto/Asm.h src/crypto/Asm.cpp) set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) else() + set(XMRIG_ASM_SOURCES "") set(XMRIG_ASM_LIBRARY "") add_definitions(/DXMRIG_NO_ASM) endif() diff --git a/src/Summary.cpp b/src/Summary.cpp index ba220e5b1..3c1d06a70 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -32,11 +32,28 @@ #include "common/net/Pool.h" #include "core/Config.h" #include "core/Controller.h" +#include "crypto/Asm.h" #include "Mem.h" #include "Summary.h" #include "version.h" +#ifndef XMRIG_NO_ASM +static const char *coloredAsmNames[] = { + "\x1B[1;31mnone\x1B[0m", + "auto", + "\x1B[1;32mintel\x1B[0m", + "\x1B[1;32mryzen\x1B[0m" +}; + + +inline static const char *asmName(xmrig::Assembly assembly, bool colors) +{ + return colors ? coloredAsmNames[assembly] : xmrig::Asm::toString(assembly); +} +#endif + + static void print_memory(xmrig::Config *config) { # ifdef _WIN32 if (config->isColors()) { @@ -101,6 +118,18 @@ static void print_threads(xmrig::Config *config) config->isColors() && config->donateLevel() == 0 ? "\x1B[1;31m" : "", config->donateLevel()); } + +# ifndef XMRIG_NO_ASM + if (config->assembly() == xmrig::ASM_AUTO) { + const xmrig::Assembly assembly = xmrig::Cpu::info()->assembly(); + + Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13sauto:%s") + : " * %-13sauto:%s", "ASSEMBLY", asmName(assembly, config->isColors())); + } + else { + Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s") : " * %-13s%s", "ASSEMBLY", asmName(config->assembly(), config->isColors())); + } +# endif } diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h index d35931634..0fcac2d15 100644 --- a/src/common/interfaces/IConfig.h +++ b/src/common/interfaces/IConfig.h @@ -80,6 +80,7 @@ public: SafeKey = 1005, ThreadsKey = 't', HardwareAESKey = 1011, + AssemblyKey = 1015, // xmrig amd OclPlatformKey = 1400, diff --git a/src/common/xmrig.h b/src/common/xmrig.h index 820bc4fbe..52650f0d2 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -98,7 +98,8 @@ enum Assembly { ASM_NONE, ASM_AUTO, ASM_INTEL, - ASM_RYZEN + ASM_RYZEN, + ASM_MAX }; diff --git a/src/core/Config.cpp b/src/core/Config.cpp index d99bfb095..20a3aece0 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -30,6 +30,7 @@ #include "common/cpu/Cpu.h" #include "core/Config.h" #include "core/ConfigCreator.h" +#include "crypto/Asm.h" #include "crypto/CryptoNight_constants.h" #include "rapidjson/document.h" #include "rapidjson/filewritestream.h" @@ -43,6 +44,7 @@ static char affinity_tmp[20] = { 0 }; xmrig::Config::Config() : xmrig::CommonConfig(), m_aesMode(AES_AUTO), m_algoVariant(AV_AUTO), + m_assembly(ASM_AUTO), m_hugePages(true), m_safe(false), m_maxCpuUsage(75), @@ -51,11 +53,6 @@ xmrig::Config::Config() : xmrig::CommonConfig(), } -xmrig::Config::~Config() -{ -} - - bool xmrig::Config::reload(const char *json) { return xmrig::ConfigLoader::reload(this, json); @@ -178,7 +175,7 @@ bool xmrig::Config::finalize() } for (size_t i = 0; i < m_threads.count; ++i) { - m_threads.list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads.mask, m_priority)); + m_threads.list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads.mask, m_priority, m_assembly)); } return true; @@ -204,6 +201,12 @@ bool xmrig::Config::parseBoolean(int key, bool enable) m_aesMode = enable ? AES_HW : AES_SOFT; break; +# ifndef XMRIG_NO_ASM + case AssemblyKey: + m_assembly = Asm::parse(enable); + break; +# endif + default: break; } @@ -244,6 +247,12 @@ bool xmrig::Config::parseString(int key, const char *arg) return parseUint64(key, p ? strtoull(p, nullptr, 16) : strtoull(arg, nullptr, 10)); } +# ifndef XMRIG_NO_ASM + case AssemblyKey: /* --asm */ + m_assembly = Asm::parse(arg); + break; +# endif + default: break; } diff --git a/src/core/Config.h b/src/core/Config.h index f0f1404fb..95afc34cd 100644 --- a/src/core/Config.h +++ b/src/core/Config.h @@ -21,8 +21,8 @@ * along with this program. If not, see . */ -#ifndef __CONFIG_H__ -#define __CONFIG_H__ +#ifndef XMRIG_CONFIG_H +#define XMRIG_CONFIG_H #include @@ -69,7 +69,6 @@ public: Config(); - ~Config(); bool reload(const char *json); @@ -77,6 +76,7 @@ public: inline AesMode aesMode() const { return m_aesMode; } inline AlgoVariant algoVariant() const { return m_algoVariant; } + inline Assembly assembly() const { return m_assembly; } inline bool isHugePages() const { return m_hugePages; } inline const std::vector &threads() const { return m_threads.list; } inline int priority() const { return m_priority; } @@ -116,6 +116,7 @@ private: AesMode m_aesMode; AlgoVariant m_algoVariant; + Assembly m_assembly; bool m_hugePages; bool m_safe; int m_maxCpuUsage; @@ -126,4 +127,4 @@ private: } /* namespace xmrig */ -#endif /* __CONFIG_H__ */ +#endif /* XMRIG_CONFIG_H */ diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h index c034f3e75..3b95a90fc 100644 --- a/src/core/ConfigLoader_platform.h +++ b/src/core/ConfigLoader_platform.h @@ -135,6 +135,7 @@ static struct option const options[] = { { "tls", 0, nullptr, xmrig::IConfig::TlsKey }, { "tls-fingerprint", 1, nullptr, xmrig::IConfig::FingerprintKey }, { "version", 0, nullptr, xmrig::IConfig::VersionKey }, + { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey }, { nullptr, 0, nullptr, 0 } }; @@ -159,6 +160,7 @@ static struct option const config_options[] = { { "threads", 1, nullptr, xmrig::IConfig::ThreadsKey }, { "user-agent", 1, nullptr, xmrig::IConfig::UserAgentKey }, { "hw-aes", 0, nullptr, xmrig::IConfig::HardwareAESKey }, + { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey }, { nullptr, 0, nullptr, 0 } }; diff --git a/src/core/cpu/AdvancedCpuInfo.cpp b/src/core/cpu/AdvancedCpuInfo.cpp index ac5508c38..1f86a4205 100644 --- a/src/core/cpu/AdvancedCpuInfo.cpp +++ b/src/core/cpu/AdvancedCpuInfo.cpp @@ -47,7 +47,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : cpuid_get_raw_data(&raw); cpu_identify(&raw, &data); - strncpy(m_brand, data.brand_str, sizeof(m_brand) - 1); + strncpy(m_brand, data.brand_str, sizeof(m_brand)); m_sockets = threads() / data.num_logical_cpus; if (m_sockets == 0) { diff --git a/src/crypto/Asm.cpp b/src/crypto/Asm.cpp new file mode 100644 index 000000000..79dd1cc9c --- /dev/null +++ b/src/crypto/Asm.cpp @@ -0,0 +1,100 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2016-2018 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#include +#include + + +#ifdef _MSC_VER +# define strncasecmp _strnicmp +# define strcasecmp _stricmp +#endif + + +#include "crypto/Asm.h" +#include "rapidjson/document.h" + + +static const char *asmNames[] = { + "none", + "auto", + "intel", + "ryzen" +}; + + +xmrig::Assembly xmrig::Asm::parse(const char *assembly, Assembly defaultValue) +{ + constexpr size_t const size = sizeof(asmNames) / sizeof((asmNames)[0]); + assert(assembly != nullptr); + assert(ASM_MAX == size); + + if (assembly == nullptr) { + return defaultValue; + } + + for (size_t i = 0; i < size; i++) { + if (strcasecmp(assembly, asmNames[i]) == 0) { + return static_cast(i); + } + } + + return defaultValue; +} + + +xmrig::Assembly xmrig::Asm::parse(const rapidjson::Value &value, Assembly defaultValue) +{ + if (value.IsBool()) { + return parse(value.IsBool()); + } + + if (value.IsString()) { + return parse(value.GetString(), defaultValue); + } + + return defaultValue; +} + + +const char *xmrig::Asm::toString(Assembly assembly) +{ + return asmNames[assembly]; +} + + +rapidjson::Value xmrig::Asm::toJSON(Assembly assembly) +{ + using namespace rapidjson; + + if (assembly == ASM_NONE) { + return Value(false); + } + + if (assembly == ASM_AUTO) { + return Value(true); + } + + return Value(StringRef(toString(assembly))); +} diff --git a/src/crypto/Asm.h b/src/crypto/Asm.h new file mode 100644 index 000000000..3b755fd64 --- /dev/null +++ b/src/crypto/Asm.h @@ -0,0 +1,50 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2016-2018 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef XMRIG_ASM_H +#define XMRIG_ASM_H + + +#include "common/xmrig.h" +#include "rapidjson/fwd.h" + + +namespace xmrig { + + +class Asm +{ +public: + static Assembly parse(const char *assembly, Assembly defaultValue = ASM_AUTO); + static Assembly parse(const rapidjson::Value &value, Assembly defaultValue = ASM_AUTO); + static const char *toString(Assembly assembly); + static rapidjson::Value toJSON(Assembly assembly); + + inline static Assembly parse(bool enable) { return enable ? ASM_AUTO : ASM_NONE; } +}; + + +} /* namespace xmrig */ + + +#endif /* XMRIG_ASM_H */ diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 064dbdc28..42ea37b55 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -561,6 +561,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si } +#ifndef XMRIG_NO_ASM extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); @@ -584,6 +585,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_ xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24); extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); } +#endif template diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp index d9d60f51c..ff6be5859 100644 --- a/src/workers/CpuThread.cpp +++ b/src/workers/CpuThread.cpp @@ -24,8 +24,10 @@ #include +#include "common/cpu/Cpu.h" #include "common/log/Log.h" #include "common/net/Pool.h" +#include "crypto/Asm.h" #include "rapidjson/document.h" #include "workers/CpuThread.h" @@ -37,9 +39,10 @@ #endif -xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch) : +xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly) : m_algorithm(algorithm), m_av(av), + m_assembly(assembly), m_prefetch(prefetch), m_softAES(softAES), m_priority(priority), @@ -50,23 +53,23 @@ xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiw } -xmrig::CpuThread::~CpuThread() -{ -} - - bool xmrig::CpuThread::isSoftAES(AlgoVariant av) { return av == AV_SINGLE_SOFT || av == AV_DOUBLE_SOFT || av > AV_PENTA; } -xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant av, Variant variant) +xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly) { assert(variant >= VARIANT_0 && variant < VARIANT_MAX); +# ifndef XMRIG_NO_ASM + constexpr const size_t count = VARIANT_MAX * 10 * 3 + 2; +# else constexpr const size_t count = VARIANT_MAX * 10 * 3; - static const cn_hash_fun func_table[count + 2] = { +# endif + + static const cn_hash_fun func_table[count] = { cryptonight_single_hash, cryptonight_double_hash, cryptonight_single_hash, @@ -243,13 +246,14 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, # endif +# ifndef XMRIG_NO_ASM cryptonight_single_hash_asm, cryptonight_single_hash_asm +# endif }; - const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1; - # ifndef NDEBUG + const size_t index = fnIndex(algorithm, av, variant, assembly); cn_hash_fun func = func_table[index]; assert(index < sizeof(func_table) / sizeof(func_table[0])); @@ -257,12 +261,12 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a return func; # else - return func_table[index]; + return func_table[fnIndex(algorithm, av, variant, assembly)]; # endif } -xmrig::CpuThread *xmrig::CpuThread::createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority) +xmrig::CpuThread *xmrig::CpuThread::createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly) { assert(av > AV_AUTO && av < AV_MAX); @@ -285,7 +289,7 @@ xmrig::CpuThread *xmrig::CpuThread::createFromAV(size_t index, Algo algorithm, A } } - return new CpuThread(index, algorithm, av, multiway(av), cpuId, priority, isSoftAES(av), false); + return new CpuThread(index, algorithm, av, multiway(av), cpuId, priority, isSoftAES(av), false, assembly); } @@ -303,7 +307,7 @@ xmrig::CpuThread *xmrig::CpuThread::createFromData(size_t index, Algo algorithm, assert(av > AV_AUTO && av < AV_MAX); - return new CpuThread(index, algorithm, static_cast(av), multiway, data.affinity, priority, softAES, false); + return new CpuThread(index, algorithm, static_cast(av), multiway, data.affinity, priority, softAES, false, data.assembly); } @@ -325,11 +329,14 @@ xmrig::CpuThread::Data xmrig::CpuThread::parse(const rapidjson::Value &object) } const auto &affinity = object["affine_to_cpu"]; - if (affinity.IsUint64()) { data.affinity = affinity.GetInt64(); } +# ifndef XMRIG_NO_ASM + data.assembly = Asm::parse(object["asm"]); +# endif + return data; } @@ -371,7 +378,11 @@ void xmrig::CpuThread::print() const LOG_DEBUG(GREEN_BOLD("CPU thread: ") " index " WHITE_BOLD("%zu") ", multiway " WHITE_BOLD("%d") ", av " WHITE_BOLD("%d") ",", index(), static_cast(multiway()), static_cast(m_av)); +# ifndef XMRIG_NO_ASM + LOG_DEBUG(" assembly: %s, affine_to_cpu: %" PRId64, Asm::toString(m_assembly), affinity()); +# else LOG_DEBUG(" affine_to_cpu: %" PRId64, affinity()); +# endif } #endif @@ -406,5 +417,35 @@ rapidjson::Value xmrig::CpuThread::toConfig(rapidjson::Document &doc) const obj.AddMember("low_power_mode", multiway(), allocator); obj.AddMember("affine_to_cpu", affinity() == -1L ? Value(kFalseType) : Value(affinity()), allocator); +# ifndef XMRIG_NO_ASM + obj.AddMember("asm", Asm::toJSON(m_assembly), allocator); +# endif + return obj; } + + +size_t xmrig::CpuThread::fnIndex(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly) +{ + const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1; + +# ifndef XMRIG_NO_ASM + if (assembly == ASM_AUTO) { + assembly = Cpu::info()->assembly(); + } + + if (assembly == ASM_NONE) { + return index; + } + + constexpr const size_t offset = VARIANT_MAX * 10 * 3; + + if (algorithm == CRYPTONIGHT && variant == VARIANT_2) { + if (av == AV_SINGLE) { + return offset + assembly - 2; + } + } +# endif + + return index; +} diff --git a/src/workers/CpuThread.h b/src/workers/CpuThread.h index 622dc3a25..29ab96964 100644 --- a/src/workers/CpuThread.h +++ b/src/workers/CpuThread.h @@ -40,7 +40,7 @@ class CpuThread : public IThread public: struct Data { - inline Data() : valid(false), affinity(-1L), multiway(SingleWay) {} + inline Data() : assembly(ASM_AUTO), valid(false), affinity(-1L), multiway(SingleWay) {} inline void setMultiway(int value) { @@ -50,27 +50,27 @@ public: } } + Assembly assembly; bool valid; int64_t affinity; Multiway multiway; }; - CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch); - ~CpuThread(); + CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly); typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx); static bool isSoftAES(AlgoVariant av); - static cn_hash_fun fn(Algo algorithm, AlgoVariant av, Variant variant); - static CpuThread *createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority); + static cn_hash_fun fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly); + static CpuThread *createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly); static CpuThread *createFromData(size_t index, Algo algorithm, const CpuThread::Data &data, int priority, bool softAES); static Data parse(const rapidjson::Value &object); static Multiway multiway(AlgoVariant av); inline bool isPrefetch() const { return m_prefetch; } inline bool isSoftAES() const { return m_softAES; } - inline cn_hash_fun fn(Variant variant) const { return fn(m_algorithm, m_av, variant); } + inline cn_hash_fun fn(Variant variant) const { return fn(m_algorithm, m_av, variant, m_assembly); } inline Algo algorithm() const override { return m_algorithm; } inline int priority() const override { return m_priority; } @@ -91,8 +91,11 @@ protected: rapidjson::Value toConfig(rapidjson::Document &doc) const override; private: + static size_t fnIndex(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly); + const Algo m_algorithm; const AlgoVariant m_av; + const Assembly m_assembly; const bool m_prefetch; const bool m_softAES; const int m_priority; From f4a867b70f04d76bb05cda6a49f0699c9f1827c1 Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 24 Sep 2018 14:57:30 +0300 Subject: [PATCH 09/10] Fix 32bit build. --- cmake/asm.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/asm.cmake b/cmake/asm.cmake index a9b76ffda..d6c88e99f 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -1,4 +1,4 @@ -if (WITH_ASM AND NOT XMRIG_ARM) +if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) set(XMRIG_ASM_LIBRARY "xmrig-asm") if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) From ebcdac7d1365ffa29f7a7f7af14e8a19a3deb0d1 Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 24 Sep 2018 20:43:31 +0300 Subject: [PATCH 10/10] Fixed crash when use ASM code for MSYS2, thanks @SChernykh. --- cmake/asm.cmake | 21 ++++++++++++--------- src/crypto/asm/cnv2_main_loop_win.S | 15 +++++++++++++++ 2 files changed, 27 insertions(+), 9 deletions(-) create mode 100644 src/crypto/asm/cnv2_main_loop_win.S diff --git a/cmake/asm.cmake b/cmake/asm.cmake index d6c88e99f..cb50f0d95 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -1,20 +1,23 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) set(XMRIG_ASM_LIBRARY "xmrig-asm") - if (CMAKE_CXX_COMPILER_ID MATCHES MSVC) + if (CMAKE_C_COMPILER_ID MATCHES MSVC) enable_language(ASM_MASM) - set_property(SOURCE "src/crypto/asm/cnv2_main_loop.asm" PROPERTY ASM_MASM) - add_library(${XMRIG_ASM_LIBRARY} STATIC - "src/crypto/asm/cnv2_main_loop.asm" - ) + set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.asm") + set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM) else() enable_language(ASM) - set_property(SOURCE "src/crypto/asm/cnv2_main_loop.S" PROPERTY C) - add_library(${XMRIG_ASM_LIBRARY} STATIC - "src/crypto/asm/cnv2_main_loop.S" - ) + + if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU) + set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop_win.S") + else() + set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.S") + endif() + + set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C) endif() + add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE}) set(XMRIG_ASM_SOURCES src/crypto/Asm.h src/crypto/Asm.cpp) set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) else() diff --git a/src/crypto/asm/cnv2_main_loop_win.S b/src/crypto/asm/cnv2_main_loop_win.S new file mode 100644 index 000000000..3c2028b61 --- /dev/null +++ b/src/crypto/asm/cnv2_main_loop_win.S @@ -0,0 +1,15 @@ +#define ALIGN .align +.intel_syntax noprefix +.section .text +.global cnv2_mainloop_ivybridge_asm +.global cnv2_mainloop_ryzen_asm + +ALIGN 16 +cnv2_mainloop_ivybridge_asm: + #include "cnv2_main_loop_ivybridge.inc" + ret 0 + +ALIGN 16 +cnv2_mainloop_ryzen_asm: + #include "cnv2_main_loop_ryzen.inc" + ret 0