Merge branch 'dev'

# Conflicts:
#	options.c
#	src/version.h
This commit is contained in:
XMRig 2017-07-02 22:16:23 +03:00
commit 751be470b8
166 changed files with 8154 additions and 8284 deletions

1
.gitignore vendored
View file

@ -1 +1,2 @@
/build
/CMakeLists.txt.user

View file

@ -1,3 +1,29 @@
# v2.0.0
- Option `--backup-url` removed, instead now possibility specify multiple pools for example: `-o example1.com:3333 -u user1 -p password1 -k -o example2.com:5555 -u user2 -o example3.com:4444 -u user3`
- [#15](https://github.com/xmrig/xmrig/issues/15) Added option `-l, --log-file=FILE` to write log to file.
- [#15](https://github.com/xmrig/xmrig/issues/15) Added option `-S, --syslog` to use syslog for logging, Linux only.
- [#18](https://github.com/xmrig/xmrig/issues/18) Added nice messages for accepted/rejected shares with diff and network latency.
- [#20](https://github.com/xmrig/xmrig/issues/20) Fixed `--cpu-affinity` for more than 32 threads.
- Fixed Windows XP support.
- Fixed regression, option `--no-color` was not fully disable colored output.
- Show resolved pool IP address in miner output.
# v1.0.1
- Fix broken software AES implementation, app has crashed if CPU not support AES-NI, only version 1.0.0 affected.
# v1.0.0
- Miner complete rewritten in C++ with libuv.
- This version should be fully compatible (except config file) with previos versions, many new nice features will come in next versions.
- This is still beta. If you found regression, stability or perfomance issues or have an idea for new feature please fell free to open new [issue](https://github.com/xmrig/xmrig/issues/new).
- Added new option `--print-time=N`, print hashrate report every N seconds.
- New hashrate reports, by default every 60 secons.
- Added Microsoft Visual C++ 2015 and 2017 support.
- Removed dependency on libcurl.
- To compile this version from source please switch to [dev](https://github.com/xmrig/xmrig/tree/dev) branch.
# v0.8.2
- Fixed L2 cache size detection for AMD CPUs (Bulldozer/Piledriver/Steamroller/Excavator architecture).
# v0.8.2
- Fixed L2 cache size detection for AMD CPUs (Bulldozer/Piledriver/Steamroller/Excavator architecture).
- Fixed gcc 7.1 support.

View file

@ -1,146 +1,196 @@
cmake_minimum_required(VERSION 3.0)
project(xmrig C)
project(xmrig)
option(WITH_LIBCPUID "Use Libcpuid" ON)
option(WITH_AEON "CryptoNight-Lite support" ON)
include (CheckIncludeFile)
set(HEADERS
compat.h
algo/cryptonight/cryptonight.h
algo/cryptonight/cryptonight_aesni.h
algo/cryptonight/cryptonight_softaes.h
elist.h
xmrig.h
version.h
options.h
cpu.h
persistent_memory.h
stratum.h
stats.h
util.h
donate.h
src/3rdparty/align.h
src/App.h
src/Cpu.h
src/interfaces/IClientListener.h
src/interfaces/IJobResultListener.h
src/interfaces/ILogBackend.h
src/interfaces/IStrategy.h
src/interfaces/IStrategyListener.h
src/interfaces/IWorker.h
src/log/ConsoleLog.h
src/log/FileLog.h
src/log/Log.h
src/Mem.h
src/net/Client.h
src/net/Job.h
src/net/JobResult.h
src/net/Network.h
src/net/SubmitResult.h
src/net/Url.h
src/net/strategies/DonateStrategy.h
src/net/strategies/FailoverStrategy.h
src/net/strategies/SinglePoolStrategy.h
src/Options.h
src/Summary.h
src/version.h
src/workers/DoubleWorker.h
src/workers/Handle.h
src/workers/Hashrate.h
src/workers/SingleWorker.h
src/workers/Worker.h
src/workers/Workers.h
)
set(HEADERS_CRYPTO
crypto/c_groestl.h
crypto/c_blake256.h
crypto/c_jh.h
crypto/c_skein.h
)
set(HEADERS_COMPAT
compat/winansi.h
)
set(HEADERS_UTILS
utils/applog.h
utils/threads.h
utils/summary.h
src/crypto/c_blake256.h
src/crypto/c_groestl.h
src/crypto/c_jh.h
src/crypto/c_keccak.h
src/crypto/c_skein.h
src/crypto/CryptoNight.h
src/crypto/CryptoNight_p.h
src/crypto/CryptoNight_test.h
src/crypto/groestl_tables.h
src/crypto/hash.h
src/crypto/skein_port.h
)
set(SOURCES
xmrig.c
algo/cryptonight/cryptonight.c
algo/cryptonight/cryptonight_av1_aesni.c
algo/cryptonight/cryptonight_av2_aesni_double.c
algo/cryptonight/cryptonight_av3_softaes.c
algo/cryptonight/cryptonight_av4_softaes_double.c
util.c
options.c
stratum.c
stats.c
memory.c
src/App.cpp
src/log/ConsoleLog.cpp
src/log/FileLog.cpp
src/log/Log.cpp
src/Mem.cpp
src/net/Client.cpp
src/net/Job.cpp
src/net/Network.cpp
src/net/Url.cpp
src/net/strategies/DonateStrategy.cpp
src/net/strategies/FailoverStrategy.cpp
src/net/strategies/SinglePoolStrategy.cpp
src/Options.cpp
src/Summary.cpp
src/workers/DoubleWorker.cpp
src/workers/Handle.cpp
src/workers/Hashrate.cpp
src/workers/SingleWorker.cpp
src/workers/Worker.cpp
src/workers/Workers.cpp
src/xmrig.cpp
)
set(SOURCES_CRYPTO
crypto/c_keccak.c
crypto/c_groestl.c
crypto/c_blake256.c
crypto/c_jh.c
crypto/c_skein.c
crypto/soft_aes.c
)
set(SOURCES_UTILS
utils/applog.c
utils/summary.c
src/crypto/c_keccak.c
src/crypto/c_groestl.c
src/crypto/c_blake256.c
src/crypto/c_jh.c
src/crypto/c_skein.c
src/crypto/soft_aes.c
src/crypto/soft_aes.c
src/crypto/CryptoNight.cpp
)
if (WIN32)
set(SOURCES_OS win/cpu_win.c win/memory_win.c win/xmrig_win.c win/app.rc compat/winansi.c)
set(EXTRA_LIBS ws2_32)
add_definitions(/D_WIN32_WINNT=0x600)
set(SOURCES_OS
res/app.rc
src/3rdparty/winansi.cpp
src/3rdparty/winansi.h
src/App_win.cpp
src/Cpu_win.cpp
src/Mem_win.cpp
src/net/Network_win.cpp
)
add_definitions(/DWIN32)
set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv)
elseif (APPLE)
set(SOURCES_OS mac/cpu_mac.c mac/memory_mac.c mac/xmrig_mac.c)
set(SOURCES_OS
src/App_unix.cpp
src/Cpu_mac.cpp
src/Mem_unix.cpp
src/net/Network_mac.cpp
)
else()
set(SOURCES_OS unix/cpu_unix.c unix/memory_unix.c unix/xmrig_unix.c)
set(SOURCES_OS
src/App_unix.cpp
src/Cpu_unix.cpp
src/Mem_unix.cpp
src/net/Network_unix.cpp
)
set(EXTRA_LIBS pthread)
endif()
include_directories(.)
add_definitions(/DUSE_NATIVE_THREADS)
add_definitions(/D_GNU_SOURCE)
add_definitions(/DUNICODE)
#add_definitions(/DAPP_DEBUG)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
find_package(UV REQUIRED)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wno-pointer-to-int-cast")
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants")
else()
# https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_COMPILER_ID.html
if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wall -Wno-strict-aliasing")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
endif()
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -gdwarf-2")
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate")
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -Wall -std=c++14 -fno-exceptions -fno-rtti")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -s -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
if (WIN32)
if (WIN32)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
else()
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
endif()
add_definitions(/D_GNU_SOURCE)
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -gdwarf-2")
elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /Ot /Oi /MT /GL")
elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wall")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fmerge-all-constants")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -Wall -std=c++14 -fno-exceptions -fno-rtti")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -funroll-loops -fmerge-all-constants")
endif()
include_directories(compat/jansson)
add_subdirectory(compat/jansson)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
find_package(CURL REQUIRED)
include_directories(${CURL_INCLUDE_DIRS})
add_definitions(/DCURL_STATICLIB)
link_directories(${CURL_LIBRARIES})
if (WITH_LIBCPUID)
add_subdirectory(compat/libcpuid)
add_subdirectory(src/3rdparty/libcpuid)
include_directories(compat/libcpuid)
include_directories(src/3rdparty/libcpuid)
set(CPUID_LIB cpuid)
set(SOURCES_CPUID cpu.c)
set(SOURCES_CPUID src/Cpu.cpp)
else()
add_definitions(/DXMRIG_NO_LIBCPUID)
set(SOURCES_CPUID cpu_stub.c)
set(SOURCES_CPUID src/Cpu_stub.cpp)
endif()
if (WITH_AEON)
set(SOURCES_AEON
algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
algo/cryptonight-lite/cryptonight_lite_aesni.h
algo/cryptonight-lite/cryptonight_lite_softaes.h
)
else()
add_definitions(/DXMRIG_NO_AEON)
CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
if (HAVE_SYSLOG_H)
add_definitions(/DHAVE_SYSLOG_H)
set(SOURCES_SYSLOG src/log/SysLog.h src/log/SysLog.cpp)
endif()
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON})
target_link_libraries(xmrig jansson curl ${CPUID_LIB} ${EXTRA_LIBS})
else()
add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON})
target_link_libraries(xmrig32 jansson curl ${CPUID_LIB} ${EXTRA_LIBS})
endif()
include_directories(src)
include_directories(src/3rdparty)
include_directories(src/3rdparty/jansson)
include_directories(${UV_INCLUDE_DIR})
add_subdirectory(src/3rdparty/jansson)
add_executable(xmrig ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG})
target_link_libraries(xmrig jansson ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB})

View file

@ -9,7 +9,7 @@ Based on cpuminer-multi with heavy optimizations/rewrites and removing a lot of
* [Download](#download)
* [Usage](#usage)
* [Algorithm variations](#algorithm-variations)
* [Build](#build)
* [Build](https://github.com/xmrig/xmrig/wiki/Build)
* [Common Issues](#common-issues)
* [Other information](#other-information)
* [Donations](#donations)
@ -59,6 +59,7 @@ xmrig.exe -o xmr-eu.dwarfpool.com:8005 -u YOUR_WALLET -p x -k
--max-cpu-usage=N maximum cpu usage for automatic threads mode (default 75)
--safe safe adjust threads and av settings for current cpu
--nicehash enable nicehash support
--print-time=N print hashrate report every N seconds
-h, --help display this help and exit
-V, --version output version information and exit
```
@ -70,42 +71,6 @@ Since version 0.8.0.
* `--av=3` Software AES implementation.
* `--av=4` Lower power mode (double hash) of `3`.
## Build
### Ubuntu (Debian-based distros)
```
sudo apt-get install git build-essential cmake libcurl4-openssl-dev
git clone https://github.com/xmrig/xmrig.git
cd xmrig
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
make
```
### Windows
It's complicated, you need [MSYS2](http://www.msys2.org/), custom libcurl build, and of course CMake too.
Necessary MSYS2 packages:
```
pacman -Sy
pacman -S mingw-w64-x86_64-gcc
pacman -S make
pacman -S mingw-w64-x86_64-cmake
pacman -S mingw-w64-x86_64-pkg-config
```
Configure options for libcurl:
```
./configure --disable-shared --enable-optimize --enable-threaded-resolver --disable-libcurl-option --disable-ares --disable-rt --disable-ftp --disable-file --disable-ldap --disable-ldaps --disable-rtsp --disable-dict --disable-telnet --disable-tftp --disable-pop3 --disable-imap --disable-smb --disable-smtp --disable-gopher --disable-manual --disable-ipv6 --disable-sspi --disable-crypto-auth --disable-ntlm-wb --disable-tls-srp --disable-unix-sockets --without-zlib --without-winssl --without-ssl --without-libssh2 --without-nghttp2 --disable-cookies --without-ca-bundle --without-librtmp
```
CMake options:
```
cmake .. -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCURL_INCLUDE_DIR="c:\<path>\curl-7.53.1\include" -DCURL_LIBRARY="c:\<path>\curl-7.53.1\lib\.libs"
```
### Optional features
`-DWITH_LIBCPUID=OFF` Disable libcpuid. Auto configuration of CPU after this will be very limited.
`-DWITH_AEON=OFF` Disable CryptoNight-Lite support.
## Common Issues
### HUGE PAGES unavailable
* Run XMRig as Administrator.

View file

@ -1,256 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_LITE_AESNI_H__
#define __CRYPTONIGHT_LITE_AESNI_H__
#include <x86intrin.h>
#define aes_genkey_sub(imm8) \
__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8)); \
xout1 = _mm_shuffle_epi32(xout1, 0xFF); \
*xout0 = sl_xor(*xout0); \
*xout0 = _mm_xor_si128(*xout0, xout1); \
xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);\
xout1 = _mm_shuffle_epi32(xout1, 0xAA); \
*xout2 = sl_xor(*xout2); \
*xout2 = _mm_xor_si128(*xout2, xout1); \
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
static inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x1)
}
static inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x2)
}
static inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x4)
}
static inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x8)
}
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
*x0 = _mm_aesenc_si128(*x0, key);
*x1 = _mm_aesenc_si128(*x1, key);
*x2 = _mm_aesenc_si128(*x2, key);
*x3 = _mm_aesenc_si128(*x3, key);
*x4 = _mm_aesenc_si128(*x4, key);
*x5 = _mm_aesenc_si128(*x5, key);
*x6 = _mm_aesenc_si128(*x6, key);
*x7 = _mm_aesenc_si128(*x7, key);
}
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
*k0 = xout0;
*k1 = xout2;
aes_genkey_sub1(&xout0, &xout2);
*k2 = xout0;
*k3 = xout2;
aes_genkey_sub2(&xout0, &xout2);
*k4 = xout0;
*k5 = xout2;
aes_genkey_sub4(&xout0, &xout2);
*k6 = xout0;
*k7 = xout2;
aes_genkey_sub8(&xout0, &xout2);
*k8 = xout0;
*k9 = xout2;
}
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8) {
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
}
}
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8)
{
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
#if defined(__x86_64__)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
*hi = r >> 64;
return (uint64_t) r;
}
#elif defined(__i386__)
# define HI32(X) \
_mm_srli_si128((X), 4)
# define EXTRACT64(X) \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
#endif
#endif /* __CRYPTONIGHT_LITE_AESNI_H__ */

View file

@ -1,77 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "algo/cryptonight/cryptonight.h"
#include "cryptonight_lite_aesni.h"
#include "crypto/c_keccak.h"
void cryptonight_lite_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, size, ctx->state0, 200);
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
const uint8_t* l0 = ctx->memory;
uint64_t* h0 = (uint64_t*) ctx->state0;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
keccakf(h0, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
}

View file

@ -1,111 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "algo/cryptonight/cryptonight.h"
#include "cryptonight_lite_aesni.h"
#include "crypto/c_keccak.h"
void cryptonight_lite_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, size, ctx->state0, 200);
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEMORY_LITE;
uint64_t* h0 = (uint64_t*) ctx->state0;
uint64_t* h1 = (uint64_t*) ctx->state1;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
}

View file

@ -1,77 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "algo/cryptonight/cryptonight.h"
#include "cryptonight_lite_softaes.h"
#include "crypto/c_keccak.h"
void cryptonight_lite_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, size, ctx->state0, 200);
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
const uint8_t* l0 = ctx->memory;
uint64_t* h0 = (uint64_t*) ctx->state0;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0xFFFF0]);
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
_mm_store_si128((__m128i *)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*)&l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*)&l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
keccakf(h0, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
}

View file

@ -1,111 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "algo/cryptonight/cryptonight.h"
#include "cryptonight_lite_softaes.h"
#include "crypto/c_keccak.h"
void cryptonight_lite_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, size, ctx->state0, 200);
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEMORY_LITE;
uint64_t* h0 = (uint64_t*) ctx->state0;
uint64_t* h1 = (uint64_t*) ctx->state1;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
}

View file

@ -1,237 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_LITE_SOFTAES_H__
#define __CRYPTONIGHT_LITE_SOFTAES_H__
#include <x86intrin.h>
extern __m128i soft_aesenc(__m128i in, __m128i key);
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
{
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
*xout0 = sl_xor(*xout0);
*xout0 = _mm_xor_si128(*xout0, xout1);
xout1 = soft_aeskeygenassist(*xout0, 0x00);
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
*xout2 = sl_xor(*xout2);
*xout2 = _mm_xor_si128(*xout2, xout1);
}
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
*x0 = soft_aesenc(*x0, key);
*x1 = soft_aesenc(*x1, key);
*x2 = soft_aesenc(*x2, key);
*x3 = soft_aesenc(*x3, key);
*x4 = soft_aesenc(*x4, key);
*x5 = soft_aesenc(*x5, key);
*x6 = soft_aesenc(*x6, key);
*x7 = soft_aesenc(*x7, key);
}
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
*k0 = xout0;
*k1 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x1);
*k2 = xout0;
*k3 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x2);
*k4 = xout0;
*k5 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x4);
*k6 = xout0;
*k7 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x8);
*k8 = xout0;
*k9 = xout2;
}
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; i < MEMORY_LITE / sizeof(__m128i); i += 8) {
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
}
}
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY_LITE / sizeof(__m128i), 1); i += 8)
{
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
#if defined(__x86_64__)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
*hi = r >> 64;
return (uint64_t) r;
}
#elif defined(__i386__)
# define HI32(X) \
_mm_srli_si128((X), 4)
# define EXTRACT64(X) \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
#endif
#endif /* __CRYPTONIGHT_LITE_SOFTAES_H__ */

View file

@ -1,244 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <string.h>
#include <mm_malloc.h>
#ifndef BUILD_TEST
# include "xmrig.h"
#endif
#include "crypto/c_groestl.h"
#include "crypto/c_blake256.h"
#include "crypto/c_jh.h"
#include "crypto/c_skein.h"
#include "cryptonight.h"
#include "options.h"
const static char test_input[152] = {
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01
};
const static char test_output0[64] = {
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00
};
void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
#ifndef XMRIG_NO_AEON
const static char test_output1[64] = {
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
};
void cryptonight_lite_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_lite_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_lite_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
void cryptonight_lite_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
#endif
void (*cryptonight_hash_ctx)(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) = NULL;
static bool self_test() {
if (cryptonight_hash_ctx == NULL) {
return false;
}
char output[64];
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 2, 16);
cryptonight_hash_ctx(test_input, 76, output, ctx);
_mm_free(ctx->memory);
_mm_free(ctx);
# ifndef XMRIG_NO_AEON
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
return memcmp(output, test_output1, (opt_double_hash ? 64 : 32)) == 0;
}
# endif
return memcmp(output, test_output0, (opt_double_hash ? 64 : 32)) == 0;
}
#ifndef XMRIG_NO_AEON
bool cryptonight_lite_init(int variant) {
switch (variant) {
case AEON_AV1_AESNI:
cryptonight_hash_ctx = cryptonight_lite_av1_aesni;
break;
case AEON_AV2_AESNI_DOUBLE:
opt_double_hash = true;
cryptonight_hash_ctx = cryptonight_lite_av2_aesni_double;
break;
case AEON_AV3_SOFT_AES:
cryptonight_hash_ctx = cryptonight_lite_av3_softaes;
break;
case AEON_AV4_SOFT_AES_DOUBLE:
opt_double_hash = true;
cryptonight_hash_ctx = cryptonight_lite_av4_softaes_double;
break;
default:
break;
}
return self_test();
}
#endif
bool cryptonight_init(int variant)
{
# ifndef XMRIG_NO_AEON
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
return cryptonight_lite_init(variant);
}
# endif
switch (variant) {
case XMR_AV1_AESNI:
cryptonight_hash_ctx = cryptonight_av1_aesni;
break;
case XMR_AV2_AESNI_DOUBLE:
opt_double_hash = true;
cryptonight_hash_ctx = cryptonight_av2_aesni_double;
break;
case XMR_AV3_SOFT_AES:
cryptonight_hash_ctx = cryptonight_av3_softaes;
break;
case XMR_AV4_SOFT_AES_DOUBLE:
opt_double_hash = true;
cryptonight_hash_ctx = cryptonight_av4_softaes_double;
break;
default:
break;
}
return self_test();
}
static inline void do_blake_hash(const void* input, size_t len, char* output) {
blake256_hash((uint8_t*)output, input, len);
}
static inline void do_groestl_hash(const void* input, size_t len, char* output) {
groestl(input, len * 8, (uint8_t*)output);
}
static inline void do_jh_hash(const void* input, size_t len, char* output) {
jh_hash(32 * 8, input, 8 * len, (uint8_t*)output);
}
static inline void do_skein_hash(const void* input, size_t len, char* output) {
skein_hash(8 * 32, input, 8 * len, (uint8_t*)output);
}
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
#ifndef BUILD_TEST
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) {
uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
do {
cryptonight_hash_ctx(blob, blob_size, hash, ctx);
(*hashes_done)++;
if (unlikely(hash[7] < target)) {
return 1;
}
(*nonceptr)++;
} while (likely(((*nonceptr) < max_nonce && !work_restart[thr_id].restart)));
return 0;
}
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) {
int rc = 0;
uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
do {
cryptonight_hash_ctx(blob, blob_size, hash, ctx);
(*hashes_done) += 2;
if (unlikely(hash[7] < target)) {
return rc |= 1;
}
if (unlikely(hash[15] < target)) {
return rc |= 2;
}
if (rc) {
break;
}
(*nonceptr0)++;
(*nonceptr1)++;
} while (likely(((*nonceptr0) < max_nonce && !work_restart[thr_id].restart)));
return rc;
}
#endif

View file

@ -1,256 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_AESNI_H__
#define __CRYPTONIGHT_AESNI_H__
#include <x86intrin.h>
#define aes_genkey_sub(imm8) \
__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, (imm8)); \
xout1 = _mm_shuffle_epi32(xout1, 0xFF); \
*xout0 = sl_xor(*xout0); \
*xout0 = _mm_xor_si128(*xout0, xout1); \
xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);\
xout1 = _mm_shuffle_epi32(xout1, 0xAA); \
*xout2 = sl_xor(*xout2); \
*xout2 = _mm_xor_si128(*xout2, xout1); \
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
static inline void aes_genkey_sub1(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x1)
}
static inline void aes_genkey_sub2(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x2)
}
static inline void aes_genkey_sub4(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x4)
}
static inline void aes_genkey_sub8(__m128i* xout0, __m128i* xout2)
{
aes_genkey_sub(0x8)
}
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
*x0 = _mm_aesenc_si128(*x0, key);
*x1 = _mm_aesenc_si128(*x1, key);
*x2 = _mm_aesenc_si128(*x2, key);
*x3 = _mm_aesenc_si128(*x3, key);
*x4 = _mm_aesenc_si128(*x4, key);
*x5 = _mm_aesenc_si128(*x5, key);
*x6 = _mm_aesenc_si128(*x6, key);
*x7 = _mm_aesenc_si128(*x7, key);
}
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
*k0 = xout0;
*k1 = xout2;
aes_genkey_sub1(&xout0, &xout2);
*k2 = xout0;
*k3 = xout2;
aes_genkey_sub2(&xout0, &xout2);
*k4 = xout0;
*k5 = xout2;
aes_genkey_sub4(&xout0, &xout2);
*k6 = xout0;
*k7 = xout2;
aes_genkey_sub8(&xout0, &xout2);
*k8 = xout0;
*k9 = xout2;
}
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8) {
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
}
}
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8)
{
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
#if defined(__x86_64__)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
*hi = r >> 64;
return (uint64_t) r;
}
#elif defined(__i386__)
# define HI32(X) \
_mm_srli_si128((X), 4)
# define EXTRACT64(X) \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
#endif
#endif /* __CRYPTONIGHT_AESNI_H__ */

View file

@ -1,77 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "cryptonight_aesni.h"
#include "crypto/c_keccak.h"
void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, size, ctx->state0, 200);
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
const uint8_t* l0 = ctx->memory;
uint64_t* h0 = (uint64_t*) ctx->state0;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
keccakf(h0, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
}

View file

@ -1,111 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "cryptonight_aesni.h"
#include "crypto/c_keccak.h"
void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, size, ctx->state0, 200);
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEMORY;
uint64_t* h0 = (uint64_t*) ctx->state0;
uint64_t* h1 = (uint64_t*) ctx->state1;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
}

View file

@ -1,77 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "cryptonight_softaes.h"
#include "crypto/c_keccak.h"
void cryptonight_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, size, ctx->state0, 200);
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
const uint8_t* l0 = ctx->memory;
uint64_t* h0 = (uint64_t*) ctx->state0;
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
_mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
keccakf(h0, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
}

View file

@ -1,111 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <x86intrin.h>
#include <string.h>
#include "cryptonight.h"
#include "cryptonight_softaes.h"
#include "crypto/c_keccak.h"
void cryptonight_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
{
keccak((const uint8_t *) input, size, ctx->state0, 200);
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEMORY;
uint64_t* h0 = (uint64_t*) ctx->state0;
uint64_t* h1 = (uint64_t*) ctx->state1;
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
lo = _umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
lo = _umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
}

View file

@ -1,237 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_SOFTAES_H__
#define __CRYPTONIGHT_SOFTAES_H__
#include <x86intrin.h>
extern __m128i soft_aesenc(__m128i in, __m128i key);
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
{
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
*xout0 = sl_xor(*xout0);
*xout0 = _mm_xor_si128(*xout0, xout1);
xout1 = soft_aeskeygenassist(*xout0, 0x00);
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
*xout2 = sl_xor(*xout2);
*xout2 = _mm_xor_si128(*xout2, xout1);
}
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
*x0 = soft_aesenc(*x0, key);
*x1 = soft_aesenc(*x1, key);
*x2 = soft_aesenc(*x2, key);
*x3 = soft_aesenc(*x3, key);
*x4 = soft_aesenc(*x4, key);
*x5 = soft_aesenc(*x5, key);
*x6 = soft_aesenc(*x6, key);
*x7 = soft_aesenc(*x7, key);
}
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
*k0 = xout0;
*k1 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x1);
*k2 = xout0;
*k3 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x2);
*k4 = xout0;
*k5 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x4);
*k6 = xout0;
*k7 = xout2;
aes_genkey_sub(&xout0, &xout2, 0x8);
*k8 = xout0;
*k9 = xout2;
}
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; i < MEMORY / sizeof(__m128i); i += 8) {
aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
}
}
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
// This is more than we have registers, compiler will assign 2 keys on the stack
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; __builtin_expect(i < MEMORY / sizeof(__m128i), 1); i += 8)
{
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
#if defined(__x86_64__)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
*hi = r >> 64;
return (uint64_t) r;
}
#elif defined(__i386__)
# define HI32(X) \
_mm_srli_si128((X), 4)
# define EXTRACT64(X) \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
#endif
#endif /* __CRYPTONIGHT_SOFTAES_H__ */

8
cmake/FindUV.cmake Normal file
View file

@ -0,0 +1,8 @@
find_path(UV_INCLUDE_DIR NAMES uv.h)
find_library(UV_LIBRARY NAMES uv libuv)
set(UV_LIBRARIES ${UV_LIBRARY})
set(UV_INCLUDE_DIRS ${UV_INCLUDE_DIR})
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(UV DEFAULT_MSG UV_LIBRARY UV_INCLUDE_DIR)

View file

@ -1,218 +0,0 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include "libcpuid.h"
#include "libcpuid_util.h"
int _current_verboselevel;
void match_features(const struct feature_map_t* matchtable, int count, uint32_t reg, struct cpu_id_t* data)
{
int i;
for (i = 0; i < count; i++)
if (reg & (1u << matchtable[i].bit))
data->flags[matchtable[i].feature] = 1;
}
static void default_warn(const char *msg)
{
fprintf(stderr, "%s", msg);
}
libcpuid_warn_fn_t _warn_fun = default_warn;
#if defined(_MSC_VER)
# define vsnprintf _vsnprintf
#endif
void warnf(const char* format, ...)
{
char buff[1024];
va_list va;
if (!_warn_fun) return;
va_start(va, format);
vsnprintf(buff, sizeof(buff), format, va);
va_end(va);
_warn_fun(buff);
}
void debugf(int verboselevel, const char* format, ...)
{
char buff[1024];
va_list va;
if (verboselevel > _current_verboselevel) return;
va_start(va, format);
vsnprintf(buff, sizeof(buff), format, va);
va_end(va);
_warn_fun(buff);
}
static int popcount64(uint64_t mask)
{
int num_set_bits = 0;
while (mask) {
mask &= mask - 1;
num_set_bits++;
}
return num_set_bits;
}
static int score(const struct match_entry_t* entry, const struct cpu_id_t* data,
int brand_code, uint64_t bits, int model_code)
{
int res = 0;
if (entry->family == data->family ) res += 2;
if (entry->model == data->model ) res += 2;
if (entry->stepping == data->stepping ) res += 2;
if (entry->ext_family == data->ext_family) res += 2;
if (entry->ext_model == data->ext_model ) res += 2;
if (entry->ncores == data->num_cores ) res += 2;
if (entry->l2cache == data->l2_cache ) res += 1;
if (entry->l3cache == data->l3_cache ) res += 1;
if (entry->brand_code == brand_code ) res += 2;
if (entry->model_code == model_code ) res += 2;
res += popcount64(entry->model_bits & bits) * 2;
return res;
}
int match_cpu_codename(const struct match_entry_t* matchtable, int count,
struct cpu_id_t* data, int brand_code, uint64_t bits,
int model_code)
{
int bestscore = -1;
int bestindex = 0;
int i, t;
debugf(3, "Matching cpu f:%d, m:%d, s:%d, xf:%d, xm:%d, ncore:%d, l2:%d, bcode:%d, bits:%llu, code:%d\n",
data->family, data->model, data->stepping, data->ext_family,
data->ext_model, data->num_cores, data->l2_cache, brand_code, (unsigned long long) bits, model_code);
for (i = 0; i < count; i++) {
t = score(&matchtable[i], data, brand_code, bits, model_code);
debugf(3, "Entry %d, `%s', score %d\n", i, matchtable[i].name, t);
if (t > bestscore) {
debugf(2, "Entry `%s' selected - best score so far (%d)\n", matchtable[i].name, t);
bestscore = t;
bestindex = i;
}
}
strcpy(data->cpu_codename, matchtable[bestindex].name);
return bestscore;
}
void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
struct cpu_list_t* list)
{
int i, j, n, good;
n = 0;
list->names = (char**) malloc(sizeof(char*) * count);
for (i = 0; i < count; i++) {
if (strstr(matchtable[i].name, "Unknown")) continue;
good = 1;
for (j = n - 1; j >= 0; j--)
if (!strcmp(list->names[j], matchtable[i].name)) {
good = 0;
break;
}
if (!good) continue;
#if defined(_MSC_VER)
list->names[n++] = _strdup(matchtable[i].name);
#else
list->names[n++] = strdup(matchtable[i].name);
#endif
}
list->num_entries = n;
}
static int xmatch_entry(char c, const char* p)
{
int i, j;
if (c == 0) return -1;
if (c == p[0]) return 1;
if (p[0] == '.') return 1;
if (p[0] == '#' && isdigit(c)) return 1;
if (p[0] == '[') {
j = 1;
while (p[j] && p[j] != ']') j++;
if (!p[j]) return -1;
for (i = 1; i < j; i++)
if (p[i] == c) return j + 1;
}
return -1;
}
int match_pattern(const char* s, const char* p)
{
int i, j, dj, k, n, m;
n = (int) strlen(s);
m = (int) strlen(p);
for (i = 0; i < n; i++) {
if (xmatch_entry(s[i], p) != -1) {
j = 0;
k = 0;
while (j < m && ((dj = xmatch_entry(s[i + k], p + j)) != -1)) {
k++;
j += dj;
}
if (j == m) return i + 1;
}
}
return 0;
}
struct cpu_id_t* get_cached_cpuid(void)
{
static int initialized = 0;
static struct cpu_id_t id;
if (initialized) return &id;
if (cpu_identify(NULL, &id))
memset(&id, 0, sizeof(id));
initialized = 1;
return &id;
}
int match_all(uint64_t bits, uint64_t mask)
{
return (bits & mask) == mask;
}
void debug_print_lbits(int debuglevel, uint64_t mask)
{
int i, first = 0;
for (i = 0; i < 64; i++) if (mask & (((uint64_t) 1) << i)) {
if (first) first = 0;
else debugf(2, " + ");
debugf(2, "LBIT(%d)", i);
}
debugf(2, "\n");
}

View file

@ -1,549 +0,0 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "libcpuid.h"
#include "libcpuid_util.h"
#include "libcpuid_internal.h"
#include "recog_amd.h"
const struct amd_code_str { amd_code_t code; char *str; } amd_code_str[] = {
#define CODE(x) { x, #x }
#define CODE2(x, y) CODE(x)
#include "amd_code_t.h"
#undef CODE
};
struct amd_code_and_bits_t {
int code;
uint64_t bits;
};
enum _amd_bits_t {
ATHLON_ = LBIT( 0 ),
_XP_ = LBIT( 1 ),
_M_ = LBIT( 2 ),
_MP_ = LBIT( 3 ),
MOBILE_ = LBIT( 4 ),
DURON_ = LBIT( 5 ),
SEMPRON_ = LBIT( 6 ),
OPTERON_ = LBIT( 7 ),
TURION_ = LBIT( 8 ),
_LV_ = LBIT( 9 ),
_64_ = LBIT( 10 ),
_X2 = LBIT( 11 ),
_X3 = LBIT( 12 ),
_X4 = LBIT( 13 ),
_X6 = LBIT( 14 ),
_FX = LBIT( 15 ),
};
typedef enum _amd_bits_t amd_bits_t;
enum _amd_model_codes_t {
// Only for Ryzen CPUs:
_1400,
_1500,
_1600,
};
const struct match_entry_t cpudb_amd[] = {
{ -1, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD CPU" },
/* 486 and the likes */
{ 4, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD 486" },
{ 4, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX2" },
{ 4, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX2WB" },
{ 4, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX4" },
{ 4, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "AMD 486DX4WB" },
/* Pentia clones */
{ 5, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown AMD 586" },
{ 5, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
{ 5, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
{ 5, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
{ 5, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K5" },
/* The K6 */
{ 5, 6, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6" },
{ 5, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6" },
{ 5, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-2" },
{ 5, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-III" },
{ 5, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
{ 5, 11, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
{ 5, 12, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown K6" },
{ 5, 13, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "K6-2+" },
/* Athlon et al. */
{ 6, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (Slot-A)" },
{ 6, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (Slot-A)" },
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Duron (Spitfire)" },
{ 6, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon (ThunderBird)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Athlon" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_ , 0, "Athlon (Palomino)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Palomino)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Palomino)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP" },
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Athlon XP" },
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Morgan)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon XP" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_ , 0, "Athlon XP (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, DURON_ , 0, "Duron (Applebred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Thoroughbred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_ , 0, "Mobile Athlon (T-Bred)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_|_LV_, 0, "Mobile Athlon (T-Bred)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Athlon XP (Barton)" },
{ 6, 10, -1, -1, -1, 1, 512, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Barton)" },
{ 6, 10, -1, -1, -1, 1, 512, -1, NC, SEMPRON_ , 0, "Sempron (Barton)" },
{ 6, 10, -1, -1, -1, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron (Thorton)" },
{ 6, 10, -1, -1, -1, 1, 256, -1, NC, ATHLON_|_XP_ , 0, "Athlon XP (Thorton)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_MP_ , 0, "Athlon MP (Barton)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_ , 0, "Mobile Athlon (Barton)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, ATHLON_|_XP_|_M_|_LV_, 0, "Mobile Athlon (Barton)" },
/* K8 Architecture */
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, 0 , 0, "Unknown K8" },
{ 15, -1, -1, 16, -1, 1, -1, -1, NC, 0 , 0, "Unknown K9" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, 0 , 0, "Unknown A64" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, OPTERON_ , 0, "Opteron" },
{ 15, -1, -1, 15, -1, 2, -1, -1, NC, OPTERON_|_X2 , 0, "Opteron (Dual Core)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, OPTERON_ , 0, "Opteron" },
{ 15, 3, -1, 15, -1, 2, -1, -1, NC, OPTERON_|_X2 , 0, "Opteron (Dual Core)" },
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (512K)" },
{ 15, -1, -1, 15, -1, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (1024K)" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, ATHLON_|_FX , 0, "Athlon FX" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, ATHLON_|_64_|_FX , 0, "Athlon 64 FX" },
{ 15, 3, -1, 15, 35, 2, -1, -1, NC, ATHLON_|_64_|_FX , 0, "Athlon 64 FX X2 (Toledo)" },
{ 15, -1, -1, 15, -1, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (512K)" },
{ 15, -1, -1, 15, -1, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (1024K)" },
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (512K)" },
{ 15, -1, -1, 15, -1, 1, 1024, -1, NC, TURION_|_64_ , 0, "Turion 64 (1024K)" },
{ 15, -1, -1, 15, -1, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion 64 X2 (512K)" },
{ 15, -1, -1, 15, -1, 2, 1024, -1, NC, TURION_|_X2 , 0, "Turion 64 X2 (1024K)" },
{ 15, -1, -1, 15, -1, 1, 128, -1, NC, SEMPRON_ , 0, "A64 Sempron (128K)" },
{ 15, -1, -1, 15, -1, 1, 256, -1, NC, SEMPRON_ , 0, "A64 Sempron (256K)" },
{ 15, -1, -1, 15, -1, 1, 512, -1, NC, SEMPRON_ , 0, "A64 Sempron (512K)" },
{ 15, -1, -1, 15, 0x4f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/512K)" },
{ 15, -1, -1, 15, 0x5f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/512K)" },
{ 15, -1, -1, 15, 0x2f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Venice/512K)" },
{ 15, -1, -1, 15, 0x2c, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Venice/512K)" },
{ 15, -1, -1, 15, 0x1f, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Winchester/512K)" },
{ 15, -1, -1, 15, 0x0c, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Newcastle/512K)" },
{ 15, -1, -1, 15, 0x27, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/512K)" },
{ 15, -1, -1, 15, 0x37, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/512K)" },
{ 15, -1, -1, 15, 0x04, 1, 512, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (ClawHammer/512K)" },
{ 15, -1, -1, 15, 0x5f, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (Orleans/1024K)" },
{ 15, -1, -1, 15, 0x27, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (San Diego/1024K)" },
{ 15, -1, -1, 15, 0x04, 1, 1024, -1, NC, ATHLON_|_64_ , 0, "Athlon 64 (ClawHammer/1024K)" },
{ 15, -1, -1, 15, 0x4b, 2, 256, -1, NC, SEMPRON_ , 0, "Athlon 64 X2 (Windsor/256K)" },
{ 15, -1, -1, 15, 0x23, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Toledo/512K)" },
{ 15, -1, -1, 15, 0x4b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/512K)" },
{ 15, -1, -1, 15, 0x43, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/512K)" },
{ 15, -1, -1, 15, 0x6b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Brisbane/512K)" },
{ 15, -1, -1, 15, 0x2b, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Manchester/512K)"},
{ 15, -1, -1, 15, 0x23, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Toledo/1024K)" },
{ 15, -1, -1, 15, 0x43, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon 64 X2 (Windsor/1024K)" },
{ 15, -1, -1, 15, 0x08, 1, 128, -1, NC, MOBILE_|SEMPRON_ , 0, "Mobile Sempron 64 (Dublin/128K)"},
{ 15, -1, -1, 15, 0x08, 1, 256, -1, NC, MOBILE_|SEMPRON_ , 0, "Mobile Sempron 64 (Dublin/256K)"},
{ 15, -1, -1, 15, 0x0c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Paris)" },
{ 15, -1, -1, 15, 0x1c, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
{ 15, -1, -1, 15, 0x1c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
{ 15, -1, -1, 15, 0x1c, 1, 128, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Sonora/128K)"},
{ 15, -1, -1, 15, 0x1c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Sonora/256K)"},
{ 15, -1, -1, 15, 0x2c, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
{ 15, -1, -1, 15, 0x2c, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
{ 15, -1, -1, 15, 0x2c, 1, 128, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Albany/128K)"},
{ 15, -1, -1, 15, 0x2c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Albany/256K)"},
{ 15, -1, -1, 15, 0x2f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/128K)" },
{ 15, -1, -1, 15, 0x2f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Palermo/256K)" },
{ 15, -1, -1, 15, 0x4f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/128K)" },
{ 15, -1, -1, 15, 0x4f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/256K)" },
{ 15, -1, -1, 15, 0x5f, 1, 128, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/128K)" },
{ 15, -1, -1, 15, 0x5f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Manila/256K)" },
{ 15, -1, -1, 15, 0x6b, 2, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 Dual (Sherman/256K)"},
{ 15, -1, -1, 15, 0x6b, 2, 512, -1, NC, SEMPRON_ , 0, "Sempron 64 Dual (Sherman/512K)"},
{ 15, -1, -1, 15, 0x7f, 1, 256, -1, NC, SEMPRON_ , 0, "Sempron 64 (Sparta/256K)" },
{ 15, -1, -1, 15, 0x7f, 1, 512, -1, NC, SEMPRON_ , 0, "Sempron 64 (Sparta/512K)" },
{ 15, -1, -1, 15, 0x4c, 1, 256, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Keene/256K)"},
{ 15, -1, -1, 15, 0x4c, 1, 512, -1, NC, MOBILE_| SEMPRON_ , 0, "Mobile Sempron 64 (Keene/512K)"},
{ 15, -1, -1, 15, -1, 2, -1, -1, NC, SEMPRON_ , 0, "Sempron Dual Core" },
{ 15, -1, -1, 15, 0x24, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (Lancaster/512K)" },
{ 15, -1, -1, 15, 0x24, 1, 1024, -1, NC, TURION_|_64_ , 0, "Turion 64 (Lancaster/1024K)" },
{ 15, -1, -1, 15, 0x48, 2, 256, -1, NC, TURION_|_X2 , 0, "Turion X2 (Taylor)" },
{ 15, -1, -1, 15, 0x48, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Trinidad)" },
{ 15, -1, -1, 15, 0x4c, 1, 512, -1, NC, TURION_|_64_ , 0, "Turion 64 (Richmond)" },
{ 15, -1, -1, 15, 0x68, 2, 256, -1, NC, TURION_|_X2 , 0, "Turion X2 (Tyler/256K)" },
{ 15, -1, -1, 15, 0x68, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Tyler/512K)" },
{ 15, -1, -1, 17, 3, 2, 512, -1, NC, TURION_|_X2 , 0, "Turion X2 (Griffin/512K)" },
{ 15, -1, -1, 17, 3, 2, 1024, -1, NC, TURION_|_X2 , 0, "Turion X2 (Griffin/1024K)" },
/* K10 Architecture (2007) */
{ 15, -1, -1, 16, -1, 1, -1, -1, PHENOM, 0 , 0, "Unknown AMD Phenom" },
{ 15, 2, -1, 16, -1, 1, -1, -1, PHENOM, 0 , 0, "Phenom" },
{ 15, 2, -1, 16, -1, 3, -1, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman)" },
{ 15, 2, -1, 16, -1, 4, -1, -1, PHENOM, 0 , 0, "Phenom X4 (Agena)" },
{ 15, 2, -1, 16, -1, 3, 512, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman/256K)" },
{ 15, 2, -1, 16, -1, 3, 512, -1, PHENOM, 0 , 0, "Phenom X3 (Toliman/512K)" },
{ 15, 2, -1, 16, -1, 4, 128, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/128K)" },
{ 15, 2, -1, 16, -1, 4, 256, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/256K)" },
{ 15, 2, -1, 16, -1, 4, 512, -1, PHENOM, 0 , 0, "Phenom X4 (Agena/512K)" },
{ 15, 2, -1, 16, -1, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon X2 (Kuma)" },
/* Phenom II derivates: */
{ 15, 4, -1, 16, -1, 4, -1, -1, NC, 0 , 0, "Phenom (Deneb-based)" },
{ 15, 4, -1, 16, -1, 1, 1024, -1, NC, SEMPRON_ , 0, "Sempron (Sargas)" },
{ 15, 4, -1, 16, -1, 2, 512, -1, PHENOM2, 0 , 0, "Phenom II X2 (Callisto)" },
{ 15, 4, -1, 16, -1, 3, 512, -1, PHENOM2, 0 , 0, "Phenom II X3 (Heka)" },
{ 15, 4, -1, 16, -1, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4" },
{ 15, 4, -1, 16, 4, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Deneb)" },
{ 15, 5, -1, 16, 5, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Deneb)" },
{ 15, 4, -1, 16, 10, 4, 512, -1, PHENOM2, 0 , 0, "Phenom II X4 (Zosma)" },
{ 15, 4, -1, 16, 10, 6, 512, -1, PHENOM2, 0 , 0, "Phenom II X6 (Thuban)" },
/* Athlon II derivates: */
{ 15, 6, -1, 16, 6, 2, 512, -1, NC, ATHLON_|_X2 , 0, "Athlon II (Champlain)" },
{ 15, 6, -1, 16, 6, 2, 512, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon II X2 (Regor)" },
{ 15, 6, -1, 16, 6, 2, 1024, -1, NC, ATHLON_|_64_|_X2 , 0, "Athlon II X2 (Regor)" },
{ 15, 5, -1, 16, 5, 3, 512, -1, NC, ATHLON_|_64_|_X3 , 0, "Athlon II X3 (Rana)" },
{ 15, 5, -1, 16, 5, 4, 512, -1, NC, ATHLON_|_64_|_X4 , 0, "Athlon II X4 (Propus)" },
/* Llano APUs (2011): */
{ 15, 1, -1, 18, 1, 2, -1, -1, FUSION_EA, 0 , 0, "Llano X2" },
{ 15, 1, -1, 18, 1, 3, -1, -1, FUSION_EA, 0 , 0, "Llano X3" },
{ 15, 1, -1, 18, 1, 4, -1, -1, FUSION_EA, 0 , 0, "Llano X4" },
/* Family 14h: Bobcat Architecture (2011) */
{ 15, 2, -1, 20, -1, 1, -1, -1, FUSION_C, 0 , 0, "Brazos Ontario" },
{ 15, 2, -1, 20, -1, 2, -1, -1, FUSION_C, 0 , 0, "Brazos Ontario (Dual-core)" },
{ 15, 1, -1, 20, -1, 1, -1, -1, FUSION_E, 0 , 0, "Brazos Zacate" },
{ 15, 1, -1, 20, -1, 2, -1, -1, FUSION_E, 0 , 0, "Brazos Zacate (Dual-core)" },
{ 15, 2, -1, 20, -1, 2, -1, -1, FUSION_Z, 0 , 0, "Brazos Desna (Dual-core)" },
/* Family 15h: Bulldozer Architecture (2011) */
{ 15, -1, -1, 21, 0, 4, -1, -1, NC, 0 , 0, "Bulldozer X2" },
{ 15, -1, -1, 21, 1, 4, -1, -1, NC, 0 , 0, "Bulldozer X2" },
{ 15, -1, -1, 21, 1, 6, -1, -1, NC, 0 , 0, "Bulldozer X3" },
{ 15, -1, -1, 21, 1, 8, -1, -1, NC, 0 , 0, "Bulldozer X4" },
/* 2nd-gen, Piledriver core (2012): */
{ 15, -1, -1, 21, 2, 4, -1, -1, NC, 0 , 0, "Vishera X2" },
{ 15, -1, -1, 21, 2, 6, -1, -1, NC, 0 , 0, "Vishera X3" },
{ 15, -1, -1, 21, 2, 8, -1, -1, NC, 0 , 0, "Vishera X4" },
{ 15, 0, -1, 21, 16, 2, -1, -1, FUSION_A, 0 , 0, "Trinity X2" },
{ 15, 0, -1, 21, 16, 4, -1, -1, FUSION_A, 0 , 0, "Trinity X4" },
{ 15, 3, -1, 21, 19, 2, -1, -1, FUSION_A, 0 , 0, "Richland X2" },
{ 15, 3, -1, 21, 19, 4, -1, -1, FUSION_A, 0 , 0, "Richland X4" },
/* 3rd-gen, Steamroller core (2014): */
{ 15, 0, -1, 21, 48, 2, -1, -1, FUSION_A, 0 , 0, "Kaveri X2" },
{ 15, 0, -1, 21, 48, 4, -1, -1, FUSION_A, 0 , 0, "Kaveri X4" },
{ 15, 8, -1, 21, 56, 4, -1, -1, FUSION_A, 0 , 0, "Godavari X4" },
/* 4th-gen, Excavator core (2015): */
{ 15, 1, -1, 21, 96, 2, -1, -1, FUSION_A, 0 , 0, "Carrizo X2" },
{ 15, 1, -1, 21, 96, 4, -1, -1, FUSION_A, 0 , 0, "Carrizo X4" },
{ 15, 5, -1, 21, 101, 2, -1, -1, FUSION_A, 0 , 0, "Bristol Ridge X2" },
{ 15, 5, -1, 21, 101, 4, -1, -1, FUSION_A, 0 , 0, "Bristol Ridge X4" },
{ 15, 0, -1, 21, 112, 2, -1, -1, FUSION_A, 0 , 0, "Stoney Ridge X2" },
{ 15, 0, -1, 21, 112, 2, -1, -1, FUSION_E, 0 , 0, "Stoney Ridge X2" },
/* Family 16h: Jaguar Architecture (2013) */
{ 15, 0, -1, 22, 0, 2, -1, -1, FUSION_A, 0 , 0, "Kabini X2" },
{ 15, 0, -1, 22, 0, 4, -1, -1, FUSION_A, 0 , 0, "Kabini X4" },
/* 2nd-gen, Puma core (2013): */
{ 15, 0, -1, 22, 48, 2, -1, -1, FUSION_E, 0 , 0, "Mullins X2" },
{ 15, 0, -1, 22, 48, 4, -1, -1, FUSION_A, 0 , 0, "Mullins X4" },
/* Family 17h: Zen Architecture (2017) */
{ 15, -1, -1, 23, 1, 8, -1, -1, NC, 0 , 0, "Ryzen 7" },
{ 15, -1, -1, 23, 1, 6, -1, -1, NC, 0 , _1600, "Ryzen 5" },
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , _1500, "Ryzen 5" },
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , _1400, "Ryzen 5" },
{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , 0, "Ryzen 3" },
//{ 15, -1, -1, 23, 1, 4, -1, -1, NC, 0 , 0, "Raven Ridge" }, //TBA
/* Newer Opterons: */
{ 15, 9, -1, 22, 9, 8, -1, -1, NC, OPTERON_ , 0, "Magny-Cours Opteron" },
};
static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
const struct feature_map_t matchtable_edx81[] = {
{ 20, CPU_FEATURE_NX },
{ 22, CPU_FEATURE_MMXEXT },
{ 25, CPU_FEATURE_FXSR_OPT },
{ 30, CPU_FEATURE_3DNOWEXT },
{ 31, CPU_FEATURE_3DNOW },
};
const struct feature_map_t matchtable_ecx81[] = {
{ 1, CPU_FEATURE_CMP_LEGACY },
{ 2, CPU_FEATURE_SVM },
{ 5, CPU_FEATURE_ABM },
{ 6, CPU_FEATURE_SSE4A },
{ 7, CPU_FEATURE_MISALIGNSSE },
{ 8, CPU_FEATURE_3DNOWPREFETCH },
{ 9, CPU_FEATURE_OSVW },
{ 10, CPU_FEATURE_IBS },
{ 11, CPU_FEATURE_XOP },
{ 12, CPU_FEATURE_SKINIT },
{ 13, CPU_FEATURE_WDT },
{ 16, CPU_FEATURE_FMA4 },
{ 21, CPU_FEATURE_TBM },
};
const struct feature_map_t matchtable_edx87[] = {
{ 0, CPU_FEATURE_TS },
{ 1, CPU_FEATURE_FID },
{ 2, CPU_FEATURE_VID },
{ 3, CPU_FEATURE_TTP },
{ 4, CPU_FEATURE_TM_AMD },
{ 5, CPU_FEATURE_STC },
{ 6, CPU_FEATURE_100MHZSTEPS },
{ 7, CPU_FEATURE_HWPSTATE },
/* id 8 is handled in common */
{ 9, CPU_FEATURE_CPB },
{ 10, CPU_FEATURE_APERFMPERF },
{ 11, CPU_FEATURE_PFI },
{ 12, CPU_FEATURE_PA },
};
if (raw->ext_cpuid[0][0] >= 0x80000001) {
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
}
if (raw->ext_cpuid[0][0] >= 0x80000007)
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
if (raw->ext_cpuid[0][0] >= 0x8000001a) {
/* We have the extended info about SSE unit size */
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1;
data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 128 : 64;
}
}
static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
int l3_result;
const int assoc_table[16] = {
0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 255
};
unsigned n = raw->ext_cpuid[0][0];
if (n >= 0x80000005) {
data->l1_data_cache = (raw->ext_cpuid[5][2] >> 24) & 0xff;
data->l1_assoc = (raw->ext_cpuid[5][2] >> 16) & 0xff;
data->l1_cacheline = (raw->ext_cpuid[5][2]) & 0xff;
data->l1_instruction_cache = (raw->ext_cpuid[5][3] >> 24) & 0xff;
}
if (n >= 0x80000006) {
data->l2_cache = (raw->ext_cpuid[6][2] >> 16) & 0xffff;
data->l2_assoc = assoc_table[(raw->ext_cpuid[6][2] >> 12) & 0xf];
data->l2_cacheline = (raw->ext_cpuid[6][2]) & 0xff;
l3_result = (raw->ext_cpuid[6][3] >> 18);
if (l3_result > 0) {
l3_result = 512 * l3_result; /* AMD spec says it's a range,
but we take the lower bound */
data->l3_cache = l3_result;
data->l3_assoc = assoc_table[(raw->ext_cpuid[6][3] >> 12) & 0xf];
data->l3_cacheline = (raw->ext_cpuid[6][3]) & 0xff;
} else {
data->l3_cache = 0;
}
}
}
static void decode_amd_number_of_cores(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
int logical_cpus = -1, num_cores = -1;
if (raw->basic_cpuid[0][0] >= 1) {
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
if (raw->ext_cpuid[0][0] >= 8) {
num_cores = 1 + (raw->ext_cpuid[8][2] & 0xff);
}
}
if (data->flags[CPU_FEATURE_HT]) {
if (num_cores > 1) {
if (data->ext_family >= 23)
num_cores /= 2; // e.g., Ryzen 7 reports 16 "real" cores, but they are really just 8.
data->num_cores = num_cores;
data->num_logical_cpus = logical_cpus;
} else {
data->num_cores = 1;
data->num_logical_cpus = (logical_cpus >= 2 ? logical_cpus : 2);
}
} else {
data->num_cores = data->num_logical_cpus = 1;
}
}
static int amd_has_turion_modelname(const char *bs)
{
/* We search for something like TL-60. Ahh, I miss regexes...*/
int i, l, k;
char code[3] = {0};
const char* codes[] = { "ML", "MT", "MK", "TK", "TL", "RM", "ZM", "" };
l = (int) strlen(bs);
for (i = 3; i < l - 2; i++) {
if (bs[i] == '-' &&
isupper(bs[i-1]) && isupper(bs[i-2]) && !isupper(bs[i-3]) &&
isdigit(bs[i+1]) && isdigit(bs[i+2]) && !isdigit(bs[i+3]))
{
code[0] = bs[i-2];
code[1] = bs[i-1];
for (k = 0; codes[k][0]; k++)
if (!strcmp(codes[k], code)) return 1;
}
}
return 0;
}
static struct amd_code_and_bits_t decode_amd_codename_part1(const char *bs)
{
amd_code_t code = NC;
uint64_t bits = 0;
struct amd_code_and_bits_t result;
if (strstr(bs, "Dual Core") ||
strstr(bs, "Dual-Core") ||
strstr(bs, " X2 "))
bits |= _X2;
if (strstr(bs, " X4 ")) bits |= _X4;
if (strstr(bs, " X3 ")) bits |= _X3;
if (strstr(bs, "Opteron")) bits |= OPTERON_;
if (strstr(bs, "Phenom")) {
code = (strstr(bs, "II")) ? PHENOM2 : PHENOM;
}
if (amd_has_turion_modelname(bs)) {
bits |= TURION_;
}
if (strstr(bs, "Athlon(tm)")) bits |= ATHLON_;
if (strstr(bs, "Sempron(tm)")) bits |= SEMPRON_;
if (strstr(bs, "Duron")) bits |= DURON_;
if (strstr(bs, " 64 ")) bits |= _64_;
if (strstr(bs, " FX")) bits |= _FX;
if (strstr(bs, " MP")) bits |= _MP_;
if (strstr(bs, "Athlon(tm) 64") || strstr(bs, "Athlon(tm) II X") || match_pattern(bs, "Athlon(tm) X#")) {
bits |= ATHLON_ | _64_;
}
if (strstr(bs, "Turion")) bits |= TURION_;
if (strstr(bs, "mobile") || strstr(bs, "Mobile")) {
bits |= MOBILE_;
}
if (strstr(bs, "XP")) bits |= _XP_;
if (strstr(bs, "XP-M")) bits |= _M_;
if (strstr(bs, "(LV)")) bits |= _LV_;
if (match_pattern(bs, "C-##")) code = FUSION_C;
if (match_pattern(bs, "E-###")) code = FUSION_E;
if (match_pattern(bs, "Z-##")) code = FUSION_Z;
if (match_pattern(bs, "E#-####") || match_pattern(bs, "A#-####")) code = FUSION_EA;
result.code = code;
result.bits = bits;
return result;
}
static int decode_amd_ryzen_model_code(const char* bs)
{
const struct {
int model_code;
const char* match_str;
} patterns[] = {
{ _1600, "1600" },
{ _1500, "1500" },
{ _1400, "1400" },
};
int i;
for (i = 0; i < COUNT_OF(patterns); i++)
if (strstr(bs, patterns[i].match_str))
return patterns[i].model_code;
//
return 0;
}
static void decode_amd_codename(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
struct amd_code_and_bits_t code_and_bits = decode_amd_codename_part1(data->brand_str);
int i = 0;
char* code_str = NULL;
int model_code;
for (i = 0; i < COUNT_OF(amd_code_str); i++) {
if (code_and_bits.code == amd_code_str[i].code) {
code_str = amd_code_str[i].str;
break;
}
}
if (/*code == ATHLON_64_X2*/ match_all(code_and_bits.bits, ATHLON_|_64_|_X2) && data->l2_cache < 512) {
code_and_bits.bits &= ~(ATHLON_ | _64_);
code_and_bits.bits |= SEMPRON_;
}
if (code_str)
debugf(2, "Detected AMD brand code: %d (%s)\n", code_and_bits.code, code_str);
else
debugf(2, "Detected AMD brand code: %d\n", code_and_bits.code);
if (code_and_bits.bits) {
debugf(2, "Detected AMD bits: ");
debug_print_lbits(2, code_and_bits.bits);
}
// is it Ryzen? if so, we need to detect discern between the four-core 1400/1500 (Ryzen 5) and the four-core Ryzen 3:
model_code = (data->ext_family == 23) ? decode_amd_ryzen_model_code(data->brand_str) : 0;
internal->code.amd = code_and_bits.code;
internal->bits = code_and_bits.bits;
internal->score = match_cpu_codename(cpudb_amd, COUNT_OF(cpudb_amd), data, code_and_bits.code,
code_and_bits.bits, model_code);
}
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
load_amd_features(raw, data);
decode_amd_cache_info(raw, data);
decode_amd_number_of_cores(raw, data);
decode_amd_codename(raw, data, internal);
return 0;
}
void cpuid_get_list_amd(struct cpu_list_t* list)
{
generic_get_cpu_list(cpudb_amd, COUNT_OF(cpudb_amd), list);
}

View file

@ -1,935 +0,0 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <ctype.h>
#include "libcpuid.h"
#include "libcpuid_util.h"
#include "libcpuid_internal.h"
#include "recog_intel.h"
const struct intel_bcode_str { intel_code_t code; char *str; } intel_bcode_str[] = {
#define CODE(x) { x, #x }
#define CODE2(x, y) CODE(x)
#include "intel_code_t.h"
#undef CODE
};
typedef struct {
int code;
uint64_t bits;
} intel_code_and_bits_t;
enum _intel_model_t {
UNKNOWN = -1,
_3000 = 100,
_3100,
_3200,
X3200,
_3300,
X3300,
_5100,
_5200,
_5300,
_5400,
_2xxx, /* Core i[357] 2xxx */
_3xxx, /* Core i[357] 3xxx */
};
typedef enum _intel_model_t intel_model_t;
enum _intel_bits_t {
PENTIUM_ = LBIT( 0 ),
CELERON_ = LBIT( 1 ),
MOBILE_ = LBIT( 2 ),
CORE_ = LBIT( 3 ),
_I_ = LBIT( 4 ),
_M_ = LBIT( 5 ),
_3 = LBIT( 6 ),
_5 = LBIT( 7 ),
_7 = LBIT( 8 ),
XEON_ = LBIT( 9 ),
_MP = LBIT( 10 ),
ATOM_ = LBIT( 11 ),
};
typedef enum _intel_bits_t intel_bits_t;
const struct match_entry_t cpudb_intel[] = {
{ -1, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Intel CPU" },
/* i486 */
{ 4, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown i486" },
{ 4, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX-25/33" },
{ 4, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX-50" },
{ 4, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SX" },
{ 4, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX2" },
{ 4, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SL" },
{ 4, 5, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 SX2" },
{ 4, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX2 WriteBack" },
{ 4, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX4" },
{ 4, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "i486 DX4 WriteBack" },
/* All Pentia:
Pentium 1 */
{ 5, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium" },
{ 5, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium A-Step" },
{ 5, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.8u)" },
{ 5, 2, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
{ 5, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium OverDrive" },
{ 5, 4, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
{ 5, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium 1 (0.35u)" },
{ 5, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium MMX (0.25u)" },
/* Pentium 2 / 3 / M / Conroe / whatsnext - all P6 based. */
{ 6, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown P6" },
{ 6, 0, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium Pro" },
{ 6, 1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium Pro" },
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium II (Klamath)" },
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium II (Deschutes)" },
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile Pentium II (Tonga)"},
{ 6, 6, -1, -1, -1, 1, -1, -1, NC,0 , 0, "Pentium II (Dixon)" },
{ 6, 3, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Klamath)" },
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Drake)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-II Xeon (Dixon)" },
{ 6, 5, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-II Celeron (Covington)" },
{ 6, 6, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-II Celeron (Mendocino)" },
/* -------------------------------------------------- */
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Katmai)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Coppermine)"},
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Coppermine)"},
{ 6, 11, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Pentium III (Tualatin)" },
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Tanner)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Cascades)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Cascades)" },
{ 6, 11, -1, -1, -1, 1, -1, -1, NC, XEON_ , 0, "P-III Xeon (Tualatin)" },
{ 6, 7, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Katmai)" },
{ 6, 8, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Coppermine)" },
{ 6, 10, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Coppermine)" },
{ 6, 11, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "P-III Celeron (Tualatin)" },
/* Netburst based (Pentium 4 and later)
classic P4s */
{ 15, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium 4" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "Unknown P-4 Celeron" },
{ 15, -1, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Unknown Xeon" },
{ 15, 0, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Willamette)" },
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Willamette)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Northwood)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Prescott)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Prescott)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium 4 (Cedar Mill)" },
{ 15, 0, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Willamette)" },
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Willamette)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Northwood)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Prescott)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Prescott)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, MOBILE_|PENTIUM_, 0, "Mobile P-4 (Cedar Mill)" },
/* server CPUs */
{ 15, 0, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Foster)" },
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Foster)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Prestonia)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, XEON_|_MP , 0, "Xeon (Gallatin)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Nocona)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Nocona)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, IRWIN, XEON_ , 0, "Xeon (Irwindale)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, XEON_|_MP , 0, "Xeon (Cranford)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, POTOMAC, XEON_ , 0, "Xeon (Potomac)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, XEON_ , 0, "Xeon (Dempsey)" },
/* Pentium Ds */
{ 15, 4, 4, 15, -1, 1, -1, -1, NC, 0 , 0, "Pentium D (SmithField)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, PENTIUM_D, 0 , 0, "Pentium D (SmithField)" },
{ 15, 4, 7, 15, -1, 1, -1, -1, NC, 0 , 0, "Pentium D (SmithField)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, PENTIUM_D, 0 , 0, "Pentium D (Presler)" },
/* Celeron and Celeron Ds */
{ 15, 1, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron (Willamette)" },
{ 15, 2, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron (Northwood)" },
{ 15, 3, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Prescott)" },
{ 15, 4, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Prescott)" },
{ 15, 6, -1, 15, -1, 1, -1, -1, NC, CELERON_ , 0, "P-4 Celeron D (Cedar Mill)" },
/* -------------------------------------------------- */
/* Intel Core microarchitecture - P6-based */
{ 6, 9, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Pentium M" },
{ 6, 9, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Unknown Pentium M" },
{ 6, 9, -1, -1, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium M (Banias)" },
{ 6, 9, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Pentium M (Banias)" },
{ 6, 9, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "Celeron M" },
{ 6, 13, -1, -1, -1, 1, -1, -1, NC, PENTIUM_ , 0, "Pentium M (Dothan)" },
{ 6, 13, -1, -1, -1, 1, -1, -1, PENTIUM_M, 0 , 0, "Pentium M (Dothan)" },
{ 6, 13, -1, -1, -1, 1, -1, -1, NC, CELERON_ , 0, "Celeron M" },
{ 6, 12, -1, -1, -1, -1, -1, -1, NC, ATOM_ , 0, "Unknown Atom" },
{ 6, 12, -1, -1, -1, -1, -1, -1, DIAMONDVILLE,ATOM_, 0, "Atom (Diamondville)" },
{ 6, 12, -1, -1, -1, -1, -1, -1, SILVERTHORNE,ATOM_, 0, "Atom (Silverthorne)" },
{ 6, 12, -1, -1, -1, -1, -1, -1, CEDARVIEW, ATOM_ , 0, "Atom (Cedarview)" },
{ 6, 6, -1, -1, -1, -1, -1, -1, CEDARVIEW, ATOM_ , 0, "Atom (Cedarview)" },
{ 6, 12, -1, -1, -1, -1, -1, -1, PINEVIEW, ATOM_ , 0, "Atom (Pineview)" },
/* -------------------------------------------------- */
{ 6, 14, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Yonah" },
{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, 0 , 0, "Yonah (Core Solo)" },
{ 6, 14, -1, -1, -1, 2, -1, -1, CORE_DUO, 0 , 0, "Yonah (Core Duo)" },
{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, MOBILE_, 0, "Yonah (Core Solo)" },
{ 6, 14, -1, -1, -1, 2, -1, -1, CORE_DUO , MOBILE_, 0, "Yonah (Core Duo)" },
{ 6, 14, -1, -1, -1, 1, -1, -1, CORE_SOLO, 0 , 0, "Yonah (Core Solo)" },
{ 6, 15, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Unknown Core 2" },
{ 6, 15, -1, -1, -1, 2, 4096, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo)" },
{ 6, 15, -1, -1, -1, 2, 1024, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo) 1024K" },
{ 6, 15, -1, -1, -1, 2, 512, -1, CORE_DUO, 0 , 0, "Conroe (Core 2 Duo) 512K" },
{ 6, 15, -1, -1, -1, 4, -1, -1, QUAD_CORE, 0 , 0, "Kentsfield (Core 2 Quad)" },
{ 6, 15, -1, -1, -1, 4, 4096, -1, QUAD_CORE, 0 , 0, "Kentsfield (Core 2 Quad)" },
{ 6, 15, -1, -1, -1, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
{ 6, 15, -1, -1, -1, 2, 2048, -1, CORE_DUO, 0 , 0, "Allendale (Core 2 Duo)" },
{ 6, 15, -1, -1, -1, 2, -1, -1, MOBILE_CORE_DUO, 0, 0, "Merom (Core 2 Duo)" },
{ 6, 15, -1, -1, -1, 2, 2048, -1, MEROM, 0 , 0, "Merom (Core 2 Duo) 2048K" },
{ 6, 15, -1, -1, -1, 2, 4096, -1, MEROM, 0 , 0, "Merom (Core 2 Duo) 4096K" },
{ 6, 15, -1, -1, 15, 1, -1, -1, NC, CELERON_ , 0, "Conroe-L (Celeron)" },
{ 6, 6, -1, -1, 22, 1, -1, -1, NC, CELERON_ , 0, "Conroe-L (Celeron)" },
{ 6, 15, -1, -1, 15, 2, -1, -1, NC, CELERON_ , 0, "Conroe-L (Allendale)" },
{ 6, 6, -1, -1, 22, 2, -1, -1, NC, CELERON_ , 0, "Conroe-L (Allendale)" },
{ 6, 6, -1, -1, 22, 1, -1, -1, NC, 0 , 0, "Unknown Core ?" },
{ 6, 7, -1, -1, 23, 1, -1, -1, NC, 0 , 0, "Unknown Core ?" },
{ 6, 6, -1, -1, 22, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
{ 6, 7, -1, -1, 23, 400, -1, -1, MORE_THAN_QUADCORE, 0, 0, "More than quad-core" },
{ 6, 7, -1, -1, 23, 1, -1, -1, CORE_SOLO , 0, 0, "Unknown Core 45nm" },
{ 6, 7, -1, -1, 23, 1, -1, -1, CORE_DUO , 0, 0, "Unknown Core 45nm" },
{ 6, 7, -1, -1, 23, 2, 1024, -1, WOLFDALE , 0, 0, "Celeron Wolfdale 1M" },
{ 6, 7, -1, -1, 23, 2, 2048, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 2M" },
{ 6, 7, -1, -1, 23, 2, 3072, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 3M" },
{ 6, 7, -1, -1, 23, 2, 6144, -1, WOLFDALE , 0, 0, "Wolfdale (Core 2 Duo) 6M" },
{ 6, 7, -1, -1, 23, 1, -1, -1, MOBILE_CORE_DUO , 0, 0, "Penryn (Core 2 Duo)" },
{ 6, 7, -1, -1, 23, 2, 1024, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo)" },
{ 6, 7, -1, -1, 23, 2, 3072, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo) 3M" },
{ 6, 7, -1, -1, 23, 2, 6144, -1, PENRYN , 0, 0, "Penryn (Core 2 Duo) 6M" },
{ 6, 7, -1, -1, 23, 4, 2048, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 2M"},
{ 6, 7, -1, -1, 23, 4, 3072, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 3M"},
{ 6, 7, -1, -1, 23, 4, 6144, -1, NC , 0, 0, "Yorkfield (Core 2 Quad) 6M"},
/* Core microarchitecture-based Xeons: */
{ 6, 14, -1, -1, 14, 1, -1, -1, NC, XEON_ , 0, "Xeon LV" },
{ 6, 15, -1, -1, 15, 2, 4096, -1, NC, XEON_ , _5100, "Xeon (Woodcrest)" },
{ 6, 15, -1, -1, 15, 2, 2048, -1, NC, XEON_ , _3000, "Xeon (Conroe/2M)" },
{ 6, 15, -1, -1, 15, 2, 4096, -1, NC, XEON_ , _3000, "Xeon (Conroe/4M)" },
{ 6, 15, -1, -1, 15, 4, 4096, -1, NC, XEON_ , X3200, "Xeon (Kentsfield)" },
{ 6, 15, -1, -1, 15, 4, 4096, -1, NC, XEON_ , _5300, "Xeon (Clovertown)" },
{ 6, 7, -1, -1, 23, 2, 6144, -1, NC, XEON_ , _3100, "Xeon (Wolfdale)" },
{ 6, 7, -1, -1, 23, 2, 6144, -1, NC, XEON_ , _5200, "Xeon (Wolfdale DP)" },
{ 6, 7, -1, -1, 23, 4, 6144, -1, NC, XEON_ , _5400, "Xeon (Harpertown)" },
{ 6, 7, -1, -1, 23, 4, 3072, -1, NC, XEON_ , X3300, "Xeon (Yorkfield/3M)" },
{ 6, 7, -1, -1, 23, 4, 6144, -1, NC, XEON_ , X3300, "Xeon (Yorkfield/6M)" },
/* Nehalem CPUs (45nm): */
{ 6, 10, -1, -1, 26, 4, -1, -1, GAINESTOWN, XEON_ , 0, "Gainestown (Xeon)" },
{ 6, 10, -1, -1, 26, 4, -1, 4096, GAINESTOWN, XEON_ , 0, "Gainestown 4M (Xeon)" },
{ 6, 10, -1, -1, 26, 4, -1, 8192, GAINESTOWN, XEON_ , 0, "Gainestown 8M (Xeon)" },
{ 6, 10, -1, -1, 26, 4, -1, -1, NC, XEON_|_7 , 0, "Bloomfield (Xeon)" },
{ 6, 10, -1, -1, 26, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Bloomfield (Core i7)" },
{ 6, 10, -1, -1, 30, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Lynnfield (Core i7)" },
{ 6, 5, -1, -1, 37, 4, -1, 8192, NC, CORE_|_I_|_5 , 0, "Lynnfield (Core i5)" },
/* Westmere CPUs (32nm): */
{ 6, 5, -1, -1, 37, 2, -1, -1, NC, 0 , 0, "Unknown Core i3/i5" },
{ 6, 12, -1, -1, 44, -1, -1, -1, WESTMERE, XEON_ , 0, "Westmere (Xeon)" },
{ 6, 12, -1, -1, 44, -1, -1, 12288, WESTMERE, XEON_ , 0, "Gulftown (Xeon)" },
{ 6, 12, -1, -1, 44, 4, -1, 12288, NC, CORE_|_I_|_7 , 0, "Gulftown (Core i7)" },
{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_5 , 0, "Clarkdale (Core i5)" },
{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_3 , 0, "Clarkdale (Core i3)" },
{ 6, 5, -1, -1, 37, 2, -1, -1, NC, PENTIUM_ , 0, "Arrandale" },
{ 6, 5, -1, -1, 37, 2, -1, 4096, NC, CORE_|_I_|_7 , 0, "Arrandale (Core i7)" },
{ 6, 5, -1, -1, 37, 2, -1, 3072, NC, CORE_|_I_|_5 , 0, "Arrandale (Core i5)" },
{ 6, 5, -1, -1, 37, 2, -1, 3072, NC, CORE_|_I_|_3 , 0, "Arrandale (Core i3)" },
/* Sandy Bridge CPUs (32nm): */
{ 6, 10, -1, -1, 42, -1, -1, -1, NC, 0 , 0, "Unknown Sandy Bridge" },
{ 6, 10, -1, -1, 42, -1, -1, -1, NC, XEON_ , 0, "Sandy Bridge (Xeon)" },
{ 6, 10, -1, -1, 42, -1, -1, -1, NC, CORE_|_I_|_7 , 0, "Sandy Bridge (Core i7)" },
{ 6, 10, -1, -1, 42, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Sandy Bridge (Core i7)" },
{ 6, 10, -1, -1, 42, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Sandy Bridge (Core i5)" },
{ 6, 10, -1, -1, 42, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Sandy Bridge (Core i3)" },
{ 6, 10, -1, -1, 42, 2, -1, -1, NC, PENTIUM_ , 0, "Sandy Bridge (Pentium)" },
{ 6, 10, -1, -1, 42, 1, -1, -1, NC, CELERON_ , 0, "Sandy Bridge (Celeron)" },
{ 6, 10, -1, -1, 42, 2, -1, -1, NC, CELERON_ , 0, "Sandy Bridge (Celeron)" },
{ 6, 13, -1, -1, 45, -1, -1, -1, NC, CORE_|_I_|_3 , 0, "Sandy Bridge-E" },
{ 6, 13, -1, -1, 45, -1, -1, -1, NC, XEON_ , 0, "Sandy Bridge-E (Xeon)" },
/* Ivy Bridge CPUs (22nm): */
{ 6, 10, -1, -1, 58, -1, -1, -1, NC, XEON_ , 0, "Ivy Bridge (Xeon)" },
{ 6, 10, -1, -1, 58, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Ivy Bridge (Core i7)" },
{ 6, 10, -1, -1, 58, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Ivy Bridge (Core i5)" },
{ 6, 10, -1, -1, 58, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Ivy Bridge (Core i3)" },
{ 6, 10, -1, -1, 58, 2, -1, -1, NC, PENTIUM_ , 0, "Ivy Bridge (Pentium)" },
{ 6, 10, -1, -1, 58, 1, -1, -1, NC, CELERON_ , 0, "Ivy Bridge (Celeron)" },
{ 6, 10, -1, -1, 58, 2, -1, -1, NC, CELERON_ , 0, "Ivy Bridge (Celeron)" },
{ 6, 14, -1, -1, 62, -1, -1, -1, NC, 0 , 0, "Ivy Bridge-E" },
/* Haswell CPUs (22nm): */
{ 6, 12, -1, -1, 60, -1, -1, -1, NC, XEON_ , 0, "Haswell (Xeon)" },
{ 6, 12, -1, -1, 60, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
{ 6, 5, -1, -1, 69, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
{ 6, 6, -1, -1, 70, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Haswell (Core i7)" },
{ 6, 12, -1, -1, 60, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
{ 6, 5, -1, -1, 69, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
{ 6, 5, -1, -1, 69, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Haswell (Core i5)" },
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Haswell (Core i3)" },
{ 6, 5, -1, -1, 69, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Haswell (Core i3)" },
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, PENTIUM_ , 0, "Haswell (Pentium)" },
{ 6, 12, -1, -1, 60, 2, -1, -1, NC, CELERON_ , 0, "Haswell (Celeron)" },
{ 6, 12, -1, -1, 60, 1, -1, -1, NC, CELERON_ , 0, "Haswell (Celeron)" },
{ 6, 15, -1, -1, 63, -1, -1, -1, NC, 0 , 0, "Haswell-E" },
/* Broadwell CPUs (14nm): */
{ 6, 7, -1, -1, 71, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell (Core i7)" },
{ 6, 7, -1, -1, 71, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell (Core i5)" },
{ 6, 13, -1, -1, 61, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-U (Core i7)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-U (Core i7)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-U (Core i5)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Broadwell-U (Core i3)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, PENTIUM_ , 0, "Broadwell-U (Pentium)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NC, CELERON_ , 0, "Broadwell-U (Celeron)" },
{ 6, 13, -1, -1, 61, 2, -1, -1, NA, 0 , 0, "Broadwell-U (Core M)" },
{ 6, 15, -1, -1, 79, -1, -1, -1, NC, XEON_ , 0, "Broadwell-E (Xeon)" },
{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Broadwell-E (Core i3)" },
{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-E (Core i5)" },
{ 6, 15, -1, -1, 79, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Broadwell-E (Core i5)" },
{ 6, 15, -1, -1, 79, 2, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-E (Core i7)" },
{ 6, 15, -1, -1, 79, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Broadwell-E (Core i7)" },
/* Skylake CPUs (14nm): */
{ 6, 14, -1, -1, 94, -1, -1, -1, NC, XEON_ , 0, "Skylake (Xeon)" },
{ 6, 14, -1, -1, 94, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Skylake (Core i7)" },
{ 6, 14, -1, -1, 94, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Skylake (Core i5)" },
{ 6, 14, -1, -1, 94, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Skylake (Core i3)" },
{ 6, 14, -1, -1, 94, 2, -1, -1, NC, PENTIUM_ , 0, "Skylake (Pentium)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, PENTIUM_ , 0, "Skylake (Pentium)" },
{ 6, 14, -1, -1, 94, 2, -1, -1, NC, CELERON_ , 0, "Skylake (Celeron)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CELERON_ , 0, "Skylake (Celeron)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_7 , 0, "Skylake (Core m7)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_5 , 0, "Skylake (Core m5)" },
{ 6, 14, -1, -1, 78, 2, -1, -1, NC, CORE_|_M_|_3 , 0, "Skylake (Core m3)" },
/* Kaby Lake CPUs (14nm): */
{ 6, 14, -1, -1, 158, 4, -1, -1, NC, CORE_|_I_|_7 , 0, "Kaby Lake (Core i7)" },
{ 6, 14, -1, -1, 158, 4, -1, -1, NC, CORE_|_I_|_5 , 0, "Kaby Lake (Core i5)" },
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CORE_|_I_|_3 , 0, "Kaby Lake (Core i3)" },
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, PENTIUM_ , 0, "Kaby Lake (Pentium)" },
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CELERON_ , 0, "Kaby Lake (Celeron)" },
{ 6, 14, -1, -1, 158, 2, -1, -1, NC, CORE_|_M_|_3 , 0, "Kaby Lake (Core m3)" },
/* Itaniums */
{ 7, -1, -1, -1, -1, 1, -1, -1, NC, 0 , 0, "Itanium" },
{ 15, -1, -1, 16, -1, 1, -1, -1, NC, 0 , 0, "Itanium 2" },
};
static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
const struct feature_map_t matchtable_edx1[] = {
{ 18, CPU_FEATURE_PN },
{ 21, CPU_FEATURE_DTS },
{ 22, CPU_FEATURE_ACPI },
{ 27, CPU_FEATURE_SS },
{ 29, CPU_FEATURE_TM },
{ 30, CPU_FEATURE_IA64 },
{ 31, CPU_FEATURE_PBE },
};
const struct feature_map_t matchtable_ecx1[] = {
{ 2, CPU_FEATURE_DTS64 },
{ 4, CPU_FEATURE_DS_CPL },
{ 5, CPU_FEATURE_VMX },
{ 6, CPU_FEATURE_SMX },
{ 7, CPU_FEATURE_EST },
{ 8, CPU_FEATURE_TM2 },
{ 10, CPU_FEATURE_CID },
{ 14, CPU_FEATURE_XTPR },
{ 15, CPU_FEATURE_PDCM },
{ 18, CPU_FEATURE_DCA },
{ 21, CPU_FEATURE_X2APIC },
};
const struct feature_map_t matchtable_edx81[] = {
{ 20, CPU_FEATURE_XD },
};
const struct feature_map_t matchtable_ebx7[] = {
{ 2, CPU_FEATURE_SGX },
{ 4, CPU_FEATURE_HLE },
{ 11, CPU_FEATURE_RTM },
{ 16, CPU_FEATURE_AVX512F },
{ 17, CPU_FEATURE_AVX512DQ },
{ 18, CPU_FEATURE_RDSEED },
{ 19, CPU_FEATURE_ADX },
{ 26, CPU_FEATURE_AVX512PF },
{ 27, CPU_FEATURE_AVX512ER },
{ 28, CPU_FEATURE_AVX512CD },
{ 29, CPU_FEATURE_SHA_NI },
{ 30, CPU_FEATURE_AVX512BW },
{ 31, CPU_FEATURE_AVX512VL },
};
if (raw->basic_cpuid[0][0] >= 1) {
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
}
if (raw->ext_cpuid[0][0] >= 1) {
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
}
// detect TSX/AVX512:
if (raw->basic_cpuid[0][0] >= 7) {
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
}
}
enum _cache_type_t {
L1I,
L1D,
L2,
L3,
L4
};
typedef enum _cache_type_t cache_type_t;
static void check_case(uint8_t on, cache_type_t cache, int size, int assoc, int linesize, struct cpu_id_t* data)
{
if (!on) return;
switch (cache) {
case L1I:
data->l1_instruction_cache = size;
break;
case L1D:
data->l1_data_cache = size;
data->l1_assoc = assoc;
data->l1_cacheline = linesize;
break;
case L2:
data->l2_cache = size;
data->l2_assoc = assoc;
data->l2_cacheline = linesize;
break;
case L3:
data->l3_cache = size;
data->l3_assoc = assoc;
data->l3_cacheline = linesize;
break;
case L4:
data->l4_cache = size;
data->l4_assoc = assoc;
data->l4_cacheline = linesize;
break;
default:
break;
}
}
static void decode_intel_oldstyle_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
uint8_t f[256] = {0};
int reg, off;
uint32_t x;
for (reg = 0; reg < 4; reg++) {
x = raw->basic_cpuid[2][reg];
if (x & 0x80000000) continue;
for (off = 0; off < 4; off++) {
f[x & 0xff] = 1;
x >>= 8;
}
}
check_case(f[0x06], L1I, 8, 4, 32, data);
check_case(f[0x08], L1I, 16, 4, 32, data);
check_case(f[0x0A], L1D, 8, 2, 32, data);
check_case(f[0x0C], L1D, 16, 4, 32, data);
check_case(f[0x22], L3, 512, 4, 64, data);
check_case(f[0x23], L3, 1024, 8, 64, data);
check_case(f[0x25], L3, 2048, 8, 64, data);
check_case(f[0x29], L3, 4096, 8, 64, data);
check_case(f[0x2C], L1D, 32, 8, 64, data);
check_case(f[0x30], L1I, 32, 8, 64, data);
check_case(f[0x39], L2, 128, 4, 64, data);
check_case(f[0x3A], L2, 192, 6, 64, data);
check_case(f[0x3B], L2, 128, 2, 64, data);
check_case(f[0x3C], L2, 256, 4, 64, data);
check_case(f[0x3D], L2, 384, 6, 64, data);
check_case(f[0x3E], L2, 512, 4, 64, data);
check_case(f[0x41], L2, 128, 4, 32, data);
check_case(f[0x42], L2, 256, 4, 32, data);
check_case(f[0x43], L2, 512, 4, 32, data);
check_case(f[0x44], L2, 1024, 4, 32, data);
check_case(f[0x45], L2, 2048, 4, 32, data);
check_case(f[0x46], L3, 4096, 4, 64, data);
check_case(f[0x47], L3, 8192, 8, 64, data);
check_case(f[0x4A], L3, 6144, 12, 64, data);
check_case(f[0x4B], L3, 8192, 16, 64, data);
check_case(f[0x4C], L3, 12288, 12, 64, data);
check_case(f[0x4D], L3, 16384, 16, 64, data);
check_case(f[0x4E], L2, 6144, 24, 64, data);
check_case(f[0x60], L1D, 16, 8, 64, data);
check_case(f[0x66], L1D, 8, 4, 64, data);
check_case(f[0x67], L1D, 16, 4, 64, data);
check_case(f[0x68], L1D, 32, 4, 64, data);
/* The following four entries are trace cache. Intel does not
* specify a cache-line size, so we use -1 instead
*/
check_case(f[0x70], L1I, 12, 8, -1, data);
check_case(f[0x71], L1I, 16, 8, -1, data);
check_case(f[0x72], L1I, 32, 8, -1, data);
check_case(f[0x73], L1I, 64, 8, -1, data);
check_case(f[0x78], L2, 1024, 4, 64, data);
check_case(f[0x79], L2, 128, 8, 64, data);
check_case(f[0x7A], L2, 256, 8, 64, data);
check_case(f[0x7B], L2, 512, 8, 64, data);
check_case(f[0x7C], L2, 1024, 8, 64, data);
check_case(f[0x7D], L2, 2048, 8, 64, data);
check_case(f[0x7F], L2, 512, 2, 64, data);
check_case(f[0x82], L2, 256, 8, 32, data);
check_case(f[0x83], L2, 512, 8, 32, data);
check_case(f[0x84], L2, 1024, 8, 32, data);
check_case(f[0x85], L2, 2048, 8, 32, data);
check_case(f[0x86], L2, 512, 4, 64, data);
check_case(f[0x87], L2, 1024, 8, 64, data);
if (f[0x49]) {
/* This flag is overloaded with two meanings. On Xeon MP
* (family 0xf, model 0x6) this means L3 cache. On all other
* CPUs (notably Conroe et al), this is L2 cache. In both cases
* it means 4MB, 16-way associative, 64-byte line size.
*/
if (data->family == 0xf && data->model == 0x6) {
data->l3_cache = 4096;
data->l3_assoc = 16;
data->l3_cacheline = 64;
} else {
data->l2_cache = 4096;
data->l2_assoc = 16;
data->l2_cacheline = 64;
}
}
if (f[0x40]) {
/* Again, a special flag. It means:
* 1) If no L2 is specified, then CPU is w/o L2 (0 KB)
* 2) If L2 is specified by other flags, then, CPU is w/o L3.
*/
if (data->l2_cache == -1) {
data->l2_cache = 0;
} else {
data->l3_cache = 0;
}
}
}
static void decode_intel_deterministic_cache_info(struct cpu_raw_data_t* raw,
struct cpu_id_t* data)
{
int ecx;
int ways, partitions, linesize, sets, size, level, typenumber;
cache_type_t type;
for (ecx = 0; ecx < MAX_INTELFN4_LEVEL; ecx++) {
typenumber = raw->intel_fn4[ecx][0] & 0x1f;
if (typenumber == 0) break;
level = (raw->intel_fn4[ecx][0] >> 5) & 0x7;
if (level == 1 && typenumber == 1)
type = L1D;
else if (level == 1 && typenumber == 2)
type = L1I;
else if (level == 2 && typenumber == 3)
type = L2;
else if (level == 3 && typenumber == 3)
type = L3;
else if (level == 4 && typenumber == 3)
type = L4;
else {
warnf("deterministic_cache: unknown level/typenumber combo (%d/%d), cannot\n", level, typenumber);
warnf("deterministic_cache: recognize cache type\n");
continue;
}
ways = ((raw->intel_fn4[ecx][1] >> 22) & 0x3ff) + 1;
partitions = ((raw->intel_fn4[ecx][1] >> 12) & 0x3ff) + 1;
linesize = (raw->intel_fn4[ecx][1] & 0xfff) + 1;
sets = raw->intel_fn4[ecx][2] + 1;
size = ways * partitions * linesize * sets / 1024;
check_case(1, type, size, ways, linesize, data);
}
}
static int decode_intel_extended_topology(struct cpu_raw_data_t* raw,
struct cpu_id_t* data)
{
int i, level_type, num_smt = -1, num_core = -1;
for (i = 0; i < MAX_INTELFN11_LEVEL; i++) {
level_type = (raw->intel_fn11[i][2] & 0xff00) >> 8;
switch (level_type) {
case 0x01:
num_smt = raw->intel_fn11[i][1] & 0xffff;
break;
case 0x02:
num_core = raw->intel_fn11[i][1] & 0xffff;
break;
default:
break;
}
}
if (num_smt == -1 || num_core == -1) return 0;
data->num_logical_cpus = num_core;
data->num_cores = num_core / num_smt;
// make sure num_cores is at least 1. In VMs, the CPUID instruction
// is rigged and may give nonsensical results, but we should at least
// avoid outputs like data->num_cores == 0.
if (data->num_cores <= 0) data->num_cores = 1;
return 1;
}
static void decode_intel_number_of_cores(struct cpu_raw_data_t* raw,
struct cpu_id_t* data)
{
int logical_cpus = -1, num_cores = -1;
if (raw->basic_cpuid[0][0] >= 11) {
if (decode_intel_extended_topology(raw, data)) return;
}
if (raw->basic_cpuid[0][0] >= 1) {
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
if (raw->basic_cpuid[0][0] >= 4) {
num_cores = 1 + ((raw->basic_cpuid[4][0] >> 26) & 0x3f);
}
}
if (data->flags[CPU_FEATURE_HT]) {
if (num_cores > 1) {
data->num_cores = num_cores;
data->num_logical_cpus = logical_cpus;
} else {
data->num_cores = 1;
data->num_logical_cpus = (logical_cpus >= 1 ? logical_cpus : 1);
if (data->num_logical_cpus == 1)
data->flags[CPU_FEATURE_HT] = 0;
}
} else {
data->num_cores = data->num_logical_cpus = 1;
}
}
static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data)
{
intel_code_t code = (intel_code_t) NC;
intel_code_and_bits_t result;
uint64_t bits = 0;
int i = 0;
const char* bs = data->brand_str;
const char* s;
const struct { intel_code_t c; const char *search; } matchtable[] = {
{ PENTIUM_M, "Pentium(R) M" },
{ CORE_SOLO, "Pentium(R) Dual CPU" },
{ CORE_SOLO, "Pentium(R) Dual-Core" },
{ PENTIUM_D, "Pentium(R) D" },
{ CORE_SOLO, "Genuine Intel(R) CPU" },
{ CORE_SOLO, "Intel(R) Core(TM)" },
{ DIAMONDVILLE, "CPU [N ][23]## " },
{ SILVERTHORNE, "CPU Z" },
{ PINEVIEW, "CPU [ND][45]## " },
{ CEDARVIEW, "CPU [ND]#### " },
};
const struct { uint64_t bit; const char* search; } bit_matchtable[] = {
{ XEON_, "Xeon" },
{ _MP, " MP" },
{ ATOM_, "Atom(TM) CPU" },
{ MOBILE_, "Mobile" },
{ CELERON_, "Celeron" },
{ PENTIUM_, "Pentium" },
};
for (i = 0; i < COUNT_OF(bit_matchtable); i++) {
if (match_pattern(bs, bit_matchtable[i].search))
bits |= bit_matchtable[i].bit;
}
if ((i = match_pattern(bs, "Core(TM) [im][357]")) != 0) {
bits |= CORE_;
i--;
switch (bs[i + 9]) {
case 'i': bits |= _I_; break;
case 'm': bits |= _M_; break;
}
switch (bs[i + 10]) {
case '3': bits |= _3; break;
case '5': bits |= _5; break;
case '7': bits |= _7; break;
}
}
for (i = 0; i < COUNT_OF(matchtable); i++)
if (match_pattern(bs, matchtable[i].search)) {
code = matchtable[i].c;
break;
}
debugf(2, "intel matchtable result is %d\n", code);
if (bits & XEON_) {
if (match_pattern(bs, "W35##") || match_pattern(bs, "[ELXW]75##"))
bits |= _7;
else if (match_pattern(bs, "[ELXW]55##"))
code = GAINESTOWN;
else if (match_pattern(bs, "[ELXW]56##"))
code = WESTMERE;
else if (data->l3_cache > 0 && data->family == 16)
/* restrict by family, since later Xeons also have L3 ... */
code = IRWIN;
}
if (match_all(bits, XEON_ + _MP) && data->l3_cache > 0)
code = POTOMAC;
if (code == CORE_SOLO) {
s = strstr(bs, "CPU");
if (s) {
s += 3;
while (*s == ' ') s++;
if (*s == 'T')
bits |= MOBILE_;
}
}
if (code == CORE_SOLO) {
switch (data->num_cores) {
case 1: break;
case 2:
{
code = CORE_DUO;
if (data->num_logical_cpus > 2)
code = DUAL_CORE_HT;
break;
}
case 4:
{
code = QUAD_CORE;
if (data->num_logical_cpus > 4)
code = QUAD_CORE_HT;
break;
}
default:
code = MORE_THAN_QUADCORE; break;
}
}
if (code == CORE_DUO && (bits & MOBILE_) && data->model != 14) {
if (data->ext_model < 23) {
code = MEROM;
} else {
code = PENRYN;
}
}
if (data->ext_model == 23 &&
(code == CORE_DUO || code == PENTIUM_D || (bits & CELERON_))) {
code = WOLFDALE;
}
result.code = code;
result.bits = bits;
return result;
}
static intel_model_t get_model_code(struct cpu_id_t* data)
{
int i = 0;
int l = (int) strlen(data->brand_str);
const char *bs = data->brand_str;
int mod_flags = 0, model_no = 0, ndigs = 0;
/* If the CPU is a Core ix, then just return the model number generation: */
if ((i = match_pattern(bs, "Core(TM) i[357]")) != 0) {
i += 11;
if (i + 4 >= l) return UNKNOWN;
if (bs[i] == '2') return _2xxx;
if (bs[i] == '3') return _3xxx;
return UNKNOWN;
}
/* For Core2-based Xeons: */
while (i < l - 3) {
if (bs[i] == 'C' && bs[i+1] == 'P' && bs[i+2] == 'U')
break;
i++;
}
if (i >= l - 3) return UNKNOWN;
i += 3;
while (i < l - 4 && bs[i] == ' ') i++;
if (i >= l - 4) return UNKNOWN;
while (i < l - 4 && !isdigit(bs[i])) {
if (bs[i] >= 'A' && bs[i] <= 'Z')
mod_flags |= (1 << (bs[i] - 'A'));
i++;
}
if (i >= l - 4) return UNKNOWN;
while (isdigit(bs[i])) {
ndigs++;
model_no = model_no * 10 + (int) (bs[i] - '0');
i++;
}
if (ndigs != 4) return UNKNOWN;
#define HAVE(ch, flags) ((flags & (1 << ((int)(ch-'A')))) != 0)
switch (model_no / 100) {
case 30: return _3000;
case 31: return _3100;
case 32:
{
return (HAVE('X', mod_flags)) ? X3200 : _3200;
}
case 33:
{
return (HAVE('X', mod_flags)) ? X3300 : _3300;
}
case 51: return _5100;
case 52: return _5200;
case 53: return _5300;
case 54: return _5400;
default:
return UNKNOWN;
}
#undef HAVE
}
static void decode_intel_sgx_features(const struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
struct cpu_epc_t epc;
int i;
if (raw->basic_cpuid[0][0] < 0x12) return; // no 12h leaf
if (raw->basic_cpuid[0x12][0] == 0) return; // no sub-leafs available, probably it's disabled by BIOS
// decode sub-leaf 0:
if (raw->basic_cpuid[0x12][0] & 1) data->sgx.flags[INTEL_SGX1] = 1;
if (raw->basic_cpuid[0x12][0] & 2) data->sgx.flags[INTEL_SGX2] = 1;
if (data->sgx.flags[INTEL_SGX1] || data->sgx.flags[INTEL_SGX2])
data->sgx.present = 1;
data->sgx.misc_select = raw->basic_cpuid[0x12][1];
data->sgx.max_enclave_32bit = (raw->basic_cpuid[0x12][3] ) & 0xff;
data->sgx.max_enclave_64bit = (raw->basic_cpuid[0x12][3] >> 8) & 0xff;
// decode sub-leaf 1:
data->sgx.secs_attributes = raw->intel_fn12h[1][0] | (((uint64_t) raw->intel_fn12h[1][1]) << 32);
data->sgx.secs_xfrm = raw->intel_fn12h[1][2] | (((uint64_t) raw->intel_fn12h[1][3]) << 32);
// decode higher-order subleafs, whenever present:
data->sgx.num_epc_sections = -1;
for (i = 0; i < 1000000; i++) {
epc = cpuid_get_epc(i, raw);
if (epc.length == 0) {
debugf(2, "SGX: epc section request for %d returned null, no more EPC sections.\n", i);
data->sgx.num_epc_sections = i;
break;
}
}
if (data->sgx.num_epc_sections == -1) {
debugf(1, "SGX: warning: seems to be infinitude of EPC sections.\n");
data->sgx.num_epc_sections = 1000000;
}
}
struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw)
{
uint32_t regs[4];
struct cpu_epc_t retval = {0, 0};
if (raw && index < MAX_INTELFN12H_LEVEL - 2) {
// this was queried already, use the data:
memcpy(regs, raw->intel_fn12h[2 + index], sizeof(regs));
} else {
// query this ourselves:
regs[0] = 0x12;
regs[2] = 2 + index;
regs[1] = regs[3] = 0;
cpu_exec_cpuid_ext(regs);
}
// decode values:
if ((regs[0] & 0xf) == 0x1) {
retval.start_addr |= (regs[0] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
retval.start_addr |= ((uint64_t) (regs[1] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
retval.length |= (regs[2] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
retval.length |= ((uint64_t) (regs[3] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
}
return retval;
}
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
intel_code_and_bits_t brand;
intel_model_t model_code;
int i;
char* brand_code_str = NULL;
load_intel_features(raw, data);
if (raw->basic_cpuid[0][0] >= 4) {
/* Deterministic way is preferred, being more generic */
decode_intel_deterministic_cache_info(raw, data);
} else if (raw->basic_cpuid[0][0] >= 2) {
decode_intel_oldstyle_cache_info(raw, data);
}
decode_intel_number_of_cores(raw, data);
brand = get_brand_code_and_bits(data);
model_code = get_model_code(data);
for (i = 0; i < COUNT_OF(intel_bcode_str); i++) {
if (brand.code == intel_bcode_str[i].code) {
brand_code_str = intel_bcode_str[i].str;
break;
}
}
if (brand_code_str)
debugf(2, "Detected Intel brand code: %d (%s)\n", brand.code, brand_code_str);
else
debugf(2, "Detected Intel brand code: %d\n", brand.code);
if (brand.bits) {
debugf(2, "Detected Intel bits: ");
debug_print_lbits(2, brand.bits);
}
debugf(2, "Detected Intel model code: %d\n", model_code);
internal->code.intel = brand.code;
internal->bits = brand.bits;
if (data->flags[CPU_FEATURE_SGX]) {
debugf(2, "SGX seems to be present, decoding...\n");
// if SGX is indicated by the CPU, verify its presence:
decode_intel_sgx_features(raw, data);
}
internal->score = match_cpu_codename(cpudb_intel, COUNT_OF(cpudb_intel), data,
brand.code, brand.bits, model_code);
return 0;
}
void cpuid_get_list_intel(struct cpu_list_t* list)
{
generic_get_cpu_list(cpudb_intel, COUNT_OF(cpudb_intel), list);
}

View file

@ -1,24 +0,0 @@
// Copyright (c) 2012-2013 The Cryptonote developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "hash-ops.h"
#include "c_keccak.h"
void hash_permutation(union hash_state *state) {
keccakf((uint64_t*)state, 24);
}
void hash_process(union hash_state *state, const uint8_t *buf, size_t count) {
keccak1600(buf, count, (uint8_t*)state);
}
void cn_fast_hash(const void *data, size_t length, char *hash) {
union hash_state state;
hash_process(&state, data, length);
memcpy(hash, &state, HASH_SIZE);
}

274
elist.h
View file

@ -1,274 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _LINUX_LIST_H
#define _LINUX_LIST_H
/*
* Simple doubly linked list implementation.
*
* Some of the internal functions ("__xxx") are useful when
* manipulating whole lists rather than single entries, as
* sometimes we already know the next/prev entries and we can
* generate better code by using them directly rather than
* using the generic single-entry routines.
*/
struct list_head {
struct list_head *next, *prev;
};
#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define LIST_HEAD(name) \
struct list_head name = LIST_HEAD_INIT(name)
#define INIT_LIST_HEAD(ptr) do { \
(ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)
/*
* Insert a new entry between two known consecutive entries.
*
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next)
{
next->prev = new;
new->next = next;
new->prev = prev;
prev->next = new;
}
/**
* list_add - add a new entry
* @new: new entry to be added
* @head: list head to add it after
*
* Insert a new entry after the specified head.
* This is good for implementing stacks.
*/
static inline void list_add(struct list_head *new, struct list_head *head)
{
__list_add(new, head, head->next);
}
/**
* list_add_tail - add a new entry
* @new: new entry to be added
* @head: list head to add it before
*
* Insert a new entry before the specified head.
* This is useful for implementing queues.
*/
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
__list_add(new, head->prev, head);
}
/*
* Delete a list entry by making the prev/next entries
* point to each other.
*
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
static inline void __list_del(struct list_head *prev, struct list_head *next)
{
next->prev = prev;
prev->next = next;
}
/**
* list_del - deletes entry from list.
* @entry: the element to delete from the list.
* Note: list_empty on entry does not return true after this, the entry is in an undefined state.
*/
static inline void list_del(struct list_head *entry)
{
__list_del(entry->prev, entry->next);
entry->next = (void *) 0;
entry->prev = (void *) 0;
}
/**
* list_del_init - deletes entry from list and reinitialize it.
* @entry: the element to delete from the list.
*/
static inline void list_del_init(struct list_head *entry)
{
__list_del(entry->prev, entry->next);
INIT_LIST_HEAD(entry);
}
/**
* list_move - delete from one list and add as another's head
* @list: the entry to move
* @head: the head that will precede our entry
*/
static inline void list_move(struct list_head *list, struct list_head *head)
{
__list_del(list->prev, list->next);
list_add(list, head);
}
/**
* list_move_tail - delete from one list and add as another's tail
* @list: the entry to move
* @head: the head that will follow our entry
*/
static inline void list_move_tail(struct list_head *list,
struct list_head *head)
{
__list_del(list->prev, list->next);
list_add_tail(list, head);
}
/**
* list_empty - tests whether a list is empty
* @head: the list to test.
*/
static inline int list_empty(struct list_head *head)
{
return head->next == head;
}
static inline void __list_splice(struct list_head *list,
struct list_head *head)
{
struct list_head *first = list->next;
struct list_head *last = list->prev;
struct list_head *at = head->next;
first->prev = head;
head->next = first;
last->next = at;
at->prev = last;
}
/**
* list_splice - join two lists
* @list: the new list to add.
* @head: the place to add it in the first list.
*/
static inline void list_splice(struct list_head *list, struct list_head *head)
{
if (!list_empty(list))
__list_splice(list, head);
}
/**
* list_splice_init - join two lists and reinitialise the emptied list.
* @list: the new list to add.
* @head: the place to add it in the first list.
*
* The list at @list is reinitialised
*/
static inline void list_splice_init(struct list_head *list,
struct list_head *head)
{
if (!list_empty(list)) {
__list_splice(list, head);
INIT_LIST_HEAD(list);
}
}
/**
* list_entry - get the struct for this entry
* @ptr: the &struct list_head pointer.
* @type: the type of the struct this is embedded in.
* @member: the name of the list_struct within the struct.
*/
#define list_entry(ptr, type, member) \
((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
/**
* list_for_each - iterate over a list
* @pos: the &struct list_head to use as a loop counter.
* @head: the head for your list.
*/
#define list_for_each(pos, head) \
for (pos = (head)->next; pos != (head); \
pos = pos->next)
/**
* list_for_each_prev - iterate over a list backwards
* @pos: the &struct list_head to use as a loop counter.
* @head: the head for your list.
*/
#define list_for_each_prev(pos, head) \
for (pos = (head)->prev; pos != (head); \
pos = pos->prev)
/**
* list_for_each_safe - iterate over a list safe against removal of list entry
* @pos: the &struct list_head to use as a loop counter.
* @n: another &struct list_head to use as temporary storage
* @head: the head for your list.
*/
#define list_for_each_safe(pos, n, head) \
for (pos = (head)->next, n = pos->next; pos != (head); \
pos = n, n = pos->next)
/**
* list_for_each_entry - iterate over list of given type
* @pos: the type * to use as a loop counter.
* @head: the head for your list.
* @member: the name of the list_struct within the struct.
*/
#define list_for_each_entry(pos, head, member) \
for (pos = list_entry((head)->next, typeof(*pos), member); \
&pos->member != (head); \
pos = list_entry(pos->member.next, typeof(*pos), member))
/**
* list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
* @pos: the type * to use as a loop counter.
* @n: another type * to use as temporary storage
* @head: the head for your list.
* @member: the name of the list_struct within the struct.
*/
#define list_for_each_entry_safe(pos, n, head, member) \
for (pos = list_entry((head)->next, typeof(*pos), member), \
n = list_entry(pos->member.next, typeof(*pos), member); \
&pos->member != (head); \
pos = n, n = list_entry(n->member.next, typeof(*n), member))
/**
* list_for_each_entry_continue - iterate over list of given type
* continuing after existing point
* @pos: the type * to use as a loop counter.
* @head: the head for your list.
* @member: the name of the list_struct within the struct.
*/
#define list_for_each_entry_continue(pos, head, member) \
for (pos = list_entry(pos->member.next, typeof(*pos), member), \
prefetch(pos->member.next); \
&pos->member != (head); \
pos = list_entry(pos->member.next, typeof(*pos), member), \
prefetch(pos->member.next))
#endif

View file

@ -1,76 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <mm_malloc.h>
#include <sys/mman.h>
#include <mach/vm_statistics.h>
#include "persistent_memory.h"
#include "options.h"
#include "utils/applog.h"
char *persistent_memory;
int persistent_memory_flags = 0;
const char * persistent_memory_allocate() {
const int ratio = (opt_double_hash && opt_algo != ALGO_CRYPTONIGHT_LITE) ? 2 : 1;
const int size = MEMORY * (opt_n_threads * ratio + 1);
persistent_memory_flags |= MEMORY_HUGEPAGES_AVAILABLE;
persistent_memory = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
if (persistent_memory == MAP_FAILED) {
persistent_memory = _mm_malloc(size, 16);
return persistent_memory;
}
persistent_memory_flags |= MEMORY_HUGEPAGES_ENABLED;
if (madvise(persistent_memory, size, MADV_RANDOM | MADV_WILLNEED) != 0) {
applog(LOG_ERR, "madvise failed");
}
if (mlock(persistent_memory, size) == 0) {
persistent_memory_flags |= MEMORY_LOCK;
}
return persistent_memory;
}
void persistent_memory_free() {
const int size = MEMORY * (opt_n_threads + 1);
if (persistent_memory_flags & MEMORY_HUGEPAGES_ENABLED) {
if (persistent_memory_flags & MEMORY_LOCK) {
munlock(persistent_memory, size);
}
munmap(persistent_memory, size);
}
else {
_mm_free(persistent_memory);
}
}

View file

@ -1,91 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <unistd.h>
#include "options.h"
#include "cpu.h"
#include "utils/applog.h"
static void signal_handler(int sig)
{
switch (sig) {
case SIGHUP:
applog(LOG_WARNING, "SIGHUP received");
break;
case SIGINT:
applog(LOG_WARNING, "SIGINT received, exiting");
proper_exit(0);
break;
case SIGTERM:
applog(LOG_WARNING, "SIGTERM received, exiting");
proper_exit(0);
break;
}
}
void proper_exit(int reason) {
exit(reason);
}
void os_specific_init()
{
if (opt_affinity != -1) {
affine_to_cpu_mask(-1, opt_affinity);
}
if (opt_background) {
int i = fork();
if (i < 0) {
exit(1);
}
if (i > 0) {
exit(0);
}
i = setsid();
if (i < 0) {
applog(LOG_ERR, "setsid() failed (errno = %d)", errno);
}
i = chdir("/");
if (i < 0) {
applog(LOG_ERR, "chdir() failed (errno = %d)", errno);
}
signal(SIGHUP, signal_handler);
signal(SIGTERM, signal_handler);
}
signal(SIGINT, signal_handler);
}

516
options.c
View file

@ -1,516 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <jansson.h>
#include <curl/curl.h>
#include <getopt.h>
#include "version.h"
#include "utils/applog.h"
#include "options.h"
#include "cpu.h"
#include "donate.h"
#include "algo/cryptonight/cryptonight.h"
int64_t opt_affinity = -1L;
int opt_n_threads = 0;
int opt_algo_variant = 0;
int opt_retries = 5;
int opt_retry_pause = 5;
int opt_donate_level = DONATE_LEVEL;
int opt_max_cpu_usage = 75;
bool opt_colors = true;
bool opt_keepalive = false;
bool opt_background = false;
bool opt_double_hash = false;
bool opt_safe = false;
bool opt_nicehash = false;
char *opt_url = NULL;
char *opt_backup_url = NULL;
char *opt_userpass = NULL;
char *opt_user = NULL;
char *opt_pass = NULL;
enum mining_algo opt_algo = ALGO_CRYPTONIGHT;
static char const usage[] = "\
Usage: " APP_ID " [OPTIONS]\n\
Options:\n\
-a, --algo=ALGO cryptonight (default) or cryptonight-lite\n\
-o, --url=URL URL of mining server\n\
-b, --backup-url=URL URL of backup mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\
-u, --user=USERNAME username for mining server\n\
-p, --pass=PASSWORD password for mining server\n\
-t, --threads=N number of miner threads\n\
-v, --av=N algorithm variation, 0 auto select\n\
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
-R, --retry-pause=N time to pause between retries (default: 5)\n\
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
--no-color disable colored output\n\
--donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n\
-B, --background run the miner in the background\n\
-c, --config=FILE load a JSON-format configuration file\n\
--max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\
--safe safe adjust threads and av settings for current CPU\n\
--nicehash enable nicehash support\n\
-h, --help display this help and exit\n\
-V, --version output version information and exit\n\
";
static char const short_options[] = "a:c:khBp:Px:r:R:s:t:T:o:u:O:v:Vb:";
static struct option const options[] = {
{ "algo", 1, NULL, 'a' },
{ "av", 1, NULL, 'v' },
{ "background", 0, NULL, 'B' },
{ "backup-url", 1, NULL, 'b' },
{ "config", 1, NULL, 'c' },
{ "cpu-affinity", 1, NULL, 1020 },
{ "donate-level", 1, NULL, 1003 },
{ "help", 0, NULL, 'h' },
{ "keepalive", 0, NULL ,'k' },
{ "max-cpu-usage", 1, NULL, 1004 },
{ "nicehash", 0, NULL, 1006 },
{ "no-color", 0, NULL, 1002 },
{ "pass", 1, NULL, 'p' },
{ "retries", 1, NULL, 'r' },
{ "retry-pause", 1, NULL, 'R' },
{ "safe", 0, NULL, 1005 },
{ "threads", 1, NULL, 't' },
{ "url", 1, NULL, 'o' },
{ "user", 1, NULL, 'u' },
{ "userpass", 1, NULL, 'O' },
{ "version", 0, NULL, 'V' },
{ 0, 0, 0, 0 }
};
static const char *algo_names[] = {
[ALGO_CRYPTONIGHT] = "cryptonight",
# ifndef XMRIG_NO_AEON
[ALGO_CRYPTONIGHT_LITE] = "cryptonight-lite"
# endif
};
#ifndef XMRIG_NO_AEON
static int get_cryptonight_lite_variant(int variant) {
if (variant <= AEON_AV0_AUTO || variant >= AEON_AV_MAX) {
return (cpu_info.flags & CPU_FLAG_AES) ? AEON_AV2_AESNI_DOUBLE : AEON_AV4_SOFT_AES_DOUBLE;
}
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AEON_AV2_AESNI_DOUBLE) {
return variant + 2;
}
return variant;
}
#endif
static int get_algo_variant(int algo, int variant) {
# ifndef XMRIG_NO_AEON
if (algo == ALGO_CRYPTONIGHT_LITE) {
return get_cryptonight_lite_variant(variant);
}
# endif
if (variant <= XMR_AV0_AUTO || variant >= XMR_AV_MAX) {
return (cpu_info.flags & CPU_FLAG_AES) ? XMR_AV1_AESNI : XMR_AV3_SOFT_AES;
}
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= XMR_AV2_AESNI_DOUBLE) {
return variant + 2;
}
return variant;
}
static void parse_config(json_t *config, char *ref);
static char *parse_url(const char *arg);
static void parse_arg(int key, char *arg) {
char *p;
int v;
uint64_t ul;
switch (key)
{
case 'a':
for (int i = 0; i < ARRAY_SIZE(algo_names); i++) {
if (algo_names[i] && !strcmp(arg, algo_names[i])) {
opt_algo = i;
break;
}
# ifndef XMRIG_NO_AEON
if (i == ARRAY_SIZE(algo_names) - 1 && !strcmp(arg, "cryptonight-light")) {
opt_algo = i = ALGO_CRYPTONIGHT_LITE;
}
# endif
}
break;
case 'O': /* --userpass */
p = strchr(arg, ':');
if (!p) {
show_usage_and_exit(1);
}
free(opt_userpass);
opt_userpass = strdup(arg);
free(opt_user);
opt_user = calloc(p - arg + 1, 1);
strncpy(opt_user, arg, p - arg);
free(opt_pass);
opt_pass = strdup(p + 1);
break;
case 'o': /* --url */
p = parse_url(arg);
if (p) {
free(opt_url);
opt_url = p;
}
break;
case 'b': /* --backup-url */
p = parse_url(arg);
if (p) {
free(opt_backup_url);
opt_backup_url = p;
}
break;
case 'u': /* --user */
free(opt_user);
opt_user = strdup(arg);
break;
case 'p': /* --pass */
free(opt_pass);
opt_pass = strdup(arg);
break;
case 'r': /* --retries */
v = atoi(arg);
if (v < 1 || v > 1000) {
show_usage_and_exit(1);
}
opt_retries = v;
break;
case 'R': /* --retry-pause */
v = atoi(arg);
if (v < 1 || v > 3600) {
show_usage_and_exit(1);
}
opt_retry_pause = v;
break;
case 't': /* --threads */
v = atoi(arg);
if (v < 1 || v > 1024) {
show_usage_and_exit(1);
}
opt_n_threads = v;
break;
case 1004: /* --max-cpu-usage */
v = atoi(arg);
if (v < 1 || v > 100) {
show_usage_and_exit(1);
}
opt_max_cpu_usage = v;
break;
case 1005: /* --safe */
opt_safe = true;
break;
case 'k': /* --keepalive */
opt_keepalive = true;
break;
case 'V': /* --version */
show_version_and_exit();
break;
case 'h': /* --help */
show_usage_and_exit(0);
break;
case 'c': { /* --config */
json_error_t err;
json_t *config = json_load_file(arg, 0, &err);
if (!json_is_object(config)) {
if (err.line < 0) {
applog(LOG_ERR, "%s\n", err.text);
}
else {
applog(LOG_ERR, "%s:%d: %s\n", arg, err.line, err.text);
}
} else {
parse_config(config, arg);
json_decref(config);
}
break;
}
case 'B': /* --background */
opt_background = true;
opt_colors = false;
break;
case 'v': /* --av */
v = atoi(arg);
if (v < 0 || v > 1000) {
show_usage_and_exit(1);
}
opt_algo_variant = v;
break;
case 1020: /* --cpu-affinity */
p = strstr(arg, "0x");
ul = p ? strtoul(p, NULL, 16) : atol(arg);
if (ul > (1UL << cpu_info.total_logical_cpus) -1) {
ul = -1;
}
opt_affinity = ul;
break;
case 1002: /* --no-color */
opt_colors = false;
break;
case 1003: /* --donate-level */
v = atoi(arg);
if (v < 1 || v > 99) {
show_usage_and_exit(1);
}
opt_donate_level = v;
break;
case 1006: /* --nicehash */
opt_nicehash = true;
break;
default:
show_usage_and_exit(1);
}
}
static void parse_config(json_t *config, char *ref)
{
int i;
char buf[16];
json_t *val;
applog(LOG_ERR, ref);
for (i = 0; i < ARRAY_SIZE(options); i++) {
if (!options[i].name) {
break;
}
val = json_object_get(config, options[i].name);
if (!val) {
continue;
}
if (options[i].has_arg && json_is_string(val)) {
char *s = strdup(json_string_value(val));
if (!s) {
break;
}
parse_arg(options[i].val, s);
free(s);
}
else if (options[i].has_arg && json_is_integer(val)) {
sprintf(buf, "%d", (int) json_integer_value(val));
parse_arg(options[i].val, buf);
}
else if (options[i].has_arg && json_is_real(val)) {
sprintf(buf, "%f", json_real_value(val));
parse_arg(options[i].val, buf);
}
else if (!options[i].has_arg) {
if (json_is_true(val)) {
parse_arg(options[i].val, "");
}
}
else {
applog(LOG_ERR, "JSON option %s invalid", options[i].name);
}
}
}
static char *parse_url(const char *arg)
{
char *p = strstr(arg, "://");
if (p) {
if (strncasecmp(arg, "stratum+tcp://", 14)) {
show_usage_and_exit(1);
}
return strdup(arg);
}
if (!strlen(arg) || *arg == '/') {
show_usage_and_exit(1);
}
char *dest = malloc(strlen(arg) + 16);
sprintf(dest, "stratum+tcp://%s", arg);
return dest;
}
/**
* Parse application command line via getopt.
*/
void parse_cmdline(int argc, char *argv[]) {
opt_user = strdup("x");
opt_pass = strdup("x");
int key;
while (1) {
key = getopt_long(argc, argv, short_options, options, NULL);
if (key < 0) {
break;
}
parse_arg(key, optarg);
}
if (optind < argc) {
fprintf(stderr, "%s: unsupported non-option argument '%s'\n", argv[0], argv[optind]);
show_usage_and_exit(1);
}
if (!opt_url) {
applog_notime(LOG_ERR, "No pool URL supplied. Exiting.\n", argv[0]);
proper_exit(1);
}
if (strstr(opt_url, ".nicehash.com:") != NULL) {
opt_nicehash = true;
}
if (!opt_userpass) {
opt_userpass = malloc(strlen(opt_user) + strlen(opt_pass) + 2);
if (!opt_userpass) {
proper_exit(1);
}
sprintf(opt_userpass, "%s:%s", opt_user, opt_pass);
}
opt_algo_variant = get_algo_variant(opt_algo, opt_algo_variant);
if (!cryptonight_init(opt_algo_variant)) {
applog(LOG_ERR, "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations.");
proper_exit(1);
}
if (!opt_n_threads) {
opt_n_threads = get_optimal_threads_count(opt_algo, opt_double_hash, opt_max_cpu_usage);
}
if (opt_safe) {
const int count = get_optimal_threads_count(opt_algo, opt_double_hash, opt_max_cpu_usage);
if (opt_n_threads > count) {
opt_n_threads = count;
}
}
}
void show_usage_and_exit(int status) {
if (status) {
fprintf(stderr, "Try \"" APP_ID "\" --help' for more information.\n");
}
else {
printf(usage);
}
proper_exit(status);
}
void show_version_and_exit(void) {
printf(APP_NAME " " APP_VERSION "\n built on " __DATE__
#ifdef __GNUC__
" with GCC");
printf(" %d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
#endif
printf("\n features:"
#ifdef __i386__
" i386"
#endif
#ifdef __x86_64__
" x86_64"
#endif
#ifdef __AES__
" AES-NI"
#endif
"\n");
printf("\n%s\n", curl_version());
#ifdef JANSSON_VERSION
printf("libjansson/%s\n", JANSSON_VERSION);
#endif
proper_exit(0);
}
const char* get_current_algo_name(void) {
return algo_names[opt_algo];
}

View file

@ -1,91 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __OPTIONS_H__
#define __OPTIONS_H__
#include <stdbool.h>
#include <stdint.h>
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
enum mining_algo {
ALGO_CRYPTONIGHT, /* CryptoNight (Monero) */
ALGO_CRYPTONIGHT_LITE, /* CryptoNight-Lite (AEON) */
};
enum xmr_algo_variant {
XMR_AV0_AUTO,
XMR_AV1_AESNI,
XMR_AV2_AESNI_DOUBLE,
XMR_AV3_SOFT_AES,
XMR_AV4_SOFT_AES_DOUBLE,
XMR_AV_MAX
};
#ifndef XMRIG_NO_AEON
enum aeon_algo_variant {
AEON_AV0_AUTO,
AEON_AV1_AESNI,
AEON_AV2_AESNI_DOUBLE,
AEON_AV3_SOFT_AES,
AEON_AV4_SOFT_AES_DOUBLE,
AEON_AV_MAX
};
#endif
extern bool opt_colors;
extern bool opt_keepalive;
extern bool opt_background;
extern bool opt_double_hash;
extern bool opt_safe;
extern bool opt_nicehash;
extern char *opt_url;
extern char *opt_backup_url;
extern char *opt_userpass;
extern char *opt_user;
extern char *opt_pass;
extern int opt_n_threads;
extern int opt_algo_variant;
extern int opt_retry_pause;
extern int opt_retries;
extern int opt_donate_level;
extern int opt_max_cpu_usage;
extern int64_t opt_affinity;
extern enum mining_algo opt_algo;
void parse_cmdline(int argc, char *argv[]);
void show_usage_and_exit(int status);
void show_version_and_exit(void);
const char* get_current_algo_name(void);
extern void proper_exit(int reason);
#endif /* __OPTIONS_H__ */

View file

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View file

@ -1,5 +1,5 @@
#include <windows.h>
#include "../version.h"
#include "../src/version.h"
IDI_ICON1 ICON DISCARDABLE "app.ico"

33
src/3rdparty/align.h vendored Normal file
View file

@ -0,0 +1,33 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ALIGN_H__
#define __ALIGN_H__
#ifdef _MSC_VER
# define VAR_ALIGN(x, decl) __declspec(align(x)) decl
#else
# define VAR_ALIGN(x, decl) decl __attribute__ ((aligned(x)))
#endif
#endif /* __ALIGN_H__ */

653
src/3rdparty/getopt/getopt.h vendored Normal file
View file

@ -0,0 +1,653 @@
#ifndef __GETOPT_H__
/**
* DISCLAIMER
* This file is part of the mingw-w64 runtime package.
*
* The mingw-w64 runtime package and its code is distributed in the hope that it
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
/*
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
*/
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Dieter Baron and Thomas Klausner.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#pragma warning(disable:4996);
#define __GETOPT_H__
/* All the headers include this file. */
#include <crtdefs.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <windows.h>
#ifdef __cplusplus
extern "C" {
#endif
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
#ifdef REPLACE_GETOPT
int opterr = 1; /* if error message should be printed */
int optind = 1; /* index into parent argv vector */
int optopt = '?'; /* character checked for validity */
#undef optreset /* see getopt.h */
#define optreset __mingw_optreset
int optreset; /* reset getopt */
char *optarg; /* argument associated with option */
#endif
//extern int optind; /* index of first non-option in argv */
//extern int optopt; /* single option character, as parsed */
//extern int opterr; /* flag to enable built-in diagnostics... */
// /* (user may set to zero, to suppress) */
//
//extern char *optarg; /* pointer to argument of current option */
#define PRINT_ERROR ((opterr) && (*options != ':'))
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
/* return values */
#define BADCH (int)'?'
#define BADARG ((*options == ':') ? (int)':' : (int)'?')
#define INORDER (int)1
#ifndef __CYGWIN__
#define __progname __argv[0]
#else
extern char __declspec(dllimport) *__progname;
#endif
#ifdef __CYGWIN__
static char EMSG[] = "";
#else
#define EMSG ""
#endif
static int getopt_internal(int, char * const *, const char *,
const struct option *, int *, int);
static int parse_long_options(char * const *, const char *,
const struct option *, int *, int);
static int gcd(int, int);
static void permute_args(int, int, int, char * const *);
static char *place = EMSG; /* option letter processing */
/* XXX: set optreset to 1 rather than these two */
static int nonopt_start = -1; /* first non option argument (for permute) */
static int nonopt_end = -1; /* first option after non options (for permute) */
/* Error messages */
static const char recargchar[] = "option requires an argument -- %c";
static const char recargstring[] = "option requires an argument -- %s";
static const char ambig[] = "ambiguous option -- %.*s";
static const char noarg[] = "option doesn't take an argument -- %.*s";
static const char illoptchar[] = "unknown option -- %c";
static const char illoptstring[] = "unknown option -- %s";
static void
_vwarnx(const char *fmt,va_list ap)
{
(void)fprintf(stderr,"%s: ",__progname);
if (fmt != NULL)
(void)vfprintf(stderr,fmt,ap);
(void)fprintf(stderr,"\n");
}
static void
warnx(const char *fmt,...)
{
va_list ap;
va_start(ap,fmt);
_vwarnx(fmt,ap);
va_end(ap);
}
/*
* Compute the greatest common divisor of a and b.
*/
static int
gcd(int a, int b)
{
int c;
c = a % b;
while (c != 0) {
a = b;
b = c;
c = a % b;
}
return (b);
}
/*
* Exchange the block from nonopt_start to nonopt_end with the block
* from nonopt_end to opt_end (keeping the same order of arguments
* in each block).
*/
static void
permute_args(int panonopt_start, int panonopt_end, int opt_end,
char * const *nargv)
{
int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
char *swap;
/*
* compute lengths of blocks and number and size of cycles
*/
nnonopts = panonopt_end - panonopt_start;
nopts = opt_end - panonopt_end;
ncycle = gcd(nnonopts, nopts);
cyclelen = (opt_end - panonopt_start) / ncycle;
for (i = 0; i < ncycle; i++) {
cstart = panonopt_end+i;
pos = cstart;
for (j = 0; j < cyclelen; j++) {
if (pos >= panonopt_end)
pos -= nnonopts;
else
pos += nopts;
swap = nargv[pos];
/* LINTED const cast */
((char **) nargv)[pos] = nargv[cstart];
/* LINTED const cast */
((char **)nargv)[cstart] = swap;
}
}
}
#ifdef REPLACE_GETOPT
/*
* getopt --
* Parse argc/argv argument vector.
*
* [eventually this will replace the BSD getopt]
*/
int
getopt(int nargc, char * const *nargv, const char *options)
{
/*
* We don't pass FLAG_PERMUTE to getopt_internal() since
* the BSD getopt(3) (unlike GNU) has never done this.
*
* Furthermore, since many privileged programs call getopt()
* before dropping privileges it makes sense to keep things
* as simple (and bug-free) as possible.
*/
return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
}
#endif /* REPLACE_GETOPT */
//extern int getopt(int nargc, char * const *nargv, const char *options);
#ifdef _BSD_SOURCE
/*
* BSD adds the non-standard `optreset' feature, for reinitialisation
* of `getopt' parsing. We support this feature, for applications which
* proclaim their BSD heritage, before including this header; however,
* to maintain portability, developers are advised to avoid it.
*/
# define optreset __mingw_optreset
extern int optreset;
#endif
#ifdef __cplusplus
}
#endif
/*
* POSIX requires the `getopt' API to be specified in `unistd.h';
* thus, `unistd.h' includes this header. However, we do not want
* to expose the `getopt_long' or `getopt_long_only' APIs, when
* included in this manner. Thus, close the standard __GETOPT_H__
* declarations block, and open an additional __GETOPT_LONG_H__
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
* to declare the extended API.
*/
#endif /* !defined(__GETOPT_H__) */
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__
#ifdef __cplusplus
extern "C" {
#endif
struct option /* specification for a long form option... */
{
const char *name; /* option name, without leading hyphens */
int has_arg; /* does it take an argument? */
int *flag; /* where to save its status, or NULL */
int val; /* its associated status value */
};
enum /* permitted values for its `has_arg' field... */
{
no_argument = 0, /* option never takes an argument */
required_argument, /* option always requires an argument */
optional_argument /* option may take an argument */
};
/*
* parse_long_options --
* Parse long options in argc/argv argument vector.
* Returns -1 if short_too is set and the option does not match long_options.
*/
static int
parse_long_options(char * const *nargv, const char *options,
const struct option *long_options, int *idx, int short_too)
{
char *current_argv, *has_equal;
size_t current_argv_len;
int i, ambiguous, match;
#define IDENTICAL_INTERPRETATION(_x, _y) \
(long_options[(_x)].has_arg == long_options[(_y)].has_arg && \
long_options[(_x)].flag == long_options[(_y)].flag && \
long_options[(_x)].val == long_options[(_y)].val)
current_argv = place;
match = -1;
ambiguous = 0;
optind++;
if ((has_equal = strchr(current_argv, '=')) != NULL) {
/* argument found (--option=arg) */
current_argv_len = has_equal - current_argv;
has_equal++;
} else
current_argv_len = strlen(current_argv);
for (i = 0; long_options[i].name; i++) {
/* find matching long option */
if (strncmp(current_argv, long_options[i].name,
current_argv_len))
continue;
if (strlen(long_options[i].name) == current_argv_len) {
/* exact match */
match = i;
ambiguous = 0;
break;
}
/*
* If this is a known short option, don't allow
* a partial match of a single character.
*/
if (short_too && current_argv_len == 1)
continue;
if (match == -1) /* partial match */
match = i;
else if (!IDENTICAL_INTERPRETATION(i, match))
ambiguous = 1;
}
if (ambiguous) {
/* ambiguous abbreviation */
if (PRINT_ERROR)
warnx(ambig, (int)current_argv_len,
current_argv);
optopt = 0;
return (BADCH);
}
if (match != -1) { /* option found */
if (long_options[match].has_arg == no_argument
&& has_equal) {
if (PRINT_ERROR)
warnx(noarg, (int)current_argv_len,
current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
return (BADARG);
}
if (long_options[match].has_arg == required_argument ||
long_options[match].has_arg == optional_argument) {
if (has_equal)
optarg = has_equal;
else if (long_options[match].has_arg ==
required_argument) {
/*
* optional argument doesn't use next nargv
*/
optarg = nargv[optind++];
}
}
if ((long_options[match].has_arg == required_argument)
&& (optarg == NULL)) {
/*
* Missing argument; leading ':' indicates no error
* should be generated.
*/
if (PRINT_ERROR)
warnx(recargstring,
current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
--optind;
return (BADARG);
}
} else { /* unknown option */
if (short_too) {
--optind;
return (-1);
}
if (PRINT_ERROR)
warnx(illoptstring, current_argv);
optopt = 0;
return (BADCH);
}
if (idx)
*idx = match;
if (long_options[match].flag) {
*long_options[match].flag = long_options[match].val;
return (0);
} else
return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}
/*
* getopt_internal --
* Parse argc/argv argument vector. Called by user level routines.
*/
static int
getopt_internal(int nargc, char * const *nargv, const char *options,
const struct option *long_options, int *idx, int flags)
{
char *oli; /* option letter list index */
int optchar, short_too;
static int posixly_correct = -1;
if (options == NULL)
return (-1);
/*
* XXX Some GNU programs (like cvs) set optind to 0 instead of
* XXX using optreset. Work around this braindamage.
*/
if (optind == 0)
optind = optreset = 1;
/*
* Disable GNU extensions if POSIXLY_CORRECT is set or options
* string begins with a '+'.
*
* CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
* optreset != 0 for GNU compatibility.
*/
if (posixly_correct == -1 || optreset != 0)
posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
if (*options == '-')
flags |= FLAG_ALLARGS;
else if (posixly_correct || *options == '+')
flags &= ~FLAG_PERMUTE;
if (*options == '+' || *options == '-')
options++;
optarg = NULL;
if (optreset)
nonopt_start = nonopt_end = -1;
start:
if (optreset || !*place) { /* update scanning pointer */
optreset = 0;
if (optind >= nargc) { /* end of argument vector */
place = EMSG;
if (nonopt_end != -1) {
/* do permutation, if we have to */
permute_args(nonopt_start, nonopt_end,
optind, nargv);
optind -= nonopt_end - nonopt_start;
}
else if (nonopt_start != -1) {
/*
* If we skipped non-options, set optind
* to the first of them.
*/
optind = nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
if (*(place = nargv[optind]) != '-' ||
(place[1] == '\0' && strchr(options, '-') == NULL)) {
place = EMSG; /* found non-option */
if (flags & FLAG_ALLARGS) {
/*
* GNU extension:
* return non-option as argument to option 1
*/
optarg = nargv[optind++];
return (INORDER);
}
if (!(flags & FLAG_PERMUTE)) {
/*
* If no permutation wanted, stop parsing
* at first non-option.
*/
return (-1);
}
/* do permutation */
if (nonopt_start == -1)
nonopt_start = optind;
else if (nonopt_end != -1) {
permute_args(nonopt_start, nonopt_end,
optind, nargv);
nonopt_start = optind -
(nonopt_end - nonopt_start);
nonopt_end = -1;
}
optind++;
/* process next argument */
goto start;
}
if (nonopt_start != -1 && nonopt_end == -1)
nonopt_end = optind;
/*
* If we have "-" do nothing, if "--" we are done.
*/
if (place[1] != '\0' && *++place == '-' && place[1] == '\0') {
optind++;
place = EMSG;
/*
* We found an option (--), so if we skipped
* non-options, we have to permute.
*/
if (nonopt_end != -1) {
permute_args(nonopt_start, nonopt_end,
optind, nargv);
optind -= nonopt_end - nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
}
/*
* Check long options if:
* 1) we were passed some
* 2) the arg is not just "-"
* 3) either the arg starts with -- we are getopt_long_only()
*/
if (long_options != NULL && place != nargv[optind] &&
(*place == '-' || (flags & FLAG_LONGONLY))) {
short_too = 0;
if (*place == '-')
place++; /* --foo long option */
else if (*place != ':' && strchr(options, *place) != NULL)
short_too = 1; /* could be short option too */
optchar = parse_long_options(nargv, options, long_options,
idx, short_too);
if (optchar != -1) {
place = EMSG;
return (optchar);
}
}
if ((optchar = (int)*place++) == (int)':' ||
(optchar == (int)'-' && *place != '\0') ||
(oli = (char*)strchr(options, optchar)) == NULL) {
/*
* If the user specified "-" and '-' isn't listed in
* options, return -1 (non-option) as per POSIX.
* Otherwise, it is an unknown option character (or ':').
*/
if (optchar == (int)'-' && *place == '\0')
return (-1);
if (!*place)
++optind;
if (PRINT_ERROR)
warnx(illoptchar, optchar);
optopt = optchar;
return (BADCH);
}
if (long_options != NULL && optchar == 'W' && oli[1] == ';') {
/* -W long-option */
if (*place) /* no space */
/* NOTHING */;
else if (++optind >= nargc) { /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
} else /* white space */
place = nargv[optind];
optchar = parse_long_options(nargv, options, long_options,
idx, 0);
place = EMSG;
return (optchar);
}
if (*++oli != ':') { /* doesn't take argument */
if (!*place)
++optind;
} else { /* takes (optional) argument */
optarg = NULL;
if (*place) /* no white space */
optarg = place;
else if (oli[1] != ':') { /* arg not optional */
if (++optind >= nargc) { /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
} else
optarg = nargv[optind];
}
place = EMSG;
++optind;
}
/* dump back option letter */
return (optchar);
}
/*
* getopt_long --
* Parse argc/argv argument vector.
*/
int
getopt_long(int nargc, char * const *nargv, const char *options,
const struct option *long_options, int *idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx,
FLAG_PERMUTE));
}
/*
* getopt_long_only --
* Parse argc/argv argument vector.
*/
int
getopt_long_only(int nargc, char * const *nargv, const char *options,
const struct option *long_options, int *idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx,
FLAG_PERMUTE|FLAG_LONGONLY));
}
//extern int getopt_long(int nargc, char * const *nargv, const char *options,
// const struct option *long_options, int *idx);
//extern int getopt_long_only(int nargc, char * const *nargv, const char *options,
// const struct option *long_options, int *idx);
/*
* Previous MinGW implementation had...
*/
#ifndef HAVE_DECL_GETOPT
/*
* ...for the long form API only; keep this for compatibility.
*/
# define HAVE_DECL_GETOPT 1
#endif
#ifdef __cplusplus
}
#endif
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */

View file

@ -2,7 +2,9 @@
/* jansson_private_config.h.in. Generated from configure.ac by autoheader. */
/* Define to 1 if gcc's __atomic builtins are available */
#define HAVE_ATOMIC_BUILTINS 1
#ifndef _MSC_VER
# define HAVE_ATOMIC_BUILTINS 1
#endif
/* Define to 1 if you have the `close' function. */
#define HAVE_CLOSE 1
@ -20,7 +22,9 @@
#define HAVE_GETPID 1
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY 1
#ifndef _MSC_VER
# define HAVE_GETTIMEOFDAY 1
#endif
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
@ -44,10 +48,14 @@
#define HAVE_READ 1
/* Define to 1 if you have the <sched.h> header file. */
#define HAVE_SCHED_H 1
#ifndef _MSC_VER
# define HAVE_SCHED_H 1
#endif
/* Define to 1 if you have the `sched_yield' function. */
#define HAVE_SCHED_YIELD 1
#ifndef _MSC_VER
# define HAVE_SCHED_YIELD 1
#endif
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
@ -68,19 +76,25 @@
#define HAVE_SYNC_BUILTINS 1
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H 1
#ifndef _MSC_VER
# define HAVE_SYS_PARAM_H 1
#endif
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H 1
#ifndef _MSC_VER
# define HAVE_SYS_TIME_H 1
#endif
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
#ifndef _MSC_VER
# define HAVE_UNISTD_H 1
#endif
/* Define to 1 if the system has the type 'unsigned long long int'. */
#define HAVE_UNSIGNED_LONG_LONG_INT 1

View file

@ -3,17 +3,19 @@ project (cpuid C)
add_definitions(/DVERSION="0.4.0")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
set(HEADERS
libcpuid.h
libcpuid_types.h
libcpuid_constants.h
libcpuid_internal.h
amd_code_t.h
intel_code_t.h
recog_amd.h
recog_intel.h
asm-bits.h
libcpuid_util.h
amd_code_t.h
intel_code_t.h
recog_amd.h
recog_intel.h
asm-bits.h
libcpuid_util.h
)
set(SOURCES
@ -24,7 +26,13 @@ set(SOURCES
libcpuid_util.c
)
if (CMAKE_CL_64)
enable_language(ASM_MASM)
set(SOURCES_ASM masm-x64.asm)
endif()
add_library(cpuid STATIC
${HEADERS}
${SOURCES}
${SOURCES_ASM}
)

View file

@ -46,11 +46,6 @@ int set_error(cpu_error_t err)
return (int) err;
}
static void raw_data_t_constructor(struct cpu_raw_data_t* raw)
{
memset(raw, 0, sizeof(struct cpu_raw_data_t));
}
static void cpu_id_t_constructor(struct cpu_id_t* id)
{
memset(id, 0, sizeof(struct cpu_id_t));
@ -60,29 +55,6 @@ static void cpu_id_t_constructor(struct cpu_id_t* id)
id->sse_size = -1;
}
static int parse_token(const char* expected_token, const char *token,
const char *value, uint32_t array[][4], int limit, int *recognized)
{
char format[32];
int veax, vebx, vecx, vedx;
int index;
if (*recognized) return 1; /* already recognized */
if (strncmp(token, expected_token, strlen(expected_token))) return 1; /* not what we search for */
sprintf(format, "%s[%%d]", expected_token);
*recognized = 1;
if (1 == sscanf(token, format, &index) && index >=0 && index < limit) {
if (4 == sscanf(value, "%x%x%x%x", &veax, &vebx, &vecx, &vedx)) {
array[index][0] = veax;
array[index][1] = vebx;
array[index][2] = vecx;
array[index][3] = vedx;
return 1;
}
}
return 0;
}
/* get_total_cpus() system specific code: uses OS routines to determine total number of CPUs */
#ifdef __APPLE__
#include <unistd.h>
@ -320,27 +292,6 @@ static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* dat
return set_error(ERR_OK);
}
static void make_list_from_string(const char* csv, struct cpu_list_t* list)
{
int i, n, l, last;
l = (int) strlen(csv);
n = 0;
for (i = 0; i < l; i++) if (csv[i] == ',') n++;
n++;
list->num_entries = n;
list->names = (char**) malloc(sizeof(char*) * n);
last = -1;
n = 0;
for (i = 0; i <= l; i++) if (i == l || csv[i] == ',') {
list->names[n] = (char*) malloc(i - last);
memcpy(list->names[n], &csv[last + 1], i - last - 1);
list->names[n][i - last - 1] = '\0';
n++;
last = i;
}
}
/* Interface: */
int cpuid_get_total_cpus(void)

View file

@ -610,39 +610,6 @@ void cpu_exec_cpuid_ext(uint32_t* regs);
*/
int cpuid_get_raw_data(struct cpu_raw_data_t* data);
/**
* @brief Writes the raw CPUID data to a text file
* @param data - a pointer to cpu_raw_data_t structure
* @param filename - the path of the file, where the serialized data should be
* written. If empty, stdout will be used.
* @note This is intended primarily for debugging. On some processor, which is
* not currently supported or not completely recognized by cpu_identify,
* one can still successfully get the raw data and write it to a file.
* libcpuid developers can later import this file and debug the detection
* code as if running on the actual hardware.
* The file is simple text format of "something=value" pairs. Version info
* is also written, but the format is not intended to be neither backward-
* nor forward compatible.
* @returns zero if successful, and some negative number on error.
* The error message can be obtained by calling \ref cpuid_error.
* @see cpu_error_t
*/
int cpuid_serialize_raw_data(struct cpu_raw_data_t* data, const char* filename);
/**
* @brief Reads raw CPUID data from file
* @param data - a pointer to cpu_raw_data_t structure. The deserialized data will
* be written here.
* @param filename - the path of the file, containing the serialized raw data.
* If empty, stdin will be used.
* @note This function may fail, if the file is created by different version of
* the library. Also, see the notes on cpuid_serialize_raw_data.
* @returns zero if successful, and some negative number on error.
* The error message can be obtained by calling \ref cpuid_error.
* @see cpu_error_t
*/
int cpuid_deserialize_raw_data(struct cpu_raw_data_t* data, const char* filename);
/**
* @brief Identifies the CPU
* @param raw - Input - a pointer to the raw CPUID data, which is obtained
@ -668,222 +635,6 @@ int cpuid_deserialize_raw_data(struct cpu_raw_data_t* data, const char* filename
*/
int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data);
/**
* @brief Returns the short textual representation of a CPU flag
* @param feature - the feature, whose textual representation is wanted.
* @returns a constant string like "fpu", "tsc", "sse2", etc.
* @note the names of the returned flags are compatible with those from
* /proc/cpuinfo in Linux, with the exception of `tm_amd'
*/
const char* cpu_feature_str(cpu_feature_t feature);
/**
* @brief Returns textual description of the last error
*
* libcpuid stores an `errno'-style error status, whose description
* can be obtained with this function.
* @note This function is not thread-safe
* @see cpu_error_t
*/
const char* cpuid_error(void);
/**
* @brief Executes RDTSC
*
* The RDTSC (ReaD Time Stamp Counter) instruction gives access to an
* internal 64-bit counter, which usually increments at each clock cycle.
* This can be used for various timing routines, and as a very precise
* clock source. It is set to zero on system startup. Beware that may not
* increment at the same frequency as the CPU. Consecutive calls of RDTSC
* are, however, guaranteed to return monotonically-increasing values.
*
* @param result - a pointer to a 64-bit unsigned integer, where the TSC value
* will be stored
*
* @note If 100% compatibility is a concern, you must first check if the
* RDTSC instruction is present (if it is not, your program will crash
* with "invalid opcode" exception). Only some very old processors (i486,
* early AMD K5 and some Cyrix CPUs) lack that instruction - they should
* have become exceedingly rare these days. To verify RDTSC presence,
* run cpu_identify() and check flags[CPU_FEATURE_TSC].
*
* @note The monotonically increasing nature of the TSC may be violated
* on SMP systems, if their TSC clocks run at different rate. If the OS
* doesn't account for that, the TSC drift may become arbitrary large.
*/
void cpu_rdtsc(uint64_t* result);
/**
* @brief Store TSC and timing info
*
* This function stores the current TSC value and current
* time info from a precise OS-specific clock source in the cpu_mark_t
* structure. The sys_clock field contains time with microsecond resolution.
* The values can later be used to measure time intervals, number of clocks,
* FPU frequency, etc.
* @see cpu_rdtsc
*
* @param mark [out] - a pointer to a cpu_mark_t structure
*/
void cpu_tsc_mark(struct cpu_mark_t* mark);
/**
* @brief Calculate TSC and timing difference
*
* @param mark - input/output: a pointer to a cpu_mark_t sturcture, which has
* already been initialized by cpu_tsc_mark. The difference in
* TSC and time will be written here.
*
* This function calculates the TSC and time difference, by obtaining the
* current TSC and timing values and subtracting the contents of the `mark'
* structure from them. Results are written in the same structure.
*
* Example:
* @code
* ...
* struct cpu_mark_t mark;
* cpu_tsc_mark(&mark);
* foo();
* cpu_tsc_unmark(&mark);
* printf("Foo finished. Executed in %llu cycles and %llu usecs\n",
* mark.tsc, mark.sys_clock);
* ...
* @endcode
*/
void cpu_tsc_unmark(struct cpu_mark_t* mark);
/**
* @brief Calculates the CPU clock
*
* @param mark - pointer to a cpu_mark_t structure, which has been initialized
* with cpu_tsc_mark and later `stopped' with cpu_tsc_unmark.
*
* @note For reliable results, the marked time interval should be at least about
* 10 ms.
*
* @returns the CPU clock frequency, in MHz. Due to measurement error, it will
* differ from the true value in a few least-significant bits. Accuracy depends
* on the timing interval - the more, the better. If the timing interval is
* insufficient, the result is -1. Also, see the comment on cpu_clock_measure
* for additional issues and pitfalls in using RDTSC for CPU frequency
* measurements.
*/
int cpu_clock_by_mark(struct cpu_mark_t* mark);
/**
* @brief Returns the CPU clock, as reported by the OS
*
* This function uses OS-specific functions to obtain the CPU clock. It may
* differ from the true clock for several reasons:<br><br>
*
* i) The CPU might be in some power saving state, while the OS reports its
* full-power frequency, or vice-versa.<br>
* ii) In some cases you can raise or lower the CPU frequency with overclocking
* utilities and the OS will not notice.
*
* @returns the CPU clock frequency in MHz. If the OS is not (yet) supported
* or lacks the necessary reporting machinery, the return value is -1
*/
int cpu_clock_by_os(void);
/**
* @brief Measure the CPU clock frequency
*
* @param millis - How much time to waste in the busy-wait cycle. In millisecs.
* Useful values 10 - 1000
* @param quad_check - Do a more thorough measurement if nonzero
* (see the explanation).
*
* The function performs a busy-wait cycle for the given time and calculates
* the CPU frequency by the difference of the TSC values. The accuracy of the
* calculation depends on the length of the busy-wait cycle: more is better,
* but 100ms should be enough for most purposes.
*
* While this will calculate the CPU frequency correctly in most cases, there are
* several reasons why it might be incorrect:<br>
*
* i) RDTSC doesn't guarantee it will run at the same clock as the CPU.
* Apparently there aren't CPUs at the moment, but still, there's no
* guarantee.<br>
* ii) The CPU might be in a low-frequency power saving mode, and the CPU
* might be switched to higher frequency at any time. If this happens
* during the measurement, the result can be anywhere between the
* low and high frequencies. Also, if you're interested in the
* high frequency value only, this function might return the low one
* instead.<br>
* iii) On SMP systems exhibiting TSC drift (see \ref cpu_rdtsc)
*
* the quad_check option will run four consecutive measurements and
* then return the average of the two most-consistent results. The total
* runtime of the function will still be `millis' - consider using
* a bit more time for the timing interval.
*
* Finally, for benchmarking / CPU intensive applications, the best strategy is
* to use the cpu_tsc_mark() / cpu_tsc_unmark() / cpu_clock_by_mark() method.
* Begin by mark()-ing about one second after application startup (allowing the
* power-saving manager to kick in and rise the frequency during that time),
* then unmark() just before application finishing. The result will most
* acurately represent at what frequency your app was running.
*
* @returns the CPU clock frequency in MHz (within some measurement error
* margin). If RDTSC is not supported, the result is -1.
*/
int cpu_clock_measure(int millis, int quad_check);
/**
* @brief Measure the CPU clock frequency using instruction-counting
*
* @param millis - how much time to allocate for each run, in milliseconds
* @param runs - how many runs to perform
*
* The function performs a busy-wait cycle using a known number of "heavy" (SSE)
* instructions. These instructions run at (more or less guaranteed) 1 IPC rate,
* so by running a busy loop for a fixed amount of time, and measuring the
* amount of instructions done, the CPU clock is accurately measured.
*
* Of course, this function is still affected by the power-saving schemes, so
* the warnings as of cpu_clock_measure() still apply. However, this function is
* immune to problems with detection, related to the Intel Nehalem's "Turbo"
* mode, where the internal clock is raised, but the RDTSC rate is unaffected.
*
* The function will run for about (millis * runs) milliseconds.
* You can make only a single busy-wait run (runs == 1); however, this can
* be affected by task scheduling (which will break the counting), so allowing
* more than one run is recommended. As run length is not imperative for
* accurate readings (e.g., 50ms is sufficient), you can afford a lot of short
* runs, e.g. 10 runs of 50ms or 20 runs of 25ms.
*
* Recommended values - millis = 50, runs = 4. For more robustness,
* increase the number of runs.
*
* NOTE: on Bulldozer and later CPUs, the busy-wait cycle runs at 1.4 IPC, thus
* the results are skewed. This is corrected internally by dividing the resulting
* value by 1.4.
* However, this only occurs if the thread is executed on a single CMT
* module - if there are other threads competing for resources, the results are
* unpredictable. Make sure you run cpu_clock_by_ic() on a CPU that is free from
* competing threads, or if there are such threads, they shouldn't exceed the
* number of modules. On a Bulldozer X8, that means 4 threads.
*
* @returns the CPU clock frequency in MHz (within some measurement error
* margin). If SSE is not supported, the result is -1. If the input parameters
* are incorrect, or some other internal fault is detected, the result is -2.
*/
int cpu_clock_by_ic(int millis, int runs);
/**
* @brief Get the CPU clock frequency (all-in-one method)
*
* This is an all-in-one method for getting the CPU clock frequency.
* It tries to use the OS for that. If the OS doesn't have this info, it
* uses cpu_clock_measure with 200ms time interval and quadruple checking.
*
* @returns the CPU clock frequency in MHz. If every possible method fails,
* the result is -1.
*/
int cpu_clock(void);
/**
* @brief The return value of cpuid_get_epc().
* @details
@ -916,230 +667,6 @@ struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw);
*/
const char* cpuid_lib_version(void);
typedef void (*libcpuid_warn_fn_t) (const char *msg);
/**
* @brief Sets the warning print function
*
* In some cases, the internal libcpuid machinery would like to emit useful
* debug warnings. By default, these warnings are written to stderr. However,
* you can set a custom function that will receive those warnings.
*
* @param warn_fun - the warning function you want to set. If NULL, warnings
* are disabled. The function takes const char* argument.
*
* @returns the current warning function. You can use the return value to
* keep the previous warning function and restore it at your discretion.
*/
libcpuid_warn_fn_t cpuid_set_warn_function(libcpuid_warn_fn_t warn_fun);
/**
* @brief Sets the verbosiness level
*
* When the verbosiness level is above zero, some functions might print
* diagnostic information about what are they doing. The higher the level is,
* the more detail is printed. Level zero is guaranteed to omit all such
* output. The output is written using the same machinery as the warnings,
* @see cpuid_set_warn_function()
*
* @param level the desired verbosiness level. Useful values 0..2 inclusive
*/
void cpuid_set_verbosiness_level(int level);
/**
* @brief Obtains the CPU vendor from CPUID from the current CPU
* @note The result is cached.
* @returns VENDOR_UNKNOWN if failed, otherwise the CPU vendor type.
* @see cpu_vendor_t
*/
cpu_vendor_t cpuid_get_vendor(void);
/**
* @brief a structure that holds a list of processor names
*/
struct cpu_list_t {
/** Number of entries in the list */
int num_entries;
/** Pointers to names. There will be num_entries of them */
char **names;
};
/**
* @brief Gets a list of all known CPU names from a specific vendor.
*
* This function compiles a list of all known CPU (code)names
* (i.e. the possible values of cpu_id_t::cpu_codename) for the given vendor.
*
* There are about 100 entries for Intel and AMD, and a few for the other
* vendors. The list is written out in approximate chronological introduction
* order of the parts.
*
* @param vendor the vendor to be queried
* @param list [out] the resulting list will be written here.
* NOTE: As the memory is dynamically allocated, be sure to call
* cpuid_free_cpu_list() after you're done with the data
* @see cpu_list_t
*/
void cpuid_get_cpu_list(cpu_vendor_t vendor, struct cpu_list_t* list);
/**
* @brief Frees a CPU list
*
* This function deletes all the memory associated with a CPU list, as obtained
* by cpuid_get_cpu_list()
*
* @param list - the list to be free()'d.
*/
void cpuid_free_cpu_list(struct cpu_list_t* list);
struct msr_driver_t;
/**
* @brief Starts/opens a driver, needed to read MSRs (Model Specific Registers)
*
* On systems that support it, this function will create a temporary
* system driver, that has privileges to execute the RDMSR instruction.
* After the driver is created, you can read MSRs by calling \ref cpu_rdmsr
*
* @returns a handle to the driver on success, and NULL on error.
* The error message can be obtained by calling \ref cpuid_error.
* @see cpu_error_t
*/
struct msr_driver_t* cpu_msr_driver_open(void);
/**
* @brief Similar to \ref cpu_msr_driver_open, but accept one parameter
*
* This function works on certain operating systems (GNU/Linux, FreeBSD)
*
* @param core_num specify the core number for MSR.
* The first core number is 0.
* The last core number is \ref cpuid_get_total_cpus - 1.
*
* @returns a handle to the driver on success, and NULL on error.
* The error message can be obtained by calling \ref cpuid_error.
* @see cpu_error_t
*/
struct msr_driver_t* cpu_msr_driver_open_core(unsigned core_num);
/**
* @brief Reads a Model-Specific Register (MSR)
*
* If the CPU has MSRs (as indicated by the CPU_FEATURE_MSR flag), you can
* read a MSR with the given index by calling this function.
*
* There are several prerequisites you must do before reading MSRs:
* 1) You must ensure the CPU has RDMSR. Check the CPU_FEATURE_MSR flag
* in cpu_id_t::flags
* 2) You must ensure that the CPU implements the specific MSR you intend to
* read.
* 3) You must open a MSR-reader driver. RDMSR is a privileged instruction and
* needs ring-0 access in order to work. This temporary driver is created
* by calling \ref cpu_msr_driver_open
*
* @param handle - a handle to the MSR reader driver, as created by
* cpu_msr_driver_open
* @param msr_index - the numeric ID of the MSR you want to read
* @param result - a pointer to a 64-bit integer, where the MSR value is stored
*
* @returns zero if successful, and some negative number on error.
* The error message can be obtained by calling \ref cpuid_error.
* @see cpu_error_t
*/
int cpu_rdmsr(struct msr_driver_t* handle, uint32_t msr_index, uint64_t* result);
typedef enum {
INFO_MPERF, /*!< Maximum performance frequency clock. This
is a counter, which increments as a
proportion of the actual processor speed. */
INFO_APERF, /*!< Actual performance frequency clock. This
accumulates the core clock counts when the
core is active. */
INFO_MIN_MULTIPLIER, /*!< Minimum CPU:FSB ratio for this CPU,
multiplied by 100. */
INFO_CUR_MULTIPLIER, /*!< Current CPU:FSB ratio, multiplied by 100.
e.g., a CPU:FSB value of 18.5 reads as
"1850". */
INFO_MAX_MULTIPLIER, /*!< Maximum CPU:FSB ratio for this CPU,
multiplied by 100. */
INFO_TEMPERATURE, /*!< The current core temperature in Celsius. */
INFO_THROTTLING, /*!< 1 if the current logical processor is
throttling. 0 if it is running normally. */
INFO_VOLTAGE, /*!< The current core voltage in Volt,
multiplied by 100. */
INFO_BCLK, /*!< See \ref INFO_BUS_CLOCK. */
INFO_BUS_CLOCK, /*!< The main bus clock in MHz,
e.g., FSB/QPI/DMI/HT base clock,
multiplied by 100. */
} cpu_msrinfo_request_t;
/**
* @brief Similar to \ref cpu_rdmsr, but extract a range of bits
*
* @param handle - a handle to the MSR reader driver, as created by
* cpu_msr_driver_open
* @param msr_index - the numeric ID of the MSR you want to read
* @param highbit - the high bit in range, must be inferior to 64
* @param lowbit - the low bit in range, must be equal or superior to 0
* @param result - a pointer to a 64-bit integer, where the MSR value is stored
*
* @returns zero if successful, and some negative number on error.
* The error message can be obtained by calling \ref cpuid_error.
* @see cpu_error_t
*/
int cpu_rdmsr_range(struct msr_driver_t* handle, uint32_t msr_index, uint8_t highbit,
uint8_t lowbit, uint64_t* result);
/**
* @brief Reads extended CPU information from Model-Specific Registers.
* @param handle - a handle to an open MSR driver, @see cpu_msr_driver_open
* @param which - which info field should be returned. A list of
* available information entities is listed in the
* cpu_msrinfo_request_t enum.
* @retval - if the requested information is available for the current
* processor model, the respective value is returned.
* if no information is available, or the CPU doesn't support
* the query, the special value CPU_INVALID_VALUE is returned
* @note This function is not MT-safe. If you intend to call it from multiple
* threads, guard it through a mutex or a similar primitive.
*/
int cpu_msrinfo(struct msr_driver_t* handle, cpu_msrinfo_request_t which);
#define CPU_INVALID_VALUE 0x3fffffff
/**
* @brief Writes the raw MSR data to a text file
* @param data - a pointer to msr_driver_t structure
* @param filename - the path of the file, where the serialized data should be
* written. If empty, stdout will be used.
* @note This is intended primarily for debugging. On some processor, which is
* not currently supported or not completely recognized by cpu_identify,
* one can still successfully get the raw data and write it to a file.
* libcpuid developers can later import this file and debug the detection
* code as if running on the actual hardware.
* The file is simple text format of "something=value" pairs. Version info
* is also written, but the format is not intended to be neither backward-
* nor forward compatible.
* @returns zero if successful, and some negative number on error.
* The error message can be obtained by calling \ref cpuid_error.
* @see cpu_error_t
*/
int msr_serialize_raw_data(struct msr_driver_t* handle, const char* filename);
/**
* @brief Closes an open MSR driver
*
* This function unloads the MSR driver opened by cpu_msr_driver_open and
* frees any resources associated with it.
*
* @param handle - a handle to the MSR reader driver, as created by
* cpu_msr_driver_open
*
* @returns zero if successful, and some negative number on error.
* The error message can be obtained by calling \ref cpuid_error.
* @see cpu_error_t
*/
int cpu_msr_driver_close(struct msr_driver_t* handle);
#ifdef __cplusplus
}; /* extern "C" */
#endif

View file

@ -58,6 +58,48 @@ struct internal_id_info_t {
int score; // detection (matchtable) score
};
#define LBIT(x) (((long long) 1) << x)
enum _common_bits_t {
_M_ = LBIT( 0 ),
MOBILE_ = LBIT( 1 ),
_MP_ = LBIT( 2 ),
};
// additional detection bits for Intel CPUs:
enum _intel_bits_t {
PENTIUM_ = LBIT( 10 ),
CELERON_ = LBIT( 11 ),
CORE_ = LBIT( 12 ),
_I_ = LBIT( 13 ),
_3 = LBIT( 14 ),
_5 = LBIT( 15 ),
_7 = LBIT( 16 ),
XEON_ = LBIT( 17 ),
ATOM_ = LBIT( 18 ),
};
typedef enum _intel_bits_t intel_bits_t;
enum _amd_bits_t {
ATHLON_ = LBIT( 10 ),
_XP_ = LBIT( 11 ),
DURON_ = LBIT( 12 ),
SEMPRON_ = LBIT( 13 ),
OPTERON_ = LBIT( 14 ),
TURION_ = LBIT( 15 ),
_LV_ = LBIT( 16 ),
_64_ = LBIT( 17 ),
_X2 = LBIT( 18 ),
_X3 = LBIT( 19 ),
_X4 = LBIT( 20 ),
_X6 = LBIT( 21 ),
_FX = LBIT( 22 ),
_APU_ = LBIT( 23 ),
};
typedef enum _amd_bits_t amd_bits_t;
int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data,
struct internal_id_info_t* internal);

93
src/3rdparty/libcpuid/libcpuid_util.c vendored Normal file
View file

@ -0,0 +1,93 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include "libcpuid.h"
#include "libcpuid_util.h"
void match_features(const struct feature_map_t* matchtable, int count, uint32_t reg, struct cpu_id_t* data)
{
int i;
for (i = 0; i < count; i++)
if (reg & (1u << matchtable[i].bit))
data->flags[matchtable[i].feature] = 1;
}
static int xmatch_entry(char c, const char* p)
{
int i, j;
if (c == 0) return -1;
if (c == p[0]) return 1;
if (p[0] == '.') return 1;
if (p[0] == '#' && isdigit(c)) return 1;
if (p[0] == '[') {
j = 1;
while (p[j] && p[j] != ']') j++;
if (!p[j]) return -1;
for (i = 1; i < j; i++)
if (p[i] == c) return j + 1;
}
return -1;
}
int match_pattern(const char* s, const char* p)
{
int i, j, dj, k, n, m;
n = (int) strlen(s);
m = (int) strlen(p);
for (i = 0; i < n; i++) {
if (xmatch_entry(s[i], p) != -1) {
j = 0;
k = 0;
while (j < m && ((dj = xmatch_entry(s[i + k], p + j)) != -1)) {
k++;
j += dj;
}
if (j == m) return i + 1;
}
}
return 0;
}
struct cpu_id_t* get_cached_cpuid(void)
{
static int initialized = 0;
static struct cpu_id_t id;
if (initialized) return &id;
if (cpu_identify(NULL, &id))
memset(&id, 0, sizeof(id));
initialized = 1;
return &id;
}
int match_all(uint64_t bits, uint64_t mask)
{
return (bits & mask) == mask;
}

View file

@ -28,8 +28,6 @@
#define COUNT_OF(array) (sizeof(array) / sizeof(array[0]))
#define LBIT(x) (((long long) 1) << x)
struct feature_map_t {
unsigned bit;
cpu_feature_t feature;
@ -50,20 +48,6 @@ struct match_entry_t {
int match_cpu_codename(const struct match_entry_t* matchtable, int count,
struct cpu_id_t* data, int brand_code, uint64_t bits,
int model_code);
void warnf(const char* format, ...)
#ifdef __GNUC__
__attribute__((format(printf, 1, 2)))
#endif
;
void debugf(int verboselevel, const char* format, ...)
#ifdef __GNUC__
__attribute__((format(printf, 2, 3)))
#endif
;
void generic_get_cpu_list(const struct match_entry_t* matchtable, int count,
struct cpu_list_t* list);
/*
* Seek for a pattern in `haystack'.
* Pattern may be an fixed string, or contain the special metacharacters
@ -86,15 +70,9 @@ struct cpu_id_t* get_cached_cpuid(void);
/* returns true if all bits of mask are present in `bits'. */
int match_all(uint64_t bits, uint64_t mask);
/* print what bits a mask consists of */
void debug_print_lbits(int debuglevel, uint64_t mask);
/*
* Sets the current errno
*/
int set_error(cpu_error_t err);
extern libcpuid_warn_fn_t _warn_fun;
extern int _current_verboselevel;
#endif /* __LIBCPUID_UTIL_H__ */

359
src/3rdparty/libcpuid/masm-x64.asm vendored Normal file
View file

@ -0,0 +1,359 @@
.code
; procedure exec_cpuid
; Signature: void exec_cpiud(uint32_t *regs)
exec_cpuid Proc
push rbx
push rcx
push rdx
push rdi
mov rdi, rcx
mov eax, [rdi]
mov ebx, [rdi+4]
mov ecx, [rdi+8]
mov edx, [rdi+12]
cpuid
mov [rdi], eax
mov [rdi+4], ebx
mov [rdi+8], ecx
mov [rdi+12], edx
pop rdi
pop rdx
pop rcx
pop rbx
ret
exec_cpuid endp
; procedure cpu_rdtsc
; Signature: void cpu_rdtsc(uint64_t *result)
cpu_rdtsc Proc
push rdx
rdtsc
mov [rcx], eax
mov [rcx+4], edx
pop rdx
ret
cpu_rdtsc endp
; procedure busy_sse_loop
; Signature: void busy_sse_loop(int cycles)
busy_sse_loop Proc
; save xmm6 & xmm7 into the shadow area, as Visual C++ 2008
; expects that we don't touch them:
movups [rsp + 8], xmm6
movups [rsp + 24], xmm7
xorps xmm0, xmm0
xorps xmm1, xmm1
xorps xmm2, xmm2
xorps xmm3, xmm3
xorps xmm4, xmm4
xorps xmm5, xmm5
xorps xmm6, xmm6
xorps xmm7, xmm7
; --
align 16
bsLoop:
; 0:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 1:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 2:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 3:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 4:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 5:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 6:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 7:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 8:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 9:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 10:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 11:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 12:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 13:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 14:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 15:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 16:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 17:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 18:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 19:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 20:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 21:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 22:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 23:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 24:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 25:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 26:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 27:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 28:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 29:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 30:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 31:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; ----------------------
dec ecx
jnz bsLoop
; restore xmm6 & xmm7:
movups xmm6, [rsp + 8]
movups xmm7, [rsp + 24]
ret
busy_sse_loop endp
END

169
src/3rdparty/libcpuid/recog_amd.c vendored Normal file
View file

@ -0,0 +1,169 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "libcpuid.h"
#include "libcpuid_util.h"
#include "libcpuid_internal.h"
#include "recog_amd.h"
const struct amd_code_str { amd_code_t code; char *str; } amd_code_str[] = {
#define CODE(x) { x, #x }
#define CODE2(x, y) CODE(x)
#include "amd_code_t.h"
#undef CODE
};
struct amd_code_and_bits_t {
int code;
uint64_t bits;
};
enum _amd_model_codes_t {
// Only for Ryzen CPUs:
_1400,
_1500,
_1600,
};
static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
const struct feature_map_t matchtable_edx81[] = {
{ 20, CPU_FEATURE_NX },
{ 22, CPU_FEATURE_MMXEXT },
{ 25, CPU_FEATURE_FXSR_OPT },
{ 30, CPU_FEATURE_3DNOWEXT },
{ 31, CPU_FEATURE_3DNOW },
};
const struct feature_map_t matchtable_ecx81[] = {
{ 1, CPU_FEATURE_CMP_LEGACY },
{ 2, CPU_FEATURE_SVM },
{ 5, CPU_FEATURE_ABM },
{ 6, CPU_FEATURE_SSE4A },
{ 7, CPU_FEATURE_MISALIGNSSE },
{ 8, CPU_FEATURE_3DNOWPREFETCH },
{ 9, CPU_FEATURE_OSVW },
{ 10, CPU_FEATURE_IBS },
{ 11, CPU_FEATURE_XOP },
{ 12, CPU_FEATURE_SKINIT },
{ 13, CPU_FEATURE_WDT },
{ 16, CPU_FEATURE_FMA4 },
{ 21, CPU_FEATURE_TBM },
};
const struct feature_map_t matchtable_edx87[] = {
{ 0, CPU_FEATURE_TS },
{ 1, CPU_FEATURE_FID },
{ 2, CPU_FEATURE_VID },
{ 3, CPU_FEATURE_TTP },
{ 4, CPU_FEATURE_TM_AMD },
{ 5, CPU_FEATURE_STC },
{ 6, CPU_FEATURE_100MHZSTEPS },
{ 7, CPU_FEATURE_HWPSTATE },
/* id 8 is handled in common */
{ 9, CPU_FEATURE_CPB },
{ 10, CPU_FEATURE_APERFMPERF },
{ 11, CPU_FEATURE_PFI },
{ 12, CPU_FEATURE_PA },
};
if (raw->ext_cpuid[0][0] >= 0x80000001) {
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
}
if (raw->ext_cpuid[0][0] >= 0x80000007)
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
if (raw->ext_cpuid[0][0] >= 0x8000001a) {
/* We have the extended info about SSE unit size */
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1;
data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 128 : 64;
}
}
static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
int l3_result;
const int assoc_table[16] = {
0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 255
};
unsigned n = raw->ext_cpuid[0][0];
if (n >= 0x80000005) {
data->l1_data_cache = (raw->ext_cpuid[5][2] >> 24) & 0xff;
data->l1_assoc = (raw->ext_cpuid[5][2] >> 16) & 0xff;
data->l1_cacheline = (raw->ext_cpuid[5][2]) & 0xff;
data->l1_instruction_cache = (raw->ext_cpuid[5][3] >> 24) & 0xff;
}
if (n >= 0x80000006) {
data->l2_cache = (raw->ext_cpuid[6][2] >> 16) & 0xffff;
data->l2_assoc = assoc_table[(raw->ext_cpuid[6][2] >> 12) & 0xf];
data->l2_cacheline = (raw->ext_cpuid[6][2]) & 0xff;
l3_result = (raw->ext_cpuid[6][3] >> 18);
if (l3_result > 0) {
l3_result = 512 * l3_result; /* AMD spec says it's a range,
but we take the lower bound */
data->l3_cache = l3_result;
data->l3_assoc = assoc_table[(raw->ext_cpuid[6][3] >> 12) & 0xf];
data->l3_cacheline = (raw->ext_cpuid[6][3]) & 0xff;
} else {
data->l3_cache = 0;
}
}
}
static void decode_amd_number_of_cores(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
int logical_cpus = -1, num_cores = -1;
if (raw->basic_cpuid[0][0] >= 1) {
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
if (raw->ext_cpuid[0][0] >= 8) {
num_cores = 1 + (raw->ext_cpuid[8][2] & 0xff);
}
}
if (data->flags[CPU_FEATURE_HT]) {
if (num_cores > 1) {
if (data->ext_family >= 23)
num_cores /= 2; // e.g., Ryzen 7 reports 16 "real" cores, but they are really just 8.
data->num_cores = num_cores;
data->num_logical_cpus = logical_cpus;
} else {
data->num_cores = 1;
data->num_logical_cpus = (logical_cpus >= 2 ? logical_cpus : 2);
}
} else {
data->num_cores = data->num_logical_cpus = 1;
}
}
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
load_amd_features(raw, data);
decode_amd_cache_info(raw, data);
decode_amd_number_of_cores(raw, data);
return 0;
}

View file

@ -27,6 +27,5 @@
#define __RECOG_AMD_H__
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);
void cpuid_get_list_amd(struct cpu_list_t* list);
#endif /* __RECOG_AMD_H__ */

542
src/3rdparty/libcpuid/recog_intel.c vendored Normal file
View file

@ -0,0 +1,542 @@
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <ctype.h>
#include "libcpuid.h"
#include "libcpuid_util.h"
#include "libcpuid_internal.h"
#include "recog_intel.h"
const struct intel_bcode_str { intel_code_t code; char *str; } intel_bcode_str[] = {
#define CODE(x) { x, #x }
#define CODE2(x, y) CODE(x)
#include "intel_code_t.h"
#undef CODE
};
typedef struct {
int code;
uint64_t bits;
} intel_code_and_bits_t;
enum _intel_model_t {
UNKNOWN = -1,
_3000 = 100,
_3100,
_3200,
X3200,
_3300,
X3300,
_5100,
_5200,
_5300,
_5400,
_2xxx, /* Core i[357] 2xxx */
_3xxx, /* Core i[357] 3xxx */
};
typedef enum _intel_model_t intel_model_t;
static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
const struct feature_map_t matchtable_edx1[] = {
{ 18, CPU_FEATURE_PN },
{ 21, CPU_FEATURE_DTS },
{ 22, CPU_FEATURE_ACPI },
{ 27, CPU_FEATURE_SS },
{ 29, CPU_FEATURE_TM },
{ 30, CPU_FEATURE_IA64 },
{ 31, CPU_FEATURE_PBE },
};
const struct feature_map_t matchtable_ecx1[] = {
{ 2, CPU_FEATURE_DTS64 },
{ 4, CPU_FEATURE_DS_CPL },
{ 5, CPU_FEATURE_VMX },
{ 6, CPU_FEATURE_SMX },
{ 7, CPU_FEATURE_EST },
{ 8, CPU_FEATURE_TM2 },
{ 10, CPU_FEATURE_CID },
{ 14, CPU_FEATURE_XTPR },
{ 15, CPU_FEATURE_PDCM },
{ 18, CPU_FEATURE_DCA },
{ 21, CPU_FEATURE_X2APIC },
};
const struct feature_map_t matchtable_edx81[] = {
{ 20, CPU_FEATURE_XD },
};
const struct feature_map_t matchtable_ebx7[] = {
{ 2, CPU_FEATURE_SGX },
{ 4, CPU_FEATURE_HLE },
{ 11, CPU_FEATURE_RTM },
{ 16, CPU_FEATURE_AVX512F },
{ 17, CPU_FEATURE_AVX512DQ },
{ 18, CPU_FEATURE_RDSEED },
{ 19, CPU_FEATURE_ADX },
{ 26, CPU_FEATURE_AVX512PF },
{ 27, CPU_FEATURE_AVX512ER },
{ 28, CPU_FEATURE_AVX512CD },
{ 29, CPU_FEATURE_SHA_NI },
{ 30, CPU_FEATURE_AVX512BW },
{ 31, CPU_FEATURE_AVX512VL },
};
if (raw->basic_cpuid[0][0] >= 1) {
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
}
if (raw->ext_cpuid[0][0] >= 1) {
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
}
// detect TSX/AVX512:
if (raw->basic_cpuid[0][0] >= 7) {
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
}
}
enum _cache_type_t {
L1I,
L1D,
L2,
L3,
L4
};
typedef enum _cache_type_t cache_type_t;
static void check_case(uint8_t on, cache_type_t cache, int size, int assoc, int linesize, struct cpu_id_t* data)
{
if (!on) return;
switch (cache) {
case L1I:
data->l1_instruction_cache = size;
break;
case L1D:
data->l1_data_cache = size;
data->l1_assoc = assoc;
data->l1_cacheline = linesize;
break;
case L2:
data->l2_cache = size;
data->l2_assoc = assoc;
data->l2_cacheline = linesize;
break;
case L3:
data->l3_cache = size;
data->l3_assoc = assoc;
data->l3_cacheline = linesize;
break;
case L4:
data->l4_cache = size;
data->l4_assoc = assoc;
data->l4_cacheline = linesize;
break;
default:
break;
}
}
static void decode_intel_oldstyle_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
uint8_t f[256] = {0};
int reg, off;
uint32_t x;
for (reg = 0; reg < 4; reg++) {
x = raw->basic_cpuid[2][reg];
if (x & 0x80000000) continue;
for (off = 0; off < 4; off++) {
f[x & 0xff] = 1;
x >>= 8;
}
}
check_case(f[0x06], L1I, 8, 4, 32, data);
check_case(f[0x08], L1I, 16, 4, 32, data);
check_case(f[0x0A], L1D, 8, 2, 32, data);
check_case(f[0x0C], L1D, 16, 4, 32, data);
check_case(f[0x22], L3, 512, 4, 64, data);
check_case(f[0x23], L3, 1024, 8, 64, data);
check_case(f[0x25], L3, 2048, 8, 64, data);
check_case(f[0x29], L3, 4096, 8, 64, data);
check_case(f[0x2C], L1D, 32, 8, 64, data);
check_case(f[0x30], L1I, 32, 8, 64, data);
check_case(f[0x39], L2, 128, 4, 64, data);
check_case(f[0x3A], L2, 192, 6, 64, data);
check_case(f[0x3B], L2, 128, 2, 64, data);
check_case(f[0x3C], L2, 256, 4, 64, data);
check_case(f[0x3D], L2, 384, 6, 64, data);
check_case(f[0x3E], L2, 512, 4, 64, data);
check_case(f[0x41], L2, 128, 4, 32, data);
check_case(f[0x42], L2, 256, 4, 32, data);
check_case(f[0x43], L2, 512, 4, 32, data);
check_case(f[0x44], L2, 1024, 4, 32, data);
check_case(f[0x45], L2, 2048, 4, 32, data);
check_case(f[0x46], L3, 4096, 4, 64, data);
check_case(f[0x47], L3, 8192, 8, 64, data);
check_case(f[0x4A], L3, 6144, 12, 64, data);
check_case(f[0x4B], L3, 8192, 16, 64, data);
check_case(f[0x4C], L3, 12288, 12, 64, data);
check_case(f[0x4D], L3, 16384, 16, 64, data);
check_case(f[0x4E], L2, 6144, 24, 64, data);
check_case(f[0x60], L1D, 16, 8, 64, data);
check_case(f[0x66], L1D, 8, 4, 64, data);
check_case(f[0x67], L1D, 16, 4, 64, data);
check_case(f[0x68], L1D, 32, 4, 64, data);
/* The following four entries are trace cache. Intel does not
* specify a cache-line size, so we use -1 instead
*/
check_case(f[0x70], L1I, 12, 8, -1, data);
check_case(f[0x71], L1I, 16, 8, -1, data);
check_case(f[0x72], L1I, 32, 8, -1, data);
check_case(f[0x73], L1I, 64, 8, -1, data);
check_case(f[0x78], L2, 1024, 4, 64, data);
check_case(f[0x79], L2, 128, 8, 64, data);
check_case(f[0x7A], L2, 256, 8, 64, data);
check_case(f[0x7B], L2, 512, 8, 64, data);
check_case(f[0x7C], L2, 1024, 8, 64, data);
check_case(f[0x7D], L2, 2048, 8, 64, data);
check_case(f[0x7F], L2, 512, 2, 64, data);
check_case(f[0x82], L2, 256, 8, 32, data);
check_case(f[0x83], L2, 512, 8, 32, data);
check_case(f[0x84], L2, 1024, 8, 32, data);
check_case(f[0x85], L2, 2048, 8, 32, data);
check_case(f[0x86], L2, 512, 4, 64, data);
check_case(f[0x87], L2, 1024, 8, 64, data);
if (f[0x49]) {
/* This flag is overloaded with two meanings. On Xeon MP
* (family 0xf, model 0x6) this means L3 cache. On all other
* CPUs (notably Conroe et al), this is L2 cache. In both cases
* it means 4MB, 16-way associative, 64-byte line size.
*/
if (data->family == 0xf && data->model == 0x6) {
data->l3_cache = 4096;
data->l3_assoc = 16;
data->l3_cacheline = 64;
} else {
data->l2_cache = 4096;
data->l2_assoc = 16;
data->l2_cacheline = 64;
}
}
if (f[0x40]) {
/* Again, a special flag. It means:
* 1) If no L2 is specified, then CPU is w/o L2 (0 KB)
* 2) If L2 is specified by other flags, then, CPU is w/o L3.
*/
if (data->l2_cache == -1) {
data->l2_cache = 0;
} else {
data->l3_cache = 0;
}
}
}
static void decode_intel_deterministic_cache_info(struct cpu_raw_data_t* raw,
struct cpu_id_t* data)
{
int ecx;
int ways, partitions, linesize, sets, size, level, typenumber;
cache_type_t type;
for (ecx = 0; ecx < MAX_INTELFN4_LEVEL; ecx++) {
typenumber = raw->intel_fn4[ecx][0] & 0x1f;
if (typenumber == 0) break;
level = (raw->intel_fn4[ecx][0] >> 5) & 0x7;
if (level == 1 && typenumber == 1)
type = L1D;
else if (level == 1 && typenumber == 2)
type = L1I;
else if (level == 2 && typenumber == 3)
type = L2;
else if (level == 3 && typenumber == 3)
type = L3;
else if (level == 4 && typenumber == 3)
type = L4;
else {
continue;
}
ways = ((raw->intel_fn4[ecx][1] >> 22) & 0x3ff) + 1;
partitions = ((raw->intel_fn4[ecx][1] >> 12) & 0x3ff) + 1;
linesize = (raw->intel_fn4[ecx][1] & 0xfff) + 1;
sets = raw->intel_fn4[ecx][2] + 1;
size = ways * partitions * linesize * sets / 1024;
check_case(1, type, size, ways, linesize, data);
}
}
static int decode_intel_extended_topology(struct cpu_raw_data_t* raw,
struct cpu_id_t* data)
{
int i, level_type, num_smt = -1, num_core = -1;
for (i = 0; i < MAX_INTELFN11_LEVEL; i++) {
level_type = (raw->intel_fn11[i][2] & 0xff00) >> 8;
switch (level_type) {
case 0x01:
num_smt = raw->intel_fn11[i][1] & 0xffff;
break;
case 0x02:
num_core = raw->intel_fn11[i][1] & 0xffff;
break;
default:
break;
}
}
if (num_smt == -1 || num_core == -1) return 0;
data->num_logical_cpus = num_core;
data->num_cores = num_core / num_smt;
// make sure num_cores is at least 1. In VMs, the CPUID instruction
// is rigged and may give nonsensical results, but we should at least
// avoid outputs like data->num_cores == 0.
if (data->num_cores <= 0) data->num_cores = 1;
return 1;
}
static void decode_intel_number_of_cores(struct cpu_raw_data_t* raw,
struct cpu_id_t* data)
{
int logical_cpus = -1, num_cores = -1;
if (raw->basic_cpuid[0][0] >= 11) {
if (decode_intel_extended_topology(raw, data)) return;
}
if (raw->basic_cpuid[0][0] >= 1) {
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
if (raw->basic_cpuid[0][0] >= 4) {
num_cores = 1 + ((raw->basic_cpuid[4][0] >> 26) & 0x3f);
}
}
if (data->flags[CPU_FEATURE_HT]) {
if (num_cores > 1) {
data->num_cores = num_cores;
data->num_logical_cpus = logical_cpus;
} else {
data->num_cores = 1;
data->num_logical_cpus = (logical_cpus >= 1 ? logical_cpus : 1);
if (data->num_logical_cpus == 1)
data->flags[CPU_FEATURE_HT] = 0;
}
} else {
data->num_cores = data->num_logical_cpus = 1;
}
}
static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data)
{
intel_code_t code = (intel_code_t) NC;
intel_code_and_bits_t result;
uint64_t bits = 0;
int i = 0;
const char* bs = data->brand_str;
const char* s;
const struct { intel_code_t c; const char *search; } matchtable[] = {
{ PENTIUM_M, "Pentium(R) M" },
{ CORE_SOLO, "Pentium(R) Dual CPU" },
{ CORE_SOLO, "Pentium(R) Dual-Core" },
{ PENTIUM_D, "Pentium(R) D" },
{ CORE_SOLO, "Genuine Intel(R) CPU" },
{ CORE_SOLO, "Intel(R) Core(TM)" },
{ DIAMONDVILLE, "CPU [N ][23]## " },
{ SILVERTHORNE, "CPU Z" },
{ PINEVIEW, "CPU [ND][45]## " },
{ CEDARVIEW, "CPU [ND]#### " },
};
const struct { uint64_t bit; const char* search; } bit_matchtable[] = {
{ XEON_, "Xeon" },
{ _MP_, " MP" },
{ ATOM_, "Atom(TM) CPU" },
{ MOBILE_, "Mobile" },
{ CELERON_, "Celeron" },
{ PENTIUM_, "Pentium" },
};
for (i = 0; i < COUNT_OF(bit_matchtable); i++) {
if (match_pattern(bs, bit_matchtable[i].search))
bits |= bit_matchtable[i].bit;
}
if ((i = match_pattern(bs, "Core(TM) [im][357]")) != 0) {
bits |= CORE_;
i--;
switch (bs[i + 9]) {
case 'i': bits |= _I_; break;
case 'm': bits |= _M_; break;
}
switch (bs[i + 10]) {
case '3': bits |= _3; break;
case '5': bits |= _5; break;
case '7': bits |= _7; break;
}
}
for (i = 0; i < COUNT_OF(matchtable); i++)
if (match_pattern(bs, matchtable[i].search)) {
code = matchtable[i].c;
break;
}
if (bits & XEON_) {
if (match_pattern(bs, "W35##") || match_pattern(bs, "[ELXW]75##"))
bits |= _7;
else if (match_pattern(bs, "[ELXW]55##"))
code = GAINESTOWN;
else if (match_pattern(bs, "[ELXW]56##"))
code = WESTMERE;
else if (data->l3_cache > 0 && data->family == 16)
/* restrict by family, since later Xeons also have L3 ... */
code = IRWIN;
}
if (match_all(bits, XEON_ + _MP_) && data->l3_cache > 0)
code = POTOMAC;
if (code == CORE_SOLO) {
s = strstr(bs, "CPU");
if (s) {
s += 3;
while (*s == ' ') s++;
if (*s == 'T')
bits |= MOBILE_;
}
}
if (code == CORE_SOLO) {
switch (data->num_cores) {
case 1: break;
case 2:
{
code = CORE_DUO;
if (data->num_logical_cpus > 2)
code = DUAL_CORE_HT;
break;
}
case 4:
{
code = QUAD_CORE;
if (data->num_logical_cpus > 4)
code = QUAD_CORE_HT;
break;
}
default:
code = MORE_THAN_QUADCORE; break;
}
}
if (code == CORE_DUO && (bits & MOBILE_) && data->model != 14) {
if (data->ext_model < 23) {
code = MEROM;
} else {
code = PENRYN;
}
}
if (data->ext_model == 23 &&
(code == CORE_DUO || code == PENTIUM_D || (bits & CELERON_))) {
code = WOLFDALE;
}
result.code = code;
result.bits = bits;
return result;
}
static void decode_intel_sgx_features(const struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
struct cpu_epc_t epc;
int i;
if (raw->basic_cpuid[0][0] < 0x12) return; // no 12h leaf
if (raw->basic_cpuid[0x12][0] == 0) return; // no sub-leafs available, probably it's disabled by BIOS
// decode sub-leaf 0:
if (raw->basic_cpuid[0x12][0] & 1) data->sgx.flags[INTEL_SGX1] = 1;
if (raw->basic_cpuid[0x12][0] & 2) data->sgx.flags[INTEL_SGX2] = 1;
if (data->sgx.flags[INTEL_SGX1] || data->sgx.flags[INTEL_SGX2])
data->sgx.present = 1;
data->sgx.misc_select = raw->basic_cpuid[0x12][1];
data->sgx.max_enclave_32bit = (raw->basic_cpuid[0x12][3] ) & 0xff;
data->sgx.max_enclave_64bit = (raw->basic_cpuid[0x12][3] >> 8) & 0xff;
// decode sub-leaf 1:
data->sgx.secs_attributes = raw->intel_fn12h[1][0] | (((uint64_t) raw->intel_fn12h[1][1]) << 32);
data->sgx.secs_xfrm = raw->intel_fn12h[1][2] | (((uint64_t) raw->intel_fn12h[1][3]) << 32);
// decode higher-order subleafs, whenever present:
data->sgx.num_epc_sections = -1;
for (i = 0; i < 1000000; i++) {
epc = cpuid_get_epc(i, raw);
if (epc.length == 0) {
data->sgx.num_epc_sections = i;
break;
}
}
if (data->sgx.num_epc_sections == -1) {
data->sgx.num_epc_sections = 1000000;
}
}
struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw)
{
uint32_t regs[4];
struct cpu_epc_t retval = {0, 0};
if (raw && index < MAX_INTELFN12H_LEVEL - 2) {
// this was queried already, use the data:
memcpy(regs, raw->intel_fn12h[2 + index], sizeof(regs));
} else {
// query this ourselves:
regs[0] = 0x12;
regs[2] = 2 + index;
regs[1] = regs[3] = 0;
cpu_exec_cpuid_ext(regs);
}
// decode values:
if ((regs[0] & 0xf) == 0x1) {
retval.start_addr |= (regs[0] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
retval.start_addr |= ((uint64_t) (regs[1] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
retval.length |= (regs[2] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
retval.length |= ((uint64_t) (regs[3] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
}
return retval;
}
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
{
intel_code_and_bits_t brand;
load_intel_features(raw, data);
if (raw->basic_cpuid[0][0] >= 4) {
/* Deterministic way is preferred, being more generic */
decode_intel_deterministic_cache_info(raw, data);
} else if (raw->basic_cpuid[0][0] >= 2) {
decode_intel_oldstyle_cache_info(raw, data);
}
decode_intel_number_of_cores(raw, data);
brand = get_brand_code_and_bits(data);
internal->code.intel = brand.code;
internal->bits = brand.bits;
if (data->flags[CPU_FEATURE_SGX]) {
// if SGX is indicated by the CPU, verify its presence:
decode_intel_sgx_features(raw, data);
}
return 0;
}

View file

@ -27,6 +27,5 @@
#define __RECOG_INTEL_H__
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);
void cpuid_get_list_intel(struct cpu_list_t* list);
#endif /*__RECOG_INTEL_H__*/

View file

@ -11,7 +11,7 @@
#include <stdio.h>
#include <io.h>
#include "compat/winansi.h"
#include "winansi.h"
/*
* Copyright 2008 Peter Harris <git@peter.is-a-geek.org>
*/
@ -344,8 +344,8 @@ int winansi_vfprintf(FILE *stream, const char *format, va_list list)
#endif
va_end(cp);
if (len > sizeof(small_buf) - 1) {
buf = malloc(len + 1);
if ((unsigned) len > sizeof(small_buf) - 1) {
buf = (char*)malloc(len + 1);
if (!buf)
goto abort;

View file

@ -1,12 +1,11 @@
/*
* ANSI emulation wrappers
*/
#ifdef WIN32
#include <windows.h>
#include <stddef.h>
#include <stdio.h>
#define isatty(fd) _isatty(fd)
#define fileno(fd) _fileno(fd)
#ifdef __cplusplus
@ -28,5 +27,3 @@ extern "C" {
#define printf winansi_printf
#define fprintf winansi_fprintf
#define vfprintf winansi_vfprintf
#endif

151
src/App.cpp Normal file
View file

@ -0,0 +1,151 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <uv.h>
#include "App.h"
#include "Cpu.h"
#include "crypto/CryptoNight.h"
#include "log/ConsoleLog.h"
#include "log/FileLog.h"
#include "log/Log.h"
#include "Mem.h"
#include "net/Network.h"
#include "Options.h"
#include "Summary.h"
#include "version.h"
#include "workers/Workers.h"
#ifdef HAVE_SYSLOG_H
# include "log/SysLog.h"
#endif
App *App::m_self = nullptr;
App::App(int argc, char **argv) :
m_network(nullptr),
m_options(nullptr)
{
m_self = this;
Cpu::init();
m_options = Options::parse(argc, argv);
Log::init();
if (!m_options->background()) {
Log::add(new ConsoleLog(m_options->colors()));
}
if (m_options->logFile()) {
Log::add(new FileLog(m_options->logFile()));
}
# ifdef HAVE_SYSLOG_H
if (m_options->syslog()) {
Log::add(new SysLog());
}
# endif
m_network = new Network(m_options);
uv_signal_init(uv_default_loop(), &m_signal);
}
App::~App()
{
}
int App::exec()
{
if (!m_options->isReady()) {
return 0;
}
uv_signal_start(&m_signal, App::onSignal, SIGHUP);
uv_signal_start(&m_signal, App::onSignal, SIGTERM);
uv_signal_start(&m_signal, App::onSignal, SIGINT);
background();
if (!CryptoNight::init(m_options->algo(), m_options->algoVariant())) {
LOG_ERR("\"%s\" hash self-test failed.", m_options->algoName());
return 1;
}
Mem::allocate(m_options->algo(), m_options->threads(), m_options->doubleHash());
Summary::print();
Workers::start(m_options->affinity());
m_network->connect();
const int r = uv_run(uv_default_loop(), UV_RUN_DEFAULT);
uv_loop_close(uv_default_loop());
free(m_network);
free(m_options);
Mem::release();
return r;
}
void App::close()
{
uv_stop(uv_default_loop());
}
void App::onSignal(uv_signal_t *handle, int signum)
{
switch (signum)
{
case SIGHUP:
LOG_WARN("SIGHUP received, exiting");
break;
case SIGTERM:
LOG_WARN("SIGTERM received, exiting");
break;
case SIGINT:
LOG_WARN("SIGINT received, exiting");
break;
default:
break;
}
m_self->close();
}

57
src/App.h Normal file
View file

@ -0,0 +1,57 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __APP_H__
#define __APP_H__
#include <uv.h>
class Network;
class Options;
class App
{
public:
App(int argc, char **argv);
~App();
int exec();
private:
void background();
void close();
static void onSignal(uv_signal_t *handle, int signum);
static App *m_self;
Network *m_network;
Options *m_options;
uv_signal_t m_signal;
};
#endif /* __APP_H__ */

66
src/App_unix.cpp Normal file
View file

@ -0,0 +1,66 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <unistd.h>
#include "App.h"
#include "Cpu.h"
#include "log/Log.h"
#include "Options.h"
void App::background()
{
if (m_options->affinity() != -1L) {
Cpu::setAffinity(-1, m_options->affinity());
}
if (!m_options->background()) {
return;
}
int i = fork();
if (i < 0) {
exit(1);
}
if (i > 0) {
exit(0);
}
i = setsid();
if (i < 0) {
LOG_ERR("setsid() failed (errno = %d)", errno);
}
i = chdir("/");
if (i < 0) {
LOG_ERR("chdir() failed (errno = %d)", errno);
}
}

View file

@ -21,23 +21,32 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __UTIL_H__
#define __UTIL_H__
#include <jansson.h>
#include <winsock2.h>
#include <windows.h>
json_t *json_decode(const char *s);
char *bin2hex(const unsigned char *p, size_t len);
bool hex2bin(unsigned char *p, const char *hexstr, size_t len);
struct thread_q *tq_new(void);
void tq_free(struct thread_q *tq);
bool tq_push(struct thread_q *tq, void *data);
void *tq_pop(struct thread_q *tq, const struct timespec *abstime);
void tq_freeze(struct thread_q *tq);
void tq_thaw(struct thread_q *tq);
#include "App.h"
#include "Options.h"
#include "Cpu.h"
#endif /* __UTIL_H__ */
void App::background()
{
if (m_options->affinity() != -1L) {
Cpu::setAffinity(-1, m_options->affinity());
}
if (!m_options->background()) {
return;
}
HWND hcon = GetConsoleWindow();
if (hcon) {
ShowWindow(hcon, SW_HIDE);
} else {
HANDLE h = GetStdHandle(STD_OUTPUT_HANDLE);
CloseHandle(h);
FreeConsole();
}
}

View file

@ -21,80 +21,85 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cpuid.h>
#include <string.h>
#include <stdbool.h>
#include <libcpuid.h>
#include <math.h>
#include <string.h>
#ifndef BUILD_TEST
# include <libcpuid.h>
#endif
#include "cpu.h"
#include "Cpu.h"
#ifndef BUILD_TEST
void cpu_init_common() {
char Cpu::m_brand[64] = { 0 };
int Cpu::m_flags = 0;
int Cpu::m_l2_cache = 0;
int Cpu::m_l3_cache = 0;
int Cpu::m_sockets = 1;
int Cpu::m_totalCores = 0;
int Cpu::m_totalThreads = 0;
int Cpu::optimalThreadsCount(int algo, bool doubleHash, int maxCpuUsage)
{
if (m_totalThreads == 1) {
return 1;
}
int cache = m_l3_cache ? m_l3_cache : m_l2_cache;
int count = 0;
const int size = (algo ? 1024 : 2048) * (doubleHash ? 2 : 1);
if (cache) {
count = cache / size;
}
else {
count = m_totalThreads / 2;
}
if (count > m_totalThreads) {
count = m_totalThreads;
}
if (((float) count / m_totalThreads * 100) > maxCpuUsage) {
count = ceil((float) m_totalThreads * (maxCpuUsage / 100.0));
}
return count < 1 ? 1 : count;
}
void Cpu::initCommon()
{
struct cpu_raw_data_t raw = { 0 };
struct cpu_id_t data = { 0 };
cpuid_get_raw_data(&raw);
cpu_identify(&raw, &data);
strncpy(cpu_info.brand, data.brand_str, sizeof(cpu_info.brand) - 1);
strncpy(m_brand, data.brand_str, sizeof(m_brand) - 1);
cpu_info.total_logical_cpus = data.total_logical_cpus;
cpu_info.sockets = data.total_logical_cpus / data.num_logical_cpus;
cpu_info.total_cores = data.num_cores * cpu_info.sockets;
cpu_info.l3_cache = data.l3_cache > 0 ? data.l3_cache * cpu_info.sockets : 0;
m_totalThreads = data.total_logical_cpus;
m_sockets = m_totalThreads / data.num_logical_cpus;
m_totalCores = data.num_cores *m_sockets;
m_l3_cache = data.l3_cache > 0 ? data.l3_cache * m_sockets : 0;
// Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97
if (data.vendor == VENDOR_AMD && data.l3_cache <= 0 && data.l2_assoc == 16 && data.ext_family >= 21) {
cpu_info.l2_cache = data.l2_cache * (cpu_info.total_cores / 2) * cpu_info.sockets;
m_l2_cache = data.l2_cache * (m_totalCores / 2) * m_sockets;
}
else {
cpu_info.l2_cache = data.l2_cache > 0 ? data.l2_cache * cpu_info.total_cores * cpu_info.sockets : 0;
m_l2_cache = data.l2_cache > 0 ? data.l2_cache * m_totalCores * m_sockets : 0;
}
# ifdef __x86_64__
cpu_info.flags |= CPU_FLAG_X86_64;
# if defined(__x86_64__) || defined(_M_AMD64)
m_flags |= X86_64;
# endif
if (data.flags[CPU_FEATURE_AES]) {
cpu_info.flags |= CPU_FLAG_AES;
m_flags |= AES;
}
if (data.flags[CPU_FEATURE_BMI2]) {
cpu_info.flags |= CPU_FLAG_BMI2;
m_flags |= BMI2;
}
}
#endif
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage) {
if (cpu_info.total_logical_cpus == 1) {
return 1;
}
int cache = cpu_info.l3_cache ? cpu_info.l3_cache : cpu_info.l2_cache;
int count = 0;
const int size = (algo ? 1024 : 2048) * (double_hash ? 2 : 1);
if (cache) {
count = cache / size;
}
else {
count = cpu_info.total_logical_cpus / 2;
}
if (count > cpu_info.total_logical_cpus) {
count = cpu_info.total_logical_cpus;
}
if (((float) count / cpu_info.total_logical_cpus * 100) > max_cpu_usage) {
count = ceil((float) cpu_info.total_logical_cpus * (max_cpu_usage / 100.0));
}
return count < 1 ? 1 : count;
}

66
src/Cpu.h Normal file
View file

@ -0,0 +1,66 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CPU_H__
#define __CPU_H__
#include <stdint.h>
class Cpu
{
public:
enum Flags {
X86_64 = 1,
AES = 2,
BMI2 = 4
};
static int optimalThreadsCount(int algo, bool doubleHash, int maxCpuUsage);
static void init();
static void setAffinity(int id, uint64_t mask);
static inline bool hasAES() { return m_flags & AES; }
static inline bool isX64() { return m_flags & X86_64; }
static inline const char *brand() { return m_brand; }
static inline int cores() { return m_totalCores; }
static inline int l2() { return m_l2_cache; }
static inline int l3() { return m_l3_cache; }
static inline int sockets() { return m_sockets; }
static inline int threads() { return m_totalThreads; }
private:
static void initCommon();
static char m_brand[64];
static int m_flags;
static int m_l2_cache;
static int m_l3_cache;
static int m_sockets;
static int m_totalCores;
static int m_totalThreads;
};
#endif /* __CPU_H__ */

View file

@ -21,27 +21,25 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include "cpu.h"
#include <sched.h>
#include <unistd.h>
struct cpu_info cpu_info = { 0 };
void cpu_init_common();
#include "Cpu.h"
void cpu_init() {
void Cpu::init()
{
# ifdef XMRIG_NO_LIBCPUID
cpu_info.total_logical_cpus = sysconf(_SC_NPROCESSORS_CONF);
m_totalThreads = sysconf(_SC_NPROCESSORS_CONF);
# endif
cpu_init_common();
initCommon();
}
int affine_to_cpu_mask(int id, unsigned long mask)
void Cpu::setAffinity(int id, unsigned long mask)
{
return 0;
}

View file

@ -21,10 +21,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cpuid.h>
#ifdef _MSC_VER
# include <intrin.h>
# define bit_AES (1 << 25)
# define bit_BMI2 (1 << 8)
#else
# include <cpuid.h>
#endif
#include <string.h>
#include <stdbool.h>
#include "cpu.h"
#include "Cpu.h"
#define VENDOR_ID (0)
@ -41,6 +51,11 @@
#define EDX_Reg (3)
#ifdef _MSC_VER
static inline void cpuid(int level, int output[4]) {
__cpuid(output, level);
}
#else
static inline void cpuid(int level, int output[4]) {
int a, b, c, d;
__cpuid_count(level, 0, a, b, c, d);
@ -50,9 +65,10 @@ static inline void cpuid(int level, int output[4]) {
output[2] = c;
output[3] = d;
}
#endif
static void cpu_brand_string(char* s) {
static inline void cpu_brand_string(char* s) {
int cpu_info[4] = { 0 };
cpuid(VENDOR_ID, cpu_info);
@ -66,7 +82,7 @@ static void cpu_brand_string(char* s) {
}
static bool has_aes_ni()
static inline bool has_aes_ni()
{
int cpu_info[4] = { 0 };
cpuid(PROCESSOR_INFO, cpu_info);
@ -75,7 +91,7 @@ static bool has_aes_ni()
}
static bool has_bmi2() {
static inline bool has_bmi2() {
int cpu_info[4] = { 0 };
cpuid(EXTENDED_FEATURES, cpu_info);
@ -83,25 +99,35 @@ static bool has_bmi2() {
}
void cpu_init_common() {
cpu_info.sockets = 1;
cpu_brand_string(cpu_info.brand);
char Cpu::m_brand[64] = { 0 };
int Cpu::m_flags = 0;
int Cpu::m_l2_cache = 0;
int Cpu::m_l3_cache = 0;
int Cpu::m_sockets = 1;
int Cpu::m_totalCores = 0;
int Cpu::m_totalThreads = 0;
# ifdef __x86_64__
cpu_info.flags |= CPU_FLAG_X86_64;
int Cpu::optimalThreadsCount(int algo, bool doubleHash, int maxCpuUsage)
{
int count = m_totalThreads / 2;
return count < 1 ? 1 : count;
}
void Cpu::initCommon()
{
cpu_brand_string(m_brand);
# if defined(__x86_64__) || defined(_M_AMD64)
m_flags |= X86_64;
# endif
if (has_aes_ni()) {
cpu_info.flags |= CPU_FLAG_AES;
m_flags |= AES;
}
if (has_bmi2()) {
cpu_info.flags |= CPU_FLAG_BMI2;
m_flags |= BMI2;
}
}
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage) {
int count = cpu_info.total_logical_cpus / 2;
return count < 1 ? 1 : count;
}

View file

@ -21,32 +21,31 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include "cpu.h"
#include <sched.h>
#include <unistd.h>
struct cpu_info cpu_info = { 0 };
void cpu_init_common();
#include "Cpu.h"
void cpu_init() {
void Cpu::init()
{
# ifdef XMRIG_NO_LIBCPUID
cpu_info.total_logical_cpus = sysconf(_SC_NPROCESSORS_CONF);
m_totalThreads = sysconf(_SC_NPROCESSORS_CONF);
# endif
cpu_init_common();
initCommon();
}
int affine_to_cpu_mask(int id, unsigned long mask)
void Cpu::setAffinity(int id, uint64_t mask)
{
cpu_set_t set;
CPU_ZERO(&set);
for (unsigned i = 0; i < cpu_info.total_logical_cpus; i++) {
for (int i = 0; i < m_totalThreads; i++) {
if (mask & (1UL << i)) {
CPU_SET(i, &set);
}

View file

@ -21,29 +21,27 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <windows.h>
#include <stdbool.h>
#include "cpu.h"
struct cpu_info cpu_info = { 0 };
void cpu_init_common();
#include "Cpu.h"
void cpu_init() {
void Cpu::init()
{
# ifdef XMRIG_NO_LIBCPUID
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
cpu_info.total_logical_cpus = sysinfo.dwNumberOfProcessors;
m_totalThreads = sysinfo.dwNumberOfProcessors;
# endif
cpu_init_common();
initCommon();
}
int affine_to_cpu_mask(int id, unsigned long mask)
void Cpu::setAffinity(int id, uint64_t mask)
{
if (id == -1) {
SetProcessAffinityMask(GetCurrentProcess(), mask);

View file

@ -21,38 +21,45 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string.h>
#include "persistent_memory.h"
#include "algo/cryptonight/cryptonight.h"
#include "options.h"
static size_t offset = 0;
#include <memory.h>
#ifndef XMRIG_NO_AEON
static void * create_persistent_ctx_lite(int thr_id) {
struct cryptonight_ctx *ctx = NULL;
#include "crypto/CryptoNight.h"
#include "Mem.h"
#include "Options.h"
if (!opt_double_hash) {
const size_t offset = MEMORY * (thr_id + 1);
ctx = (struct cryptonight_ctx *) &persistent_memory[offset + MEMORY_LITE];
ctx->memory = (uint8_t*) &persistent_memory[offset];
return ctx;
bool Mem::m_doubleHash = false;
int Mem::m_algo = 0;
int Mem::m_flags = 0;
int Mem::m_threads = 0;
size_t Mem::m_offset = 0;
uint8_t *Mem::m_memory = nullptr;
cryptonight_ctx *Mem::create(int threadId)
{
# ifndef XMRIG_NO_AEON
if (m_algo == Options::ALGO_CRYPTONIGHT_LITE) {
return createLite(threadId);
}
# endif
ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
ctx->memory = (uint8_t*) &persistent_memory[MEMORY * (thr_id + 1)];
cryptonight_ctx *ctx = reinterpret_cast<cryptonight_ctx *>(&m_memory[MEMORY - sizeof(cryptonight_ctx) * (threadId + 1)]);
const int ratio = m_doubleHash ? 2 : 1;
ctx->memory = &m_memory[MEMORY * (threadId * ratio + 1)];
return ctx;
}
#endif
void * persistent_calloc(size_t num, size_t size) {
void *mem = &persistent_memory[offset];
offset += (num * size);
void *Mem::calloc(size_t num, size_t size)
{
void *mem = &m_memory[m_offset];
m_offset += (num * size);
memset(mem, 0, num * size);
@ -60,17 +67,21 @@ void * persistent_calloc(size_t num, size_t size) {
}
void * create_persistent_ctx(int thr_id) {
# ifndef XMRIG_NO_AEON
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
return create_persistent_ctx_lite(thr_id);
#ifndef XMRIG_NO_AEON
cryptonight_ctx *Mem::createLite(int threadId) {
cryptonight_ctx *ctx;
if (!m_doubleHash) {
const size_t offset = MEMORY * (threadId + 1);
ctx = reinterpret_cast<cryptonight_ctx *>(&m_memory[offset + MEMORY_LITE]);
ctx->memory = &m_memory[offset];
return ctx;
}
# endif
struct cryptonight_ctx *ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
const int ratio = opt_double_hash ? 2 : 1;
ctx->memory = (uint8_t*) &persistent_memory[MEMORY * (thr_id * ratio + 1)];
ctx = reinterpret_cast<cryptonight_ctx *>(&m_memory[MEMORY - sizeof(cryptonight_ctx) * (threadId + 1)]);
ctx->memory = &m_memory[MEMORY * (threadId + 1)];
return ctx;
}
#endif

72
src/Mem.h Normal file
View file

@ -0,0 +1,72 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __MEM_H__
#define __MEM_H__
#include <stddef.h>
#include <stdint.h>
#include "align.h"
struct cryptonight_ctx;
class Mem
{
public:
enum Flags {
HugepagesAvailable = 1,
HugepagesEnabled = 2,
Lock = 4
};
static bool allocate(int algo, int threads, bool doubleHash);
static cryptonight_ctx *create(int threadId);
static void *calloc(size_t num, size_t size);
static void release();
static inline bool isDoubleHash() { return m_doubleHash; }
static inline bool isHugepagesAvailable() { return m_flags & HugepagesAvailable; }
static inline bool isHugepagesEnabled() { return m_flags & HugepagesEnabled; }
static inline int flags() { return m_flags; }
static inline int threads() { return m_threads; }
private:
static bool m_doubleHash;
static int m_algo;
static int m_flags;
static int m_threads;
static size_t m_offset;
VAR_ALIGN(16, static uint8_t *m_memory);
# ifndef XMRIG_NO_AEON
static cryptonight_ctx *createLite(int threadId);
# endif
};
#endif /* __MEM_H__ */

86
src/Mem_unix.cpp Normal file
View file

@ -0,0 +1,86 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <mm_malloc.h>
#include <sys/mman.h>
#include "crypto/CryptoNight.h"
#include "log/Log.h"
#include "Mem.h"
#include "Options.h"
bool Mem::allocate(int algo, int threads, bool doubleHash)
{
m_algo = algo;
m_threads = threads;
m_doubleHash = doubleHash;
const int ratio = (doubleHash && algo != Options::ALGO_CRYPTONIGHT_LITE) ? 2 : 1;
const size_t size = MEMORY * (threads * ratio + 1);
m_flags |= HugepagesAvailable;
# if defined(__APPLE__)
m_memory = static_cast<uint8_t*>(mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0));
# else
m_memory = static_cast<uint8_t*>(mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0));
# endif
if (m_memory == MAP_FAILED) {
m_memory = static_cast<uint8_t*>(_mm_malloc(size, 16));
return true;
}
m_flags |= HugepagesEnabled;
if (madvise(m_memory, size, MADV_RANDOM | MADV_WILLNEED) != 0) {
LOG_ERR("madvise failed");
}
if (mlock(m_memory, size) == 0) {
m_flags |= Lock;
}
return true;
}
void Mem::release()
{
const int size = MEMORY * (m_threads + 1);
if (m_flags & HugepagesEnabled) {
if (m_flags & Lock) {
munlock(m_memory, size);
}
munmap(m_memory, size);
}
else {
_mm_free(m_memory);
}
}

View file

@ -21,20 +21,22 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __MEMORY_H__
#define __MEMORY_H__
#include <winsock2.h>
#include <windows.h>
#include <ntsecapi.h>
#include <tchar.h>
#include "options.h"
#include "persistent_memory.h"
#include "utils/applog.h"
#ifdef __GNUC__
# include <mm_malloc.h>
#else
# include <malloc.h>
#endif
char *persistent_memory;
int persistent_memory_flags = 0;
#include "log/Log.h"
#include "crypto/CryptoNight.h"
#include "Mem.h"
#include "Options.h"
/*****************************************************************
@ -122,7 +124,7 @@ static BOOL ObtainLockPagesPrivilege() {
LSA_UNICODE_STRING str = StringToLsaUnicodeString(_T(SE_LOCK_MEMORY_NAME));
if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) {
applog_notime(LOG_WARNING, "Huge pages support was successfully enabled, but reboot required to use it");
LOG_DEBUG("Huge pages support was successfully enabled, but reboot required to use it");
result = TRUE;
}
@ -143,33 +145,37 @@ static BOOL TrySetLockPagesPrivilege() {
}
const char * persistent_memory_allocate() {
const int ratio = (opt_double_hash && opt_algo != ALGO_CRYPTONIGHT_LITE) ? 2 : 1;
const int size = MEMORY * (opt_n_threads * ratio + 1);
bool Mem::allocate(int algo, int threads, bool doubleHash)
{
m_algo = algo;
m_threads = threads;
m_doubleHash = doubleHash;
const int ratio = (doubleHash && algo != Options::ALGO_CRYPTONIGHT_LITE) ? 2 : 1;
const size_t size = MEMORY * (threads * ratio + 1);
if (TrySetLockPagesPrivilege()) {
persistent_memory_flags |= MEMORY_HUGEPAGES_AVAILABLE;
m_flags |= HugepagesAvailable;
}
persistent_memory = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
if (!persistent_memory) {
persistent_memory = _mm_malloc(size, 16);
m_memory = static_cast<uint8_t*>(VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE));
if (!m_memory) {
m_memory = static_cast<uint8_t*>(_mm_malloc(size, 16));
}
else {
persistent_memory_flags |= MEMORY_HUGEPAGES_ENABLED;
m_flags |= HugepagesEnabled;
}
return persistent_memory;
return true;
}
void persistent_memory_free() {
if (persistent_memory_flags & MEMORY_HUGEPAGES_ENABLED) {
VirtualFree(persistent_memory, 0, MEM_RELEASE);
void Mem::release()
{
if (m_flags & HugepagesEnabled) {
VirtualFree(m_memory, 0, MEM_RELEASE);
}
else {
_mm_free(persistent_memory);
_mm_free(m_memory);
}
}
#endif /* __MEMORY_H__ */

492
src/Options.cpp Normal file
View file

@ -0,0 +1,492 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <jansson.h>
#include <string.h>
#include <uv.h>
#ifdef _MSC_VER
# include "getopt/getopt.h"
#else
# include <getopt.h>
#endif
#include "Cpu.h"
#include "donate.h"
#include "net/Url.h"
#include "Options.h"
#include "version.h"
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
Options *Options::m_self = nullptr;
static char const usage[] = "\
Usage: " APP_ID " [OPTIONS]\n\
Options:\n\
-a, --algo=ALGO cryptonight (default) or cryptonight-lite\n\
-o, --url=URL URL of mining server\n\
-O, --userpass=U:P username:password pair for mining server\n\
-u, --user=USERNAME username for mining server\n\
-p, --pass=PASSWORD password for mining server\n\
-t, --threads=N number of miner threads\n\
-v, --av=N algorithm variation, 0 auto select\n\
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
-R, --retry-pause=N time to pause between retries (default: 5)\n\
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
--no-color disable colored output\n\
--donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n\
-B, --background run the miner in the background\n\
-c, --config=FILE load a JSON-format configuration file\n\
-l, --log-file=FILE log all output to a file\n"
# ifdef HAVE_SYSLOG_H
"\
-S, --syslog use system log for output messages\n"
# endif
"\
--max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\
--safe safe adjust threads and av settings for current CPU\n\
--nicehash enable nicehash support\n\
--print-time=N print hashrate report every N seconds\n\
-h, --help display this help and exit\n\
-V, --version output version information and exit\n\
";
static char const short_options[] = "a:c:khBp:Px:r:R:s:t:T:o:u:O:v:Vl:S";
static struct option const options[] = {
{ "algo", 1, nullptr, 'a' },
{ "av", 1, nullptr, 'v' },
{ "background", 0, nullptr, 'B' },
{ "config", 1, nullptr, 'c' },
{ "cpu-affinity", 1, nullptr, 1020 },
{ "donate-level", 1, nullptr, 1003 },
{ "help", 0, nullptr, 'h' },
{ "keepalive", 0, nullptr ,'k' },
{ "log-file", 1, nullptr, 'l' },
{ "max-cpu-usage", 1, nullptr, 1004 },
{ "nicehash", 0, nullptr, 1006 },
{ "no-color", 0, nullptr, 1002 },
{ "pass", 1, nullptr, 'p' },
{ "print-time", 1, nullptr, 1007 },
{ "retries", 1, nullptr, 'r' },
{ "retry-pause", 1, nullptr, 'R' },
{ "safe", 0, nullptr, 1005 },
{ "syslog", 0, nullptr, 'S' },
{ "threads", 1, nullptr, 't' },
{ "url", 1, nullptr, 'o' },
{ "user", 1, nullptr, 'u' },
{ "userpass", 1, nullptr, 'O' },
{ "version", 0, nullptr, 'V' },
{ 0, 0, 0, 0 }
};
static const char *algo_names[] = {
"cryptonight",
# ifndef XMRIG_NO_AEON
"cryptonight-lite"
# endif
};
Options *Options::parse(int argc, char **argv)
{
if (!m_self) {
m_self = new Options(argc, argv);
}
return m_self;
}
const char *Options::algoName() const
{
return algo_names[m_algo];
}
Options::Options(int argc, char **argv) :
m_background(false),
m_colors(true),
m_doubleHash(false),
m_ready(false),
m_safe(false),
m_syslog(false),
m_logFile(nullptr),
m_algo(0),
m_algoVariant(0),
m_donateLevel(kDonateLevel),
m_maxCpuUsage(75),
m_printTime(60),
m_retries(5),
m_retryPause(5),
m_threads(0),
m_affinity(-1L)
{
m_pools.push_back(new Url());
int key;
while (1) {
key = getopt_long(argc, argv, short_options, options, NULL);
if (key < 0) {
break;
}
if (!parseArg(key, optarg)) {
return;
}
}
if (optind < argc) {
fprintf(stderr, "%s: unsupported non-option argument '%s'\n", argv[0], argv[optind]);
return;
}
if (!m_pools[0]->isValid()) {
fprintf(stderr, "No pool URL supplied. Exiting.");
return;
}
m_algoVariant = getAlgoVariant();
if (m_algoVariant == AV2_AESNI_DOUBLE || m_algoVariant == AV4_SOFT_AES_DOUBLE) {
m_doubleHash = true;
}
if (!m_threads) {
m_threads = Cpu::optimalThreadsCount(m_algo, m_doubleHash, m_maxCpuUsage);
}
else if (m_safe) {
const int count = Cpu::optimalThreadsCount(m_algo, m_doubleHash, m_maxCpuUsage);
if (m_threads > count) {
m_threads = count;
}
}
m_ready = true;
}
Options::~Options()
{
}
bool Options::parseArg(int key, char *arg)
{
char *p;
int v;
switch (key) {
case 'a': /* --algo */
if (!setAlgo(arg)) {
return false;
}
break;
case 'O': /* --userpass */
if (!m_pools.back()->setUserpass(arg)) {
return false;
}
break;
case 'o': /* --url */
if (m_pools.size() > 1 || m_pools[0]->isValid()) {
Url *url = new Url(arg);
if (url->isValid()) {
m_pools.push_back(url);
}
else {
delete url;
}
}
else {
m_pools[0]->parse(arg);
}
if (!m_pools.back()->isValid()) {
return false;
}
break;
case 'u': /* --user */
m_pools.back()->setUser(arg);
break;
case 'p': /* --pass */
m_pools.back()->setPassword(arg);
break;
case 'l': /* --log-file */
free(m_logFile);
m_logFile = strdup(arg);
m_colors = false;
break;
case 'r': /* --retries */
v = strtol(arg, nullptr, 10);
if (v < 1 || v > 1000) {
showUsage(1);
return false;
}
m_retries = v;
break;
case 'R': /* --retry-pause */
v = strtol(arg, nullptr, 10);
if (v < 1 || v > 3600) {
showUsage(1);
return false;
}
m_retryPause = v;
break;
case 't': /* --threads */
v = strtol(arg, nullptr, 10);
if (v < 1 || v > 1024) {
showUsage(1);
return false;
}
m_threads = v;
break;
case 1004: /* --max-cpu-usage */
v = strtol(arg, nullptr, 10);
if (v < 1 || v > 100) {
showUsage(1);
return false;
}
m_maxCpuUsage = v;
break;
case 1005: /* --safe */
m_safe = true;
break;
case 'k': /* --keepalive */
m_pools.back()->setKeepAlive(true);
break;
case 'V': /* --version */
showVersion();
return false;
case 'h': /* --help */
showUsage(0);
return false;
case 'B': /* --background */
m_background = true;
m_colors = false;
break;
case 'S': /* --syslog */
m_syslog = true;
m_colors = false;
break;
case 'v': /* --av */
v = strtol(arg, nullptr, 10);
if (v < 0 || v > 1000) {
showUsage(1);
return false;
}
m_algoVariant = v;
break;
case 1020: /* --cpu-affinity */
p = strstr(arg, "0x");
m_affinity = p ? strtoull(p, nullptr, 16) : strtoull(arg, nullptr, 10);
break;
case 1002: /* --no-color */
m_colors = false;
break;
case 1003: /* --donate-level */
v = strtol(arg, nullptr, 10);
if (v < 1 || v > 99) {
showUsage(1);
return false;
}
m_donateLevel = v;
break;
case 1006: /* --nicehash */
m_pools.back()->setNicehash(true);
break;
case 1007: /* --print-time */
v = strtol(arg, nullptr, 10);
if (v < 0 || v > 1000) {
showUsage(1);
return false;
}
m_printTime = v;
break;
default:
showUsage(1);
return false;
}
return true;
}
Url *Options::parseUrl(const char *arg) const
{
auto url = new Url(arg);
if (!url->isValid()) {
delete url;
return nullptr;
}
return url;
}
void Options::showUsage(int status) const
{
if (status) {
fprintf(stderr, "Try \"" APP_ID "\" --help' for more information.\n");
}
else {
printf(usage);
}
}
void Options::showVersion()
{
printf(APP_NAME " " APP_VERSION "\n built on " __DATE__
# if defined(__clang__)
" with clang " __clang_version__);
# elif defined(__GNUC__)
" with GCC");
printf(" %d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
# elif defined(_MSC_VER)
" with MSVC");
printf(" %d", MSVC_VERSION);
# else
);
# endif
printf("\n features:"
# if defined(__i386__) || defined(_M_IX86)
" i386"
# elif defined(__x86_64__) || defined(_M_AMD64)
" x86_64"
# endif
# if defined(__AES__) || defined(_MSC_VER)
" AES-NI"
# endif
"\n");
printf("\nlibuv/%s\n", uv_version_string());
printf("libjansson/%s\n", JANSSON_VERSION);
}
bool Options::setAlgo(const char *algo)
{
for (size_t i = 0; i < ARRAY_SIZE(algo_names); i++) {
if (algo_names[i] && !strcmp(algo, algo_names[i])) {
m_algo = i;
break;
}
# ifndef XMRIG_NO_AEON
if (i == ARRAY_SIZE(algo_names) - 1 && !strcmp(algo, "cryptonight-light")) {
m_algo = ALGO_CRYPTONIGHT_LITE;
break;
}
# endif
if (i == ARRAY_SIZE(algo_names) - 1) {
showUsage(1);
return false;
}
}
return true;
}
int Options::getAlgoVariant() const
{
# ifndef XMRIG_NO_AEON
if (m_algo == ALGO_CRYPTONIGHT_LITE) {
return getAlgoVariantLite();
}
# endif
if (m_algoVariant <= AV0_AUTO || m_algoVariant >= AV_MAX) {
return Cpu::hasAES() ? AV1_AESNI : AV3_SOFT_AES;
}
if (m_safe && !Cpu::hasAES() && m_algoVariant <= AV2_AESNI_DOUBLE) {
return m_algoVariant + 2;
}
return m_algoVariant;
}
#ifndef XMRIG_NO_AEON
int Options::getAlgoVariantLite() const
{
if (m_algoVariant <= AV0_AUTO || m_algoVariant >= AV_MAX) {
return Cpu::hasAES() ? AV2_AESNI_DOUBLE : AV4_SOFT_AES_DOUBLE;
}
if (m_safe && !Cpu::hasAES() && m_algoVariant <= AV2_AESNI_DOUBLE) {
return m_algoVariant + 2;
}
return m_algoVariant;
}
#endif

110
src/Options.h Normal file
View file

@ -0,0 +1,110 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __OPTIONS_H__
#define __OPTIONS_H__
#include <vector>
#include <stdint.h>
class Url;
class Options
{
public:
enum Algo {
ALGO_CRYPTONIGHT, /* CryptoNight (Monero) */
ALGO_CRYPTONIGHT_LITE, /* CryptoNight-Lite (AEON) */
};
enum AlgoVariant {
AV0_AUTO,
AV1_AESNI,
AV2_AESNI_DOUBLE,
AV3_SOFT_AES,
AV4_SOFT_AES_DOUBLE,
AV_MAX
};
static inline Options* i() { return m_self; }
static Options *parse(int argc, char **argv);
inline bool background() const { return m_background; }
inline bool colors() const { return m_colors; }
inline bool doubleHash() const { return m_doubleHash; }
inline bool isReady() const { return m_ready; }
inline bool syslog() const { return m_syslog; }
inline const char *logFile() const { return m_logFile; }
inline const std::vector<Url*> &pools() const { return m_pools; }
inline int algo() const { return m_algo; }
inline int algoVariant() const { return m_algoVariant; }
inline int donateLevel() const { return m_donateLevel; }
inline int printTime() const { return m_printTime; }
inline int retries() const { return m_retries; }
inline int retryPause() const { return m_retryPause; }
inline int threads() const { return m_threads; }
inline int64_t affinity() const { return m_affinity; }
const char *algoName() const;
private:
Options(int argc, char **argv);
~Options();
static Options *m_self;
bool parseArg(int key, char *arg);
Url *parseUrl(const char *arg) const;
void showUsage(int status) const;
void showVersion(void);
bool setAlgo(const char *algo);
int getAlgoVariant() const;
# ifndef XMRIG_NO_AEON
int getAlgoVariantLite() const;
# endif
bool m_background;
bool m_colors;
bool m_doubleHash;
bool m_ready;
bool m_safe;
bool m_syslog;
char *m_logFile;
int m_algo;
int m_algoVariant;
int m_donateLevel;
int m_maxCpuUsage;
int m_printTime;
int m_retries;
int m_retryPause;
int m_threads;
int64_t m_affinity;
std::vector<Url*> m_pools;
};
#endif /* __OPTIONS_H__ */

142
src/Summary.cpp Normal file
View file

@ -0,0 +1,142 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <uv.h>
#include "Cpu.h"
#include "log/Log.h"
#include "Mem.h"
#include "net/Url.h"
#include "Options.h"
#include "Summary.h"
#include "version.h"
static void print_versions()
{
char buf[16];
# if defined(__clang__)
snprintf(buf, 16, " clang/%d.%d.%d", __clang_major__, __clang_minor__, __clang_patchlevel__);
# elif defined(__GNUC__)
snprintf(buf, 16, " gcc/%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
# elif defined(_MSC_VER)
snprintf(buf, 16, " MSVC/%d", MSVC_VERSION);
# else
buf[0] = '\0';
# endif
if (Options::i()->colors()) {
Log::i()->text("\x1B[01;32m * \x1B[01;37mVERSIONS: \x1B[01;36mXMRig/%s\x1B[01;37m libuv/%s%s", APP_VERSION, uv_version_string(), buf);
} else {
Log::i()->text(" * VERSIONS: XMRig/%s libuv/%s%s", APP_VERSION, uv_version_string(), buf);
}
}
static void print_memory() {
if (Options::i()->colors()) {
Log::i()->text("\x1B[01;32m * \x1B[01;37mHUGE PAGES: %s, %s",
Mem::isHugepagesAvailable() ? "\x1B[01;32mavailable" : "\x1B[01;31munavailable",
Mem::isHugepagesEnabled() ? "\x1B[01;32menabled" : "\x1B[01;31mdisabled");
}
else {
Log::i()->text(" * HUGE PAGES: %s, %s", Mem::isHugepagesAvailable() ? "available" : "unavailable", Mem::isHugepagesEnabled() ? "enabled" : "disabled");
}
}
static void print_cpu()
{
if (Options::i()->colors()) {
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU: %s (%d) %sx64 %sAES-NI",
Cpu::brand(),
Cpu::sockets(),
Cpu::isX64() ? "\x1B[01;32m" : "\x1B[01;31m-",
Cpu::hasAES() ? "\x1B[01;32m" : "\x1B[01;31m-");
# ifndef XMRIG_NO_LIBCPUID
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
# endif
}
else {
Log::i()->text(" * CPU: %s (%d) %sx64 %sAES-NI", Cpu::brand(), Cpu::sockets(), Cpu::isX64() ? "" : "-", Cpu::hasAES() ? "" : "-");
# ifndef XMRIG_NO_LIBCPUID
Log::i()->text(" * CPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
# endif
}
}
static void print_threads()
{
char buf[32];
if (Options::i()->affinity() != -1L) {
snprintf(buf, 32, ", affinity=0x%llX", Options::i()->affinity());
}
else {
buf[0] = '\0';
}
Log::i()->text(Options::i()->colors() ? "\x1B[01;32m * \x1B[01;37mTHREADS: \x1B[01;36m%d\x1B[01;37m, %s, av=%d, %sdonate=%d%%%s" : " * THREADS: %d, %s, av=%d, %sdonate=%d%%%s",
Options::i()->threads(),
Options::i()->algoName(),
Options::i()->algoVariant(),
Options::i()->colors() && Options::i()->donateLevel() == 0 ? "\x1B[01;31m" : "",
Options::i()->donateLevel(),
buf);
}
static void print_pools()
{
const std::vector<Url*> &pools = Options::i()->pools();
for (size_t i = 0; i < pools.size(); ++i) {
Log::i()->text(Options::i()->colors() ? "\x1B[01;32m * \x1B[01;37mPOOL #%d: \x1B[01;36m%s:%d" : " * POOL #%d: %s:%d",
i + 1,
pools[i]->host(),
pools[i]->port());
}
# ifdef APP_DEBUG
for (size_t i = 0; i < pools.size(); ++i) {
Log::i()->text("%s:%d, user: %s, pass: %s, ka: %d, nicehash: %d", pools[i]->host(), pools[i]->port(), pools[i]->user(), pools[i]->password(), pools[i]->isKeepAlive(), pools[i]->isNicehash());
}
# endif
}
void Summary::print()
{
print_versions();
print_memory();
print_cpu();
print_threads();
print_pools();
}

View file

@ -24,6 +24,12 @@
#ifndef __SUMMARY_H__
#define __SUMMARY_H__
void print_summary();
class Summary
{
public:
static void print();
};
#endif /* __SUMMARY_H__ */

144
src/crypto/CryptoNight.cpp Normal file
View file

@ -0,0 +1,144 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "crypto/CryptoNight.h"
#include "crypto/CryptoNight_p.h"
#include "crypto/CryptoNight_test.h"
#include "net/Job.h"
#include "net/JobResult.h"
#include "Options.h"
void (*cryptonight_hash_ctx)(const void *input, size_t size, void *output, cryptonight_ctx *ctx) = nullptr;
static void cryptonight_av1_aesni(const void *input, size_t size, void *output, struct cryptonight_ctx *ctx) {
cryptonight_hash<0x80000, MEMORY, 0x1FFFF0, false>(input, size, output, ctx);
}
static void cryptonight_av2_aesni_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_double_hash<0x80000, MEMORY, 0x1FFFF0, false>(input, size, output, ctx);
}
static void cryptonight_av3_softaes(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_hash<0x80000, MEMORY, 0x1FFFF0, true>(input, size, output, ctx);
}
static void cryptonight_av4_softaes_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_double_hash<0x80000, MEMORY, 0x1FFFF0, true>(input, size, output, ctx);
}
#ifndef XMRIG_NO_AEON
static void cryptonight_lite_av1_aesni(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_hash<0x40000, MEMORY_LITE, 0xFFFF0, false>(input, size, output, ctx);
}
static void cryptonight_lite_av2_aesni_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_double_hash<0x40000, MEMORY_LITE, 0xFFFF0, false>(input, size, output, ctx);
}
static void cryptonight_lite_av3_softaes(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_hash<0x40000, MEMORY_LITE, 0xFFFF0, true>(input, size, output, ctx);
}
static void cryptonight_lite_av4_softaes_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_double_hash<0x40000, MEMORY_LITE, 0xFFFF0, true>(input, size, output, ctx);
}
void (*cryptonight_variations[8])(const void *input, size_t size, void *output, cryptonight_ctx *ctx) = {
cryptonight_av1_aesni,
cryptonight_av2_aesni_double,
cryptonight_av3_softaes,
cryptonight_av4_softaes_double,
cryptonight_lite_av1_aesni,
cryptonight_lite_av2_aesni_double,
cryptonight_lite_av3_softaes,
cryptonight_lite_av4_softaes_double
};
#else
void (*cryptonight_variations[4])(const void *input, size_t size, void *output, cryptonight_ctx *ctx) = {
cryptonight_av1_aesni,
cryptonight_av2_aesni_double,
cryptonight_av3_softaes,
cryptonight_av4_softaes_double
};
#endif
bool CryptoNight::hash(const Job &job, JobResult &result, cryptonight_ctx *ctx)
{
cryptonight_hash_ctx(job.blob(), job.size(), result.result, ctx);
return *reinterpret_cast<uint64_t*>(result.result + 24) < job.target();
}
bool CryptoNight::init(int algo, int variant)
{
if (variant < 1 || variant > 4) {
return false;
}
# ifndef XMRIG_NO_AEON
const int index = algo == Options::ALGO_CRYPTONIGHT_LITE ? (variant + 3) : (variant - 1);
# else
const int index = variant - 1;
# endif
cryptonight_hash_ctx = cryptonight_variations[index];
return selfTest(algo);
}
void CryptoNight::hash(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx *ctx)
{
cryptonight_hash_ctx(input, size, output, ctx);
}
bool CryptoNight::selfTest(int algo) {
if (cryptonight_hash_ctx == nullptr) {
return false;
}
char output[64];
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 2, 16);
cryptonight_hash_ctx(test_input, 76, output, ctx);
_mm_free(ctx->memory);
_mm_free(ctx);
return memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, (Options::i()->doubleHash() ? 64 : 32)) == 0;
}

View file

@ -24,24 +24,38 @@
#ifndef __CRYPTONIGHT_H__
#define __CRYPTONIGHT_H__
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include "align.h"
#define MEMORY 2097152 /* 2 MiB */
#define MEMORY_LITE 1048576 /* 1 MiB */
struct cryptonight_ctx {
uint8_t state0[200] __attribute__((aligned(16)));
uint8_t state1[200] __attribute__((aligned(16)));
uint8_t* memory __attribute__((aligned(16)));
VAR_ALIGN(16, uint8_t state0[200]);
VAR_ALIGN(16, uint8_t state1[200]);
VAR_ALIGN(16, uint8_t* memory);
};
extern void (* const extra_hashes[4])(const void *, size_t, char *);
class Job;
class JobResult;
bool cryptonight_init(int variant);
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx);
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx);
class CryptoNight
{
public:
static bool hash(const Job &job, JobResult &result, cryptonight_ctx *ctx);
static bool init(int algo, int variant);
static void hash(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx *ctx);
private:
static bool selfTest(int algo);
};
#endif /* __CRYPTONIGHT_H__ */

452
src/crypto/CryptoNight_p.h Normal file
View file

@ -0,0 +1,452 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_P_H__
#define __CRYPTONIGHT_P_H__
#ifdef __GNUC__
# include <x86intrin.h>
#else
# include <intrin.h>
# define __restrict__ __restrict
#endif
#include "crypto/CryptoNight.h"
extern "C"
{
#include "crypto/c_keccak.h"
#include "crypto/c_groestl.h"
#include "crypto/c_blake256.h"
#include "crypto/c_jh.h"
#include "crypto/c_skein.h"
__m128i soft_aesenc(__m128i in, __m128i key);
__m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
}
static inline void do_blake_hash(const void* input, size_t len, char* output) {
blake256_hash(reinterpret_cast<uint8_t*>(output), static_cast<const uint8_t*>(input), len);
}
static inline void do_groestl_hash(const void* input, size_t len, char* output) {
groestl(static_cast<const uint8_t*>(input), len * 8, reinterpret_cast<uint8_t*>(output));
}
static inline void do_jh_hash(const void* input, size_t len, char* output) {
jh_hash(32 * 8, static_cast<const uint8_t*>(input), 8 * len, reinterpret_cast<uint8_t*>(output));
}
static inline void do_skein_hash(const void* input, size_t len, char* output) {
skein_hash(8 * 32, static_cast<const uint8_t*>(input), 8 * len, reinterpret_cast<uint8_t*>(output));
}
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
#if defined(__x86_64__) || defined(_M_AMD64)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
# ifdef __GNUC__
static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
*hi = r >> 64;
return (uint64_t) r;
}
# else
#define __umul128 _umul128
# endif
#elif defined(__i386__) || defined(_M_IX86)
# define HI32(X) \
_mm_srli_si128((X), 4)
# define EXTRACT64(X) \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
uint64_t a = multiplier >> 32;
uint64_t b = multiplier & 0xFFFFFFFF;
uint64_t c = multiplicand >> 32;
uint64_t d = multiplicand & 0xFFFFFFFF;
//uint64_t ac = a * c;
uint64_t ad = a * d;
//uint64_t bc = b * c;
uint64_t bd = b * d;
uint64_t adbc = ad + (b * c);
uint64_t adbc_carry = adbc < ad ? 1 : 0;
// multiplier * multiplicand = product_hi * 2^64 + product_lo
uint64_t product_lo = bd + (adbc << 32);
uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
return product_lo;
}
#endif
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1)
{
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
template<uint8_t rcon>
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
{
__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
*xout0 = sl_xor(*xout0);
*xout0 = _mm_xor_si128(*xout0, xout1);
xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
*xout2 = sl_xor(*xout2);
*xout2 = _mm_xor_si128(*xout2, xout1);
}
static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
{
__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
*xout0 = sl_xor(*xout0);
*xout0 = _mm_xor_si128(*xout0, xout1);
xout1 = soft_aeskeygenassist(*xout0, 0x00);
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
*xout2 = sl_xor(*xout2);
*xout2 = _mm_xor_si128(*xout2, xout1);
}
template<bool SOFT_AES>
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory +1 );
*k0 = xout0;
*k1 = xout2;
SOFT_AES ? soft_aes_genkey_sub(&xout0, &xout2, 0x01) : aes_genkey_sub<0x01>(&xout0, &xout2);
*k2 = xout0;
*k3 = xout2;
SOFT_AES ? soft_aes_genkey_sub(&xout0, &xout2, 0x02) : aes_genkey_sub<0x02>(&xout0, &xout2);
*k4 = xout0;
*k5 = xout2;
SOFT_AES ? soft_aes_genkey_sub(&xout0, &xout2, 0x04) : aes_genkey_sub<0x04>(&xout0, &xout2);
*k6 = xout0;
*k7 = xout2;
SOFT_AES ? soft_aes_genkey_sub(&xout0, &xout2, 0x08) : aes_genkey_sub<0x08>(&xout0, &xout2);
*k8 = xout0;
*k9 = xout2;
}
template<bool SOFT_AES>
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
if (SOFT_AES) {
*x0 = soft_aesenc(*x0, key);
*x1 = soft_aesenc(*x1, key);
*x2 = soft_aesenc(*x2, key);
*x3 = soft_aesenc(*x3, key);
*x4 = soft_aesenc(*x4, key);
*x5 = soft_aesenc(*x5, key);
*x6 = soft_aesenc(*x6, key);
*x7 = soft_aesenc(*x7, key);
}
else {
*x0 = _mm_aesenc_si128(*x0, key);
*x1 = _mm_aesenc_si128(*x1, key);
*x2 = _mm_aesenc_si128(*x2, key);
*x3 = _mm_aesenc_si128(*x3, key);
*x4 = _mm_aesenc_si128(*x4, key);
*x5 = _mm_aesenc_si128(*x5, key);
*x6 = _mm_aesenc_si128(*x6, key);
*x7 = _mm_aesenc_si128(*x7, key);
}
}
template<size_t MEM, bool SOFT_AES>
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
{
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xin0 = _mm_load_si128(input + 4);
xin1 = _mm_load_si128(input + 5);
xin2 = _mm_load_si128(input + 6);
xin3 = _mm_load_si128(input + 7);
xin4 = _mm_load_si128(input + 8);
xin5 = _mm_load_si128(input + 9);
xin6 = _mm_load_si128(input + 10);
xin7 = _mm_load_si128(input + 11);
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
}
}
template<size_t MEM, bool SOFT_AES>
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
{
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
xout0 = _mm_load_si128(output + 4);
xout1 = _mm_load_si128(output + 5);
xout2 = _mm_load_si128(output + 6);
xout3 = _mm_load_si128(output + 7);
xout4 = _mm_load_si128(output + 8);
xout5 = _mm_load_si128(output + 9);
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
{
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
_mm_store_si128(output + 4, xout0);
_mm_store_si128(output + 5, xout1);
_mm_store_si128(output + 6, xout2);
_mm_store_si128(output + 7, xout3);
_mm_store_si128(output + 8, xout4);
_mm_store_si128(output + 9, xout5);
_mm_store_si128(output + 10, xout6);
_mm_store_si128(output + 11, xout7);
}
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
inline void cryptonight_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
{
keccak(static_cast<const uint8_t*>(input), size, ctx->state0, 200);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
const uint8_t* l0 = ctx->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4];
for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
if (SOFT_AES) {
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
}
else {
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
}
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
}
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
keccakf(h0, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
}
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
inline void cryptonight_double_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
{
keccak((const uint8_t *) input, size, ctx->state0, 200);
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
if (SOFT_AES) {
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
}
else {
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
}
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
bx0 = cx0;
bx1 = cx1;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
}
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, static_cast<char*>(output) + 32);
}
#endif /* __CRYPTONIGHT_P_H__ */

View file

@ -0,0 +1,60 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CRYPTONIGHT_TEST_H__
#define __CRYPTONIGHT_TEST_H__
const static uint8_t test_input[152] = {
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01
};
const static uint8_t test_output0[64] = {
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00
};
#ifndef XMRIG_NO_AEON
const static uint8_t test_output1[64] = {
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
};
#endif
#endif /* __CRYPTONIGHT_TEST_H__ */

Some files were not shown because too many files have changed in this diff Show more