Added JIT compiler for RandomX on ARMv8

2025-04-22 14:38:10 +00:00 · 2019-09-21 10:10:52 +02:00 · 2019-09-21 10:10:52 +02:00 · 38f4f4f695
commit 38f4f4f695
parent a4bc548fe5
12 changed files with 1918 additions and 59 deletions
--- a/cmake/randomx.cmake
+++ b/cmake/randomx.cmake
@ -51,6 +51,13 @@ if (WITH_RANDOMX)
            )
        # cheat because cmake and ccache hate each other
        set_property(SOURCE src/crypto/randomx/jit_compiler_x86_static.S PROPERTY LANGUAGE C)
+    elseif (XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+        list(APPEND SOURCES_CRYPTO
+             src/crypto/randomx/jit_compiler_a64_static.S
+             src/crypto/randomx/jit_compiler_a64.cpp
+            )
+        # cheat because cmake and ccache hate each other
+        set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
    endif()

    if (CMAKE_CXX_COMPILER_ID MATCHES Clang)
--- a/src/crypto/randomx/common.hpp
+++ b/src/crypto/randomx/common.hpp
@ -108,7 +108,7 @@ namespace randomx {
 	class JitCompilerX86;
 	using JitCompiler = JitCompilerX86;
 #elif defined(__aarch64__)
-	#define RANDOMX_HAVE_COMPILER 0
+	#define RANDOMX_HAVE_COMPILER 1
 	class JitCompilerA64;
 	using JitCompiler = JitCompilerA64;
 #else
--- a/src/crypto/randomx/instructions_portable.cpp
+++ b/src/crypto/randomx/instructions_portable.cpp
@ -82,6 +82,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	#define HAVE_SETROUNDMODE_IMPL
 #endif

+#ifndef HAVE_SETROUNDMODE_IMPL
+	static void setRoundMode_(uint32_t mode) {
+		fesetround(mode);
+	}
+#endif
+
 #ifndef HAVE_ROTR64
 	uint64_t rotr64(uint64_t a, unsigned int b) {
 		return (a >> b) | (a << (-b & 63));
@ -127,12 +133,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #ifdef RANDOMX_DEFAULT_FENV

-#	ifndef HAVE_SETROUNDMODE_IMPL
-	static void setRoundMode_(uint32_t mode) {
-		fesetround(mode);
-	}
-#	endif
-
 void rx_reset_float_state() {
 	setRoundMode_(FE_TONEAREST);
 	rx_set_double_precision(); //set precision to 53 bits if needed by the platform
--- a/src/crypto/randomx/intrin_portable.h
+++ b/src/crypto/randomx/intrin_portable.h
@ -376,11 +376,138 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {

 #define RANDOMX_DEFAULT_FENV

-void rx_reset_float_state();
+#elif defined(__aarch64__)

-void rx_set_rounding_mode(uint32_t mode);
+#include <stdlib.h>
+#include <arm_neon.h>
+#include <arm_acle.h>

-#else //end altivec
+typedef uint8x16_t rx_vec_i128;
+typedef float64x2_t rx_vec_f128;
+
+inline void* rx_aligned_alloc(size_t size, size_t align) {
+	void* p;
+	if (posix_memalign(&p, align, size) == 0)
+		return p;
+
+	return 0;
+};
+
+#define rx_aligned_free(a) free(a)
+
+inline void rx_prefetch_nta(void* ptr) {
+	asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
+}
+
+FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
+	return vld1q_f64((const float64_t*)pd);
+}
+
+FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) {
+	vst1q_f64((float64_t*)mem_addr, val);
+}
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
+	float64x2_t temp;
+	temp = vcopyq_laneq_f64(temp, 1, a, 1);
+	a = vcopyq_laneq_f64(a, 1, a, 0);
+	return vcopyq_laneq_f64(a, 0, temp, 1);
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
+	uint64x2_t temp0 = vdupq_n_u64(x0);
+	uint64x2_t temp1 = vdupq_n_u64(x1);
+	return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0));
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
+	return vreinterpretq_f64_u64(vdupq_n_u64(x));
+}
+
+#define rx_add_vec_f128 vaddq_f64
+#define rx_sub_vec_f128 vsubq_f64
+#define rx_mul_vec_f128 vmulq_f64
+#define rx_div_vec_f128 vdivq_f64
+#define rx_sqrt_vec_f128 vsqrtq_f64
+
+FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
+}
+
+FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
+}
+
+FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
+}
+
+#ifdef __ARM_FEATURE_CRYPTO
+
+
+FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
+	const uint8x16_t zero = { 0 };
+	return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key;
+}
+
+FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
+	const uint8x16_t zero = { 0 };
+	return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key;
+}
+
+#define HAVE_AES
+
+#endif
+
+#define rx_xor_vec_i128 veorq_u8
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
+	return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0);
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
+	return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1);
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
+	return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2);
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
+	return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3);
+}
+
+FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
+	int32_t data[4];
+	data[0] = _I0;
+	data[1] = _I1;
+	data[2] = _I2;
+	data[3] = _I3;
+	return vreinterpretq_u8_s32(vld1q_s32(data));
+};
+
+#define rx_xor_vec_i128 veorq_u8
+
+FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) {
+	return vld1q_u8((const uint8_t*)mem_addr);
+}
+
+FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) {
+	vst1q_u8((uint8_t*)mem_addr, val);
+}
+
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
+	double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
+	double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
+	rx_vec_f128 x;
+	x = vsetq_lane_f64(lo, x, 0);
+	x = vsetq_lane_f64(hi, x, 1);
+	return x;
+}
+
+#define RANDOMX_DEFAULT_FENV
+
+#else //portable fallback

 #include <cstdint>
 #include <stdexcept>
@ -487,7 +614,6 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
 	return v;
 }

-
 FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
 	rx_vec_f128 x;
 	x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
@ -578,10 +704,6 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {

 #define RANDOMX_DEFAULT_FENV

-void rx_reset_float_state();
-
-void rx_set_rounding_mode(uint32_t mode);
-
 #endif

 #ifndef HAVE_AES
@ -598,6 +720,14 @@ FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
 }
 #endif

+#ifdef RANDOMX_DEFAULT_FENV
+
+void rx_reset_float_state();
+
+void rx_set_rounding_mode(uint32_t mode);
+
+#endif
+
 double loadDoublePortable(const void* addr);
 uint64_t mulh(uint64_t, uint64_t);
 int64_t smulh(int64_t, int64_t);
--- a/src/crypto/randomx/jit_compiler_a64.cpp
+++ b/src/crypto/randomx/jit_compiler_a64.cpp
--- a/src/crypto/randomx/jit_compiler_a64.hpp
+++ b/src/crypto/randomx/jit_compiler_a64.hpp
@ -1,5 +1,6 @@
 /*
 Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>

 All rights reserved.

@ -32,42 +33,91 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <vector>
 #include <stdexcept>
 #include "crypto/randomx/common.hpp"
+#include "crypto/randomx/jit_compiler_a64_static.hpp"

 namespace randomx {

 	class Program;
 	class ProgramConfiguration;
 	class SuperscalarProgram;
+	class Instruction;
+
+	typedef void(JitCompilerA64::*InstructionGeneratorA64)(Instruction&, uint32_t&);

 	class JitCompilerA64 {
 	public:
-		JitCompilerA64() {
-			throw std::runtime_error("ARM64 JIT compiler is not implemented yet.");
-		}
-		void generateProgram(Program&, ProgramConfiguration&) {
+		JitCompilerA64();
+		~JitCompilerA64();
+
+		void generateProgram(Program&, ProgramConfiguration&);
+		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);

-		}
-		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
-			
-		}
 		template<size_t N>
-		void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &) {
+		void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &);

-		}
-		void generateDatasetInitCode() {
+		void generateDatasetInitCode() {}

+		ProgramFunc* getProgramFunc() { return reinterpret_cast<ProgramFunc*>(code); }
+		DatasetInitFunc* getDatasetInitFunc();
+		uint8_t* getCode() { return code; }
+		size_t getCodeSize();
+
+		static InstructionGeneratorA64 engine[256];
+		uint32_t reg_changed_offset[8];
+		uint8_t* code;
+		uint32_t literalPos;
+		uint32_t num32bitLiterals;
+
+		static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos)
+		{
+			*(uint32_t*)(code + codePos) = val;
+			codePos += sizeof(val);
 		}
-		ProgramFunc* getProgramFunc() {
-			return nullptr;
-		}
-		DatasetInitFunc* getDatasetInitFunc() {
-			return nullptr;
-		}
-		uint8_t* getCode() {
-			return nullptr;
-		}
-		size_t getCodeSize() {
-			return 0;
+
+		static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos)
+		{
+			*(uint64_t*)(code + codePos) = val;
+			codePos += sizeof(val);
 		}
+
+		void emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos);
+		void emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos);
+
+		template<uint32_t tmp_reg>
+		void emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos);
+
+		template<uint32_t tmp_reg_fp>
+		void emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos);
+
+		void h_IADD_RS(Instruction&, uint32_t&);
+		void h_IADD_M(Instruction&, uint32_t&);
+		void h_ISUB_R(Instruction&, uint32_t&);
+		void h_ISUB_M(Instruction&, uint32_t&);
+		void h_IMUL_R(Instruction&, uint32_t&);
+		void h_IMUL_M(Instruction&, uint32_t&);
+		void h_IMULH_R(Instruction&, uint32_t&);
+		void h_IMULH_M(Instruction&, uint32_t&);
+		void h_ISMULH_R(Instruction&, uint32_t&);
+		void h_ISMULH_M(Instruction&, uint32_t&);
+		void h_IMUL_RCP(Instruction&, uint32_t&);
+		void h_INEG_R(Instruction&, uint32_t&);
+		void h_IXOR_R(Instruction&, uint32_t&);
+		void h_IXOR_M(Instruction&, uint32_t&);
+		void h_IROR_R(Instruction&, uint32_t&);
+		void h_IROL_R(Instruction&, uint32_t&);
+		void h_ISWAP_R(Instruction&, uint32_t&);
+		void h_FSWAP_R(Instruction&, uint32_t&);
+		void h_FADD_R(Instruction&, uint32_t&);
+		void h_FADD_M(Instruction&, uint32_t&);
+		void h_FSUB_R(Instruction&, uint32_t&);
+		void h_FSUB_M(Instruction&, uint32_t&);
+		void h_FSCAL_R(Instruction&, uint32_t&);
+		void h_FMUL_R(Instruction&, uint32_t&);
+		void h_FDIV_M(Instruction&, uint32_t&);
+		void h_FSQRT_R(Instruction&, uint32_t&);
+		void h_CBRANCH(Instruction&, uint32_t&);
+		void h_CFROUND(Instruction&, uint32_t&);
+		void h_ISTORE(Instruction&, uint32_t&);
+		void h_NOP(Instruction&, uint32_t&);
 	};
 }
--- a/src/crypto/randomx/jit_compiler_a64_static.S
+++ b/src/crypto/randomx/jit_compiler_a64_static.S
@ -0,0 +1,576 @@
+# Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+# Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 	* Redistributions of source code must retain the above copyright
+# 	  notice, this list of conditions and the following disclaimer.
+# 	* Redistributions in binary form must reproduce the above copyright
+# 	  notice, this list of conditions and the following disclaimer in the
+# 	  documentation and/or other materials provided with the distribution.
+# 	* Neither the name of the copyright holder nor the
+# 	  names of its contributors may be used to endorse or promote products
+# 	  derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+	.arch armv8-a
+	.text
+	.global	randomx_program_aarch64
+	.global	randomx_program_aarch64_main_loop
+	.global	randomx_program_aarch64_vm_instructions
+	.global randomx_program_aarch64_imul_rcp_literals_end
+	.global	randomx_program_aarch64_vm_instructions_end
+	.global randomx_program_aarch64_cacheline_align_mask1
+	.global randomx_program_aarch64_cacheline_align_mask2
+	.global randomx_program_aarch64_update_spMix1
+	.global randomx_program_aarch64_vm_instructions_end_light
+	.global randomx_program_aarch64_light_cacheline_align_mask
+	.global randomx_program_aarch64_light_dataset_offset
+	.global randomx_init_dataset_aarch64
+	.global randomx_init_dataset_aarch64_end
+	.global randomx_calc_dataset_item_aarch64
+	.global randomx_calc_dataset_item_aarch64_prefetch
+	.global randomx_calc_dataset_item_aarch64_mix
+	.global randomx_calc_dataset_item_aarch64_store_result
+	.global randomx_calc_dataset_item_aarch64_end
+
+# Register allocation
+
+# x0  -> pointer to reg buffer and then literal for IMUL_RCP
+# x1  -> pointer to mem buffer and then to dataset
+# x2  -> pointer to scratchpad
+# x3  -> loop counter
+# x4  -> "r0"
+# x5  -> "r1"
+# x6  -> "r2"
+# x7  -> "r3"
+# x8  -> fpcr (reversed bits)
+# x9  -> mx, ma
+# x10 -> spMix1
+# x11 -> literal for IMUL_RCP
+# x12 -> "r4"
+# x13 -> "r5"
+# x14 -> "r6"
+# x15 -> "r7"
+# x16 -> spAddr0
+# x17 -> spAddr1
+# x18 -> temporary
+# x19 -> temporary
+# x20 -> literal for IMUL_RCP
+# x21 -> literal for IMUL_RCP
+# x22 -> literal for IMUL_RCP
+# x23 -> literal for IMUL_RCP
+# x24 -> literal for IMUL_RCP
+# x25 -> literal for IMUL_RCP
+# x26 -> literal for IMUL_RCP
+# x27 -> literal for IMUL_RCP
+# x28 -> literal for IMUL_RCP
+# x29 -> literal for IMUL_RCP
+# x30 -> literal for IMUL_RCP
+
+# v0-v15 -> store 32-bit literals
+# v16 -> "f0"
+# v17 -> "f1"
+# v18 -> "f2"
+# v19 -> "f3"
+# v20 -> "e0"
+# v21 -> "e1"
+# v22 -> "e2"
+# v23 -> "e3"
+# v24 -> "a0"
+# v25 -> "a1"
+# v26 -> "a2"
+# v27 -> "a3"
+# v28 -> temporary
+# v29 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
+# v30 -> E 'or' mask  = 0x3*00000000******3*00000000******
+# v31 -> scale mask   = 0x81f000000000000081f0000000000000
+
+randomx_program_aarch64:
+	# Save callee-saved registers
+	sub	sp, sp, 192
+	stp	x16, x17, [sp]
+	stp	x18, x19, [sp, 16]
+	stp	x20, x21, [sp, 32]
+	stp	x22, x23, [sp, 48]
+	stp	x24, x25, [sp, 64]
+	stp	x26, x27, [sp, 80]
+	stp	x28, x29, [sp, 96]
+	stp	x8, x30, [sp, 112]
+	stp	d8, d9, [sp, 128]
+	stp	d10, d11, [sp, 144]
+	stp	d12, d13, [sp, 160]
+	stp	d14, d15, [sp, 176]
+
+	# Zero integer registers
+	mov	x4, xzr
+	mov	x5, xzr
+	mov	x6, xzr
+	mov	x7, xzr
+	mov	x12, xzr
+	mov	x13, xzr
+	mov	x14, xzr
+	mov	x15, xzr
+
+	# Load ma, mx and dataset pointer
+	ldp	x9, x1, [x1]
+
+	# Load initial spMix value
+	mov	x10, x9
+
+	# Load group A registers
+	ldp	q24, q25, [x0, 192]
+	ldp	q26, q27, [x0, 224]
+
+	# Load E 'and' mask
+	mov	x16, 0x00FFFFFFFFFFFFFF
+	ins	v29.d[0], x16
+	ins	v29.d[1], x16
+
+	# Load E 'or' mask (stored in reg.f[0])
+	ldr	q30, [x0, 64]
+
+	# Load scale mask
+	mov	x16, 0x80f0000000000000
+	ins	v31.d[0], x16
+	ins	v31.d[1], x16
+
+	# Read fpcr
+	mrs	x8, fpcr
+	rbit	x8, x8
+
+	# Save x0
+	str	x0, [sp, -16]!
+
+	# Read literals
+	ldr	x0, literal_x0
+	ldr	x11, literal_x11
+	ldr	x20, literal_x20
+	ldr	x21, literal_x21
+	ldr	x22, literal_x22
+	ldr	x23, literal_x23
+	ldr	x24, literal_x24
+	ldr	x25, literal_x25
+	ldr	x26, literal_x26
+	ldr	x27, literal_x27
+	ldr	x28, literal_x28
+	ldr	x29, literal_x29
+	ldr	x30, literal_x30
+
+	ldr	q0, literal_v0
+	ldr	q1, literal_v1
+	ldr	q2, literal_v2
+	ldr	q3, literal_v3
+	ldr	q4, literal_v4
+	ldr	q5, literal_v5
+	ldr	q6, literal_v6
+	ldr	q7, literal_v7
+	ldr	q8, literal_v8
+	ldr	q9, literal_v9
+	ldr	q10, literal_v10
+	ldr	q11, literal_v11
+	ldr	q12, literal_v12
+	ldr	q13, literal_v13
+	ldr	q14, literal_v14
+	ldr	q15, literal_v15
+
+randomx_program_aarch64_main_loop:
+	# spAddr0 = spMix1 & ScratchpadL3Mask64;
+	# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
+	lsr	x18, x10, 32
+
+	# Actual mask will be inserted by JIT compiler
+	and	w16, w10, 1
+	and	w17, w18, 1
+
+	# x16 = scratchpad + spAddr0
+	# x17 = scratchpad + spAddr1
+	add	x16, x16, x2
+	add	x17, x17, x2
+
+	# xor integer registers with scratchpad data (spAddr0)
+	ldp	x18, x19, [x16]
+	eor	x4, x4, x18
+	eor	x5, x5, x19
+	ldp	x18, x19, [x16, 16]
+	eor	x6, x6, x18
+	eor	x7, x7, x19
+	ldp	x18, x19, [x16, 32]
+	eor	x12, x12, x18
+	eor	x13, x13, x19
+	ldp	x18, x19, [x16, 48]
+	eor	x14, x14, x18
+	eor	x15, x15, x19
+
+	# Load group F registers (spAddr1)
+	ldpsw	x18, x19, [x17]
+	ins	v16.d[0], x18
+	ins	v16.d[1], x19
+	ldpsw	x18, x19, [x17, 8]
+	ins	v17.d[0], x18
+	ins	v17.d[1], x19
+	ldpsw	x18, x19, [x17, 16]
+	ins	v18.d[0], x18
+	ins	v18.d[1], x19
+	ldpsw	x18, x19, [x17, 24]
+	ins	v19.d[0], x18
+	ins	v19.d[1], x19
+	scvtf	v16.2d, v16.2d
+	scvtf	v17.2d, v17.2d
+	scvtf	v18.2d, v18.2d
+	scvtf	v19.2d, v19.2d
+
+	# Load group E registers (spAddr1)
+	ldpsw	x18, x19, [x17, 32]
+	ins	v20.d[0], x18
+	ins	v20.d[1], x19
+	ldpsw	x18, x19, [x17, 40]
+	ins	v21.d[0], x18
+	ins	v21.d[1], x19
+	ldpsw	x18, x19, [x17, 48]
+	ins	v22.d[0], x18
+	ins	v22.d[1], x19
+	ldpsw	x18, x19, [x17, 56]
+	ins	v23.d[0], x18
+	ins	v23.d[1], x19
+	scvtf	v20.2d, v20.2d
+	scvtf	v21.2d, v21.2d
+	scvtf	v22.2d, v22.2d
+	scvtf	v23.2d, v23.2d
+	and	v20.16b, v20.16b, v29.16b
+	and	v21.16b, v21.16b, v29.16b
+	and	v22.16b, v22.16b, v29.16b
+	and	v23.16b, v23.16b, v29.16b
+	orr	v20.16b, v20.16b, v30.16b
+	orr	v21.16b, v21.16b, v30.16b
+	orr	v22.16b, v22.16b, v30.16b
+	orr	v23.16b, v23.16b, v30.16b
+
+	# Execute VM instructions
+randomx_program_aarch64_vm_instructions:
+
+	# 16 KB buffer for generated instructions
+	.fill 4096,4,0
+
+literal_x0:  .fill 1,8,0
+literal_x11: .fill 1,8,0
+literal_x20: .fill 1,8,0
+literal_x21: .fill 1,8,0
+literal_x22: .fill 1,8,0
+literal_x23: .fill 1,8,0
+literal_x24: .fill 1,8,0
+literal_x25: .fill 1,8,0
+literal_x26: .fill 1,8,0
+literal_x27: .fill 1,8,0
+literal_x28: .fill 1,8,0
+literal_x29: .fill 1,8,0
+literal_x30: .fill 1,8,0
+randomx_program_aarch64_imul_rcp_literals_end:
+
+literal_v0:  .fill 2,8,0
+literal_v1:  .fill 2,8,0
+literal_v2:  .fill 2,8,0
+literal_v3:  .fill 2,8,0
+literal_v4:  .fill 2,8,0
+literal_v5:  .fill 2,8,0
+literal_v6:  .fill 2,8,0
+literal_v7:  .fill 2,8,0
+literal_v8:  .fill 2,8,0
+literal_v9:  .fill 2,8,0
+literal_v10: .fill 2,8,0
+literal_v11: .fill 2,8,0
+literal_v12: .fill 2,8,0
+literal_v13: .fill 2,8,0
+literal_v14: .fill 2,8,0
+literal_v15: .fill 2,8,0
+
+randomx_program_aarch64_vm_instructions_end:
+
+	# mx ^= r[readReg2] ^ r[readReg3];
+	eor	x9, x9, x18
+
+	# Calculate dataset pointer for dataset prefetch
+	mov	w18, w9
+randomx_program_aarch64_cacheline_align_mask1:
+	# Actual mask will be inserted by JIT compiler
+	and	x18, x18, 1
+	add	x18, x18, x1
+
+	# Prefetch dataset data
+	prfm	pldl2strm, [x18]
+
+	# mx <-> ma
+	ror	x9, x9, 32
+
+	# Calculate dataset pointer for dataset read
+	mov	w10, w9
+randomx_program_aarch64_cacheline_align_mask2:
+	# Actual mask will be inserted by JIT compiler
+	and	x10, x10, 1
+	add	x10, x10, x1
+
+randomx_program_aarch64_xor_with_dataset_line:
+	# xor integer registers with dataset data
+	ldp	x18, x19, [x10]
+	eor	x4, x4, x18
+	eor	x5, x5, x19
+	ldp	x18, x19, [x10, 16]
+	eor	x6, x6, x18
+	eor	x7, x7, x19
+	ldp	x18, x19, [x10, 32]
+	eor	x12, x12, x18
+	eor	x13, x13, x19
+	ldp	x18, x19, [x10, 48]
+	eor	x14, x14, x18
+	eor	x15, x15, x19
+
+randomx_program_aarch64_update_spMix1:
+	# JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1"
+	eor	x10, x0, x0
+
+	# Store integer registers to scratchpad (spAddr1)
+	stp	x4, x5, [x17, 0]
+	stp	x6, x7, [x17, 16]
+	stp	x12, x13, [x17, 32]
+	stp	x14, x15, [x17, 48]
+
+	# xor group F and group E registers
+	eor	v16.16b, v16.16b, v20.16b
+	eor	v17.16b, v17.16b, v21.16b
+	eor	v18.16b, v18.16b, v22.16b
+	eor	v19.16b, v19.16b, v23.16b
+
+	# Store FP registers to scratchpad (spAddr0)
+	stp	q16, q17, [x16, 0]
+	stp	q18, q19, [x16, 32]
+
+	subs	x3, x3, 1
+	bne	randomx_program_aarch64_main_loop
+	
+	# Restore x0
+	ldr	x0, [sp], 16
+
+	# Store integer registers
+	stp	x4, x5, [x0, 0]
+	stp	x6, x7, [x0, 16]
+	stp	x12, x13, [x0, 32]
+	stp	x14, x15, [x0, 48]
+
+	# Store FP registers
+	stp	q16, q17, [x0, 64]
+	stp	q18, q19, [x0, 96]
+	stp	q20, q21, [x0, 128]
+	stp	q22, q23, [x0, 160]
+
+	# Restore callee-saved registers
+	ldp	x16, x17, [sp]
+	ldp	x18, x19, [sp, 16]
+	ldp	x20, x21, [sp, 32]
+	ldp	x22, x23, [sp, 48]
+	ldp	x24, x25, [sp, 64]
+	ldp	x26, x27, [sp, 80]
+	ldp	x28, x29, [sp, 96]
+	ldp	x8, x30, [sp, 112]
+	ldp	d8, d9, [sp, 128]
+	ldp	d10, d11, [sp, 144]
+	ldp	d12, d13, [sp, 160]
+	ldp	d14, d15, [sp, 176]
+	add	sp, sp, 192
+
+	ret
+
+randomx_program_aarch64_vm_instructions_end_light:
+	sub	sp, sp, 96
+	stp	x0, x1, [sp, 64]
+	stp	x2, x30, [sp, 80]
+
+	# mx ^= r[readReg2] ^ r[readReg3];
+	eor	x9, x9, x18
+
+	# mx <-> ma
+	ror	x9, x9, 32
+
+	# x0 -> pointer to cache memory
+	mov	x0, x1
+
+	# x1 -> pointer to output
+	mov	x1, sp
+
+randomx_program_aarch64_light_cacheline_align_mask:
+	# Actual mask will be inserted by JIT compiler
+	and	w2, w9, 1
+
+	# x2 -> item number
+	lsr	x2, x2, 6
+
+randomx_program_aarch64_light_dataset_offset:
+	# Apply dataset offset (filled in by JIT compiler)
+	add	x2, x2, 0
+	add	x2, x2, 0
+
+	bl	randomx_calc_dataset_item_aarch64
+
+	mov	x10, sp
+	ldp	x0, x1, [sp, 64]
+	ldp	x2, x30, [sp, 80]
+	add	sp, sp, 96
+
+	b	randomx_program_aarch64_xor_with_dataset_line
+
+
+
+# Input parameters
+#
+# x0 -> pointer to cache
+# x1 -> pointer to dataset memory at startItem
+# x2 -> start item
+# x3 -> end item
+
+randomx_init_dataset_aarch64:
+	# Save x30 (return address)
+	str	x30, [sp, -16]!
+
+	# Load pointer to cache memory
+	ldr	x0, [x0]
+
+randomx_init_dataset_aarch64_main_loop:
+	bl	randomx_calc_dataset_item_aarch64
+	add	x1, x1, 64
+	add	x2, x2, 1
+	cmp	x2, x3
+	bne	randomx_init_dataset_aarch64_main_loop
+
+	# Restore x30 (return address)
+	ldr	x30, [sp], 16
+
+	ret
+
+randomx_init_dataset_aarch64_end:
+
+# Input parameters
+#
+# x0 -> pointer to cache memory
+# x1 -> pointer to output
+# x2 -> item number
+#
+# Register allocation
+#
+# x0-x7 -> output value (calculated dataset item)
+# x8 -> pointer to cache memory
+# x9 -> pointer to output
+# x10 -> registerValue
+# x11 -> mixBlock
+# x12 -> temporary
+# x13 -> temporary
+
+randomx_calc_dataset_item_aarch64:
+	sub	sp, sp, 112
+	stp	x0, x1, [sp]
+	stp	x2, x3, [sp, 16]
+	stp	x4, x5, [sp, 32]
+	stp	x6, x7, [sp, 48]
+	stp	x8, x9, [sp, 64]
+	stp	x10, x11, [sp, 80]
+	stp	x12, x13, [sp, 96]
+
+	mov	x8, x0
+	mov	x9, x1
+	mov	x10, x2
+
+	# rl[0] = (itemNumber + 1) * superscalarMul0;
+	ldr	x12, superscalarMul0
+	madd	x0, x2, x12, x12
+
+	# rl[1] = rl[0] ^ superscalarAdd1;
+	ldr	x12, superscalarAdd1
+	eor	x1, x0, x12
+
+	# rl[2] = rl[0] ^ superscalarAdd2;
+	ldr	x12, superscalarAdd2
+	eor	x2, x0, x12
+
+	# rl[3] = rl[0] ^ superscalarAdd3;
+	ldr	x12, superscalarAdd3
+	eor	x3, x0, x12
+
+	# rl[4] = rl[0] ^ superscalarAdd4;
+	ldr	x12, superscalarAdd4
+	eor	x4, x0, x12
+
+	# rl[5] = rl[0] ^ superscalarAdd5;
+	ldr	x12, superscalarAdd5
+	eor	x5, x0, x12
+
+	# rl[6] = rl[0] ^ superscalarAdd6;
+	ldr	x12, superscalarAdd6
+	eor	x6, x0, x12
+
+	# rl[7] = rl[0] ^ superscalarAdd7;
+	ldr	x12, superscalarAdd7
+	eor	x7, x0, x12
+
+	b	randomx_calc_dataset_item_aarch64_prefetch
+
+superscalarMul0: .quad 6364136223846793005
+superscalarAdd1: .quad 9298411001130361340
+superscalarAdd2: .quad 12065312585734608966
+superscalarAdd3: .quad 9306329213124626780
+superscalarAdd4: .quad 5281919268842080866
+superscalarAdd5: .quad 10536153434571861004
+superscalarAdd6: .quad 3398623926847679864
+superscalarAdd7: .quad 9549104520008361294
+
+# Prefetch -> SuperScalar hash -> Mix will be repeated N times
+
+randomx_calc_dataset_item_aarch64_prefetch:
+	# Actual mask will be inserted by JIT compiler
+	and	x11, x10, 1
+	add	x11, x8, x11, lsl 6
+	prfm	pldl2strm, [x11]
+
+	# Generated SuperScalar hash program goes here
+
+randomx_calc_dataset_item_aarch64_mix:
+	ldp	x12, x13, [x11]
+	eor	x0, x0, x12
+	eor	x1, x1, x13
+	ldp	x12, x13, [x11, 16]
+	eor	x2, x2, x12
+	eor	x3, x3, x13
+	ldp	x12, x13, [x11, 32]
+	eor	x4, x4, x12
+	eor	x5, x5, x13
+	ldp	x12, x13, [x11, 48]
+	eor	x6, x6, x12
+	eor	x7, x7, x13
+
+randomx_calc_dataset_item_aarch64_store_result:
+	stp	x0, x1, [x9]
+	stp	x2, x3, [x9, 16]
+	stp	x4, x5, [x9, 32]
+	stp	x6, x7, [x9, 48]
+
+	ldp	x0, x1, [sp]
+	ldp	x2, x3, [sp, 16]
+	ldp	x4, x5, [sp, 32]
+	ldp	x6, x7, [sp, 48]
+	ldp	x8, x9, [sp, 64]
+	ldp	x10, x11, [sp, 80]
+	ldp	x12, x13, [sp, 96]
+	add	sp, sp, 112
+
+	ret
+
+randomx_calc_dataset_item_aarch64_end:
--- a/src/crypto/randomx/jit_compiler_a64_static.hpp
+++ b/src/crypto/randomx/jit_compiler_a64_static.hpp
@ -0,0 +1,51 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+extern "C" {
+	void randomx_program_aarch64(void* reg, void* mem, void* scratchpad, uint64_t iterations);
+	void randomx_program_aarch64_main_loop();
+	void randomx_program_aarch64_vm_instructions();
+	void randomx_program_aarch64_imul_rcp_literals_end();
+	void randomx_program_aarch64_vm_instructions_end();
+	void randomx_program_aarch64_cacheline_align_mask1();
+	void randomx_program_aarch64_cacheline_align_mask2();
+	void randomx_program_aarch64_update_spMix1();
+	void randomx_program_aarch64_vm_instructions_end_light();
+	void randomx_program_aarch64_light_cacheline_align_mask();
+	void randomx_program_aarch64_light_dataset_offset();
+	void randomx_init_dataset_aarch64();
+	void randomx_init_dataset_aarch64_end();
+	void randomx_calc_dataset_item_aarch64();
+	void randomx_calc_dataset_item_aarch64_prefetch();
+	void randomx_calc_dataset_item_aarch64_mix();
+	void randomx_calc_dataset_item_aarch64_store_result();
+	void randomx_calc_dataset_item_aarch64_end();
+}
--- a/src/crypto/randomx/randomx.cpp
+++ b/src/crypto/randomx/randomx.cpp
@ -26,6 +26,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

+#include "crypto/randomx/common.hpp"
 #include "crypto/randomx/randomx.h"
 #include "crypto/randomx/dataset.hpp"
 #include "crypto/randomx/vm_interpreted.hpp"
@ -33,7 +34,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "crypto/randomx/vm_compiled.hpp"
 #include "crypto/randomx/vm_compiled_light.hpp"
 #include "crypto/randomx/blake2/blake2.h"
+
+#if defined(_M_X64) || defined(__x86_64__)
 #include "crypto/randomx/jit_compiler_x86_static.hpp"
+#elif defined(XMRIG_ARM)
+#include "crypto/randomx/jit_compiler_a64_static.hpp"
+#endif
+
 #include <cassert>

 RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
@ -156,19 +163,10 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
 #endif
 }

+static uint32_t Log2(size_t value) { return (value > 1) ? (Log2(value / 2) + 1) : 0; }
+
 void RandomX_ConfigurationBase::Apply()
 {
-#if defined(_M_X64) || defined(__x86_64__)
-	*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
-	const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
-	*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
-	*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
-	*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
-#endif
-
-	CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
-	DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;
-
 	ScratchpadL1Mask_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) - 1) * 8;
 	ScratchpadL1Mask16_Calculated = (ScratchpadL1_Size / sizeof(uint64_t) / 2 - 1) * 16;
 	ScratchpadL2Mask_Calculated = (ScratchpadL2_Size / sizeof(uint64_t) - 1) * 8;
@ -176,22 +174,40 @@ void RandomX_ConfigurationBase::Apply()
 	ScratchpadL3Mask_Calculated = (((ScratchpadL3_Size / sizeof(uint64_t)) - 1) * 8);
 	ScratchpadL3Mask64_Calculated = ((ScratchpadL3_Size / sizeof(uint64_t)) / 8 - 1) * 64;

-#if defined(_M_X64) || defined(__x86_64__)
-	*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
-	*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
-#endif
+	CacheLineAlignMask_Calculated = (DatasetBaseSize - 1) & ~(RANDOMX_DATASET_ITEM_SIZE - 1);
+	DatasetExtraItems_Calculated = DatasetExtraSize / RANDOMX_DATASET_ITEM_SIZE;

 	ConditionMask_Calculated = (1 << JumpBits) - 1;

-	constexpr int CEIL_NULL = 0;
-	int k = 0;
-
 #if defined(_M_X64) || defined(__x86_64__)
+	*(uint32_t*)(codeShhPrefetchTweaked + 3) = ArgonMemory * 16 - 1;
+	const uint32_t DatasetBaseMask = DatasetBaseSize - RANDOMX_DATASET_ITEM_SIZE;
+	*(uint32_t*)(codeReadDatasetTweaked + 7) = DatasetBaseMask;
+	*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
+	*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
+
+	*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
+	*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
+
 #define JIT_HANDLE(x, prev) randomx::JitCompilerX86::engine[k] = &randomx::JitCompilerX86::h_##x
+
+#elif defined(XMRIG_ARM)
+
+	Log2_ScratchpadL1 = Log2(ScratchpadL1_Size);
+	Log2_ScratchpadL2 = Log2(ScratchpadL2_Size);
+	Log2_ScratchpadL3 = Log2(ScratchpadL3_Size);
+	Log2_DatasetBaseSize = Log2(DatasetBaseSize);
+	Log2_CacheSize = Log2((ArgonMemory * randomx::ArgonBlockSize) / randomx::CacheLineSize);
+
+#define JIT_HANDLE(x, prev) randomx::JitCompilerA64::engine[k] = &randomx::JitCompilerA64::h_##x
+
 #else
 #define JIT_HANDLE(x, prev)
 #endif

+	constexpr int CEIL_NULL = 0;
+	int k = 0;
+
 #define INST_HANDLE(x, prev) \
 	CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \
 	for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); }
@ -435,12 +451,12 @@ extern "C" {
 		assert(inputSize == 0 || input != nullptr);
 		assert(output != nullptr);
 		alignas(16) uint64_t tempHash[8];
-        rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
+		rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
 		machine->initScratchpad(&tempHash);
 		machine->resetRoundingMode();
 		for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
 			machine->run(&tempHash);
-            rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
+			rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
 		}
 		machine->run(&tempHash);
 		machine->getFinalResult(output, RANDOMX_HASH_SIZE);
--- a/src/crypto/randomx/randomx.h
+++ b/src/crypto/randomx/randomx.h
@ -133,6 +133,14 @@ struct RandomX_ConfigurationBase

 	uint32_t ConditionMask_Calculated;

+#ifdef XMRIG_ARM
+	uint32_t Log2_ScratchpadL1;
+	uint32_t Log2_ScratchpadL2;
+	uint32_t Log2_ScratchpadL3;
+	uint32_t Log2_DatasetBaseSize;
+	uint32_t Log2_CacheSize;
+#endif
+
 	int CEIL_IADD_RS;
 	int CEIL_IADD_M;
 	int CEIL_ISUB_R;
--- a/src/crypto/randomx/vm_compiled.cpp
+++ b/src/crypto/randomx/vm_compiled.cpp
@ -50,6 +50,9 @@ namespace randomx {

 	template<bool softAes>
 	void CompiledVm<softAes>::execute() {
+#ifdef XMRIG_ARM
+		memcpy(reg.f, config.eMask, sizeof(config.eMask));
+#endif
 		compiler.getProgramFunc()(reg, mem, scratchpad, RandomX_CurrentConfig.ProgramIterations);
 	}

--- a/src/crypto/rx/RxVm.cpp
+++ b/src/crypto/rx/RxVm.cpp
@ -33,11 +33,9 @@

 xmrig::RxVm::RxVm(RxDataset *dataset, uint8_t *scratchpad, bool softAes)
 {
-#   ifndef XMRIG_ARM
    if (!softAes) {
       m_flags |= RANDOMX_FLAG_HARD_AES;
    }
-#   endif

    if (dataset->get()) {
        m_flags |= RANDOMX_FLAG_FULL_MEM;