diff --git a/src/crypto/cn/CryptoNight_x86.h b/src/crypto/cn/CryptoNight_x86.h
index 25eeb908a..cc88342be 100644
--- a/src/crypto/cn/CryptoNight_x86.h
+++ b/src/crypto/cn/CryptoNight_x86.h
@@ -377,12 +377,15 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
         _mm_store_si128(output + 1, xin1);
         _mm_store_si128(output + 2, xin2);
         _mm_store_si128(output + 3, xin3);
-        output += (64 << interleave) / sizeof(__m128i);
-        _mm_store_si128(output + 0, xin4);
-        _mm_store_si128(output + 1, xin5);
-        _mm_store_si128(output + 2, xin6);
-        _mm_store_si128(output + 3, xin7);
-        output += (64 << interleave) / sizeof(__m128i);
+
+        constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
+
+        _mm_store_si128(output + output_increment + 0, xin4);
+        _mm_store_si128(output + output_increment + 1, xin5);
+        _mm_store_si128(output + output_increment + 2, xin6);
+        _mm_store_si128(output + output_increment + 3, xin7);
+
+        output += output_increment * 2;
     }
 }
 
@@ -414,13 +417,15 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
         xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
         xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
         xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
-        input += (64 << interleave) / sizeof(__m128i);
-        xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
-        xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
-        xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
-        xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
-        input += (64 << interleave) / sizeof(__m128i);
 
+        constexpr int input_increment = (64 << interleave) / sizeof(__m128i);
+
+        xout4 = _mm_xor_si128(_mm_load_si128(input + input_increment + 0), xout4);
+        xout5 = _mm_xor_si128(_mm_load_si128(input + input_increment + 1), xout5);
+        xout6 = _mm_xor_si128(_mm_load_si128(input + input_increment + 2), xout6);
+        xout7 = _mm_xor_si128(_mm_load_si128(input + input_increment + 3), xout7);
+
+        input += input_increment * 2;
         i += 8;
 
         if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
diff --git a/src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc b/src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc
index 4f6b70a04..14222dac0 100644
--- a/src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc
+++ b/src/crypto/cn/asm/cn2/cnv2_upx_double_mainloop_zen3.inc
@@ -34,7 +34,7 @@
 	movdqa	XMMWORD PTR [rsp+32], xmm0
 
 	stmxcsr DWORD PTR [rsp+24]
-	mov DWORD PTR [rsp+28], 24448
+	mov DWORD PTR [rsp+28], 16256
 	ldmxcsr DWORD PTR [rsp+28]
 
 	mov	rcx, QWORD PTR [rbx+56]
diff --git a/src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc b/src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc
index 854fbf111..00fabd6d4 100644
--- a/src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc
+++ b/src/crypto/cn/asm/win64/cn2/cnv2_upx_double_mainloop_zen3.inc
@@ -34,7 +34,7 @@
 	movdqa	XMMWORD PTR [rsp+32], xmm0
 
 	stmxcsr DWORD PTR [rsp+24]
-	mov DWORD PTR [rsp+28], 24448
+	mov DWORD PTR [rsp+28], 16256
 	ldmxcsr DWORD PTR [rsp+28]
 
 	mov	rcx, QWORD PTR [rbx+56]