diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp
index a98027c0f..22651c35f 100644
--- a/src/backend/cpu/CpuWorker.cpp
+++ b/src/backend/cpu/CpuWorker.cpp
@@ -69,12 +69,12 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
 template<size_t N>
 xmrig::CpuWorker<N>::~CpuWorker()
 {
-    CnCtx::release(m_ctx, N);
-    delete m_memory;
-
 #   ifdef XMRIG_ALGO_RANDOMX
     delete m_vm;
 #   endif
+
+    CnCtx::release(m_ctx, N); // moved below the RandomX block: the context is released only after m_vm (if built in) has been deleted
+    delete m_memory; // freed last; presumably m_vm may still reference this memory while it is being destroyed -- TODO confirm
 }
 
 
diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp
index 708a9559c..0642b3cb8 100644
--- a/src/crypto/rx/Rx.cpp
+++ b/src/crypto/rx/Rx.cpp
@@ -86,6 +86,9 @@ public:
 
     inline ~RxPrivate()
     {
+        m_pending = std::numeric_limits<uint32_t>::max();
+
+        std::lock_guard<std::mutex> lock(mutex);
         Handle::close(m_async);
 
         delete m_storage;
@@ -121,6 +124,10 @@ public:
     {
         std::lock_guard<std::mutex> lock(mutex);
 
+        if (d_ptr->pending() > std::numeric_limits<uint16_t>::max()) {
+            return;
+        }
+
         LOG_INFO("%s" MAGENTA_BOLD("init dataset%s") " algo " WHITE_BOLD("%s (") CYAN_BOLD("%u") WHITE_BOLD(" threads)") BLACK_BOLD(" seed %s..."),
                  tag,
                  nodeset.size() > 1 ? "s" : "",
diff --git a/src/crypto/rx/RxNUMAStorage.cpp b/src/crypto/rx/RxNUMAStorage.cpp
index f6fffadd8..6d8ec167b 100644
--- a/src/crypto/rx/RxNUMAStorage.cpp
+++ b/src/crypto/rx/RxNUMAStorage.cpp
@@ -87,9 +87,17 @@ class RxNUMAStoragePrivate
 public:
     XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxNUMAStoragePrivate)
 
-    inline RxNUMAStoragePrivate(const std::vector<uint32_t> &nodeset) : m_nodeset(nodeset) {}
+    inline RxNUMAStoragePrivate(const std::vector<uint32_t> &nodeset) : // keeps its own copy of the node list in m_nodeset
+        m_nodeset(nodeset)
+    {
+        m_threads.reserve(nodeset.size()); // room for one worker per node, matching the loop that emplaces one thread per m_nodeset entry
+    }
+
+
     inline ~RxNUMAStoragePrivate()
     {
+        join();
+
         for (auto const &item : m_datasets) {
             delete item.second;
         }
@@ -116,16 +124,11 @@ public:
     {
         const uint64_t ts = Chrono::steadyMSecs();
 
-        std::vector<std::thread> threads;
-        threads.reserve(m_nodeset.size());
-
         for (uint32_t node : m_nodeset) {
-            threads.emplace_back(allocate, this, node, hugePages);
+            m_threads.emplace_back(allocate, this, node, hugePages);
         }
 
-        for (auto &thread : threads) {
-            thread.join();
-        }
+        join();
 
         std::thread thread(allocateCache, this, m_nodeset.front(), hugePages);
         thread.join();
@@ -156,20 +159,15 @@ public:
         printDatasetReady(id, ts);
 
         if (m_datasets.size() > 1) {
-            std::vector<std::thread> threads;
-            threads.reserve(m_datasets.size() - 1);
-
             for (auto const &item : m_datasets) {
                 if (item.first == id) {
                     continue;
                 }
 
-                threads.emplace_back(copyDataset, item.second, item.first, primary->raw());
+                m_threads.emplace_back(copyDataset, item.second, item.first, primary->raw());
             }
 
-            for (auto &thread : threads) {
-                thread.join();
-            }
+            join();
         }
 
         m_ready = true;
@@ -293,11 +291,22 @@ private:
     }
 
 
+    inline void join() // waits for every thread currently held in m_threads, then empties the vector
+    {
+        for (auto &thread : m_threads) {
+            thread.join(); // blocks until this worker has finished
+        }
+
+        m_threads.clear(); // joined std::thread objects are dropped so a later join() call (e.g. from the destructor) has nothing left to join
+    }
+
+
     bool m_allocated        = false;
     bool m_ready            = false;
     RxCache *m_cache        = nullptr;
     RxSeed m_seed;
     std::map<uint32_t, RxDataset *> m_datasets;
+    std::vector<std::thread> m_threads; // worker threads of the batch in flight; filled by emplace_back and drained by join()
     std::vector<uint32_t> m_nodeset;
 };