From c6a68c3e5178c9eb837f46042d6d018650444d7f Mon Sep 17 00:00:00 2001
From: Tony Butler
Date: Thu, 30 Sep 2021 09:19:48 -0600
Subject: [PATCH] Cap max threads to 4096 with nVidia OpenCL

---
 src/backend/opencl/generators/ocl_generic_cn_generator.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/backend/opencl/generators/ocl_generic_cn_generator.cpp b/src/backend/opencl/generators/ocl_generic_cn_generator.cpp
index 78da97103..c3d7733f1 100644
--- a/src/backend/opencl/generators/ocl_generic_cn_generator.cpp
+++ b/src/backend/opencl/generators/ocl_generic_cn_generator.cpp
@@ -39,6 +39,10 @@ static inline uint32_t getMaxThreads(const OclDevice &device, const Algorithm &a
         return 40000U;
     }
 
+    if (device.vendorId() == OCL_VENDOR_NVIDIA) {
+        return 4096U;
+    }
+
     const uint32_t ratio = (algorithm.l3() <= oneMiB) ? 2U : 1U;
 
     if (device.vendorId() == OCL_VENDOR_INTEL) {