mirror of
https://github.com/xmrig/xmrig.git
synced 2024-11-18 00:37:46 +00:00
Fixed generic OpenCL code for AMD Navi
This commit is contained in:
parent
aacdbc360b
commit
ef629ba0d0
5 changed files with 1969 additions and 1949 deletions
File diff suppressed because it is too large
Load diff
|
@ -316,7 +316,7 @@ __kernel void init_vm(__global const void* entropy_data, __global void* vm_state
|
||||||
uint64_t registerLatencyFP = 0;
|
uint64_t registerLatencyFP = 0;
|
||||||
uint64_t registerReadCycleFP = 0;
|
uint64_t registerReadCycleFP = 0;
|
||||||
uint32_t ScratchpadHighLatency = 0;
|
uint32_t ScratchpadHighLatency = 0;
|
||||||
uint32_t ScratchpadLatency = 0;
|
volatile uint32_t ScratchpadLatency = 0;
|
||||||
|
|
||||||
int32_t first_available_slot = 0;
|
int32_t first_available_slot = 0;
|
||||||
int32_t first_allowed_slot_cfround = 0;
|
int32_t first_allowed_slot_cfround = 0;
|
||||||
|
@ -1425,8 +1425,7 @@ double fma_soft(double a, double b, double c, uint32_t rounding_mode)
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint64_t mantissa_size = 52;
|
const uint64_t mantissa_size = 52;
|
||||||
const uint64_t mantissa_mask = (1UL << mantissa_size) - 1;
|
const uint64_t mantissa_mask = (1UL << 52) - 1;
|
||||||
const uint64_t mantissa_high_bit = 1UL << mantissa_size;
|
|
||||||
|
|
||||||
const uint64_t exponent_size = 11;
|
const uint64_t exponent_size = 11;
|
||||||
const uint64_t exponent_mask = (1 << exponent_size) - 1;
|
const uint64_t exponent_mask = (1 << exponent_size) - 1;
|
||||||
|
@ -1441,9 +1440,13 @@ double fma_soft(double a, double b, double c, uint32_t rounding_mode)
|
||||||
return as_double(inf);
|
return as_double(inf);
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint64_t mantissa_a = (as_ulong(a) & mantissa_mask) | mantissa_high_bit;
|
uint64_t mantissa_a = (as_ulong(a) & mantissa_mask);
|
||||||
const uint64_t mantissa_b = (as_ulong(b) & mantissa_mask) | mantissa_high_bit;
|
uint64_t mantissa_b = (as_ulong(b) & mantissa_mask);
|
||||||
const uint64_t mantissa_c = (as_ulong(c) & mantissa_mask) | mantissa_high_bit;
|
uint64_t mantissa_c = (as_ulong(c) & mantissa_mask);
|
||||||
|
|
||||||
|
((uint2*)&mantissa_a)->y |= 1U << 20;
|
||||||
|
((uint2*)&mantissa_b)->y |= 1U << 20;
|
||||||
|
((uint2*)&mantissa_c)->y |= 1U << 20;
|
||||||
|
|
||||||
const uint32_t sign_a = as_uint2(a).y >> 31;
|
const uint32_t sign_a = as_uint2(a).y >> 31;
|
||||||
const uint32_t sign_b = as_uint2(b).y >> 31;
|
const uint32_t sign_b = as_uint2(b).y >> 31;
|
||||||
|
@ -1585,7 +1588,7 @@ double fma_soft(double a, double b, double c, uint32_t rounding_mode)
|
||||||
if (rounding_mode + sign_fma_result == 2)
|
if (rounding_mode + sign_fma_result == 2)
|
||||||
{
|
{
|
||||||
fma_result[1] += round_up;
|
fma_result[1] += round_up;
|
||||||
if (fma_result[1] == mantissa_high_bit)
|
if (fma_result[1] == (1UL << mantissa_size))
|
||||||
{
|
{
|
||||||
fma_result[1] = 0;
|
fma_result[1] = 0;
|
||||||
++exponent_fma_result;
|
++exponent_fma_result;
|
||||||
|
|
|
@ -49,6 +49,7 @@ bool ocl_generic_rx_generator(const OclDevice &device, const Algorithm &algorith
|
||||||
const size_t mem = device.globalMemSize();
|
const size_t mem = device.globalMemSize();
|
||||||
auto config = RxAlgo::base(algorithm);
|
auto config = RxAlgo::base(algorithm);
|
||||||
bool gcnAsm = false;
|
bool gcnAsm = false;
|
||||||
|
bool isNavi = false;
|
||||||
|
|
||||||
switch (device.type()) {
|
switch (device.type()) {
|
||||||
case OclDevice::Baffin:
|
case OclDevice::Baffin:
|
||||||
|
@ -59,6 +60,12 @@ bool ocl_generic_rx_generator(const OclDevice &device, const Algorithm &algorith
|
||||||
gcnAsm = true;
|
gcnAsm = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OclDevice::Navi_10:
|
||||||
|
case OclDevice::Navi_12:
|
||||||
|
case OclDevice::Navi_14:
|
||||||
|
isNavi = true;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -75,8 +82,9 @@ bool ocl_generic_rx_generator(const OclDevice &device, const Algorithm &algorith
|
||||||
uint32_t intensity = static_cast<uint32_t>((mem - (datasetHost ? 0 : dataset_mem)) / per_thread_mem / 2);
|
uint32_t intensity = static_cast<uint32_t>((mem - (datasetHost ? 0 : dataset_mem)) / per_thread_mem / 2);
|
||||||
|
|
||||||
// Too high intensity makes hashrate worse
|
// Too high intensity makes hashrate worse
|
||||||
if (intensity > device.computeUnits() * 16) {
|
const uint32_t intensityCoeff = isNavi ? 64 : 16;
|
||||||
intensity = device.computeUnits() * 16;
|
if (intensity > device.computeUnits() * intensityCoeff) {
|
||||||
|
intensity = device.computeUnits() * intensityCoeff;
|
||||||
}
|
}
|
||||||
|
|
||||||
intensity -= intensity % 64;
|
intensity -= intensity % 64;
|
||||||
|
|
|
@ -107,6 +107,14 @@ static OclDevice::Type getType(const String &name)
|
||||||
return OclDevice::Navi_10;
|
return OclDevice::Navi_10;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (name == "gfx1011") {
|
||||||
|
return OclDevice::Navi_12;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (name == "gfx1012") {
|
||||||
|
return OclDevice::Navi_14;
|
||||||
|
}
|
||||||
|
|
||||||
if (name == "gfx804") {
|
if (name == "gfx804") {
|
||||||
return OclDevice::Lexa;
|
return OclDevice::Lexa;
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,7 +56,9 @@ public:
|
||||||
Vega_10,
|
Vega_10,
|
||||||
Vega_20,
|
Vega_20,
|
||||||
Raven,
|
Raven,
|
||||||
Navi_10
|
Navi_10,
|
||||||
|
Navi_12,
|
||||||
|
Navi_14
|
||||||
};
|
};
|
||||||
|
|
||||||
OclDevice() = delete;
|
OclDevice() = delete;
|
||||||
|
|
Loading…
Reference in a new issue