RandomX: optimized soft AES code

The unrolled loop was 5-10% slower, depending on the CPU.
This commit is contained in:
SChernykh 2020-09-29 21:22:11 +02:00
parent dfab81e9fa
commit 7b4f768114

View file

@ -270,19 +270,22 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
rx_prefetch_t0(prefetchPtr);
rx_prefetch_t0(prefetchPtr + 64);
scratchpadPtr += 128;
prefetchPtr += 128;
break;
default:
HASH_STATE(0);
FILL_STATE(0);
rx_prefetch_t0(prefetchPtr);
HASH_STATE(1);
FILL_STATE(1);
rx_prefetch_t0(prefetchPtr + 64);
}
scratchpadPtr += 64;
prefetchPtr += 64;
scratchpadPtr += 128;
prefetchPtr += 128;
break;
}
}
prefetchPtr = (const char*) scratchpad;
scratchpadEnd += PREFETCH_DISTANCE;