RandomX: optimized soft AES code

The unrolled loop was 5-10% slower, depending on the CPU.
This commit is contained in:
SChernykh 2020-09-29 21:22:11 +02:00
parent dfab81e9fa
commit 7b4f768114

View file

@ -270,19 +270,22 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
rx_prefetch_t0(prefetchPtr); rx_prefetch_t0(prefetchPtr);
rx_prefetch_t0(prefetchPtr + 64); rx_prefetch_t0(prefetchPtr + 64);
scratchpadPtr += 128;
prefetchPtr += 128;
break; break;
default: default:
HASH_STATE(0); HASH_STATE(0);
FILL_STATE(0); FILL_STATE(0);
rx_prefetch_t0(prefetchPtr); rx_prefetch_t0(prefetchPtr);
HASH_STATE(1); scratchpadPtr += 64;
FILL_STATE(1); prefetchPtr += 64;
rx_prefetch_t0(prefetchPtr + 64);
}
scratchpadPtr += 128; break;
prefetchPtr += 128; }
} }
prefetchPtr = (const char*) scratchpad; prefetchPtr = (const char*) scratchpad;
scratchpadEnd += PREFETCH_DISTANCE; scratchpadEnd += PREFETCH_DISTANCE;