mirror of
https://github.com/Cuprate/cuprate.git
synced 2025-01-08 20:09:44 +00:00
remove randomX lib
This commit is contained in:
parent
216bedaf06
commit
2440ccbd8d
14 changed files with 0 additions and 2008 deletions
|
@ -5,7 +5,6 @@ members = [
|
|||
"common",
|
||||
"consensus",
|
||||
"cryptonight",
|
||||
"random-x",
|
||||
#"cuprate",
|
||||
# "database",
|
||||
"net/levin",
|
||||
|
|
|
@ -1,22 +0,0 @@
|
|||
[package]
|
||||
name = "random-x"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[features]
|
||||
default = ["jit"]
|
||||
jit = ["dep:dynasmrt"]
|
||||
rayon = ["dep:rayon"]
|
||||
|
||||
[dependencies]
|
||||
blake2 = "0.10"
|
||||
argon2 = "0.5"
|
||||
aes = {version = "0.8", features = ["hazmat"]}
|
||||
hex-literal = "0.4"
|
||||
|
||||
dynasmrt = {version = "2.0.0", optional = true}
|
||||
|
||||
rayon = {version ="1.7", optional = true}
|
||||
|
||||
[profile.dev]
|
||||
opt-level = 3
|
|
@ -1,153 +0,0 @@
|
|||
use aes::{
|
||||
hazmat::{cipher_round as aes_enc, equiv_inv_cipher_round as aes_dec},
|
||||
Block,
|
||||
};
|
||||
use hex_literal::hex;
|
||||
|
||||
// key0, key1, key2, key3 = Hash512("RandomX AesGenerator1R keys")
|
||||
const GENERATOR_1_KEY_0: [u8; 16] = hex!("53a5ac6d096671622b55b5db1749f4b4");
|
||||
const GENERATOR_1_KEY_1: [u8; 16] = hex!("07af7c6d0d716a8478d325174edca10d");
|
||||
const GENERATOR_1_KEY_2: [u8; 16] = hex!("f162123fc67e949f4f79c0f445e3203e");
|
||||
const GENERATOR_1_KEY_3: [u8; 16] = hex!("3581ef6a7c31bab1884c311654911649");
|
||||
|
||||
// key0, key1, key2, key3 = Hash512("RandomX AesGenerator4R keys 0-3")
|
||||
const GENERATOR_4_KEY_0: [u8; 16] = hex!("ddaa2164db3d83d12b6d542f3fd2e599");
|
||||
const GENERATOR_4_KEY_1: [u8; 16] = hex!("50340eb2553f91b6539df706e5cddfa5");
|
||||
const GENERATOR_4_KEY_2: [u8; 16] = hex!("04d93e5caf7b5e519f67a40abf021c17");
|
||||
const GENERATOR_4_KEY_3: [u8; 16] = hex!("63376285085d8fe7853767cd91d2ded8");
|
||||
// key4, key5, key6, key7 = Hash512("RandomX AesGenerator4R keys 4-7")
|
||||
const GENERATOR_4_KEY_4: [u8; 16] = hex!("736f82b5a6a7d6e36d8b513db4ff9e22");
|
||||
const GENERATOR_4_KEY_5: [u8; 16] = hex!("f36b56c7d9b3109c4e4d02e9d2b772b2");
|
||||
const GENERATOR_4_KEY_6: [u8; 16] = hex!("e7c973f28ba365f70a66a92ba7ef3bf6");
|
||||
const GENERATOR_4_KEY_7: [u8; 16] = hex!("09d67c7ade395891fdd1060c2d76b0c0");
|
||||
|
||||
// state0, state1, state2, state3 = Hash512("RandomX AesHash1R state")
|
||||
const HASH_1_STATE_0: [u8; 16] = hex!("0d2cb592de56a89f47db82ccad3a98d7");
|
||||
const HASH_1_STATE_1: [u8; 16] = hex!("6e998d3398b7c7155a129ef55780e7ac");
|
||||
const HASH_1_STATE_2: [u8; 16] = hex!("1700776ad0c762ae6b507950e47ca0e8");
|
||||
const HASH_1_STATE_3: [u8; 16] = hex!("0c240a638d82ad070500a1794849997e");
|
||||
// xkey0, xkey1 = Hash256("RandomX AesHash1R xkeys")
|
||||
const HASH_1_X_KEY_0: [u8; 16] = hex!("8983faf69f94248bbf56dc9001028906");
|
||||
const HASH_1_X_KEY_1: [u8; 16] = hex!("d163b2613ce0f451c64310ee9bf918ed");
|
||||
|
||||
/// AesHash1R in the spec.
|
||||
///
|
||||
/// creates a 64 byte hash from the input.
|
||||
///
|
||||
/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#34-aeshash1r
|
||||
pub(crate) fn hash_aes_r1(buf: &[u8]) -> [u8; 64] {
|
||||
assert_eq!(buf.len() % 64, 0);
|
||||
|
||||
let mut block_0 = Block::from(HASH_1_STATE_0);
|
||||
let mut block_1 = Block::from(HASH_1_STATE_1);
|
||||
let mut block_2 = Block::from(HASH_1_STATE_2);
|
||||
let mut block_3 = Block::from(HASH_1_STATE_3);
|
||||
|
||||
for window in buf.windows(64) {
|
||||
aes_enc(&mut block_0, Block::from_slice(&window[0..16]));
|
||||
aes_dec(&mut block_1, Block::from_slice(&window[16..32]));
|
||||
aes_enc(&mut block_2, Block::from_slice(&window[32..48]));
|
||||
aes_dec(&mut block_3, Block::from_slice(&window[48..64]));
|
||||
}
|
||||
|
||||
let x_key_0 = Block::from_slice(&HASH_1_X_KEY_0);
|
||||
aes_enc(&mut block_0, x_key_0);
|
||||
aes_dec(&mut block_1, x_key_0);
|
||||
aes_enc(&mut block_2, x_key_0);
|
||||
aes_dec(&mut block_3, x_key_0);
|
||||
|
||||
let x_key_1 = Block::from_slice(&HASH_1_X_KEY_1);
|
||||
aes_enc(&mut block_0, x_key_1);
|
||||
aes_dec(&mut block_1, x_key_1);
|
||||
aes_enc(&mut block_2, x_key_1);
|
||||
aes_dec(&mut block_3, x_key_1);
|
||||
|
||||
[block_0, block_1, block_2, block_3]
|
||||
.concat()
|
||||
.try_into()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// AesGenerator1R in the spec.
|
||||
///
|
||||
/// Fills the bytes with pseudorandom bytes seeded by the input.
|
||||
///
|
||||
/// `output` must be a multiple of 64.
|
||||
///
|
||||
/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#32-aesgenerator1r
|
||||
pub(crate) fn aes_fill_1r(input: &[u8; 64], output: &mut [u8]) {
|
||||
assert_eq!(output.len() % 64, 0);
|
||||
|
||||
let key_0 = Block::from(GENERATOR_1_KEY_0);
|
||||
let key_1 = Block::from(GENERATOR_1_KEY_1);
|
||||
let key_2 = Block::from(GENERATOR_1_KEY_2);
|
||||
let key_3 = Block::from(GENERATOR_1_KEY_3);
|
||||
|
||||
let mut block_0 = Block::clone_from_slice(&input[0..16]);
|
||||
let mut block_1 = Block::clone_from_slice(&input[16..32]);
|
||||
let mut block_2 = Block::clone_from_slice(&input[32..48]);
|
||||
let mut block_3 = Block::clone_from_slice(&input[48..64]);
|
||||
|
||||
for idx in (0..output.len()).step_by(64) {
|
||||
aes_dec(&mut block_0, &key_0);
|
||||
aes_enc(&mut block_1, &key_1);
|
||||
aes_dec(&mut block_2, &key_2);
|
||||
aes_enc(&mut block_3, &key_3);
|
||||
|
||||
output[idx..idx + 16].clone_from_slice(block_0.as_slice());
|
||||
output[idx + 16..idx + 32].clone_from_slice(block_1.as_slice());
|
||||
output[idx + 32..idx + 48].clone_from_slice(block_2.as_slice());
|
||||
output[idx + 48..idx + 64].clone_from_slice(block_3.as_slice());
|
||||
}
|
||||
}
|
||||
|
||||
/// AesGenerator4R in the spec.
|
||||
///
|
||||
/// Fills the output with pseudorandom bytes seeded by the input.
|
||||
///
|
||||
/// `output` must be a multiple of 64.
|
||||
///
|
||||
/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#33-aesgenerator4r
|
||||
pub(crate) fn aes_fill_4r(input: &[u8; 64], output: &mut [u8]) {
|
||||
assert_eq!(output.len() % 64, 0);
|
||||
|
||||
let key_0 = Block::from(GENERATOR_4_KEY_0);
|
||||
let key_1 = Block::from(GENERATOR_4_KEY_1);
|
||||
let key_2 = Block::from(GENERATOR_4_KEY_2);
|
||||
let key_3 = Block::from(GENERATOR_4_KEY_3);
|
||||
let key_4 = Block::from(GENERATOR_4_KEY_4);
|
||||
let key_5 = Block::from(GENERATOR_4_KEY_5);
|
||||
let key_6 = Block::from(GENERATOR_4_KEY_6);
|
||||
let key_7 = Block::from(GENERATOR_4_KEY_7);
|
||||
|
||||
let mut block_0 = Block::clone_from_slice(&input[0..16]);
|
||||
let mut block_1 = Block::clone_from_slice(&input[16..32]);
|
||||
let mut block_2 = Block::clone_from_slice(&input[32..48]);
|
||||
let mut block_3 = Block::clone_from_slice(&input[48..64]);
|
||||
|
||||
let aes_enc_4 = |block: &mut Block, key_a, key_b, key_c, key_d| {
|
||||
aes_enc(block, key_a);
|
||||
aes_enc(block, key_b);
|
||||
aes_enc(block, key_c);
|
||||
aes_enc(block, key_d);
|
||||
};
|
||||
|
||||
let aes_dec_4 = |block: &mut Block, key_a, key_b, key_c, key_d| {
|
||||
aes_dec(block, key_a);
|
||||
aes_dec(block, key_b);
|
||||
aes_dec(block, key_c);
|
||||
aes_dec(block, key_d);
|
||||
};
|
||||
|
||||
for idx in (0..output.len()).step_by(64) {
|
||||
aes_dec_4(&mut block_0, &key_0, &key_1, &key_2, &key_3);
|
||||
aes_enc_4(&mut block_1, &key_0, &key_1, &key_2, &key_3);
|
||||
aes_dec_4(&mut block_2, &key_4, &key_5, &key_6, &key_7);
|
||||
aes_enc_4(&mut block_3, &key_4, &key_5, &key_6, &key_7);
|
||||
|
||||
output[idx..idx + 16].clone_from_slice(block_0.as_slice());
|
||||
output[idx + 16..idx + 32].clone_from_slice(block_1.as_slice());
|
||||
output[idx + 32..idx + 48].clone_from_slice(block_2.as_slice());
|
||||
output[idx + 48..idx + 64].clone_from_slice(block_3.as_slice());
|
||||
}
|
||||
}
|
|
@ -1,47 +0,0 @@
|
|||
use blake2::digest::FixedOutputReset;
|
||||
use blake2::{Blake2b512, Digest};
|
||||
|
||||
const MAX_SEED_LEN: usize = 60;
|
||||
|
||||
pub struct Blake2Generator {
|
||||
data: [u8; 64],
|
||||
index: usize,
|
||||
hasher: Blake2b512,
|
||||
}
|
||||
|
||||
impl Blake2Generator {
|
||||
pub fn new(seed: &[u8], nonce: u32) -> Self {
|
||||
assert!(seed.len() <= MAX_SEED_LEN);
|
||||
|
||||
let mut data = [0; 64];
|
||||
data[..seed.len()].copy_from_slice(seed);
|
||||
|
||||
data[MAX_SEED_LEN..].copy_from_slice(&nonce.to_le_bytes());
|
||||
|
||||
Blake2Generator {
|
||||
data,
|
||||
index: 64,
|
||||
hasher: Blake2b512::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_u8(&mut self) -> u8 {
|
||||
self.check_extend(1);
|
||||
self.index += 1;
|
||||
self.data[self.index - 1]
|
||||
}
|
||||
|
||||
pub fn next_u32(&mut self) -> u32 {
|
||||
self.check_extend(4);
|
||||
self.index += 4;
|
||||
u32::from_le_bytes(self.data[self.index - 4..self.index].try_into().unwrap())
|
||||
}
|
||||
|
||||
fn check_extend(&mut self, bytes_needed: usize) {
|
||||
if self.index + bytes_needed > self.data.len() {
|
||||
self.hasher.update(self.data);
|
||||
self.data = self.hasher.finalize_fixed_reset().into();
|
||||
self.index = 0;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
/// Target latency for SuperscalarHash (in cycles of the reference CPU).
pub(crate) const RANDOMX_SUPERSCALAR_LATENCY: usize = 170;

/// Size limit derived from the target latency.
// NOTE(review): presumably the maximum number of instructions in one superscalar
// program - confirm against the generator.
pub(crate) const SUPERSCALAR_MAX_SIZE: usize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2;

/// Dataset base size in bytes. Must be a power of 2.
pub(crate) const RANDOMX_DATASET_BASE_SIZE: usize = 2_147_483_648;

/// Extra dataset size added on top of the base size.
pub(crate) const RANDOMX_DATASET_EXTRA_SIZE: usize = 33_554_368;

/// Total dataset size: base size plus extra size.
pub(crate) const RANDOMX_DATASET_SIZE: usize =
    RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;

/// Argon2 lane count used when filling the cache.
pub(crate) const RANDOMX_ARGON_LANES: u32 = 1;

/// Argon2 iteration count used when filling the cache.
pub(crate) const RANDOMX_ARGON_ITERATIONS: u32 = 3;

/// Argon2 memory parameter used when filling the cache.
pub(crate) const RANDOMX_ARGON_MEMORY: u32 = 262_144;

/// Argon2 salt used when filling the cache.
pub(crate) const RANDOMX_ARGON_SALT: &[u8] = b"RandomX\x03";

/// Number of superscalar programs generated for a cache; one cache item is read per program.
pub(crate) const RANDOMX_CACHE_ACCESSES: usize = 8;
|
|
@ -1,193 +0,0 @@
|
|||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use argon2::{Algorithm, Argon2, Block, Params, Version};
|
||||
#[cfg(feature = "rayon")]
|
||||
use rayon::prelude::*;
|
||||
|
||||
use crate::blake2_generator::Blake2Generator;
|
||||
use crate::{
|
||||
config::{
|
||||
RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_LANES, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_SALT,
|
||||
RANDOMX_CACHE_ACCESSES, RANDOMX_DATASET_SIZE,
|
||||
},
|
||||
registers::{RGroupRegisterID, RGroupRegisters},
|
||||
superscalar::SSProgram,
|
||||
};
|
||||
|
||||
/// Generates the memory blocks used in the cache
|
||||
fn argon2_blocks(key: &[u8]) -> Box<[Block]> {
|
||||
let params = Params::new(
|
||||
RANDOMX_ARGON_MEMORY,
|
||||
RANDOMX_ARGON_ITERATIONS,
|
||||
RANDOMX_ARGON_LANES,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let numb_blocks: usize = (RANDOMX_ARGON_LANES * RANDOMX_ARGON_MEMORY)
|
||||
.try_into()
|
||||
.unwrap();
|
||||
|
||||
let mut blocks = vec![Block::new(); numb_blocks].into_boxed_slice();
|
||||
|
||||
let argon = Argon2::new(Algorithm::Argon2d, Version::V0x13, params);
|
||||
|
||||
argon
|
||||
.fill_memory(key, RANDOMX_ARGON_SALT, &mut blocks)
|
||||
.unwrap();
|
||||
blocks
|
||||
}
|
||||
|
||||
/// The Cache.
|
||||
///
|
||||
/// The cache is used during light verification.
|
||||
/// Internally this struct is a wrapper around an [`Arc`] internal cache, this allows
|
||||
/// cheep clones and allows the cache to be shared between VMs on different threads.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Cache {
|
||||
internal_cache: Arc<RwLock<InternalCache>>,
|
||||
}
|
||||
|
||||
impl Cache {
|
||||
/// Initialises the cache with the provided key.
|
||||
///
|
||||
/// The key must be between 1-60 bytes (inclusive) otherwise this will panic.
|
||||
pub fn init(key: &[u8]) -> Self {
|
||||
let internal_cache = InternalCache::init(key);
|
||||
Cache {
|
||||
internal_cache: Arc::new(RwLock::new(internal_cache)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The internal cache structure, used during light verification.
|
||||
#[derive(Debug)]
|
||||
|
||||
struct InternalCache {
|
||||
memory_blocks: Box<[Block]>,
|
||||
programs: Vec<SSProgram>,
|
||||
}
|
||||
|
||||
impl InternalCache {
|
||||
fn init(key: &[u8]) -> Self {
|
||||
let memory_blocks = argon2_blocks(key);
|
||||
|
||||
let mut blake_gen = Blake2Generator::new(key, 0);
|
||||
|
||||
let programs = (0..RANDOMX_CACHE_ACCESSES)
|
||||
.map(|_| SSProgram::generate(&mut blake_gen))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
InternalCache {
|
||||
memory_blocks,
|
||||
programs,
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets an item from the cache at the specified index.
|
||||
fn get_item(&self, idx: usize) -> [u64; 8] {
|
||||
// one item is 8 u64s
|
||||
// mask = (blocks in cache * bytes in a block / size of item) minus one.
|
||||
let mask = (self.memory_blocks.len() * 1024 / 64) - 1;
|
||||
// and the idx with the mask this is the same as doing mod (self.memory_blocks.len() * 1024 / 64)
|
||||
let idx = idx & mask;
|
||||
|
||||
// block_idx = idx divided by amount of items in a block
|
||||
let block_idx = idx / (1024 / 64);
|
||||
// idx * 8 is to get the idx of a single u64
|
||||
// we mask with amount of u64s in a block minus 1 which is the same as doing
|
||||
// mod the amount of instructions in a block.
|
||||
let block_u64_start = (idx * 8) & 127;
|
||||
// The plus 8 cannot overflow as (idx * 8) & 127 wont give a number bigger than 120
|
||||
return self.memory_blocks[block_idx].as_ref()[block_u64_start..block_u64_start + 8]
|
||||
.try_into()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Generates the dataset item at the specified index.
|
||||
fn init_data_set_item(&self, item_number: usize) -> [u64; 8] {
|
||||
let mut registers = RGroupRegisters::default();
|
||||
registers.set(
|
||||
&RGroupRegisterID::R0,
|
||||
(TryInto::<u64>::try_into(item_number).unwrap() + 1_u64)
|
||||
.wrapping_mul(6364136223846793005_u64),
|
||||
);
|
||||
|
||||
let mut init_reg = |dst, val: u64| {
|
||||
registers.apply_to_dst_with_src(&dst, &RGroupRegisterID::R0, |_, src| src ^ val)
|
||||
};
|
||||
|
||||
init_reg(RGroupRegisterID::R1, 9298411001130361340);
|
||||
init_reg(RGroupRegisterID::R2, 12065312585734608966);
|
||||
init_reg(RGroupRegisterID::R3, 9306329213124626780);
|
||||
init_reg(RGroupRegisterID::R4, 5281919268842080866);
|
||||
init_reg(RGroupRegisterID::R5, 10536153434571861004);
|
||||
init_reg(RGroupRegisterID::R6, 3398623926847679864);
|
||||
init_reg(RGroupRegisterID::R7, 9549104520008361294);
|
||||
|
||||
let mut cache_index = item_number;
|
||||
|
||||
for program in &self.programs {
|
||||
program.execute(&mut registers);
|
||||
|
||||
let cache_item = self.get_item(cache_index);
|
||||
for (reg_id, item) in RGroupRegisterID::iter().zip(cache_item) {
|
||||
registers.apply_to_dst(®_id, |dst| dst ^ item);
|
||||
}
|
||||
|
||||
cache_index = registers
|
||||
.get(&program.reg_with_max_latency())
|
||||
.try_into()
|
||||
.expect("u64 does not fit into usize");
|
||||
}
|
||||
registers.inner()
|
||||
}
|
||||
}
|
||||
|
||||
/// The Dataset used during mining.
|
||||
///
|
||||
/// Internally this struct is a wrapper around an [`Arc`] internal dataset, this allows
|
||||
/// cheep clones and allows the dataset to be shared between VMs on different threads.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Dataset {
|
||||
internal_dataset: Arc<RwLock<InternalDataset>>,
|
||||
}
|
||||
|
||||
impl Dataset {
|
||||
/// Initialises the dataset with the provided key.
|
||||
///
|
||||
/// The key must be between 1-60 bytes (inclusive) otherwise this will panic.
|
||||
///
|
||||
/// This is very computationally intense so might take a long time to complete.
|
||||
pub fn init(key: &[u8]) -> Dataset {
|
||||
let internal_dataset = InternalDataset::init(key);
|
||||
Dataset {
|
||||
internal_dataset: Arc::new(RwLock::new(internal_dataset)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The internal dataset used during mining.
#[derive(Debug)]
struct InternalDataset {
    /// The dataset items, 8 `u64`s each, in item-number order.
    dataset: Vec<[u64; 8]>,
}
|
||||
|
||||
impl InternalDataset {
|
||||
fn init(key: &[u8]) -> InternalDataset {
|
||||
let cache = InternalCache::init(key);
|
||||
|
||||
#[cfg(feature = "rayon")]
|
||||
let dataset: Vec<[u64; 8]> = (0..RANDOMX_DATASET_SIZE / (64 * 8))
|
||||
.into_par_iter()
|
||||
.map(|i| cache.init_data_set_item(i))
|
||||
.collect();
|
||||
|
||||
#[cfg(not(feature = "rayon"))]
|
||||
let dataset: Vec<[u64; 8]> = (0..RANDOMX_DATASET_SIZE / (64 * 8))
|
||||
.map(|i| cache.init_data_set_item(i))
|
||||
.collect();
|
||||
|
||||
Self { dataset }
|
||||
}
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
mod aes_hash;
|
||||
mod blake2_generator;
|
||||
mod config;
|
||||
mod dataset;
|
||||
mod registers;
|
||||
mod superscalar;
|
||||
|
||||
pub use dataset::{Cache, Dataset};
|
||||
|
||||
/// Returns `true` if `x` is zero or has exactly one bit set.
///
/// Clearing the lowest set bit with `x & (x - 1)` leaves zero exactly for those values.
/// The subtraction wraps so that `x == 0` yields `0 & u64::MAX == 0` instead of
/// panicking on underflow in builds with overflow checks.
fn is_0_or_power_of_2(x: u64) -> bool {
    (x & x.wrapping_sub(1)) == 0
}
|
|
@ -1,3 +0,0 @@
|
|||
mod integer;
|
||||
|
||||
pub(crate) use integer::*;
|
|
@ -1,62 +0,0 @@
|
|||
/// Identifier of one of the eight R-group (integer) registers.
///
/// The discriminant is the register's index into the register file.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[repr(usize)]
pub enum RGroupRegisterID {
    R0 = 0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
}

impl RGroupRegisterID {
    /// Returns an iterator over all register IDs, from `R0` to `R7`.
    pub fn iter() -> impl Iterator<Item = RGroupRegisterID> {
        let all = [
            Self::R0,
            Self::R1,
            Self::R2,
            Self::R3,
            Self::R4,
            Self::R5,
            Self::R6,
            Self::R7,
        ];
        all.into_iter()
    }
}
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct RGroupRegisters([u64; 8]);
|
||||
|
||||
impl RGroupRegisters {
|
||||
pub fn inner(self) -> [u64; 8] {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn apply_to_dst(&mut self, dst: &RGroupRegisterID, f: impl FnOnce(u64) -> u64) {
|
||||
*self.get_mut(dst) = f(self.get(dst));
|
||||
}
|
||||
|
||||
pub fn apply_to_dst_with_src(
|
||||
&mut self,
|
||||
dst: &RGroupRegisterID,
|
||||
src: &RGroupRegisterID,
|
||||
f: impl FnOnce(u64, u64) -> u64,
|
||||
) {
|
||||
*self.get_mut(dst) = f(self.get(dst), self.get(src));
|
||||
}
|
||||
|
||||
pub fn set(&mut self, id: &RGroupRegisterID, val: u64) {
|
||||
self.0[*id as usize] = val
|
||||
}
|
||||
|
||||
pub fn get(&self, id: &RGroupRegisterID) -> u64 {
|
||||
self.0[*id as usize]
|
||||
}
|
||||
|
||||
pub fn get_mut(&mut self, id: &RGroupRegisterID) -> &mut u64 {
|
||||
&mut self.0[*id as usize]
|
||||
}
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
mod cpu;
|
||||
mod executor;
|
||||
mod generator;
|
||||
mod instructions;
|
||||
mod program;
|
||||
|
||||
use crate::blake2_generator::Blake2Generator;
|
||||
|
||||
use crate::registers::{RGroupRegisterID, RGroupRegisters};
|
||||
use executor::execute;
|
||||
use generator::generate;
|
||||
use instructions::ScalarInstruction;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct SSProgram {
|
||||
program: Vec<ScalarInstruction>,
|
||||
reg_with_max_latency: RGroupRegisterID,
|
||||
}
|
||||
|
||||
impl SSProgram {
|
||||
pub fn generate(gen: &mut Blake2Generator) -> Self {
|
||||
generate(gen)
|
||||
}
|
||||
|
||||
pub fn execute(&self, registers: &mut RGroupRegisters) {
|
||||
execute(&self.program, registers)
|
||||
}
|
||||
|
||||
pub fn reg_with_max_latency(&self) -> RGroupRegisterID {
|
||||
self.reg_with_max_latency
|
||||
}
|
||||
}
|
|
@ -1,295 +0,0 @@
|
|||
use crate::config::RANDOMX_SUPERSCALAR_LATENCY;
|
||||
|
||||
/// Max cycles + highest amount of cycles on a macro op.
|
||||
const CYCLE_MAP_SIZE: usize = RANDOMX_SUPERSCALAR_LATENCY + 4;
|
||||
|
||||
/// Length of a single macro-op slot; the variant name carries the length.
// NOTE(review): presumably the length is in bytes of encoded instruction - confirm
// against the decoder-group table in the spec.
pub(crate) enum SlotLen {
    /// Length 3.
    L3,
    /// Length 4.
    L4,
    /// Length 7.
    L7,
    /// Length 8.
    L8,
    /// Length 9.
    L9,
    /// Length 10.
    L10,
}
|
||||
|
||||
/// An execution port of the modelled CPU.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ExecutionPort {
    P0,
    P1,
    P5,
}

/// The set of execution ports a micro-op may be issued on.
enum AllowedPorts {
    /// Exactly one port.
    One(ExecutionPort),
    /// Either of two ports.
    Two(ExecutionPort, ExecutionPort),
    /// Any port.
    All,
}

impl AllowedPorts {
    /// Returns `true` if `port` is a member of this set.
    fn port_allowed(&self, port: &ExecutionPort) -> bool {
        match self {
            Self::All => true,
            Self::One(only) => only == port,
            Self::Two(first, second) => [first, second].contains(&port),
        }
    }
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum MacroOp {
|
||||
SUB_RR,
|
||||
XOR_RR,
|
||||
LEA_SIB,
|
||||
IMUL_RR { dependant: bool },
|
||||
ROR_RI,
|
||||
ADD_RI,
|
||||
XOR_RI,
|
||||
MOV_RR,
|
||||
MUL_R,
|
||||
IMUL_R,
|
||||
MOV_RI,
|
||||
}
|
||||
|
||||
impl MacroOp {
|
||||
pub fn cycles_to_complete(&self) -> usize {
|
||||
match self {
|
||||
MacroOp::SUB_RR => 1,
|
||||
MacroOp::XOR_RR => 1,
|
||||
MacroOp::LEA_SIB => 1,
|
||||
MacroOp::IMUL_RR { .. } => 3,
|
||||
MacroOp::ROR_RI => 1,
|
||||
MacroOp::ADD_RI => 1,
|
||||
MacroOp::XOR_RI => 1,
|
||||
MacroOp::MOV_RR => 0,
|
||||
MacroOp::MUL_R => 4,
|
||||
MacroOp::IMUL_R => 4,
|
||||
MacroOp::MOV_RI => 1,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn can_be_eliminated(&self) -> bool {
|
||||
self.micro_ops_needed() == 0
|
||||
}
|
||||
|
||||
pub fn is_dependant_on_last_op(&self) -> bool {
|
||||
match self {
|
||||
MacroOp::IMUL_RR { dependant } => *dependant,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn micro_ops_needed(&self) -> usize {
|
||||
match self {
|
||||
MacroOp::SUB_RR => 1,
|
||||
MacroOp::XOR_RR => 1,
|
||||
MacroOp::LEA_SIB => 1,
|
||||
MacroOp::IMUL_RR { .. } => 1,
|
||||
MacroOp::ROR_RI => 1,
|
||||
MacroOp::ADD_RI => 1,
|
||||
MacroOp::XOR_RI => 1,
|
||||
MacroOp::MOV_RR => 0,
|
||||
MacroOp::MUL_R => 2,
|
||||
MacroOp::IMUL_R => 2,
|
||||
MacroOp::MOV_RI => 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn allowed_execution_ports(&self, micro_op_index: usize) -> AllowedPorts {
|
||||
match self {
|
||||
MacroOp::SUB_RR => AllowedPorts::All,
|
||||
MacroOp::XOR_RR => AllowedPorts::All,
|
||||
MacroOp::LEA_SIB => AllowedPorts::Two(ExecutionPort::P0, ExecutionPort::P1),
|
||||
MacroOp::IMUL_RR { .. } => AllowedPorts::One(ExecutionPort::P1),
|
||||
MacroOp::ROR_RI => AllowedPorts::Two(ExecutionPort::P0, ExecutionPort::P5),
|
||||
MacroOp::ADD_RI => AllowedPorts::All,
|
||||
MacroOp::XOR_RI => AllowedPorts::All,
|
||||
MacroOp::MOV_RR => panic!("No execution units needed for MOV_RR"),
|
||||
MacroOp::MUL_R => match micro_op_index {
|
||||
0 => AllowedPorts::One(ExecutionPort::P1),
|
||||
1 => AllowedPorts::One(ExecutionPort::P5),
|
||||
_ => panic!("no execution port at that index"),
|
||||
},
|
||||
MacroOp::IMUL_R => match micro_op_index {
|
||||
0 => AllowedPorts::One(ExecutionPort::P1),
|
||||
1 => AllowedPorts::One(ExecutionPort::P5),
|
||||
_ => panic!("no execution port at that index"),
|
||||
},
|
||||
MacroOp::MOV_RI => AllowedPorts::All,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents the ports availability during a single cycle.
|
||||
#[derive(Debug, Default, Copy, Clone)]
|
||||
struct CycleSchedule {
|
||||
p0: bool,
|
||||
p1: bool,
|
||||
p5: bool,
|
||||
}
|
||||
|
||||
impl CycleSchedule {
|
||||
fn space_for_micro_op(&self, allowed_ports: &AllowedPorts) -> Option<ExecutionPort> {
|
||||
if !self.p5 && allowed_ports.port_allowed(&ExecutionPort::P5) {
|
||||
Some(ExecutionPort::P5)
|
||||
} else if !self.p0 && allowed_ports.port_allowed(&ExecutionPort::P0) {
|
||||
Some(ExecutionPort::P0)
|
||||
} else if !self.p1 && allowed_ports.port_allowed(&ExecutionPort::P1) {
|
||||
Some(ExecutionPort::P1)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn set_port_busy(&mut self, port: ExecutionPort) {
|
||||
match port {
|
||||
ExecutionPort::P0 => self.p0 = true,
|
||||
ExecutionPort::P1 => self.p1 = true,
|
||||
ExecutionPort::P5 => self.p5 = true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct MacroOpOpportunity {
|
||||
cycle: usize,
|
||||
micro_port_0: Option<ExecutionPort>,
|
||||
micro_port_1: Option<ExecutionPort>,
|
||||
}
|
||||
|
||||
impl MacroOpOpportunity {
|
||||
pub fn cycle(&self) -> usize {
|
||||
self.cycle
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ProgramSchedule {
|
||||
ports_schedule: [CycleSchedule; CYCLE_MAP_SIZE],
|
||||
full: bool,
|
||||
}
|
||||
|
||||
impl Default for ProgramSchedule {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ports_schedule: [CycleSchedule::default(); CYCLE_MAP_SIZE],
|
||||
full: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ProgramSchedule {
|
||||
pub fn set_full(&mut self) {
|
||||
self.full = true;
|
||||
}
|
||||
|
||||
pub fn is_full(&self) -> bool {
|
||||
self.full
|
||||
}
|
||||
|
||||
pub fn schedule_macro_op_at_earliest(
|
||||
&mut self,
|
||||
op: &MacroOp,
|
||||
cycle: usize,
|
||||
last_op_completes_at: usize,
|
||||
) -> Option<usize> {
|
||||
let opportunity = self.earliest_cycle_for_macro_op(op, cycle, last_op_completes_at)?;
|
||||
let cycle = opportunity.cycle();
|
||||
if let Some(port0) = opportunity.micro_port_0 {
|
||||
self.schedule_micro_op(cycle, port0);
|
||||
if let Some(port1) = opportunity.micro_port_1 {
|
||||
self.schedule_micro_op(cycle, port1);
|
||||
};
|
||||
};
|
||||
|
||||
Some(cycle)
|
||||
}
|
||||
|
||||
pub fn earliest_cycle_for_macro_op(
|
||||
&mut self,
|
||||
op: &MacroOp,
|
||||
cycle: usize,
|
||||
last_op_completes_at: usize,
|
||||
) -> Option<MacroOpOpportunity> {
|
||||
let mut cycle = if op.is_dependant_on_last_op() {
|
||||
cycle.max(last_op_completes_at)
|
||||
} else {
|
||||
cycle
|
||||
};
|
||||
|
||||
if op.can_be_eliminated() {
|
||||
return Some(MacroOpOpportunity {
|
||||
cycle,
|
||||
micro_port_0: None,
|
||||
micro_port_1: None,
|
||||
});
|
||||
}
|
||||
|
||||
match op.micro_ops_needed() {
|
||||
0 => Some(MacroOpOpportunity {
|
||||
cycle,
|
||||
micro_port_0: None,
|
||||
micro_port_1: None,
|
||||
}),
|
||||
1 => self
|
||||
.earliest_cycle_for_mirco_op(&op.allowed_execution_ports(0), cycle)
|
||||
.map(|(cycle, micro_port_0)| MacroOpOpportunity {
|
||||
cycle,
|
||||
micro_port_0: Some(micro_port_0),
|
||||
micro_port_1: None,
|
||||
}),
|
||||
2 => {
|
||||
// both ops must happen in the same cycle
|
||||
let allowed_0 = op.allowed_execution_ports(0);
|
||||
let allowed_1 = op.allowed_execution_ports(1);
|
||||
|
||||
while cycle < CYCLE_MAP_SIZE {
|
||||
let (min_0_cycle, port_0) =
|
||||
self.earliest_cycle_for_mirco_op(&allowed_0, cycle)?;
|
||||
let (min_1_cycle, port_1) =
|
||||
self.earliest_cycle_for_mirco_op(&allowed_1, cycle)?;
|
||||
|
||||
if min_0_cycle == min_1_cycle {
|
||||
return Some(MacroOpOpportunity {
|
||||
cycle: min_0_cycle,
|
||||
micro_port_0: Some(port_0),
|
||||
micro_port_1: Some(port_1),
|
||||
});
|
||||
} else {
|
||||
cycle += 1;
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn schedule_micro_op_at_earliest(
|
||||
&mut self,
|
||||
allowed_ports: &AllowedPorts,
|
||||
cycle: usize,
|
||||
) -> Option<usize> {
|
||||
let (cycle, port) = self.earliest_cycle_for_mirco_op(allowed_ports, cycle)?;
|
||||
self.schedule_micro_op(cycle, port);
|
||||
Some(cycle)
|
||||
}
|
||||
|
||||
fn schedule_micro_op(&mut self, cycle: usize, port: ExecutionPort) {
|
||||
self.ports_schedule[cycle].set_port_busy(port)
|
||||
}
|
||||
|
||||
fn earliest_cycle_for_mirco_op(
|
||||
&mut self,
|
||||
allowed_ports: &AllowedPorts,
|
||||
cycle: usize,
|
||||
) -> Option<(usize, ExecutionPort)> {
|
||||
for (cycle, cycle_schedule) in self.ports_schedule.iter().enumerate().skip(cycle) {
|
||||
if let Some(port) = cycle_schedule.space_for_micro_op(allowed_ports) {
|
||||
return Some((cycle, port));
|
||||
}
|
||||
}
|
||||
self.full = true;
|
||||
None
|
||||
}
|
||||
}
|
|
@ -1,105 +0,0 @@
|
|||
use crate::registers::RGroupRegisters;
|
||||
use crate::superscalar::instructions::ScalarInstruction;
|
||||
|
||||
/// 2^63, the dividend used by [`randomx_reciprocal`].
const P2EXP63: u64 = 1_u64 << 63;
|
||||
|
||||
pub fn execute(program: &[ScalarInstruction], registers: &mut RGroupRegisters) {
|
||||
for instruction in program {
|
||||
match instruction {
|
||||
ScalarInstruction::ISUB_R { dst, src } => {
|
||||
let op = |dst_val: u64, src_val| dst_val.wrapping_sub(src_val);
|
||||
registers.apply_to_dst_with_src(dst, src, op);
|
||||
}
|
||||
ScalarInstruction::IXOR_R { dst, src } => {
|
||||
let op = |dst_val: u64, src_val| dst_val ^ src_val;
|
||||
registers.apply_to_dst_with_src(dst, src, op);
|
||||
}
|
||||
ScalarInstruction::IADD_RS {
|
||||
dst,
|
||||
src,
|
||||
mod_shift,
|
||||
} => {
|
||||
let op = |dst_val: u64, src_val| {
|
||||
dst_val.wrapping_add(src_val << clamp_mod_shift(*mod_shift))
|
||||
};
|
||||
registers.apply_to_dst_with_src(dst, src, op);
|
||||
}
|
||||
ScalarInstruction::IMUL_R { dst, src } => {
|
||||
let op = |dst_val: u64, src_val| dst_val.wrapping_mul(src_val);
|
||||
registers.apply_to_dst_with_src(dst, src, op);
|
||||
}
|
||||
ScalarInstruction::IROR_C { dst, imm32 } => {
|
||||
let op = |dst_val: u64| dst_val.rotate_right(*imm32);
|
||||
registers.apply_to_dst(dst, op);
|
||||
}
|
||||
ScalarInstruction::IADD_C { dst, imm32 } => {
|
||||
let op = |dst_val: u64| dst_val.wrapping_add(sign_extend_2s_compl(*imm32));
|
||||
registers.apply_to_dst(dst, op);
|
||||
}
|
||||
ScalarInstruction::IXOR_C { dst, imm32 } => {
|
||||
let op = |dst_val: u64| dst_val ^ sign_extend_2s_compl(*imm32);
|
||||
registers.apply_to_dst(dst, op);
|
||||
}
|
||||
ScalarInstruction::IMULH_R { dst, src } => {
|
||||
registers.apply_to_dst_with_src(dst, src, high_mul);
|
||||
}
|
||||
ScalarInstruction::ISMULH_R { dst, src } => {
|
||||
let op = |dst_val: u64, src_val: u64| {
|
||||
signed_high_mul(dst_val as i64, src_val as i64) as u64
|
||||
};
|
||||
registers.apply_to_dst_with_src(dst, src, op);
|
||||
}
|
||||
ScalarInstruction::IMUL_RCP { dst, imm32 } => {
|
||||
let op = |dst_val: u64| dst_val.wrapping_mul(randomx_reciprocal(*imm32 as u64));
|
||||
registers.apply_to_dst(dst, op);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the RandomX reciprocal of `divisor`.
///
/// Starts from `2^63 / divisor` and extends the quotient by one binary digit for every
/// significant bit of `divisor`, carrying the remainder along (long division).
///
/// # Panics
///
/// Panics if `divisor` is zero or a power of two.
pub fn randomx_reciprocal(divisor: u64) -> u64 {
    assert!(!divisor.is_power_of_two());
    assert_ne!(divisor, 0);

    let mut quotient = P2EXP63 / divisor;
    let mut remainder = P2EXP63 % divisor;

    // Number of significant bits in the divisor (position of the highest set bit + 1).
    let significant_bits = u64::BITS - divisor.leading_zeros();

    for _ in 0..significant_bits {
        // `2 * remainder >= divisor`, written so that it cannot overflow.
        let next_digit_is_one = remainder >= divisor.wrapping_sub(remainder);

        quotient = quotient.wrapping_mul(2);
        remainder = remainder.wrapping_mul(2);

        if next_digit_is_one {
            quotient = quotient.wrapping_add(1);
            remainder = remainder.wrapping_sub(divisor);
        }
    }

    quotient
}
|
||||
|
||||
/// Returns the upper 64 bits of the 128-bit unsigned product `a * b`.
fn high_mul(a: u64, b: u64) -> u64 {
    let product = u128::from(a) * u128::from(b);
    (product >> 64) as u64
}
|
||||
|
||||
/// Returns the upper 64 bits of the 128-bit signed product `a * b`.
fn signed_high_mul(a: i64, b: i64) -> i64 {
    let product = i128::from(a) * i128::from(b);
    (product >> 64) as i64
}
|
||||
|
||||
/// Sign-extends a 32-bit two's complement immediate to 64 bits.
///
/// Values with the top bit set get their upper 32 bits filled with ones; all other
/// values are zero extended.
pub fn sign_extend_2s_compl(imm: u32) -> u64 {
    // Reinterpret as signed, widen (which sign-extends), then reinterpret as unsigned.
    imm as i32 as i64 as u64
}
|
||||
|
||||
/// Extracts the shift amount (0..=3) from a `mod` byte: bits 2 and 3 of `x`.
fn clamp_mod_shift(x: u8) -> u64 {
    u64::from((x >> 2) & 0b11)
}
|
|
@ -1,797 +0,0 @@
|
|||
use std::cmp::Ordering;
|
||||
|
||||
use crate::config::SUPERSCALAR_MAX_SIZE;
|
||||
use crate::registers::{RGroupRegisterID, RGroupRegisters};
|
||||
use crate::superscalar::cpu::{ProgramSchedule, SlotLen};
|
||||
use crate::superscalar::instructions::ScalarInstruction;
|
||||
use crate::superscalar::SSProgram;
|
||||
use crate::{
|
||||
blake2_generator::Blake2Generator,
|
||||
config::RANDOMX_SUPERSCALAR_LATENCY,
|
||||
is_0_or_power_of_2,
|
||||
superscalar::instructions::{OpSource, ScalarInstructionID},
|
||||
};
|
||||
|
||||
const LOOK_FORWARD_CYCLES: usize = 4;
|
||||
const MAX_THROWAWAY_COUNT: usize = 256;
|
||||
|
||||
/// Groups of 3 or 4 Macro-op slots that sum to 16
|
||||
///
|
||||
/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#631-decoding-stage
|
||||
/// table 6.3.1
|
||||
#[derive(Eq, PartialEq, Copy, Clone)]
|
||||
enum DecoderGroup {
|
||||
/// 0: 4-8-4
|
||||
D484,
|
||||
/// 1: 7-3-3-3
|
||||
D7333,
|
||||
/// 2: 3-7-3-3
|
||||
D3733,
|
||||
/// 3: 4-9-3
|
||||
D493,
|
||||
|
||||
/// 4: 4-4-4-4
|
||||
D4444,
|
||||
/// 5: 3-3-10
|
||||
D3310,
|
||||
}
|
||||
|
||||
impl DecoderGroup {
|
||||
fn slot_len(&self, index: usize) -> Option<SlotLen> {
|
||||
match self {
|
||||
DecoderGroup::D484 => match index {
|
||||
0 | 2 => Some(SlotLen::L4),
|
||||
1 => Some(SlotLen::L8),
|
||||
_ => None,
|
||||
},
|
||||
DecoderGroup::D7333 => match index {
|
||||
0 => Some(SlotLen::L7),
|
||||
1..=3 => Some(SlotLen::L3),
|
||||
_ => None,
|
||||
},
|
||||
DecoderGroup::D3733 => match index {
|
||||
0 | 2 | 3 => Some(SlotLen::L3),
|
||||
1 => Some(SlotLen::L7),
|
||||
_ => None,
|
||||
},
|
||||
DecoderGroup::D493 => match index {
|
||||
0 => Some(SlotLen::L4),
|
||||
1 => Some(SlotLen::L9),
|
||||
2 => Some(SlotLen::L3),
|
||||
_ => None,
|
||||
},
|
||||
DecoderGroup::D4444 => match index {
|
||||
0..=3 => Some(SlotLen::L4),
|
||||
_ => None,
|
||||
},
|
||||
DecoderGroup::D3310 => match index {
|
||||
0 | 1 => Some(SlotLen::L3),
|
||||
2 => Some(SlotLen::L10),
|
||||
_ => None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over the lengths with a bool `is_last`
|
||||
pub fn iter_slot_len(&self) -> impl Iterator<Item = (SlotLen, bool)> + '_ {
|
||||
(0..self.size()).map(|i| (self.slot_len(i).unwrap(), self.size() - 1 == i))
|
||||
}
|
||||
|
||||
pub fn size(&self) -> usize {
|
||||
match self {
|
||||
DecoderGroup::D484 => 3,
|
||||
DecoderGroup::D7333 => 4,
|
||||
DecoderGroup::D3733 => 4,
|
||||
DecoderGroup::D493 => 3,
|
||||
DecoderGroup::D4444 => 4,
|
||||
DecoderGroup::D3310 => 3,
|
||||
}
|
||||
}
|
||||
|
||||
fn next_group(
|
||||
gen: &mut Blake2Generator,
|
||||
instruction: Option<ScalarInstructionID>,
|
||||
total_muls_low: bool,
|
||||
) -> DecoderGroup {
|
||||
if matches!(
|
||||
instruction,
|
||||
Some(ScalarInstructionID::IMULH_R) | Some(ScalarInstructionID::ISMULH_R)
|
||||
) {
|
||||
return DecoderGroup::D3310;
|
||||
}
|
||||
|
||||
if total_muls_low {
|
||||
return DecoderGroup::D4444;
|
||||
}
|
||||
|
||||
if instruction == Some(ScalarInstructionID::IMUL_RCP) {
|
||||
return match (gen.next_u8() & 1).cmp(&1) {
|
||||
Ordering::Equal => DecoderGroup::D484,
|
||||
Ordering::Less => DecoderGroup::D493,
|
||||
Ordering::Greater => unreachable!(),
|
||||
};
|
||||
}
|
||||
|
||||
match gen.next_u8() & 3 {
|
||||
0 => DecoderGroup::D484,
|
||||
1 => DecoderGroup::D7333,
|
||||
2 => DecoderGroup::D3733,
|
||||
3 => DecoderGroup::D493,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub(crate) struct SingleRegisterInfo {
|
||||
id: RGroupRegisterID,
|
||||
next_ready: usize,
|
||||
last_instruction: Option<ScalarInstructionID>,
|
||||
last_source: OpSource,
|
||||
}
|
||||
|
||||
impl SingleRegisterInfo {
|
||||
pub fn id(&self) -> RGroupRegisterID {
|
||||
self.id
|
||||
}
|
||||
pub fn next_ready(&self) -> usize {
|
||||
self.next_ready
|
||||
}
|
||||
pub fn last_instruction(&self) -> Option<ScalarInstructionID> {
|
||||
self.last_instruction
|
||||
}
|
||||
pub fn last_source(&self) -> OpSource {
|
||||
self.last_source
|
||||
}
|
||||
pub fn set_next_ready(&mut self, next_ready: usize) {
|
||||
self.next_ready = next_ready
|
||||
}
|
||||
pub fn set_last_instruction(&mut self, last_instruction: ScalarInstructionID) {
|
||||
self.last_instruction = Some(last_instruction);
|
||||
}
|
||||
pub fn set_last_source(&mut self, last_source: OpSource) {
|
||||
self.last_source = last_source
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct RegistersInfo {
|
||||
registers: [SingleRegisterInfo; 8],
|
||||
}
|
||||
|
||||
impl Default for RegistersInfo {
|
||||
fn default() -> Self {
|
||||
let default = SingleRegisterInfo {
|
||||
id: RGroupRegisterID::R0,
|
||||
next_ready: 0,
|
||||
last_instruction: None,
|
||||
last_source: OpSource::Constant,
|
||||
};
|
||||
let mut default = [default; 8];
|
||||
let reg_ids = [
|
||||
RGroupRegisterID::R1,
|
||||
RGroupRegisterID::R2,
|
||||
RGroupRegisterID::R3,
|
||||
RGroupRegisterID::R4,
|
||||
RGroupRegisterID::R5,
|
||||
RGroupRegisterID::R6,
|
||||
RGroupRegisterID::R7,
|
||||
];
|
||||
for (reg, id) in default.iter_mut().skip(1).zip(reg_ids) {
|
||||
reg.id = id;
|
||||
}
|
||||
RegistersInfo { registers: default }
|
||||
}
|
||||
}
|
||||
|
||||
impl RegistersInfo {
|
||||
pub fn iter(&self) -> impl Iterator<Item = &SingleRegisterInfo> {
|
||||
self.registers.iter()
|
||||
}
|
||||
pub fn ready_at_cycle(&self, cycle: usize) -> Vec<&SingleRegisterInfo> {
|
||||
self.registers
|
||||
.iter()
|
||||
.filter(|reg| reg.next_ready <= cycle)
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
pub fn get_mut(&mut self, id: RGroupRegisterID) -> &mut SingleRegisterInfo {
|
||||
&mut self.registers[id as usize]
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn select_register(
|
||||
gen: &mut Blake2Generator,
|
||||
available: &[&SingleRegisterInfo],
|
||||
) -> Option<RGroupRegisterID> {
|
||||
if available.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let index = if available.len() > 1 {
|
||||
// available is <= 8 so as is safe
|
||||
(gen.next_u32() % available.len() as u32)
|
||||
.try_into()
|
||||
.expect("Could not fit u32 into usize")
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
Some(available[index].id)
|
||||
}
|
||||
|
||||
/// Returns an imm32 if the instruction requires one.
|
||||
fn get_imm32(gen: &mut Blake2Generator, id: &ScalarInstructionID) -> Option<u32> {
|
||||
match id {
|
||||
ScalarInstructionID::IADD_C | ScalarInstructionID::IXOR_C => Some(gen.next_u32()),
|
||||
ScalarInstructionID::IROR_C => {
|
||||
// imm32 % 64 != 0
|
||||
Some(
|
||||
loop {
|
||||
let imm8 = gen.next_u8() & 63;
|
||||
if imm8 != 0 {
|
||||
break imm8;
|
||||
}
|
||||
}
|
||||
.into(),
|
||||
)
|
||||
}
|
||||
ScalarInstructionID::IMUL_RCP => {
|
||||
// imm32 != 0, imm32 != 2N
|
||||
Some(loop {
|
||||
let imm32 = gen.next_u32();
|
||||
if !is_0_or_power_of_2(imm32.into()) {
|
||||
break imm32;
|
||||
}
|
||||
})
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_mod_shift(gen: &mut Blake2Generator, id: &ScalarInstructionID) -> Option<u8> {
|
||||
match id {
|
||||
// keep the shit between 0 and 3.
|
||||
ScalarInstructionID::IADD_RS => Some((gen.next_u8() >> 2) % 4),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Used during [`ScalarInstructionBuilder`] creation. Returns the [`OpSource`] to give the register
|
||||
/// if this is known otherwise [`None`] is returned and this field will be filled later.
|
||||
fn get_src_to_give_register(
|
||||
gen: &mut Blake2Generator,
|
||||
id: &ScalarInstructionID,
|
||||
) -> Option<OpSource> {
|
||||
match id {
|
||||
ScalarInstructionID::IADD_C
|
||||
| ScalarInstructionID::IXOR_C
|
||||
| ScalarInstructionID::IROR_C
|
||||
| ScalarInstructionID::IMUL_RCP => Some(OpSource::Constant),
|
||||
ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => {
|
||||
// not actually the source value, the Monero C++ version sets this field to a random
|
||||
// value, this has an issue of becoming an actual meaningful value though so we handle
|
||||
// those rare cases here:
|
||||
Some(OpSource::from_rand_i32(gen.next_u32() as i32))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
struct ScalarInstructionBuilder {
|
||||
/// The id of the instruction we are building.
|
||||
id: ScalarInstructionID,
|
||||
/// The true source register - the one we are actually getting the value from will be
|
||||
/// None if this instruction doesn't need a register source.
|
||||
true_src: Option<RGroupRegisterID>,
|
||||
/// The value src we tell the dst register, if this is a register then most of the time this
|
||||
/// is the same as [`true_src`] but for `IMULH_R` and `ISMULH_R` it's not.
|
||||
///
|
||||
/// `IMULH_R` and `ISMULH_R` generate a random i32 and set it for this slot .
|
||||
src_to_give_register: Option<OpSource>,
|
||||
/// The destination register for this instruction.
|
||||
dst: Option<RGroupRegisterID>,
|
||||
/// A constant used in some instructions.
|
||||
imm32: Option<u32>,
|
||||
/// used in IADD_RS
|
||||
mod_shift: Option<u8>,
|
||||
}
|
||||
|
||||
impl ScalarInstructionBuilder {
|
||||
/// Creates a new [`ScalarInstructionBuilder`].
|
||||
///
|
||||
pub fn new(
|
||||
gen: &mut Blake2Generator,
|
||||
slot_len: &SlotLen,
|
||||
group: &DecoderGroup,
|
||||
is_last: bool,
|
||||
) -> Self {
|
||||
// https://github.com/tevador/RandomX/blob/master/doc/specs.md#632-instruction-selection
|
||||
let id = match slot_len {
|
||||
SlotLen::L3 if !is_last => match gen.next_u8() & 1 {
|
||||
0 => ScalarInstructionID::ISUB_R,
|
||||
_ => ScalarInstructionID::IXOR_R,
|
||||
},
|
||||
SlotLen::L3 => match gen.next_u8() & 3 {
|
||||
0 => ScalarInstructionID::ISUB_R,
|
||||
1 => ScalarInstructionID::IXOR_R,
|
||||
2 => ScalarInstructionID::IMULH_R,
|
||||
_ => ScalarInstructionID::ISMULH_R,
|
||||
},
|
||||
SlotLen::L4 if group == &DecoderGroup::D4444 && !is_last => ScalarInstructionID::IMUL_R,
|
||||
SlotLen::L4 => match gen.next_u8() & 1 {
|
||||
0 => ScalarInstructionID::IROR_C,
|
||||
_ => ScalarInstructionID::IADD_RS,
|
||||
},
|
||||
SlotLen::L7 | SlotLen::L8 | SlotLen::L9 => match gen.next_u8() & 1 {
|
||||
0 => ScalarInstructionID::IXOR_C,
|
||||
_ => ScalarInstructionID::IADD_C,
|
||||
},
|
||||
SlotLen::L10 => ScalarInstructionID::IMUL_RCP,
|
||||
};
|
||||
|
||||
Self {
|
||||
id,
|
||||
true_src: None,
|
||||
src_to_give_register: get_src_to_give_register(gen, &id),
|
||||
dst: None,
|
||||
imm32: get_imm32(gen, &id),
|
||||
mod_shift: get_mod_shift(gen, &id),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the source of the operation
|
||||
fn set_src(&mut self, src: RGroupRegisterID) {
|
||||
self.true_src = Some(src);
|
||||
if self.src_to_give_register.is_none() {
|
||||
// If the src_to_give_register field hasn't already been set then set it now.
|
||||
// The only fields that have true_src as a register with a different src_to_give_register
|
||||
// set this field at the start.
|
||||
self.src_to_give_register = Some(OpSource::Register(src));
|
||||
}
|
||||
}
|
||||
|
||||
/// Select the source of this operation from the given registers.
|
||||
///
|
||||
/// If no registers are available [`false`] is returned.
|
||||
pub fn select_source(
|
||||
&mut self,
|
||||
gen: &mut Blake2Generator,
|
||||
cycle: usize,
|
||||
registers_info: &RegistersInfo,
|
||||
) -> bool {
|
||||
let available_registers = registers_info.ready_at_cycle(cycle);
|
||||
//if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination
|
||||
if available_registers.len() == 2
|
||||
&& self.id == ScalarInstructionID::IADD_RS
|
||||
&& (available_registers[0].id() == RGroupRegisterID::R5
|
||||
|| available_registers[1].id() == RGroupRegisterID::R5)
|
||||
{
|
||||
self.set_src(RGroupRegisterID::R5);
|
||||
return true;
|
||||
}
|
||||
if let Some(reg) = select_register(gen, &available_registers) {
|
||||
self.set_src(reg);
|
||||
return true;
|
||||
};
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Selects the destination of this operation from the given registers.
|
||||
///
|
||||
/// If no registers are available [`false`] is returned.
|
||||
fn select_destination(
|
||||
&mut self,
|
||||
gen: &mut Blake2Generator,
|
||||
cycle: usize,
|
||||
allow_chain_mul: bool,
|
||||
registers_info: &RegistersInfo,
|
||||
) -> bool {
|
||||
let available_registers = registers_info
|
||||
.iter()
|
||||
.filter(|reg| {
|
||||
reg.next_ready() <= cycle
|
||||
&& (self.id.can_dst_be_src() || Some(reg.id()) != self.true_src)
|
||||
&& (allow_chain_mul
|
||||
|| self.id.instruction_group() != ScalarInstructionID::IMUL_R
|
||||
|| reg.last_instruction() != Some(ScalarInstructionID::IMUL_R))
|
||||
&& (Some(self.id.instruction_group()) != reg.last_instruction()
|
||||
|| self.src_to_give_register != Some(reg.last_source()))
|
||||
&& (reg.id() != RGroupRegisterID::R5 || self.id != ScalarInstructionID::IADD_RS)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let Some(reg) = select_register(gen, &available_registers) else {
|
||||
return false;
|
||||
};
|
||||
self.dst = Some(reg);
|
||||
true
|
||||
}
|
||||
|
||||
fn construct(self) -> ScalarInstruction {
|
||||
match self.id {
|
||||
ScalarInstructionID::ISUB_R => ScalarInstruction::ISUB_R {
|
||||
dst: self.dst.unwrap(),
|
||||
src: self.true_src.unwrap(),
|
||||
},
|
||||
ScalarInstructionID::IXOR_R => ScalarInstruction::IXOR_R {
|
||||
dst: self.dst.unwrap(),
|
||||
src: self.true_src.unwrap(),
|
||||
},
|
||||
ScalarInstructionID::IADD_RS => ScalarInstruction::IADD_RS {
|
||||
dst: self.dst.unwrap(),
|
||||
src: self.true_src.unwrap(),
|
||||
mod_shift: self.mod_shift.unwrap(),
|
||||
},
|
||||
ScalarInstructionID::IMUL_R => ScalarInstruction::IMUL_R {
|
||||
dst: self.dst.unwrap(),
|
||||
src: self.true_src.unwrap(),
|
||||
},
|
||||
ScalarInstructionID::IROR_C => ScalarInstruction::IROR_C {
|
||||
dst: self.dst.unwrap(),
|
||||
imm32: self.imm32.unwrap(),
|
||||
},
|
||||
ScalarInstructionID::IADD_C => ScalarInstruction::IADD_C {
|
||||
dst: self.dst.unwrap(),
|
||||
imm32: self.imm32.unwrap(),
|
||||
},
|
||||
ScalarInstructionID::IXOR_C => ScalarInstruction::IXOR_C {
|
||||
dst: self.dst.unwrap(),
|
||||
imm32: self.imm32.unwrap(),
|
||||
},
|
||||
ScalarInstructionID::IMULH_R => ScalarInstruction::IMULH_R {
|
||||
dst: self.dst.unwrap(),
|
||||
src: self.true_src.unwrap(),
|
||||
},
|
||||
ScalarInstructionID::ISMULH_R => ScalarInstruction::ISMULH_R {
|
||||
dst: self.dst.unwrap(),
|
||||
src: self.true_src.unwrap(),
|
||||
},
|
||||
ScalarInstructionID::IMUL_RCP => ScalarInstruction::IMUL_RCP {
|
||||
dst: self.dst.unwrap(),
|
||||
imm32: self.imm32.unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct ProgramState {
|
||||
/// The current cycle we are generating for.
|
||||
current_cycle: usize,
|
||||
/// The cycle the last operation will complete at.
|
||||
last_op_completes_at: usize,
|
||||
|
||||
/// The amount of multiplication instructions the program
|
||||
/// has generated.
|
||||
mul_count: usize,
|
||||
/// The amount of instructions in a row the program has thrown
|
||||
/// away because they couldn't be completed.
|
||||
throw_away_count: usize,
|
||||
/// The execution port schedule of the program.
|
||||
program_schedule: ProgramSchedule,
|
||||
/// Information on the registers state.
|
||||
registers_info: RegistersInfo,
|
||||
/// The program
|
||||
program: Vec<ScalarInstruction>,
|
||||
}
|
||||
|
||||
impl ProgramState {
|
||||
fn allow_chain_mul(&self) -> bool {
|
||||
self.throw_away_count > 0
|
||||
}
|
||||
}
|
||||
|
||||
/// A state machine that controls instruction generation.
|
||||
enum ScalarInstructionBuilderSM {
|
||||
/// The generate instruction state, the next call will
|
||||
/// start a new instruction.
|
||||
Generate {
|
||||
/// The last instruction generated.
|
||||
last_instruction: Option<ScalarInstructionID>,
|
||||
},
|
||||
/// A partially completed instruction, the next call will
|
||||
/// push this instruction forward.
|
||||
PartiallyComplete {
|
||||
/// The instruction currently being generated.
|
||||
builder: ScalarInstructionBuilder,
|
||||
/// The macro op of the instruction we are going
|
||||
/// to do next.
|
||||
macro_op_idx: usize,
|
||||
},
|
||||
/// NULL state, this state will only be finished on is the program is full.
|
||||
Null,
|
||||
}
|
||||
|
||||
impl ScalarInstructionBuilderSM {
|
||||
pub fn push_forward(
|
||||
&mut self,
|
||||
gen: &mut Blake2Generator,
|
||||
decoder_group: &DecoderGroup,
|
||||
slot_len: &SlotLen,
|
||||
is_last_slot: bool,
|
||||
program_state: &mut ProgramState,
|
||||
) {
|
||||
loop {
|
||||
match std::mem::replace(self, ScalarInstructionBuilderSM::Null) {
|
||||
ScalarInstructionBuilderSM::Null => {
|
||||
return;
|
||||
}
|
||||
ScalarInstructionBuilderSM::Generate { .. } => {
|
||||
if program_state.program_schedule.is_full()
|
||||
|| program_state.program.len() >= SUPERSCALAR_MAX_SIZE
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
let builder =
|
||||
ScalarInstructionBuilder::new(gen, slot_len, decoder_group, is_last_slot);
|
||||
|
||||
*self = ScalarInstructionBuilderSM::PartiallyComplete {
|
||||
builder,
|
||||
macro_op_idx: 0,
|
||||
};
|
||||
}
|
||||
ScalarInstructionBuilderSM::PartiallyComplete {
|
||||
mut builder,
|
||||
mut macro_op_idx,
|
||||
} => {
|
||||
let top_cycle = program_state.current_cycle;
|
||||
|
||||
if macro_op_idx >= builder.id.number_of_macro_ops() {
|
||||
*self = ScalarInstructionBuilderSM::Generate {
|
||||
last_instruction: Some(builder.id),
|
||||
};
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(next_macro_op) = builder.id.macro_op(macro_op_idx) else {
|
||||
unreachable!("We just checked if the macro op idx is too high")
|
||||
};
|
||||
|
||||
let Some(opportunity) =
|
||||
program_state.program_schedule.earliest_cycle_for_macro_op(
|
||||
&next_macro_op,
|
||||
program_state.current_cycle,
|
||||
program_state.last_op_completes_at,
|
||||
)
|
||||
else {
|
||||
program_state.program_schedule.set_full();
|
||||
return;
|
||||
};
|
||||
|
||||
let mut scheduled_cycle = opportunity.cycle();
|
||||
|
||||
if !Self::check_set_src(
|
||||
&mut builder,
|
||||
macro_op_idx,
|
||||
gen,
|
||||
&mut scheduled_cycle,
|
||||
&mut program_state.current_cycle,
|
||||
&program_state.registers_info,
|
||||
) {
|
||||
// If the source couldn't be set throw the instruction away
|
||||
if program_state.throw_away_count < MAX_THROWAWAY_COUNT {
|
||||
program_state.throw_away_count += 1;
|
||||
*self = ScalarInstructionBuilderSM::Generate {
|
||||
last_instruction: Some(builder.id),
|
||||
};
|
||||
continue;
|
||||
}
|
||||
// If too many instructions are thrown away return for the next decoder
|
||||
// idx
|
||||
*self = ScalarInstructionBuilderSM::Generate {
|
||||
last_instruction: None,
|
||||
};
|
||||
return;
|
||||
}
|
||||
|
||||
let allow_chain_mul = program_state.allow_chain_mul();
|
||||
|
||||
if !Self::check_set_dst(
|
||||
&mut builder,
|
||||
macro_op_idx,
|
||||
gen,
|
||||
&mut scheduled_cycle,
|
||||
&mut program_state.current_cycle,
|
||||
allow_chain_mul,
|
||||
&program_state.registers_info,
|
||||
) {
|
||||
// If the source couldn't be set throw the instruction away
|
||||
if program_state.throw_away_count < MAX_THROWAWAY_COUNT {
|
||||
program_state.throw_away_count += 1;
|
||||
*self = ScalarInstructionBuilderSM::Generate {
|
||||
last_instruction: Some(builder.id),
|
||||
};
|
||||
continue;
|
||||
}
|
||||
// If too many instructions are thrown away return for the next decoder
|
||||
// idx
|
||||
*self = ScalarInstructionBuilderSM::Generate {
|
||||
last_instruction: None,
|
||||
};
|
||||
return;
|
||||
}
|
||||
|
||||
program_state.throw_away_count = 0;
|
||||
|
||||
let Some(scheduled_cycle) = program_state
|
||||
.program_schedule
|
||||
.schedule_macro_op_at_earliest(
|
||||
&next_macro_op,
|
||||
scheduled_cycle,
|
||||
program_state.last_op_completes_at,
|
||||
)
|
||||
else {
|
||||
program_state.program_schedule.set_full();
|
||||
return;
|
||||
};
|
||||
|
||||
let completes_at = scheduled_cycle + next_macro_op.cycles_to_complete();
|
||||
program_state.last_op_completes_at = completes_at;
|
||||
|
||||
if macro_op_idx == builder.id.macro_op_to_store_res() {
|
||||
let reg = program_state.registers_info.get_mut(builder.dst.unwrap());
|
||||
reg.set_next_ready(completes_at);
|
||||
reg.set_last_source(builder.src_to_give_register.unwrap());
|
||||
reg.set_last_instruction(builder.id.instruction_group());
|
||||
}
|
||||
|
||||
macro_op_idx += 1;
|
||||
program_state.current_cycle = top_cycle;
|
||||
|
||||
if scheduled_cycle >= RANDOMX_SUPERSCALAR_LATENCY {
|
||||
program_state.program_schedule.set_full();
|
||||
}
|
||||
|
||||
if macro_op_idx >= builder.id.number_of_macro_ops() {
|
||||
if builder.id.is_multiplication() {
|
||||
program_state.mul_count += 1;
|
||||
}
|
||||
*self = ScalarInstructionBuilderSM::Generate {
|
||||
last_instruction: Some(builder.id),
|
||||
};
|
||||
program_state.program.push(builder.construct());
|
||||
} else {
|
||||
*self = ScalarInstructionBuilderSM::PartiallyComplete {
|
||||
builder,
|
||||
macro_op_idx,
|
||||
};
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Try set the instructions source.
|
||||
///
|
||||
/// Will return true if the src has been set or if its not the correct macro op to set the dst.
|
||||
///
|
||||
/// Will return false if its the correct macro op to set the dst and the src couldn't be set.
|
||||
fn check_set_dst(
|
||||
builder: &mut ScalarInstructionBuilder,
|
||||
macro_op_idx: usize,
|
||||
gen: &mut Blake2Generator,
|
||||
scheduled_cycle: &mut usize,
|
||||
cycle: &mut usize,
|
||||
allow_chain_mul: bool,
|
||||
registers_info: &RegistersInfo,
|
||||
) -> bool {
|
||||
if builder.id.macro_op_to_select_dst() != macro_op_idx {
|
||||
// We don't need to set the src at this macro op.
|
||||
return true;
|
||||
}
|
||||
|
||||
let mut set = false;
|
||||
for _ in 0..LOOK_FORWARD_CYCLES {
|
||||
if !builder.select_destination(gen, *scheduled_cycle, allow_chain_mul, registers_info) {
|
||||
*scheduled_cycle += 1;
|
||||
*cycle += 1;
|
||||
} else {
|
||||
set = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
set
|
||||
}
|
||||
|
||||
/// Try set the instructions source.
|
||||
///
|
||||
/// Will return true if the src has been set or if its not he correct macro op to set the src.
|
||||
///
|
||||
/// Will return false if its the correct macro op to set the src and the src couldn't be set.
|
||||
fn check_set_src(
|
||||
builder: &mut ScalarInstructionBuilder,
|
||||
macro_op_idx: usize,
|
||||
gen: &mut Blake2Generator,
|
||||
scheduled_cycle: &mut usize,
|
||||
cycle: &mut usize,
|
||||
registers_info: &RegistersInfo,
|
||||
) -> bool {
|
||||
if builder.id.macro_op_to_select_src() != Some(macro_op_idx) {
|
||||
// We don't need to set the src at this macro op.
|
||||
return true;
|
||||
}
|
||||
|
||||
let mut set = false;
|
||||
for _ in 0..LOOK_FORWARD_CYCLES {
|
||||
if !builder.select_source(gen, *scheduled_cycle, registers_info) {
|
||||
*scheduled_cycle += 1;
|
||||
*cycle += 1;
|
||||
} else {
|
||||
set = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
set
|
||||
}
|
||||
|
||||
pub fn get_instruction_id(&self) -> Option<ScalarInstructionID> {
|
||||
match self {
|
||||
ScalarInstructionBuilderSM::Generate { last_instruction } => *last_instruction,
|
||||
ScalarInstructionBuilderSM::PartiallyComplete { builder, .. } => Some(builder.id),
|
||||
ScalarInstructionBuilderSM::Null => {
|
||||
panic!("Should not be calling this function in this state")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn generate(gen: &mut Blake2Generator) -> SSProgram {
|
||||
let mut program_state = ProgramState::default();
|
||||
|
||||
let mut instruction_sm = ScalarInstructionBuilderSM::Generate {
|
||||
last_instruction: None,
|
||||
};
|
||||
|
||||
for decoder_cycle in 0..RANDOMX_SUPERSCALAR_LATENCY {
|
||||
if program_state.program_schedule.is_full()
|
||||
|| program_state.program.len() >= SUPERSCALAR_MAX_SIZE
|
||||
{
|
||||
break;
|
||||
}
|
||||
let current_decode_group = DecoderGroup::next_group(
|
||||
gen,
|
||||
instruction_sm.get_instruction_id(),
|
||||
program_state.mul_count < decoder_cycle + 1,
|
||||
);
|
||||
|
||||
for (slot_len, is_last) in current_decode_group.iter_slot_len() {
|
||||
instruction_sm.push_forward(
|
||||
gen,
|
||||
¤t_decode_group,
|
||||
&slot_len,
|
||||
is_last,
|
||||
&mut program_state,
|
||||
);
|
||||
}
|
||||
program_state.current_cycle += 1;
|
||||
}
|
||||
|
||||
//Calculate ASIC latency:
|
||||
//Assumes 1 cycle latency for all operations and unlimited parallelization.
|
||||
let mut asic_latencies = RGroupRegisters::default();
|
||||
for instr in program_state.program.iter() {
|
||||
let mut latency_dst = asic_latencies.get(&instr.dst());
|
||||
latency_dst += 1;
|
||||
let latency_src = if let Some(src) = instr.src() {
|
||||
asic_latencies.get(&src) + 1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
asic_latencies.set(&instr.dst(), latency_src.max(latency_dst));
|
||||
}
|
||||
|
||||
let mut reg_with_max_latency = RGroupRegisterID::R0;
|
||||
for reg in RGroupRegisterID::iter().skip(1) {
|
||||
if asic_latencies.get(®) > asic_latencies.get(®_with_max_latency) {
|
||||
reg_with_max_latency = reg
|
||||
}
|
||||
}
|
||||
|
||||
SSProgram {
|
||||
program: program_state.program,
|
||||
reg_with_max_latency,
|
||||
}
|
||||
}
|
|
@ -1,264 +0,0 @@
|
|||
use crate::registers::RGroupRegisterID;
|
||||
use crate::superscalar::cpu::MacroOp;
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum ScalarInstructionID {
|
||||
/// dst = dst - src
|
||||
ISUB_R,
|
||||
/// dst = dst ^ src
|
||||
IXOR_R,
|
||||
/// dst = dst + (src << mod_shift)
|
||||
IADD_RS,
|
||||
/// dst = dst * src
|
||||
IMUL_R,
|
||||
/// dst = dst >>> imm32
|
||||
IROR_C,
|
||||
/// dst = dst + imm32
|
||||
IADD_C,
|
||||
/// dst = dst ^ imm32
|
||||
IXOR_C,
|
||||
/// dst = (dst * src) >> 64
|
||||
IMULH_R,
|
||||
/// dst = (dst * src) >> 64 (signed)
|
||||
ISMULH_R,
|
||||
/// dst = 2x / imm32 * dst
|
||||
IMUL_RCP,
|
||||
}
|
||||
|
||||
impl ScalarInstructionID {
|
||||
pub fn macro_op_to_select_src(&self) -> Option<usize> {
|
||||
match self {
|
||||
ScalarInstructionID::ISUB_R
|
||||
| ScalarInstructionID::IXOR_R
|
||||
| ScalarInstructionID::IADD_RS
|
||||
| ScalarInstructionID::IMUL_R => Some(0),
|
||||
ScalarInstructionID::IROR_C
|
||||
| ScalarInstructionID::IADD_C
|
||||
| ScalarInstructionID::IXOR_C => None,
|
||||
ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => Some(1),
|
||||
ScalarInstructionID::IMUL_RCP => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn macro_op_to_select_dst(&self) -> usize {
|
||||
match self {
|
||||
ScalarInstructionID::IMUL_RCP => 1,
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn macro_op_to_store_res(&self) -> usize {
|
||||
match self {
|
||||
ScalarInstructionID::IMULH_R
|
||||
| ScalarInstructionID::ISMULH_R
|
||||
| ScalarInstructionID::IMUL_RCP => 1,
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_multiplication(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ScalarInstructionID::IMUL_R
|
||||
| ScalarInstructionID::IMULH_R
|
||||
| ScalarInstructionID::ISMULH_R
|
||||
| ScalarInstructionID::IMUL_RCP
|
||||
)
|
||||
}
|
||||
/// is the destination allowed to be the same as the source
|
||||
pub fn can_dst_be_src(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the group of this operation.
|
||||
///
|
||||
/// A group is related instructions that effect register choice during program construction.
|
||||
pub fn instruction_group(&self) -> ScalarInstructionID {
|
||||
match self {
|
||||
// The only 2 instructions in the same group is ISUB_R & IADD_RS
|
||||
// We could make group an enum but for just these 2 i don't think
|
||||
// it's worth it.
|
||||
ScalarInstructionID::ISUB_R => ScalarInstructionID::IADD_RS,
|
||||
id => *id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn number_of_macro_ops(&self) -> usize {
|
||||
match self {
|
||||
ScalarInstructionID::ISUB_R
|
||||
| ScalarInstructionID::IXOR_R
|
||||
| ScalarInstructionID::IADD_RS
|
||||
| ScalarInstructionID::IMUL_R
|
||||
| ScalarInstructionID::IROR_C
|
||||
| ScalarInstructionID::IADD_C
|
||||
| ScalarInstructionID::IXOR_C => 1,
|
||||
ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => 3,
|
||||
ScalarInstructionID::IMUL_RCP => 2,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn macro_op(&self, i: usize) -> Option<MacroOp> {
|
||||
Some(match self {
|
||||
ScalarInstructionID::ISUB_R => MacroOp::SUB_RR,
|
||||
ScalarInstructionID::IXOR_R => MacroOp::XOR_RR,
|
||||
ScalarInstructionID::IADD_RS => MacroOp::LEA_SIB,
|
||||
ScalarInstructionID::IMUL_R => MacroOp::IMUL_RR { dependant: false },
|
||||
ScalarInstructionID::IROR_C => MacroOp::ROR_RI,
|
||||
ScalarInstructionID::IADD_C => MacroOp::ADD_RI,
|
||||
ScalarInstructionID::IXOR_C => MacroOp::XOR_RI,
|
||||
ScalarInstructionID::IMULH_R => match i {
|
||||
0 => MacroOp::MOV_RR,
|
||||
1 => MacroOp::MUL_R,
|
||||
2 => MacroOp::MOV_RR,
|
||||
_ => return None,
|
||||
},
|
||||
ScalarInstructionID::ISMULH_R => match i {
|
||||
0 => MacroOp::MOV_RR,
|
||||
1 => MacroOp::IMUL_R,
|
||||
2 => MacroOp::MOV_RR,
|
||||
_ => return None,
|
||||
},
|
||||
ScalarInstructionID::IMUL_RCP => match i {
|
||||
0 => MacroOp::MOV_RI,
|
||||
1 => MacroOp::IMUL_RR { dependant: true },
|
||||
_ => return None,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum ScalarInstruction {
|
||||
/// dst = dst - src
|
||||
ISUB_R {
|
||||
dst: RGroupRegisterID,
|
||||
src: RGroupRegisterID,
|
||||
},
|
||||
/// dst = dst ^ src
|
||||
IXOR_R {
|
||||
dst: RGroupRegisterID,
|
||||
src: RGroupRegisterID,
|
||||
},
|
||||
/// dst = dst + (src << mod_shift)
|
||||
IADD_RS {
|
||||
dst: RGroupRegisterID,
|
||||
src: RGroupRegisterID,
|
||||
mod_shift: u8,
|
||||
},
|
||||
/// dst = dst * src
|
||||
IMUL_R {
|
||||
dst: RGroupRegisterID,
|
||||
src: RGroupRegisterID,
|
||||
},
|
||||
/// dst = dst >>> imm32
|
||||
IROR_C { dst: RGroupRegisterID, imm32: u32 },
|
||||
/// dst = dst + imm32
|
||||
IADD_C { dst: RGroupRegisterID, imm32: u32 },
|
||||
/// dst = dst ^ imm32
|
||||
IXOR_C { dst: RGroupRegisterID, imm32: u32 },
|
||||
/// dst = (dst * src) >> 64
|
||||
IMULH_R {
|
||||
dst: RGroupRegisterID,
|
||||
src: RGroupRegisterID,
|
||||
},
|
||||
/// dst = (dst * src) >> 64 (signed)
|
||||
ISMULH_R {
|
||||
dst: RGroupRegisterID,
|
||||
src: RGroupRegisterID,
|
||||
},
|
||||
/// dst = 2x / imm32 * dst
|
||||
IMUL_RCP { dst: RGroupRegisterID, imm32: u32 },
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
pub enum OpSource {
|
||||
Constant,
|
||||
Register(RGroupRegisterID),
|
||||
/// Not actually a source, but the C++ version sets this field to a
|
||||
/// random value on some instructions.
|
||||
Randi32(i32),
|
||||
}
|
||||
|
||||
impl OpSource {
|
||||
pub fn from_rand_i32(x: i32) -> Self {
|
||||
match x {
|
||||
-1 => OpSource::Constant,
|
||||
0 => OpSource::Register(RGroupRegisterID::R0),
|
||||
1 => OpSource::Register(RGroupRegisterID::R1),
|
||||
2 => OpSource::Register(RGroupRegisterID::R2),
|
||||
3 => OpSource::Register(RGroupRegisterID::R3),
|
||||
4 => OpSource::Register(RGroupRegisterID::R4),
|
||||
5 => OpSource::Register(RGroupRegisterID::R5),
|
||||
6 => OpSource::Register(RGroupRegisterID::R6),
|
||||
7 => OpSource::Register(RGroupRegisterID::R7),
|
||||
rand => OpSource::Randi32(rand),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarInstruction {
|
||||
pub fn dst(&self) -> RGroupRegisterID {
|
||||
match self {
|
||||
ScalarInstruction::ISUB_R { dst, .. }
|
||||
| ScalarInstruction::IXOR_R { dst, .. }
|
||||
| ScalarInstruction::IADD_RS { dst, .. }
|
||||
| ScalarInstruction::IMUL_R { dst, .. }
|
||||
| ScalarInstruction::IROR_C { dst, .. }
|
||||
| ScalarInstruction::IADD_C { dst, .. }
|
||||
| ScalarInstruction::IXOR_C { dst, .. }
|
||||
| ScalarInstruction::IMULH_R { dst, .. }
|
||||
| ScalarInstruction::ISMULH_R { dst, .. }
|
||||
| ScalarInstruction::IMUL_RCP { dst, .. } => *dst,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn src(&self) -> Option<RGroupRegisterID> {
|
||||
match self {
|
||||
ScalarInstruction::ISUB_R { src, .. }
|
||||
| ScalarInstruction::IXOR_R { src, .. }
|
||||
| ScalarInstruction::IADD_RS { src, .. }
|
||||
| ScalarInstruction::IMUL_R { src, .. }
|
||||
| ScalarInstruction::IMULH_R { src, .. }
|
||||
| ScalarInstruction::ISMULH_R { src, .. } => Some(*src),
|
||||
ScalarInstruction::IROR_C { .. }
|
||||
| ScalarInstruction::IADD_C { .. }
|
||||
| ScalarInstruction::IXOR_C { .. }
|
||||
| ScalarInstruction::IMUL_RCP { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn id(&self) -> ScalarInstructionID {
|
||||
match self {
|
||||
ScalarInstruction::ISUB_R { .. } => ScalarInstructionID::ISUB_R,
|
||||
ScalarInstruction::IXOR_R { .. } => ScalarInstructionID::IXOR_R,
|
||||
ScalarInstruction::IADD_RS { .. } => ScalarInstructionID::IADD_RS,
|
||||
ScalarInstruction::IMUL_R { .. } => ScalarInstructionID::IMUL_R,
|
||||
ScalarInstruction::IROR_C { .. } => ScalarInstructionID::IROR_C,
|
||||
ScalarInstruction::IADD_C { .. } => ScalarInstructionID::IADD_C,
|
||||
ScalarInstruction::IXOR_C { .. } => ScalarInstructionID::IXOR_C,
|
||||
ScalarInstruction::IMULH_R { .. } => ScalarInstructionID::IMULH_R,
|
||||
ScalarInstruction::ISMULH_R { .. } => ScalarInstructionID::ISMULH_R,
|
||||
ScalarInstruction::IMUL_RCP { .. } => ScalarInstructionID::IMUL_RCP,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn op_source(&self) -> OpSource {
|
||||
match self {
|
||||
ScalarInstruction::ISUB_R { src, .. }
|
||||
| ScalarInstruction::IXOR_R { src, .. }
|
||||
| ScalarInstruction::IADD_RS { src, .. }
|
||||
| ScalarInstruction::IMUL_R { src, .. }
|
||||
| ScalarInstruction::IMULH_R { src, .. }
|
||||
| ScalarInstruction::ISMULH_R { src, .. } => OpSource::Register(*src),
|
||||
ScalarInstruction::IROR_C { .. }
|
||||
| ScalarInstruction::IADD_C { .. }
|
||||
| ScalarInstruction::IXOR_C { .. }
|
||||
| ScalarInstruction::IMUL_RCP { .. } => OpSource::Constant,
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue