diff --git a/Cargo.toml b/Cargo.toml index c2a4f6a2..12537d90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,6 @@ members = [ "common", "consensus", "cryptonight", - "random-x", #"cuprate", # "database", "net/levin", diff --git a/random-x/Cargo.toml b/random-x/Cargo.toml deleted file mode 100644 index eae01067..00000000 --- a/random-x/Cargo.toml +++ /dev/null @@ -1,22 +0,0 @@ -[package] -name = "random-x" -version = "0.1.0" -edition = "2021" - -[features] -default = ["jit"] -jit = ["dep:dynasmrt"] -rayon = ["dep:rayon"] - -[dependencies] -blake2 = "0.10" -argon2 = "0.5" -aes = {version = "0.8", features = ["hazmat"]} -hex-literal = "0.4" - -dynasmrt = {version = "2.0.0", optional = true} - -rayon = {version ="1.7", optional = true} - -[profile.dev] -opt-level = 3 \ No newline at end of file diff --git a/random-x/src/aes_hash.rs b/random-x/src/aes_hash.rs deleted file mode 100644 index 585cf681..00000000 --- a/random-x/src/aes_hash.rs +++ /dev/null @@ -1,153 +0,0 @@ -use aes::{ - hazmat::{cipher_round as aes_enc, equiv_inv_cipher_round as aes_dec}, - Block, -}; -use hex_literal::hex; - -// key0, key1, key2, key3 = Hash512("RandomX AesGenerator1R keys") -const GENERATOR_1_KEY_0: [u8; 16] = hex!("53a5ac6d096671622b55b5db1749f4b4"); -const GENERATOR_1_KEY_1: [u8; 16] = hex!("07af7c6d0d716a8478d325174edca10d"); -const GENERATOR_1_KEY_2: [u8; 16] = hex!("f162123fc67e949f4f79c0f445e3203e"); -const GENERATOR_1_KEY_3: [u8; 16] = hex!("3581ef6a7c31bab1884c311654911649"); - -// key0, key1, key2, key3 = Hash512("RandomX AesGenerator4R keys 0-3") -const GENERATOR_4_KEY_0: [u8; 16] = hex!("ddaa2164db3d83d12b6d542f3fd2e599"); -const GENERATOR_4_KEY_1: [u8; 16] = hex!("50340eb2553f91b6539df706e5cddfa5"); -const GENERATOR_4_KEY_2: [u8; 16] = hex!("04d93e5caf7b5e519f67a40abf021c17"); -const GENERATOR_4_KEY_3: [u8; 16] = hex!("63376285085d8fe7853767cd91d2ded8"); -// key4, key5, key6, key7 = Hash512("RandomX AesGenerator4R keys 4-7") -const GENERATOR_4_KEY_4: [u8; 16] = 
hex!("736f82b5a6a7d6e36d8b513db4ff9e22"); -const GENERATOR_4_KEY_5: [u8; 16] = hex!("f36b56c7d9b3109c4e4d02e9d2b772b2"); -const GENERATOR_4_KEY_6: [u8; 16] = hex!("e7c973f28ba365f70a66a92ba7ef3bf6"); -const GENERATOR_4_KEY_7: [u8; 16] = hex!("09d67c7ade395891fdd1060c2d76b0c0"); - -// state0, state1, state2, state3 = Hash512("RandomX AesHash1R state") -const HASH_1_STATE_0: [u8; 16] = hex!("0d2cb592de56a89f47db82ccad3a98d7"); -const HASH_1_STATE_1: [u8; 16] = hex!("6e998d3398b7c7155a129ef55780e7ac"); -const HASH_1_STATE_2: [u8; 16] = hex!("1700776ad0c762ae6b507950e47ca0e8"); -const HASH_1_STATE_3: [u8; 16] = hex!("0c240a638d82ad070500a1794849997e"); -// xkey0, xkey1 = Hash256("RandomX AesHash1R xkeys") -const HASH_1_X_KEY_0: [u8; 16] = hex!("8983faf69f94248bbf56dc9001028906"); -const HASH_1_X_KEY_1: [u8; 16] = hex!("d163b2613ce0f451c64310ee9bf918ed"); - -/// AesHash1R in the spec. -/// -/// creates a 64 byte hash from the input. -/// -/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#34-aeshash1r -pub(crate) fn hash_aes_r1(buf: &[u8]) -> [u8; 64] { - assert_eq!(buf.len() % 64, 0); - - let mut block_0 = Block::from(HASH_1_STATE_0); - let mut block_1 = Block::from(HASH_1_STATE_1); - let mut block_2 = Block::from(HASH_1_STATE_2); - let mut block_3 = Block::from(HASH_1_STATE_3); - - for window in buf.windows(64) { - aes_enc(&mut block_0, Block::from_slice(&window[0..16])); - aes_dec(&mut block_1, Block::from_slice(&window[16..32])); - aes_enc(&mut block_2, Block::from_slice(&window[32..48])); - aes_dec(&mut block_3, Block::from_slice(&window[48..64])); - } - - let x_key_0 = Block::from_slice(&HASH_1_X_KEY_0); - aes_enc(&mut block_0, x_key_0); - aes_dec(&mut block_1, x_key_0); - aes_enc(&mut block_2, x_key_0); - aes_dec(&mut block_3, x_key_0); - - let x_key_1 = Block::from_slice(&HASH_1_X_KEY_1); - aes_enc(&mut block_0, x_key_1); - aes_dec(&mut block_1, x_key_1); - aes_enc(&mut block_2, x_key_1); - aes_dec(&mut block_3, x_key_1); - - [block_0, block_1, 
block_2, block_3] - .concat() - .try_into() - .unwrap() -} - -/// AesGenerator1R in the spec. -/// -/// Fills the bytes with pseudorandom bytes seeded by the input. -/// -/// `output` must be a multiple of 64. -/// -/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#32-aesgenerator1r -pub(crate) fn aes_fill_1r(input: &[u8; 64], output: &mut [u8]) { - assert_eq!(output.len() % 64, 0); - - let key_0 = Block::from(GENERATOR_1_KEY_0); - let key_1 = Block::from(GENERATOR_1_KEY_1); - let key_2 = Block::from(GENERATOR_1_KEY_2); - let key_3 = Block::from(GENERATOR_1_KEY_3); - - let mut block_0 = Block::clone_from_slice(&input[0..16]); - let mut block_1 = Block::clone_from_slice(&input[16..32]); - let mut block_2 = Block::clone_from_slice(&input[32..48]); - let mut block_3 = Block::clone_from_slice(&input[48..64]); - - for idx in (0..output.len()).step_by(64) { - aes_dec(&mut block_0, &key_0); - aes_enc(&mut block_1, &key_1); - aes_dec(&mut block_2, &key_2); - aes_enc(&mut block_3, &key_3); - - output[idx..idx + 16].clone_from_slice(block_0.as_slice()); - output[idx + 16..idx + 32].clone_from_slice(block_1.as_slice()); - output[idx + 32..idx + 48].clone_from_slice(block_2.as_slice()); - output[idx + 48..idx + 64].clone_from_slice(block_3.as_slice()); - } -} - -/// AesGenerator4R in the spec. -/// -/// Fills the output with pseudorandom bytes seeded by the input. -/// -/// `output` must be a multiple of 64. 
-/// -/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#33-aesgenerator4r -pub(crate) fn aes_fill_4r(input: &[u8; 64], output: &mut [u8]) { - assert_eq!(output.len() % 64, 0); - - let key_0 = Block::from(GENERATOR_4_KEY_0); - let key_1 = Block::from(GENERATOR_4_KEY_1); - let key_2 = Block::from(GENERATOR_4_KEY_2); - let key_3 = Block::from(GENERATOR_4_KEY_3); - let key_4 = Block::from(GENERATOR_4_KEY_4); - let key_5 = Block::from(GENERATOR_4_KEY_5); - let key_6 = Block::from(GENERATOR_4_KEY_6); - let key_7 = Block::from(GENERATOR_4_KEY_7); - - let mut block_0 = Block::clone_from_slice(&input[0..16]); - let mut block_1 = Block::clone_from_slice(&input[16..32]); - let mut block_2 = Block::clone_from_slice(&input[32..48]); - let mut block_3 = Block::clone_from_slice(&input[48..64]); - - let aes_enc_4 = |block: &mut Block, key_a, key_b, key_c, key_d| { - aes_enc(block, key_a); - aes_enc(block, key_b); - aes_enc(block, key_c); - aes_enc(block, key_d); - }; - - let aes_dec_4 = |block: &mut Block, key_a, key_b, key_c, key_d| { - aes_dec(block, key_a); - aes_dec(block, key_b); - aes_dec(block, key_c); - aes_dec(block, key_d); - }; - - for idx in (0..output.len()).step_by(64) { - aes_dec_4(&mut block_0, &key_0, &key_1, &key_2, &key_3); - aes_enc_4(&mut block_1, &key_0, &key_1, &key_2, &key_3); - aes_dec_4(&mut block_2, &key_4, &key_5, &key_6, &key_7); - aes_enc_4(&mut block_3, &key_4, &key_5, &key_6, &key_7); - - output[idx..idx + 16].clone_from_slice(block_0.as_slice()); - output[idx + 16..idx + 32].clone_from_slice(block_1.as_slice()); - output[idx + 32..idx + 48].clone_from_slice(block_2.as_slice()); - output[idx + 48..idx + 64].clone_from_slice(block_3.as_slice()); - } -} diff --git a/random-x/src/blake2_generator.rs b/random-x/src/blake2_generator.rs deleted file mode 100644 index 1d90a03b..00000000 --- a/random-x/src/blake2_generator.rs +++ /dev/null @@ -1,47 +0,0 @@ -use blake2::digest::FixedOutputReset; -use blake2::{Blake2b512, Digest}; - -const 
MAX_SEED_LEN: usize = 60; - -pub struct Blake2Generator { - data: [u8; 64], - index: usize, - hasher: Blake2b512, -} - -impl Blake2Generator { - pub fn new(seed: &[u8], nonce: u32) -> Self { - assert!(seed.len() <= MAX_SEED_LEN); - - let mut data = [0; 64]; - data[..seed.len()].copy_from_slice(seed); - - data[MAX_SEED_LEN..].copy_from_slice(&nonce.to_le_bytes()); - - Blake2Generator { - data, - index: 64, - hasher: Blake2b512::default(), - } - } - - pub fn next_u8(&mut self) -> u8 { - self.check_extend(1); - self.index += 1; - self.data[self.index - 1] - } - - pub fn next_u32(&mut self) -> u32 { - self.check_extend(4); - self.index += 4; - u32::from_le_bytes(self.data[self.index - 4..self.index].try_into().unwrap()) - } - - fn check_extend(&mut self, bytes_needed: usize) { - if self.index + bytes_needed > self.data.len() { - self.hasher.update(self.data); - self.data = self.hasher.finalize_fixed_reset().into(); - self.index = 0; - } - } -} diff --git a/random-x/src/config.rs b/random-x/src/config.rs deleted file mode 100644 index 67cfacd4..00000000 --- a/random-x/src/config.rs +++ /dev/null @@ -1,22 +0,0 @@ -/// Target latency for SuperscalarHash (in cycles of the reference CPU). -pub(crate) const RANDOMX_SUPERSCALAR_LATENCY: usize = 170; - -pub(crate) const SUPERSCALAR_MAX_SIZE: usize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2; - -/// Dataset base size in bytes. Must be a power of 2. 
-pub(crate) const RANDOMX_DATASET_BASE_SIZE: usize = 2147483648; - -pub(crate) const RANDOMX_DATASET_EXTRA_SIZE: usize = 33554368; - -pub(crate) const RANDOMX_DATASET_SIZE: usize = - RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE; - -pub(crate) const RANDOMX_ARGON_LANES: u32 = 1; - -pub(crate) const RANDOMX_ARGON_ITERATIONS: u32 = 3; - -pub(crate) const RANDOMX_ARGON_MEMORY: u32 = 262144; - -pub(crate) const RANDOMX_ARGON_SALT: &[u8] = b"RandomX\x03"; - -pub(crate) const RANDOMX_CACHE_ACCESSES: usize = 8; diff --git a/random-x/src/dataset.rs b/random-x/src/dataset.rs deleted file mode 100644 index 481a8207..00000000 --- a/random-x/src/dataset.rs +++ /dev/null @@ -1,193 +0,0 @@ -use std::sync::{Arc, RwLock}; - -use argon2::{Algorithm, Argon2, Block, Params, Version}; -#[cfg(feature = "rayon")] -use rayon::prelude::*; - -use crate::blake2_generator::Blake2Generator; -use crate::{ - config::{ - RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_LANES, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_SALT, - RANDOMX_CACHE_ACCESSES, RANDOMX_DATASET_SIZE, - }, - registers::{RGroupRegisterID, RGroupRegisters}, - superscalar::SSProgram, -}; - -/// Generates the memory blocks used in the cache -fn argon2_blocks(key: &[u8]) -> Box<[Block]> { - let params = Params::new( - RANDOMX_ARGON_MEMORY, - RANDOMX_ARGON_ITERATIONS, - RANDOMX_ARGON_LANES, - None, - ) - .unwrap(); - - let numb_blocks: usize = (RANDOMX_ARGON_LANES * RANDOMX_ARGON_MEMORY) - .try_into() - .unwrap(); - - let mut blocks = vec![Block::new(); numb_blocks].into_boxed_slice(); - - let argon = Argon2::new(Algorithm::Argon2d, Version::V0x13, params); - - argon - .fill_memory(key, RANDOMX_ARGON_SALT, &mut blocks) - .unwrap(); - blocks -} - -/// The Cache. -/// -/// The cache is used during light verification. -/// Internally this struct is a wrapper around an [`Arc`] internal cache, this allows -/// cheep clones and allows the cache to be shared between VMs on different threads. 
-#[derive(Debug, Clone)] -pub struct Cache { - internal_cache: Arc>, -} - -impl Cache { - /// Initialises the cache with the provided key. - /// - /// The key must be between 1-60 bytes (inclusive) otherwise this will panic. - pub fn init(key: &[u8]) -> Self { - let internal_cache = InternalCache::init(key); - Cache { - internal_cache: Arc::new(RwLock::new(internal_cache)), - } - } -} - -/// The internal cache structure, used during light verification. -#[derive(Debug)] - -struct InternalCache { - memory_blocks: Box<[Block]>, - programs: Vec, -} - -impl InternalCache { - fn init(key: &[u8]) -> Self { - let memory_blocks = argon2_blocks(key); - - let mut blake_gen = Blake2Generator::new(key, 0); - - let programs = (0..RANDOMX_CACHE_ACCESSES) - .map(|_| SSProgram::generate(&mut blake_gen)) - .collect::>(); - - InternalCache { - memory_blocks, - programs, - } - } - - /// Gets an item from the cache at the specified index. - fn get_item(&self, idx: usize) -> [u64; 8] { - // one item is 8 u64s - // mask = (blocks in cache * bytes in a block / size of item) minus one. - let mask = (self.memory_blocks.len() * 1024 / 64) - 1; - // and the idx with the mask this is the same as doing mod (self.memory_blocks.len() * 1024 / 64) - let idx = idx & mask; - - // block_idx = idx divided by amount of items in a block - let block_idx = idx / (1024 / 64); - // idx * 8 is to get the idx of a single u64 - // we mask with amount of u64s in a block minus 1 which is the same as doing - // mod the amount of instructions in a block. - let block_u64_start = (idx * 8) & 127; - // The plus 8 cannot overflow as (idx * 8) & 127 wont give a number bigger than 120 - return self.memory_blocks[block_idx].as_ref()[block_u64_start..block_u64_start + 8] - .try_into() - .unwrap(); - } - - /// Generates the dataset item at the specified index. 
- fn init_data_set_item(&self, item_number: usize) -> [u64; 8] { - let mut registers = RGroupRegisters::default(); - registers.set( - &RGroupRegisterID::R0, - (TryInto::::try_into(item_number).unwrap() + 1_u64) - .wrapping_mul(6364136223846793005_u64), - ); - - let mut init_reg = |dst, val: u64| { - registers.apply_to_dst_with_src(&dst, &RGroupRegisterID::R0, |_, src| src ^ val) - }; - - init_reg(RGroupRegisterID::R1, 9298411001130361340); - init_reg(RGroupRegisterID::R2, 12065312585734608966); - init_reg(RGroupRegisterID::R3, 9306329213124626780); - init_reg(RGroupRegisterID::R4, 5281919268842080866); - init_reg(RGroupRegisterID::R5, 10536153434571861004); - init_reg(RGroupRegisterID::R6, 3398623926847679864); - init_reg(RGroupRegisterID::R7, 9549104520008361294); - - let mut cache_index = item_number; - - for program in &self.programs { - program.execute(&mut registers); - - let cache_item = self.get_item(cache_index); - for (reg_id, item) in RGroupRegisterID::iter().zip(cache_item) { - registers.apply_to_dst(®_id, |dst| dst ^ item); - } - - cache_index = registers - .get(&program.reg_with_max_latency()) - .try_into() - .expect("u64 does not fit into usize"); - } - registers.inner() - } -} - -/// The Dataset used during mining. -/// -/// Internally this struct is a wrapper around an [`Arc`] internal dataset, this allows -/// cheep clones and allows the dataset to be shared between VMs on different threads. -#[derive(Debug, Clone)] -pub struct Dataset { - internal_dataset: Arc>, -} - -impl Dataset { - /// Initialises the dataset with the provided key. - /// - /// The key must be between 1-60 bytes (inclusive) otherwise this will panic. - /// - /// This is very computationally intense so might take a long time to complete. - pub fn init(key: &[u8]) -> Dataset { - let internal_dataset = InternalDataset::init(key); - Dataset { - internal_dataset: Arc::new(RwLock::new(internal_dataset)), - } - } -} - -/// The internal dataset used during mining. 
/// Returns `true` when `x` is zero or has exactly one bit set.
fn is_0_or_power_of_2(x: u64) -> bool {
    // `wrapping_sub` makes the zero case well defined: 0 - 1 wraps to all
    // ones and `0 & !0 == 0`. A plain `x - 1` overflows for `x == 0` and
    // panics in builds with overflow checks enabled.
    (x & x.wrapping_sub(1)) == 0
}
- *self.get_mut(dst) = f(self.get(dst)); - } - - pub fn apply_to_dst_with_src( - &mut self, - dst: &RGroupRegisterID, - src: &RGroupRegisterID, - f: impl FnOnce(u64, u64) -> u64, - ) { - *self.get_mut(dst) = f(self.get(dst), self.get(src)); - } - - pub fn set(&mut self, id: &RGroupRegisterID, val: u64) { - self.0[*id as usize] = val - } - - pub fn get(&self, id: &RGroupRegisterID) -> u64 { - self.0[*id as usize] - } - - pub fn get_mut(&mut self, id: &RGroupRegisterID) -> &mut u64 { - &mut self.0[*id as usize] - } -} diff --git a/random-x/src/superscalar.rs b/random-x/src/superscalar.rs deleted file mode 100644 index 94d3a759..00000000 --- a/random-x/src/superscalar.rs +++ /dev/null @@ -1,32 +0,0 @@ -mod cpu; -mod executor; -mod generator; -mod instructions; -mod program; - -use crate::blake2_generator::Blake2Generator; - -use crate::registers::{RGroupRegisterID, RGroupRegisters}; -use executor::execute; -use generator::generate; -use instructions::ScalarInstruction; - -#[derive(Debug)] -pub(crate) struct SSProgram { - program: Vec, - reg_with_max_latency: RGroupRegisterID, -} - -impl SSProgram { - pub fn generate(gen: &mut Blake2Generator) -> Self { - generate(gen) - } - - pub fn execute(&self, registers: &mut RGroupRegisters) { - execute(&self.program, registers) - } - - pub fn reg_with_max_latency(&self) -> RGroupRegisterID { - self.reg_with_max_latency - } -} diff --git a/random-x/src/superscalar/cpu.rs b/random-x/src/superscalar/cpu.rs deleted file mode 100644 index 42b02982..00000000 --- a/random-x/src/superscalar/cpu.rs +++ /dev/null @@ -1,295 +0,0 @@ -use crate::config::RANDOMX_SUPERSCALAR_LATENCY; - -/// Max cycles + highest amount of cycles on a macro op. 
/// An execution port of the modelled CPU.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ExecutionPort {
    P0,
    P1,
    P5,
}

/// The set of ports a single micro-op may be issued on.
enum AllowedPorts {
    One(ExecutionPort),
    Two(ExecutionPort, ExecutionPort),
    All,
}

impl AllowedPorts {
    /// Returns `true` if `port` belongs to this set.
    fn port_allowed(&self, port: &ExecutionPort) -> bool {
        match self {
            Self::All => true,
            Self::One(only) => only == port,
            Self::Two(first, second) => first == port || second == port,
        }
    }
}

/// The macro-ops the superscalar instructions are built from.
#[allow(non_camel_case_types)]
pub enum MacroOp {
    SUB_RR,
    XOR_RR,
    LEA_SIB,
    IMUL_RR { dependant: bool },
    ROR_RI,
    ADD_RI,
    XOR_RI,
    MOV_RR,
    MUL_R,
    IMUL_R,
    MOV_RI,
}

impl MacroOp {
    /// Latency of the macro-op in cycles.
    pub fn cycles_to_complete(&self) -> usize {
        match self {
            Self::MOV_RR => 0,
            Self::SUB_RR
            | Self::XOR_RR
            | Self::LEA_SIB
            | Self::ROR_RI
            | Self::ADD_RI
            | Self::XOR_RI
            | Self::MOV_RI => 1,
            Self::IMUL_RR { .. } => 3,
            Self::MUL_R | Self::IMUL_R => 4,
        }
    }

    /// A macro-op that needs no micro-op occupies no execution port.
    pub fn can_be_eliminated(&self) -> bool {
        self.micro_ops_needed() == 0
    }

    /// Only `IMUL_RR` can depend on the previous macro-op, and only when it
    /// was created with `dependant: true`.
    pub fn is_dependant_on_last_op(&self) -> bool {
        matches!(self, Self::IMUL_RR { dependant: true })
    }

    /// Number of micro-ops (0, 1 or 2) the macro-op is split into.
    pub fn micro_ops_needed(&self) -> usize {
        match self {
            Self::MOV_RR => 0,
            Self::MUL_R | Self::IMUL_R => 2,
            Self::SUB_RR
            | Self::XOR_RR
            | Self::LEA_SIB
            | Self::IMUL_RR { .. }
            | Self::ROR_RI
            | Self::ADD_RI
            | Self::XOR_RI
            | Self::MOV_RI => 1,
        }
    }

    /// Ports the micro-op at `micro_op_index` may run on.
    ///
    /// # Panics
    ///
    /// Panics for `MOV_RR` (it has no micro-ops) and, for the two-micro-op
    /// instructions, for any index other than 0 or 1.
    fn allowed_execution_ports(&self, micro_op_index: usize) -> AllowedPorts {
        use ExecutionPort::{P0, P1, P5};

        match self {
            Self::SUB_RR | Self::XOR_RR | Self::ADD_RI | Self::XOR_RI | Self::MOV_RI => {
                AllowedPorts::All
            }
            Self::LEA_SIB => AllowedPorts::Two(P0, P1),
            Self::ROR_RI => AllowedPorts::Two(P0, P5),
            Self::IMUL_RR { .. } => AllowedPorts::One(P1),
            Self::MOV_RR => panic!("No execution units needed for MOV_RR"),
            Self::MUL_R | Self::IMUL_R => match micro_op_index {
                0 => AllowedPorts::One(P1),
                1 => AllowedPorts::One(P5),
                _ => panic!("no execution port at that index"),
            },
        }
    }
}
-#[derive(Debug, Default, Copy, Clone)] -struct CycleSchedule { - p0: bool, - p1: bool, - p5: bool, -} - -impl CycleSchedule { - fn space_for_micro_op(&self, allowed_ports: &AllowedPorts) -> Option { - if !self.p5 && allowed_ports.port_allowed(&ExecutionPort::P5) { - Some(ExecutionPort::P5) - } else if !self.p0 && allowed_ports.port_allowed(&ExecutionPort::P0) { - Some(ExecutionPort::P0) - } else if !self.p1 && allowed_ports.port_allowed(&ExecutionPort::P1) { - Some(ExecutionPort::P1) - } else { - None - } - } - - fn set_port_busy(&mut self, port: ExecutionPort) { - match port { - ExecutionPort::P0 => self.p0 = true, - ExecutionPort::P1 => self.p1 = true, - ExecutionPort::P5 => self.p5 = true, - } - } -} - -pub(crate) struct MacroOpOpportunity { - cycle: usize, - micro_port_0: Option, - micro_port_1: Option, -} - -impl MacroOpOpportunity { - pub fn cycle(&self) -> usize { - self.cycle - } -} - -#[derive(Debug)] -pub(crate) struct ProgramSchedule { - ports_schedule: [CycleSchedule; CYCLE_MAP_SIZE], - full: bool, -} - -impl Default for ProgramSchedule { - fn default() -> Self { - Self { - ports_schedule: [CycleSchedule::default(); CYCLE_MAP_SIZE], - full: false, - } - } -} - -impl ProgramSchedule { - pub fn set_full(&mut self) { - self.full = true; - } - - pub fn is_full(&self) -> bool { - self.full - } - - pub fn schedule_macro_op_at_earliest( - &mut self, - op: &MacroOp, - cycle: usize, - last_op_completes_at: usize, - ) -> Option { - let opportunity = self.earliest_cycle_for_macro_op(op, cycle, last_op_completes_at)?; - let cycle = opportunity.cycle(); - if let Some(port0) = opportunity.micro_port_0 { - self.schedule_micro_op(cycle, port0); - if let Some(port1) = opportunity.micro_port_1 { - self.schedule_micro_op(cycle, port1); - }; - }; - - Some(cycle) - } - - pub fn earliest_cycle_for_macro_op( - &mut self, - op: &MacroOp, - cycle: usize, - last_op_completes_at: usize, - ) -> Option { - let mut cycle = if op.is_dependant_on_last_op() { - 
cycle.max(last_op_completes_at) - } else { - cycle - }; - - if op.can_be_eliminated() { - return Some(MacroOpOpportunity { - cycle, - micro_port_0: None, - micro_port_1: None, - }); - } - - match op.micro_ops_needed() { - 0 => Some(MacroOpOpportunity { - cycle, - micro_port_0: None, - micro_port_1: None, - }), - 1 => self - .earliest_cycle_for_mirco_op(&op.allowed_execution_ports(0), cycle) - .map(|(cycle, micro_port_0)| MacroOpOpportunity { - cycle, - micro_port_0: Some(micro_port_0), - micro_port_1: None, - }), - 2 => { - // both ops must happen in the same cycle - let allowed_0 = op.allowed_execution_ports(0); - let allowed_1 = op.allowed_execution_ports(1); - - while cycle < CYCLE_MAP_SIZE { - let (min_0_cycle, port_0) = - self.earliest_cycle_for_mirco_op(&allowed_0, cycle)?; - let (min_1_cycle, port_1) = - self.earliest_cycle_for_mirco_op(&allowed_1, cycle)?; - - if min_0_cycle == min_1_cycle { - return Some(MacroOpOpportunity { - cycle: min_0_cycle, - micro_port_0: Some(port_0), - micro_port_1: Some(port_1), - }); - } else { - cycle += 1; - } - } - None - } - _ => unreachable!(), - } - } - - fn schedule_micro_op_at_earliest( - &mut self, - allowed_ports: &AllowedPorts, - cycle: usize, - ) -> Option { - let (cycle, port) = self.earliest_cycle_for_mirco_op(allowed_ports, cycle)?; - self.schedule_micro_op(cycle, port); - Some(cycle) - } - - fn schedule_micro_op(&mut self, cycle: usize, port: ExecutionPort) { - self.ports_schedule[cycle].set_port_busy(port) - } - - fn earliest_cycle_for_mirco_op( - &mut self, - allowed_ports: &AllowedPorts, - cycle: usize, - ) -> Option<(usize, ExecutionPort)> { - for (cycle, cycle_schedule) in self.ports_schedule.iter().enumerate().skip(cycle) { - if let Some(port) = cycle_schedule.space_for_micro_op(allowed_ports) { - return Some((cycle, port)); - } - } - self.full = true; - None - } -} diff --git a/random-x/src/superscalar/executor.rs b/random-x/src/superscalar/executor.rs deleted file mode 100644 index fef9037f..00000000 --- 
a/random-x/src/superscalar/executor.rs +++ /dev/null @@ -1,105 +0,0 @@ -use crate::registers::RGroupRegisters; -use crate::superscalar::instructions::ScalarInstruction; - -const P2EXP63: u64 = 1 << 63; - -pub fn execute(program: &[ScalarInstruction], registers: &mut RGroupRegisters) { - for instruction in program { - match instruction { - ScalarInstruction::ISUB_R { dst, src } => { - let op = |dst_val: u64, src_val| dst_val.wrapping_sub(src_val); - registers.apply_to_dst_with_src(dst, src, op); - } - ScalarInstruction::IXOR_R { dst, src } => { - let op = |dst_val: u64, src_val| dst_val ^ src_val; - registers.apply_to_dst_with_src(dst, src, op); - } - ScalarInstruction::IADD_RS { - dst, - src, - mod_shift, - } => { - let op = |dst_val: u64, src_val| { - dst_val.wrapping_add(src_val << clamp_mod_shift(*mod_shift)) - }; - registers.apply_to_dst_with_src(dst, src, op); - } - ScalarInstruction::IMUL_R { dst, src } => { - let op = |dst_val: u64, src_val| dst_val.wrapping_mul(src_val); - registers.apply_to_dst_with_src(dst, src, op); - } - ScalarInstruction::IROR_C { dst, imm32 } => { - let op = |dst_val: u64| dst_val.rotate_right(*imm32); - registers.apply_to_dst(dst, op); - } - ScalarInstruction::IADD_C { dst, imm32 } => { - let op = |dst_val: u64| dst_val.wrapping_add(sign_extend_2s_compl(*imm32)); - registers.apply_to_dst(dst, op); - } - ScalarInstruction::IXOR_C { dst, imm32 } => { - let op = |dst_val: u64| dst_val ^ sign_extend_2s_compl(*imm32); - registers.apply_to_dst(dst, op); - } - ScalarInstruction::IMULH_R { dst, src } => { - registers.apply_to_dst_with_src(dst, src, high_mul); - } - ScalarInstruction::ISMULH_R { dst, src } => { - let op = |dst_val: u64, src_val: u64| { - signed_high_mul(dst_val as i64, src_val as i64) as u64 - }; - registers.apply_to_dst_with_src(dst, src, op); - } - ScalarInstruction::IMUL_RCP { dst, imm32 } => { - let op = |dst_val: u64| dst_val.wrapping_mul(randomx_reciprocal(*imm32 as u64)); - registers.apply_to_dst(dst, op); - } - } - } 
/// Computes the reciprocal constant used by `IMUL_RCP`.
///
/// Starting from `2^63 / divisor`, one step of binary long division is done
/// for every significant bit of `divisor`, so the result is
/// `floor(2^(63 + bits) / divisor)` where `bits` is the bit length of
/// `divisor`.
///
/// # Panics
///
/// Panics if `divisor` is zero or a power of two.
pub fn randomx_reciprocal(divisor: u64) -> u64 {
    // Kept local so the function does not depend on a file-level constant.
    const TWO_POW_63: u64 = 1 << 63;

    assert!(!divisor.is_power_of_two());
    assert_ne!(divisor, 0);

    let mut quotient = TWO_POW_63 / divisor;
    let mut remainder = TWO_POW_63 % divisor;

    // Bit length of the divisor (index of the highest set bit, plus one).
    let significant_bits = u64::BITS - divisor.leading_zeros();

    for _ in 0..significant_bits {
        // `remainder >= divisor - remainder` is `2 * remainder >= divisor`
        // written so that it cannot overflow.
        if remainder >= divisor.wrapping_sub(remainder) {
            quotient = quotient.wrapping_mul(2).wrapping_add(1);
            remainder = remainder.wrapping_mul(2).wrapping_sub(divisor);
        } else {
            quotient = quotient.wrapping_mul(2);
            remainder = remainder.wrapping_mul(2);
        }
    }
    quotient
}

/// Upper 64 bits of the unsigned 128-bit product `a * b`.
fn high_mul(a: u64, b: u64) -> u64 {
    // The shifted product always fits into 64 bits, so the cast is lossless.
    ((u128::from(a) * u128::from(b)) >> 64) as u64
}

/// Upper 64 bits of the signed 128-bit product `a * b`.
fn signed_high_mul(a: i64, b: i64) -> i64 {
    // The arithmetic shift keeps the sign; the result always fits into 64 bits.
    ((i128::from(a) * i128::from(b)) >> 64) as i64
}

/// Sign-extends a 32-bit two's complement immediate to 64 bits.
pub fn sign_extend_2s_compl(imm: u32) -> u64 {
    // u32 -> i32 reinterprets the bits, i32 -> i64 sign-extends,
    // i64 -> u64 reinterprets again. None of the casts loses information.
    i64::from(imm as i32) as u64
}

/// Reduces a stored `mod_shift` to a shift amount in `0..=3`.
///
/// The superscalar generator already stores `(byte >> 2) % 4` in `mod_shift`.
/// Shifting right by two again here would map every stored value to 0 and
/// turn `IADD_RS` into a plain add, so only the final reduction is applied.
fn clamp_mod_shift(x: u8) -> u64 {
    u64::from(x % 4)
}
7-3-3-3 - D7333, - /// 2: 3-7-3-3 - D3733, - /// 3: 4-9-3 - D493, - - /// 4: 4-4-4-4 - D4444, - /// 5: 3-3-10 - D3310, -} - -impl DecoderGroup { - fn slot_len(&self, index: usize) -> Option { - match self { - DecoderGroup::D484 => match index { - 0 | 2 => Some(SlotLen::L4), - 1 => Some(SlotLen::L8), - _ => None, - }, - DecoderGroup::D7333 => match index { - 0 => Some(SlotLen::L7), - 1..=3 => Some(SlotLen::L3), - _ => None, - }, - DecoderGroup::D3733 => match index { - 0 | 2 | 3 => Some(SlotLen::L3), - 1 => Some(SlotLen::L7), - _ => None, - }, - DecoderGroup::D493 => match index { - 0 => Some(SlotLen::L4), - 1 => Some(SlotLen::L9), - 2 => Some(SlotLen::L3), - _ => None, - }, - DecoderGroup::D4444 => match index { - 0..=3 => Some(SlotLen::L4), - _ => None, - }, - DecoderGroup::D3310 => match index { - 0 | 1 => Some(SlotLen::L3), - 2 => Some(SlotLen::L10), - _ => None, - }, - } - } - - /// Returns an iterator over the lengths with a bool `is_last` - pub fn iter_slot_len(&self) -> impl Iterator + '_ { - (0..self.size()).map(|i| (self.slot_len(i).unwrap(), self.size() - 1 == i)) - } - - pub fn size(&self) -> usize { - match self { - DecoderGroup::D484 => 3, - DecoderGroup::D7333 => 4, - DecoderGroup::D3733 => 4, - DecoderGroup::D493 => 3, - DecoderGroup::D4444 => 4, - DecoderGroup::D3310 => 3, - } - } - - fn next_group( - gen: &mut Blake2Generator, - instruction: Option, - total_muls_low: bool, - ) -> DecoderGroup { - if matches!( - instruction, - Some(ScalarInstructionID::IMULH_R) | Some(ScalarInstructionID::ISMULH_R) - ) { - return DecoderGroup::D3310; - } - - if total_muls_low { - return DecoderGroup::D4444; - } - - if instruction == Some(ScalarInstructionID::IMUL_RCP) { - return match (gen.next_u8() & 1).cmp(&1) { - Ordering::Equal => DecoderGroup::D484, - Ordering::Less => DecoderGroup::D493, - Ordering::Greater => unreachable!(), - }; - } - - match gen.next_u8() & 3 { - 0 => DecoderGroup::D484, - 1 => DecoderGroup::D7333, - 2 => DecoderGroup::D3733, - 3 => 
DecoderGroup::D493, - _ => unreachable!(), - } - } -} - -#[derive(Debug, Copy, Clone)] -pub(crate) struct SingleRegisterInfo { - id: RGroupRegisterID, - next_ready: usize, - last_instruction: Option, - last_source: OpSource, -} - -impl SingleRegisterInfo { - pub fn id(&self) -> RGroupRegisterID { - self.id - } - pub fn next_ready(&self) -> usize { - self.next_ready - } - pub fn last_instruction(&self) -> Option { - self.last_instruction - } - pub fn last_source(&self) -> OpSource { - self.last_source - } - pub fn set_next_ready(&mut self, next_ready: usize) { - self.next_ready = next_ready - } - pub fn set_last_instruction(&mut self, last_instruction: ScalarInstructionID) { - self.last_instruction = Some(last_instruction); - } - pub fn set_last_source(&mut self, last_source: OpSource) { - self.last_source = last_source - } -} - -#[derive(Debug)] -pub(crate) struct RegistersInfo { - registers: [SingleRegisterInfo; 8], -} - -impl Default for RegistersInfo { - fn default() -> Self { - let default = SingleRegisterInfo { - id: RGroupRegisterID::R0, - next_ready: 0, - last_instruction: None, - last_source: OpSource::Constant, - }; - let mut default = [default; 8]; - let reg_ids = [ - RGroupRegisterID::R1, - RGroupRegisterID::R2, - RGroupRegisterID::R3, - RGroupRegisterID::R4, - RGroupRegisterID::R5, - RGroupRegisterID::R6, - RGroupRegisterID::R7, - ]; - for (reg, id) in default.iter_mut().skip(1).zip(reg_ids) { - reg.id = id; - } - RegistersInfo { registers: default } - } -} - -impl RegistersInfo { - pub fn iter(&self) -> impl Iterator { - self.registers.iter() - } - pub fn ready_at_cycle(&self, cycle: usize) -> Vec<&SingleRegisterInfo> { - self.registers - .iter() - .filter(|reg| reg.next_ready <= cycle) - .collect::>() - } - pub fn get_mut(&mut self, id: RGroupRegisterID) -> &mut SingleRegisterInfo { - &mut self.registers[id as usize] - } -} - -pub(crate) fn select_register( - gen: &mut Blake2Generator, - available: &[&SingleRegisterInfo], -) -> Option { - if 
available.is_empty() { - return None; - } - let index = if available.len() > 1 { - // available is <= 8 so as is safe - (gen.next_u32() % available.len() as u32) - .try_into() - .expect("Could not fit u32 into usize") - } else { - 0 - }; - - Some(available[index].id) -} - -/// Returns an imm32 if the instruction requires one. -fn get_imm32(gen: &mut Blake2Generator, id: &ScalarInstructionID) -> Option { - match id { - ScalarInstructionID::IADD_C | ScalarInstructionID::IXOR_C => Some(gen.next_u32()), - ScalarInstructionID::IROR_C => { - // imm32 % 64 != 0 - Some( - loop { - let imm8 = gen.next_u8() & 63; - if imm8 != 0 { - break imm8; - } - } - .into(), - ) - } - ScalarInstructionID::IMUL_RCP => { - // imm32 != 0, imm32 != 2N - Some(loop { - let imm32 = gen.next_u32(); - if !is_0_or_power_of_2(imm32.into()) { - break imm32; - } - }) - } - _ => None, - } -} - -fn get_mod_shift(gen: &mut Blake2Generator, id: &ScalarInstructionID) -> Option { - match id { - // keep the shit between 0 and 3. - ScalarInstructionID::IADD_RS => Some((gen.next_u8() >> 2) % 4), - _ => None, - } -} - -/// Used during [`ScalarInstructionBuilder`] creation. Returns the [`OpSource`] to give the register -/// if this is known otherwise [`None`] is returned and this field will be filled later. -fn get_src_to_give_register( - gen: &mut Blake2Generator, - id: &ScalarInstructionID, -) -> Option { - match id { - ScalarInstructionID::IADD_C - | ScalarInstructionID::IXOR_C - | ScalarInstructionID::IROR_C - | ScalarInstructionID::IMUL_RCP => Some(OpSource::Constant), - ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => { - // not actually the source value, the Monero C++ version sets this field to a random - // value, this has an issue of becoming an actual meaningful value though so we handle - // those rare cases here: - Some(OpSource::from_rand_i32(gen.next_u32() as i32)) - } - _ => None, - } -} - -struct ScalarInstructionBuilder { - /// The id of the instruction we are building. 
- id: ScalarInstructionID, - /// The true source register - the one we are actually getting the value from will be - /// None if this instruction doesn't need a register source. - true_src: Option, - /// The value src we tell the dst register, if this is a register then most of the time this - /// is the same as [`true_src`] but for `IMULH_R` and `ISMULH_R` it's not. - /// - /// `IMULH_R` and `ISMULH_R` generate a random i32 and set it for this slot . - src_to_give_register: Option, - /// The destination register for this instruction. - dst: Option, - /// A constant used in some instructions. - imm32: Option, - /// used in IADD_RS - mod_shift: Option, -} - -impl ScalarInstructionBuilder { - /// Creates a new [`ScalarInstructionBuilder`]. - /// - pub fn new( - gen: &mut Blake2Generator, - slot_len: &SlotLen, - group: &DecoderGroup, - is_last: bool, - ) -> Self { - // https://github.com/tevador/RandomX/blob/master/doc/specs.md#632-instruction-selection - let id = match slot_len { - SlotLen::L3 if !is_last => match gen.next_u8() & 1 { - 0 => ScalarInstructionID::ISUB_R, - _ => ScalarInstructionID::IXOR_R, - }, - SlotLen::L3 => match gen.next_u8() & 3 { - 0 => ScalarInstructionID::ISUB_R, - 1 => ScalarInstructionID::IXOR_R, - 2 => ScalarInstructionID::IMULH_R, - _ => ScalarInstructionID::ISMULH_R, - }, - SlotLen::L4 if group == &DecoderGroup::D4444 && !is_last => ScalarInstructionID::IMUL_R, - SlotLen::L4 => match gen.next_u8() & 1 { - 0 => ScalarInstructionID::IROR_C, - _ => ScalarInstructionID::IADD_RS, - }, - SlotLen::L7 | SlotLen::L8 | SlotLen::L9 => match gen.next_u8() & 1 { - 0 => ScalarInstructionID::IXOR_C, - _ => ScalarInstructionID::IADD_C, - }, - SlotLen::L10 => ScalarInstructionID::IMUL_RCP, - }; - - Self { - id, - true_src: None, - src_to_give_register: get_src_to_give_register(gen, &id), - dst: None, - imm32: get_imm32(gen, &id), - mod_shift: get_mod_shift(gen, &id), - } - } - - /// Set the source of the operation - fn set_src(&mut self, src: 
RGroupRegisterID) { - self.true_src = Some(src); - if self.src_to_give_register.is_none() { - // If the src_to_give_register field hasn't already been set then set it now. - // The only fields that have true_src as a register with a different src_to_give_register - // set this field at the start. - self.src_to_give_register = Some(OpSource::Register(src)); - } - } - - /// Select the source of this operation from the given registers. - /// - /// If no registers are available [`false`] is returned. - pub fn select_source( - &mut self, - gen: &mut Blake2Generator, - cycle: usize, - registers_info: &RegistersInfo, - ) -> bool { - let available_registers = registers_info.ready_at_cycle(cycle); - //if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination - if available_registers.len() == 2 - && self.id == ScalarInstructionID::IADD_RS - && (available_registers[0].id() == RGroupRegisterID::R5 - || available_registers[1].id() == RGroupRegisterID::R5) - { - self.set_src(RGroupRegisterID::R5); - return true; - } - if let Some(reg) = select_register(gen, &available_registers) { - self.set_src(reg); - return true; - }; - - false - } - - /// Selects the destination of this operation from the given registers. - /// - /// If no registers are available [`false`] is returned. 
- fn select_destination( - &mut self, - gen: &mut Blake2Generator, - cycle: usize, - allow_chain_mul: bool, - registers_info: &RegistersInfo, - ) -> bool { - let available_registers = registers_info - .iter() - .filter(|reg| { - reg.next_ready() <= cycle - && (self.id.can_dst_be_src() || Some(reg.id()) != self.true_src) - && (allow_chain_mul - || self.id.instruction_group() != ScalarInstructionID::IMUL_R - || reg.last_instruction() != Some(ScalarInstructionID::IMUL_R)) - && (Some(self.id.instruction_group()) != reg.last_instruction() - || self.src_to_give_register != Some(reg.last_source())) - && (reg.id() != RGroupRegisterID::R5 || self.id != ScalarInstructionID::IADD_RS) - }) - .collect::>(); - let Some(reg) = select_register(gen, &available_registers) else { - return false; - }; - self.dst = Some(reg); - true - } - - fn construct(self) -> ScalarInstruction { - match self.id { - ScalarInstructionID::ISUB_R => ScalarInstruction::ISUB_R { - dst: self.dst.unwrap(), - src: self.true_src.unwrap(), - }, - ScalarInstructionID::IXOR_R => ScalarInstruction::IXOR_R { - dst: self.dst.unwrap(), - src: self.true_src.unwrap(), - }, - ScalarInstructionID::IADD_RS => ScalarInstruction::IADD_RS { - dst: self.dst.unwrap(), - src: self.true_src.unwrap(), - mod_shift: self.mod_shift.unwrap(), - }, - ScalarInstructionID::IMUL_R => ScalarInstruction::IMUL_R { - dst: self.dst.unwrap(), - src: self.true_src.unwrap(), - }, - ScalarInstructionID::IROR_C => ScalarInstruction::IROR_C { - dst: self.dst.unwrap(), - imm32: self.imm32.unwrap(), - }, - ScalarInstructionID::IADD_C => ScalarInstruction::IADD_C { - dst: self.dst.unwrap(), - imm32: self.imm32.unwrap(), - }, - ScalarInstructionID::IXOR_C => ScalarInstruction::IXOR_C { - dst: self.dst.unwrap(), - imm32: self.imm32.unwrap(), - }, - ScalarInstructionID::IMULH_R => ScalarInstruction::IMULH_R { - dst: self.dst.unwrap(), - src: self.true_src.unwrap(), - }, - ScalarInstructionID::ISMULH_R => ScalarInstruction::ISMULH_R { - dst: 
self.dst.unwrap(), - src: self.true_src.unwrap(), - }, - ScalarInstructionID::IMUL_RCP => ScalarInstruction::IMUL_RCP { - dst: self.dst.unwrap(), - imm32: self.imm32.unwrap(), - }, - } - } -} - -#[derive(Debug, Default)] -struct ProgramState { - /// The current cycle we are generating for. - current_cycle: usize, - /// The cycle the last operation will complete at. - last_op_completes_at: usize, - - /// The amount of multiplication instructions the program - /// has generated. - mul_count: usize, - /// The amount of instructions in a row the program has thrown - /// away because they couldn't be completed. - throw_away_count: usize, - /// The execution port schedule of the program. - program_schedule: ProgramSchedule, - /// Information on the registers state. - registers_info: RegistersInfo, - /// The program - program: Vec, -} - -impl ProgramState { - fn allow_chain_mul(&self) -> bool { - self.throw_away_count > 0 - } -} - -/// A state machine that controls instruction generation. -enum ScalarInstructionBuilderSM { - /// The generate instruction state, the next call will - /// start a new instruction. - Generate { - /// The last instruction generated. - last_instruction: Option, - }, - /// A partially completed instruction, the next call will - /// push this instruction forward. - PartiallyComplete { - /// The instruction currently being generated. - builder: ScalarInstructionBuilder, - /// The macro op of the instruction we are going - /// to do next. - macro_op_idx: usize, - }, - /// NULL state, this state will only be finished on is the program is full. - Null, -} - -impl ScalarInstructionBuilderSM { - pub fn push_forward( - &mut self, - gen: &mut Blake2Generator, - decoder_group: &DecoderGroup, - slot_len: &SlotLen, - is_last_slot: bool, - program_state: &mut ProgramState, - ) { - loop { - match std::mem::replace(self, ScalarInstructionBuilderSM::Null) { - ScalarInstructionBuilderSM::Null => { - return; - } - ScalarInstructionBuilderSM::Generate { .. 
} => { - if program_state.program_schedule.is_full() - || program_state.program.len() >= SUPERSCALAR_MAX_SIZE - { - return; - } - - let builder = - ScalarInstructionBuilder::new(gen, slot_len, decoder_group, is_last_slot); - - *self = ScalarInstructionBuilderSM::PartiallyComplete { - builder, - macro_op_idx: 0, - }; - } - ScalarInstructionBuilderSM::PartiallyComplete { - mut builder, - mut macro_op_idx, - } => { - let top_cycle = program_state.current_cycle; - - if macro_op_idx >= builder.id.number_of_macro_ops() { - *self = ScalarInstructionBuilderSM::Generate { - last_instruction: Some(builder.id), - }; - continue; - } - - let Some(next_macro_op) = builder.id.macro_op(macro_op_idx) else { - unreachable!("We just checked if the macro op idx is too high") - }; - - let Some(opportunity) = - program_state.program_schedule.earliest_cycle_for_macro_op( - &next_macro_op, - program_state.current_cycle, - program_state.last_op_completes_at, - ) - else { - program_state.program_schedule.set_full(); - return; - }; - - let mut scheduled_cycle = opportunity.cycle(); - - if !Self::check_set_src( - &mut builder, - macro_op_idx, - gen, - &mut scheduled_cycle, - &mut program_state.current_cycle, - &program_state.registers_info, - ) { - // If the source couldn't be set throw the instruction away - if program_state.throw_away_count < MAX_THROWAWAY_COUNT { - program_state.throw_away_count += 1; - *self = ScalarInstructionBuilderSM::Generate { - last_instruction: Some(builder.id), - }; - continue; - } - // If too many instructions are thrown away return for the next decoder - // idx - *self = ScalarInstructionBuilderSM::Generate { - last_instruction: None, - }; - return; - } - - let allow_chain_mul = program_state.allow_chain_mul(); - - if !Self::check_set_dst( - &mut builder, - macro_op_idx, - gen, - &mut scheduled_cycle, - &mut program_state.current_cycle, - allow_chain_mul, - &program_state.registers_info, - ) { - // If the source couldn't be set throw the instruction away - if 
program_state.throw_away_count < MAX_THROWAWAY_COUNT { - program_state.throw_away_count += 1; - *self = ScalarInstructionBuilderSM::Generate { - last_instruction: Some(builder.id), - }; - continue; - } - // If too many instructions are thrown away return for the next decoder - // idx - *self = ScalarInstructionBuilderSM::Generate { - last_instruction: None, - }; - return; - } - - program_state.throw_away_count = 0; - - let Some(scheduled_cycle) = program_state - .program_schedule - .schedule_macro_op_at_earliest( - &next_macro_op, - scheduled_cycle, - program_state.last_op_completes_at, - ) - else { - program_state.program_schedule.set_full(); - return; - }; - - let completes_at = scheduled_cycle + next_macro_op.cycles_to_complete(); - program_state.last_op_completes_at = completes_at; - - if macro_op_idx == builder.id.macro_op_to_store_res() { - let reg = program_state.registers_info.get_mut(builder.dst.unwrap()); - reg.set_next_ready(completes_at); - reg.set_last_source(builder.src_to_give_register.unwrap()); - reg.set_last_instruction(builder.id.instruction_group()); - } - - macro_op_idx += 1; - program_state.current_cycle = top_cycle; - - if scheduled_cycle >= RANDOMX_SUPERSCALAR_LATENCY { - program_state.program_schedule.set_full(); - } - - if macro_op_idx >= builder.id.number_of_macro_ops() { - if builder.id.is_multiplication() { - program_state.mul_count += 1; - } - *self = ScalarInstructionBuilderSM::Generate { - last_instruction: Some(builder.id), - }; - program_state.program.push(builder.construct()); - } else { - *self = ScalarInstructionBuilderSM::PartiallyComplete { - builder, - macro_op_idx, - }; - } - return; - } - } - } - } - - /// Try set the instructions source. - /// - /// Will return true if the src has been set or if its not the correct macro op to set the dst. - /// - /// Will return false if its the correct macro op to set the dst and the src couldn't be set. 
- fn check_set_dst( - builder: &mut ScalarInstructionBuilder, - macro_op_idx: usize, - gen: &mut Blake2Generator, - scheduled_cycle: &mut usize, - cycle: &mut usize, - allow_chain_mul: bool, - registers_info: &RegistersInfo, - ) -> bool { - if builder.id.macro_op_to_select_dst() != macro_op_idx { - // We don't need to set the src at this macro op. - return true; - } - - let mut set = false; - for _ in 0..LOOK_FORWARD_CYCLES { - if !builder.select_destination(gen, *scheduled_cycle, allow_chain_mul, registers_info) { - *scheduled_cycle += 1; - *cycle += 1; - } else { - set = true; - break; - } - } - - set - } - - /// Try set the instructions source. - /// - /// Will return true if the src has been set or if its not he correct macro op to set the src. - /// - /// Will return false if its the correct macro op to set the src and the src couldn't be set. - fn check_set_src( - builder: &mut ScalarInstructionBuilder, - macro_op_idx: usize, - gen: &mut Blake2Generator, - scheduled_cycle: &mut usize, - cycle: &mut usize, - registers_info: &RegistersInfo, - ) -> bool { - if builder.id.macro_op_to_select_src() != Some(macro_op_idx) { - // We don't need to set the src at this macro op. - return true; - } - - let mut set = false; - for _ in 0..LOOK_FORWARD_CYCLES { - if !builder.select_source(gen, *scheduled_cycle, registers_info) { - *scheduled_cycle += 1; - *cycle += 1; - } else { - set = true; - break; - } - } - - set - } - - pub fn get_instruction_id(&self) -> Option { - match self { - ScalarInstructionBuilderSM::Generate { last_instruction } => *last_instruction, - ScalarInstructionBuilderSM::PartiallyComplete { builder, .. 
} => Some(builder.id), - ScalarInstructionBuilderSM::Null => { - panic!("Should not be calling this function in this state") - } - } - } -} - -pub(crate) fn generate(gen: &mut Blake2Generator) -> SSProgram { - let mut program_state = ProgramState::default(); - - let mut instruction_sm = ScalarInstructionBuilderSM::Generate { - last_instruction: None, - }; - - for decoder_cycle in 0..RANDOMX_SUPERSCALAR_LATENCY { - if program_state.program_schedule.is_full() - || program_state.program.len() >= SUPERSCALAR_MAX_SIZE - { - break; - } - let current_decode_group = DecoderGroup::next_group( - gen, - instruction_sm.get_instruction_id(), - program_state.mul_count < decoder_cycle + 1, - ); - - for (slot_len, is_last) in current_decode_group.iter_slot_len() { - instruction_sm.push_forward( - gen, - ¤t_decode_group, - &slot_len, - is_last, - &mut program_state, - ); - } - program_state.current_cycle += 1; - } - - //Calculate ASIC latency: - //Assumes 1 cycle latency for all operations and unlimited parallelization. 
- let mut asic_latencies = RGroupRegisters::default(); - for instr in program_state.program.iter() { - let mut latency_dst = asic_latencies.get(&instr.dst()); - latency_dst += 1; - let latency_src = if let Some(src) = instr.src() { - asic_latencies.get(&src) + 1 - } else { - 0 - }; - asic_latencies.set(&instr.dst(), latency_src.max(latency_dst)); - } - - let mut reg_with_max_latency = RGroupRegisterID::R0; - for reg in RGroupRegisterID::iter().skip(1) { - if asic_latencies.get(®) > asic_latencies.get(®_with_max_latency) { - reg_with_max_latency = reg - } - } - - SSProgram { - program: program_state.program, - reg_with_max_latency, - } -} diff --git a/random-x/src/superscalar/instructions.rs b/random-x/src/superscalar/instructions.rs deleted file mode 100644 index 2f504ed6..00000000 --- a/random-x/src/superscalar/instructions.rs +++ /dev/null @@ -1,264 +0,0 @@ -use crate::registers::RGroupRegisterID; -use crate::superscalar::cpu::MacroOp; - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -#[allow(non_camel_case_types)] -pub enum ScalarInstructionID { - /// dst = dst - src - ISUB_R, - /// dst = dst ^ src - IXOR_R, - /// dst = dst + (src << mod_shift) - IADD_RS, - /// dst = dst * src - IMUL_R, - /// dst = dst >>> imm32 - IROR_C, - /// dst = dst + imm32 - IADD_C, - /// dst = dst ^ imm32 - IXOR_C, - /// dst = (dst * src) >> 64 - IMULH_R, - /// dst = (dst * src) >> 64 (signed) - ISMULH_R, - /// dst = 2x / imm32 * dst - IMUL_RCP, -} - -impl ScalarInstructionID { - pub fn macro_op_to_select_src(&self) -> Option { - match self { - ScalarInstructionID::ISUB_R - | ScalarInstructionID::IXOR_R - | ScalarInstructionID::IADD_RS - | ScalarInstructionID::IMUL_R => Some(0), - ScalarInstructionID::IROR_C - | ScalarInstructionID::IADD_C - | ScalarInstructionID::IXOR_C => None, - ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => Some(1), - ScalarInstructionID::IMUL_RCP => None, - } - } - - pub fn macro_op_to_select_dst(&self) -> usize { - match self { - 
ScalarInstructionID::IMUL_RCP => 1, - _ => 0, - } - } - - pub fn macro_op_to_store_res(&self) -> usize { - match self { - ScalarInstructionID::IMULH_R - | ScalarInstructionID::ISMULH_R - | ScalarInstructionID::IMUL_RCP => 1, - _ => 0, - } - } - - pub fn is_multiplication(&self) -> bool { - matches!( - self, - ScalarInstructionID::IMUL_R - | ScalarInstructionID::IMULH_R - | ScalarInstructionID::ISMULH_R - | ScalarInstructionID::IMUL_RCP - ) - } - /// is the destination allowed to be the same as the source - pub fn can_dst_be_src(&self) -> bool { - matches!( - self, - ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R - ) - } - - /// Returns the group of this operation. - /// - /// A group is related instructions that effect register choice during program construction. - pub fn instruction_group(&self) -> ScalarInstructionID { - match self { - // The only 2 instructions in the same group is ISUB_R & IADD_RS - // We could make group an enum but for just these 2 i don't think - // it's worth it. 
- ScalarInstructionID::ISUB_R => ScalarInstructionID::IADD_RS, - id => *id, - } - } - - pub fn number_of_macro_ops(&self) -> usize { - match self { - ScalarInstructionID::ISUB_R - | ScalarInstructionID::IXOR_R - | ScalarInstructionID::IADD_RS - | ScalarInstructionID::IMUL_R - | ScalarInstructionID::IROR_C - | ScalarInstructionID::IADD_C - | ScalarInstructionID::IXOR_C => 1, - ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => 3, - ScalarInstructionID::IMUL_RCP => 2, - } - } - - pub fn macro_op(&self, i: usize) -> Option { - Some(match self { - ScalarInstructionID::ISUB_R => MacroOp::SUB_RR, - ScalarInstructionID::IXOR_R => MacroOp::XOR_RR, - ScalarInstructionID::IADD_RS => MacroOp::LEA_SIB, - ScalarInstructionID::IMUL_R => MacroOp::IMUL_RR { dependant: false }, - ScalarInstructionID::IROR_C => MacroOp::ROR_RI, - ScalarInstructionID::IADD_C => MacroOp::ADD_RI, - ScalarInstructionID::IXOR_C => MacroOp::XOR_RI, - ScalarInstructionID::IMULH_R => match i { - 0 => MacroOp::MOV_RR, - 1 => MacroOp::MUL_R, - 2 => MacroOp::MOV_RR, - _ => return None, - }, - ScalarInstructionID::ISMULH_R => match i { - 0 => MacroOp::MOV_RR, - 1 => MacroOp::IMUL_R, - 2 => MacroOp::MOV_RR, - _ => return None, - }, - ScalarInstructionID::IMUL_RCP => match i { - 0 => MacroOp::MOV_RI, - 1 => MacroOp::IMUL_RR { dependant: true }, - _ => return None, - }, - }) - } -} - -#[derive(Debug, Copy, Clone)] -#[allow(non_camel_case_types)] -pub enum ScalarInstruction { - /// dst = dst - src - ISUB_R { - dst: RGroupRegisterID, - src: RGroupRegisterID, - }, - /// dst = dst ^ src - IXOR_R { - dst: RGroupRegisterID, - src: RGroupRegisterID, - }, - /// dst = dst + (src << mod_shift) - IADD_RS { - dst: RGroupRegisterID, - src: RGroupRegisterID, - mod_shift: u8, - }, - /// dst = dst * src - IMUL_R { - dst: RGroupRegisterID, - src: RGroupRegisterID, - }, - /// dst = dst >>> imm32 - IROR_C { dst: RGroupRegisterID, imm32: u32 }, - /// dst = dst + imm32 - IADD_C { dst: RGroupRegisterID, imm32: u32 }, - /// 
dst = dst ^ imm32 - IXOR_C { dst: RGroupRegisterID, imm32: u32 }, - /// dst = (dst * src) >> 64 - IMULH_R { - dst: RGroupRegisterID, - src: RGroupRegisterID, - }, - /// dst = (dst * src) >> 64 (signed) - ISMULH_R { - dst: RGroupRegisterID, - src: RGroupRegisterID, - }, - /// dst = 2x / imm32 * dst - IMUL_RCP { dst: RGroupRegisterID, imm32: u32 }, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum OpSource { - Constant, - Register(RGroupRegisterID), - /// Not actually a source, but the C++ version sets this field to a - /// random value on some instructions. - Randi32(i32), -} - -impl OpSource { - pub fn from_rand_i32(x: i32) -> Self { - match x { - -1 => OpSource::Constant, - 0 => OpSource::Register(RGroupRegisterID::R0), - 1 => OpSource::Register(RGroupRegisterID::R1), - 2 => OpSource::Register(RGroupRegisterID::R2), - 3 => OpSource::Register(RGroupRegisterID::R3), - 4 => OpSource::Register(RGroupRegisterID::R4), - 5 => OpSource::Register(RGroupRegisterID::R5), - 6 => OpSource::Register(RGroupRegisterID::R6), - 7 => OpSource::Register(RGroupRegisterID::R7), - rand => OpSource::Randi32(rand), - } - } -} - -impl ScalarInstruction { - pub fn dst(&self) -> RGroupRegisterID { - match self { - ScalarInstruction::ISUB_R { dst, .. } - | ScalarInstruction::IXOR_R { dst, .. } - | ScalarInstruction::IADD_RS { dst, .. } - | ScalarInstruction::IMUL_R { dst, .. } - | ScalarInstruction::IROR_C { dst, .. } - | ScalarInstruction::IADD_C { dst, .. } - | ScalarInstruction::IXOR_C { dst, .. } - | ScalarInstruction::IMULH_R { dst, .. } - | ScalarInstruction::ISMULH_R { dst, .. } - | ScalarInstruction::IMUL_RCP { dst, .. } => *dst, - } - } - - pub fn src(&self) -> Option { - match self { - ScalarInstruction::ISUB_R { src, .. } - | ScalarInstruction::IXOR_R { src, .. } - | ScalarInstruction::IADD_RS { src, .. } - | ScalarInstruction::IMUL_R { src, .. } - | ScalarInstruction::IMULH_R { src, .. } - | ScalarInstruction::ISMULH_R { src, .. 
} => Some(*src), - ScalarInstruction::IROR_C { .. } - | ScalarInstruction::IADD_C { .. } - | ScalarInstruction::IXOR_C { .. } - | ScalarInstruction::IMUL_RCP { .. } => None, - } - } - - pub fn id(&self) -> ScalarInstructionID { - match self { - ScalarInstruction::ISUB_R { .. } => ScalarInstructionID::ISUB_R, - ScalarInstruction::IXOR_R { .. } => ScalarInstructionID::IXOR_R, - ScalarInstruction::IADD_RS { .. } => ScalarInstructionID::IADD_RS, - ScalarInstruction::IMUL_R { .. } => ScalarInstructionID::IMUL_R, - ScalarInstruction::IROR_C { .. } => ScalarInstructionID::IROR_C, - ScalarInstruction::IADD_C { .. } => ScalarInstructionID::IADD_C, - ScalarInstruction::IXOR_C { .. } => ScalarInstructionID::IXOR_C, - ScalarInstruction::IMULH_R { .. } => ScalarInstructionID::IMULH_R, - ScalarInstruction::ISMULH_R { .. } => ScalarInstructionID::ISMULH_R, - ScalarInstruction::IMUL_RCP { .. } => ScalarInstructionID::IMUL_RCP, - } - } - - pub fn op_source(&self) -> OpSource { - match self { - ScalarInstruction::ISUB_R { src, .. } - | ScalarInstruction::IXOR_R { src, .. } - | ScalarInstruction::IADD_RS { src, .. } - | ScalarInstruction::IMUL_R { src, .. } - | ScalarInstruction::IMULH_R { src, .. } - | ScalarInstruction::ISMULH_R { src, .. } => OpSource::Register(*src), - ScalarInstruction::IROR_C { .. } - | ScalarInstruction::IADD_C { .. } - | ScalarInstruction::IXOR_C { .. } - | ScalarInstruction::IMUL_RCP { .. } => OpSource::Constant, - } - } -}