From 57630ed9fb39815e78aa2acc4a34102acd9484b6 Mon Sep 17 00:00:00 2001 From: Boog900 <54e72d8a-345f-4599-bd90-c6b9bc7d0ec5@aleeas.com> Date: Thu, 14 Sep 2023 00:23:10 +0100 Subject: [PATCH] init random-x module super scaler program generation/ execution is complete. --- Cargo.toml | 1 + random-x/Cargo.toml | 17 + random-x/src/blake2_generator.rs | 47 ++ random-x/src/config.rs | 22 + random-x/src/dataset.rs | 86 +++ random-x/src/lib.rs | 9 + random-x/src/registers.rs | 3 + random-x/src/registers/integer.rs | 58 ++ random-x/src/superscalar.rs | 21 + random-x/src/superscalar/cpu.rs | 294 +++++++++ random-x/src/superscalar/executor.rs | 105 +++ random-x/src/superscalar/generator.rs | 797 +++++++++++++++++++++++ random-x/src/superscalar/instructions.rs | 264 ++++++++ 13 files changed, 1724 insertions(+) create mode 100644 random-x/Cargo.toml create mode 100644 random-x/src/blake2_generator.rs create mode 100644 random-x/src/config.rs create mode 100644 random-x/src/dataset.rs create mode 100644 random-x/src/lib.rs create mode 100644 random-x/src/registers.rs create mode 100644 random-x/src/registers/integer.rs create mode 100644 random-x/src/superscalar.rs create mode 100644 random-x/src/superscalar/cpu.rs create mode 100644 random-x/src/superscalar/executor.rs create mode 100644 random-x/src/superscalar/generator.rs create mode 100644 random-x/src/superscalar/instructions.rs diff --git a/Cargo.toml b/Cargo.toml index 56f4eb8..301b91f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "common", "consensus", "cryptonight", + "random-x", #"cuprate", # "database", "net/levin", diff --git a/random-x/Cargo.toml b/random-x/Cargo.toml new file mode 100644 index 0000000..4ab295f --- /dev/null +++ b/random-x/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "random-x" +version = "0.1.0" +edition = "2021" + +[features] +default = ["jit"] +jit = ["dep:dynasmrt"] + +[dependencies] +blake2 = "0.10" +argon2 = "0.5" + +dynasmrt = {version = "2.0.0", optional = true} + +[profile.dev] +opt-level = 2 \ No newline at end of file diff --git a/random-x/src/blake2_generator.rs b/random-x/src/blake2_generator.rs new file mode 100644 index 0000000..1d90a03 --- /dev/null +++ b/random-x/src/blake2_generator.rs @@ -0,0 +1,47 @@ +use blake2::digest::FixedOutputReset; +use blake2::{Blake2b512, Digest}; + +const MAX_SEED_LEN: usize = 60; + +pub struct Blake2Generator { + data: [u8; 64], + index: usize, + hasher: Blake2b512, +} + +impl Blake2Generator { + pub fn new(seed: &[u8], nonce: u32) -> Self { + assert!(seed.len() <= MAX_SEED_LEN); + + let mut data = [0; 64]; + data[..seed.len()].copy_from_slice(seed); + + data[MAX_SEED_LEN..].copy_from_slice(&nonce.to_le_bytes()); + + Blake2Generator { + data, + index: 64, + hasher: Blake2b512::default(), + } + } + + pub fn next_u8(&mut self) -> u8 { + self.check_extend(1); + self.index += 1; + self.data[self.index - 1] + } + + pub fn next_u32(&mut self) -> u32 { + self.check_extend(4); + self.index += 4; + u32::from_le_bytes(self.data[self.index - 4..self.index].try_into().unwrap()) + } + + fn check_extend(&mut self, bytes_needed: usize) { + if self.index + bytes_needed > self.data.len() { + self.hasher.update(self.data); + self.data = self.hasher.finalize_fixed_reset().into(); + self.index = 0; + } + } +} diff --git a/random-x/src/config.rs b/random-x/src/config.rs new file mode 100644 index 0000000..67cfacd --- /dev/null +++ b/random-x/src/config.rs @@ -0,0 +1,22 @@ +/// Target latency for SuperscalarHash (in cycles of the reference CPU). +pub(crate) const RANDOMX_SUPERSCALAR_LATENCY: usize = 170; + +pub(crate) const SUPERSCALAR_MAX_SIZE: usize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2; + +/// Dataset base size in bytes. Must be a power of 2. +pub(crate) const RANDOMX_DATASET_BASE_SIZE: usize = 2147483648; + +pub(crate) const RANDOMX_DATASET_EXTRA_SIZE: usize = 33554368; + +pub(crate) const RANDOMX_DATASET_SIZE: usize = + RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE; + +pub(crate) const RANDOMX_ARGON_LANES: u32 = 1; + +pub(crate) const RANDOMX_ARGON_ITERATIONS: u32 = 3; + +pub(crate) const RANDOMX_ARGON_MEMORY: u32 = 262144; + +pub(crate) const RANDOMX_ARGON_SALT: &[u8] = b"RandomX\x03"; + +pub(crate) const RANDOMX_CACHE_ACCESSES: usize = 8; diff --git a/random-x/src/dataset.rs b/random-x/src/dataset.rs new file mode 100644 index 0000000..8415eea --- /dev/null +++ b/random-x/src/dataset.rs @@ -0,0 +1,86 @@ +use argon2::{Algorithm, Argon2, Block, Params, Version}; +use std::sync::{Arc, RwLock}; + +use crate::blake2_generator::Blake2Generator; +use crate::{ + config::{ + RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_LANES, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_SALT, + RANDOMX_CACHE_ACCESSES, + }, + superscalar::SSProgram, +}; + +trait Dataset {} + +fn argon2_blocks(key: &[u8]) -> Box<[Block]> { + let params = Params::new( + RANDOMX_ARGON_MEMORY, + RANDOMX_ARGON_ITERATIONS, + RANDOMX_ARGON_LANES, + None, + ) + .unwrap(); + + let numb_blocks: usize = (RANDOMX_ARGON_LANES * RANDOMX_ARGON_MEMORY) + .try_into() + .unwrap(); + + let mut blocks = vec![Block::new(); numb_blocks].into_boxed_slice(); + + let argon = Argon2::new(Algorithm::Argon2d, Version::V0x13, params); + + argon + .fill_memory(key, RANDOMX_ARGON_SALT, &mut blocks) + .unwrap(); + blocks +} + +struct InternalCache { + memory_blocks: Box<[Block]>, + programs: Vec, +} + +impl InternalCache { + fn init(key: &[u8]) -> Self { + let memory_blocks = argon2_blocks(key); + + let mut blake_gen = Blake2Generator::new(key, 0); + + let programs = (0..RANDOMX_CACHE_ACCESSES) + .map(|_| SSProgram::generate(&mut blake_gen)) + .collect::>(); + + InternalCache { + memory_blocks, + programs, + } + } +} + +pub struct Cache { + internal_cache: Arc>, +} + +struct InternalDataset { + dataset: Vec, +} + +impl InternalDataset { + fn init(key: &[u8]) -> InternalDataset { + let cache = InternalCache::init(key); + let + + } +} + +fn init_data_set_item(cache: &InternalCache, item: u64) -> [u64; 8] { + let +} + +// 12118971377224777581 +#[test] +fn init() { + let mem = InternalCache::init(&[5]); + + println!("{:?}", mem.memory_blocks[1000]) +} diff --git a/random-x/src/lib.rs b/random-x/src/lib.rs new file mode 100644 index 0000000..dd4c6ae --- /dev/null +++ b/random-x/src/lib.rs @@ -0,0 +1,9 @@ +mod blake2_generator; +mod config; +//mod dataset; +mod registers; +mod superscalar; + +fn is_0_or_power_of_2(x: u64) -> bool { + (x & (x - 1)) == 0 +} diff --git a/random-x/src/registers.rs b/random-x/src/registers.rs new file mode 100644 index 0000000..c3a3cd5 --- /dev/null +++ b/random-x/src/registers.rs @@ -0,0 +1,3 @@ +mod integer; + +pub(crate) use integer::*; diff --git a/random-x/src/registers/integer.rs b/random-x/src/registers/integer.rs new file mode 100644 index 0000000..c37283c --- /dev/null +++ b/random-x/src/registers/integer.rs @@ -0,0 +1,58 @@ +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[repr(usize)] +pub enum RGroupRegisterID { + R0 = 0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, +} + +impl RGroupRegisterID { + pub fn iter() -> impl Iterator { + [ + RGroupRegisterID::R0, + RGroupRegisterID::R1, + RGroupRegisterID::R2, + RGroupRegisterID::R3, + RGroupRegisterID::R4, + RGroupRegisterID::R5, + RGroupRegisterID::R6, + RGroupRegisterID::R7, + ] + .into_iter() + } +} + +#[derive(Debug, Default, Clone)] +pub struct RGroupRegisters([u64; 8]); + +impl RGroupRegisters { + pub fn apply_to_dst(&mut self, dst: &RGroupRegisterID, f: impl FnOnce(u64) -> u64) { + *self.get_mut(dst) = f(self.get(dst)); + } + + pub fn apply_to_dst_with_src( + &mut self, + dst: &RGroupRegisterID, + src: &RGroupRegisterID, + f: impl FnOnce(u64, u64) -> u64, + ) { + *self.get_mut(dst) = f(self.get(dst), self.get(src)); + } + + pub fn set(&mut self, id: &RGroupRegisterID, val: u64) { + self.0[*id as usize] = val + } + + pub fn get(&self, id: &RGroupRegisterID) -> u64 { + self.0[*id as usize] + } + + pub fn get_mut(&mut self, id: &RGroupRegisterID) -> &mut u64 { + &mut self.0[*id as usize] + } +} diff --git a/random-x/src/superscalar.rs b/random-x/src/superscalar.rs new file mode 100644 index 0000000..74cf3d0 --- /dev/null +++ b/random-x/src/superscalar.rs @@ -0,0 +1,21 @@ +mod cpu; +mod executor; +mod generator; +mod instructions; + +use crate::blake2_generator::Blake2Generator; + +use crate::registers::RGroupRegisterID; +use generator::generate; +use instructions::ScalarInstruction; + +pub(crate) struct SSProgram { + program: Vec, + reg_with_max_latency: RGroupRegisterID, +} + +impl SSProgram { + pub fn generate(gen: &mut Blake2Generator) -> Self { + generate(gen) + } +} diff --git a/random-x/src/superscalar/cpu.rs b/random-x/src/superscalar/cpu.rs new file mode 100644 index 0000000..009fd43 --- /dev/null +++ b/random-x/src/superscalar/cpu.rs @@ -0,0 +1,294 @@ +use crate::config::RANDOMX_SUPERSCALAR_LATENCY; + +/// Max cycles + highest amount of cycles on a macro op. +const CYCLE_MAP_SIZE: usize = RANDOMX_SUPERSCALAR_LATENCY + 4; + +pub(crate) enum SlotLen { + L3, + L4, + L7, + L8, + L9, + L10, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum ExecutionPort { + P0, + P1, + P5, +} + +enum AllowedPorts { + One(ExecutionPort), + Two(ExecutionPort, ExecutionPort), + All, +} + +impl AllowedPorts { + fn port_allowed(&self, port: &ExecutionPort) -> bool { + match self { + AllowedPorts::One(allowed_port) => allowed_port == port, + AllowedPorts::Two(allowed_port_1, allowed_port_2) => { + allowed_port_1 == port || allowed_port_2 == port + } + AllowedPorts::All => true, + } + } +} + +pub enum MacroOp { + SUB_RR, + XOR_RR, + LEA_SIB, + IMUL_RR { dependant: bool }, + ROR_RI, + ADD_RI, + XOR_RI, + MOV_RR, + MUL_R, + IMUL_R, + MOV_RI, +} + +impl MacroOp { + pub fn cycles_to_complete(&self) -> usize { + match self { + MacroOp::SUB_RR => 1, + MacroOp::XOR_RR => 1, + MacroOp::LEA_SIB => 1, + MacroOp::IMUL_RR { .. } => 3, + MacroOp::ROR_RI => 1, + MacroOp::ADD_RI => 1, + MacroOp::XOR_RI => 1, + MacroOp::MOV_RR => 0, + MacroOp::MUL_R => 4, + MacroOp::IMUL_R => 4, + MacroOp::MOV_RI => 1, + } + } + + pub fn can_be_eliminated(&self) -> bool { + self.micro_ops_needed() == 0 + } + + pub fn is_dependant_on_last_op(&self) -> bool { + match self { + MacroOp::IMUL_RR { dependant } => *dependant, + _ => false, + } + } + + pub fn micro_ops_needed(&self) -> usize { + match self { + MacroOp::SUB_RR => 1, + MacroOp::XOR_RR => 1, + MacroOp::LEA_SIB => 1, + MacroOp::IMUL_RR { .. } => 1, + MacroOp::ROR_RI => 1, + MacroOp::ADD_RI => 1, + MacroOp::XOR_RI => 1, + MacroOp::MOV_RR => 0, + MacroOp::MUL_R => 2, + MacroOp::IMUL_R => 2, + MacroOp::MOV_RI => 1, + } + } + + fn allowed_execution_ports(&self, micro_op_index: usize) -> AllowedPorts { + match self { + MacroOp::SUB_RR => AllowedPorts::All, + MacroOp::XOR_RR => AllowedPorts::All, + MacroOp::LEA_SIB => AllowedPorts::Two(ExecutionPort::P0, ExecutionPort::P1), + MacroOp::IMUL_RR { .. } => AllowedPorts::One(ExecutionPort::P1), + MacroOp::ROR_RI => AllowedPorts::Two(ExecutionPort::P0, ExecutionPort::P5), + MacroOp::ADD_RI => AllowedPorts::All, + MacroOp::XOR_RI => AllowedPorts::All, + MacroOp::MOV_RR => panic!("No execution units needed for MOV_RR"), + MacroOp::MUL_R => match micro_op_index { + 0 => AllowedPorts::One(ExecutionPort::P1), + 1 => AllowedPorts::One(ExecutionPort::P5), + _ => panic!("no execution port at that index"), + }, + MacroOp::IMUL_R => match micro_op_index { + 0 => AllowedPorts::One(ExecutionPort::P1), + 1 => AllowedPorts::One(ExecutionPort::P5), + _ => panic!("no execution port at that index"), + }, + MacroOp::MOV_RI => AllowedPorts::All, + } + } +} + +/// Represents the ports availability during a single cycle. +#[derive(Debug, Default, Copy, Clone)] +struct CycleSchedule { + p0: bool, + p1: bool, + p5: bool, +} + +impl CycleSchedule { + fn space_for_micro_op(&self, allowed_ports: &AllowedPorts) -> Option { + if !self.p5 && allowed_ports.port_allowed(&ExecutionPort::P5) { + Some(ExecutionPort::P5) + } else if !self.p0 && allowed_ports.port_allowed(&ExecutionPort::P0) { + Some(ExecutionPort::P0) + } else if !self.p1 && allowed_ports.port_allowed(&ExecutionPort::P1) { + Some(ExecutionPort::P1) + } else { + None + } + } + + fn set_port_busy(&mut self, port: ExecutionPort) { + match port { + ExecutionPort::P0 => self.p0 = true, + ExecutionPort::P1 => self.p1 = true, + ExecutionPort::P5 => self.p5 = true, + } + } +} + +pub(crate) struct MacroOpOpportunity { + cycle: usize, + micro_port_0: Option, + micro_port_1: Option, +} + +impl MacroOpOpportunity { + pub fn cycle(&self) -> usize { + self.cycle + } +} + +#[derive(Debug)] +pub(crate) struct ProgramSchedule { + ports_schedule: [CycleSchedule; CYCLE_MAP_SIZE], + full: bool, +} + +impl Default for ProgramSchedule { + fn default() -> Self { + Self { + ports_schedule: [CycleSchedule::default(); CYCLE_MAP_SIZE], + full: false, + } + } +} + +impl ProgramSchedule { + pub fn set_full(&mut self) { + self.full = true; + } + + pub fn is_full(&self) -> bool { + self.full + } + + pub fn schedule_macro_op_at_earliest( + &mut self, + op: &MacroOp, + cycle: usize, + last_op_completes_at: usize, + ) -> Option { + let opportunity = self.earliest_cycle_for_macro_op(op, cycle, last_op_completes_at)?; + let cycle = opportunity.cycle(); + if let Some(port0) = opportunity.micro_port_0 { + self.schedule_micro_op(cycle, port0); + if let Some(port1) = opportunity.micro_port_1 { + self.schedule_micro_op(cycle, port1); + }; + }; + + Some(cycle) + } + + pub fn earliest_cycle_for_macro_op( + &mut self, + op: &MacroOp, + cycle: usize, + last_op_completes_at: usize, + ) -> Option { + let mut cycle = if op.is_dependant_on_last_op() { + cycle.max(last_op_completes_at) + } else { + cycle + }; + + if op.can_be_eliminated() { + return Some(MacroOpOpportunity { + cycle, + micro_port_0: None, + micro_port_1: None, + }); + } + + match op.micro_ops_needed() { + 0 => Some(MacroOpOpportunity { + cycle, + micro_port_0: None, + micro_port_1: None, + }), + 1 => self + .earliest_cycle_for_mirco_op(&op.allowed_execution_ports(0), cycle) + .map(|(cycle, micro_port_0)| MacroOpOpportunity { + cycle, + micro_port_0: Some(micro_port_0), + micro_port_1: None, + }), + 2 => { + // both ops must happen in the same cycle + let allowed_0 = op.allowed_execution_ports(0); + let allowed_1 = op.allowed_execution_ports(1); + + while cycle < CYCLE_MAP_SIZE { + let (min_0_cycle, port_0) = + self.earliest_cycle_for_mirco_op(&allowed_0, cycle)?; + let (min_1_cycle, port_1) = + self.earliest_cycle_for_mirco_op(&allowed_1, cycle)?; + + if min_0_cycle == min_1_cycle { + return Some(MacroOpOpportunity { + cycle: min_0_cycle, + micro_port_0: Some(port_0), + micro_port_1: Some(port_1), + }); + } else { + cycle += 1; + } + } + None + } + _ => unreachable!(), + } + } + + fn schedule_micro_op_at_earliest( + &mut self, + allowed_ports: &AllowedPorts, + cycle: usize, + ) -> Option { + let (cycle, port) = self.earliest_cycle_for_mirco_op(allowed_ports, cycle)?; + self.schedule_micro_op(cycle, port); + Some(cycle) + } + + fn schedule_micro_op(&mut self, cycle: usize, port: ExecutionPort) { + self.ports_schedule[cycle].set_port_busy(port) + } + + fn earliest_cycle_for_mirco_op( + &mut self, + allowed_ports: &AllowedPorts, + cycle: usize, + ) -> Option<(usize, ExecutionPort)> { + for (cycle, cycle_schedule) in self.ports_schedule.iter().enumerate().skip(cycle) { + if let Some(port) = cycle_schedule.space_for_micro_op(allowed_ports) { + return Some((cycle, port)); + } + } + self.full = true; + None + } +} diff --git a/random-x/src/superscalar/executor.rs b/random-x/src/superscalar/executor.rs new file mode 100644 index 0000000..fef9037 --- /dev/null +++ b/random-x/src/superscalar/executor.rs @@ -0,0 +1,105 @@ +use crate::registers::RGroupRegisters; +use crate::superscalar::instructions::ScalarInstruction; + +const P2EXP63: u64 = 1 << 63; + +pub fn execute(program: &[ScalarInstruction], registers: &mut RGroupRegisters) { + for instruction in program { + match instruction { + ScalarInstruction::ISUB_R { dst, src } => { + let op = |dst_val: u64, src_val| dst_val.wrapping_sub(src_val); + registers.apply_to_dst_with_src(dst, src, op); + } + ScalarInstruction::IXOR_R { dst, src } => { + let op = |dst_val: u64, src_val| dst_val ^ src_val; + registers.apply_to_dst_with_src(dst, src, op); + } + ScalarInstruction::IADD_RS { + dst, + src, + mod_shift, + } => { + let op = |dst_val: u64, src_val| { + dst_val.wrapping_add(src_val << clamp_mod_shift(*mod_shift)) + }; + registers.apply_to_dst_with_src(dst, src, op); + } + ScalarInstruction::IMUL_R { dst, src } => { + let op = |dst_val: u64, src_val| dst_val.wrapping_mul(src_val); + registers.apply_to_dst_with_src(dst, src, op); + } + ScalarInstruction::IROR_C { dst, imm32 } => { + let op = |dst_val: u64| dst_val.rotate_right(*imm32); + registers.apply_to_dst(dst, op); + } + ScalarInstruction::IADD_C { dst, imm32 } => { + let op = |dst_val: u64| dst_val.wrapping_add(sign_extend_2s_compl(*imm32)); + registers.apply_to_dst(dst, op); + } + ScalarInstruction::IXOR_C { dst, imm32 } => { + let op = |dst_val: u64| dst_val ^ sign_extend_2s_compl(*imm32); + registers.apply_to_dst(dst, op); + } + ScalarInstruction::IMULH_R { dst, src } => { + registers.apply_to_dst_with_src(dst, src, high_mul); + } + ScalarInstruction::ISMULH_R { dst, src } => { + let op = |dst_val: u64, src_val: u64| { + signed_high_mul(dst_val as i64, src_val as i64) as u64 + }; + registers.apply_to_dst_with_src(dst, src, op); + } + ScalarInstruction::IMUL_RCP { dst, imm32 } => { + let op = |dst_val: u64| dst_val.wrapping_mul(randomx_reciprocal(*imm32 as u64)); + registers.apply_to_dst(dst, op); + } + } + } +} + +pub fn randomx_reciprocal(divisor: u64) -> u64 { + assert!(!divisor.is_power_of_two()); + assert_ne!(divisor, 0); + + let mut quotient = P2EXP63 / divisor; + let mut remainder = P2EXP63 % divisor; + let mut bsr = 0; + + let mut bit = divisor; + + while bit > 0 { + bsr += 1; + bit >>= 1; + } + + for _ in 0..bsr { + if remainder >= divisor.wrapping_sub(remainder) { + quotient = quotient.wrapping_mul(2).wrapping_add(1); + remainder = remainder.wrapping_mul(2).wrapping_sub(divisor); + } else { + quotient = quotient.wrapping_mul(2); + remainder = remainder.wrapping_mul(2); + } + } + quotient +} + +fn high_mul(a: u64, b: u64) -> u64 { + ((a as u128 * b as u128) >> 64) as u64 +} + +fn signed_high_mul(a: i64, b: i64) -> i64 { + ((a as i128 * b as i128) >> 64) as i64 +} + +pub fn sign_extend_2s_compl(imm: u32) -> u64 { + if imm > i32::MAX as u32 { + imm as u64 | 0xffffffff00000000 + } else { + imm as u64 + } +} + +fn clamp_mod_shift(x: u8) -> u64 { + (x as u64 >> 2) % 4 +} diff --git a/random-x/src/superscalar/generator.rs b/random-x/src/superscalar/generator.rs new file mode 100644 index 0000000..57f7d08 --- /dev/null +++ b/random-x/src/superscalar/generator.rs @@ -0,0 +1,797 @@ +use std::cmp::Ordering; + +use crate::config::SUPERSCALAR_MAX_SIZE; +use crate::registers::{RGroupRegisterID, RGroupRegisters}; +use crate::superscalar::cpu::{MacroOp, ProgramSchedule, SlotLen}; +use crate::superscalar::instructions::ScalarInstruction; +use crate::superscalar::SSProgram; +use crate::{ + blake2_generator::Blake2Generator, + config::RANDOMX_SUPERSCALAR_LATENCY, + is_0_or_power_of_2, + superscalar::instructions::{OpSource, ScalarInstructionID}, +}; + +const LOOK_FORWARD_CYCLES: usize = 4; +const MAX_THROWAWAY_COUNT: usize = 256; + +/// Groups of 3 or 4 Macro-op slots that sum to 16 +/// +/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#631-decoding-stage +/// table 6.3.1 +#[derive(Eq, PartialEq, Copy, Clone)] +enum DecoderGroup { + /// 0: 4-8-4 + D484, + /// 1: 7-3-3-3 + D7333, + /// 2: 3-7-3-3 + D3733, + /// 3: 4-9-3 + D493, + + /// 4: 4-4-4-4 + D4444, + /// 5: 3-3-10 + D3310, +} + +impl DecoderGroup { + fn slot_len(&self, index: usize) -> Option { + match self { + DecoderGroup::D484 => match index { + 0 | 2 => Some(SlotLen::L4), + 1 => Some(SlotLen::L8), + _ => None, + }, + DecoderGroup::D7333 => match index { + 0 => Some(SlotLen::L7), + 1..=3 => Some(SlotLen::L3), + _ => None, + }, + DecoderGroup::D3733 => match index { + 0 | 2 | 3 => Some(SlotLen::L3), + 1 => Some(SlotLen::L7), + _ => None, + }, + DecoderGroup::D493 => match index { + 0 => Some(SlotLen::L4), + 1 => Some(SlotLen::L9), + 2 => Some(SlotLen::L3), + _ => None, + }, + DecoderGroup::D4444 => match index { + 0..=3 => Some(SlotLen::L4), + _ => None, + }, + DecoderGroup::D3310 => match index { + 0 | 1 => Some(SlotLen::L3), + 2 => Some(SlotLen::L10), + _ => None, + }, + } + } + + /// Returns an iterator over the lengths with a bool `is_last` + pub fn iter_slot_len(&self) -> impl Iterator + '_ { + (0..self.size()).map(|i| (self.slot_len(i).unwrap(), self.size() - 1 == i)) + } + + pub fn size(&self) -> usize { + match self { + DecoderGroup::D484 => 3, + DecoderGroup::D7333 => 4, + DecoderGroup::D3733 => 4, + DecoderGroup::D493 => 3, + DecoderGroup::D4444 => 4, + DecoderGroup::D3310 => 3, + } + } + + fn next_group( + gen: &mut Blake2Generator, + instruction: Option, + total_muls_low: bool, + ) -> DecoderGroup { + if matches!( + instruction, + Some(ScalarInstructionID::IMULH_R) | Some(ScalarInstructionID::ISMULH_R) + ) { + return DecoderGroup::D3310; + } + + if total_muls_low { + return DecoderGroup::D4444; + } + + if instruction == Some(ScalarInstructionID::IMUL_RCP) { + return match (gen.next_u8() & 1).cmp(&1) { + Ordering::Equal => DecoderGroup::D484, + Ordering::Less => DecoderGroup::D493, + Ordering::Greater => unreachable!(), + }; + } + + match gen.next_u8() & 3 { + 0 => DecoderGroup::D484, + 1 => DecoderGroup::D7333, + 2 => DecoderGroup::D3733, + 3 => DecoderGroup::D493, + _ => unreachable!(), + } + } +} + +#[derive(Debug, Copy, Clone)] +pub(crate) struct SingleRegisterInfo { + id: RGroupRegisterID, + next_ready: usize, + last_instruction: Option, + last_source: OpSource, +} + +impl SingleRegisterInfo { + pub fn id(&self) -> RGroupRegisterID { + self.id + } + pub fn next_ready(&self) -> usize { + self.next_ready + } + pub fn last_instruction(&self) -> Option { + self.last_instruction + } + pub fn last_source(&self) -> OpSource { + self.last_source + } + pub fn set_next_ready(&mut self, next_ready: usize) { + self.next_ready = next_ready + } + pub fn set_last_instruction(&mut self, last_instruction: ScalarInstructionID) { + self.last_instruction = Some(last_instruction); + } + pub fn set_last_source(&mut self, last_source: OpSource) { + self.last_source = last_source + } +} + +#[derive(Debug)] +pub(crate) struct RegistersInfo { + registers: [SingleRegisterInfo; 8], +} + +impl Default for RegistersInfo { + fn default() -> Self { + let default = SingleRegisterInfo { + id: RGroupRegisterID::R0, + next_ready: 0, + last_instruction: None, + last_source: OpSource::Constant, + }; + let mut default = [default; 8]; + let reg_ids = [ + RGroupRegisterID::R1, + RGroupRegisterID::R2, + RGroupRegisterID::R3, + RGroupRegisterID::R4, + RGroupRegisterID::R5, + RGroupRegisterID::R6, + RGroupRegisterID::R7, + ]; + for (reg, id) in default.iter_mut().skip(1).zip(reg_ids) { + reg.id = id; + } + RegistersInfo { registers: default } + } +} + +impl RegistersInfo { + pub fn iter(&self) -> impl Iterator { + self.registers.iter() + } + pub fn ready_at_cycle(&self, cycle: usize) -> Vec<&SingleRegisterInfo> { + self.registers + .iter() + .filter(|reg| reg.next_ready <= cycle) + .collect::>() + } + pub fn get_mut(&mut self, id: RGroupRegisterID) -> &mut SingleRegisterInfo { + &mut self.registers[id as usize] + } +} + +pub(crate) fn select_register( + gen: &mut Blake2Generator, + available: &[&SingleRegisterInfo], +) -> Option { + if available.is_empty() { + return None; + } + let index = if available.len() > 1 { + // available is <= 8 so as is safe + (gen.next_u32() % available.len() as u32) + .try_into() + .expect("Could not fit u32 into usize") + } else { + 0 + }; + + Some(available[index].id) +} + +/// Returns an imm32 if the instruction requires one. +fn get_imm32(gen: &mut Blake2Generator, id: &ScalarInstructionID) -> Option { + match id { + ScalarInstructionID::IADD_C | ScalarInstructionID::IXOR_C => Some(gen.next_u32()), + ScalarInstructionID::IROR_C => { + // imm32 % 64 != 0 + Some( + loop { + let imm8 = gen.next_u8() & 63; + if imm8 != 0 { + break imm8; + } + } + .into(), + ) + } + ScalarInstructionID::IMUL_RCP => { + // imm32 != 0, imm32 != 2N + Some(loop { + let imm32 = gen.next_u32(); + if !is_0_or_power_of_2(imm32.into()) { + break imm32; + } + }) + } + _ => None, + } +} + +fn get_mod_shift(gen: &mut Blake2Generator, id: &ScalarInstructionID) -> Option { + match id { + ScalarInstructionID::IADD_RS => Some(gen.next_u8()), + _ => None, + } +} + +/// Used during [`ScalarInstructionBuilder`] creation. Returns the [`OpSource`] to give the register +/// if this is known otherwise [`None`] is returned and this field will be filled later. +fn get_src_to_give_register( + gen: &mut Blake2Generator, + id: &ScalarInstructionID, +) -> Option { + match id { + ScalarInstructionID::IADD_C + | ScalarInstructionID::IXOR_C + | ScalarInstructionID::IROR_C + | ScalarInstructionID::IMUL_RCP => Some(OpSource::Constant), + ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => { + // not actually the source value, the Monero C++ version sets this field to a random + // value, this has an issue of becoming an actual meaningful value though so we handle + // those rare cases here: + Some(OpSource::from_rand_i32(gen.next_u32() as i32)) + } + _ => None, + } +} + +struct ScalarInstructionBuilder { + /// The id of the instruction we are building. + id: ScalarInstructionID, + /// The true source register - the one we are actually getting the value from will be + /// None if this instruction doesn't need a register source. + true_src: Option, + /// The value src we tell the dst register, if this is a register then most of the time this + /// is the same as [`true_src`] but for `IMULH_R` and `ISMULH_R` it's not. + /// + /// `IMULH_R` and `ISMULH_R` generate a random i32 and set it for this slot . + src_to_give_register: Option, + /// The destination register for this instruction. + dst: Option, + /// A constant used in some instructions. + imm32: Option, + /// used in IADD_RS + mod_shift: Option, +} + +impl ScalarInstructionBuilder { + /// Creates a new [`ScalarInstructionBuilder`]. + /// + pub fn new( + gen: &mut Blake2Generator, + slot_len: &SlotLen, + group: &DecoderGroup, + is_last: bool, + ) -> Self { + // https://github.com/tevador/RandomX/blob/master/doc/specs.md#632-instruction-selection + let id = match slot_len { + SlotLen::L3 if !is_last => match gen.next_u8() & 1 { + 0 => ScalarInstructionID::ISUB_R, + _ => ScalarInstructionID::IXOR_R, + }, + SlotLen::L3 => match gen.next_u8() & 3 { + 0 => ScalarInstructionID::ISUB_R, + 1 => ScalarInstructionID::IXOR_R, + 2 => ScalarInstructionID::IMULH_R, + _ => ScalarInstructionID::ISMULH_R, + }, + SlotLen::L4 if group == &DecoderGroup::D4444 && !is_last => ScalarInstructionID::IMUL_R, + SlotLen::L4 => match gen.next_u8() & 1 { + 0 => ScalarInstructionID::IROR_C, + _ => ScalarInstructionID::IADD_RS, + }, + SlotLen::L7 | SlotLen::L8 | SlotLen::L9 => match gen.next_u8() & 1 { + 0 => ScalarInstructionID::IXOR_C, + _ => ScalarInstructionID::IADD_C, + }, + SlotLen::L10 => ScalarInstructionID::IMUL_RCP, + }; + + Self { + id, + true_src: None, + src_to_give_register: get_src_to_give_register(gen, &id), + dst: None, + imm32: get_imm32(gen, &id), + mod_shift: get_mod_shift(gen, &id), + } + } + + /// Set the source of the operation + fn set_src(&mut self, src: RGroupRegisterID) { + self.true_src = Some(src); + if self.src_to_give_register.is_none() { + // If the src_to_give_register field hasn't already been set then set it now. + // The only fields that have true_src as a register with a different src_to_give_register + // set this field at the start. + self.src_to_give_register = Some(OpSource::Register(src)); + } + } + + /// Select the source of this operation from the given registers. + /// + /// If no registers are available [`false`] is returned. + pub fn select_source( + &mut self, + gen: &mut Blake2Generator, + cycle: usize, + registers_info: &RegistersInfo, + ) -> bool { + let available_registers = registers_info.ready_at_cycle(cycle); + //if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination + if available_registers.len() == 2 + && self.id == ScalarInstructionID::IADD_RS + && (available_registers[0].id() == RGroupRegisterID::R5 + || available_registers[1].id() == RGroupRegisterID::R5) + { + self.set_src(RGroupRegisterID::R5); + return true; + } + if let Some(reg) = select_register(gen, &available_registers) { + self.set_src(reg); + return true; + }; + + false + } + + /// Selects the destination of this operation from the given registers. + /// + /// If no registers are available [`false`] is returned. + fn select_destination( + &mut self, + gen: &mut Blake2Generator, + cycle: usize, + allow_chain_mul: bool, + registers_info: &RegistersInfo, + ) -> bool { + let available_registers = registers_info + .iter() + .filter(|reg| { + reg.next_ready() <= cycle + && (self.id.can_dst_be_src() || Some(reg.id()) != self.true_src) + && (allow_chain_mul + || self.id.instruction_group() != ScalarInstructionID::IMUL_R + || reg.last_instruction() != Some(ScalarInstructionID::IMUL_R)) + && (Some(self.id.instruction_group()) != reg.last_instruction() + || self.src_to_give_register != Some(reg.last_source())) + && (reg.id() != RGroupRegisterID::R5 || self.id != ScalarInstructionID::IADD_RS) + }) + .collect::>(); + let Some(reg) = select_register(gen, &available_registers) else { + return false; + }; + self.dst = Some(reg); + true + } + + fn construct(self) -> ScalarInstruction { + match self.id { + ScalarInstructionID::ISUB_R => ScalarInstruction::ISUB_R { + dst: self.dst.unwrap(), + src: self.true_src.unwrap(), + }, + ScalarInstructionID::IXOR_R => ScalarInstruction::IXOR_R { + dst: self.dst.unwrap(), + src: self.true_src.unwrap(), + }, + ScalarInstructionID::IADD_RS => ScalarInstruction::IADD_RS { + dst: self.dst.unwrap(), + src: self.true_src.unwrap(), + mod_shift: self.mod_shift.unwrap(), + }, + ScalarInstructionID::IMUL_R => ScalarInstruction::IMUL_R { + dst: self.dst.unwrap(), + src: self.true_src.unwrap(), + }, + ScalarInstructionID::IROR_C => ScalarInstruction::IROR_C { + dst: self.dst.unwrap(), + imm32: self.imm32.unwrap(), + }, + ScalarInstructionID::IADD_C => ScalarInstruction::IADD_C { + dst: self.dst.unwrap(), + imm32: self.imm32.unwrap(), + }, + ScalarInstructionID::IXOR_C => ScalarInstruction::IXOR_C { + dst: self.dst.unwrap(), + imm32: self.imm32.unwrap(), + }, + ScalarInstructionID::IMULH_R => ScalarInstruction::IMULH_R { + dst: self.dst.unwrap(), + src: self.true_src.unwrap(), + }, + ScalarInstructionID::ISMULH_R => ScalarInstruction::ISMULH_R { + dst: self.dst.unwrap(), + src: self.true_src.unwrap(), + }, + ScalarInstructionID::IMUL_RCP => ScalarInstruction::IMUL_RCP { + dst: self.dst.unwrap(), + imm32: self.imm32.unwrap(), + }, + } + } +} + +#[derive(Debug, Default)] +struct ProgramState { + /// The current cycle we are generating for. + current_cycle: usize, + /// The cycle the last operation will complete at. + last_op_completes_at: usize, + + /// The amount of multiplication instructions the program + /// has generated. + mul_count: usize, + /// The amount of instructions in a row the program has thrown + /// away because they couldn't be completed. + throw_away_count: usize, + /// The execution port schedule of the program. + program_schedule: ProgramSchedule, + /// Information on the registers state. + registers_info: RegistersInfo, + /// The program + program: Vec, +} + +impl ProgramState { + fn allow_chain_mul(&self) -> bool { + self.throw_away_count > 0 + } +} + +/// A state machine that controls instruction generation. +enum ScalarInstructionBuilderSM { + /// The generate instruction state, the next call will + /// start a new instruction. + Generate { + /// The last instruction generated. + last_instruction: Option, + }, + /// A partially completed instruction, the next call will + /// push this instruction forward. + PartiallyComplete { + /// The instruction currently being generated. + builder: ScalarInstructionBuilder, + /// The macro op of the instruction we are going + /// to do next. + macro_op_idx: usize, + }, + /// NULL state, this state will only be finished on is the program is full. + NULL, +} + +impl ScalarInstructionBuilderSM { + pub fn push_forward( + &mut self, + gen: &mut Blake2Generator, + decoder_group: &DecoderGroup, + slot_len: &SlotLen, + is_last_slot: bool, + program_state: &mut ProgramState, + ) { + loop { + match std::mem::replace(self, ScalarInstructionBuilderSM::NULL) { + ScalarInstructionBuilderSM::NULL => { + return; + } + ScalarInstructionBuilderSM::Generate { .. } => { + if program_state.program_schedule.is_full() + || program_state.program.len() >= SUPERSCALAR_MAX_SIZE + { + return; + } + + let builder = + ScalarInstructionBuilder::new(gen, slot_len, decoder_group, is_last_slot); + + *self = ScalarInstructionBuilderSM::PartiallyComplete { + builder, + macro_op_idx: 0, + }; + } + ScalarInstructionBuilderSM::PartiallyComplete { + mut builder, + mut macro_op_idx, + } => { + let top_cycle = program_state.current_cycle; + + if macro_op_idx >= builder.id.number_of_macro_ops() { + *self = ScalarInstructionBuilderSM::Generate { + last_instruction: Some(builder.id), + }; + continue; + } + + let Some(next_macro_op) = builder.id.macro_op(macro_op_idx) else { + unreachable!("We just checked if the macro op idx is too high") + }; + + let Some(opportunity) = + program_state.program_schedule.earliest_cycle_for_macro_op( + &next_macro_op, + program_state.current_cycle, + program_state.last_op_completes_at, + ) + else { + program_state.program_schedule.set_full(); + return; + }; + + let mut scheduled_cycle = opportunity.cycle(); + + if !Self::check_set_src( + &mut builder, + macro_op_idx, + gen, + &mut scheduled_cycle, + &mut program_state.current_cycle, + &program_state.registers_info, + ) { + // If the source couldn't be set throw the instruction away + if program_state.throw_away_count < MAX_THROWAWAY_COUNT { + program_state.throw_away_count += 1; + *self = ScalarInstructionBuilderSM::Generate { + last_instruction: Some(builder.id), + }; + continue; + } + // If too many instructions are thrown away return for the next decoder + // idx + *self = ScalarInstructionBuilderSM::Generate { + last_instruction: None, + }; + return; + } + + let allow_chain_mul = program_state.allow_chain_mul(); + + if !Self::check_set_dst( + &mut builder, + macro_op_idx, + gen, + &mut scheduled_cycle, + &mut program_state.current_cycle, + allow_chain_mul, + &program_state.registers_info, + ) { + // If the source couldn't be set throw the instruction away + if program_state.throw_away_count < MAX_THROWAWAY_COUNT { + program_state.throw_away_count += 1; + *self = ScalarInstructionBuilderSM::Generate { + last_instruction: Some(builder.id), + }; + continue; + } + // If too many instructions are thrown away return for the next decoder + // idx + *self = ScalarInstructionBuilderSM::Generate { + last_instruction: None, + }; + return; + } + + program_state.throw_away_count = 0; + + let Some(scheduled_cycle) = program_state + .program_schedule + .schedule_macro_op_at_earliest( + &next_macro_op, + scheduled_cycle, + program_state.last_op_completes_at, + ) + else { + program_state.program_schedule.set_full(); + return; + }; + + let completes_at = scheduled_cycle + next_macro_op.cycles_to_complete(); + program_state.last_op_completes_at = completes_at; + + if macro_op_idx == builder.id.macro_op_to_store_res() { + let reg = program_state.registers_info.get_mut(builder.dst.unwrap()); + reg.set_next_ready(completes_at); + reg.set_last_source(builder.src_to_give_register.unwrap()); + reg.set_last_instruction(builder.id.instruction_group()); + } + + macro_op_idx += 1; + program_state.current_cycle = top_cycle; + + if scheduled_cycle >= RANDOMX_SUPERSCALAR_LATENCY { + program_state.program_schedule.set_full(); + } + + if macro_op_idx >= builder.id.number_of_macro_ops() { + if builder.id.is_multiplication() { + program_state.mul_count += 1; + } + *self = ScalarInstructionBuilderSM::Generate { + last_instruction: Some(builder.id), + }; + program_state.program.push(builder.construct()); + } else { + *self = ScalarInstructionBuilderSM::PartiallyComplete { + builder, + macro_op_idx, + }; + } + return; + } + } + } + } + + /// Try set the instructions source. + /// + /// Will return true if the src has been set or if its not the correct macro op to set the dst. + /// + /// Will return false if its the correct macro op to set the dst and the src couldn't be set. + fn check_set_dst( + builder: &mut ScalarInstructionBuilder, + macro_op_idx: usize, + gen: &mut Blake2Generator, + scheduled_cycle: &mut usize, + cycle: &mut usize, + allow_chain_mul: bool, + registers_info: &RegistersInfo, + ) -> bool { + if builder.id.macro_op_to_select_dst() != macro_op_idx { + // We don't need to set the src at this macro op. + return true; + } + + let mut set = false; + for _ in 0..LOOK_FORWARD_CYCLES { + if !builder.select_destination(gen, *scheduled_cycle, allow_chain_mul, ®isters_info) + { + *scheduled_cycle += 1; + *cycle += 1; + } else { + set = true; + break; + } + } + + set + } + + /// Try set the instructions source. + /// + /// Will return true if the src has been set or if its not he correct macro op to set the src. + /// + /// Will return false if its the correct macro op to set the src and the src couldn't be set. + fn check_set_src( + builder: &mut ScalarInstructionBuilder, + macro_op_idx: usize, + gen: &mut Blake2Generator, + scheduled_cycle: &mut usize, + cycle: &mut usize, + registers_info: &RegistersInfo, + ) -> bool { + if builder.id.macro_op_to_select_src() != Some(macro_op_idx) { + // We don't need to set the src at this macro op. + return true; + } + + let mut set = false; + for _ in 0..LOOK_FORWARD_CYCLES { + if !builder.select_source(gen, *scheduled_cycle, registers_info) { + *scheduled_cycle += 1; + *cycle += 1; + } else { + set = true; + break; + } + } + + set + } + + pub fn get_instruction_id(&self) -> Option { + match self { + ScalarInstructionBuilderSM::Generate { last_instruction } => *last_instruction, + ScalarInstructionBuilderSM::PartiallyComplete { builder, .. } => Some(builder.id), + ScalarInstructionBuilderSM::NULL => { + panic!("Should not be calling this function in this state") + } + } + } +} + +pub(crate) fn generate(gen: &mut Blake2Generator) -> SSProgram { + let mut program_state = ProgramState::default(); + + let mut instruction_sm = ScalarInstructionBuilderSM::Generate { + last_instruction: None, + }; + + for decoder_cycle in 0..RANDOMX_SUPERSCALAR_LATENCY { + if program_state.program_schedule.is_full() + || program_state.program.len() >= SUPERSCALAR_MAX_SIZE + { + break; + } + let current_decode_group = DecoderGroup::next_group( + gen, + instruction_sm.get_instruction_id(), + program_state.mul_count < decoder_cycle + 1, + ); + + for (slot_len, is_last) in current_decode_group.iter_slot_len() { + instruction_sm.push_forward( + gen, + ¤t_decode_group, + &slot_len, + is_last, + &mut program_state, + ); + } + program_state.current_cycle += 1; + } + + //Calculate ASIC latency: + //Assumes 1 cycle latency for all operations and unlimited parallelization. + let mut asic_latencies = RGroupRegisters::default(); + for instr in program_state.program.iter() { + let mut latency_dst = asic_latencies.get(&instr.dst()); + latency_dst += 1; + let latency_src = if let Some(src) = instr.src() { + asic_latencies.get(&src) + 1 + } else { + 0 + }; + asic_latencies.set(&instr.dst(), latency_src.max(latency_dst)); + } + + let mut reg_with_max_latency = RGroupRegisterID::R0; + for reg in RGroupRegisterID::iter().skip(1) { + if asic_latencies.get(®) > asic_latencies.get(®_with_max_latency) { + reg_with_max_latency = reg + } + } + + SSProgram { + program: program_state.program, + reg_with_max_latency, + } +} diff --git a/random-x/src/superscalar/instructions.rs b/random-x/src/superscalar/instructions.rs new file mode 100644 index 0000000..7c84fab --- /dev/null +++ b/random-x/src/superscalar/instructions.rs @@ -0,0 +1,264 @@ +use crate::registers::RGroupRegisterID; +use crate::superscalar::cpu::{MacroOp, SlotLen}; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[allow(non_camel_case_types)] +pub enum ScalarInstructionID { + /// dst = dst - src + ISUB_R, + /// dst = dst ^ src + IXOR_R, + /// dst = dst + (src << mod_shift) + IADD_RS, + /// dst = dst * src + IMUL_R, + /// dst = dst >>> imm32 + IROR_C, + /// dst = dst + imm32 + IADD_C, + /// dst = dst ^ imm32 + IXOR_C, + /// dst = (dst * src) >> 64 + IMULH_R, + /// dst = (dst * src) >> 64 (signed) + ISMULH_R, + /// dst = 2x / imm32 * dst + IMUL_RCP, +} + +impl ScalarInstructionID { + pub fn macro_op_to_select_src(&self) -> Option { + match self { + ScalarInstructionID::ISUB_R + | ScalarInstructionID::IXOR_R + | ScalarInstructionID::IADD_RS + | ScalarInstructionID::IMUL_R => Some(0), + ScalarInstructionID::IROR_C + | ScalarInstructionID::IADD_C + | ScalarInstructionID::IXOR_C => None, + ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => Some(1), + ScalarInstructionID::IMUL_RCP => None, + } + } + + pub fn macro_op_to_select_dst(&self) -> usize { + match self { + ScalarInstructionID::IMUL_RCP => 1, + _ => 0, + } + } + + pub fn macro_op_to_store_res(&self) -> usize { + match self { + ScalarInstructionID::IMULH_R + | ScalarInstructionID::ISMULH_R + | ScalarInstructionID::IMUL_RCP => 1, + _ => 0, + } + } + + pub fn is_multiplication(&self) -> bool { + matches!( + self, + ScalarInstructionID::IMUL_R + | ScalarInstructionID::IMULH_R + | ScalarInstructionID::ISMULH_R + | ScalarInstructionID::IMUL_RCP + ) + } + /// is the destination allowed to be the same as the source + pub fn can_dst_be_src(&self) -> bool { + matches!( + self, + ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R + ) + } + + /// Returns the group of this operation. + /// + /// A group is related instructions that effect register choice during program construction. + pub fn instruction_group(&self) -> ScalarInstructionID { + match self { + // The only 2 instructions in the same group is ISUB_R & IADD_RS + // We could make group an enum but for just these 2 i don't think + // it's worth it. + ScalarInstructionID::ISUB_R => ScalarInstructionID::IADD_RS, + id => *id, + } + } + + pub fn number_of_macro_ops(&self) -> usize { + match self { + ScalarInstructionID::ISUB_R + | ScalarInstructionID::IXOR_R + | ScalarInstructionID::IADD_RS + | ScalarInstructionID::IMUL_R + | ScalarInstructionID::IROR_C + | ScalarInstructionID::IADD_C + | ScalarInstructionID::IXOR_C => 1, + ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => 3, + ScalarInstructionID::IMUL_RCP => 2, + } + } + + pub fn macro_op(&self, i: usize) -> Option { + Some(match self { + ScalarInstructionID::ISUB_R => MacroOp::SUB_RR, + ScalarInstructionID::IXOR_R => MacroOp::XOR_RR, + ScalarInstructionID::IADD_RS => MacroOp::LEA_SIB, + ScalarInstructionID::IMUL_R => MacroOp::IMUL_RR { dependant: false }, + ScalarInstructionID::IROR_C => MacroOp::ROR_RI, + ScalarInstructionID::IADD_C => MacroOp::ADD_RI, + ScalarInstructionID::IXOR_C => MacroOp::XOR_RI, + ScalarInstructionID::IMULH_R => match i { + 0 => MacroOp::MOV_RR, + 1 => MacroOp::MUL_R, + 2 => MacroOp::MOV_RR, + _ => return None, + }, + ScalarInstructionID::ISMULH_R => match i { + 0 => MacroOp::MOV_RR, + 1 => MacroOp::IMUL_R, + 2 => MacroOp::MOV_RR, + _ => return None, + }, + ScalarInstructionID::IMUL_RCP => match i { + 0 => MacroOp::MOV_RI, + 1 => MacroOp::IMUL_RR { dependant: true }, + _ => return None, + }, + }) + } +} + +#[derive(Debug, Copy, Clone)] +#[allow(non_camel_case_types)] +pub enum ScalarInstruction { + /// dst = dst - src + ISUB_R { + dst: RGroupRegisterID, + src: RGroupRegisterID, + }, + /// dst = dst ^ src + IXOR_R { + dst: RGroupRegisterID, + src: RGroupRegisterID, + }, + /// dst = dst + (src << mod_shift) + IADD_RS { + dst: RGroupRegisterID, + src: RGroupRegisterID, + mod_shift: u8, + }, + /// dst = dst * src + IMUL_R { + dst: RGroupRegisterID, + src: RGroupRegisterID, + }, + /// dst = dst >>> imm32 + IROR_C { dst: RGroupRegisterID, imm32: u32 }, + /// dst = dst + imm32 + IADD_C { dst: RGroupRegisterID, imm32: u32 }, + /// dst = dst ^ imm32 + IXOR_C { dst: RGroupRegisterID, imm32: u32 }, + /// dst = (dst * src) >> 64 + IMULH_R { + dst: RGroupRegisterID, + src: RGroupRegisterID, + }, + /// dst = (dst * src) >> 64 (signed) + ISMULH_R { + dst: RGroupRegisterID, + src: RGroupRegisterID, + }, + /// dst = 2x / imm32 * dst + IMUL_RCP { dst: RGroupRegisterID, imm32: u32 }, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum OpSource { + Constant, + Register(RGroupRegisterID), + /// Not actually a source, but the C++ version sets this field to a + /// random value on some instructions. + Randi32(i32), +} + +impl OpSource { + pub fn from_rand_i32(x: i32) -> Self { + match x { + -1 => OpSource::Constant, + 0 => OpSource::Register(RGroupRegisterID::R0), + 1 => OpSource::Register(RGroupRegisterID::R1), + 2 => OpSource::Register(RGroupRegisterID::R2), + 3 => OpSource::Register(RGroupRegisterID::R3), + 4 => OpSource::Register(RGroupRegisterID::R4), + 5 => OpSource::Register(RGroupRegisterID::R5), + 6 => OpSource::Register(RGroupRegisterID::R6), + 7 => OpSource::Register(RGroupRegisterID::R7), + rand => OpSource::Randi32(rand), + } + } +} + +impl ScalarInstruction { + pub fn dst(&self) -> RGroupRegisterID { + match self { + ScalarInstruction::ISUB_R { dst, .. } + | ScalarInstruction::IXOR_R { dst, .. } + | ScalarInstruction::IADD_RS { dst, .. } + | ScalarInstruction::IMUL_R { dst, .. } + | ScalarInstruction::IROR_C { dst, .. } + | ScalarInstruction::IADD_C { dst, .. } + | ScalarInstruction::IXOR_C { dst, .. } + | ScalarInstruction::IMULH_R { dst, .. } + | ScalarInstruction::ISMULH_R { dst, .. } + | ScalarInstruction::IMUL_RCP { dst, .. } => *dst, + } + } + + pub fn src(&self) -> Option { + match self { + ScalarInstruction::ISUB_R { src, .. } + | ScalarInstruction::IXOR_R { src, .. } + | ScalarInstruction::IADD_RS { src, .. } + | ScalarInstruction::IMUL_R { src, .. } + | ScalarInstruction::IMULH_R { src, .. } + | ScalarInstruction::ISMULH_R { src, .. } => Some(*src), + ScalarInstruction::IROR_C { .. } + | ScalarInstruction::IADD_C { .. } + | ScalarInstruction::IXOR_C { .. } + | ScalarInstruction::IMUL_RCP { .. } => None, + } + } + + pub fn id(&self) -> ScalarInstructionID { + match self { + ScalarInstruction::ISUB_R { .. } => ScalarInstructionID::ISUB_R, + ScalarInstruction::IXOR_R { .. } => ScalarInstructionID::IXOR_R, + ScalarInstruction::IADD_RS { .. } => ScalarInstructionID::IADD_RS, + ScalarInstruction::IMUL_R { .. } => ScalarInstructionID::IMUL_R, + ScalarInstruction::IROR_C { .. } => ScalarInstructionID::IROR_C, + ScalarInstruction::IADD_C { .. } => ScalarInstructionID::IADD_C, + ScalarInstruction::IXOR_C { .. } => ScalarInstructionID::IXOR_C, + ScalarInstruction::IMULH_R { .. } => ScalarInstructionID::IMULH_R, + ScalarInstruction::ISMULH_R { .. } => ScalarInstructionID::ISMULH_R, + ScalarInstruction::IMUL_RCP { .. } => ScalarInstructionID::IMUL_RCP, + } + } + + pub fn op_source(&self) -> OpSource { + match self { + ScalarInstruction::ISUB_R { src, .. } + | ScalarInstruction::IXOR_R { src, .. } + | ScalarInstruction::IADD_RS { src, .. } + | ScalarInstruction::IMUL_R { src, .. } + | ScalarInstruction::IMULH_R { src, .. } + | ScalarInstruction::ISMULH_R { src, .. } => OpSource::Register(*src), + ScalarInstruction::IROR_C { .. } + | ScalarInstruction::IADD_C { .. } + | ScalarInstruction::IXOR_C { .. } + | ScalarInstruction::IMUL_RCP { .. } => OpSource::Constant, + } + } +}