remove randomX lib

This commit is contained in:
Boog900 2023-10-29 00:08:37 +01:00
parent 216bedaf06
commit 2440ccbd8d
No known key found for this signature in database
GPG key ID: 5401367FB7302004
14 changed files with 0 additions and 2008 deletions

View file

@ -5,7 +5,6 @@ members = [
"common", "common",
"consensus", "consensus",
"cryptonight", "cryptonight",
"random-x",
#"cuprate", #"cuprate",
# "database", # "database",
"net/levin", "net/levin",

View file

@ -1,22 +0,0 @@
[package]
name = "random-x"
version = "0.1.0"
edition = "2021"
[features]
default = ["jit"]
jit = ["dep:dynasmrt"]
rayon = ["dep:rayon"]
[dependencies]
blake2 = "0.10"
argon2 = "0.5"
aes = {version = "0.8", features = ["hazmat"]}
hex-literal = "0.4"
dynasmrt = {version = "2.0.0", optional = true}
rayon = {version ="1.7", optional = true}
[profile.dev]
opt-level = 3

View file

@ -1,153 +0,0 @@
use aes::{
hazmat::{cipher_round as aes_enc, equiv_inv_cipher_round as aes_dec},
Block,
};
use hex_literal::hex;
// Round keys for `aes_fill_1r` (AesGenerator1R), one key per 16-byte state column.
// key0, key1, key2, key3 = Hash512("RandomX AesGenerator1R keys")
const GENERATOR_1_KEY_0: [u8; 16] = hex!("53a5ac6d096671622b55b5db1749f4b4");
const GENERATOR_1_KEY_1: [u8; 16] = hex!("07af7c6d0d716a8478d325174edca10d");
const GENERATOR_1_KEY_2: [u8; 16] = hex!("f162123fc67e949f4f79c0f445e3203e");
const GENERATOR_1_KEY_3: [u8; 16] = hex!("3581ef6a7c31bab1884c311654911649");
// Round keys for `aes_fill_4r` (AesGenerator4R); keys 0-3 drive state columns 0 and 1.
// key0, key1, key2, key3 = Hash512("RandomX AesGenerator4R keys 0-3")
const GENERATOR_4_KEY_0: [u8; 16] = hex!("ddaa2164db3d83d12b6d542f3fd2e599");
const GENERATOR_4_KEY_1: [u8; 16] = hex!("50340eb2553f91b6539df706e5cddfa5");
const GENERATOR_4_KEY_2: [u8; 16] = hex!("04d93e5caf7b5e519f67a40abf021c17");
const GENERATOR_4_KEY_3: [u8; 16] = hex!("63376285085d8fe7853767cd91d2ded8");
// Keys 4-7 drive state columns 2 and 3 of `aes_fill_4r`.
// key4, key5, key6, key7 = Hash512("RandomX AesGenerator4R keys 4-7")
const GENERATOR_4_KEY_4: [u8; 16] = hex!("736f82b5a6a7d6e36d8b513db4ff9e22");
const GENERATOR_4_KEY_5: [u8; 16] = hex!("f36b56c7d9b3109c4e4d02e9d2b772b2");
const GENERATOR_4_KEY_6: [u8; 16] = hex!("e7c973f28ba365f70a66a92ba7ef3bf6");
const GENERATOR_4_KEY_7: [u8; 16] = hex!("09d67c7ade395891fdd1060c2d76b0c0");
// Initial values of the four 16-byte states used by `hash_aes_r1` (AesHash1R).
// state0, state1, state2, state3 = Hash512("RandomX AesHash1R state")
const HASH_1_STATE_0: [u8; 16] = hex!("0d2cb592de56a89f47db82ccad3a98d7");
const HASH_1_STATE_1: [u8; 16] = hex!("6e998d3398b7c7155a129ef55780e7ac");
const HASH_1_STATE_2: [u8; 16] = hex!("1700776ad0c762ae6b507950e47ca0e8");
const HASH_1_STATE_3: [u8; 16] = hex!("0c240a638d82ad070500a1794849997e");
// Extra keys applied to every state of `hash_aes_r1` after all input has been absorbed.
// xkey0, xkey1 = Hash256("RandomX AesHash1R xkeys")
const HASH_1_X_KEY_0: [u8; 16] = hex!("8983faf69f94248bbf56dc9001028906");
const HASH_1_X_KEY_1: [u8; 16] = hex!("d163b2613ce0f451c64310ee9bf918ed");
/// AesHash1R in the spec.
///
/// Creates a 64-byte hash from the input: every 64-byte input block is absorbed into four
/// 16-byte AES states (states 0 and 2 with an encryption round, states 1 and 3 with a
/// decryption round), then all states are mixed with the two extra keys.
///
/// # Panics
///
/// Panics if `buf.len()` is not a multiple of 64.
///
/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#34-aeshash1r
pub(crate) fn hash_aes_r1(buf: &[u8]) -> [u8; 64] {
    assert_eq!(buf.len() % 64, 0);

    let mut block_0 = Block::from(HASH_1_STATE_0);
    let mut block_1 = Block::from(HASH_1_STATE_1);
    let mut block_2 = Block::from(HASH_1_STATE_2);
    let mut block_3 = Block::from(HASH_1_STATE_3);

    // The input must be consumed in non-overlapping 64-byte blocks. `windows(64)` would
    // slide by a single byte and absorb overlapping data, producing a different hash for
    // every input longer than 64 bytes.
    for chunk in buf.chunks_exact(64) {
        aes_enc(&mut block_0, Block::from_slice(&chunk[0..16]));
        aes_dec(&mut block_1, Block::from_slice(&chunk[16..32]));
        aes_enc(&mut block_2, Block::from_slice(&chunk[32..48]));
        aes_dec(&mut block_3, Block::from_slice(&chunk[48..64]));
    }

    // Finalisation: two more rounds on every state, keyed with the extra keys.
    for x_key in [HASH_1_X_KEY_0, HASH_1_X_KEY_1] {
        let x_key = Block::from(x_key);
        aes_enc(&mut block_0, &x_key);
        aes_dec(&mut block_1, &x_key);
        aes_enc(&mut block_2, &x_key);
        aes_dec(&mut block_3, &x_key);
    }

    [block_0, block_1, block_2, block_3]
        .concat()
        .try_into()
        .expect("four 16-byte blocks are exactly 64 bytes")
}
/// AesGenerator1R in the spec.
///
/// Fills `output` with pseudorandom bytes seeded by `input`. The four 16-byte states taken
/// from `input` are advanced by one AES round per 64 bytes produced, and the states
/// themselves are written out.
///
/// The length of `output` must be a multiple of 64, otherwise this panics.
///
/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#32-aesgenerator1r
pub(crate) fn aes_fill_1r(input: &[u8; 64], output: &mut [u8]) {
    assert_eq!(output.len() % 64, 0);

    let keys = [
        Block::from(GENERATOR_1_KEY_0),
        Block::from(GENERATOR_1_KEY_1),
        Block::from(GENERATOR_1_KEY_2),
        Block::from(GENERATOR_1_KEY_3),
    ];
    let mut states = [
        Block::clone_from_slice(&input[0..16]),
        Block::clone_from_slice(&input[16..32]),
        Block::clone_from_slice(&input[32..48]),
        Block::clone_from_slice(&input[48..64]),
    ];

    for out_block in output.chunks_exact_mut(64) {
        // Even states use a decryption round, odd states an encryption round.
        aes_dec(&mut states[0], &keys[0]);
        aes_enc(&mut states[1], &keys[1]);
        aes_dec(&mut states[2], &keys[2]);
        aes_enc(&mut states[3], &keys[3]);

        for (lane, state) in out_block.chunks_exact_mut(16).zip(states.iter()) {
            lane.clone_from_slice(state.as_slice());
        }
    }
}
/// AesGenerator4R in the spec.
///
/// Fills the output with pseudorandom bytes seeded by the input.
///
/// `output` must be a multiple of 64.
///
/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#33-aesgenerator4r
pub(crate) fn aes_fill_4r(input: &[u8; 64], output: &mut [u8]) {
assert_eq!(output.len() % 64, 0);
let key_0 = Block::from(GENERATOR_4_KEY_0);
let key_1 = Block::from(GENERATOR_4_KEY_1);
let key_2 = Block::from(GENERATOR_4_KEY_2);
let key_3 = Block::from(GENERATOR_4_KEY_3);
let key_4 = Block::from(GENERATOR_4_KEY_4);
let key_5 = Block::from(GENERATOR_4_KEY_5);
let key_6 = Block::from(GENERATOR_4_KEY_6);
let key_7 = Block::from(GENERATOR_4_KEY_7);
let mut block_0 = Block::clone_from_slice(&input[0..16]);
let mut block_1 = Block::clone_from_slice(&input[16..32]);
let mut block_2 = Block::clone_from_slice(&input[32..48]);
let mut block_3 = Block::clone_from_slice(&input[48..64]);
let aes_enc_4 = |block: &mut Block, key_a, key_b, key_c, key_d| {
aes_enc(block, key_a);
aes_enc(block, key_b);
aes_enc(block, key_c);
aes_enc(block, key_d);
};
let aes_dec_4 = |block: &mut Block, key_a, key_b, key_c, key_d| {
aes_dec(block, key_a);
aes_dec(block, key_b);
aes_dec(block, key_c);
aes_dec(block, key_d);
};
for idx in (0..output.len()).step_by(64) {
aes_dec_4(&mut block_0, &key_0, &key_1, &key_2, &key_3);
aes_enc_4(&mut block_1, &key_0, &key_1, &key_2, &key_3);
aes_dec_4(&mut block_2, &key_4, &key_5, &key_6, &key_7);
aes_enc_4(&mut block_3, &key_4, &key_5, &key_6, &key_7);
output[idx..idx + 16].clone_from_slice(block_0.as_slice());
output[idx + 16..idx + 32].clone_from_slice(block_1.as_slice());
output[idx + 32..idx + 48].clone_from_slice(block_2.as_slice());
output[idx + 48..idx + 64].clone_from_slice(block_3.as_slice());
}
}

View file

@ -1,47 +0,0 @@
use blake2::digest::FixedOutputReset;
use blake2::{Blake2b512, Digest};
/// Maximum number of seed bytes; the last 4 bytes of the 64-byte state hold the nonce.
const MAX_SEED_LEN: usize = 60;

/// A byte stream produced by repeatedly hashing a 64-byte state with Blake2b-512.
pub struct Blake2Generator {
    /// Current 64-byte state; bytes at and after `index` have not been handed out yet.
    data: [u8; 64],
    /// Position of the next unread byte in `data`.
    index: usize,
    /// Hasher reused for every state refresh.
    hasher: Blake2b512,
}

impl Blake2Generator {
    /// Creates a generator from a seed and a nonce.
    ///
    /// The state starts as the seed, zero padded to 60 bytes, followed by the little-endian
    /// nonce. The state is marked as fully consumed, so the first read hashes it.
    ///
    /// # Panics
    ///
    /// Panics if `seed` is longer than 60 bytes.
    pub fn new(seed: &[u8], nonce: u32) -> Self {
        assert!(seed.len() <= MAX_SEED_LEN);

        let mut data = [0u8; 64];
        let (seed_area, nonce_area) = data.split_at_mut(MAX_SEED_LEN);
        seed_area[..seed.len()].copy_from_slice(seed);
        nonce_area.copy_from_slice(&nonce.to_le_bytes());

        Self {
            data,
            index: 64,
            hasher: Blake2b512::default(),
        }
    }

    /// Returns the next byte of the stream.
    pub fn next_u8(&mut self) -> u8 {
        self.check_extend(1);
        let byte = self.data[self.index];
        self.index += 1;
        byte
    }

    /// Returns the next 4 bytes of the stream as a little-endian `u32`.
    pub fn next_u32(&mut self) -> u32 {
        self.check_extend(4);
        let start = self.index;
        self.index = start + 4;

        let mut word = [0u8; 4];
        word.copy_from_slice(&self.data[start..self.index]);
        u32::from_le_bytes(word)
    }

    /// Replaces the state with its own hash when fewer than `bytes_needed` bytes are left.
    ///
    /// Any bytes still unread at that point are discarded.
    fn check_extend(&mut self, bytes_needed: usize) {
        if self.index + bytes_needed <= self.data.len() {
            return;
        }

        self.hasher.update(self.data);
        self.data = self.hasher.finalize_fixed_reset().into();
        self.index = 0;
    }
}

View file

@ -1,22 +0,0 @@
/// Target latency for SuperscalarHash (in cycles of the reference CPU).
pub(crate) const RANDOMX_SUPERSCALAR_LATENCY: usize = 170;
/// Size bound derived from the latency target.
///
/// NOTE(review): presumably the maximum number of instructions in one SuperscalarHash
/// program - confirm against the generator.
pub(crate) const SUPERSCALAR_MAX_SIZE: usize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2;
/// Dataset base size in bytes. Must be a power of 2.
pub(crate) const RANDOMX_DATASET_BASE_SIZE: usize = 2147483648;
/// Extra dataset size in bytes, added on top of the base size.
pub(crate) const RANDOMX_DATASET_EXTRA_SIZE: usize = 33554368;
/// Total dataset size in bytes (base size plus extra size).
pub(crate) const RANDOMX_DATASET_SIZE: usize =
RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
/// Number of Argon2 lanes used when filling the cache memory.
pub(crate) const RANDOMX_ARGON_LANES: u32 = 1;
/// Number of Argon2 iterations used when filling the cache memory.
pub(crate) const RANDOMX_ARGON_ITERATIONS: u32 = 3;
/// Argon2 memory cost; multiplied by the lane count it is also the number of memory blocks
/// allocated for the cache.
pub(crate) const RANDOMX_ARGON_MEMORY: u32 = 262144;
/// Salt passed to Argon2 when filling the cache memory.
pub(crate) const RANDOMX_ARGON_SALT: &[u8] = b"RandomX\x03";
/// Number of SuperscalarHash programs generated for the cache; one cache item is mixed in
/// after each program when a dataset item is computed.
pub(crate) const RANDOMX_CACHE_ACCESSES: usize = 8;

View file

@ -1,193 +0,0 @@
use std::sync::{Arc, RwLock};
use argon2::{Algorithm, Argon2, Block, Params, Version};
#[cfg(feature = "rayon")]
use rayon::prelude::*;
use crate::blake2_generator::Blake2Generator;
use crate::{
config::{
RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_LANES, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_SALT,
RANDOMX_CACHE_ACCESSES, RANDOMX_DATASET_SIZE,
},
registers::{RGroupRegisterID, RGroupRegisters},
superscalar::SSProgram,
};
/// Generates the memory blocks used in the cache.
///
/// Allocates `RANDOMX_ARGON_LANES * RANDOMX_ARGON_MEMORY` blocks and fills them with
/// Argon2d (version 0x13), using `key` as the password and `RANDOMX_ARGON_SALT` as the salt.
///
/// Panics if the Argon2 parameters are rejected or the memory fill fails.
fn argon2_blocks(key: &[u8]) -> Box<[Block]> {
    let params = Params::new(
        RANDOMX_ARGON_MEMORY,
        RANDOMX_ARGON_ITERATIONS,
        RANDOMX_ARGON_LANES,
        None,
    )
    .unwrap();

    let block_count = usize::try_from(RANDOMX_ARGON_LANES * RANDOMX_ARGON_MEMORY).unwrap();
    let mut memory = vec![Block::new(); block_count].into_boxed_slice();

    Argon2::new(Algorithm::Argon2d, Version::V0x13, params)
        .fill_memory(key, RANDOMX_ARGON_SALT, &mut memory)
        .unwrap();

    memory
}
/// The Cache.
///
/// The cache is used during light verification.
/// Internally this struct is a wrapper around an [`Arc`] holding the internal cache. This
/// allows cheap clones and allows the cache to be shared between VMs on different threads.
#[derive(Debug, Clone)]
pub struct Cache {
// Shared handle to the actual cache data.
internal_cache: Arc<RwLock<InternalCache>>,
}
impl Cache {
/// Initialises the cache with the provided key.
///
/// # Panics
///
/// Panics if the key is longer than 60 bytes (the seed limit asserted by
/// `Blake2Generator::new`).
///
/// NOTE(review): the previous wording also required at least 1 byte; no check for an
/// empty key is visible in this file - confirm against the Argon2 implementation.
pub fn init(key: &[u8]) -> Self {
let internal_cache = InternalCache::init(key);
Cache {
internal_cache: Arc::new(RwLock::new(internal_cache)),
}
}
}
/// The internal cache structure, used during light verification.
#[derive(Debug)]
struct InternalCache {
    /// Argon2d-filled memory. Items are read from it as runs of eight `u64`s.
    memory_blocks: Box<[Block]>,
    /// The SuperscalarHash programs, executed in order for every dataset item.
    programs: Vec<SSProgram>,
}

impl InternalCache {
    /// Builds the cache for `key`: fills the Argon2 memory and generates
    /// `RANDOMX_CACHE_ACCESSES` SuperscalarHash programs from a Blake2 generator seeded with
    /// the key and nonce 0.
    fn init(key: &[u8]) -> Self {
        let memory_blocks = argon2_blocks(key);

        let mut blake_gen = Blake2Generator::new(key, 0);
        let programs = (0..RANDOMX_CACHE_ACCESSES)
            .map(|_| SSProgram::generate(&mut blake_gen))
            .collect::<Vec<_>>();

        InternalCache {
            memory_blocks,
            programs,
        }
    }

    /// Gets an item from the cache at the specified index.
    ///
    /// The index is reduced modulo the number of items in the cache, so any value is valid.
    fn get_item(&self, idx: usize) -> [u64; 8] {
        // one item is 8 u64s
        // mask = (blocks in cache * bytes in a block / size of item) minus one.
        let mask = (self.memory_blocks.len() * 1024 / 64) - 1;
        // and the idx with the mask this is the same as doing mod (self.memory_blocks.len() * 1024 / 64)
        let idx = idx & mask;
        // block_idx = idx divided by amount of items in a block
        let block_idx = idx / (1024 / 64);
        // idx * 8 is to get the idx of a single u64
        // we mask with amount of u64s in a block minus 1 which is the same as doing
        // mod the amount of u64s in a block.
        let block_u64_start = (idx * 8) & 127;
        // The plus 8 cannot overflow as (idx * 8) & 127 wont give a number bigger than 120
        self.memory_blocks[block_idx].as_ref()[block_u64_start..block_u64_start + 8]
            .try_into()
            .expect("the slice is exactly 8 u64s long")
    }

    /// Generates the dataset item at the specified index.
    ///
    /// The registers are seeded from the item number, then every program is executed and a
    /// cache item is XORed into the registers after it. The register with the highest
    /// latency in each program selects the cache item for the next round.
    fn init_data_set_item(&self, item_number: usize) -> [u64; 8] {
        let mut registers = RGroupRegisters::default();
        // All arithmetic on the seed is modulo 2^64, so the `+ 1` must wrap as well.
        registers.set(
            &RGroupRegisterID::R0,
            u64::try_from(item_number)
                .expect("usize does not fit into u64")
                .wrapping_add(1)
                .wrapping_mul(6364136223846793005_u64),
        );

        // Every other register starts as R0 XORed with a per-register constant.
        let mut init_reg = |dst, val: u64| {
            registers.apply_to_dst_with_src(&dst, &RGroupRegisterID::R0, |_, src| src ^ val)
        };
        init_reg(RGroupRegisterID::R1, 9298411001130361340);
        init_reg(RGroupRegisterID::R2, 12065312585734608966);
        init_reg(RGroupRegisterID::R3, 9306329213124626780);
        init_reg(RGroupRegisterID::R4, 5281919268842080866);
        init_reg(RGroupRegisterID::R5, 10536153434571861004);
        init_reg(RGroupRegisterID::R6, 3398623926847679864);
        init_reg(RGroupRegisterID::R7, 9549104520008361294);

        let mut cache_index = item_number;
        for program in &self.programs {
            program.execute(&mut registers);

            let cache_item = self.get_item(cache_index);
            for (reg_id, item) in RGroupRegisterID::iter().zip(cache_item) {
                registers.apply_to_dst(&reg_id, |dst| dst ^ item);
            }

            // Truncation is intentional: `get_item` masks the index down to the number of
            // cache items, which always fits in `usize`, so only the low bits matter. A
            // checked conversion would panic on 32-bit targets for almost every value.
            cache_index = registers.get(&program.reg_with_max_latency()) as usize;
        }
        registers.inner()
    }
}
/// The Dataset used during mining.
///
/// Internally this struct is a wrapper around an [`Arc`] holding the internal dataset. This
/// allows cheap clones and allows the dataset to be shared between VMs on different threads.
#[derive(Debug, Clone)]
pub struct Dataset {
// Shared handle to the actual dataset items.
internal_dataset: Arc<RwLock<InternalDataset>>,
}
impl Dataset {
/// Initialises the dataset with the provided key.
///
/// The whole dataset is generated eagerly from a freshly built cache.
/// This is very computationally intense so might take a long time to complete.
///
/// # Panics
///
/// Panics if the key is longer than 60 bytes (the seed limit asserted by
/// `Blake2Generator::new`).
///
/// NOTE(review): the previous wording also required at least 1 byte; no check for an
/// empty key is visible in this file - confirm against the Argon2 implementation.
pub fn init(key: &[u8]) -> Dataset {
let internal_dataset = InternalDataset::init(key);
Dataset {
internal_dataset: Arc::new(RwLock::new(internal_dataset)),
}
}
}
/// The internal dataset used during mining.
#[derive(Debug)]
struct InternalDataset {
    /// Every dataset item, in index order. One item is eight `u64`s (64 bytes).
    dataset: Vec<[u64; 8]>,
}

impl InternalDataset {
    /// Generates every dataset item for `key` from a freshly initialised cache.
    ///
    /// With the `rayon` feature enabled the items are computed in parallel.
    fn init(key: &[u8]) -> InternalDataset {
        // `RANDOMX_DATASET_SIZE` is in bytes and one item is eight `u64`s, i.e. 64 bytes.
        // Dividing by `64 * 8` (the item size in bits) would only generate an eighth of
        // the dataset.
        const DATASET_ITEM_SIZE: usize = 8 * std::mem::size_of::<u64>();
        let item_count = RANDOMX_DATASET_SIZE / DATASET_ITEM_SIZE;

        let cache = InternalCache::init(key);

        #[cfg(feature = "rayon")]
        let dataset: Vec<[u64; 8]> = (0..item_count)
            .into_par_iter()
            .map(|i| cache.init_data_set_item(i))
            .collect();

        #[cfg(not(feature = "rayon"))]
        let dataset: Vec<[u64; 8]> = (0..item_count)
            .map(|i| cache.init_data_set_item(i))
            .collect();

        Self { dataset }
    }
}

View file

@ -1,12 +0,0 @@
mod aes_hash;
mod blake2_generator;
mod config;
mod dataset;
mod registers;
mod superscalar;
pub use dataset::{Cache, Dataset};
/// Returns `true` if `x` is zero or a power of two (at most one bit set).
fn is_0_or_power_of_2(x: u64) -> bool {
    // `wrapping_sub` keeps the `x == 0` case from overflowing: a plain `x - 1` panics in
    // builds with overflow checks, even though zero is a valid input for this function.
    (x & x.wrapping_sub(1)) == 0
}

View file

@ -1,3 +0,0 @@
mod integer;
pub(crate) use integer::*;

View file

@ -1,62 +0,0 @@
/// Identifier of one of the eight integer registers; the discriminant is the register's
/// index.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[repr(usize)]
pub enum RGroupRegisterID {
    R0 = 0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
}

impl RGroupRegisterID {
    /// Iterates over all register ids in ascending order, `R0` through `R7`.
    pub fn iter() -> impl Iterator<Item = RGroupRegisterID> {
        static ALL_REGISTERS: [RGroupRegisterID; 8] = [
            RGroupRegisterID::R0,
            RGroupRegisterID::R1,
            RGroupRegisterID::R2,
            RGroupRegisterID::R3,
            RGroupRegisterID::R4,
            RGroupRegisterID::R5,
            RGroupRegisterID::R6,
            RGroupRegisterID::R7,
        ];
        ALL_REGISTERS.iter().copied()
    }
}
/// The values of the eight integer registers, indexed by [`RGroupRegisterID`].
#[derive(Debug, Default, Clone)]
pub struct RGroupRegisters([u64; 8]);

impl RGroupRegisters {
    /// Position of a register inside the backing array.
    fn slot(id: &RGroupRegisterID) -> usize {
        *id as usize
    }

    /// Consumes the registers and returns the raw values.
    pub fn inner(self) -> [u64; 8] {
        let RGroupRegisters(values) = self;
        values
    }

    /// Replaces `dst` with `f(dst)`.
    pub fn apply_to_dst(&mut self, dst: &RGroupRegisterID, f: impl FnOnce(u64) -> u64) {
        let target = self.get_mut(dst);
        *target = f(*target);
    }

    /// Replaces `dst` with `f(dst, src)`; `dst` and `src` may be the same register.
    pub fn apply_to_dst_with_src(
        &mut self,
        dst: &RGroupRegisterID,
        src: &RGroupRegisterID,
        f: impl FnOnce(u64, u64) -> u64,
    ) {
        let src_val = self.get(src);
        let target = self.get_mut(dst);
        *target = f(*target, src_val);
    }

    /// Overwrites the value of a register.
    pub fn set(&mut self, id: &RGroupRegisterID, val: u64) {
        *self.get_mut(id) = val;
    }

    /// Reads the value of a register.
    pub fn get(&self, id: &RGroupRegisterID) -> u64 {
        let RGroupRegisters(values) = self;
        values[Self::slot(id)]
    }

    /// Borrows a register mutably.
    pub fn get_mut(&mut self, id: &RGroupRegisterID) -> &mut u64 {
        let RGroupRegisters(values) = self;
        &mut values[Self::slot(id)]
    }
}

View file

@ -1,32 +0,0 @@
mod cpu;
mod executor;
mod generator;
mod instructions;
mod program;
use crate::blake2_generator::Blake2Generator;
use crate::registers::{RGroupRegisterID, RGroupRegisters};
use executor::execute;
use generator::generate;
use instructions::ScalarInstruction;
/// A generated SuperscalarHash program.
#[derive(Debug)]
pub(crate) struct SSProgram {
// The instructions, in execution order.
program: Vec<ScalarInstruction>,
// The register reported by `reg_with_max_latency`; the cache uses its value to pick the
// next cache item.
reg_with_max_latency: RGroupRegisterID,
}
impl SSProgram {
/// Generates a program from the given generator (delegates to `generator::generate`).
pub fn generate(gen: &mut Blake2Generator) -> Self {
generate(gen)
}
/// Runs the program on `registers`, modifying them in place.
pub fn execute(&self, registers: &mut RGroupRegisters) {
execute(&self.program, registers)
}
/// Returns the register recorded as having the maximum latency for this program.
pub fn reg_with_max_latency(&self) -> RGroupRegisterID {
self.reg_with_max_latency
}
}

View file

@ -1,295 +0,0 @@
use crate::config::RANDOMX_SUPERSCALAR_LATENCY;
/// Number of cycles tracked by the port schedule: the latency target plus 4, which is the
/// highest `cycles_to_complete` of any macro-op.
const CYCLE_MAP_SIZE: usize = RANDOMX_SUPERSCALAR_LATENCY + 4;
/// Length of a single macro-op slot inside a decoder group; the number in the variant name
/// is the slot length.
///
/// NOTE(review): the unit is presumably bytes of x86 code - confirm against the spec.
pub(crate) enum SlotLen {
L3,
L4,
L7,
L8,
L9,
L10,
}
/// One of the three execution ports a micro-op can be scheduled on.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ExecutionPort {
P0,
P1,
P5,
}
/// The set of execution ports a micro-op may run on.
enum AllowedPorts {
    /// Exactly one port.
    One(ExecutionPort),
    /// Either of two ports.
    Two(ExecutionPort, ExecutionPort),
    /// Any port.
    All,
}

impl AllowedPorts {
    /// Returns `true` if `port` is part of this set.
    fn port_allowed(&self, port: &ExecutionPort) -> bool {
        match *self {
            AllowedPorts::All => true,
            AllowedPorts::One(only) => only == *port,
            AllowedPorts::Two(first, second) => first == *port || second == *port,
        }
    }
}
/// The macro-ops that instructions are scheduled as.
// The variant names mirror the x86 mnemonics, hence the naming lint is disabled.
#[allow(non_camel_case_types)]
pub enum MacroOp {
    SUB_RR,
    XOR_RR,
    LEA_SIB,
    /// `dependant` marks an op that must wait for the previous macro-op to complete.
    IMUL_RR { dependant: bool },
    ROR_RI,
    ADD_RI,
    XOR_RI,
    MOV_RR,
    MUL_R,
    IMUL_R,
    MOV_RI,
}

impl MacroOp {
    /// Latency of the macro-op in cycles.
    pub fn cycles_to_complete(&self) -> usize {
        match self {
            MacroOp::MOV_RR => 0,
            MacroOp::IMUL_RR { .. } => 3,
            MacroOp::MUL_R | MacroOp::IMUL_R => 4,
            MacroOp::SUB_RR
            | MacroOp::XOR_RR
            | MacroOp::LEA_SIB
            | MacroOp::ROR_RI
            | MacroOp::ADD_RI
            | MacroOp::XOR_RI
            | MacroOp::MOV_RI => 1,
        }
    }

    /// Returns `true` if the macro-op needs no micro-ops and so occupies no port.
    pub fn can_be_eliminated(&self) -> bool {
        matches!(self.micro_ops_needed(), 0)
    }

    /// Returns `true` if this macro-op has to wait for the previous macro-op.
    pub fn is_dependant_on_last_op(&self) -> bool {
        matches!(self, MacroOp::IMUL_RR { dependant: true })
    }

    /// Number of micro-ops the macro-op is made of (0, 1 or 2).
    pub fn micro_ops_needed(&self) -> usize {
        match self {
            MacroOp::MOV_RR => 0,
            MacroOp::MUL_R | MacroOp::IMUL_R => 2,
            MacroOp::SUB_RR
            | MacroOp::XOR_RR
            | MacroOp::LEA_SIB
            | MacroOp::IMUL_RR { .. }
            | MacroOp::ROR_RI
            | MacroOp::ADD_RI
            | MacroOp::XOR_RI
            | MacroOp::MOV_RI => 1,
        }
    }

    /// Ports the micro-op at `micro_op_index` may execute on.
    ///
    /// `micro_op_index` only matters for the two micro-op variants (`MUL_R`, `IMUL_R`).
    ///
    /// Panics for `MOV_RR`, which needs no port, and for an index above 1 on a two
    /// micro-op variant.
    fn allowed_execution_ports(&self, micro_op_index: usize) -> AllowedPorts {
        match self {
            MacroOp::SUB_RR
            | MacroOp::XOR_RR
            | MacroOp::ADD_RI
            | MacroOp::XOR_RI
            | MacroOp::MOV_RI => AllowedPorts::All,
            MacroOp::LEA_SIB => AllowedPorts::Two(ExecutionPort::P0, ExecutionPort::P1),
            MacroOp::ROR_RI => AllowedPorts::Two(ExecutionPort::P0, ExecutionPort::P5),
            MacroOp::IMUL_RR { .. } => AllowedPorts::One(ExecutionPort::P1),
            MacroOp::MOV_RR => panic!("No execution units needed for MOV_RR"),
            MacroOp::MUL_R | MacroOp::IMUL_R => match micro_op_index {
                0 => AllowedPorts::One(ExecutionPort::P1),
                1 => AllowedPorts::One(ExecutionPort::P5),
                _ => panic!("no execution port at that index"),
            },
        }
    }
}
/// Represents the ports availability during a single cycle.
///
/// A `true` field means the port is already taken in this cycle.
#[derive(Debug, Default, Copy, Clone)]
struct CycleSchedule {
    p0: bool,
    p1: bool,
    p5: bool,
}

impl CycleSchedule {
    /// Picks a free port for a micro-op restricted to `allowed_ports`.
    ///
    /// Ports are tried in the order P5, P0, P1. Returns `None` if every allowed port is
    /// busy.
    fn space_for_micro_op(&self, allowed_ports: &AllowedPorts) -> Option<ExecutionPort> {
        let preference = [
            (self.p5, ExecutionPort::P5),
            (self.p0, ExecutionPort::P0),
            (self.p1, ExecutionPort::P1),
        ];

        preference
            .iter()
            .find(|(busy, port)| !busy && allowed_ports.port_allowed(port))
            .map(|(_, port)| *port)
    }

    /// Marks `port` as taken for this cycle.
    fn set_port_busy(&mut self, port: ExecutionPort) {
        let flag = match port {
            ExecutionPort::P0 => &mut self.p0,
            ExecutionPort::P1 => &mut self.p1,
            ExecutionPort::P5 => &mut self.p5,
        };
        *flag = true;
    }
}
/// A slot found for a macro-op: the cycle it can start in and the port for each of its
/// micro-ops.
pub(crate) struct MacroOpOpportunity {
// Cycle in which the macro-op can be scheduled.
cycle: usize,
// Port for the first micro-op; `None` when the macro-op needs no port.
micro_port_0: Option<ExecutionPort>,
// Port for the second micro-op; `None` unless the macro-op has two micro-ops.
micro_port_1: Option<ExecutionPort>,
}
impl MacroOpOpportunity {
/// The cycle in which the macro-op can be scheduled.
pub fn cycle(&self) -> usize {
self.cycle
}
}
/// Port usage for every cycle of a program being generated.
#[derive(Debug)]
pub(crate) struct ProgramSchedule {
// One entry per cycle, recording which ports are already taken.
ports_schedule: [CycleSchedule; CYCLE_MAP_SIZE],
// Set once a micro-op could not be placed, or explicitly through `set_full`.
full: bool,
}
impl Default for ProgramSchedule {
/// Creates an empty schedule: every port free in every cycle and not marked full.
fn default() -> Self {
Self {
ports_schedule: [CycleSchedule::default(); CYCLE_MAP_SIZE],
full: false,
}
}
}
impl ProgramSchedule {
    /// Marks the schedule as full.
    pub fn set_full(&mut self) {
        self.full = true;
    }

    /// Returns `true` once the schedule has been marked full.
    pub fn is_full(&self) -> bool {
        self.full
    }

    /// Finds the earliest slot for `op` at or after `cycle` and reserves its ports.
    ///
    /// Returns the cycle the macro-op was scheduled in, or `None` if there is no room.
    pub fn schedule_macro_op_at_earliest(
        &mut self,
        op: &MacroOp,
        cycle: usize,
        last_op_completes_at: usize,
    ) -> Option<usize> {
        let MacroOpOpportunity {
            cycle: scheduled_at,
            micro_port_0,
            micro_port_1,
        } = self.earliest_cycle_for_macro_op(op, cycle, last_op_completes_at)?;

        match (micro_port_0, micro_port_1) {
            (Some(first), second) => {
                self.schedule_micro_op(scheduled_at, first);
                if let Some(second) = second {
                    self.schedule_micro_op(scheduled_at, second);
                }
            }
            // Without a first port there is nothing to reserve.
            (None, _) => {}
        }

        Some(scheduled_at)
    }

    /// Finds, without reserving anything, the earliest slot for `op` at or after `cycle`.
    ///
    /// A macro-op that depends on the previous one cannot start before
    /// `last_op_completes_at`. Returns `None` if no slot exists.
    pub fn earliest_cycle_for_macro_op(
        &mut self,
        op: &MacroOp,
        cycle: usize,
        last_op_completes_at: usize,
    ) -> Option<MacroOpOpportunity> {
        let start = if op.is_dependant_on_last_op() {
            cycle.max(last_op_completes_at)
        } else {
            cycle
        };

        match op.micro_ops_needed() {
            // Eliminated macro-ops take no port and can go in the starting cycle.
            0 => Some(MacroOpOpportunity {
                cycle: start,
                micro_port_0: None,
                micro_port_1: None,
            }),
            1 => {
                let allowed = op.allowed_execution_ports(0);
                let (found_cycle, port) = self.earliest_cycle_for_mirco_op(&allowed, start)?;
                Some(MacroOpOpportunity {
                    cycle: found_cycle,
                    micro_port_0: Some(port),
                    micro_port_1: None,
                })
            }
            2 => {
                // both ops must happen in the same cycle
                let first_allowed = op.allowed_execution_ports(0);
                let second_allowed = op.allowed_execution_ports(1);

                for candidate in start..CYCLE_MAP_SIZE {
                    let (first_cycle, first_port) =
                        self.earliest_cycle_for_mirco_op(&first_allowed, candidate)?;
                    let (second_cycle, second_port) =
                        self.earliest_cycle_for_mirco_op(&second_allowed, candidate)?;

                    if first_cycle == second_cycle {
                        return Some(MacroOpOpportunity {
                            cycle: first_cycle,
                            micro_port_0: Some(first_port),
                            micro_port_1: Some(second_port),
                        });
                    }
                }
                None
            }
            _ => unreachable!(),
        }
    }

    /// Finds the earliest free port for a micro-op at or after `cycle` and reserves it.
    fn schedule_micro_op_at_earliest(
        &mut self,
        allowed_ports: &AllowedPorts,
        cycle: usize,
    ) -> Option<usize> {
        self.earliest_cycle_for_mirco_op(allowed_ports, cycle)
            .map(|(found_cycle, port)| {
                self.schedule_micro_op(found_cycle, port);
                found_cycle
            })
    }

    /// Marks `port` as busy in `cycle`.
    fn schedule_micro_op(&mut self, cycle: usize, port: ExecutionPort) {
        self.ports_schedule[cycle].set_port_busy(port)
    }

    /// Finds the first cycle at or after `cycle` with a free allowed port.
    ///
    /// Marks the schedule as full and returns `None` if there is none.
    fn earliest_cycle_for_mirco_op(
        &mut self,
        allowed_ports: &AllowedPorts,
        cycle: usize,
    ) -> Option<(usize, ExecutionPort)> {
        let found = self
            .ports_schedule
            .iter()
            .enumerate()
            .skip(cycle)
            .find_map(|(at, slot)| slot.space_for_micro_op(allowed_ports).map(|port| (at, port)));

        if found.is_none() {
            self.full = true;
        }
        found
    }
}

View file

@ -1,105 +0,0 @@
use crate::registers::RGroupRegisters;
use crate::superscalar::instructions::ScalarInstruction;
/// 2^63, the dividend that `randomx_reciprocal` starts its division from.
const P2EXP63: u64 = 1 << 63;
/// Executes a SuperscalarHash program on `registers`, one instruction after another.
///
/// All arithmetic wraps modulo 2^64.
pub fn execute(program: &[ScalarInstruction], registers: &mut RGroupRegisters) {
    program.iter().for_each(|instruction| match instruction {
        ScalarInstruction::ISUB_R { dst, src } => {
            registers.apply_to_dst_with_src(dst, src, |d: u64, s: u64| d.wrapping_sub(s))
        }
        ScalarInstruction::IXOR_R { dst, src } => {
            registers.apply_to_dst_with_src(dst, src, |d: u64, s: u64| d ^ s)
        }
        ScalarInstruction::IADD_RS {
            dst,
            src,
            mod_shift,
        } => {
            // dst += src << shift
            let shift = clamp_mod_shift(*mod_shift);
            registers.apply_to_dst_with_src(dst, src, |d: u64, s: u64| d.wrapping_add(s << shift))
        }
        ScalarInstruction::IMUL_R { dst, src } => {
            registers.apply_to_dst_with_src(dst, src, |d: u64, s: u64| d.wrapping_mul(s))
        }
        ScalarInstruction::IROR_C { dst, imm32 } => {
            registers.apply_to_dst(dst, |d: u64| d.rotate_right(*imm32))
        }
        ScalarInstruction::IADD_C { dst, imm32 } => {
            let addend = sign_extend_2s_compl(*imm32);
            registers.apply_to_dst(dst, |d: u64| d.wrapping_add(addend))
        }
        ScalarInstruction::IXOR_C { dst, imm32 } => {
            let operand = sign_extend_2s_compl(*imm32);
            registers.apply_to_dst(dst, |d: u64| d ^ operand)
        }
        ScalarInstruction::IMULH_R { dst, src } => {
            registers.apply_to_dst_with_src(dst, src, high_mul)
        }
        ScalarInstruction::ISMULH_R { dst, src } => {
            registers.apply_to_dst_with_src(dst, src, |d: u64, s: u64| {
                signed_high_mul(d as i64, s as i64) as u64
            })
        }
        ScalarInstruction::IMUL_RCP { dst, imm32 } => {
            let reciprocal = randomx_reciprocal(u64::from(*imm32));
            registers.apply_to_dst(dst, |d: u64| d.wrapping_mul(reciprocal))
        }
    });
}
/// Computes the reciprocal used by `IMUL_RCP`: `2^(63 + n) / divisor` rounded down, where
/// `n` is the bit length of `divisor`.
///
/// The division is carried out bit by bit, starting from `2^63 / divisor`.
///
/// # Panics
///
/// Panics if `divisor` is zero or a power of two.
pub fn randomx_reciprocal(divisor: u64) -> u64 {
    assert!(!divisor.is_power_of_two());
    assert_ne!(divisor, 0);

    let mut quotient = P2EXP63 / divisor;
    let mut remainder = P2EXP63 % divisor;

    // Bit length of the divisor, i.e. the position of its highest set bit plus one.
    let bit_length = 64 - divisor.leading_zeros();

    for _ in 0..bit_length {
        // Checked before doubling so that the comparison itself cannot overflow.
        let fits = remainder >= divisor.wrapping_sub(remainder);

        quotient = quotient.wrapping_mul(2);
        remainder = remainder.wrapping_mul(2);
        if fits {
            quotient = quotient.wrapping_add(1);
            remainder = remainder.wrapping_sub(divisor);
        }
    }

    quotient
}
/// Returns the upper 64 bits of the 128-bit unsigned product `a * b`.
fn high_mul(a: u64, b: u64) -> u64 {
    let product = u128::from(a) * u128::from(b);
    (product >> 64) as u64
}
/// Returns the upper 64 bits of the 128-bit signed product `a * b`.
fn signed_high_mul(a: i64, b: i64) -> i64 {
    let product = i128::from(a) * i128::from(b);
    (product >> 64) as i64
}
/// Sign-extends a 32-bit two's complement value to 64 bits.
///
/// Values with the top bit set get their upper 32 bits filled with ones; all other values
/// are returned unchanged.
pub fn sign_extend_2s_compl(imm: u32) -> u64 {
    // Reinterpret as signed, widen (which replicates the sign bit), reinterpret back.
    imm as i32 as i64 as u64
}
/// Extracts the shift amount from a mod byte: bits 2 and 3, giving a value from 0 to 3.
fn clamp_mod_shift(x: u8) -> u64 {
    u64::from((x >> 2) & 0b11)
}

View file

@ -1,797 +0,0 @@
use std::cmp::Ordering;
use crate::config::SUPERSCALAR_MAX_SIZE;
use crate::registers::{RGroupRegisterID, RGroupRegisters};
use crate::superscalar::cpu::{ProgramSchedule, SlotLen};
use crate::superscalar::instructions::ScalarInstruction;
use crate::superscalar::SSProgram;
use crate::{
blake2_generator::Blake2Generator,
config::RANDOMX_SUPERSCALAR_LATENCY,
is_0_or_power_of_2,
superscalar::instructions::{OpSource, ScalarInstructionID},
};
// NOTE(review): neither constant is used in the visible part of this file. Going by the
// names, the first is presumably how many cycles ahead the generator may look for a usable
// register and the second how many instructions may be discarded before giving up -
// confirm against the rest of the generator.
const LOOK_FORWARD_CYCLES: usize = 4;
const MAX_THROWAWAY_COUNT: usize = 256;
/// Groups of 3 or 4 Macro-op slots that sum to 16
///
/// https://github.com/tevador/RandomX/blob/master/doc/specs.md#631-decoding-stage
/// table 6.3.1
#[derive(Eq, PartialEq, Copy, Clone)]
enum DecoderGroup {
    /// 0: 4-8-4
    D484,
    /// 1: 7-3-3-3
    D7333,
    /// 2: 3-7-3-3
    D3733,
    /// 3: 4-9-3
    D493,
    /// 4: 4-4-4-4
    D4444,
    /// 5: 3-3-10
    D3310,
}

impl DecoderGroup {
    /// Returns the length of the slot at `index`, or `None` if the group has no such slot.
    fn slot_len(&self, index: usize) -> Option<SlotLen> {
        let len = match (*self, index) {
            (DecoderGroup::D484, 0) | (DecoderGroup::D484, 2) => SlotLen::L4,
            (DecoderGroup::D484, 1) => SlotLen::L8,

            (DecoderGroup::D7333, 0) => SlotLen::L7,
            (DecoderGroup::D7333, 1..=3) => SlotLen::L3,

            (DecoderGroup::D3733, 1) => SlotLen::L7,
            (DecoderGroup::D3733, 0) | (DecoderGroup::D3733, 2) | (DecoderGroup::D3733, 3) => {
                SlotLen::L3
            }

            (DecoderGroup::D493, 0) => SlotLen::L4,
            (DecoderGroup::D493, 1) => SlotLen::L9,
            (DecoderGroup::D493, 2) => SlotLen::L3,

            (DecoderGroup::D4444, 0..=3) => SlotLen::L4,

            (DecoderGroup::D3310, 0) | (DecoderGroup::D3310, 1) => SlotLen::L3,
            (DecoderGroup::D3310, 2) => SlotLen::L10,

            _ => return None,
        };
        Some(len)
    }

    /// Returns an iterator over the lengths with a bool `is_last`
    pub fn iter_slot_len(&self) -> impl Iterator<Item = (SlotLen, bool)> + '_ {
        let last = self.size() - 1;
        (0..=last).map(move |i| (self.slot_len(i).unwrap(), i == last))
    }

    /// Number of slots in the group.
    pub fn size(&self) -> usize {
        match self {
            DecoderGroup::D484 | DecoderGroup::D493 | DecoderGroup::D3310 => 3,
            DecoderGroup::D7333 | DecoderGroup::D3733 | DecoderGroup::D4444 => 4,
        }
    }

    /// Selects the next decoder group.
    ///
    /// The rules are applied in order:
    /// 1. after `IMULH_R` or `ISMULH_R` the group is always 3-3-10;
    /// 2. if `total_muls_low` is set the group is 4-4-4-4;
    /// 3. after `IMUL_RCP` one random bit picks 4-8-4 (bit set) or 4-9-3;
    /// 4. otherwise two random bits pick one of 4-8-4, 7-3-3-3, 3-7-3-3 and 4-9-3.
    ///
    /// A byte is only drawn from `gen` in cases 3 and 4.
    fn next_group(
        gen: &mut Blake2Generator,
        instruction: Option<ScalarInstructionID>,
        total_muls_low: bool,
    ) -> DecoderGroup {
        match instruction {
            Some(ScalarInstructionID::IMULH_R) | Some(ScalarInstructionID::ISMULH_R) => {
                return DecoderGroup::D3310
            }
            _ => {}
        }

        if total_muls_low {
            return DecoderGroup::D4444;
        }

        if instruction == Some(ScalarInstructionID::IMUL_RCP) {
            return if gen.next_u8() & 1 == 1 {
                DecoderGroup::D484
            } else {
                DecoderGroup::D493
            };
        }

        match gen.next_u8() & 3 {
            0 => DecoderGroup::D484,
            1 => DecoderGroup::D7333,
            2 => DecoderGroup::D3733,
            3 => DecoderGroup::D493,
            _ => unreachable!(),
        }
    }
}
/// Bookkeeping the generator keeps for a single register.
#[derive(Debug, Copy, Clone)]
pub(crate) struct SingleRegisterInfo {
// Which register this entry describes.
id: RGroupRegisterID,
// The register counts as ready for every cycle at or after this value.
next_ready: usize,
// NOTE(review): presumably the last instruction that wrote to this register; `None`
// until one is recorded - confirm against the generator.
last_instruction: Option<ScalarInstructionID>,
// NOTE(review): presumably the source operand recorded for that instruction - confirm.
last_source: OpSource,
}
impl SingleRegisterInfo {
/// The register this entry describes.
pub fn id(&self) -> RGroupRegisterID {
self.id
}
/// The first cycle at which the register is ready.
pub fn next_ready(&self) -> usize {
self.next_ready
}
/// The recorded last instruction, if any.
pub fn last_instruction(&self) -> Option<ScalarInstructionID> {
self.last_instruction
}
/// The recorded last source.
pub fn last_source(&self) -> OpSource {
self.last_source
}
/// Sets the first cycle at which the register is ready.
pub fn set_next_ready(&mut self, next_ready: usize) {
self.next_ready = next_ready
}
/// Records the last instruction.
pub fn set_last_instruction(&mut self, last_instruction: ScalarInstructionID) {
self.last_instruction = Some(last_instruction);
}
/// Records the last source.
pub fn set_last_source(&mut self, last_source: OpSource) {
self.last_source = last_source
}
}
/// Bookkeeping for all eight registers, stored in register order.
#[derive(Debug)]
pub(crate) struct RegistersInfo {
    registers: [SingleRegisterInfo; 8],
}

impl Default for RegistersInfo {
    /// Every register starts ready at cycle 0, with no last instruction and a constant as
    /// its last source.
    fn default() -> Self {
        let fresh = |id: RGroupRegisterID| SingleRegisterInfo {
            id,
            next_ready: 0,
            last_instruction: None,
            last_source: OpSource::Constant,
        };

        RegistersInfo {
            registers: [
                fresh(RGroupRegisterID::R0),
                fresh(RGroupRegisterID::R1),
                fresh(RGroupRegisterID::R2),
                fresh(RGroupRegisterID::R3),
                fresh(RGroupRegisterID::R4),
                fresh(RGroupRegisterID::R5),
                fresh(RGroupRegisterID::R6),
                fresh(RGroupRegisterID::R7),
            ],
        }
    }
}

impl RegistersInfo {
    /// Iterates over the registers in order.
    pub fn iter(&self) -> impl Iterator<Item = &SingleRegisterInfo> {
        self.registers.iter()
    }

    /// Returns the registers that are ready at `cycle`, in register order.
    pub fn ready_at_cycle(&self, cycle: usize) -> Vec<&SingleRegisterInfo> {
        let mut ready = Vec::new();
        for register in &self.registers {
            if register.next_ready <= cycle {
                ready.push(register);
            }
        }
        ready
    }

    /// Borrows the entry of a register mutably.
    pub fn get_mut(&mut self, id: RGroupRegisterID) -> &mut SingleRegisterInfo {
        let position = id as usize;
        &mut self.registers[position]
    }
}
/// Picks one of the `available` registers.
///
/// Returns `None` if the list is empty. With a single candidate it is returned directly and
/// nothing is drawn from `gen`; otherwise a `u32` from `gen` modulo the number of candidates
/// selects the register.
pub(crate) fn select_register(
    gen: &mut Blake2Generator,
    available: &[&SingleRegisterInfo],
) -> Option<RGroupRegisterID> {
    let chosen = match available.len() {
        0 => return None,
        1 => 0,
        // `count` is at most 8 here, so narrowing it to u32 cannot lose anything.
        count => usize::try_from(gen.next_u32() % count as u32)
            .expect("Could not fit u32 into usize"),
    };

    Some(available[chosen].id)
}
/// Returns an imm32 if the instruction requires one.
///
/// * `IADD_C` / `IXOR_C`: the next `u32` from the generator.
/// * `IROR_C`: the low 6 bits of the next byte, redrawn until non-zero.
/// * `IMUL_RCP`: the next `u32`, redrawn until it is neither zero nor a power of two.
/// * everything else: `None`, and nothing is drawn from the generator.
fn get_imm32(gen: &mut Blake2Generator, id: &ScalarInstructionID) -> Option<u32> {
    let imm32 = match id {
        ScalarInstructionID::IADD_C | ScalarInstructionID::IXOR_C => gen.next_u32(),
        ScalarInstructionID::IROR_C => {
            // imm32 % 64 != 0
            let mut rotation = gen.next_u8() & 63;
            while rotation == 0 {
                rotation = gen.next_u8() & 63;
            }
            u32::from(rotation)
        }
        ScalarInstructionID::IMUL_RCP => {
            // imm32 != 0, imm32 != 2N
            let mut divisor = gen.next_u32();
            while is_0_or_power_of_2(u64::from(divisor)) {
                divisor = gen.next_u32();
            }
            divisor
        }
        _ => return None,
    };

    Some(imm32)
}
/// Returns the mod shift if the instruction requires one (only `IADD_RS` does).
///
/// NOTE(review): the value returned here is already reduced to 0..=3, but the executor
/// passes an instruction's `mod_shift` through `clamp_mod_shift`, which applies
/// `(x >> 2) % 4` again. If this value reaches the executor unchanged, the effective shift
/// is always 0. Confirm how the instruction is constructed and which side should do the
/// reduction.
fn get_mod_shift(gen: &mut Blake2Generator, id: &ScalarInstructionID) -> Option<u8> {
match id {
// keep the shift between 0 and 3.
ScalarInstructionID::IADD_RS => Some((gen.next_u8() >> 2) % 4),
_ => None,
}
}
/// Used during [`ScalarInstructionBuilder`] creation. Returns the [`OpSource`] to give the
/// register if it is already known; otherwise [`None`] is returned and the field is filled
/// in later.
///
/// Instructions with a constant operand report [`OpSource::Constant`]. `IMULH_R` and
/// `ISMULH_R` draw a `u32` from the generator and turn it into a source value.
fn get_src_to_give_register(
    gen: &mut Blake2Generator,
    id: &ScalarInstructionID,
) -> Option<OpSource> {
    let has_constant_operand = matches!(
        id,
        ScalarInstructionID::IADD_C
            | ScalarInstructionID::IXOR_C
            | ScalarInstructionID::IROR_C
            | ScalarInstructionID::IMUL_RCP
    );
    if has_constant_operand {
        return Some(OpSource::Constant);
    }

    if matches!(
        id,
        ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R
    ) {
        // not actually the source value, the Monero C++ version sets this field to a random
        // value, this has an issue of becoming an actual meaningful value though so we handle
        // those rare cases here:
        let random = gen.next_u32() as i32;
        return Some(OpSource::from_rand_i32(random));
    }

    None
}
/// An instruction under construction: the instruction id and constants are fixed at
/// creation, while the source and destination registers start out as `None`.
struct ScalarInstructionBuilder {
/// The id of the instruction we are building.
id: ScalarInstructionID,
/// The true source register, i.e. the one the value is actually read from. This is
/// `None` until a source has been set.
true_src: Option<RGroupRegisterID>,
/// The source value reported to the dst register. If this is a register then most of the
/// time it is the same as [`true_src`], but for `IMULH_R` and `ISMULH_R` it is not.
///
/// `IMULH_R` and `ISMULH_R` generate a random i32 and set it for this slot.
src_to_give_register: Option<OpSource>,
/// The destination register for this instruction.
dst: Option<RGroupRegisterID>,
/// A constant used in some instructions.
imm32: Option<u32>,
/// The shift used in `IADD_RS`.
mod_shift: Option<u8>,
}
impl ScalarInstructionBuilder {
/// Creates a new [`ScalarInstructionBuilder`] for one decoder slot.
///
/// The instruction is picked from the slot length (and, for some lengths, from `group` and
/// `is_last`). Operands that depend only on the instruction id are then drawn from `gen`; the
/// source and destination registers are left unset.
pub fn new(
    gen: &mut Blake2Generator,
    slot_len: &SlotLen,
    group: &DecoderGroup,
    is_last: bool,
) -> Self {
    // https://github.com/tevador/RandomX/blob/master/doc/specs.md#632-instruction-selection
    let id = match (slot_len, is_last) {
        (SlotLen::L3, false) => {
            if gen.next_u8() & 1 == 0 {
                ScalarInstructionID::ISUB_R
            } else {
                ScalarInstructionID::IXOR_R
            }
        }
        (SlotLen::L3, true) => match gen.next_u8() & 3 {
            0 => ScalarInstructionID::ISUB_R,
            1 => ScalarInstructionID::IXOR_R,
            2 => ScalarInstructionID::IMULH_R,
            _ => ScalarInstructionID::ISMULH_R,
        },
        // No random byte is consumed for this case.
        (SlotLen::L4, false) if group == &DecoderGroup::D4444 => ScalarInstructionID::IMUL_R,
        (SlotLen::L4, _) => {
            if gen.next_u8() & 1 == 0 {
                ScalarInstructionID::IROR_C
            } else {
                ScalarInstructionID::IADD_RS
            }
        }
        (SlotLen::L7 | SlotLen::L8 | SlotLen::L9, _) => {
            if gen.next_u8() & 1 == 0 {
                ScalarInstructionID::IXOR_C
            } else {
                ScalarInstructionID::IADD_C
            }
        }
        (SlotLen::L10, _) => ScalarInstructionID::IMUL_RCP,
    };

    // Drawn in the same order as the struct fields are declared.
    let src_to_give_register = get_src_to_give_register(gen, &id);
    let imm32 = get_imm32(gen, &id);
    let mod_shift = get_mod_shift(gen, &id);

    Self {
        id,
        true_src: None,
        src_to_give_register,
        dst: None,
        imm32,
        mod_shift,
    }
}
/// Records `src` as the register this operation really reads from.
///
/// `src_to_give_register` is only filled in when it is still empty: the instructions whose
/// reported source differs from the true source register already set it at creation.
fn set_src(&mut self, src: RGroupRegisterID) {
    self.true_src = Some(src);
    self.src_to_give_register = self
        .src_to_give_register
        .or(Some(OpSource::Register(src)));
}
/// Chooses the source register for this operation among the registers ready at `cycle`.
///
/// Returns `true` when a source was set and `false` when no register could be selected.
pub fn select_source(
    &mut self,
    gen: &mut Blake2Generator,
    cycle: usize,
    registers_info: &RegistersInfo,
) -> bool {
    let candidates = registers_info.ready_at_cycle(cycle);

    // If IADD_RS has exactly two candidates and one of them is r5, r5 must become the source
    // because it cannot be the destination.
    let must_use_r5 = candidates.len() == 2
        && self.id == ScalarInstructionID::IADD_RS
        && (candidates[0].id() == RGroupRegisterID::R5
            || candidates[1].id() == RGroupRegisterID::R5);

    let chosen = if must_use_r5 {
        Some(RGroupRegisterID::R5)
    } else {
        select_register(gen, &candidates)
    };

    match chosen {
        Some(reg) => {
            self.set_src(reg);
            true
        }
        None => false,
    }
}
/// Chooses the destination register for this operation.
///
/// A register is a candidate only if all of the following hold:
/// * it is ready by `cycle`;
/// * it is not the true source, unless the instruction allows dst == src;
/// * it does not chain two `IMUL_R`-group operations, unless `allow_chain_mul` is set;
/// * its last operation was not the same instruction group with the same recorded source;
/// * it is not r5 when the instruction is `IADD_RS`.
///
/// Returns `true` when a destination was set and `false` when no register could be selected.
fn select_destination(
    &mut self,
    gen: &mut Blake2Generator,
    cycle: usize,
    allow_chain_mul: bool,
    registers_info: &RegistersInfo,
) -> bool {
    let candidates = registers_info
        .iter()
        .filter(|reg| {
            if reg.next_ready() > cycle {
                return false;
            }
            if !self.id.can_dst_be_src() && Some(reg.id()) == self.true_src {
                return false;
            }
            if !allow_chain_mul
                && self.id.instruction_group() == ScalarInstructionID::IMUL_R
                && reg.last_instruction() == Some(ScalarInstructionID::IMUL_R)
            {
                return false;
            }
            // `last_source` is only consulted when the instruction groups match.
            if Some(self.id.instruction_group()) == reg.last_instruction()
                && self.src_to_give_register == Some(reg.last_source())
            {
                return false;
            }
            !(reg.id() == RGroupRegisterID::R5 && self.id == ScalarInstructionID::IADD_RS)
        })
        .collect::<Vec<_>>();

    match select_register(gen, &candidates) {
        Some(reg) => {
            self.dst = Some(reg);
            true
        }
        None => false,
    }
}
/// Turns the finished builder into a [`ScalarInstruction`].
///
/// # Panics
///
/// Panics if the destination, or any operand the chosen instruction needs (source register,
/// `imm32`, `mod_shift`), has not been set.
fn construct(self) -> ScalarInstruction {
    // Every instruction has a destination, so it is resolved first for all of them.
    let dst = self.dst.unwrap();
    let src = || self.true_src.unwrap();
    let imm32 = || self.imm32.unwrap();

    match self.id {
        ScalarInstructionID::ISUB_R => ScalarInstruction::ISUB_R { dst, src: src() },
        ScalarInstructionID::IXOR_R => ScalarInstruction::IXOR_R { dst, src: src() },
        ScalarInstructionID::IADD_RS => ScalarInstruction::IADD_RS {
            dst,
            src: src(),
            mod_shift: self.mod_shift.unwrap(),
        },
        ScalarInstructionID::IMUL_R => ScalarInstruction::IMUL_R { dst, src: src() },
        ScalarInstructionID::IROR_C => ScalarInstruction::IROR_C {
            dst,
            imm32: imm32(),
        },
        ScalarInstructionID::IADD_C => ScalarInstruction::IADD_C {
            dst,
            imm32: imm32(),
        },
        ScalarInstructionID::IXOR_C => ScalarInstruction::IXOR_C {
            dst,
            imm32: imm32(),
        },
        ScalarInstructionID::IMULH_R => ScalarInstruction::IMULH_R { dst, src: src() },
        ScalarInstructionID::ISMULH_R => ScalarInstruction::ISMULH_R { dst, src: src() },
        ScalarInstructionID::IMUL_RCP => ScalarInstruction::IMUL_RCP {
            dst,
            imm32: imm32(),
        },
    }
}
}
/// Mutable state shared across the whole generation of one superscalar program.
#[derive(Debug, Default)]
struct ProgramState {
/// The current cycle we are generating for.
current_cycle: usize,
/// The cycle the last operation will complete at.
last_op_completes_at: usize,
/// The number of multiplication instructions the program
/// has generated.
mul_count: usize,
/// The number of instructions in a row the program has thrown
/// away because they couldn't be completed. Reset to 0 whenever a
/// macro op passes source and destination selection.
throw_away_count: usize,
/// The execution port schedule of the program.
program_schedule: ProgramSchedule,
/// Information on the registers' state.
registers_info: RegistersInfo,
/// The instructions generated so far.
program: Vec<ScalarInstruction>,
}
impl ProgramState {
    /// Returns `true` when at least one instruction in a row has been thrown away.
    ///
    /// The result is passed to destination selection, where it lifts the restriction on
    /// chaining `IMUL_R`-group operations on the same register.
    fn allow_chain_mul(&self) -> bool {
        self.throw_away_count != 0
    }
}
/// A state machine that controls instruction generation.
enum ScalarInstructionBuilderSM {
/// The generate instruction state, the next call will
/// start a new instruction.
Generate {
/// The last instruction generated, or `None` if there is none to report
/// (at the start, or after generation for a decoder group was abandoned).
last_instruction: Option<ScalarInstructionID>,
},
/// A partially completed instruction, the next call will
/// push this instruction forward.
PartiallyComplete {
/// The instruction currently being generated.
builder: ScalarInstructionBuilder,
/// The index of the macro op of the instruction we are going
/// to do next.
macro_op_idx: usize,
},
/// NULL state: the machine is only left in this state once the program is full
/// (or the schedule has been marked full). Calls in this state do nothing.
Null,
}
impl ScalarInstructionBuilderSM {
/// Advances instruction generation by at most one scheduled macro op for the given decoder slot.
///
/// Behaviour per state:
/// - `Null`: nothing is done.
/// - `Generate`: if the schedule is full or the program has reached `SUPERSCALAR_MAX_SIZE` the
///   call returns and the machine stays in `Null`; otherwise a new instruction is created for
///   this slot and handled as `PartiallyComplete` in the same call.
/// - `PartiallyComplete`: the next macro op is scheduled. Source and destination registers are
///   selected at the macro ops the instruction designates. When the last macro op has been
///   scheduled the instruction is pushed onto `program_state.program`.
///
/// An instruction whose source or destination cannot be selected is thrown away and a new one is
/// generated for the same slot, up to `MAX_THROWAWAY_COUNT` times in a row.
pub fn push_forward(
&mut self,
gen: &mut Blake2Generator,
decoder_group: &DecoderGroup,
slot_len: &SlotLen,
is_last_slot: bool,
program_state: &mut ProgramState,
) {
loop {
// Take the current state out, leaving `Null` behind; every path that should keep the
// machine alive must write a new state back before returning.
match std::mem::replace(self, ScalarInstructionBuilderSM::Null) {
ScalarInstructionBuilderSM::Null => {
return;
}
ScalarInstructionBuilderSM::Generate { .. } => {
if program_state.program_schedule.is_full()
|| program_state.program.len() >= SUPERSCALAR_MAX_SIZE
{
// The program is finished: stay in `Null`.
return;
}
let builder =
ScalarInstructionBuilder::new(gen, slot_len, decoder_group, is_last_slot);
*self = ScalarInstructionBuilderSM::PartiallyComplete {
builder,
macro_op_idx: 0,
};
// Falls through to the next loop iteration, which handles `PartiallyComplete`.
}
ScalarInstructionBuilderSM::PartiallyComplete {
mut builder,
mut macro_op_idx,
} => {
// Remember the cycle on entry; look-ahead below may advance `current_cycle` and it
// is restored once the macro op has been scheduled.
let top_cycle = program_state.current_cycle;
if macro_op_idx >= builder.id.number_of_macro_ops() {
*self = ScalarInstructionBuilderSM::Generate {
last_instruction: Some(builder.id),
};
continue;
}
let Some(next_macro_op) = builder.id.macro_op(macro_op_idx) else {
unreachable!("We just checked if the macro op idx is too high")
};
// Find the earliest cycle this macro op could be scheduled at; if there is none the
// schedule is marked full and the machine stays in `Null`.
let Some(opportunity) =
program_state.program_schedule.earliest_cycle_for_macro_op(
&next_macro_op,
program_state.current_cycle,
program_state.last_op_completes_at,
)
else {
program_state.program_schedule.set_full();
return;
};
let mut scheduled_cycle = opportunity.cycle();
if !Self::check_set_src(
&mut builder,
macro_op_idx,
gen,
&mut scheduled_cycle,
&mut program_state.current_cycle,
&program_state.registers_info,
) {
// If the source couldn't be set throw the instruction away
if program_state.throw_away_count < MAX_THROWAWAY_COUNT {
program_state.throw_away_count += 1;
*self = ScalarInstructionBuilderSM::Generate {
last_instruction: Some(builder.id),
};
continue;
}
// If too many instructions are thrown away return for the next decoder
// idx
// NOTE(review): the caller keeps iterating the remaining slots of the current
// decoder group after this return; confirm this matches the reference
// implementation, which appears to abandon the whole decode buffer here.
*self = ScalarInstructionBuilderSM::Generate {
last_instruction: None,
};
return;
}
let allow_chain_mul = program_state.allow_chain_mul();
if !Self::check_set_dst(
&mut builder,
macro_op_idx,
gen,
&mut scheduled_cycle,
&mut program_state.current_cycle,
allow_chain_mul,
&program_state.registers_info,
) {
// If the destination couldn't be set throw the instruction away
if program_state.throw_away_count < MAX_THROWAWAY_COUNT {
program_state.throw_away_count += 1;
*self = ScalarInstructionBuilderSM::Generate {
last_instruction: Some(builder.id),
};
continue;
}
// If too many instructions are thrown away return for the next decoder
// idx
*self = ScalarInstructionBuilderSM::Generate {
last_instruction: None,
};
return;
}
// Source and destination selection both passed (or were not needed at this macro
// op), so the run of thrown-away instructions ends.
program_state.throw_away_count = 0;
let Some(scheduled_cycle) = program_state
.program_schedule
.schedule_macro_op_at_earliest(
&next_macro_op,
scheduled_cycle,
program_state.last_op_completes_at,
)
else {
program_state.program_schedule.set_full();
return;
};
let completes_at = scheduled_cycle + next_macro_op.cycles_to_complete();
program_state.last_op_completes_at = completes_at;
// At the macro op that produces the result, record on the destination register when
// it becomes ready and which source / instruction group last wrote it.
if macro_op_idx == builder.id.macro_op_to_store_res() {
let reg = program_state.registers_info.get_mut(builder.dst.unwrap());
reg.set_next_ready(completes_at);
reg.set_last_source(builder.src_to_give_register.unwrap());
reg.set_last_instruction(builder.id.instruction_group());
}
macro_op_idx += 1;
// Undo any look-ahead increments made during source / destination selection.
program_state.current_cycle = top_cycle;
if scheduled_cycle >= RANDOMX_SUPERSCALAR_LATENCY {
program_state.program_schedule.set_full();
}
if macro_op_idx >= builder.id.number_of_macro_ops() {
// The instruction is complete: count it if it is a multiplication and emit it.
if builder.id.is_multiplication() {
program_state.mul_count += 1;
}
*self = ScalarInstructionBuilderSM::Generate {
last_instruction: Some(builder.id),
};
program_state.program.push(builder.construct());
} else {
*self = ScalarInstructionBuilderSM::PartiallyComplete {
builder,
macro_op_idx,
};
}
return;
}
}
}
}
/// Try to set the instruction's destination.
///
/// Returns `true` if the destination has been set, or if `macro_op_idx` is not the macro op at
/// which this instruction selects its destination.
///
/// Returns `false` if this is the macro op that selects the destination and none could be
/// chosen within `LOOK_FORWARD_CYCLES` attempts. Every failed attempt advances both
/// `scheduled_cycle` and `cycle` by one.
fn check_set_dst(
    builder: &mut ScalarInstructionBuilder,
    macro_op_idx: usize,
    gen: &mut Blake2Generator,
    scheduled_cycle: &mut usize,
    cycle: &mut usize,
    allow_chain_mul: bool,
    registers_info: &RegistersInfo,
) -> bool {
    if builder.id.macro_op_to_select_dst() != macro_op_idx {
        // We don't need to set the dst at this macro op.
        return true;
    }
    for _ in 0..LOOK_FORWARD_CYCLES {
        if builder.select_destination(gen, *scheduled_cycle, allow_chain_mul, registers_info) {
            return true;
        }
        // Nothing available at this cycle: look one cycle further ahead.
        *scheduled_cycle += 1;
        *cycle += 1;
    }
    false
}
/// Try to set the instruction's source.
///
/// Returns `true` if the source has been set, or if `macro_op_idx` is not the macro op at which
/// this instruction selects its source (including instructions that have no register source).
///
/// Returns `false` if this is the macro op that selects the source and none could be chosen
/// within `LOOK_FORWARD_CYCLES` attempts. Every failed attempt advances both `scheduled_cycle`
/// and `cycle` by one.
fn check_set_src(
    builder: &mut ScalarInstructionBuilder,
    macro_op_idx: usize,
    gen: &mut Blake2Generator,
    scheduled_cycle: &mut usize,
    cycle: &mut usize,
    registers_info: &RegistersInfo,
) -> bool {
    if builder.id.macro_op_to_select_src() != Some(macro_op_idx) {
        // We don't need to set the src at this macro op.
        return true;
    }
    for _ in 0..LOOK_FORWARD_CYCLES {
        if builder.select_source(gen, *scheduled_cycle, registers_info) {
            return true;
        }
        // Nothing available at this cycle: look one cycle further ahead.
        *scheduled_cycle += 1;
        *cycle += 1;
    }
    false
}
/// Returns the id of the instruction being built, or of the last one generated.
///
/// In the `Generate` state this is the stored `last_instruction`, which may be [`None`].
///
/// # Panics
///
/// Panics if the state machine is in the `Null` state.
pub fn get_instruction_id(&self) -> Option<ScalarInstructionID> {
    if let ScalarInstructionBuilderSM::PartiallyComplete { builder, .. } = self {
        return Some(builder.id);
    }
    let ScalarInstructionBuilderSM::Generate { last_instruction } = self else {
        panic!("Should not be calling this function in this state")
    };
    *last_instruction
}
}
/// Generates a superscalar program from `gen`.
///
/// Decoder groups are produced for up to `RANDOMX_SUPERSCALAR_LATENCY` decode cycles, and each
/// slot of a group pushes the instruction state machine forward. Generation stops early once the
/// port schedule is full or the program holds `SUPERSCALAR_MAX_SIZE` instructions.
///
/// The returned [`SSProgram`] also carries the register with the highest estimated ASIC latency;
/// on a tie the lowest-numbered register wins.
pub(crate) fn generate(gen: &mut Blake2Generator) -> SSProgram {
    let mut state = ProgramState::default();
    let mut builder_sm = ScalarInstructionBuilderSM::Generate {
        last_instruction: None,
    };

    for decoder_cycle in 0..RANDOMX_SUPERSCALAR_LATENCY {
        let out_of_room =
            state.program_schedule.is_full() || state.program.len() >= SUPERSCALAR_MAX_SIZE;
        if out_of_room {
            break;
        }

        let group = DecoderGroup::next_group(
            gen,
            builder_sm.get_instruction_id(),
            state.mul_count < decoder_cycle + 1,
        );
        for (slot_len, is_last) in group.iter_slot_len() {
            builder_sm.push_forward(gen, &group, &slot_len, is_last, &mut state);
        }

        state.current_cycle += 1;
    }

    // Calculate ASIC latency:
    // Assumes 1 cycle latency for all operations and unlimited parallelization.
    let mut asic_latencies = RGroupRegisters::default();
    for instr in &state.program {
        let dst = instr.dst();
        let via_dst = asic_latencies.get(&dst) + 1;
        let via_src = match instr.src() {
            Some(src) => asic_latencies.get(&src) + 1,
            None => 0,
        };
        asic_latencies.set(&dst, via_src.max(via_dst));
    }

    // Strict comparison keeps the first register that reaches the maximum.
    let reg_with_max_latency =
        RGroupRegisterID::iter()
            .skip(1)
            .fold(RGroupRegisterID::R0, |best, reg| {
                if asic_latencies.get(&reg) > asic_latencies.get(&best) {
                    reg
                } else {
                    best
                }
            });

    SSProgram {
        program: state.program,
        reg_with_max_latency,
    }
}

View file

@ -1,264 +0,0 @@
use crate::registers::RGroupRegisterID;
use crate::superscalar::cpu::MacroOp;
/// Identifies the kind of a superscalar instruction, without its operands.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[allow(non_camel_case_types)]
pub enum ScalarInstructionID {
/// dst = dst - src
ISUB_R,
/// dst = dst ^ src
IXOR_R,
/// dst = dst + (src << mod_shift)
IADD_RS,
/// dst = dst * src
IMUL_R,
/// dst = dst >>> imm32 (rotate right)
IROR_C,
/// dst = dst + imm32
IADD_C,
/// dst = dst ^ imm32
IXOR_C,
/// dst = (dst * src) >> 64
IMULH_R,
/// dst = (dst * src) >> 64 (signed)
ISMULH_R,
/// dst = 2^x / imm32 * dst
IMUL_RCP,
}
impl ScalarInstructionID {
pub fn macro_op_to_select_src(&self) -> Option<usize> {
match self {
ScalarInstructionID::ISUB_R
| ScalarInstructionID::IXOR_R
| ScalarInstructionID::IADD_RS
| ScalarInstructionID::IMUL_R => Some(0),
ScalarInstructionID::IROR_C
| ScalarInstructionID::IADD_C
| ScalarInstructionID::IXOR_C => None,
ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => Some(1),
ScalarInstructionID::IMUL_RCP => None,
}
}
pub fn macro_op_to_select_dst(&self) -> usize {
match self {
ScalarInstructionID::IMUL_RCP => 1,
_ => 0,
}
}
pub fn macro_op_to_store_res(&self) -> usize {
match self {
ScalarInstructionID::IMULH_R
| ScalarInstructionID::ISMULH_R
| ScalarInstructionID::IMUL_RCP => 1,
_ => 0,
}
}
pub fn is_multiplication(&self) -> bool {
matches!(
self,
ScalarInstructionID::IMUL_R
| ScalarInstructionID::IMULH_R
| ScalarInstructionID::ISMULH_R
| ScalarInstructionID::IMUL_RCP
)
}
/// is the destination allowed to be the same as the source
pub fn can_dst_be_src(&self) -> bool {
matches!(
self,
ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R
)
}
/// Returns the group of this operation.
///
/// A group is related instructions that effect register choice during program construction.
pub fn instruction_group(&self) -> ScalarInstructionID {
match self {
// The only 2 instructions in the same group is ISUB_R & IADD_RS
// We could make group an enum but for just these 2 i don't think
// it's worth it.
ScalarInstructionID::ISUB_R => ScalarInstructionID::IADD_RS,
id => *id,
}
}
pub fn number_of_macro_ops(&self) -> usize {
match self {
ScalarInstructionID::ISUB_R
| ScalarInstructionID::IXOR_R
| ScalarInstructionID::IADD_RS
| ScalarInstructionID::IMUL_R
| ScalarInstructionID::IROR_C
| ScalarInstructionID::IADD_C
| ScalarInstructionID::IXOR_C => 1,
ScalarInstructionID::IMULH_R | ScalarInstructionID::ISMULH_R => 3,
ScalarInstructionID::IMUL_RCP => 2,
}
}
pub fn macro_op(&self, i: usize) -> Option<MacroOp> {
Some(match self {
ScalarInstructionID::ISUB_R => MacroOp::SUB_RR,
ScalarInstructionID::IXOR_R => MacroOp::XOR_RR,
ScalarInstructionID::IADD_RS => MacroOp::LEA_SIB,
ScalarInstructionID::IMUL_R => MacroOp::IMUL_RR { dependant: false },
ScalarInstructionID::IROR_C => MacroOp::ROR_RI,
ScalarInstructionID::IADD_C => MacroOp::ADD_RI,
ScalarInstructionID::IXOR_C => MacroOp::XOR_RI,
ScalarInstructionID::IMULH_R => match i {
0 => MacroOp::MOV_RR,
1 => MacroOp::MUL_R,
2 => MacroOp::MOV_RR,
_ => return None,
},
ScalarInstructionID::ISMULH_R => match i {
0 => MacroOp::MOV_RR,
1 => MacroOp::IMUL_R,
2 => MacroOp::MOV_RR,
_ => return None,
},
ScalarInstructionID::IMUL_RCP => match i {
0 => MacroOp::MOV_RI,
1 => MacroOp::IMUL_RR { dependant: true },
_ => return None,
},
})
}
}
/// A fully built superscalar instruction together with its operands.
#[derive(Debug, Copy, Clone)]
#[allow(non_camel_case_types)]
pub enum ScalarInstruction {
/// dst = dst - src
ISUB_R {
dst: RGroupRegisterID,
src: RGroupRegisterID,
},
/// dst = dst ^ src
IXOR_R {
dst: RGroupRegisterID,
src: RGroupRegisterID,
},
/// dst = dst + (src << mod_shift)
IADD_RS {
dst: RGroupRegisterID,
src: RGroupRegisterID,
mod_shift: u8,
},
/// dst = dst * src
IMUL_R {
dst: RGroupRegisterID,
src: RGroupRegisterID,
},
/// dst = dst >>> imm32 (rotate right)
IROR_C { dst: RGroupRegisterID, imm32: u32 },
/// dst = dst + imm32
IADD_C { dst: RGroupRegisterID, imm32: u32 },
/// dst = dst ^ imm32
IXOR_C { dst: RGroupRegisterID, imm32: u32 },
/// dst = (dst * src) >> 64
IMULH_R {
dst: RGroupRegisterID,
src: RGroupRegisterID,
},
/// dst = (dst * src) >> 64 (signed)
ISMULH_R {
dst: RGroupRegisterID,
src: RGroupRegisterID,
},
/// dst = 2^x / imm32 * dst
IMUL_RCP { dst: RGroupRegisterID, imm32: u32 },
}
/// The source operand recorded for an operation.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum OpSource {
/// The operand is a constant.
Constant,
/// The operand is the given register.
Register(RGroupRegisterID),
/// Not actually a source, but the C++ version sets this field to a
/// random value on some instructions.
Randi32(i32),
}
impl OpSource {
pub fn from_rand_i32(x: i32) -> Self {
match x {
-1 => OpSource::Constant,
0 => OpSource::Register(RGroupRegisterID::R0),
1 => OpSource::Register(RGroupRegisterID::R1),
2 => OpSource::Register(RGroupRegisterID::R2),
3 => OpSource::Register(RGroupRegisterID::R3),
4 => OpSource::Register(RGroupRegisterID::R4),
5 => OpSource::Register(RGroupRegisterID::R5),
6 => OpSource::Register(RGroupRegisterID::R6),
7 => OpSource::Register(RGroupRegisterID::R7),
rand => OpSource::Randi32(rand),
}
}
}
impl ScalarInstruction {
pub fn dst(&self) -> RGroupRegisterID {
match self {
ScalarInstruction::ISUB_R { dst, .. }
| ScalarInstruction::IXOR_R { dst, .. }
| ScalarInstruction::IADD_RS { dst, .. }
| ScalarInstruction::IMUL_R { dst, .. }
| ScalarInstruction::IROR_C { dst, .. }
| ScalarInstruction::IADD_C { dst, .. }
| ScalarInstruction::IXOR_C { dst, .. }
| ScalarInstruction::IMULH_R { dst, .. }
| ScalarInstruction::ISMULH_R { dst, .. }
| ScalarInstruction::IMUL_RCP { dst, .. } => *dst,
}
}
pub fn src(&self) -> Option<RGroupRegisterID> {
match self {
ScalarInstruction::ISUB_R { src, .. }
| ScalarInstruction::IXOR_R { src, .. }
| ScalarInstruction::IADD_RS { src, .. }
| ScalarInstruction::IMUL_R { src, .. }
| ScalarInstruction::IMULH_R { src, .. }
| ScalarInstruction::ISMULH_R { src, .. } => Some(*src),
ScalarInstruction::IROR_C { .. }
| ScalarInstruction::IADD_C { .. }
| ScalarInstruction::IXOR_C { .. }
| ScalarInstruction::IMUL_RCP { .. } => None,
}
}
pub fn id(&self) -> ScalarInstructionID {
match self {
ScalarInstruction::ISUB_R { .. } => ScalarInstructionID::ISUB_R,
ScalarInstruction::IXOR_R { .. } => ScalarInstructionID::IXOR_R,
ScalarInstruction::IADD_RS { .. } => ScalarInstructionID::IADD_RS,
ScalarInstruction::IMUL_R { .. } => ScalarInstructionID::IMUL_R,
ScalarInstruction::IROR_C { .. } => ScalarInstructionID::IROR_C,
ScalarInstruction::IADD_C { .. } => ScalarInstructionID::IADD_C,
ScalarInstruction::IXOR_C { .. } => ScalarInstructionID::IXOR_C,
ScalarInstruction::IMULH_R { .. } => ScalarInstructionID::IMULH_R,
ScalarInstruction::ISMULH_R { .. } => ScalarInstructionID::ISMULH_R,
ScalarInstruction::IMUL_RCP { .. } => ScalarInstructionID::IMUL_RCP,
}
}
/// Returns the instruction's operand source: [`OpSource::Register`] with the source register
/// for instructions that have one, [`OpSource::Constant`] otherwise.
pub fn op_source(&self) -> OpSource {
    match self.src() {
        Some(register) => OpSource::Register(register),
        None => OpSource::Constant,
    }
}
}