From 545189f523bb9995031c9543eee607dbdf51d041 Mon Sep 17 00:00:00 2001 From: Boog900 <54e72d8a-345f-4599-bd90-c6b9bc7d0ec5@aleeas.com> Date: Sun, 17 Sep 2023 19:01:56 +0100 Subject: [PATCH] random-x: add dataset/ cache --- random-x/Cargo.toml | 5 +- random-x/src/dataset.rs | 147 ++++++++++++++++++++--- random-x/src/lib.rs | 4 +- random-x/src/registers/integer.rs | 4 + random-x/src/superscalar.rs | 13 +- random-x/src/superscalar/cpu.rs | 1 + random-x/src/superscalar/generator.rs | 13 +- random-x/src/superscalar/instructions.rs | 2 +- 8 files changed, 158 insertions(+), 31 deletions(-) diff --git a/random-x/Cargo.toml b/random-x/Cargo.toml index 22a6f4df..eae01067 100644 --- a/random-x/Cargo.toml +++ b/random-x/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [features] default = ["jit"] jit = ["dep:dynasmrt"] +rayon = ["dep:rayon"] [dependencies] blake2 = "0.10" @@ -15,5 +16,7 @@ hex-literal = "0.4" dynasmrt = {version = "2.0.0", optional = true} +rayon = {version ="1.7", optional = true} + [profile.dev] -opt-level = 2 \ No newline at end of file +opt-level = 3 \ No newline at end of file diff --git a/random-x/src/dataset.rs b/random-x/src/dataset.rs index 8415eea4..481a8207 100644 --- a/random-x/src/dataset.rs +++ b/random-x/src/dataset.rs @@ -1,17 +1,20 @@ -use argon2::{Algorithm, Argon2, Block, Params, Version}; use std::sync::{Arc, RwLock}; +use argon2::{Algorithm, Argon2, Block, Params, Version}; +#[cfg(feature = "rayon")] +use rayon::prelude::*; + use crate::blake2_generator::Blake2Generator; use crate::{ config::{ RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_LANES, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_SALT, - RANDOMX_CACHE_ACCESSES, + RANDOMX_CACHE_ACCESSES, RANDOMX_DATASET_SIZE, }, + registers::{RGroupRegisterID, RGroupRegisters}, superscalar::SSProgram, }; -trait Dataset {} - +/// Generates the memory blocks used in the cache fn argon2_blocks(key: &[u8]) -> Box<[Block]> { let params = Params::new( RANDOMX_ARGON_MEMORY, @@ -35,6 +38,31 @@ fn argon2_blocks(key: &[u8]) -> Box<[Block]> { blocks } +/// The Cache. +/// +/// The cache is used during light verification. +/// Internally this struct is a wrapper around an [`Arc`] internal cache, this allows +/// cheep clones and allows the cache to be shared between VMs on different threads. +#[derive(Debug, Clone)] +pub struct Cache { + internal_cache: Arc>, +} + +impl Cache { + /// Initialises the cache with the provided key. + /// + /// The key must be between 1-60 bytes (inclusive) otherwise this will panic. + pub fn init(key: &[u8]) -> Self { + let internal_cache = InternalCache::init(key); + Cache { + internal_cache: Arc::new(RwLock::new(internal_cache)), + } + } +} + +/// The internal cache structure, used during light verification. +#[derive(Debug)] + struct InternalCache { memory_blocks: Box<[Block]>, programs: Vec, @@ -55,32 +83,111 @@ impl InternalCache { programs, } } + + /// Gets an item from the cache at the specified index. + fn get_item(&self, idx: usize) -> [u64; 8] { + // one item is 8 u64s + // mask = (blocks in cache * bytes in a block / size of item) minus one. + let mask = (self.memory_blocks.len() * 1024 / 64) - 1; + // and the idx with the mask this is the same as doing mod (self.memory_blocks.len() * 1024 / 64) + let idx = idx & mask; + + // block_idx = idx divided by amount of items in a block + let block_idx = idx / (1024 / 64); + // idx * 8 is to get the idx of a single u64 + // we mask with amount of u64s in a block minus 1 which is the same as doing + // mod the amount of instructions in a block. + let block_u64_start = (idx * 8) & 127; + // The plus 8 cannot overflow as (idx * 8) & 127 wont give a number bigger than 120 + return self.memory_blocks[block_idx].as_ref()[block_u64_start..block_u64_start + 8] + .try_into() + .unwrap(); + } + + /// Generates the dataset item at the specified index. + fn init_data_set_item(&self, item_number: usize) -> [u64; 8] { + let mut registers = RGroupRegisters::default(); + registers.set( + &RGroupRegisterID::R0, + (TryInto::::try_into(item_number).unwrap() + 1_u64) + .wrapping_mul(6364136223846793005_u64), + ); + + let mut init_reg = |dst, val: u64| { + registers.apply_to_dst_with_src(&dst, &RGroupRegisterID::R0, |_, src| src ^ val) + }; + + init_reg(RGroupRegisterID::R1, 9298411001130361340); + init_reg(RGroupRegisterID::R2, 12065312585734608966); + init_reg(RGroupRegisterID::R3, 9306329213124626780); + init_reg(RGroupRegisterID::R4, 5281919268842080866); + init_reg(RGroupRegisterID::R5, 10536153434571861004); + init_reg(RGroupRegisterID::R6, 3398623926847679864); + init_reg(RGroupRegisterID::R7, 9549104520008361294); + + let mut cache_index = item_number; + + for program in &self.programs { + program.execute(&mut registers); + + let cache_item = self.get_item(cache_index); + for (reg_id, item) in RGroupRegisterID::iter().zip(cache_item) { + registers.apply_to_dst(®_id, |dst| dst ^ item); + } + + cache_index = registers + .get(&program.reg_with_max_latency()) + .try_into() + .expect("u64 does not fit into usize"); + } + registers.inner() + } } -pub struct Cache { - internal_cache: Arc>, +/// The Dataset used during mining. +/// +/// Internally this struct is a wrapper around an [`Arc`] internal dataset, this allows +/// cheep clones and allows the dataset to be shared between VMs on different threads. +#[derive(Debug, Clone)] +pub struct Dataset { + internal_dataset: Arc>, } +impl Dataset { + /// Initialises the dataset with the provided key. + /// + /// The key must be between 1-60 bytes (inclusive) otherwise this will panic. + /// + /// This is very computationally intense so might take a long time to complete. + pub fn init(key: &[u8]) -> Dataset { + let internal_dataset = InternalDataset::init(key); + Dataset { + internal_dataset: Arc::new(RwLock::new(internal_dataset)), + } + } +} + +/// The internal dataset used during mining. +#[derive(Debug)] struct InternalDataset { - dataset: Vec, + dataset: Vec<[u64; 8]>, } impl InternalDataset { fn init(key: &[u8]) -> InternalDataset { let cache = InternalCache::init(key); - let + #[cfg(feature = "rayon")] + let dataset: Vec<[u64; 8]> = (0..RANDOMX_DATASET_SIZE / (64 * 8)) + .into_par_iter() + .map(|i| cache.init_data_set_item(i)) + .collect(); + + #[cfg(not(feature = "rayon"))] + let dataset: Vec<[u64; 8]> = (0..RANDOMX_DATASET_SIZE / (64 * 8)) + .map(|i| cache.init_data_set_item(i)) + .collect(); + + Self { dataset } } } - -fn init_data_set_item(cache: &InternalCache, item: u64) -> [u64; 8] { - let -} - -// 12118971377224777581 -#[test] -fn init() { - let mem = InternalCache::init(&[5]); - - println!("{:?}", mem.memory_blocks[1000]) -} diff --git a/random-x/src/lib.rs b/random-x/src/lib.rs index 18491b74..0b98002b 100644 --- a/random-x/src/lib.rs +++ b/random-x/src/lib.rs @@ -1,10 +1,12 @@ mod aes_hash; mod blake2_generator; mod config; -//mod dataset; +mod dataset; mod registers; mod superscalar; +pub use dataset::{Cache, Dataset}; + fn is_0_or_power_of_2(x: u64) -> bool { (x & (x - 1)) == 0 } diff --git a/random-x/src/registers/integer.rs b/random-x/src/registers/integer.rs index c37283c8..979a4337 100644 --- a/random-x/src/registers/integer.rs +++ b/random-x/src/registers/integer.rs @@ -31,6 +31,10 @@ impl RGroupRegisterID { pub struct RGroupRegisters([u64; 8]); impl RGroupRegisters { + pub fn inner(self) -> [u64; 8] { + self.0 + } + pub fn apply_to_dst(&mut self, dst: &RGroupRegisterID, f: impl FnOnce(u64) -> u64) { *self.get_mut(dst) = f(self.get(dst)); } diff --git a/random-x/src/superscalar.rs b/random-x/src/superscalar.rs index 74cf3d00..94d3a759 100644 --- a/random-x/src/superscalar.rs +++ b/random-x/src/superscalar.rs @@ -2,13 +2,16 @@ mod cpu; mod executor; mod generator; mod instructions; +mod program; use crate::blake2_generator::Blake2Generator; -use crate::registers::RGroupRegisterID; +use crate::registers::{RGroupRegisterID, RGroupRegisters}; +use executor::execute; use generator::generate; use instructions::ScalarInstruction; +#[derive(Debug)] pub(crate) struct SSProgram { program: Vec, reg_with_max_latency: RGroupRegisterID, @@ -18,4 +21,12 @@ impl SSProgram { pub fn generate(gen: &mut Blake2Generator) -> Self { generate(gen) } + + pub fn execute(&self, registers: &mut RGroupRegisters) { + execute(&self.program, registers) + } + + pub fn reg_with_max_latency(&self) -> RGroupRegisterID { + self.reg_with_max_latency + } } diff --git a/random-x/src/superscalar/cpu.rs b/random-x/src/superscalar/cpu.rs index 009fd43f..42b02982 100644 --- a/random-x/src/superscalar/cpu.rs +++ b/random-x/src/superscalar/cpu.rs @@ -37,6 +37,7 @@ impl AllowedPorts { } } +#[allow(non_camel_case_types)] pub enum MacroOp { SUB_RR, XOR_RR, diff --git a/random-x/src/superscalar/generator.rs b/random-x/src/superscalar/generator.rs index 57f7d082..78bddf16 100644 --- a/random-x/src/superscalar/generator.rs +++ b/random-x/src/superscalar/generator.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use crate::config::SUPERSCALAR_MAX_SIZE; use crate::registers::{RGroupRegisterID, RGroupRegisters}; -use crate::superscalar::cpu::{MacroOp, ProgramSchedule, SlotLen}; +use crate::superscalar::cpu::{ProgramSchedule, SlotLen}; use crate::superscalar::instructions::ScalarInstruction; use crate::superscalar::SSProgram; use crate::{ @@ -497,7 +497,7 @@ enum ScalarInstructionBuilderSM { macro_op_idx: usize, }, /// NULL state, this state will only be finished on is the program is full. - NULL, + Null, } impl ScalarInstructionBuilderSM { @@ -510,8 +510,8 @@ impl ScalarInstructionBuilderSM { program_state: &mut ProgramState, ) { loop { - match std::mem::replace(self, ScalarInstructionBuilderSM::NULL) { - ScalarInstructionBuilderSM::NULL => { + match std::mem::replace(self, ScalarInstructionBuilderSM::Null) { + ScalarInstructionBuilderSM::Null => { return; } ScalarInstructionBuilderSM::Generate { .. } => { @@ -682,8 +682,7 @@ impl ScalarInstructionBuilderSM { let mut set = false; for _ in 0..LOOK_FORWARD_CYCLES { - if !builder.select_destination(gen, *scheduled_cycle, allow_chain_mul, ®isters_info) - { + if !builder.select_destination(gen, *scheduled_cycle, allow_chain_mul, registers_info) { *scheduled_cycle += 1; *cycle += 1; } else { @@ -731,7 +730,7 @@ impl ScalarInstructionBuilderSM { match self { ScalarInstructionBuilderSM::Generate { last_instruction } => *last_instruction, ScalarInstructionBuilderSM::PartiallyComplete { builder, .. } => Some(builder.id), - ScalarInstructionBuilderSM::NULL => { + ScalarInstructionBuilderSM::Null => { panic!("Should not be calling this function in this state") } } diff --git a/random-x/src/superscalar/instructions.rs b/random-x/src/superscalar/instructions.rs index 7c84fabc..2f504ed6 100644 --- a/random-x/src/superscalar/instructions.rs +++ b/random-x/src/superscalar/instructions.rs @@ -1,5 +1,5 @@ use crate::registers::RGroupRegisterID; -use crate::superscalar::cpu::{MacroOp, SlotLen}; +use crate::superscalar::cpu::MacroOp; #[derive(Debug, Copy, Clone, Eq, PartialEq)] #[allow(non_camel_case_types)]