From b6c4adc83a199886d6f932c1321857fb8a535af5 Mon Sep 17 00:00:00 2001
From: SyntheticBird <118022351+SyntheticBird45@users.noreply.github.com>
Date: Sat, 2 Nov 2024 00:45:56 +0000
Subject: [PATCH] p2p: Implement P2P Bucket data structure (#329)

Implements P2P Bucket data structure

This commit implements a "Bucket" data structure that is a collection of data that discriminates its items into "buckets" (vector of size N) following a defined function.

- Implements Bucket data structure and Bucketable trait
- Implements Bucketable for Ipv4Addr
- Added the crate to the workspace dependencies
- Added arrayvec as a dependency
---
 Cargo.lock                                |   8 +
 Cargo.toml                                |   3 +
 books/architecture/src/appendix/crates.md |   1 +
 p2p/bucket/Cargo.toml                     |  13 ++
 p2p/bucket/src/lib.rs                     | 181 ++++++++++++++++++++++
 5 files changed, 206 insertions(+)
 create mode 100644 p2p/bucket/Cargo.toml
 create mode 100644 p2p/bucket/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 7ad2f2ac..9a0ebd5a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -855,6 +855,14 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "cuprate-p2p-bucket"
+version = "0.1.0"
+dependencies = [
+ "arrayvec",
+ "rand",
+]
+
 [[package]]
 name = "cuprate-p2p-core"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index d5aca71e..614788d3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,6 +16,7 @@ members = [
     "net/wire",
     "p2p/p2p",
     "p2p/p2p-core",
+    "p2p/bucket",
     "p2p/dandelion-tower",
     "p2p/async-buffer",
     "p2p/address-book",
@@ -64,6 +65,7 @@ cuprate-levin = { path = "net/levin" ,default-feature
 cuprate-wire = { path = "net/wire" ,default-features = false}
 cuprate-p2p = { path = "p2p/p2p" ,default-features = false}
 cuprate-p2p-core = { path = "p2p/p2p-core" ,default-features = false}
+cuprate-p2p-bucket = { path = "p2p/bucket" ,default-features = false}
 cuprate-dandelion-tower = { path = "p2p/dandelion-tower" ,default-features = false}
 cuprate-async-buffer = { path = "p2p/async-buffer" ,default-features = false}
 cuprate-address-book = { path = "p2p/address-book" ,default-features = false}
@@ -80,6 +82,7 @@ cuprate-rpc-interface = { path = "rpc/interface" ,default-feature
 
 # External dependencies
 anyhow = { version = "1.0.89", default-features = false }
+arrayvec = { version = "0.7", default-features = false }
 async-trait = { version = "0.1.82", default-features = false }
 bitflags = { version = "2.6.0", default-features = false }
 blake3 = { version = "1", default-features = false }
diff --git a/books/architecture/src/appendix/crates.md b/books/architecture/src/appendix/crates.md
index fe8f1f05..ac2780e1 100644
--- a/books/architecture/src/appendix/crates.md
+++ b/books/architecture/src/appendix/crates.md
@@ -35,6 +35,7 @@ cargo doc --open --package cuprate-blockchain
 | [`cuprate-async-buffer`](https://doc.cuprate.org/cuprate_async_buffer) | [`p2p/async-buffer/`](https://github.com/Cuprate/cuprate/tree/main/p2p/async-buffer) | A bounded SPSC, FIFO, asynchronous buffer that supports arbitrary weights for values
 | [`cuprate-dandelion-tower`](https://doc.cuprate.org/cuprate_dandelion_tower) | [`p2p/dandelion-tower/`](https://github.com/Cuprate/cuprate/tree/main/p2p/dandelion-tower) | TODO
 | [`cuprate-p2p`](https://doc.cuprate.org/cuprate_p2p) | [`p2p/p2p/`](https://github.com/Cuprate/cuprate/tree/main/p2p/p2p) | TODO
+| [`cuprate-p2p-bucket`](https://doc.cuprate.org/cuprate_p2p_bucket) | [`p2p/bucket/`](https://github.com/Cuprate/cuprate/tree/main/p2p/bucket) | A collection data structure discriminating its items into "buckets" of limited size.
 | [`cuprate-p2p-core`](https://doc.cuprate.org/cuprate_p2p_core) | [`p2p/p2p-core/`](https://github.com/Cuprate/cuprate/tree/main/p2p/p2p-core) | TODO
 
 ## Storage
diff --git a/p2p/bucket/Cargo.toml b/p2p/bucket/Cargo.toml
new file mode 100644
index 00000000..1a53e85a
--- /dev/null
+++ b/p2p/bucket/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "cuprate-p2p-bucket"
+version = "0.1.0"
+edition = "2021"
+license = "MIT"
+authors = ["SyntheticBird"]
+
+[dependencies]
+arrayvec = { workspace = true }
+rand = { workspace = true, features = ["std", "std_rng"]}
+
+[lints]
+workspace = true
diff --git a/p2p/bucket/src/lib.rs b/p2p/bucket/src/lib.rs
new file mode 100644
index 00000000..0f73eea2
--- /dev/null
+++ b/p2p/bucket/src/lib.rs
@@ -0,0 +1,181 @@
+//! Bucket data structure
+//!
+//! A collection data structure that discriminates its unique items and places them into "buckets".
+//!
+//! The item must implement the [`Bucketable`] trait that defines how to create the discriminant
+//! from the item type. The data structure will internally contain any item into "buckets" or vectors
+//! of sized capacity `N` that regroup all the stored items with this specific discriminant.
+//!
+//! A practical example of this data structure is for storing `N` amount of IP discriminated by their subnets.
+//! You can store in each "buckets" corresponding to a `/16` subnet up to `N` IPs of that subnet.
+//!
+//! # Example
+//!
+//! ```
+//! use cuprate_p2p_bucket::Bucket;
+//! use std::net::Ipv4Addr;
+//!
+//! // Create a new bucket that can store at most 2 IPs in a particular `/16` subnet.
+//! let mut bucket = Bucket::<2,Ipv4Addr>::new();
+//!
+//! // Fill the `96.96.0.0/16` bucket.
+//! bucket.push("96.96.0.1".parse().unwrap());
+//! bucket.push("96.96.0.2".parse().unwrap());
+//! assert_eq!(2, bucket.len());
+//! assert_eq!(2, bucket.len_bucket(&[96_u8,96_u8]).unwrap());
+//!
+//! // Push a new IP from another subnet
+//! bucket.push("127.0.0.1".parse().unwrap());
+//! assert_eq!(3, bucket.len());
+//! assert_eq!(2, bucket.len_bucket(&[96_u8,96_u8]).unwrap());
+//! assert_eq!(1, bucket.len_bucket(&[127_u8,0_u8]).unwrap());
+//!
+//! // Attempting to push a new IP within `96.96.0.0/16` bucket will return the IP back
+//! // as this subnet is already full.
+//! let pushed = bucket.push("96.96.0.3".parse().unwrap());
+//! assert!(pushed.is_some());
+//! assert_eq!(2, bucket.len_bucket(&[96_u8,96_u8]).unwrap());
+//!
+//! ```
+
+use arrayvec::{ArrayVec, CapacityError};
+use rand::random;
+
+use std::{collections::BTreeMap, net::Ipv4Addr};
+
+/// A discriminant that can be computed from the type.
+pub trait Bucketable: Sized + Eq + Clone {
+    /// The type of the discriminant being used as key of the B-tree map.
+    type Discriminant: Ord + AsRef<[u8]>;
+
+    /// Method that can compute the discriminant from the item.
+    fn discriminant(&self) -> Self::Discriminant;
+}
+
+/// A collection data structure discriminating its unique items
+/// with a specified method. Limiting the amount of items stored
+/// with that discriminant to the const `N`.
+pub struct Bucket<const N: usize, I: Bucketable> {
+    /// The storage of the bucket
+    storage: BTreeMap<I::Discriminant, ArrayVec<I, N>>,
+}
+
+impl<const N: usize, I: Bucketable> Bucket<N, I> {
+    /// Create a new Bucket
+    pub const fn new() -> Self {
+        Self {
+            storage: BTreeMap::new(),
+        }
+    }
+
+    /// Push a new element into the Bucket
+    ///
+    /// Will internally create a new vector for each new discriminant being
+    /// generated from an item.
+    ///
+    /// This function WILL NOT push the element if it already exists.
+    ///
+    /// Return `None` if the item has been pushed or ignored. `Some(I)` if
+    /// the vector is full.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use cuprate_p2p_bucket::Bucket;
+    /// use std::net::Ipv4Addr;
+    ///
+    /// let mut bucket = Bucket::<8,Ipv4Addr>::new();
+    ///
+    /// // Push a first IP address.
+    /// bucket.push("127.0.0.1".parse().unwrap());
+    /// assert_eq!(1, bucket.len());
+    ///
+    /// // Push the same IP address a second time.
+    /// bucket.push("127.0.0.1".parse().unwrap());
+    /// assert_eq!(1, bucket.len());
+    /// ```
+    pub fn push(&mut self, item: I) -> Option<I> {
+        // Fetch the vector of this discriminant, creating it on first use.
+        let vec = self.storage.entry(item.discriminant()).or_default();
+
+        // Ignore items that are already stored.
+        if vec.contains(&item) {
+            return None;
+        }
+
+        // `try_push` hands the item back instead of panicking when the vector is full.
+        vec.try_push(item).err().map(CapacityError::element)
+    }
+
+    /// Will attempt to remove an item from the bucket.
+    ///
+    /// Return the removed item, or `None` if it wasn't stored. The vector of
+    /// the discriminant is kept in the storage even when it becomes empty.
+    pub fn remove(&mut self, item: &I) -> Option<I> {
+        let vec = self.storage.get_mut(&item.discriminant())?;
+        let index = vec.iter().position(|v| v == item)?;
+
+        // `swap_remove` does not preserve the ordering within the vector.
+        Some(vec.swap_remove(index))
+    }
+
+    /// Return the number of item stored within the storage
+    pub fn len(&self) -> usize {
+        self.storage.values().map(ArrayVec::len).sum()
+    }
+
+    /// Return the number of item stored with a specific discriminant.
+    ///
+    /// This method returns None if the bucket with this discriminant
+    /// doesn't exist.
+    pub fn len_bucket(&self, discriminant: &I::Discriminant) -> Option<usize> {
+        self.storage.get(discriminant).map(ArrayVec::len)
+    }
+
+    /// Return `true` if the storage contains no items
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Return a reference to an item chosen at random.
+    ///
+    /// A non-empty vector is first chosen at random among all discriminants,
+    /// then an item is chosen at random within it. Repeated use of this
+    /// function will therefore spread the picks evenly across discriminants,
+    /// no matter how many items each of them holds.
+    ///
+    /// Return `None` if the storage contains no items.
+    pub fn get_random(&mut self) -> Option<&I> {
+        // Vectors emptied by `remove` are kept in the storage, skip them.
+        let count = self.storage.values().filter(|vec| !vec.is_empty()).count();
+        if count == 0 {
+            return None;
+        }
+
+        // Get a random vector. The index is reduced with `%` to stay in bounds.
+        let vec = self
+            .storage
+            .values()
+            .filter(|vec| !vec.is_empty())
+            .nth(random::<usize>() % count)?;
+
+        // Return a reference chosen at random.
+        vec.get(random::<usize>() % vec.len())
+    }
+}
+
+impl<const N: usize, I: Bucketable> Default for Bucket<N, I> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Bucketable for Ipv4Addr {
+    /// We are discriminating by `/16` subnets.
+    type Discriminant = [u8; 2];
+
+    fn discriminant(&self) -> Self::Discriminant {
+        let [a, b, ..] = self.octets();
+        [a, b]
+    }
+}