diff --git a/Cargo.lock b/Cargo.lock index 57e438de..08edc496 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1314,6 +1314,15 @@ dependencies = [ "thiserror", ] +[[package]] +name = "cbor4ii" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b4c883b9cc4757b061600d39001d4d0232bece4a3174696cf8f58a14db107d" +dependencies = [ + "serde", +] + [[package]] name = "cc" version = "1.0.88" @@ -4120,6 +4129,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8e3b4d67870478db72bac87bfc260ee6641d0734e0e3e275798f089c3fecfd4" dependencies = [ "async-trait", + "cbor4ii", "futures", "instant", "libp2p-core", @@ -4127,6 +4137,7 @@ dependencies = [ "libp2p-swarm", "log", "rand", + "serde", "smallvec", "void", ] diff --git a/coordinator/Cargo.toml b/coordinator/Cargo.toml index 12f8e763..d12c3933 100644 --- a/coordinator/Cargo.toml +++ b/coordinator/Cargo.toml @@ -51,7 +51,7 @@ env_logger = { version = "0.10", default-features = false, features = ["humantim futures-util = { version = "0.3", default-features = false, features = ["std"] } tokio = { version = "1", default-features = false, features = ["rt-multi-thread", "sync", "time", "macros"] } -libp2p = { version = "0.52", default-features = false, features = ["tokio", "tcp", "noise", "yamux", "gossipsub", "macros"] } +libp2p = { version = "0.52", default-features = false, features = ["tokio", "tcp", "noise", "yamux", "cbor", "request-response", "gossipsub", "macros"] } [dev-dependencies] tributary = { package = "tributary-chain", path = "./tributary", features = ["tests"] } diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs index e6e49c3e..4de23ae0 100644 --- a/coordinator/src/main.rs +++ b/coordinator/src/main.rs @@ -1292,7 +1292,6 @@ pub async fn run( p2p.clone(), cosign_channel.clone(), tributary_event_listener_4, - ::generator() * key.deref(), )); // Handle all messages from processors diff --git a/coordinator/src/p2p.rs b/coordinator/src/p2p.rs index 6c845f4e..4e476c36 100644 --- a/coordinator/src/p2p.rs +++ b/coordinator/src/p2p.rs @@ -9,8 +9,6 @@ use std::{ use async_trait::async_trait; use rand_core::{RngCore, OsRng}; -use ciphersuite::{Ciphersuite, Ristretto}; - use scale::Encode; use borsh::{BorshSerialize, BorshDeserialize}; use serai_client::{primitives::NetworkId, validator_sets::primitives::ValidatorSet, Serai}; @@ -23,12 +21,16 @@ use tokio::{ time::sleep, }; +// TODO: Remove cbor use libp2p::{ core::multiaddr::{Protocol, Multiaddr}, identity::Keypair, PeerId, tcp::Config as TcpConfig, noise, yamux, + request_response::{ + Config as RrConfig, Message as RrMessage, Event as RrEvent, cbor::Behaviour as RrBehavior, + }, gossipsub::{ IdentTopic, FastMessageId, MessageId, MessageAuthenticity, ValidationMode, ConfigBuilder, IdentityTransform, AllowAllSubscriptionFilter, Event as GsEvent, PublishError, @@ -135,14 +137,14 @@ pub trait P2p: Send + Sync + Clone + fmt::Debug + TributaryP2p { async fn subscribe(&self, set: ValidatorSet, genesis: [u8; 32]); async fn unsubscribe(&self, set: ValidatorSet, genesis: [u8; 32]); - async fn send_raw(&self, to: Self::Id, genesis: Option<[u8; 32]>, msg: Vec); - async fn broadcast_raw(&self, genesis: Option<[u8; 32]>, msg: Vec); + async fn send_raw(&self, to: Self::Id, msg: Vec); + async fn broadcast_raw(&self, kind: P2pMessageKind, msg: Vec); async fn receive_raw(&self) -> (Self::Id, Vec); async fn send(&self, to: Self::Id, kind: P2pMessageKind, msg: Vec) { let mut actual_msg = kind.serialize(); actual_msg.extend(msg); - self.send_raw(to, kind.genesis(), actual_msg).await; + self.send_raw(to, actual_msg).await; } async fn broadcast(&self, kind: P2pMessageKind, msg: Vec) { let mut actual_msg = kind.serialize(); @@ -159,7 +161,7 @@ pub trait P2p: Send + Sync + Clone + fmt::Debug + TributaryP2p { } ); */ - self.broadcast_raw(kind.genesis(), actual_msg).await; + self.broadcast_raw(kind, actual_msg).await; } async fn receive(&self) -> Message { let (sender, kind, msg) = loop { @@ -194,6 +196,7 @@ pub trait P2p: Send + Sync + Clone + fmt::Debug + TributaryP2p { #[derive(NetworkBehaviour)] struct Behavior { + reqres: RrBehavior, Vec>, gossipsub: GsBehavior, } @@ -201,7 +204,8 @@ struct Behavior { #[derive(Clone)] pub struct LibP2p { subscribe: Arc>>, - broadcast: Arc, Vec)>>>, + send: Arc)>>>, + broadcast: Arc)>>>, receive: Arc)>>>, } impl fmt::Debug for LibP2p { @@ -221,6 +225,7 @@ impl LibP2p { let throwaway_key_pair = Keypair::generate_ed25519(); let behavior = Behavior { + reqres: { RrBehavior::new([], RrConfig::default()) }, gossipsub: { let heartbeat_interval = tributary::tendermint::LATENCY_TIME / 2; let heartbeats_per_block = @@ -284,6 +289,7 @@ impl LibP2p { const PORT: u16 = 30563; // 5132 ^ (('c' << 8) | 'o') swarm.listen_on(format!("/ip4/0.0.0.0/tcp/{PORT}").parse().unwrap()).unwrap(); + let (send_send, mut send_recv) = mpsc::unbounded_channel(); let (broadcast_send, mut broadcast_recv) = mpsc::unbounded_channel(); let (receive_send, receive_recv) = mpsc::unbounded_channel(); let (subscribe_send, mut subscribe_recv) = mpsc::unbounded_channel(); @@ -486,17 +492,32 @@ impl LibP2p { } } + msg = send_recv.recv() => { + let (peer, msg): (PeerId, Vec) = + msg.expect("send_recv closed. are we shutting down?"); + swarm.behaviour_mut().reqres.send_request(&peer, msg); + }, + // Handle any queued outbound messages msg = broadcast_recv.recv() => { - let (genesis, msg): (Option<[u8; 32]>, Vec) = + let (kind, msg): (P2pMessageKind, Vec) = msg.expect("broadcast_recv closed. are we shutting down?"); - let set = genesis.and_then(|genesis| set_for_genesis.get(&genesis).copied()); - broadcast_raw( - &mut swarm, - &mut time_of_last_p2p_message, - set, - msg, - ); + if matches!(kind, P2pMessageKind::KeepAlive) || + matches!(kind, P2pMessageKind::Heartbeat(_)) { + // Use request/response + for peer_id in swarm.connected_peers().copied().collect::>() { + swarm.behaviour_mut().reqres.send_request(&peer_id, msg.clone()); + } + } else { + // Use gossipsub + let set = kind.genesis().and_then(|genesis| set_for_genesis.get(&genesis).copied()); + broadcast_raw( + &mut swarm, + &mut time_of_last_p2p_message, + set, + msg, + ); + } } // Handle new incoming messages @@ -572,9 +593,21 @@ impl LibP2p { connected_peers.len(), ); } + Some(SwarmEvent::Behaviour(BehaviorEvent::Reqres( + RrEvent::Message { peer, message }, + ))) => { + let message = match message { + RrMessage::Request { request, .. } => request, + RrMessage::Response { response, .. } => response, + }; + receive_send + .send((peer, message)) + .expect("receive_send closed. are we shutting down?"); + } Some(SwarmEvent::Behaviour(BehaviorEvent::Gossipsub( GsEvent::Message { propagation_source, message, .. }, ))) => { + // TODO: Ban Heartbeat/Blocks received over gossipsub receive_send .send((propagation_source, message.data)) .expect("receive_send closed. are we shutting down?"); @@ -623,6 +656,7 @@ impl LibP2p { LibP2p { subscribe: Arc::new(Mutex::new(subscribe_send)), + send: Arc::new(Mutex::new(send_send)), broadcast: Arc::new(Mutex::new(broadcast_send)), receive: Arc::new(Mutex::new(receive_recv)), } @@ -651,16 +685,16 @@ impl P2p for LibP2p { .expect("subscribe_send closed. are we shutting down?"); } - async fn send_raw(&self, _: Self::Id, genesis: Option<[u8; 32]>, msg: Vec) { - self.broadcast_raw(genesis, msg).await; + async fn send_raw(&self, peer: Self::Id, msg: Vec) { + self.send.lock().await.send((peer, msg)).expect("send_send closed. are we shutting down?"); } - async fn broadcast_raw(&self, genesis: Option<[u8; 32]>, msg: Vec) { + async fn broadcast_raw(&self, kind: P2pMessageKind, msg: Vec) { self .broadcast .lock() .await - .send((genesis, msg)) + .send((kind, msg)) .expect("broadcast_send closed. are we shutting down?"); } @@ -678,17 +712,6 @@ impl TributaryP2p for LibP2p { } } -fn heartbeat_time_unit() -> u64 { - // Also include the timestamp so LibP2p doesn't flag this as an old message re-circulating - let timestamp = SystemTime::now() - .duration_since(SystemTime::UNIX_EPOCH) - .expect("system clock is wrong") - .as_secs(); - // Divide by the block time so if multiple parties send a Heartbeat, they're more likely to - // overlap - timestamp / u64::from(Tributary::::block_time()) -} - pub async fn heartbeat_tributaries_task( p2p: P, mut tributary_event: broadcast::Receiver>, @@ -723,8 +746,11 @@ pub async fn heartbeat_tributaries_task( if SystemTime::now() > (block_time + Duration::from_secs(60)) { log::warn!("last known tributary block was over a minute ago"); let mut msg = tip.to_vec(); - let time_unit = heartbeat_time_unit::(); - msg.extend(time_unit.to_le_bytes()); + let time: u64 = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .expect("system clock is wrong") + .as_secs(); + msg.extend(time.to_le_bytes()); P2p::broadcast(&p2p, P2pMessageKind::Heartbeat(tributary.genesis()), msg).await; } } @@ -738,7 +764,6 @@ pub async fn handle_p2p_task( p2p: P, cosign_channel: mpsc::UnboundedSender, mut tributary_event: broadcast::Receiver>, - our_key: ::G, ) { let channels = Arc::new(RwLock::new(HashMap::<_, mpsc::UnboundedSender>>::new())); tokio::spawn({ @@ -764,7 +789,6 @@ pub async fn handle_p2p_task( tokio::spawn({ let p2p = p2p.clone(); async move { - let mut last_replied_to_heartbeat = 0; loop { let Some(mut msg) = recv.recv().await else { // Channel closure happens when the tributary retires @@ -781,76 +805,37 @@ pub async fn handle_p2p_task( } } - // TODO2: Rate limit this per timestamp - // And/or slash on Heartbeat which justifies a response, since the node + // TODO: Slash on Heartbeat which justifies a response, since the node // obviously was offline and we must now use our bandwidth to compensate for // them? P2pMessageKind::Heartbeat(msg_genesis) => { assert_eq!(msg_genesis, genesis); - - let current_time_unit = heartbeat_time_unit::(); - if current_time_unit.saturating_sub(last_replied_to_heartbeat) < 10 { - continue; - } - if msg.msg.len() != 40 { log::error!("validator sent invalid heartbeat"); continue; } // Only respond to recent heartbeats - let msg_time_unit = u64::from_le_bytes(msg.msg[32 .. 40].try_into().expect( + let msg_time = u64::from_le_bytes(msg.msg[32 .. 40].try_into().expect( "length-checked heartbeat message didn't have 8 bytes for the u64", )); - if current_time_unit.saturating_sub(msg_time_unit) > 1 { + if SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .expect("system clock is wrong") + .as_secs() + .saturating_sub(msg_time) > + 10 + { continue; } - // This is the network's last replied to, not ours specifically - last_replied_to_heartbeat = current_time_unit; + log::debug!("received heartbeat with a recent timestamp"); let reader = tributary.tributary.reader(); - // Have sqrt(n) nodes reply with the blocks - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - let mut responders = f32::from(tributary.spec.n(&[])).sqrt().floor() as u64; - // Try to have at least 3 responders - if responders < 3 { - responders = tributary.spec.n(&[]).min(3).into(); - } - - // Decide which nodes will respond by using the latest block's hash as a - // mutually agreed upon entropy source - // This isn't a secure source of entropy, yet it's fine for this - let entropy = u64::from_le_bytes(reader.tip()[.. 8].try_into().unwrap()); - // If n = 10, responders = 3, we want `start` to be 0 ..= 7 - // (so the highest is 7, 8, 9) - // entropy % (10 + 1) - 3 = entropy % 8 = 0 ..= 7 - let start = usize::try_from( - entropy % (u64::from(tributary.spec.n(&[]) + 1) - responders), - ) - .unwrap(); - let mut selected = false; - for validator in &tributary.spec.validators() - [start .. (start + usize::try_from(responders).unwrap())] - { - if our_key == validator.0 { - selected = true; - continue; - } - } - if !selected { - log::debug!("received heartbeat and not selected to respond"); - continue; - } - - log::debug!("received heartbeat and selected to respond"); - let p2p = p2p.clone(); // Spawn a dedicated task as this may require loading large amounts of data // from disk and take a notable amount of time tokio::spawn(async move { - // Have the selected nodes respond - // TODO: Spawn a dedicated topic for this heartbeat response? let mut latest = msg.msg[.. 32].try_into().unwrap(); let mut to_send = vec![]; while let Some(next) = reader.block_after(&latest) { diff --git a/coordinator/src/tests/mod.rs b/coordinator/src/tests/mod.rs index 45a62297..55b6c99f 100644 --- a/coordinator/src/tests/mod.rs +++ b/coordinator/src/tests/mod.rs @@ -65,11 +65,11 @@ impl P2p for LocalP2p { async fn subscribe(&self, _set: ValidatorSet, _genesis: [u8; 32]) {} async fn unsubscribe(&self, _set: ValidatorSet, _genesis: [u8; 32]) {} - async fn send_raw(&self, to: Self::Id, _genesis: Option<[u8; 32]>, msg: Vec) { + async fn send_raw(&self, to: Self::Id, msg: Vec) { self.1.write().await.1[to].push_back((self.0, msg)); } - async fn broadcast_raw(&self, _genesis: Option<[u8; 32]>, msg: Vec) { + async fn broadcast_raw(&self, _kind: P2pMessageKind, msg: Vec) { // Content-based deduplication let mut lock = self.1.write().await; { diff --git a/coordinator/src/tests/tributary/handle_p2p.rs b/coordinator/src/tests/tributary/handle_p2p.rs index daa0cf02..756f4561 100644 --- a/coordinator/src/tests/tributary/handle_p2p.rs +++ b/coordinator/src/tests/tributary/handle_p2p.rs @@ -3,8 +3,6 @@ use std::sync::Arc; use rand_core::OsRng; -use ciphersuite::{Ciphersuite, Ristretto}; - use tokio::{ sync::{mpsc, broadcast}, time::sleep, @@ -37,17 +35,12 @@ async fn handle_p2p_test() { let mut tributary_senders = vec![]; let mut tributary_arcs = vec![]; - for (i, (p2p, tributary)) in tributaries.drain(..).enumerate() { + for (p2p, tributary) in tributaries.drain(..) { let tributary = Arc::new(tributary); tributary_arcs.push(tributary.clone()); let (new_tributary_send, new_tributary_recv) = broadcast::channel(5); let (cosign_send, _) = mpsc::unbounded_channel(); - tokio::spawn(handle_p2p_task( - p2p, - cosign_send, - new_tributary_recv, - ::generator() * *keys[i], - )); + tokio::spawn(handle_p2p_task(p2p, cosign_send, new_tributary_recv)); new_tributary_send .send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(), tributary })) .map_err(|_| "failed to send ActiveTributary") diff --git a/coordinator/src/tests/tributary/sync.rs b/coordinator/src/tests/tributary/sync.rs index 9f01ca4d..18f60864 100644 --- a/coordinator/src/tests/tributary/sync.rs +++ b/coordinator/src/tests/tributary/sync.rs @@ -45,17 +45,12 @@ async fn sync_test() { let mut tributary_senders = vec![]; let mut tributary_arcs = vec![]; let mut p2p_threads = vec![]; - for (i, (p2p, tributary)) in tributaries.drain(..).enumerate() { + for (p2p, tributary) in tributaries.drain(..) { let tributary = Arc::new(tributary); tributary_arcs.push(tributary.clone()); let (new_tributary_send, new_tributary_recv) = broadcast::channel(5); let (cosign_send, _) = mpsc::unbounded_channel(); - let thread = tokio::spawn(handle_p2p_task( - p2p, - cosign_send, - new_tributary_recv, - ::generator() * *keys[i], - )); + let thread = tokio::spawn(handle_p2p_task(p2p, cosign_send, new_tributary_recv)); new_tributary_send .send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(), tributary })) .map_err(|_| "failed to send ActiveTributary") @@ -91,7 +86,7 @@ async fn sync_test() { let syncer_tributary = Arc::new(syncer_tributary); let (syncer_tributary_send, syncer_tributary_recv) = broadcast::channel(5); let (cosign_send, _) = mpsc::unbounded_channel(); - tokio::spawn(handle_p2p_task(syncer_p2p.clone(), cosign_send, syncer_tributary_recv, syncer_key)); + tokio::spawn(handle_p2p_task(syncer_p2p.clone(), cosign_send, syncer_tributary_recv)); syncer_tributary_send .send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(),