mirror of
https://github.com/serai-dex/serai.git
synced 2024-12-22 11:39:35 +00:00
Move keep alive, heartbeat, block to request/response
This commit is contained in:
parent
b5e22dca8f
commit
8cef9eff6f
7 changed files with 87 additions and 104 deletions
11
Cargo.lock
generated
11
Cargo.lock
generated
|
@ -1314,6 +1314,15 @@ dependencies = [
|
|||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cbor4ii"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59b4c883b9cc4757b061600d39001d4d0232bece4a3174696cf8f58a14db107d"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.88"
|
||||
|
@ -4120,6 +4129,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "d8e3b4d67870478db72bac87bfc260ee6641d0734e0e3e275798f089c3fecfd4"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"cbor4ii",
|
||||
"futures",
|
||||
"instant",
|
||||
"libp2p-core",
|
||||
|
@ -4127,6 +4137,7 @@ dependencies = [
|
|||
"libp2p-swarm",
|
||||
"log",
|
||||
"rand",
|
||||
"serde",
|
||||
"smallvec",
|
||||
"void",
|
||||
]
|
||||
|
|
|
@ -51,7 +51,7 @@ env_logger = { version = "0.10", default-features = false, features = ["humantim
|
|||
|
||||
futures-util = { version = "0.3", default-features = false, features = ["std"] }
|
||||
tokio = { version = "1", default-features = false, features = ["rt-multi-thread", "sync", "time", "macros"] }
|
||||
libp2p = { version = "0.52", default-features = false, features = ["tokio", "tcp", "noise", "yamux", "gossipsub", "macros"] }
|
||||
libp2p = { version = "0.52", default-features = false, features = ["tokio", "tcp", "noise", "yamux", "cbor", "request-response", "gossipsub", "macros"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tributary = { package = "tributary-chain", path = "./tributary", features = ["tests"] }
|
||||
|
|
|
@ -1292,7 +1292,6 @@ pub async fn run<D: Db, Pro: Processors, P: P2p>(
|
|||
p2p.clone(),
|
||||
cosign_channel.clone(),
|
||||
tributary_event_listener_4,
|
||||
<Ristretto as Ciphersuite>::generator() * key.deref(),
|
||||
));
|
||||
|
||||
// Handle all messages from processors
|
||||
|
|
|
@ -9,8 +9,6 @@ use std::{
|
|||
use async_trait::async_trait;
|
||||
use rand_core::{RngCore, OsRng};
|
||||
|
||||
use ciphersuite::{Ciphersuite, Ristretto};
|
||||
|
||||
use scale::Encode;
|
||||
use borsh::{BorshSerialize, BorshDeserialize};
|
||||
use serai_client::{primitives::NetworkId, validator_sets::primitives::ValidatorSet, Serai};
|
||||
|
@ -23,12 +21,16 @@ use tokio::{
|
|||
time::sleep,
|
||||
};
|
||||
|
||||
// TODO: Remove cbor
|
||||
use libp2p::{
|
||||
core::multiaddr::{Protocol, Multiaddr},
|
||||
identity::Keypair,
|
||||
PeerId,
|
||||
tcp::Config as TcpConfig,
|
||||
noise, yamux,
|
||||
request_response::{
|
||||
Config as RrConfig, Message as RrMessage, Event as RrEvent, cbor::Behaviour as RrBehavior,
|
||||
},
|
||||
gossipsub::{
|
||||
IdentTopic, FastMessageId, MessageId, MessageAuthenticity, ValidationMode, ConfigBuilder,
|
||||
IdentityTransform, AllowAllSubscriptionFilter, Event as GsEvent, PublishError,
|
||||
|
@ -135,14 +137,14 @@ pub trait P2p: Send + Sync + Clone + fmt::Debug + TributaryP2p {
|
|||
async fn subscribe(&self, set: ValidatorSet, genesis: [u8; 32]);
|
||||
async fn unsubscribe(&self, set: ValidatorSet, genesis: [u8; 32]);
|
||||
|
||||
async fn send_raw(&self, to: Self::Id, genesis: Option<[u8; 32]>, msg: Vec<u8>);
|
||||
async fn broadcast_raw(&self, genesis: Option<[u8; 32]>, msg: Vec<u8>);
|
||||
async fn send_raw(&self, to: Self::Id, msg: Vec<u8>);
|
||||
async fn broadcast_raw(&self, kind: P2pMessageKind, msg: Vec<u8>);
|
||||
async fn receive_raw(&self) -> (Self::Id, Vec<u8>);
|
||||
|
||||
async fn send(&self, to: Self::Id, kind: P2pMessageKind, msg: Vec<u8>) {
|
||||
let mut actual_msg = kind.serialize();
|
||||
actual_msg.extend(msg);
|
||||
self.send_raw(to, kind.genesis(), actual_msg).await;
|
||||
self.send_raw(to, actual_msg).await;
|
||||
}
|
||||
async fn broadcast(&self, kind: P2pMessageKind, msg: Vec<u8>) {
|
||||
let mut actual_msg = kind.serialize();
|
||||
|
@ -159,7 +161,7 @@ pub trait P2p: Send + Sync + Clone + fmt::Debug + TributaryP2p {
|
|||
}
|
||||
);
|
||||
*/
|
||||
self.broadcast_raw(kind.genesis(), actual_msg).await;
|
||||
self.broadcast_raw(kind, actual_msg).await;
|
||||
}
|
||||
async fn receive(&self) -> Message<Self> {
|
||||
let (sender, kind, msg) = loop {
|
||||
|
@ -194,6 +196,7 @@ pub trait P2p: Send + Sync + Clone + fmt::Debug + TributaryP2p {
|
|||
|
||||
#[derive(NetworkBehaviour)]
|
||||
struct Behavior {
|
||||
reqres: RrBehavior<Vec<u8>, Vec<u8>>,
|
||||
gossipsub: GsBehavior,
|
||||
}
|
||||
|
||||
|
@ -201,7 +204,8 @@ struct Behavior {
|
|||
#[derive(Clone)]
|
||||
pub struct LibP2p {
|
||||
subscribe: Arc<Mutex<mpsc::UnboundedSender<(bool, ValidatorSet, [u8; 32])>>>,
|
||||
broadcast: Arc<Mutex<mpsc::UnboundedSender<(Option<[u8; 32]>, Vec<u8>)>>>,
|
||||
send: Arc<Mutex<mpsc::UnboundedSender<(PeerId, Vec<u8>)>>>,
|
||||
broadcast: Arc<Mutex<mpsc::UnboundedSender<(P2pMessageKind, Vec<u8>)>>>,
|
||||
receive: Arc<Mutex<mpsc::UnboundedReceiver<(PeerId, Vec<u8>)>>>,
|
||||
}
|
||||
impl fmt::Debug for LibP2p {
|
||||
|
@ -221,6 +225,7 @@ impl LibP2p {
|
|||
let throwaway_key_pair = Keypair::generate_ed25519();
|
||||
|
||||
let behavior = Behavior {
|
||||
reqres: { RrBehavior::new([], RrConfig::default()) },
|
||||
gossipsub: {
|
||||
let heartbeat_interval = tributary::tendermint::LATENCY_TIME / 2;
|
||||
let heartbeats_per_block =
|
||||
|
@ -284,6 +289,7 @@ impl LibP2p {
|
|||
const PORT: u16 = 30563; // 5132 ^ (('c' << 8) | 'o')
|
||||
swarm.listen_on(format!("/ip4/0.0.0.0/tcp/{PORT}").parse().unwrap()).unwrap();
|
||||
|
||||
let (send_send, mut send_recv) = mpsc::unbounded_channel();
|
||||
let (broadcast_send, mut broadcast_recv) = mpsc::unbounded_channel();
|
||||
let (receive_send, receive_recv) = mpsc::unbounded_channel();
|
||||
let (subscribe_send, mut subscribe_recv) = mpsc::unbounded_channel();
|
||||
|
@ -486,17 +492,32 @@ impl LibP2p {
|
|||
}
|
||||
}
|
||||
|
||||
msg = send_recv.recv() => {
|
||||
let (peer, msg): (PeerId, Vec<u8>) =
|
||||
msg.expect("send_recv closed. are we shutting down?");
|
||||
swarm.behaviour_mut().reqres.send_request(&peer, msg);
|
||||
},
|
||||
|
||||
// Handle any queued outbound messages
|
||||
msg = broadcast_recv.recv() => {
|
||||
let (genesis, msg): (Option<[u8; 32]>, Vec<u8>) =
|
||||
let (kind, msg): (P2pMessageKind, Vec<u8>) =
|
||||
msg.expect("broadcast_recv closed. are we shutting down?");
|
||||
let set = genesis.and_then(|genesis| set_for_genesis.get(&genesis).copied());
|
||||
broadcast_raw(
|
||||
&mut swarm,
|
||||
&mut time_of_last_p2p_message,
|
||||
set,
|
||||
msg,
|
||||
);
|
||||
if matches!(kind, P2pMessageKind::KeepAlive) ||
|
||||
matches!(kind, P2pMessageKind::Heartbeat(_)) {
|
||||
// Use request/response
|
||||
for peer_id in swarm.connected_peers().copied().collect::<Vec<_>>() {
|
||||
swarm.behaviour_mut().reqres.send_request(&peer_id, msg.clone());
|
||||
}
|
||||
} else {
|
||||
// Use gossipsub
|
||||
let set = kind.genesis().and_then(|genesis| set_for_genesis.get(&genesis).copied());
|
||||
broadcast_raw(
|
||||
&mut swarm,
|
||||
&mut time_of_last_p2p_message,
|
||||
set,
|
||||
msg,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle new incoming messages
|
||||
|
@ -572,9 +593,21 @@ impl LibP2p {
|
|||
connected_peers.len(),
|
||||
);
|
||||
}
|
||||
Some(SwarmEvent::Behaviour(BehaviorEvent::Reqres(
|
||||
RrEvent::Message { peer, message },
|
||||
))) => {
|
||||
let message = match message {
|
||||
RrMessage::Request { request, .. } => request,
|
||||
RrMessage::Response { response, .. } => response,
|
||||
};
|
||||
receive_send
|
||||
.send((peer, message))
|
||||
.expect("receive_send closed. are we shutting down?");
|
||||
}
|
||||
Some(SwarmEvent::Behaviour(BehaviorEvent::Gossipsub(
|
||||
GsEvent::Message { propagation_source, message, .. },
|
||||
))) => {
|
||||
// TODO: Ban Heartbeat/Blocks received over gossipsub
|
||||
receive_send
|
||||
.send((propagation_source, message.data))
|
||||
.expect("receive_send closed. are we shutting down?");
|
||||
|
@ -623,6 +656,7 @@ impl LibP2p {
|
|||
|
||||
LibP2p {
|
||||
subscribe: Arc::new(Mutex::new(subscribe_send)),
|
||||
send: Arc::new(Mutex::new(send_send)),
|
||||
broadcast: Arc::new(Mutex::new(broadcast_send)),
|
||||
receive: Arc::new(Mutex::new(receive_recv)),
|
||||
}
|
||||
|
@ -651,16 +685,16 @@ impl P2p for LibP2p {
|
|||
.expect("subscribe_send closed. are we shutting down?");
|
||||
}
|
||||
|
||||
async fn send_raw(&self, _: Self::Id, genesis: Option<[u8; 32]>, msg: Vec<u8>) {
|
||||
self.broadcast_raw(genesis, msg).await;
|
||||
async fn send_raw(&self, peer: Self::Id, msg: Vec<u8>) {
|
||||
self.send.lock().await.send((peer, msg)).expect("send_send closed. are we shutting down?");
|
||||
}
|
||||
|
||||
async fn broadcast_raw(&self, genesis: Option<[u8; 32]>, msg: Vec<u8>) {
|
||||
async fn broadcast_raw(&self, kind: P2pMessageKind, msg: Vec<u8>) {
|
||||
self
|
||||
.broadcast
|
||||
.lock()
|
||||
.await
|
||||
.send((genesis, msg))
|
||||
.send((kind, msg))
|
||||
.expect("broadcast_send closed. are we shutting down?");
|
||||
}
|
||||
|
||||
|
@ -678,17 +712,6 @@ impl TributaryP2p for LibP2p {
|
|||
}
|
||||
}
|
||||
|
||||
fn heartbeat_time_unit<D: Db, P: P2p>() -> u64 {
|
||||
// Also include the timestamp so LibP2p doesn't flag this as an old message re-circulating
|
||||
let timestamp = SystemTime::now()
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.expect("system clock is wrong")
|
||||
.as_secs();
|
||||
// Divide by the block time so if multiple parties send a Heartbeat, they're more likely to
|
||||
// overlap
|
||||
timestamp / u64::from(Tributary::<D, Transaction, P>::block_time())
|
||||
}
|
||||
|
||||
pub async fn heartbeat_tributaries_task<D: Db, P: P2p>(
|
||||
p2p: P,
|
||||
mut tributary_event: broadcast::Receiver<TributaryEvent<D, P>>,
|
||||
|
@ -723,8 +746,11 @@ pub async fn heartbeat_tributaries_task<D: Db, P: P2p>(
|
|||
if SystemTime::now() > (block_time + Duration::from_secs(60)) {
|
||||
log::warn!("last known tributary block was over a minute ago");
|
||||
let mut msg = tip.to_vec();
|
||||
let time_unit = heartbeat_time_unit::<D, P>();
|
||||
msg.extend(time_unit.to_le_bytes());
|
||||
let time: u64 = SystemTime::now()
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.expect("system clock is wrong")
|
||||
.as_secs();
|
||||
msg.extend(time.to_le_bytes());
|
||||
P2p::broadcast(&p2p, P2pMessageKind::Heartbeat(tributary.genesis()), msg).await;
|
||||
}
|
||||
}
|
||||
|
@ -738,7 +764,6 @@ pub async fn handle_p2p_task<D: Db, P: P2p>(
|
|||
p2p: P,
|
||||
cosign_channel: mpsc::UnboundedSender<CosignedBlock>,
|
||||
mut tributary_event: broadcast::Receiver<TributaryEvent<D, P>>,
|
||||
our_key: <Ristretto as Ciphersuite>::G,
|
||||
) {
|
||||
let channels = Arc::new(RwLock::new(HashMap::<_, mpsc::UnboundedSender<Message<P>>>::new()));
|
||||
tokio::spawn({
|
||||
|
@ -764,7 +789,6 @@ pub async fn handle_p2p_task<D: Db, P: P2p>(
|
|||
tokio::spawn({
|
||||
let p2p = p2p.clone();
|
||||
async move {
|
||||
let mut last_replied_to_heartbeat = 0;
|
||||
loop {
|
||||
let Some(mut msg) = recv.recv().await else {
|
||||
// Channel closure happens when the tributary retires
|
||||
|
@ -781,76 +805,37 @@ pub async fn handle_p2p_task<D: Db, P: P2p>(
|
|||
}
|
||||
}
|
||||
|
||||
// TODO2: Rate limit this per timestamp
|
||||
// And/or slash on Heartbeat which justifies a response, since the node
|
||||
// TODO: Slash on Heartbeat which justifies a response, since the node
|
||||
// obviously was offline and we must now use our bandwidth to compensate for
|
||||
// them?
|
||||
P2pMessageKind::Heartbeat(msg_genesis) => {
|
||||
assert_eq!(msg_genesis, genesis);
|
||||
|
||||
let current_time_unit = heartbeat_time_unit::<D, P>();
|
||||
if current_time_unit.saturating_sub(last_replied_to_heartbeat) < 10 {
|
||||
continue;
|
||||
}
|
||||
|
||||
if msg.msg.len() != 40 {
|
||||
log::error!("validator sent invalid heartbeat");
|
||||
continue;
|
||||
}
|
||||
// Only respond to recent heartbeats
|
||||
let msg_time_unit = u64::from_le_bytes(msg.msg[32 .. 40].try_into().expect(
|
||||
let msg_time = u64::from_le_bytes(msg.msg[32 .. 40].try_into().expect(
|
||||
"length-checked heartbeat message didn't have 8 bytes for the u64",
|
||||
));
|
||||
if current_time_unit.saturating_sub(msg_time_unit) > 1 {
|
||||
if SystemTime::now()
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.expect("system clock is wrong")
|
||||
.as_secs()
|
||||
.saturating_sub(msg_time) >
|
||||
10
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// This is the network's last replied to, not ours specifically
|
||||
last_replied_to_heartbeat = current_time_unit;
|
||||
log::debug!("received heartbeat with a recent timestamp");
|
||||
|
||||
let reader = tributary.tributary.reader();
|
||||
|
||||
// Have sqrt(n) nodes reply with the blocks
|
||||
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
|
||||
let mut responders = f32::from(tributary.spec.n(&[])).sqrt().floor() as u64;
|
||||
// Try to have at least 3 responders
|
||||
if responders < 3 {
|
||||
responders = tributary.spec.n(&[]).min(3).into();
|
||||
}
|
||||
|
||||
// Decide which nodes will respond by using the latest block's hash as a
|
||||
// mutually agreed upon entropy source
|
||||
// This isn't a secure source of entropy, yet it's fine for this
|
||||
let entropy = u64::from_le_bytes(reader.tip()[.. 8].try_into().unwrap());
|
||||
// If n = 10, responders = 3, we want `start` to be 0 ..= 7
|
||||
// (so the highest is 7, 8, 9)
|
||||
// entropy % (10 + 1) - 3 = entropy % 8 = 0 ..= 7
|
||||
let start = usize::try_from(
|
||||
entropy % (u64::from(tributary.spec.n(&[]) + 1) - responders),
|
||||
)
|
||||
.unwrap();
|
||||
let mut selected = false;
|
||||
for validator in &tributary.spec.validators()
|
||||
[start .. (start + usize::try_from(responders).unwrap())]
|
||||
{
|
||||
if our_key == validator.0 {
|
||||
selected = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if !selected {
|
||||
log::debug!("received heartbeat and not selected to respond");
|
||||
continue;
|
||||
}
|
||||
|
||||
log::debug!("received heartbeat and selected to respond");
|
||||
|
||||
let p2p = p2p.clone();
|
||||
// Spawn a dedicated task as this may require loading large amounts of data
|
||||
// from disk and take a notable amount of time
|
||||
tokio::spawn(async move {
|
||||
// Have the selected nodes respond
|
||||
// TODO: Spawn a dedicated topic for this heartbeat response?
|
||||
let mut latest = msg.msg[.. 32].try_into().unwrap();
|
||||
let mut to_send = vec![];
|
||||
while let Some(next) = reader.block_after(&latest) {
|
||||
|
|
|
@ -65,11 +65,11 @@ impl P2p for LocalP2p {
|
|||
async fn subscribe(&self, _set: ValidatorSet, _genesis: [u8; 32]) {}
|
||||
async fn unsubscribe(&self, _set: ValidatorSet, _genesis: [u8; 32]) {}
|
||||
|
||||
async fn send_raw(&self, to: Self::Id, _genesis: Option<[u8; 32]>, msg: Vec<u8>) {
|
||||
async fn send_raw(&self, to: Self::Id, msg: Vec<u8>) {
|
||||
self.1.write().await.1[to].push_back((self.0, msg));
|
||||
}
|
||||
|
||||
async fn broadcast_raw(&self, _genesis: Option<[u8; 32]>, msg: Vec<u8>) {
|
||||
async fn broadcast_raw(&self, _kind: P2pMessageKind, msg: Vec<u8>) {
|
||||
// Content-based deduplication
|
||||
let mut lock = self.1.write().await;
|
||||
{
|
||||
|
|
|
@ -3,8 +3,6 @@ use std::sync::Arc;
|
|||
|
||||
use rand_core::OsRng;
|
||||
|
||||
use ciphersuite::{Ciphersuite, Ristretto};
|
||||
|
||||
use tokio::{
|
||||
sync::{mpsc, broadcast},
|
||||
time::sleep,
|
||||
|
@ -37,17 +35,12 @@ async fn handle_p2p_test() {
|
|||
|
||||
let mut tributary_senders = vec![];
|
||||
let mut tributary_arcs = vec![];
|
||||
for (i, (p2p, tributary)) in tributaries.drain(..).enumerate() {
|
||||
for (p2p, tributary) in tributaries.drain(..) {
|
||||
let tributary = Arc::new(tributary);
|
||||
tributary_arcs.push(tributary.clone());
|
||||
let (new_tributary_send, new_tributary_recv) = broadcast::channel(5);
|
||||
let (cosign_send, _) = mpsc::unbounded_channel();
|
||||
tokio::spawn(handle_p2p_task(
|
||||
p2p,
|
||||
cosign_send,
|
||||
new_tributary_recv,
|
||||
<Ristretto as Ciphersuite>::generator() * *keys[i],
|
||||
));
|
||||
tokio::spawn(handle_p2p_task(p2p, cosign_send, new_tributary_recv));
|
||||
new_tributary_send
|
||||
.send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(), tributary }))
|
||||
.map_err(|_| "failed to send ActiveTributary")
|
||||
|
|
|
@ -45,17 +45,12 @@ async fn sync_test() {
|
|||
let mut tributary_senders = vec![];
|
||||
let mut tributary_arcs = vec![];
|
||||
let mut p2p_threads = vec![];
|
||||
for (i, (p2p, tributary)) in tributaries.drain(..).enumerate() {
|
||||
for (p2p, tributary) in tributaries.drain(..) {
|
||||
let tributary = Arc::new(tributary);
|
||||
tributary_arcs.push(tributary.clone());
|
||||
let (new_tributary_send, new_tributary_recv) = broadcast::channel(5);
|
||||
let (cosign_send, _) = mpsc::unbounded_channel();
|
||||
let thread = tokio::spawn(handle_p2p_task(
|
||||
p2p,
|
||||
cosign_send,
|
||||
new_tributary_recv,
|
||||
<Ristretto as Ciphersuite>::generator() * *keys[i],
|
||||
));
|
||||
let thread = tokio::spawn(handle_p2p_task(p2p, cosign_send, new_tributary_recv));
|
||||
new_tributary_send
|
||||
.send(TributaryEvent::NewTributary(ActiveTributary { spec: spec.clone(), tributary }))
|
||||
.map_err(|_| "failed to send ActiveTributary")
|
||||
|
@ -91,7 +86,7 @@ async fn sync_test() {
|
|||
let syncer_tributary = Arc::new(syncer_tributary);
|
||||
let (syncer_tributary_send, syncer_tributary_recv) = broadcast::channel(5);
|
||||
let (cosign_send, _) = mpsc::unbounded_channel();
|
||||
tokio::spawn(handle_p2p_task(syncer_p2p.clone(), cosign_send, syncer_tributary_recv, syncer_key));
|
||||
tokio::spawn(handle_p2p_task(syncer_p2p.clone(), cosign_send, syncer_tributary_recv));
|
||||
syncer_tributary_send
|
||||
.send(TributaryEvent::NewTributary(ActiveTributary {
|
||||
spec: spec.clone(),
|
||||
|
|
Loading…
Reference in a new issue