Spawn an async task per P2P message to try and resolve latency issues

This commit is contained in:
Luke Parker 2023-08-31 02:35:50 -04:00
parent 3af9dc5d6f
commit 9bf24480f4
No known key found for this signature in database
3 changed files with 107 additions and 105 deletions

View file

@ -330,6 +330,13 @@ pub async fn handle_p2p<D: Db, P: P2p>(
) { ) {
loop { loop {
let mut msg = p2p.receive().await; let mut msg = p2p.receive().await;
// Spawn a dedicated task to handle this message, ensuring any singularly latent message
// doesn't hold everything up
// TODO2: Move to one task per tributary
tokio::spawn({
let p2p = p2p.clone();
let tributaries = tributaries.clone();
async move {
match msg.kind { match msg.kind {
P2pMessageKind::KeepAlive => {} P2pMessageKind::KeepAlive => {}
@ -337,7 +344,7 @@ pub async fn handle_p2p<D: Db, P: P2p>(
let tributaries = tributaries.read().await; let tributaries = tributaries.read().await;
let Some(tributary) = tributaries.get(&genesis) else { let Some(tributary) = tributaries.get(&genesis) else {
log::debug!("received p2p message for unknown network"); log::debug!("received p2p message for unknown network");
continue; return;
}; };
log::trace!("handling message for tributary {:?}", tributary.spec.set()); log::trace!("handling message for tributary {:?}", tributary.spec.set());
@ -350,13 +357,13 @@ pub async fn handle_p2p<D: Db, P: P2p>(
P2pMessageKind::Heartbeat(genesis) => { P2pMessageKind::Heartbeat(genesis) => {
if msg.msg.len() != 40 { if msg.msg.len() != 40 {
log::error!("validator sent invalid heartbeat"); log::error!("validator sent invalid heartbeat");
continue; return;
} }
let tributaries = tributaries.read().await; let tributaries = tributaries.read().await;
let Some(tributary) = tributaries.get(&genesis) else { let Some(tributary) = tributaries.get(&genesis) else {
log::debug!("received heartbeat message for unknown network"); log::debug!("received heartbeat message for unknown network");
continue; return;
}; };
let tributary_read = tributary.tributary.read().await; let tributary_read = tributary.tributary.read().await;
@ -391,7 +398,7 @@ pub async fn handle_p2p<D: Db, P: P2p>(
} }
if !selected { if !selected {
log::debug!("received heartbeat and not selected to respond"); log::debug!("received heartbeat and not selected to respond");
continue; return;
} }
log::debug!("received heartbeat and selected to respond"); log::debug!("received heartbeat and selected to respond");
@ -414,31 +421,12 @@ pub async fn handle_p2p<D: Db, P: P2p>(
let mut msg_ref: &[u8] = msg.msg.as_ref(); let mut msg_ref: &[u8] = msg.msg.as_ref();
let Ok(block) = Block::<Transaction>::read(&mut msg_ref) else { let Ok(block) = Block::<Transaction>::read(&mut msg_ref) else {
log::error!("received block message with an invalidly serialized block"); log::error!("received block message with an invalidly serialized block");
continue; return;
}; };
// Get just the commit // Get just the commit
msg.msg.drain(.. (msg.msg.len() - msg_ref.len())); msg.msg.drain(.. (msg.msg.len() - msg_ref.len()));
msg.msg.drain((msg.msg.len() - 8) ..); msg.msg.drain((msg.msg.len() - 8) ..);
// Spawn a dedicated task to add this block, as it may take a notable amount of time
// While we could use a long-lived task to add each block, that task would only add one
// block at a time *across all tributaries*
// We either need:
// 1) One task per tributary
// 2) Background tasks
// 3) For sync_block to return instead of waiting for provided transactions which are
// missing
// sync_block waiting is preferable since we know the block is valid by its commit, meaning
// we are the node behind
// As for 1/2, 1 may be preferable since this message may frequently occur
// This is suitably performant, as tokio HTTP servers will even spawn a new task per
// connection
// In order to reduce congestion though, we should at least check if we take value from
// this message before running spawn
// TODO2
tokio::spawn({
let tributaries = tributaries.clone();
async move {
let tributaries = tributaries.read().await; let tributaries = tributaries.read().await;
let Some(tributary) = tributaries.get(&genesis) else { let Some(tributary) = tributaries.get(&genesis) else {
log::debug!("received block message for unknown network"); log::debug!("received block message for unknown network");
@ -448,10 +436,10 @@ pub async fn handle_p2p<D: Db, P: P2p>(
let res = tributary.tributary.read().await.sync_block(block, msg.msg).await; let res = tributary.tributary.read().await.sync_block(block, msg.msg).await;
log::debug!("received block from {:?}, sync_block returned {}", msg.sender, res); log::debug!("received block from {:?}, sync_block returned {}", msg.sender, res);
} }
}
}
}); });
} }
}
}
} }
pub async fn publish_transaction<D: Db, P: P2p>( pub async fn publish_transaction<D: Db, P: P2p>(

View file

@ -399,6 +399,8 @@ impl<D: Db, T: TransactionTrait, P: P2p> Network for TendermintNetwork<D, T, P>
hex::encode(hash), hex::encode(hash),
hex::encode(self.genesis) hex::encode(self.genesis)
); );
// TODO: Use a notification system for when we have a new provided transaction, in order to
// minimize latency
sleep(Duration::from_secs(Self::block_time().into())).await; sleep(Duration::from_secs(Self::block_time().into())).await;
} }
_ => return invalid_block(), _ => return invalid_block(),

View file

@ -189,8 +189,7 @@ impl<N: Network + 'static> TendermintMachine<N> {
// Push it on to the queue. This is done so we only handle one message at a time, and so we // Push it on to the queue. This is done so we only handle one message at a time, and so we
// can handle our own message before broadcasting it. That way, we fail before // can handle our own message before broadcasting it. That way, we fail before
// becoming malicious // becoming malicious
// push_front to prioritize our own messages self.queue.push_back(msg);
self.queue.push_front(msg);
} }
} }
@ -220,6 +219,12 @@ impl<N: Network + 'static> TendermintMachine<N> {
// Sleep until this round ends // Sleep until this round ends
let round_end = self.block.end_time[&end_round]; let round_end = self.block.end_time[&end_round];
let time_until_round_end = round_end.instant().saturating_duration_since(Instant::now()); let time_until_round_end = round_end.instant().saturating_duration_since(Instant::now());
if time_until_round_end == Duration::ZERO {
log::trace!(
"resetting when prior round ended {}ms ago",
Instant::now().saturating_duration_since(round_end.instant()).as_millis(),
);
}
log::trace!("sleeping until round ends in {}ms", time_until_round_end.as_millis()); log::trace!("sleeping until round ends in {}ms", time_until_round_end.as_millis());
sleep(time_until_round_end).await; sleep(time_until_round_end).await;
@ -575,6 +580,13 @@ impl<N: Network + 'static> TendermintMachine<N> {
Err(TendermintError::Temporal)?; Err(TendermintError::Temporal)?;
} }
if (msg.block == self.block.number) &&
(msg.round == self.block.round().number) &&
(msg.data.step() == Step::Propose)
{
log::trace!("received Propose for block {}, round {}", msg.block.0, msg.round.0);
}
// If this is a precommit, verify its signature // If this is a precommit, verify its signature
self.verify_precommit_signature(signed)?; self.verify_precommit_signature(signed)?;