2023-08-25 01:55:59 +00:00
|
|
|
use core::{ops::Deref, future::Future};
|
2023-04-23 07:48:50 +00:00
|
|
|
use std::{
|
|
|
|
sync::Arc,
|
2023-04-23 22:55:43 +00:00
|
|
|
time::{SystemTime, Duration},
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to it, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
collections::{VecDeque, HashMap},
|
2023-04-23 07:48:50 +00:00
|
|
|
};
|
2023-04-17 04:50:56 +00:00
|
|
|
|
2023-08-06 16:38:44 +00:00
|
|
|
use zeroize::{Zeroize, Zeroizing};
|
2023-04-25 07:14:42 +00:00
|
|
|
use rand_core::OsRng;
|
2023-04-16 07:16:53 +00:00
|
|
|
|
2023-09-01 03:39:36 +00:00
|
|
|
use ciphersuite::{
|
|
|
|
group::ff::{Field, PrimeField},
|
|
|
|
Ciphersuite, Ristretto,
|
|
|
|
};
|
|
|
|
use schnorr::SchnorrSignature;
|
2023-09-01 04:03:53 +00:00
|
|
|
use frost::Participant;
|
2023-04-15 21:38:47 +00:00
|
|
|
|
2023-07-18 05:53:51 +00:00
|
|
|
use serai_db::{DbTxn, Db};
|
|
|
|
use serai_env as env;
|
2023-05-10 04:46:51 +00:00
|
|
|
|
2023-08-24 23:06:22 +00:00
|
|
|
use serai_client::{primitives::NetworkId, Public, Serai};
|
2023-04-15 21:38:47 +00:00
|
|
|
|
2023-07-18 05:53:51 +00:00
|
|
|
use message_queue::{Service, client::MessageQueue};
|
|
|
|
|
2023-08-30 21:25:04 +00:00
|
|
|
use futures::stream::StreamExt;
|
2023-09-25 22:23:39 +00:00
|
|
|
use tokio::{
|
2023-09-25 23:28:53 +00:00
|
|
|
sync::{RwLock, mpsc, broadcast},
|
2023-09-25 22:23:39 +00:00
|
|
|
time::sleep,
|
|
|
|
};
|
2023-04-23 07:48:50 +00:00
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
use ::tributary::{ReadWrite, ProvidedError, TransactionKind, TransactionTrait, Block, Tributary};
|
2023-04-17 04:50:56 +00:00
|
|
|
|
2023-04-20 09:05:17 +00:00
|
|
|
mod tributary;
|
2023-09-25 19:42:39 +00:00
|
|
|
use crate::tributary::{
|
|
|
|
TributarySpec, SignData, Transaction, TributaryDb, NonceDecider, scanner::RecognizedIdType,
|
|
|
|
};
|
2023-04-16 04:51:56 +00:00
|
|
|
|
2023-04-23 08:31:00 +00:00
|
|
|
mod db;
|
|
|
|
use db::MainDb;
|
|
|
|
|
2023-04-16 04:51:56 +00:00
|
|
|
mod p2p;
|
|
|
|
pub use p2p::*;
|
|
|
|
|
2023-04-25 07:14:42 +00:00
|
|
|
use processor_messages::{key_gen, sign, coordinator, ProcessorMessage};
|
|
|
|
|
2023-05-10 03:44:41 +00:00
|
|
|
pub mod processors;
|
|
|
|
use processors::Processors;
|
2023-04-17 06:10:33 +00:00
|
|
|
|
2023-04-15 21:38:47 +00:00
|
|
|
mod substrate;
|
2023-09-29 07:51:01 +00:00
|
|
|
use substrate::SubstrateDb;
|
2023-04-11 23:04:53 +00:00
|
|
|
|
|
|
|
#[cfg(test)]
|
2023-04-23 02:27:12 +00:00
|
|
|
pub mod tests;
|
2023-04-11 23:04:53 +00:00
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
#[derive(Clone)]
|
2023-04-24 03:15:15 +00:00
|
|
|
pub struct ActiveTributary<D: Db, P: P2p> {
|
2023-04-24 06:50:03 +00:00
|
|
|
pub spec: TributarySpec,
|
2023-09-25 23:28:53 +00:00
|
|
|
pub tributary: Arc<Tributary<D, Transaction, P>>,
|
2023-04-24 03:15:15 +00:00
|
|
|
}
|
|
|
|
|
2023-09-25 22:27:16 +00:00
|
|
|
// Adds a tributary into the specified HashMap
|
|
|
|
async fn add_tributary<D: Db, Pro: Processors, P: P2p>(
|
2023-04-24 03:15:15 +00:00
|
|
|
db: D,
|
2023-04-16 07:16:53 +00:00
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
2023-09-25 22:27:16 +00:00
|
|
|
processors: &Pro,
|
2023-04-16 07:16:53 +00:00
|
|
|
p2p: P,
|
2023-09-25 23:28:53 +00:00
|
|
|
tributaries: &broadcast::Sender<ActiveTributary<D, P>>,
|
2023-04-24 03:15:15 +00:00
|
|
|
spec: TributarySpec,
|
2023-09-25 23:28:53 +00:00
|
|
|
) {
|
2023-08-01 23:00:48 +00:00
|
|
|
log::info!("adding tributary {:?}", spec.set());
|
|
|
|
|
2023-04-24 03:15:15 +00:00
|
|
|
let tributary = Tributary::<_, Transaction, _>::new(
|
2023-08-30 21:25:04 +00:00
|
|
|
// TODO2: Use a db on a distinct volume to protect against DoS attacks
|
2023-04-24 03:15:15 +00:00
|
|
|
db,
|
|
|
|
spec.genesis(),
|
|
|
|
spec.start_time(),
|
2023-09-25 22:27:16 +00:00
|
|
|
key.clone(),
|
2023-04-24 03:15:15 +00:00
|
|
|
spec.validators(),
|
|
|
|
p2p,
|
|
|
|
)
|
|
|
|
.await
|
|
|
|
.unwrap();
|
|
|
|
|
2023-09-25 22:27:16 +00:00
|
|
|
// Trigger a DKG for the newly added Tributary
|
|
|
|
// If we're rebooting, we'll re-fire this message
|
|
|
|
// This is safe due to the message-queue deduplicating based off the intent system
|
|
|
|
let set = spec.set();
|
|
|
|
processors
|
|
|
|
.send(
|
|
|
|
set.network,
|
2023-09-29 08:19:59 +00:00
|
|
|
processor_messages::key_gen::CoordinatorMessage::GenerateKey {
|
|
|
|
id: processor_messages::key_gen::KeyGenId { set, attempt: 0 },
|
|
|
|
params: frost::ThresholdParams::new(
|
|
|
|
spec.t(),
|
|
|
|
spec.n(),
|
|
|
|
spec
|
|
|
|
.i(Ristretto::generator() * key.deref())
|
|
|
|
.expect("adding a tributary for a set we aren't in set for"),
|
|
|
|
)
|
|
|
|
.unwrap(),
|
|
|
|
},
|
2023-09-25 22:27:16 +00:00
|
|
|
)
|
|
|
|
.await;
|
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
tributaries
|
|
|
|
.send(ActiveTributary { spec, tributary: Arc::new(tributary) })
|
|
|
|
.map_err(|_| "all ActiveTributary recipients closed")
|
|
|
|
.unwrap();
|
2023-04-24 03:15:15 +00:00
|
|
|
}
|
|
|
|
|
2023-05-10 03:44:41 +00:00
|
|
|
pub async fn scan_substrate<D: Db, Pro: Processors>(
|
2023-04-24 03:15:15 +00:00
|
|
|
db: D,
|
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
2023-05-10 03:44:41 +00:00
|
|
|
processors: Pro,
|
2023-08-14 10:08:55 +00:00
|
|
|
serai: Arc<Serai>,
|
2023-09-25 23:28:53 +00:00
|
|
|
new_tributary_spec: mpsc::UnboundedSender<TributarySpec>,
|
2023-04-16 07:16:53 +00:00
|
|
|
) {
|
2023-08-01 23:00:48 +00:00
|
|
|
log::info!("scanning substrate");
|
|
|
|
|
2023-09-29 07:51:01 +00:00
|
|
|
let mut db = SubstrateDb::new(db);
|
2023-08-02 16:18:50 +00:00
|
|
|
let mut next_substrate_block = db.next_block();
|
2023-04-20 09:05:17 +00:00
|
|
|
|
2023-08-30 21:25:04 +00:00
|
|
|
let new_substrate_block_notifier = {
|
|
|
|
let serai = &serai;
|
|
|
|
move || async move {
|
|
|
|
loop {
|
|
|
|
match serai.newly_finalized_block().await {
|
|
|
|
Ok(sub) => return sub,
|
|
|
|
Err(e) => {
|
|
|
|
log::error!("couldn't communicate with serai node: {e}");
|
|
|
|
sleep(Duration::from_secs(5)).await;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
let mut substrate_block_notifier = new_substrate_block_notifier().await;
|
|
|
|
|
2023-04-24 03:15:15 +00:00
|
|
|
loop {
|
2023-08-30 21:25:04 +00:00
|
|
|
// await the next block, yet if our notifier had an error, re-create it
|
|
|
|
{
|
2023-08-30 21:57:33 +00:00
|
|
|
let Ok(next_block) =
|
|
|
|
tokio::time::timeout(Duration::from_secs(60), substrate_block_notifier.next()).await
|
|
|
|
else {
|
|
|
|
// Timed out, which may be because Serai isn't finalizing or may be some issue with the
|
|
|
|
// notifier
|
|
|
|
if serai.get_latest_block().await.map(|block| block.number()).ok() ==
|
|
|
|
Some(next_substrate_block.saturating_sub(1))
|
|
|
|
{
|
|
|
|
log::info!("serai hasn't finalized a block in the last 60s...");
|
|
|
|
} else {
|
|
|
|
substrate_block_notifier = new_substrate_block_notifier().await;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
|
|
|
|
// next_block is a Option<Result>
|
|
|
|
if next_block.and_then(Result::ok).is_none() {
|
2023-08-30 21:25:04 +00:00
|
|
|
substrate_block_notifier = new_substrate_block_notifier().await;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-24 03:15:15 +00:00
|
|
|
match substrate::handle_new_blocks(
|
|
|
|
&mut db,
|
|
|
|
&key,
|
2023-04-26 04:10:06 +00:00
|
|
|
|db: &mut D, spec: TributarySpec| {
|
2023-08-08 19:12:47 +00:00
|
|
|
log::info!("creating new tributary for {:?}", spec.set());
|
|
|
|
|
2023-04-26 04:10:06 +00:00
|
|
|
// Save it to the database
|
2023-09-27 04:00:31 +00:00
|
|
|
let mut txn = db.txn();
|
|
|
|
MainDb::<D>::add_active_tributary(&mut txn, &spec);
|
|
|
|
txn.commit();
|
2023-04-26 04:10:06 +00:00
|
|
|
|
2023-09-26 01:54:52 +00:00
|
|
|
// If we reboot before this is read, the fact it was saved to the database means it'll be
|
|
|
|
// handled on reboot
|
2023-09-25 23:28:53 +00:00
|
|
|
new_tributary_spec.send(spec).unwrap();
|
2023-04-26 04:10:06 +00:00
|
|
|
},
|
2023-05-10 03:44:41 +00:00
|
|
|
&processors,
|
2023-04-24 03:15:15 +00:00
|
|
|
&serai,
|
2023-08-02 16:18:50 +00:00
|
|
|
&mut next_substrate_block,
|
2023-04-24 03:15:15 +00:00
|
|
|
)
|
|
|
|
.await
|
|
|
|
{
|
2023-08-30 21:25:04 +00:00
|
|
|
Ok(()) => {}
|
2023-04-24 03:15:15 +00:00
|
|
|
Err(e) => {
|
|
|
|
log::error!("couldn't communicate with serai node: {e}");
|
|
|
|
sleep(Duration::from_secs(5)).await;
|
2023-04-20 09:05:17 +00:00
|
|
|
}
|
2023-04-23 07:48:50 +00:00
|
|
|
}
|
2023-04-24 03:15:15 +00:00
|
|
|
}
|
|
|
|
}
|
2023-04-23 22:29:50 +00:00
|
|
|
|
2023-09-26 00:27:44 +00:00
|
|
|
pub(crate) trait RIDTrait<FRid>:
|
|
|
|
Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32], u32) -> FRid
|
|
|
|
{
|
|
|
|
}
|
|
|
|
impl<FRid, F: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32], u32) -> FRid>
|
|
|
|
RIDTrait<FRid> for F
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) async fn scan_tributaries<
|
2023-08-25 01:55:59 +00:00
|
|
|
D: Db,
|
|
|
|
Pro: Processors,
|
|
|
|
P: P2p,
|
2023-09-26 00:27:44 +00:00
|
|
|
FRid: Send + Future<Output = ()>,
|
|
|
|
RID: 'static + Send + Sync + RIDTrait<FRid>,
|
2023-08-25 01:55:59 +00:00
|
|
|
>(
|
2023-04-24 03:15:15 +00:00
|
|
|
raw_db: D,
|
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
2023-08-25 01:55:59 +00:00
|
|
|
recognized_id: RID,
|
2023-05-10 03:44:41 +00:00
|
|
|
processors: Pro,
|
2023-08-14 10:08:55 +00:00
|
|
|
serai: Arc<Serai>,
|
2023-09-25 23:28:53 +00:00
|
|
|
mut new_tributary: broadcast::Receiver<ActiveTributary<D, P>>,
|
2023-04-24 03:15:15 +00:00
|
|
|
) {
|
2023-08-01 23:00:48 +00:00
|
|
|
log::info!("scanning tributaries");
|
|
|
|
|
2023-04-24 03:15:15 +00:00
|
|
|
loop {
|
2023-09-26 00:27:44 +00:00
|
|
|
match new_tributary.recv().await {
|
|
|
|
Ok(ActiveTributary { spec, tributary }) => {
|
|
|
|
// For each Tributary, spawn a dedicated scanner task
|
|
|
|
tokio::spawn({
|
|
|
|
let raw_db = raw_db.clone();
|
|
|
|
let key = key.clone();
|
|
|
|
let recognized_id = recognized_id.clone();
|
|
|
|
let processors = processors.clone();
|
2023-08-14 10:08:55 +00:00
|
|
|
let serai = serai.clone();
|
|
|
|
async move {
|
2023-09-26 00:27:44 +00:00
|
|
|
let spec = &spec;
|
|
|
|
let reader = tributary.reader();
|
|
|
|
let mut tributary_db = tributary::TributaryDb::new(raw_db.clone());
|
2023-08-14 10:08:55 +00:00
|
|
|
loop {
|
2023-09-26 03:11:36 +00:00
|
|
|
// Obtain the next block notification now to prevent obtaining it immediately after
|
|
|
|
// the next block occurs
|
|
|
|
let next_block_notification = tributary.next_block_notification().await;
|
|
|
|
|
2023-09-26 00:27:44 +00:00
|
|
|
tributary::scanner::handle_new_blocks::<_, _, _, _, _, _, P>(
|
|
|
|
&mut tributary_db,
|
|
|
|
&key,
|
|
|
|
recognized_id.clone(),
|
|
|
|
&processors,
|
|
|
|
|set, tx| {
|
|
|
|
let serai = serai.clone();
|
|
|
|
async move {
|
|
|
|
loop {
|
|
|
|
match serai.publish(&tx).await {
|
|
|
|
Ok(_) => {
|
|
|
|
log::info!("set key pair for {set:?}");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// This is assumed to be some ephemeral error due to the assumed fault-free
|
|
|
|
// creation
|
|
|
|
// TODO2: Differentiate connection errors from invariants
|
|
|
|
Err(e) => {
|
2023-10-11 02:53:15 +00:00
|
|
|
if let Ok(latest) = serai.get_latest_block_hash().await {
|
|
|
|
// Check if this failed because the keys were already set by someone
|
|
|
|
// else
|
|
|
|
if matches!(serai.get_keys(spec.set(), latest).await, Ok(Some(_))) {
|
|
|
|
log::info!("another coordinator set key pair for {:?}", set);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The above block may return false if the keys have been pruned from
|
|
|
|
// the state
|
|
|
|
// Check if this session is no longer the latest session, meaning it at
|
|
|
|
// some point did set keys, and we're just operating off very
|
|
|
|
// historical data
|
|
|
|
if let Ok(Some(current_session)) =
|
|
|
|
serai.get_session(spec.set().network, latest).await
|
|
|
|
{
|
|
|
|
if current_session.0 > spec.set().session.0 {
|
|
|
|
log::warn!(
|
|
|
|
"trying to set keys for a set which isn't the latest {:?}",
|
|
|
|
set
|
|
|
|
);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2023-09-26 00:27:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
log::error!(
|
|
|
|
"couldn't connect to Serai node to publish set_keys TX: {:?}",
|
|
|
|
e
|
|
|
|
);
|
|
|
|
tokio::time::sleep(Duration::from_secs(10)).await;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2023-08-14 10:53:20 +00:00
|
|
|
}
|
2023-09-26 00:27:44 +00:00
|
|
|
},
|
|
|
|
spec,
|
|
|
|
&reader,
|
|
|
|
)
|
|
|
|
.await;
|
|
|
|
|
2023-09-26 03:11:36 +00:00
|
|
|
next_block_notification
|
|
|
|
.await
|
|
|
|
.map_err(|_| "")
|
|
|
|
.expect("tributary dropped its notifications?");
|
2023-08-14 10:08:55 +00:00
|
|
|
}
|
|
|
|
}
|
2023-09-26 00:27:44 +00:00
|
|
|
});
|
|
|
|
}
|
|
|
|
Err(broadcast::error::RecvError::Lagged(_)) => {
|
|
|
|
panic!("scan_tributaries lagged to handle new_tributary")
|
|
|
|
}
|
|
|
|
Err(broadcast::error::RecvError::Closed) => panic!("new_tributary sender closed"),
|
2023-04-23 08:31:00 +00:00
|
|
|
}
|
2023-04-24 03:15:15 +00:00
|
|
|
}
|
|
|
|
}
|
2023-04-23 22:55:43 +00:00
|
|
|
|
2023-04-24 03:15:15 +00:00
|
|
|
pub async fn heartbeat_tributaries<D: Db, P: P2p>(
|
|
|
|
p2p: P,
|
2023-09-25 23:28:53 +00:00
|
|
|
mut new_tributary: broadcast::Receiver<ActiveTributary<D, P>>,
|
2023-04-24 03:15:15 +00:00
|
|
|
) {
|
|
|
|
let ten_blocks_of_time =
|
2023-04-24 06:50:03 +00:00
|
|
|
Duration::from_secs((10 * Tributary::<D, Transaction, P>::block_time()).into());
|
2023-04-24 03:15:15 +00:00
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
let mut readers = vec![];
|
2023-04-24 03:15:15 +00:00
|
|
|
loop {
|
2023-09-27 17:00:04 +00:00
|
|
|
while let Ok(ActiveTributary { spec: _, tributary }) = {
|
2023-09-25 23:28:53 +00:00
|
|
|
match new_tributary.try_recv() {
|
|
|
|
Ok(tributary) => Ok(tributary),
|
|
|
|
Err(broadcast::error::TryRecvError::Empty) => Err(()),
|
|
|
|
Err(broadcast::error::TryRecvError::Lagged(_)) => {
|
2023-09-26 00:27:44 +00:00
|
|
|
panic!("heartbeat_tributaries lagged to handle new_tributary")
|
2023-09-25 23:28:53 +00:00
|
|
|
}
|
|
|
|
Err(broadcast::error::TryRecvError::Closed) => panic!("new_tributary sender closed"),
|
|
|
|
}
|
|
|
|
} {
|
|
|
|
readers.push(tributary.reader());
|
2023-09-25 22:07:26 +00:00
|
|
|
}
|
|
|
|
|
2023-09-25 21:15:36 +00:00
|
|
|
for tributary in &readers {
|
|
|
|
let tip = tributary.tip();
|
|
|
|
let block_time =
|
|
|
|
SystemTime::UNIX_EPOCH + Duration::from_secs(tributary.time_of_block(&tip).unwrap_or(0));
|
2023-04-24 03:15:15 +00:00
|
|
|
|
|
|
|
// Only trigger syncing if the block is more than a minute behind
|
|
|
|
if SystemTime::now() > (block_time + Duration::from_secs(60)) {
|
|
|
|
log::warn!("last known tributary block was over a minute ago");
|
2023-08-08 19:12:47 +00:00
|
|
|
let mut msg = tip.to_vec();
|
|
|
|
// Also include the timestamp so LibP2p doesn't flag this as an old message re-circulating
|
|
|
|
let timestamp = SystemTime::now()
|
|
|
|
.duration_since(SystemTime::UNIX_EPOCH)
|
|
|
|
.expect("system clock is wrong")
|
|
|
|
.as_secs();
|
|
|
|
// Divide by the block time so if multiple parties send a Heartbeat, they're more likely to
|
|
|
|
// overlap
|
|
|
|
let time_unit = timestamp / u64::from(Tributary::<D, Transaction, P>::block_time());
|
|
|
|
msg.extend(time_unit.to_le_bytes());
|
|
|
|
P2p::broadcast(&p2p, P2pMessageKind::Heartbeat(tributary.genesis()), msg).await;
|
2023-04-24 03:15:15 +00:00
|
|
|
}
|
2023-04-23 20:56:23 +00:00
|
|
|
}
|
2023-04-23 07:48:50 +00:00
|
|
|
|
2023-04-24 03:15:15 +00:00
|
|
|
// Only check once every 10 blocks of time
|
|
|
|
sleep(ten_blocks_of_time).await;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn handle_p2p<D: Db, P: P2p>(
|
2023-04-24 04:53:15 +00:00
|
|
|
our_key: <Ristretto as Ciphersuite>::G,
|
2023-04-24 03:15:15 +00:00
|
|
|
p2p: P,
|
2023-09-25 23:28:53 +00:00
|
|
|
mut new_tributary: broadcast::Receiver<ActiveTributary<D, P>>,
|
2023-04-24 03:15:15 +00:00
|
|
|
) {
|
2023-09-26 02:58:40 +00:00
|
|
|
let channels = Arc::new(RwLock::new(HashMap::new()));
|
|
|
|
tokio::spawn({
|
|
|
|
let p2p = p2p.clone();
|
|
|
|
let channels = channels.clone();
|
|
|
|
async move {
|
|
|
|
loop {
|
|
|
|
let tributary = new_tributary.recv().await.unwrap();
|
|
|
|
let genesis = tributary.spec.genesis();
|
2023-09-25 23:28:53 +00:00
|
|
|
|
2023-09-26 02:58:40 +00:00
|
|
|
let (send, mut recv) = mpsc::unbounded_channel();
|
|
|
|
channels.write().await.insert(genesis, send);
|
2023-04-24 04:53:15 +00:00
|
|
|
|
2023-09-26 02:58:40 +00:00
|
|
|
tokio::spawn({
|
|
|
|
let p2p = p2p.clone();
|
|
|
|
async move {
|
2023-09-26 03:11:36 +00:00
|
|
|
loop {
|
|
|
|
let mut msg: Message<P> = recv.recv().await.unwrap();
|
|
|
|
match msg.kind {
|
|
|
|
P2pMessageKind::KeepAlive => {}
|
|
|
|
|
|
|
|
P2pMessageKind::Tributary(msg_genesis) => {
|
|
|
|
assert_eq!(msg_genesis, genesis);
|
|
|
|
log::trace!("handling message for tributary {:?}", tributary.spec.set());
|
|
|
|
if tributary.tributary.handle_message(&msg.msg).await {
|
|
|
|
P2p::broadcast(&p2p, msg.kind, msg.msg).await;
|
|
|
|
}
|
2023-09-26 02:58:40 +00:00
|
|
|
}
|
2023-08-31 06:35:50 +00:00
|
|
|
|
2023-09-26 03:11:36 +00:00
|
|
|
// TODO2: Rate limit this per timestamp
|
|
|
|
// And/or slash on Heartbeat which justifies a response, since the node obviously
|
|
|
|
// was offline and we must now use our bandwidth to compensate for them?
|
|
|
|
P2pMessageKind::Heartbeat(msg_genesis) => {
|
|
|
|
assert_eq!(msg_genesis, genesis);
|
|
|
|
if msg.msg.len() != 40 {
|
|
|
|
log::error!("validator sent invalid heartbeat");
|
|
|
|
continue;
|
|
|
|
}
|
2023-09-26 02:58:40 +00:00
|
|
|
|
2023-09-27 04:10:37 +00:00
|
|
|
let p2p = p2p.clone();
|
|
|
|
let spec = tributary.spec.clone();
|
|
|
|
let reader = tributary.tributary.reader();
|
|
|
|
// Spawn a dedicated task as this may require loading large amounts of data from
|
|
|
|
// disk and take a notable amount of time
|
|
|
|
tokio::spawn(async move {
|
|
|
|
/*
|
|
|
|
// Have sqrt(n) nodes reply with the blocks
|
|
|
|
let mut responders = (tributary.spec.n() as f32).sqrt().floor() as u64;
|
|
|
|
// Try to have at least 3 responders
|
|
|
|
if responders < 3 {
|
|
|
|
responders = tributary.spec.n().min(3).into();
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
// Have up to three nodes respond
|
|
|
|
let responders = u64::from(spec.n().min(3));
|
|
|
|
|
|
|
|
// Decide which nodes will respond by using the latest block's hash as a
|
|
|
|
// mutually agreed upon entropy source
|
|
|
|
// This isn't a secure source of entropy, yet it's fine for this
|
|
|
|
let entropy = u64::from_le_bytes(reader.tip()[.. 8].try_into().unwrap());
|
|
|
|
// If n = 10, responders = 3, we want `start` to be 0 ..= 7
|
|
|
|
// (so the highest is 7, 8, 9)
|
|
|
|
// entropy % (10 + 1) - 3 = entropy % 8 = 0 ..= 7
|
|
|
|
let start =
|
|
|
|
usize::try_from(entropy % (u64::from(spec.n() + 1) - responders)).unwrap();
|
|
|
|
let mut selected = false;
|
|
|
|
for validator in
|
|
|
|
&spec.validators()[start .. (start + usize::try_from(responders).unwrap())]
|
|
|
|
{
|
|
|
|
if our_key == validator.0 {
|
|
|
|
selected = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !selected {
|
|
|
|
log::debug!("received heartbeat and not selected to respond");
|
|
|
|
return;
|
2023-09-26 03:11:36 +00:00
|
|
|
}
|
2023-09-26 02:58:40 +00:00
|
|
|
|
2023-09-27 04:10:37 +00:00
|
|
|
log::debug!("received heartbeat and selected to respond");
|
2023-09-26 02:58:40 +00:00
|
|
|
|
2023-09-27 04:10:37 +00:00
|
|
|
let mut latest = msg.msg[.. 32].try_into().unwrap();
|
|
|
|
while let Some(next) = reader.block_after(&latest) {
|
|
|
|
let mut res = reader.block(&next).unwrap().serialize();
|
|
|
|
res.extend(reader.commit(&next).unwrap());
|
|
|
|
// Also include the timestamp used within the Heartbeat
|
|
|
|
res.extend(&msg.msg[32 .. 40]);
|
|
|
|
p2p.send(msg.sender, P2pMessageKind::Block(spec.genesis()), res).await;
|
|
|
|
latest = next;
|
|
|
|
}
|
|
|
|
});
|
2023-09-26 02:58:40 +00:00
|
|
|
}
|
2023-04-24 10:50:40 +00:00
|
|
|
|
2023-09-26 03:11:36 +00:00
|
|
|
P2pMessageKind::Block(msg_genesis) => {
|
|
|
|
assert_eq!(msg_genesis, genesis);
|
|
|
|
let mut msg_ref: &[u8] = msg.msg.as_ref();
|
|
|
|
let Ok(block) = Block::<Transaction>::read(&mut msg_ref) else {
|
|
|
|
log::error!("received block message with an invalidly serialized block");
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
// Get just the commit
|
|
|
|
msg.msg.drain(.. (msg.msg.len() - msg_ref.len()));
|
|
|
|
msg.msg.drain((msg.msg.len() - 8) ..);
|
|
|
|
|
|
|
|
let res = tributary.tributary.sync_block(block, msg.msg).await;
|
|
|
|
log::debug!("received block from {:?}, sync_block returned {}", msg.sender, res);
|
|
|
|
}
|
2023-09-26 02:58:40 +00:00
|
|
|
}
|
2023-08-31 06:35:50 +00:00
|
|
|
}
|
|
|
|
}
|
2023-09-26 02:58:40 +00:00
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
2023-08-31 06:35:50 +00:00
|
|
|
|
2023-09-26 02:58:40 +00:00
|
|
|
loop {
|
|
|
|
let msg = p2p.receive().await;
|
|
|
|
match msg.kind {
|
|
|
|
P2pMessageKind::KeepAlive => {}
|
|
|
|
P2pMessageKind::Tributary(genesis) => {
|
|
|
|
if let Some(channel) = channels.read().await.get(&genesis) {
|
|
|
|
channel.send(msg).unwrap();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
P2pMessageKind::Heartbeat(genesis) => {
|
|
|
|
if let Some(channel) = channels.read().await.get(&genesis) {
|
|
|
|
channel.send(msg).unwrap();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
P2pMessageKind::Block(genesis) => {
|
|
|
|
if let Some(channel) = channels.read().await.get(&genesis) {
|
|
|
|
channel.send(msg).unwrap();
|
2023-08-31 06:35:50 +00:00
|
|
|
}
|
2023-04-24 04:53:15 +00:00
|
|
|
}
|
2023-09-26 02:58:40 +00:00
|
|
|
}
|
2023-04-20 09:05:17 +00:00
|
|
|
}
|
2023-04-24 03:15:15 +00:00
|
|
|
}
|
|
|
|
|
2023-09-27 04:44:31 +00:00
|
|
|
async fn publish_signed_transaction<D: Db, P: P2p>(
|
|
|
|
db: &mut D,
|
2023-05-09 02:20:51 +00:00
|
|
|
tributary: &Tributary<D, Transaction, P>,
|
|
|
|
tx: Transaction,
|
|
|
|
) {
|
2023-08-13 06:21:56 +00:00
|
|
|
log::debug!("publishing transaction {}", hex::encode(tx.hash()));
|
2023-09-27 04:44:31 +00:00
|
|
|
|
|
|
|
let mut txn = db.txn();
|
|
|
|
let signer = if let TransactionKind::Signed(signed) = tx.kind() {
|
|
|
|
let signer = signed.signer;
|
|
|
|
|
|
|
|
// Safe as we should deterministically create transactions, meaning if this is already on-disk,
|
|
|
|
// it's what we're saving now
|
|
|
|
MainDb::<D>::save_signed_transaction(&mut txn, signed.nonce, tx);
|
|
|
|
|
|
|
|
signer
|
2023-05-09 02:20:51 +00:00
|
|
|
} else {
|
2023-09-25 22:07:26 +00:00
|
|
|
panic!("non-signed transaction passed to publish_signed_transaction");
|
2023-09-27 04:44:31 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// If we're trying to publish 5, when the last transaction published was 3, this will delay
|
|
|
|
// publication until the point in time we publish 4
|
|
|
|
while let Some(tx) = MainDb::<D>::take_signed_transaction(
|
|
|
|
&mut txn,
|
|
|
|
tributary
|
|
|
|
.next_nonce(signer)
|
|
|
|
.await
|
|
|
|
.expect("we don't have a nonce, meaning we aren't a participant on this tributary"),
|
|
|
|
) {
|
|
|
|
// We should've created a valid transaction
|
|
|
|
// This does assume publish_signed_transaction hasn't been called twice with the same
|
|
|
|
// transaction, which risks a race condition on the validity of this assert
|
|
|
|
// Our use case only calls this function sequentially
|
|
|
|
assert!(tributary.add_transaction(tx).await, "created an invalid transaction");
|
2023-05-09 02:20:51 +00:00
|
|
|
}
|
2023-09-27 04:44:31 +00:00
|
|
|
txn.commit();
|
2023-05-09 02:20:51 +00:00
|
|
|
}
|
|
|
|
|
2023-09-27 04:00:31 +00:00
|
|
|
async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
|
|
|
|
mut db: D,
|
2023-04-25 07:14:42 +00:00
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
2023-08-14 10:08:55 +00:00
|
|
|
serai: Arc<Serai>,
|
2023-05-10 03:44:41 +00:00
|
|
|
mut processors: Pro,
|
2023-09-27 16:20:57 +00:00
|
|
|
network: NetworkId,
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to it, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
mut new_tributary: mpsc::UnboundedReceiver<ActiveTributary<D, P>>,
|
2023-04-25 07:14:42 +00:00
|
|
|
) {
|
2023-09-27 04:44:31 +00:00
|
|
|
let mut db_clone = db.clone(); // Enables cloning the DB while we have a txn
|
2023-04-25 07:14:42 +00:00
|
|
|
let pub_key = Ristretto::generator() * key.deref();
|
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to it, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
let mut tributaries = HashMap::new();
|
2023-09-01 04:03:53 +00:00
|
|
|
|
2023-09-27 04:00:31 +00:00
|
|
|
loop {
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to it, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
match new_tributary.try_recv() {
|
|
|
|
Ok(tributary) => {
|
2023-10-10 03:38:39 +00:00
|
|
|
let set = tributary.spec.set();
|
|
|
|
assert_eq!(set.network, network);
|
|
|
|
tributaries.insert(set.session, tributary);
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to it, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
|
|
|
Err(mpsc::error::TryRecvError::Empty) => {}
|
|
|
|
Err(mpsc::error::TryRecvError::Disconnected) => {
|
|
|
|
panic!("handle_processor_messages new_tributary sender closed")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-09-27 16:20:57 +00:00
|
|
|
// TODO: Check this ID is sane (last handled ID or expected next ID)
|
|
|
|
let msg = processors.recv(network).await;
|
2023-09-01 02:09:29 +00:00
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to it, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
// TODO: We need to verify the Batches published to Substrate
|
|
|
|
|
2023-09-27 16:20:57 +00:00
|
|
|
if !MainDb::<D>::handled_message(&db, msg.network, msg.id) {
|
2023-09-27 04:00:31 +00:00
|
|
|
let mut txn = db.txn();
|
2023-09-26 01:54:52 +00:00
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to it, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
let relevant_tributary = match &msg.msg {
|
2023-09-29 00:04:20 +00:00
|
|
|
// We'll only receive these if we fired GenerateKey, which we'll only do if if we're
|
|
|
|
// in-set, making the Tributary relevant
|
2023-09-27 04:00:31 +00:00
|
|
|
ProcessorMessage::KeyGen(inner_msg) => match inner_msg {
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to it, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
key_gen::ProcessorMessage::Commitments { id, .. } => Some(id.set.session),
|
|
|
|
key_gen::ProcessorMessage::Shares { id, .. } => Some(id.set.session),
|
|
|
|
key_gen::ProcessorMessage::GeneratedKeyPair { id, .. } => Some(id.set.session),
|
2023-09-27 04:00:31 +00:00
|
|
|
},
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to it, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
// TODO: Review replacing key with Session in messages?
|
|
|
|
ProcessorMessage::Sign(inner_msg) => match inner_msg {
|
2023-09-29 00:04:20 +00:00
|
|
|
// We'll only receive Preprocess and Share if we're actively signing
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
sign::ProcessorMessage::Preprocess { id, .. } => {
|
2023-09-29 07:51:01 +00:00
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, &id.key).unwrap())
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
sign::ProcessorMessage::Share { id, .. } => {
|
2023-09-29 07:51:01 +00:00
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, &id.key).unwrap())
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
2023-09-29 00:04:20 +00:00
|
|
|
// While the Processor's Scanner will always emit Completed, that's routed through the
|
|
|
|
// Signer and only becomes a ProcessorMessage::Completed if the Signer is present and
|
|
|
|
// confirms it
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
sign::ProcessorMessage::Completed { key, .. } => {
|
2023-09-29 07:51:01 +00:00
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, key).unwrap())
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
|
|
|
},
|
|
|
|
ProcessorMessage::Coordinator(inner_msg) => match inner_msg {
|
2023-10-10 03:38:39 +00:00
|
|
|
// This is a special case as it's relevant to *all* Tributaries for this network
|
2023-09-29 00:04:20 +00:00
|
|
|
// It doesn't return a Tributary to become `relevant_tributary` though
|
2023-09-27 04:00:31 +00:00
|
|
|
coordinator::ProcessorMessage::SubstrateBlockAck { network, block, plans } => {
|
|
|
|
assert_eq!(
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
*network, msg.network,
|
2023-09-27 04:00:31 +00:00
|
|
|
"processor claimed to be a different network than it was for SubstrateBlockAck",
|
|
|
|
);
|
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
// TODO: Find all Tributaries active at this Substrate block, and make sure we have
|
|
|
|
// them all
|
2023-09-27 04:00:31 +00:00
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
for tributary in tributaries.values() {
|
|
|
|
// TODO: This needs to be scoped per multisig
|
|
|
|
TributaryDb::<D>::set_plan_ids(&mut txn, tributary.spec.genesis(), *block, plans);
|
|
|
|
|
|
|
|
let tx = Transaction::SubstrateBlock(*block);
|
|
|
|
log::trace!("processor message effected transaction {}", hex::encode(tx.hash()));
|
|
|
|
log::trace!("providing transaction {}", hex::encode(tx.hash()));
|
|
|
|
let res = tributary.tributary.provide_transaction(tx).await;
|
|
|
|
if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) {
|
|
|
|
panic!("provided an invalid transaction: {res:?}");
|
|
|
|
}
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
|
|
|
None
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
2023-09-29 00:04:20 +00:00
|
|
|
// We'll only fire these if we are the Substrate signer, making the Tributary relevant
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
coordinator::ProcessorMessage::BatchPreprocess { id, .. } => {
|
2023-09-29 07:51:01 +00:00
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, &id.key).unwrap())
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
|
|
|
coordinator::ProcessorMessage::BatchShare { id, .. } => {
|
2023-09-29 07:51:01 +00:00
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, &id.key).unwrap())
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
|
|
|
},
|
2023-09-29 07:51:01 +00:00
|
|
|
// These don't return a relevant Tributary as there's no Tributary with action expected
|
2023-09-27 04:00:31 +00:00
|
|
|
ProcessorMessage::Substrate(inner_msg) => match inner_msg {
|
2023-09-29 07:51:01 +00:00
|
|
|
processor_messages::substrate::ProcessorMessage::Batch { batch } => {
|
|
|
|
assert_eq!(
|
|
|
|
batch.network, msg.network,
|
|
|
|
"processor sent us a batch for a different network than it was for",
|
|
|
|
);
|
|
|
|
let this_batch_id = batch.id;
|
|
|
|
MainDb::<D>::save_expected_batch(&mut txn, batch);
|
|
|
|
|
|
|
|
// Re-define batch
|
|
|
|
// We can't drop it, yet it shouldn't be accidentally used in the following block
|
2023-09-29 08:19:59 +00:00
|
|
|
#[allow(clippy::let_unit_value, unused_variables)]
|
2023-09-29 07:51:01 +00:00
|
|
|
let batch = ();
|
|
|
|
|
|
|
|
// Verify all `Batch`s which we've already indexed from Substrate
|
|
|
|
// This won't be complete, as it only runs when a `Batch` message is received, which
|
|
|
|
// will be before we get a `SignedBatch`. It is, however, incremental. We can use a
|
|
|
|
// complete version to finish the last section when we need a complete version.
|
|
|
|
let last = MainDb::<D>::last_verified_batch(&txn, msg.network);
|
|
|
|
// This variable exists so Rust can verify Send/Sync properties
|
|
|
|
let mut faulty = None;
|
|
|
|
for id in last.map(|last| last + 1).unwrap_or(0) ..= this_batch_id {
|
|
|
|
if let Some(on_chain) = SubstrateDb::<D>::batch_instructions_hash(&txn, network, id) {
|
|
|
|
let off_chain = MainDb::<D>::expected_batch(&txn, network, id).unwrap();
|
|
|
|
if on_chain != off_chain {
|
|
|
|
faulty = Some((id, off_chain, on_chain));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
MainDb::<D>::save_last_verified_batch(&mut txn, msg.network, id);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some((id, off_chain, on_chain)) = faulty {
|
|
|
|
// Halt operations on this network and spin, as this is a critical fault
|
|
|
|
loop {
|
|
|
|
log::error!(
|
|
|
|
"{}! network: {:?} id: {} off-chain: {} on-chain: {}",
|
|
|
|
"on-chain batch doesn't match off-chain",
|
|
|
|
network,
|
|
|
|
id,
|
|
|
|
hex::encode(off_chain),
|
|
|
|
hex::encode(on_chain),
|
|
|
|
);
|
|
|
|
sleep(Duration::from_secs(60)).await;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
None
|
|
|
|
}
|
2023-09-29 00:04:20 +00:00
|
|
|
// If this is a new Batch, immediately publish it (if we can)
|
2023-09-29 07:51:01 +00:00
|
|
|
processor_messages::substrate::ProcessorMessage::SignedBatch { batch } => {
|
2023-09-27 04:00:31 +00:00
|
|
|
assert_eq!(
|
|
|
|
batch.batch.network, msg.network,
|
2023-09-29 07:51:01 +00:00
|
|
|
"processor sent us a signed batch for a different network than it was for",
|
2023-09-27 04:00:31 +00:00
|
|
|
);
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
// TODO: Check this key's key pair's substrate key is authorized to publish batches
|
2023-09-27 04:00:31 +00:00
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
log::debug!("received batch {:?} {}", batch.batch.network, batch.batch.id);
|
2023-09-26 01:54:52 +00:00
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
// Save this batch to the disk
|
|
|
|
MainDb::<D>::save_batch(&mut txn, batch.clone());
|
|
|
|
|
|
|
|
// Get the next-to-execute batch ID
|
|
|
|
async fn get_next(serai: &Serai, network: NetworkId) -> u32 {
|
|
|
|
let mut first = true;
|
|
|
|
loop {
|
|
|
|
if !first {
|
|
|
|
log::error!(
|
|
|
|
"{} {network:?}",
|
|
|
|
"couldn't connect to Serai node to get the next batch ID for",
|
|
|
|
);
|
|
|
|
tokio::time::sleep(Duration::from_secs(5)).await;
|
2023-08-27 01:36:13 +00:00
|
|
|
}
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
first = false;
|
|
|
|
|
|
|
|
let Ok(latest_block) = serai.get_latest_block().await else {
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
let Ok(last) = serai.get_last_batch_for_network(latest_block.hash(), network).await
|
|
|
|
else {
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
break if let Some(last) = last { last + 1 } else { 0 };
|
|
|
|
}
|
|
|
|
}
|
|
|
|
let mut next = get_next(&serai, network).await;
|
|
|
|
|
|
|
|
// Since we have a new batch, publish all batches yet to be published to Serai
|
|
|
|
// This handles the edge-case where batch n+1 is signed before batch n is
|
|
|
|
let mut batches = VecDeque::new();
|
|
|
|
while let Some(batch) = MainDb::<D>::batch(&txn, network, next) {
|
|
|
|
batches.push_back(batch);
|
|
|
|
next += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
while let Some(batch) = batches.pop_front() {
|
|
|
|
// If this Batch should no longer be published, continue
|
|
|
|
if get_next(&serai, network).await > batch.batch.id {
|
|
|
|
continue;
|
2023-07-18 05:53:51 +00:00
|
|
|
}
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
|
|
|
let tx = Serai::execute_batch(batch.clone());
|
|
|
|
log::debug!(
|
|
|
|
"attempting to publish batch {:?} {}",
|
|
|
|
batch.batch.network,
|
|
|
|
batch.batch.id,
|
|
|
|
);
|
|
|
|
// This publish may fail if this transaction already exists in the mempool, which is
|
|
|
|
// possible, or if this batch was already executed on-chain
|
|
|
|
// Either case will have eventual resolution and be handled by the above check on if
|
|
|
|
// this batch should execute
|
|
|
|
let res = serai.publish(&tx).await;
|
|
|
|
if res.is_ok() {
|
|
|
|
log::info!(
|
|
|
|
"published batch {network:?} {} (block {})",
|
|
|
|
batch.batch.id,
|
|
|
|
hex::encode(batch.batch.block),
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
log::debug!(
|
|
|
|
"couldn't publish batch {:?} {}: {:?}",
|
|
|
|
batch.batch.network,
|
|
|
|
batch.batch.id,
|
|
|
|
res,
|
|
|
|
);
|
|
|
|
// If we failed to publish it, restore it
|
|
|
|
batches.push_front(batch);
|
2023-10-12 22:39:46 +00:00
|
|
|
// Sleep for a few seconds before retrying to prevent hammering the node
|
|
|
|
tokio::time::sleep(Duration::from_secs(5)).await;
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
|
|
|
}
|
2023-05-10 05:45:42 +00:00
|
|
|
|
2023-09-27 04:00:31 +00:00
|
|
|
None
|
|
|
|
}
|
|
|
|
},
|
|
|
|
};
|
2023-09-27 03:28:05 +00:00
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
// If there's a relevant Tributary...
|
|
|
|
if let Some(relevant_tributary) = relevant_tributary {
|
|
|
|
// Make sure we have it
|
2023-09-29 00:04:20 +00:00
|
|
|
// Per the reasoning above, we only return a Tributary as relevant if we're a participant
|
|
|
|
// Accordingly, we do *need* to have this Tributary now to handle it UNLESS the Tributary
|
|
|
|
// has already completed and this is simply an old message
|
|
|
|
// TODO: Check if the Tributary has already been completed
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
let Some(ActiveTributary { spec, tributary }) = tributaries.get(&relevant_tributary) else {
|
|
|
|
// Since we don't, sleep for a fraction of a second and move to the next loop iteration
|
|
|
|
// At the start of the loop, we'll check for new tributaries, making this eventually
|
|
|
|
// resolve
|
|
|
|
sleep(Duration::from_millis(100)).await;
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
|
|
|
|
let genesis = spec.genesis();
|
|
|
|
|
|
|
|
let tx = match msg.msg.clone() {
|
|
|
|
ProcessorMessage::KeyGen(inner_msg) => match inner_msg {
|
|
|
|
key_gen::ProcessorMessage::Commitments { id, commitments } => Some(
|
|
|
|
Transaction::DkgCommitments(id.attempt, commitments, Transaction::empty_signed()),
|
|
|
|
),
|
|
|
|
key_gen::ProcessorMessage::Shares { id, mut shares } => {
|
|
|
|
// Create a MuSig-based machine to inform Substrate of this key generation
|
|
|
|
let nonces = crate::tributary::dkg_confirmation_nonces(&key, spec, id.attempt);
|
|
|
|
|
|
|
|
let mut tx_shares = Vec::with_capacity(shares.len());
|
|
|
|
for i in 1 ..= spec.n() {
|
|
|
|
let i = Participant::new(i).unwrap();
|
|
|
|
if i ==
|
|
|
|
spec
|
|
|
|
.i(pub_key)
|
|
|
|
.expect("processor message to DKG for a session we aren't a validator in")
|
|
|
|
{
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
tx_shares.push(
|
|
|
|
shares.remove(&i).expect("processor didn't send share for another validator"),
|
|
|
|
);
|
|
|
|
}
|
2023-09-27 04:00:31 +00:00
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
Some(Transaction::DkgShares {
|
|
|
|
attempt: id.attempt,
|
|
|
|
shares: tx_shares,
|
|
|
|
confirmation_nonces: nonces,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
})
|
2023-09-26 01:54:52 +00:00
|
|
|
}
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
key_gen::ProcessorMessage::GeneratedKeyPair { id, substrate_key, network_key } => {
|
|
|
|
assert_eq!(
|
|
|
|
id.set.network, msg.network,
|
|
|
|
"processor claimed to be a different network than it was for GeneratedKeyPair",
|
|
|
|
);
|
|
|
|
// TODO2: Also check the other KeyGenId fields
|
|
|
|
|
|
|
|
// Tell the Tributary the key pair, get back the share for the MuSig signature
|
|
|
|
let share = crate::tributary::generated_key_pair::<D>(
|
|
|
|
&mut txn,
|
|
|
|
&key,
|
|
|
|
spec,
|
|
|
|
&(Public(substrate_key), network_key.try_into().unwrap()),
|
|
|
|
id.attempt,
|
|
|
|
);
|
|
|
|
|
|
|
|
match share {
|
|
|
|
Ok(share) => {
|
|
|
|
Some(Transaction::DkgConfirmed(id.attempt, share, Transaction::empty_signed()))
|
|
|
|
}
|
|
|
|
Err(p) => {
|
|
|
|
todo!("participant {p:?} sent invalid DKG confirmation preprocesses")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
ProcessorMessage::Sign(msg) => match msg {
|
|
|
|
sign::ProcessorMessage::Preprocess { id, preprocess } => {
|
|
|
|
if id.attempt == 0 {
|
|
|
|
MainDb::<D>::save_first_preprocess(&mut txn, network, id.id, preprocess);
|
|
|
|
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
Some(Transaction::SignPreprocess(SignData {
|
|
|
|
plan: id.id,
|
|
|
|
attempt: id.attempt,
|
|
|
|
data: preprocess,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
}))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sign::ProcessorMessage::Share { id, share } => Some(Transaction::SignShare(SignData {
|
|
|
|
plan: id.id,
|
|
|
|
attempt: id.attempt,
|
|
|
|
data: share,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
})),
|
|
|
|
sign::ProcessorMessage::Completed { key: _, id, tx } => {
|
|
|
|
let r = Zeroizing::new(<Ristretto as Ciphersuite>::F::random(&mut OsRng));
|
|
|
|
#[allow(non_snake_case)]
|
|
|
|
let R = <Ristretto as Ciphersuite>::generator() * r.deref();
|
|
|
|
let mut tx = Transaction::SignCompleted {
|
|
|
|
plan: id,
|
|
|
|
tx_hash: tx,
|
|
|
|
first_signer: pub_key,
|
|
|
|
signature: SchnorrSignature { R, s: <Ristretto as Ciphersuite>::F::ZERO },
|
|
|
|
};
|
|
|
|
let signed = SchnorrSignature::sign(&key, r, tx.sign_completed_challenge());
|
|
|
|
match &mut tx {
|
|
|
|
Transaction::SignCompleted { signature, .. } => {
|
|
|
|
*signature = signed;
|
|
|
|
}
|
|
|
|
_ => unreachable!(),
|
|
|
|
}
|
|
|
|
Some(tx)
|
|
|
|
}
|
|
|
|
},
|
|
|
|
ProcessorMessage::Coordinator(inner_msg) => match inner_msg {
|
|
|
|
coordinator::ProcessorMessage::SubstrateBlockAck { .. } => unreachable!(),
|
|
|
|
coordinator::ProcessorMessage::BatchPreprocess { id, block, preprocess } => {
|
|
|
|
log::info!(
|
|
|
|
"informed of batch (sign ID {}, attempt {}) for block {}",
|
|
|
|
hex::encode(id.id),
|
|
|
|
id.attempt,
|
|
|
|
hex::encode(block),
|
|
|
|
);
|
|
|
|
// If this is the first attempt instance, wait until we synchronize around
|
|
|
|
// the batch first
|
|
|
|
if id.attempt == 0 {
|
|
|
|
MainDb::<D>::save_first_preprocess(&mut txn, spec.set().network, id.id, preprocess);
|
|
|
|
|
2023-10-11 03:55:59 +00:00
|
|
|
// TODO: If this is the new key's first Batch, only create this TX once we verify
|
|
|
|
// all prior published `Batch`s
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
Some(Transaction::Batch(block.0, id.id))
|
|
|
|
} else {
|
|
|
|
Some(Transaction::BatchPreprocess(SignData {
|
|
|
|
plan: id.id,
|
|
|
|
attempt: id.attempt,
|
|
|
|
data: preprocess,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
}))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
coordinator::ProcessorMessage::BatchShare { id, share } => {
|
|
|
|
Some(Transaction::BatchShare(SignData {
|
|
|
|
plan: id.id,
|
|
|
|
attempt: id.attempt,
|
|
|
|
data: share.to_vec(),
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
}))
|
|
|
|
}
|
|
|
|
},
|
|
|
|
ProcessorMessage::Substrate(inner_msg) => match inner_msg {
|
2023-09-29 07:51:01 +00:00
|
|
|
processor_messages::substrate::ProcessorMessage::Batch { .. } => unreachable!(),
|
|
|
|
processor_messages::substrate::ProcessorMessage::SignedBatch { .. } => unreachable!(),
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
// If this created a transaction, publish it
|
|
|
|
if let Some(mut tx) = tx {
|
|
|
|
log::trace!("processor message effected transaction {}", hex::encode(tx.hash()));
|
|
|
|
|
|
|
|
match tx.kind() {
|
|
|
|
TransactionKind::Provided(_) => {
|
|
|
|
log::trace!("providing transaction {}", hex::encode(tx.hash()));
|
|
|
|
let res = tributary.provide_transaction(tx).await;
|
|
|
|
if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) {
|
|
|
|
panic!("provided an invalid transaction: {res:?}");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
TransactionKind::Unsigned => {
|
|
|
|
log::trace!("publishing unsigned transaction {}", hex::encode(tx.hash()));
|
|
|
|
// Ignores the result since we can't differentiate already in-mempool from
|
|
|
|
// already on-chain from invalid
|
|
|
|
// TODO: Don't ignore the result
|
|
|
|
tributary.add_transaction(tx).await;
|
|
|
|
}
|
|
|
|
TransactionKind::Signed(_) => {
|
|
|
|
log::trace!("getting next nonce for Tributary TX in response to processor message");
|
|
|
|
|
|
|
|
let nonce = loop {
|
|
|
|
let Some(nonce) = NonceDecider::<D>::nonce(&txn, genesis, &tx)
|
|
|
|
.expect("signed TX didn't have nonce")
|
|
|
|
else {
|
|
|
|
// This can be None if:
|
|
|
|
// 1) We scanned the relevant transaction(s) in a Tributary block
|
|
|
|
// 2) The processor was sent a message and responded
|
|
|
|
// 3) The Tributary TXN has yet to be committed
|
|
|
|
log::warn!("nonce has yet to be saved for processor-instigated transaction");
|
|
|
|
sleep(Duration::from_millis(100)).await;
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
break nonce;
|
2023-09-27 04:00:31 +00:00
|
|
|
};
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
tx.sign(&mut OsRng, genesis, &key, nonce);
|
2023-09-27 04:00:31 +00:00
|
|
|
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
publish_signed_transaction(&mut db_clone, tributary, tx).await;
|
|
|
|
}
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-09-27 16:20:57 +00:00
|
|
|
MainDb::<D>::save_handled_message(&mut txn, msg.network, msg.id);
|
2023-09-27 04:00:31 +00:00
|
|
|
txn.commit();
|
|
|
|
}
|
|
|
|
|
|
|
|
processors.ack(msg).await;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn handle_processors<D: Db, Pro: Processors, P: P2p>(
|
|
|
|
db: D,
|
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
|
|
|
serai: Arc<Serai>,
|
2023-09-27 16:20:57 +00:00
|
|
|
processors: Pro,
|
2023-09-27 04:00:31 +00:00
|
|
|
mut new_tributary: broadcast::Receiver<ActiveTributary<D, P>>,
|
|
|
|
) {
|
2023-09-27 16:20:57 +00:00
|
|
|
let mut channels = HashMap::new();
|
|
|
|
for network in [NetworkId::Bitcoin, NetworkId::Ethereum, NetworkId::Monero] {
|
|
|
|
let (send, recv) = mpsc::unbounded_channel();
|
|
|
|
tokio::spawn(handle_processor_messages(
|
|
|
|
db.clone(),
|
|
|
|
key.clone(),
|
|
|
|
serai.clone(),
|
|
|
|
processors.clone(),
|
|
|
|
network,
|
|
|
|
recv,
|
|
|
|
));
|
|
|
|
channels.insert(network, send);
|
|
|
|
}
|
2023-08-13 06:21:56 +00:00
|
|
|
|
2023-09-27 16:20:57 +00:00
|
|
|
// Listen to new tributary events
|
2023-09-26 01:54:52 +00:00
|
|
|
loop {
|
2023-09-27 16:20:57 +00:00
|
|
|
let tributary = new_tributary.recv().await.unwrap();
|
|
|
|
channels[&tributary.spec.set().network].send(tributary).unwrap();
|
2023-04-25 07:14:42 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-10 03:44:41 +00:00
|
|
|
pub async fn run<D: Db, Pro: Processors, P: P2p>(
|
2023-09-27 04:00:31 +00:00
|
|
|
raw_db: D,
|
2023-04-24 03:15:15 +00:00
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
|
|
|
p2p: P,
|
2023-05-10 03:44:41 +00:00
|
|
|
processors: Pro,
|
2023-04-24 03:15:15 +00:00
|
|
|
serai: Serai,
|
|
|
|
) {
|
2023-08-14 10:08:55 +00:00
|
|
|
let serai = Arc::new(serai);
|
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
let (new_tributary_spec_send, mut new_tributary_spec_recv) = mpsc::unbounded_channel();
|
|
|
|
// Reload active tributaries from the database
|
2023-09-27 04:00:31 +00:00
|
|
|
for spec in MainDb::<D>::active_tributaries(&raw_db).1 {
|
2023-09-25 23:28:53 +00:00
|
|
|
new_tributary_spec_send.send(spec).unwrap();
|
|
|
|
}
|
2023-09-25 22:23:39 +00:00
|
|
|
|
2023-04-24 03:15:15 +00:00
|
|
|
// Handle new Substrate blocks
|
2023-09-25 22:23:39 +00:00
|
|
|
tokio::spawn(scan_substrate(
|
|
|
|
raw_db.clone(),
|
|
|
|
key.clone(),
|
|
|
|
processors.clone(),
|
|
|
|
serai.clone(),
|
2023-09-25 23:28:53 +00:00
|
|
|
new_tributary_spec_send,
|
2023-09-25 22:23:39 +00:00
|
|
|
));
|
2023-04-24 03:15:15 +00:00
|
|
|
|
|
|
|
// Handle the Tributaries
|
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
// This should be large enough for an entire rotation of all tributaries
|
|
|
|
// If it's too small, the coordinator fail to boot, which is a decent sanity check
|
|
|
|
let (new_tributary, mut new_tributary_listener_1) = broadcast::channel(32);
|
|
|
|
let new_tributary_listener_2 = new_tributary.subscribe();
|
|
|
|
let new_tributary_listener_3 = new_tributary.subscribe();
|
|
|
|
let new_tributary_listener_4 = new_tributary.subscribe();
|
|
|
|
let new_tributary_listener_5 = new_tributary.subscribe();
|
2023-04-24 03:15:15 +00:00
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
// Spawn a task to further add Tributaries as needed
|
|
|
|
tokio::spawn({
|
|
|
|
let raw_db = raw_db.clone();
|
|
|
|
let key = key.clone();
|
|
|
|
let processors = processors.clone();
|
|
|
|
let p2p = p2p.clone();
|
|
|
|
async move {
|
|
|
|
loop {
|
|
|
|
let spec = new_tributary_spec_recv.recv().await.unwrap();
|
|
|
|
add_tributary(
|
|
|
|
raw_db.clone(),
|
|
|
|
key.clone(),
|
|
|
|
&processors,
|
|
|
|
p2p.clone(),
|
|
|
|
&new_tributary,
|
|
|
|
spec.clone(),
|
|
|
|
)
|
|
|
|
.await;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
2023-04-24 03:15:15 +00:00
|
|
|
|
2023-08-25 01:55:59 +00:00
|
|
|
// When we reach synchrony on an event requiring signing, send our preprocess for it
|
|
|
|
let recognized_id = {
|
2023-05-09 02:20:51 +00:00
|
|
|
let raw_db = raw_db.clone();
|
|
|
|
let key = key.clone();
|
2023-09-25 23:28:53 +00:00
|
|
|
|
|
|
|
let tributaries = Arc::new(RwLock::new(HashMap::new()));
|
|
|
|
tokio::spawn({
|
|
|
|
let tributaries = tributaries.clone();
|
|
|
|
async move {
|
|
|
|
loop {
|
|
|
|
match new_tributary_listener_1.recv().await {
|
|
|
|
Ok(tributary) => {
|
2023-09-26 01:54:52 +00:00
|
|
|
tributaries.write().await.insert(tributary.spec.genesis(), tributary.tributary);
|
2023-09-25 23:28:53 +00:00
|
|
|
}
|
|
|
|
Err(broadcast::error::RecvError::Lagged(_)) => {
|
|
|
|
panic!("recognized_id lagged to handle new_tributary")
|
|
|
|
}
|
|
|
|
Err(broadcast::error::RecvError::Closed) => panic!("new_tributary sender closed"),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
2023-09-25 19:42:39 +00:00
|
|
|
move |network, genesis, id_type, id, nonce| {
|
2023-09-27 04:44:31 +00:00
|
|
|
let mut raw_db = raw_db.clone();
|
2023-08-25 01:55:59 +00:00
|
|
|
let key = key.clone();
|
|
|
|
let tributaries = tributaries.clone();
|
|
|
|
async move {
|
2023-09-27 04:00:31 +00:00
|
|
|
// The transactions for these are fired before the preprocesses are actually
|
|
|
|
// received/saved, creating a race between Tributary ack and the availability of all
|
|
|
|
// Preprocesses
|
2023-08-27 01:09:57 +00:00
|
|
|
// This waits until the necessary preprocess is available
|
|
|
|
let get_preprocess = |raw_db, id| async move {
|
|
|
|
loop {
|
2023-09-27 17:00:04 +00:00
|
|
|
let Some(preprocess) = MainDb::<D>::first_preprocess(raw_db, network, id) else {
|
2023-08-27 01:09:57 +00:00
|
|
|
sleep(Duration::from_millis(100)).await;
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
return preprocess;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Replace ExternalBlock with Batch
The initial TODO was simply to use one ExternalBlock per all batches in the
block. This would require publishing ExternalBlock after the last batch,
requiring knowing the last batch. While we could add such a pipeline, it'd
require:
1) Initial preprocesses using a distinct message from BatchPreprocess
2) An additional message sent after all BatchPreprocess are sent
Unfortunately, both would require tweaks to the SubstrateSigner which aren't
worth the complexity compared to the solution here, at least, not at this time.
While this will cause, if a Tributary is signing a block whose total batch data
exceeds 25 kB, to use multiple transactions which could be optimized out by
'better' local data pipelining, that's an extreme edge case. Given the temporal
nature of each Tributary, it's also an acceptable edge.
This does no longer achieve synchrony over external blocks accordingly. While
signed batches have synchrony, as they embed their block hash, batches being
signed don't have cryptographic synchrony on their contents. This means
validators who are eclipsed may produce invalid shares, as they sign a
different batch. This will be introduced in a follow-up commit.
2023-09-01 02:48:02 +00:00
|
|
|
let mut tx = match id_type {
|
|
|
|
RecognizedIdType::Batch => Transaction::BatchPreprocess(SignData {
|
|
|
|
plan: id,
|
|
|
|
attempt: 0,
|
|
|
|
data: get_preprocess(&raw_db, id).await,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
}),
|
2023-05-09 02:20:51 +00:00
|
|
|
|
Replace ExternalBlock with Batch
The initial TODO was simply to use one ExternalBlock per all batches in the
block. This would require publishing ExternalBlock after the last batch,
requiring knowing the last batch. While we could add such a pipeline, it'd
require:
1) Initial preprocesses using a distinct message from BatchPreprocess
2) An additional message sent after all BatchPreprocess are sent
Unfortunately, both would require tweaks to the SubstrateSigner which aren't
worth the complexity compared to the solution here, at least, not at this time.
While this will cause, if a Tributary is signing a block whose total batch data
exceeds 25 kB, to use multiple transactions which could be optimized out by
'better' local data pipelining, that's an extreme edge case. Given the temporal
nature of each Tributary, it's also an acceptable edge.
This does no longer achieve synchrony over external blocks accordingly. While
signed batches have synchrony, as they embed their block hash, batches being
signed don't have cryptographic synchrony on their contents. This means
validators who are eclipsed may produce invalid shares, as they sign a
different batch. This will be introduced in a follow-up commit.
2023-09-01 02:48:02 +00:00
|
|
|
RecognizedIdType::Plan => Transaction::SignPreprocess(SignData {
|
|
|
|
plan: id,
|
|
|
|
attempt: 0,
|
|
|
|
data: get_preprocess(&raw_db, id).await,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
}),
|
2023-08-25 01:55:59 +00:00
|
|
|
};
|
|
|
|
|
2023-09-25 19:42:39 +00:00
|
|
|
tx.sign(&mut OsRng, genesis, &key, nonce);
|
|
|
|
|
2023-08-25 01:55:59 +00:00
|
|
|
let tributaries = tributaries.read().await;
|
|
|
|
let Some(tributary) = tributaries.get(&genesis) else {
|
2023-09-26 01:54:52 +00:00
|
|
|
// TODO: This may happen if the task above is simply slow
|
Replace ExternalBlock with Batch
The initial TODO was simply to use one ExternalBlock per all batches in the
block. This would require publishing ExternalBlock after the last batch,
requiring knowing the last batch. While we could add such a pipeline, it'd
require:
1) Initial preprocesses using a distinct message from BatchPreprocess
2) An additional message sent after all BatchPreprocess are sent
Unfortunately, both would require tweaks to the SubstrateSigner which aren't
worth the complexity compared to the solution here, at least, not at this time.
While this will cause, if a Tributary is signing a block whose total batch data
exceeds 25 kB, to use multiple transactions which could be optimized out by
'better' local data pipelining, that's an extreme edge case. Given the temporal
nature of each Tributary, it's also an acceptable edge.
This does no longer achieve synchrony over external blocks accordingly. While
signed batches have synchrony, as they embed their block hash, batches being
signed don't have cryptographic synchrony on their contents. This means
validators who are eclipsed may produce invalid shares, as they sign a
different batch. This will be introduced in a follow-up commit.
2023-09-01 02:48:02 +00:00
|
|
|
panic!("tributary we don't have came to consensus on an Batch");
|
2023-08-25 01:55:59 +00:00
|
|
|
};
|
2023-09-27 04:44:31 +00:00
|
|
|
publish_signed_transaction(&mut raw_db, tributary, tx).await;
|
2023-05-09 02:20:51 +00:00
|
|
|
}
|
|
|
|
}
|
2023-08-25 01:55:59 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// Handle new blocks for each Tributary
|
|
|
|
{
|
|
|
|
let raw_db = raw_db.clone();
|
|
|
|
tokio::spawn(scan_tributaries(
|
|
|
|
raw_db,
|
|
|
|
key.clone(),
|
|
|
|
recognized_id,
|
|
|
|
processors.clone(),
|
|
|
|
serai.clone(),
|
2023-09-25 23:28:53 +00:00
|
|
|
new_tributary_listener_2,
|
2023-08-25 01:55:59 +00:00
|
|
|
));
|
|
|
|
}
|
2023-04-24 03:15:15 +00:00
|
|
|
|
|
|
|
// Spawn the heartbeat task, which will trigger syncing if there hasn't been a Tributary block
|
|
|
|
// in a while (presumably because we're behind)
|
2023-09-25 23:28:53 +00:00
|
|
|
tokio::spawn(heartbeat_tributaries(p2p.clone(), new_tributary_listener_3));
|
2023-04-24 03:15:15 +00:00
|
|
|
|
|
|
|
// Handle P2P messages
|
2023-09-25 23:28:53 +00:00
|
|
|
tokio::spawn(handle_p2p(Ristretto::generator() * key.deref(), p2p, new_tributary_listener_4));
|
2023-04-15 21:38:47 +00:00
|
|
|
|
2023-04-25 07:14:42 +00:00
|
|
|
// Handle all messages from processors
|
2023-09-25 23:28:53 +00:00
|
|
|
handle_processors(raw_db, key, serai, processors, new_tributary_listener_5).await;
|
2023-04-15 21:38:47 +00:00
|
|
|
}
|
|
|
|
|
2023-04-11 13:21:35 +00:00
|
|
|
#[tokio::main]
|
2023-04-15 21:38:47 +00:00
|
|
|
async fn main() {
|
2023-08-13 08:30:49 +00:00
|
|
|
// Override the panic handler with one which will panic if any tokio task panics
|
|
|
|
{
|
|
|
|
let existing = std::panic::take_hook();
|
|
|
|
std::panic::set_hook(Box::new(move |panic| {
|
|
|
|
existing(panic);
|
|
|
|
const MSG: &str = "exiting the process due to a task panicking";
|
|
|
|
println!("{MSG}");
|
|
|
|
log::error!("{MSG}");
|
|
|
|
std::process::exit(1);
|
|
|
|
}));
|
|
|
|
}
|
|
|
|
|
2023-08-01 23:00:48 +00:00
|
|
|
if std::env::var("RUST_LOG").is_err() {
|
|
|
|
std::env::set_var("RUST_LOG", serai_env::var("RUST_LOG").unwrap_or_else(|| "info".to_string()));
|
|
|
|
}
|
|
|
|
env_logger::init();
|
|
|
|
|
|
|
|
log::info!("starting coordinator service...");
|
|
|
|
|
2023-07-26 01:39:29 +00:00
|
|
|
let db = serai_db::new_rocksdb(&env::var("DB_PATH").expect("path to DB wasn't specified"));
|
2023-04-17 06:10:33 +00:00
|
|
|
|
2023-08-06 16:38:44 +00:00
|
|
|
let key = {
|
|
|
|
let mut key_hex = serai_env::var("SERAI_KEY").expect("Serai key wasn't provided");
|
|
|
|
let mut key_vec = hex::decode(&key_hex).map_err(|_| ()).expect("Serai key wasn't hex-encoded");
|
|
|
|
key_hex.zeroize();
|
|
|
|
if key_vec.len() != 32 {
|
|
|
|
key_vec.zeroize();
|
|
|
|
panic!("Serai key had an invalid length");
|
|
|
|
}
|
|
|
|
let mut key_bytes = [0; 32];
|
|
|
|
key_bytes.copy_from_slice(&key_vec);
|
|
|
|
key_vec.zeroize();
|
|
|
|
let key = Zeroizing::new(<Ristretto as Ciphersuite>::F::from_repr(key_bytes).unwrap());
|
|
|
|
key_bytes.zeroize();
|
|
|
|
key
|
|
|
|
};
|
2023-08-08 19:12:47 +00:00
|
|
|
let p2p = LibP2p::new();
|
2023-04-17 06:10:33 +00:00
|
|
|
|
2023-07-21 18:00:03 +00:00
|
|
|
let processors = Arc::new(MessageQueue::from_env(Service::Coordinator));
|
2023-04-17 06:10:33 +00:00
|
|
|
|
2023-04-16 04:51:56 +00:00
|
|
|
let serai = || async {
|
|
|
|
loop {
|
2023-08-06 16:38:44 +00:00
|
|
|
let Ok(serai) = Serai::new(&format!(
|
2023-08-01 23:00:48 +00:00
|
|
|
"ws://{}:9944",
|
|
|
|
serai_env::var("SERAI_HOSTNAME").expect("Serai hostname wasn't provided")
|
2023-08-06 16:38:44 +00:00
|
|
|
))
|
2023-08-01 23:00:48 +00:00
|
|
|
.await
|
|
|
|
else {
|
2023-04-16 04:51:56 +00:00
|
|
|
log::error!("couldn't connect to the Serai node");
|
2023-04-17 06:10:33 +00:00
|
|
|
sleep(Duration::from_secs(5)).await;
|
2023-08-01 04:47:36 +00:00
|
|
|
continue;
|
2023-04-16 04:51:56 +00:00
|
|
|
};
|
2023-08-01 23:00:48 +00:00
|
|
|
log::info!("made initial connection to Serai node");
|
2023-04-16 04:51:56 +00:00
|
|
|
return serai;
|
|
|
|
}
|
|
|
|
};
|
2023-05-10 03:44:41 +00:00
|
|
|
run(db, key, p2p, processors, serai().await).await
|
2023-04-15 21:38:47 +00:00
|
|
|
}
|