2023-10-14 02:30:13 +00:00
|
|
|
use core::ops::Deref;
|
2023-04-23 07:48:50 +00:00
|
|
|
use std::{
|
|
|
|
sync::Arc,
|
2023-10-14 02:40:11 +00:00
|
|
|
time::Duration,
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
collections::{VecDeque, HashMap},
|
2023-04-23 07:48:50 +00:00
|
|
|
};
|
2023-04-17 04:50:56 +00:00
|
|
|
|
2023-08-06 16:38:44 +00:00
|
|
|
use zeroize::{Zeroize, Zeroizing};
|
2023-04-25 07:14:42 +00:00
|
|
|
use rand_core::OsRng;
|
2023-04-16 07:16:53 +00:00
|
|
|
|
2023-09-01 03:39:36 +00:00
|
|
|
use ciphersuite::{
|
|
|
|
group::ff::{Field, PrimeField},
|
|
|
|
Ciphersuite, Ristretto,
|
|
|
|
};
|
|
|
|
use schnorr::SchnorrSignature;
|
2023-09-01 04:03:53 +00:00
|
|
|
use frost::Participant;
|
2023-04-15 21:38:47 +00:00
|
|
|
|
2023-07-18 05:53:51 +00:00
|
|
|
use serai_db::{DbTxn, Db};
|
|
|
|
use serai_env as env;
|
2023-05-10 04:46:51 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
use serai_client::{
|
|
|
|
primitives::NetworkId,
|
|
|
|
validator_sets::primitives::{Session, ValidatorSet},
|
|
|
|
Public, Serai,
|
|
|
|
};
|
2023-04-15 21:38:47 +00:00
|
|
|
|
2023-07-18 05:53:51 +00:00
|
|
|
use message_queue::{Service, client::MessageQueue};
|
|
|
|
|
2023-09-25 22:23:39 +00:00
|
|
|
use tokio::{
|
2023-09-25 23:28:53 +00:00
|
|
|
sync::{RwLock, mpsc, broadcast},
|
2023-09-25 22:23:39 +00:00
|
|
|
time::sleep,
|
|
|
|
};
|
2023-04-23 07:48:50 +00:00
|
|
|
|
2023-10-14 02:40:11 +00:00
|
|
|
use ::tributary::{ProvidedError, TransactionKind, TransactionTrait, Block, Tributary};
|
2023-04-17 04:50:56 +00:00
|
|
|
|
2023-04-20 09:05:17 +00:00
|
|
|
mod tributary;
|
2023-09-25 19:42:39 +00:00
|
|
|
use crate::tributary::{
|
|
|
|
TributarySpec, SignData, Transaction, TributaryDb, NonceDecider, scanner::RecognizedIdType,
|
|
|
|
};
|
2023-04-16 04:51:56 +00:00
|
|
|
|
2023-04-23 08:31:00 +00:00
|
|
|
mod db;
|
|
|
|
use db::MainDb;
|
|
|
|
|
2023-04-16 04:51:56 +00:00
|
|
|
mod p2p;
|
|
|
|
pub use p2p::*;
|
|
|
|
|
2023-04-25 07:14:42 +00:00
|
|
|
use processor_messages::{key_gen, sign, coordinator, ProcessorMessage};
|
|
|
|
|
2023-05-10 03:44:41 +00:00
|
|
|
pub mod processors;
|
|
|
|
use processors::Processors;
|
2023-04-17 06:10:33 +00:00
|
|
|
|
2023-04-15 21:38:47 +00:00
|
|
|
mod substrate;
|
2023-10-14 01:46:17 +00:00
|
|
|
use substrate::{SubstrateDb, is_active_set};
|
2023-04-11 23:04:53 +00:00
|
|
|
|
|
|
|
#[cfg(test)]
|
2023-04-23 02:27:12 +00:00
|
|
|
pub mod tests;
|
2023-04-11 23:04:53 +00:00
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
#[derive(Clone)]
|
2023-04-24 03:15:15 +00:00
|
|
|
pub struct ActiveTributary<D: Db, P: P2p> {
|
2023-04-24 06:50:03 +00:00
|
|
|
pub spec: TributarySpec,
|
2023-09-25 23:28:53 +00:00
|
|
|
pub tributary: Arc<Tributary<D, Transaction, P>>,
|
2023-04-24 03:15:15 +00:00
|
|
|
}
|
|
|
|
|
2023-10-13 01:55:25 +00:00
|
|
|
// Creates a new tributary and sends it to all listeners.
|
2023-09-25 22:27:16 +00:00
|
|
|
async fn add_tributary<D: Db, Pro: Processors, P: P2p>(
|
2023-04-24 03:15:15 +00:00
|
|
|
db: D,
|
2023-04-16 07:16:53 +00:00
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
2023-09-25 22:27:16 +00:00
|
|
|
processors: &Pro,
|
2023-04-16 07:16:53 +00:00
|
|
|
p2p: P,
|
2023-09-25 23:28:53 +00:00
|
|
|
tributaries: &broadcast::Sender<ActiveTributary<D, P>>,
|
2023-04-24 03:15:15 +00:00
|
|
|
spec: TributarySpec,
|
2023-09-25 23:28:53 +00:00
|
|
|
) {
|
2023-08-01 23:00:48 +00:00
|
|
|
log::info!("adding tributary {:?}", spec.set());
|
|
|
|
|
2023-04-24 03:15:15 +00:00
|
|
|
let tributary = Tributary::<_, Transaction, _>::new(
|
2023-08-30 21:25:04 +00:00
|
|
|
// TODO2: Use a db on a distinct volume to protect against DoS attacks
|
2023-04-24 03:15:15 +00:00
|
|
|
db,
|
|
|
|
spec.genesis(),
|
|
|
|
spec.start_time(),
|
2023-09-25 22:27:16 +00:00
|
|
|
key.clone(),
|
2023-04-24 03:15:15 +00:00
|
|
|
spec.validators(),
|
|
|
|
p2p,
|
|
|
|
)
|
|
|
|
.await
|
|
|
|
.unwrap();
|
|
|
|
|
2023-09-25 22:27:16 +00:00
|
|
|
// Trigger a DKG for the newly added Tributary
|
|
|
|
// If we're rebooting, we'll re-fire this message
|
|
|
|
// This is safe due to the message-queue deduplicating based off the intent system
|
|
|
|
let set = spec.set();
|
|
|
|
processors
|
|
|
|
.send(
|
|
|
|
set.network,
|
2023-09-29 08:19:59 +00:00
|
|
|
processor_messages::key_gen::CoordinatorMessage::GenerateKey {
|
|
|
|
id: processor_messages::key_gen::KeyGenId { set, attempt: 0 },
|
|
|
|
params: frost::ThresholdParams::new(
|
|
|
|
spec.t(),
|
|
|
|
spec.n(),
|
|
|
|
spec
|
|
|
|
.i(Ristretto::generator() * key.deref())
|
|
|
|
.expect("adding a tributary for a set we aren't in set for"),
|
|
|
|
)
|
|
|
|
.unwrap(),
|
|
|
|
},
|
2023-09-25 22:27:16 +00:00
|
|
|
)
|
|
|
|
.await;
|
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
tributaries
|
|
|
|
.send(ActiveTributary { spec, tributary: Arc::new(tributary) })
|
|
|
|
.map_err(|_| "all ActiveTributary recipients closed")
|
|
|
|
.unwrap();
|
2023-04-24 03:15:15 +00:00
|
|
|
}
|
|
|
|
|
2023-09-27 04:44:31 +00:00
|
|
|
async fn publish_signed_transaction<D: Db, P: P2p>(
|
2023-10-14 03:36:07 +00:00
|
|
|
txn: &mut D::Transaction<'_>,
|
2023-05-09 02:20:51 +00:00
|
|
|
tributary: &Tributary<D, Transaction, P>,
|
|
|
|
tx: Transaction,
|
|
|
|
) {
|
2023-08-13 06:21:56 +00:00
|
|
|
log::debug!("publishing transaction {}", hex::encode(tx.hash()));
|
2023-09-27 04:44:31 +00:00
|
|
|
|
|
|
|
let signer = if let TransactionKind::Signed(signed) = tx.kind() {
|
|
|
|
let signer = signed.signer;
|
|
|
|
|
|
|
|
// Safe as we should deterministically create transactions, meaning if this is already on-disk,
|
|
|
|
// it's what we're saving now
|
2023-10-14 03:36:07 +00:00
|
|
|
MainDb::<D>::save_signed_transaction(txn, signed.nonce, tx);
|
2023-09-27 04:44:31 +00:00
|
|
|
|
|
|
|
signer
|
2023-05-09 02:20:51 +00:00
|
|
|
} else {
|
2023-09-25 22:07:26 +00:00
|
|
|
panic!("non-signed transaction passed to publish_signed_transaction");
|
2023-09-27 04:44:31 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// If we're trying to publish 5, when the last transaction published was 3, this will delay
|
|
|
|
// publication until the point in time we publish 4
|
|
|
|
while let Some(tx) = MainDb::<D>::take_signed_transaction(
|
2023-10-14 03:36:07 +00:00
|
|
|
txn,
|
2023-09-27 04:44:31 +00:00
|
|
|
tributary
|
|
|
|
.next_nonce(signer)
|
|
|
|
.await
|
|
|
|
.expect("we don't have a nonce, meaning we aren't a participant on this tributary"),
|
|
|
|
) {
|
|
|
|
// We should've created a valid transaction
|
|
|
|
// This does assume publish_signed_transaction hasn't been called twice with the same
|
|
|
|
// transaction, which risks a race condition on the validity of this assert
|
|
|
|
// Our use case only calls this function sequentially
|
|
|
|
assert!(tributary.add_transaction(tx).await, "created an invalid transaction");
|
2023-05-09 02:20:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
async fn handle_processor_message<D: Db, P: P2p>(
|
|
|
|
db: &mut D,
|
|
|
|
key: &Zeroizing<<Ristretto as Ciphersuite>::F>,
|
|
|
|
serai: &Serai,
|
|
|
|
tributaries: &HashMap<Session, ActiveTributary<D, P>>,
|
2023-10-13 07:36:59 +00:00
|
|
|
network: NetworkId,
|
2023-10-14 03:36:07 +00:00
|
|
|
msg: &processors::Message,
|
|
|
|
) -> bool {
|
|
|
|
if MainDb::<D>::handled_message(db, msg.network, msg.id) {
|
|
|
|
return true;
|
2023-10-13 07:36:59 +00:00
|
|
|
}
|
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
let mut txn = db.txn();
|
2023-09-01 04:03:53 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
let mut relevant_tributary = match &msg.msg {
|
|
|
|
// We'll only receive these if we fired GenerateKey, which we'll only do if we're
|
|
|
|
// in-set, making the Tributary relevant
|
|
|
|
ProcessorMessage::KeyGen(inner_msg) => match inner_msg {
|
|
|
|
key_gen::ProcessorMessage::Commitments { id, .. } => Some(id.set.session),
|
|
|
|
key_gen::ProcessorMessage::Shares { id, .. } => Some(id.set.session),
|
|
|
|
key_gen::ProcessorMessage::GeneratedKeyPair { id, .. } => Some(id.set.session),
|
|
|
|
},
|
|
|
|
// TODO: Review replacing key with Session in messages?
|
|
|
|
ProcessorMessage::Sign(inner_msg) => match inner_msg {
|
|
|
|
// We'll only receive Preprocess and Share if we're actively signing
|
|
|
|
sign::ProcessorMessage::Preprocess { id, .. } => {
|
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, &id.key).unwrap())
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
sign::ProcessorMessage::Share { id, .. } => {
|
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, &id.key).unwrap())
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
// While the Processor's Scanner will always emit Completed, that's routed through the
|
|
|
|
// Signer and only becomes a ProcessorMessage::Completed if the Signer is present and
|
|
|
|
// confirms it
|
|
|
|
sign::ProcessorMessage::Completed { key, .. } => {
|
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, key).unwrap())
|
|
|
|
}
|
|
|
|
},
|
|
|
|
ProcessorMessage::Coordinator(inner_msg) => match inner_msg {
|
|
|
|
// This is a special case as it's relevant to *all* Tributaries for this network
|
|
|
|
// It doesn't return a Tributary to become `relevant_tributary` though
|
|
|
|
coordinator::ProcessorMessage::SubstrateBlockAck { network, block, plans } => {
|
|
|
|
assert_eq!(
|
|
|
|
*network, msg.network,
|
|
|
|
"processor claimed to be a different network than it was for SubstrateBlockAck",
|
|
|
|
);
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// TODO: Find all Tributaries active at this Substrate block, and make sure we have
|
|
|
|
// them all (if we were present in them)
|
2023-09-01 02:09:29 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
for tributary in tributaries.values() {
|
|
|
|
// TODO: This needs to be scoped per multisig
|
|
|
|
TributaryDb::<D>::set_plan_ids(&mut txn, tributary.spec.genesis(), *block, plans);
|
2023-09-27 04:00:31 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
let tx = Transaction::SubstrateBlock(*block);
|
|
|
|
log::trace!("processor message effected transaction {}", hex::encode(tx.hash()));
|
|
|
|
log::trace!("providing transaction {}", hex::encode(tx.hash()));
|
|
|
|
let res = tributary.tributary.provide_transaction(tx).await;
|
|
|
|
if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) {
|
|
|
|
if res == Err(ProvidedError::LocalMismatchesOnChain) {
|
|
|
|
// Spin, since this is a crit for this Tributary
|
|
|
|
loop {
|
|
|
|
log::error!(
|
|
|
|
"{}. tributary: {}, provided: SubstrateBlock({})",
|
|
|
|
"tributary added distinct provided to delayed locally provided TX",
|
|
|
|
hex::encode(tributary.spec.genesis()),
|
|
|
|
block,
|
|
|
|
);
|
|
|
|
sleep(Duration::from_secs(60)).await;
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
panic!("provided an invalid transaction: {res:?}");
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
}
|
2023-09-29 07:51:01 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
None
|
|
|
|
}
|
|
|
|
// We'll only fire these if we are the Substrate signer, making the Tributary relevant
|
|
|
|
coordinator::ProcessorMessage::BatchPreprocess { id, .. } => {
|
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, &id.key).unwrap())
|
|
|
|
}
|
|
|
|
coordinator::ProcessorMessage::BatchShare { id, .. } => {
|
|
|
|
Some(SubstrateDb::<D>::session_for_key(&txn, &id.key).unwrap())
|
|
|
|
}
|
|
|
|
},
|
|
|
|
// These don't return a relevant Tributary as there's no Tributary with action expected
|
|
|
|
ProcessorMessage::Substrate(inner_msg) => match inner_msg {
|
|
|
|
processor_messages::substrate::ProcessorMessage::Batch { batch } => {
|
|
|
|
assert_eq!(
|
|
|
|
batch.network, msg.network,
|
|
|
|
"processor sent us a batch for a different network than it was for",
|
|
|
|
);
|
|
|
|
let this_batch_id = batch.id;
|
|
|
|
MainDb::<D>::save_expected_batch(&mut txn, batch);
|
2023-09-29 07:51:01 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// Re-define batch
|
|
|
|
// We can't drop it, yet it shouldn't be accidentally used in the following block
|
|
|
|
#[allow(clippy::let_unit_value, unused_variables)]
|
|
|
|
let batch = ();
|
2023-09-29 07:51:01 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// This won't be complete, as this call is when a `Batch` message is received, which
|
|
|
|
// will be before we get a `SignedBatch`
|
|
|
|
// It is, however, incremental
|
|
|
|
// When we need a complete version, we use another call, continuously called as-needed
|
|
|
|
substrate::verify_published_batches::<D>(&mut txn, msg.network, this_batch_id).await;
|
2023-09-27 04:00:31 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
None
|
|
|
|
}
|
|
|
|
// If this is a new Batch, immediately publish it (if we can)
|
|
|
|
processor_messages::substrate::ProcessorMessage::SignedBatch { batch } => {
|
|
|
|
assert_eq!(
|
|
|
|
batch.batch.network, msg.network,
|
|
|
|
"processor sent us a signed batch for a different network than it was for",
|
|
|
|
);
|
|
|
|
// TODO: Check this key's key pair's substrate key is authorized to publish batches
|
2023-09-26 01:54:52 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
log::debug!("received batch {:?} {}", batch.batch.network, batch.batch.id);
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// Save this batch to the disk
|
|
|
|
MainDb::<D>::save_batch(&mut txn, batch.clone());
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// Get the next-to-execute batch ID
|
|
|
|
let mut next = substrate::get_expected_next_batch(serai, network).await;
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// Since we have a new batch, publish all batches yet to be published to Serai
|
|
|
|
// This handles the edge-case where batch n+1 is signed before batch n is
|
|
|
|
let mut batches = VecDeque::new();
|
|
|
|
while let Some(batch) = MainDb::<D>::batch(&txn, network, next) {
|
|
|
|
batches.push_back(batch);
|
|
|
|
next += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
let start_id = batches.front().map(|batch| batch.batch.id);
|
|
|
|
let last_id = batches.back().map(|batch| batch.batch.id);
|
|
|
|
while let Some(batch) = batches.pop_front() {
|
|
|
|
// If this Batch should no longer be published, continue
|
|
|
|
if substrate::get_expected_next_batch(serai, network).await > batch.batch.id {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
let tx = Serai::execute_batch(batch.clone());
|
|
|
|
log::debug!("attempting to publish batch {:?} {}", batch.batch.network, batch.batch.id,);
|
|
|
|
// This publish may fail if this transaction already exists in the mempool, which is
|
|
|
|
// possible, or if this batch was already executed on-chain
|
|
|
|
// Either case will have eventual resolution and be handled by the above check on if
|
|
|
|
// this batch should execute
|
|
|
|
let res = serai.publish(&tx).await;
|
|
|
|
if res.is_ok() {
|
|
|
|
log::info!(
|
|
|
|
"published batch {network:?} {} (block {})",
|
|
|
|
batch.batch.id,
|
|
|
|
hex::encode(batch.batch.block),
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
log::debug!(
|
|
|
|
"couldn't publish batch {:?} {}: {:?}",
|
|
|
|
batch.batch.network,
|
|
|
|
batch.batch.id,
|
|
|
|
res,
|
|
|
|
);
|
|
|
|
// If we failed to publish it, restore it
|
|
|
|
batches.push_front(batch);
|
|
|
|
// Sleep for a few seconds before retrying to prevent hammering the node
|
|
|
|
sleep(Duration::from_secs(5)).await;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Verify the `Batch`s we just published
|
|
|
|
if let Some(last_id) = last_id {
|
|
|
|
loop {
|
|
|
|
let verified =
|
|
|
|
substrate::verify_published_batches::<D>(&mut txn, msg.network, last_id).await;
|
|
|
|
if verified == Some(last_id) {
|
|
|
|
break;
|
2023-10-13 16:14:59 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
}
|
|
|
|
}
|
2023-05-10 05:45:42 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// Check if any of these `Batch`s were a handover `Batch`
|
|
|
|
// If so, we need to publish any delayed `Batch` provided transactions
|
|
|
|
let mut relevant = None;
|
|
|
|
if let Some(start_id) = start_id {
|
|
|
|
let last_id = last_id.unwrap();
|
|
|
|
for batch in start_id .. last_id {
|
|
|
|
if let Some(set) = MainDb::<D>::is_handover_batch(&txn, msg.network, batch) {
|
|
|
|
// relevant may already be Some. This is a safe over-write, as we don't need to
|
|
|
|
// be concerned for handovers of Tributaries which have completed their handovers
|
|
|
|
// While this does bypass the checks that Tributary would've performed at the
|
|
|
|
// time, if we ever actually participate in a handover, we will verify *all*
|
|
|
|
// prior `Batch`s, including the ones which would've been explicitly verified
|
|
|
|
// then
|
|
|
|
//
|
|
|
|
// We should only declare this session relevant if it's relevant to us
|
|
|
|
// We only set handover `Batch`s when we're trying to produce said `Batch`, so this
|
|
|
|
// would be a `Batch` we were involved in the production of
|
|
|
|
// Accordingly, it's relevant
|
|
|
|
relevant = Some(set.session);
|
2023-10-13 16:14:59 +00:00
|
|
|
}
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
2023-10-14 01:46:17 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
relevant
|
2023-10-14 01:46:17 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
},
|
|
|
|
};
|
2023-10-14 01:46:17 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// If we have a relevant Tributary, check it's actually still relevant and has yet to be retired
|
|
|
|
if let Some(relevant_tributary_value) = relevant_tributary {
|
|
|
|
if !is_active_set(
|
|
|
|
serai,
|
|
|
|
ValidatorSet { network: msg.network, session: relevant_tributary_value },
|
|
|
|
)
|
|
|
|
.await
|
|
|
|
{
|
|
|
|
relevant_tributary = None;
|
|
|
|
}
|
|
|
|
}
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// If there's a relevant Tributary...
|
|
|
|
if let Some(relevant_tributary) = relevant_tributary {
|
|
|
|
// Make sure we have it
|
|
|
|
// Per the reasoning above, we only return a Tributary as relevant if we're a participant
|
|
|
|
// Accordingly, we do *need* to have this Tributary now to handle it UNLESS the Tributary has
|
|
|
|
// already completed and this is simply an old message (which we prior checked)
|
|
|
|
let Some(ActiveTributary { spec, tributary }) = tributaries.get(&relevant_tributary) else {
|
|
|
|
// Since we don't, sleep for a fraction of a second and return false, signaling we didn't
|
|
|
|
// handle this message
|
|
|
|
// At the start of the loop which calls this function, we'll check for new tributaries, making
|
|
|
|
// this eventually resolve
|
|
|
|
sleep(Duration::from_millis(100)).await;
|
|
|
|
return false;
|
|
|
|
};
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
let genesis = spec.genesis();
|
|
|
|
let pub_key = Ristretto::generator() * key.deref();
|
2023-09-27 04:00:31 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
let txs = match msg.msg.clone() {
|
|
|
|
ProcessorMessage::KeyGen(inner_msg) => match inner_msg {
|
|
|
|
key_gen::ProcessorMessage::Commitments { id, commitments } => {
|
|
|
|
vec![Transaction::DkgCommitments(id.attempt, commitments, Transaction::empty_signed())]
|
|
|
|
}
|
|
|
|
key_gen::ProcessorMessage::Shares { id, mut shares } => {
|
|
|
|
// Create a MuSig-based machine to inform Substrate of this key generation
|
|
|
|
let nonces = crate::tributary::dkg_confirmation_nonces(key, spec, id.attempt);
|
|
|
|
|
|
|
|
let mut tx_shares = Vec::with_capacity(shares.len());
|
|
|
|
for i in 1 ..= spec.n() {
|
|
|
|
let i = Participant::new(i).unwrap();
|
|
|
|
if i ==
|
|
|
|
spec
|
|
|
|
.i(pub_key)
|
|
|
|
.expect("processor message to DKG for a session we aren't a validator in")
|
|
|
|
{
|
|
|
|
continue;
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
tx_shares
|
|
|
|
.push(shares.remove(&i).expect("processor didn't send share for another validator"));
|
|
|
|
}
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
vec![Transaction::DkgShares {
|
|
|
|
attempt: id.attempt,
|
|
|
|
shares: tx_shares,
|
|
|
|
confirmation_nonces: nonces,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
}]
|
|
|
|
}
|
|
|
|
key_gen::ProcessorMessage::GeneratedKeyPair { id, substrate_key, network_key } => {
|
|
|
|
assert_eq!(
|
|
|
|
id.set.network, msg.network,
|
|
|
|
"processor claimed to be a different network than it was for GeneratedKeyPair",
|
|
|
|
);
|
|
|
|
// TODO2: Also check the other KeyGenId fields
|
|
|
|
|
|
|
|
// Tell the Tributary the key pair, get back the share for the MuSig signature
|
|
|
|
let share = crate::tributary::generated_key_pair::<D>(
|
|
|
|
&mut txn,
|
|
|
|
key,
|
|
|
|
spec,
|
|
|
|
&(Public(substrate_key), network_key.try_into().unwrap()),
|
|
|
|
id.attempt,
|
|
|
|
);
|
|
|
|
|
|
|
|
match share {
|
|
|
|
Ok(share) => {
|
|
|
|
vec![Transaction::DkgConfirmed(id.attempt, share, Transaction::empty_signed())]
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
Err(p) => {
|
|
|
|
todo!("participant {p:?} sent invalid DKG confirmation preprocesses")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
ProcessorMessage::Sign(msg) => match msg {
|
|
|
|
sign::ProcessorMessage::Preprocess { id, preprocess } => {
|
|
|
|
if id.attempt == 0 {
|
|
|
|
MainDb::<D>::save_first_preprocess(&mut txn, network, id.id, preprocess);
|
|
|
|
|
|
|
|
vec![]
|
|
|
|
} else {
|
|
|
|
vec![Transaction::SignPreprocess(SignData {
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
plan: id.id,
|
|
|
|
attempt: id.attempt,
|
2023-10-14 03:36:07 +00:00
|
|
|
data: preprocess,
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handled Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
signed: Transaction::empty_signed(),
|
2023-10-14 03:36:07 +00:00
|
|
|
})]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sign::ProcessorMessage::Share { id, share } => vec![Transaction::SignShare(SignData {
|
|
|
|
plan: id.id,
|
|
|
|
attempt: id.attempt,
|
|
|
|
data: share,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
})],
|
|
|
|
sign::ProcessorMessage::Completed { key: _, id, tx } => {
|
|
|
|
let r = Zeroizing::new(<Ristretto as Ciphersuite>::F::random(&mut OsRng));
|
|
|
|
#[allow(non_snake_case)]
|
|
|
|
let R = <Ristretto as Ciphersuite>::generator() * r.deref();
|
|
|
|
let mut tx = Transaction::SignCompleted {
|
|
|
|
plan: id,
|
|
|
|
tx_hash: tx,
|
|
|
|
first_signer: pub_key,
|
|
|
|
signature: SchnorrSignature { R, s: <Ristretto as Ciphersuite>::F::ZERO },
|
|
|
|
};
|
|
|
|
let signed = SchnorrSignature::sign(key, r, tx.sign_completed_challenge());
|
|
|
|
match &mut tx {
|
|
|
|
Transaction::SignCompleted { signature, .. } => {
|
|
|
|
*signature = signed;
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
_ => unreachable!(),
|
|
|
|
}
|
|
|
|
vec![tx]
|
|
|
|
}
|
|
|
|
},
|
|
|
|
ProcessorMessage::Coordinator(inner_msg) => match inner_msg {
|
|
|
|
coordinator::ProcessorMessage::SubstrateBlockAck { .. } => unreachable!(),
|
|
|
|
coordinator::ProcessorMessage::BatchPreprocess { id, block, preprocess } => {
|
|
|
|
log::info!(
|
|
|
|
"informed of batch (sign ID {}, attempt {}) for block {}",
|
|
|
|
hex::encode(id.id),
|
|
|
|
id.attempt,
|
|
|
|
hex::encode(block),
|
|
|
|
);
|
|
|
|
|
|
|
|
// If this is the first attempt instance, wait until we synchronize around the batch
|
|
|
|
// first
|
|
|
|
if id.attempt == 0 {
|
|
|
|
MainDb::<D>::save_first_preprocess(&mut txn, spec.set().network, id.id, preprocess);
|
|
|
|
|
|
|
|
// If this is the new key's first Batch, only create this TX once we verify all
|
|
|
|
// prior published `Batch`s
|
|
|
|
let last_received = MainDb::<D>::last_received_batch(&txn, msg.network).unwrap();
|
|
|
|
let handover_batch = MainDb::<D>::handover_batch(&txn, spec.set());
|
|
|
|
if handover_batch.is_none() {
|
|
|
|
MainDb::<D>::set_handover_batch(&mut txn, spec.set(), last_received);
|
|
|
|
if last_received != 0 {
|
|
|
|
// Decrease by 1, to get the ID of the Batch prior to this Batch
|
|
|
|
let prior_sets_last_batch = last_received - 1;
|
|
|
|
loop {
|
|
|
|
let successfully_verified = substrate::verify_published_batches::<D>(
|
|
|
|
&mut txn,
|
|
|
|
msg.network,
|
|
|
|
prior_sets_last_batch,
|
|
|
|
)
|
|
|
|
.await;
|
|
|
|
if successfully_verified == Some(prior_sets_last_batch) {
|
|
|
|
break;
|
2023-10-13 16:14:59 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
sleep(Duration::from_secs(5)).await;
|
2023-10-13 16:14:59 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
}
|
|
|
|
}
|
2023-10-13 16:14:59 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// There is a race condition here. We may verify all `Batch`s from the prior set,
|
|
|
|
// start signing the handover `Batch` `n`, start signing `n+1`, have `n+1` signed
|
|
|
|
// before `n` (or at the same time), yet then the prior set forges a malicious
|
|
|
|
// `Batch` `n`.
|
|
|
|
//
|
|
|
|
// The malicious `Batch` `n` would be publishable to Serai, as Serai can't
|
|
|
|
// distinguish what's intended to be a handover `Batch`, yet then anyone could
|
|
|
|
// publish the new set's `n+1`, causing their acceptance of the handover.
|
|
|
|
//
|
|
|
|
// To fix this, if this is after the handover `Batch` and we have yet to verify
|
|
|
|
// publication of the handover `Batch`, don't yet yield the provided.
|
|
|
|
let handover_batch = MainDb::<D>::handover_batch(&txn, spec.set()).unwrap();
|
|
|
|
let intended = Transaction::Batch(block.0, id.id);
|
|
|
|
let mut res = vec![intended.clone()];
|
|
|
|
if last_received > handover_batch {
|
|
|
|
if let Some(last_verified) = MainDb::<D>::last_verified_batch(&txn, msg.network) {
|
|
|
|
if last_verified < handover_batch {
|
|
|
|
res = vec![];
|
2023-10-13 16:14:59 +00:00
|
|
|
}
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
} else {
|
2023-10-14 03:36:07 +00:00
|
|
|
res = vec![];
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
|
|
|
|
if res.is_empty() {
|
|
|
|
MainDb::<D>::queue_batch(&mut txn, spec.set(), intended);
|
2023-10-13 16:14:59 +00:00
|
|
|
}
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
res
|
|
|
|
} else {
|
|
|
|
vec![Transaction::BatchPreprocess(SignData {
|
|
|
|
plan: id.id,
|
|
|
|
attempt: id.attempt,
|
|
|
|
data: preprocess,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
})]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
coordinator::ProcessorMessage::BatchShare { id, share } => {
|
|
|
|
vec![Transaction::BatchShare(SignData {
|
|
|
|
plan: id.id,
|
|
|
|
attempt: id.attempt,
|
|
|
|
data: share.to_vec(),
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
})]
|
|
|
|
}
|
|
|
|
},
|
|
|
|
ProcessorMessage::Substrate(inner_msg) => match inner_msg {
|
|
|
|
processor_messages::substrate::ProcessorMessage::Batch { .. } => unreachable!(),
|
|
|
|
processor_messages::substrate::ProcessorMessage::SignedBatch { .. } => {
|
|
|
|
// We only reach here if this SignedBatch triggered the publication of a handover
|
|
|
|
// Batch
|
|
|
|
// Since the handover `Batch` was successfully published and verified, we no longer
|
|
|
|
// have to worry about the above n+1 attack
|
|
|
|
MainDb::<D>::take_queued_batches(&mut txn, spec.set())
|
|
|
|
}
|
|
|
|
},
|
|
|
|
};
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// If this created transactions, publish them
|
|
|
|
for mut tx in txs {
|
|
|
|
log::trace!("processor message effected transaction {}", hex::encode(tx.hash()));
|
|
|
|
|
|
|
|
match tx.kind() {
|
|
|
|
TransactionKind::Provided(_) => {
|
|
|
|
log::trace!("providing transaction {}", hex::encode(tx.hash()));
|
|
|
|
let res = tributary.provide_transaction(tx.clone()).await;
|
|
|
|
if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) {
|
|
|
|
if res == Err(ProvidedError::LocalMismatchesOnChain) {
|
|
|
|
// Spin, since this is a crit for this Tributary
|
|
|
|
loop {
|
|
|
|
log::error!(
|
|
|
|
"{}. tributary: {}, provided: {:?}",
|
|
|
|
"tributary added distinct provided to delayed locally provided TX",
|
|
|
|
hex::encode(spec.genesis()),
|
|
|
|
&tx,
|
|
|
|
);
|
|
|
|
sleep(Duration::from_secs(60)).await;
|
Start moving Coordinator to a multi-Tributary model
Prior, we only supported a single Tributary per network, and spawned a task to
handle Processor messages per Tributary. Now, we handle Processor messages per
network, yet we still only supported a single Tributary in that handling
function.
Now, when we handle a message, we load the Tributary which is relevant. Once we
know it, we ensure we have it (preventing race conditions), and then proceed.
We do need work to check if we should have a Tributary, or if we're not
participating. We also need to check if a Tributary has been retired, meaning
we shouldn't handle any transactions related to them, and to clean up retired
Tributaries.
2023-09-27 22:20:36 +00:00
|
|
|
}
|
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
panic!("provided an invalid transaction: {res:?}");
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
TransactionKind::Unsigned => {
|
|
|
|
log::trace!("publishing unsigned transaction {}", hex::encode(tx.hash()));
|
|
|
|
// Ignores the result since we can't differentiate already in-mempool from
|
|
|
|
// already on-chain from invalid
|
|
|
|
// TODO: Don't ignore the result
|
|
|
|
tributary.add_transaction(tx).await;
|
|
|
|
}
|
|
|
|
TransactionKind::Signed(_) => {
|
|
|
|
log::trace!("getting next nonce for Tributary TX in response to processor message");
|
|
|
|
|
|
|
|
let nonce = loop {
|
|
|
|
let Some(nonce) =
|
|
|
|
NonceDecider::<D>::nonce(&txn, genesis, &tx).expect("signed TX didn't have nonce")
|
|
|
|
else {
|
|
|
|
// This can be None if the following events occur, in order:
|
|
|
|
// 1) We scanned the relevant transaction(s) in a Tributary block
|
|
|
|
// 2) The processor was sent a message and responded
|
|
|
|
// 3) The Tributary TXN has yet to be committed
|
|
|
|
log::warn!("nonce has yet to be saved for processor-instigated transaction");
|
|
|
|
sleep(Duration::from_millis(100)).await;
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
break nonce;
|
|
|
|
};
|
|
|
|
tx.sign(&mut OsRng, genesis, key, nonce);
|
|
|
|
|
|
|
|
publish_signed_transaction(&mut txn, tributary, tx).await;
|
|
|
|
}
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
2023-10-14 03:36:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
MainDb::<D>::save_handled_message(&mut txn, msg.network, msg.id);
|
|
|
|
txn.commit();
|
|
|
|
|
|
|
|
true
|
|
|
|
}
|
2023-09-27 04:00:31 +00:00
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
|
|
|
|
mut db: D,
|
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
|
|
|
serai: Arc<Serai>,
|
|
|
|
mut processors: Pro,
|
|
|
|
network: NetworkId,
|
|
|
|
mut new_tributary: mpsc::UnboundedReceiver<ActiveTributary<D, P>>,
|
|
|
|
) {
|
|
|
|
let mut tributaries = HashMap::new();
|
|
|
|
loop {
|
|
|
|
match new_tributary.try_recv() {
|
|
|
|
Ok(tributary) => {
|
|
|
|
let set = tributary.spec.set();
|
|
|
|
assert_eq!(set.network, network);
|
|
|
|
tributaries.insert(set.session, tributary);
|
|
|
|
}
|
|
|
|
Err(mpsc::error::TryRecvError::Empty) => {}
|
|
|
|
Err(mpsc::error::TryRecvError::Disconnected) => {
|
|
|
|
panic!("handle_processor_messages new_tributary sender closed")
|
|
|
|
}
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
|
|
|
|
2023-10-14 03:36:07 +00:00
|
|
|
// TODO: Check this ID is sane (last handled ID or expected next ID)
|
|
|
|
let msg = processors.recv(network).await;
|
|
|
|
if handle_processor_message(&mut db, &key, &serai, &tributaries, network, &msg).await {
|
|
|
|
processors.ack(msg).await;
|
|
|
|
}
|
2023-09-27 04:00:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn handle_processors<D: Db, Pro: Processors, P: P2p>(
|
|
|
|
db: D,
|
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
|
|
|
serai: Arc<Serai>,
|
2023-09-27 16:20:57 +00:00
|
|
|
processors: Pro,
|
2023-09-27 04:00:31 +00:00
|
|
|
mut new_tributary: broadcast::Receiver<ActiveTributary<D, P>>,
|
|
|
|
) {
|
2023-09-27 16:20:57 +00:00
|
|
|
let mut channels = HashMap::new();
|
2023-10-13 03:59:21 +00:00
|
|
|
for network in serai_client::primitives::NETWORKS {
|
|
|
|
if network == NetworkId::Serai {
|
|
|
|
continue;
|
|
|
|
}
|
2023-09-27 16:20:57 +00:00
|
|
|
let (send, recv) = mpsc::unbounded_channel();
|
|
|
|
tokio::spawn(handle_processor_messages(
|
|
|
|
db.clone(),
|
|
|
|
key.clone(),
|
|
|
|
serai.clone(),
|
|
|
|
processors.clone(),
|
|
|
|
network,
|
|
|
|
recv,
|
|
|
|
));
|
|
|
|
channels.insert(network, send);
|
|
|
|
}
|
2023-08-13 06:21:56 +00:00
|
|
|
|
2023-09-27 16:20:57 +00:00
|
|
|
// Listen to new tributary events
|
2023-09-26 01:54:52 +00:00
|
|
|
loop {
|
2023-09-27 16:20:57 +00:00
|
|
|
let tributary = new_tributary.recv().await.unwrap();
|
|
|
|
channels[&tributary.spec.set().network].send(tributary).unwrap();
|
2023-04-25 07:14:42 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-10 03:44:41 +00:00
|
|
|
pub async fn run<D: Db, Pro: Processors, P: P2p>(
|
2023-09-27 04:00:31 +00:00
|
|
|
raw_db: D,
|
2023-04-24 03:15:15 +00:00
|
|
|
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
|
|
|
|
p2p: P,
|
2023-05-10 03:44:41 +00:00
|
|
|
processors: Pro,
|
2023-04-24 03:15:15 +00:00
|
|
|
serai: Serai,
|
|
|
|
) {
|
2023-08-14 10:08:55 +00:00
|
|
|
let serai = Arc::new(serai);
|
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
let (new_tributary_spec_send, mut new_tributary_spec_recv) = mpsc::unbounded_channel();
|
|
|
|
// Reload active tributaries from the database
|
2023-09-27 04:00:31 +00:00
|
|
|
for spec in MainDb::<D>::active_tributaries(&raw_db).1 {
|
2023-09-25 23:28:53 +00:00
|
|
|
new_tributary_spec_send.send(spec).unwrap();
|
|
|
|
}
|
2023-09-25 22:23:39 +00:00
|
|
|
|
2023-04-24 03:15:15 +00:00
|
|
|
// Handle new Substrate blocks
|
2023-10-14 02:31:26 +00:00
|
|
|
tokio::spawn(crate::substrate::scan_task(
|
2023-09-25 22:23:39 +00:00
|
|
|
raw_db.clone(),
|
|
|
|
key.clone(),
|
|
|
|
processors.clone(),
|
|
|
|
serai.clone(),
|
2023-09-25 23:28:53 +00:00
|
|
|
new_tributary_spec_send,
|
2023-09-25 22:23:39 +00:00
|
|
|
));
|
2023-04-24 03:15:15 +00:00
|
|
|
|
|
|
|
// Handle the Tributaries
|
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
// This should be large enough for an entire rotation of all tributaries
|
|
|
|
// If it's too small, the coordinator fail to boot, which is a decent sanity check
|
|
|
|
let (new_tributary, mut new_tributary_listener_1) = broadcast::channel(32);
|
|
|
|
let new_tributary_listener_2 = new_tributary.subscribe();
|
|
|
|
let new_tributary_listener_3 = new_tributary.subscribe();
|
|
|
|
let new_tributary_listener_4 = new_tributary.subscribe();
|
|
|
|
let new_tributary_listener_5 = new_tributary.subscribe();
|
2023-04-24 03:15:15 +00:00
|
|
|
|
2023-09-25 23:28:53 +00:00
|
|
|
// Spawn a task to further add Tributaries as needed
|
|
|
|
tokio::spawn({
|
|
|
|
let raw_db = raw_db.clone();
|
|
|
|
let key = key.clone();
|
|
|
|
let processors = processors.clone();
|
|
|
|
let p2p = p2p.clone();
|
|
|
|
async move {
|
|
|
|
loop {
|
|
|
|
let spec = new_tributary_spec_recv.recv().await.unwrap();
|
2023-10-13 01:55:25 +00:00
|
|
|
// Uses an inner task as Tributary::new may take several seconds
|
|
|
|
tokio::spawn({
|
|
|
|
let raw_db = raw_db.clone();
|
|
|
|
let key = key.clone();
|
|
|
|
let processors = processors.clone();
|
|
|
|
let p2p = p2p.clone();
|
|
|
|
let new_tributary = new_tributary.clone();
|
|
|
|
async move {
|
|
|
|
add_tributary(raw_db, key, &processors, p2p, &new_tributary, spec).await;
|
|
|
|
}
|
|
|
|
});
|
2023-09-25 23:28:53 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
2023-04-24 03:15:15 +00:00
|
|
|
|
2023-08-25 01:55:59 +00:00
|
|
|
// When we reach synchrony on an event requiring signing, send our preprocess for it
|
|
|
|
let recognized_id = {
|
2023-05-09 02:20:51 +00:00
|
|
|
let raw_db = raw_db.clone();
|
|
|
|
let key = key.clone();
|
2023-09-25 23:28:53 +00:00
|
|
|
|
|
|
|
let tributaries = Arc::new(RwLock::new(HashMap::new()));
|
|
|
|
tokio::spawn({
|
|
|
|
let tributaries = tributaries.clone();
|
|
|
|
async move {
|
|
|
|
loop {
|
|
|
|
match new_tributary_listener_1.recv().await {
|
|
|
|
Ok(tributary) => {
|
2023-09-26 01:54:52 +00:00
|
|
|
tributaries.write().await.insert(tributary.spec.genesis(), tributary.tributary);
|
2023-09-25 23:28:53 +00:00
|
|
|
}
|
|
|
|
Err(broadcast::error::RecvError::Lagged(_)) => {
|
|
|
|
panic!("recognized_id lagged to handle new_tributary")
|
|
|
|
}
|
|
|
|
Err(broadcast::error::RecvError::Closed) => panic!("new_tributary sender closed"),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
2023-09-25 19:42:39 +00:00
|
|
|
move |network, genesis, id_type, id, nonce| {
|
2023-09-27 04:44:31 +00:00
|
|
|
let mut raw_db = raw_db.clone();
|
2023-08-25 01:55:59 +00:00
|
|
|
let key = key.clone();
|
|
|
|
let tributaries = tributaries.clone();
|
|
|
|
async move {
|
2023-09-27 04:00:31 +00:00
|
|
|
// The transactions for these are fired before the preprocesses are actually
|
|
|
|
// received/saved, creating a race between Tributary ack and the availability of all
|
|
|
|
// Preprocesses
|
2023-08-27 01:09:57 +00:00
|
|
|
// This waits until the necessary preprocess is available
|
|
|
|
let get_preprocess = |raw_db, id| async move {
|
|
|
|
loop {
|
2023-09-27 17:00:04 +00:00
|
|
|
let Some(preprocess) = MainDb::<D>::first_preprocess(raw_db, network, id) else {
|
2023-08-27 01:09:57 +00:00
|
|
|
sleep(Duration::from_millis(100)).await;
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
return preprocess;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Replace ExternalBlock with Batch
The initial TODO was simply to use one ExternalBlock per all batches in the
block. This would require publishing ExternalBlock after the last batch,
requiring knowing the last batch. While we could add such a pipeline, it'd
require:
1) Initial preprocesses using a distinct message from BatchPreprocess
2) An additional message sent after all BatchPreprocess are sent
Unfortunately, both would require tweaks to the SubstrateSigner which aren't
worth the complexity compared to the solution here, at least, not at this time.
While this will cause, if a Tributary is signing a block whose total batch data
exceeds 25 kB, to use multiple transactions which could be optimized out by
'better' local data pipelining, that's an extreme edge case. Given the temporal
nature of each Tributary, it's also an acceptable edge.
This does no longer achieve synchrony over external blocks accordingly. While
signed batches have synchrony, as they embed their block hash, batches being
signed don't have cryptographic synchrony on their contents. This means
validators who are eclipsed may produce invalid shares, as they sign a
different batch. This will be introduced in a follow-up commit.
2023-09-01 02:48:02 +00:00
|
|
|
let mut tx = match id_type {
|
|
|
|
RecognizedIdType::Batch => Transaction::BatchPreprocess(SignData {
|
|
|
|
plan: id,
|
|
|
|
attempt: 0,
|
|
|
|
data: get_preprocess(&raw_db, id).await,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
}),
|
2023-05-09 02:20:51 +00:00
|
|
|
|
Replace ExternalBlock with Batch
The initial TODO was simply to use one ExternalBlock per all batches in the
block. This would require publishing ExternalBlock after the last batch,
requiring knowing the last batch. While we could add such a pipeline, it'd
require:
1) Initial preprocesses using a distinct message from BatchPreprocess
2) An additional message sent after all BatchPreprocess are sent
Unfortunately, both would require tweaks to the SubstrateSigner which aren't
worth the complexity compared to the solution here, at least, not at this time.
While this will cause, if a Tributary is signing a block whose total batch data
exceeds 25 kB, to use multiple transactions which could be optimized out by
'better' local data pipelining, that's an extreme edge case. Given the temporal
nature of each Tributary, it's also an acceptable edge.
This does no longer achieve synchrony over external blocks accordingly. While
signed batches have synchrony, as they embed their block hash, batches being
signed don't have cryptographic synchrony on their contents. This means
validators who are eclipsed may produce invalid shares, as they sign a
different batch. This will be introduced in a follow-up commit.
2023-09-01 02:48:02 +00:00
|
|
|
RecognizedIdType::Plan => Transaction::SignPreprocess(SignData {
|
|
|
|
plan: id,
|
|
|
|
attempt: 0,
|
|
|
|
data: get_preprocess(&raw_db, id).await,
|
|
|
|
signed: Transaction::empty_signed(),
|
|
|
|
}),
|
2023-08-25 01:55:59 +00:00
|
|
|
};
|
|
|
|
|
2023-09-25 19:42:39 +00:00
|
|
|
tx.sign(&mut OsRng, genesis, &key, nonce);
|
|
|
|
|
2023-08-25 01:55:59 +00:00
|
|
|
let tributaries = tributaries.read().await;
|
|
|
|
let Some(tributary) = tributaries.get(&genesis) else {
|
2023-09-26 01:54:52 +00:00
|
|
|
// TODO: This may happen if the task above is simply slow
|
Replace ExternalBlock with Batch
The initial TODO was simply to use one ExternalBlock per all batches in the
block. This would require publishing ExternalBlock after the last batch,
requiring knowing the last batch. While we could add such a pipeline, it'd
require:
1) Initial preprocesses using a distinct message from BatchPreprocess
2) An additional message sent after all BatchPreprocess are sent
Unfortunately, both would require tweaks to the SubstrateSigner which aren't
worth the complexity compared to the solution here, at least, not at this time.
While this will cause, if a Tributary is signing a block whose total batch data
exceeds 25 kB, to use multiple transactions which could be optimized out by
'better' local data pipelining, that's an extreme edge case. Given the temporal
nature of each Tributary, it's also an acceptable edge.
This does no longer achieve synchrony over external blocks accordingly. While
signed batches have synchrony, as they embed their block hash, batches being
signed don't have cryptographic synchrony on their contents. This means
validators who are eclipsed may produce invalid shares, as they sign a
different batch. This will be introduced in a follow-up commit.
2023-09-01 02:48:02 +00:00
|
|
|
panic!("tributary we don't have came to consensus on an Batch");
|
2023-08-25 01:55:59 +00:00
|
|
|
};
|
2023-10-14 03:36:07 +00:00
|
|
|
let mut txn = raw_db.txn();
|
|
|
|
publish_signed_transaction(&mut txn, tributary, tx).await;
|
|
|
|
txn.commit();
|
2023-05-09 02:20:51 +00:00
|
|
|
}
|
|
|
|
}
|
2023-08-25 01:55:59 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// Handle new blocks for each Tributary
|
|
|
|
{
|
|
|
|
let raw_db = raw_db.clone();
|
2023-10-14 02:31:26 +00:00
|
|
|
tokio::spawn(tributary::scanner::scan_tributaries_task(
|
2023-08-25 01:55:59 +00:00
|
|
|
raw_db,
|
|
|
|
key.clone(),
|
|
|
|
recognized_id,
|
|
|
|
processors.clone(),
|
|
|
|
serai.clone(),
|
2023-09-25 23:28:53 +00:00
|
|
|
new_tributary_listener_2,
|
2023-08-25 01:55:59 +00:00
|
|
|
));
|
|
|
|
}
|
2023-04-24 03:15:15 +00:00
|
|
|
|
|
|
|
// Spawn the heartbeat task, which will trigger syncing if there hasn't been a Tributary block
|
|
|
|
// in a while (presumably because we're behind)
|
2023-10-14 02:40:11 +00:00
|
|
|
tokio::spawn(p2p::heartbeat_tributaries_task(p2p.clone(), new_tributary_listener_3));
|
2023-04-24 03:15:15 +00:00
|
|
|
|
|
|
|
// Handle P2P messages
|
2023-10-14 02:40:11 +00:00
|
|
|
tokio::spawn(p2p::handle_p2p_task(
|
|
|
|
Ristretto::generator() * key.deref(),
|
|
|
|
p2p,
|
|
|
|
new_tributary_listener_4,
|
|
|
|
));
|
2023-04-15 21:38:47 +00:00
|
|
|
|
2023-04-25 07:14:42 +00:00
|
|
|
// Handle all messages from processors
|
2023-09-25 23:28:53 +00:00
|
|
|
handle_processors(raw_db, key, serai, processors, new_tributary_listener_5).await;
|
2023-04-15 21:38:47 +00:00
|
|
|
}
|
|
|
|
|
2023-04-11 13:21:35 +00:00
|
|
|
#[tokio::main]
|
2023-04-15 21:38:47 +00:00
|
|
|
async fn main() {
|
2023-08-13 08:30:49 +00:00
|
|
|
// Override the panic handler with one which will panic if any tokio task panics
|
|
|
|
{
|
|
|
|
let existing = std::panic::take_hook();
|
|
|
|
std::panic::set_hook(Box::new(move |panic| {
|
|
|
|
existing(panic);
|
|
|
|
const MSG: &str = "exiting the process due to a task panicking";
|
|
|
|
println!("{MSG}");
|
|
|
|
log::error!("{MSG}");
|
|
|
|
std::process::exit(1);
|
|
|
|
}));
|
|
|
|
}
|
|
|
|
|
2023-08-01 23:00:48 +00:00
|
|
|
if std::env::var("RUST_LOG").is_err() {
|
|
|
|
std::env::set_var("RUST_LOG", serai_env::var("RUST_LOG").unwrap_or_else(|| "info".to_string()));
|
|
|
|
}
|
|
|
|
env_logger::init();
|
|
|
|
|
|
|
|
log::info!("starting coordinator service...");
|
|
|
|
|
2023-07-26 01:39:29 +00:00
|
|
|
let db = serai_db::new_rocksdb(&env::var("DB_PATH").expect("path to DB wasn't specified"));
|
2023-04-17 06:10:33 +00:00
|
|
|
|
2023-08-06 16:38:44 +00:00
|
|
|
let key = {
|
|
|
|
let mut key_hex = serai_env::var("SERAI_KEY").expect("Serai key wasn't provided");
|
|
|
|
let mut key_vec = hex::decode(&key_hex).map_err(|_| ()).expect("Serai key wasn't hex-encoded");
|
|
|
|
key_hex.zeroize();
|
|
|
|
if key_vec.len() != 32 {
|
|
|
|
key_vec.zeroize();
|
|
|
|
panic!("Serai key had an invalid length");
|
|
|
|
}
|
|
|
|
let mut key_bytes = [0; 32];
|
|
|
|
key_bytes.copy_from_slice(&key_vec);
|
|
|
|
key_vec.zeroize();
|
|
|
|
let key = Zeroizing::new(<Ristretto as Ciphersuite>::F::from_repr(key_bytes).unwrap());
|
|
|
|
key_bytes.zeroize();
|
|
|
|
key
|
|
|
|
};
|
2023-08-08 19:12:47 +00:00
|
|
|
let p2p = LibP2p::new();
|
2023-04-17 06:10:33 +00:00
|
|
|
|
2023-07-21 18:00:03 +00:00
|
|
|
let processors = Arc::new(MessageQueue::from_env(Service::Coordinator));
|
2023-04-17 06:10:33 +00:00
|
|
|
|
2023-04-16 04:51:56 +00:00
|
|
|
let serai = || async {
|
|
|
|
loop {
|
2023-08-06 16:38:44 +00:00
|
|
|
let Ok(serai) = Serai::new(&format!(
|
2023-08-01 23:00:48 +00:00
|
|
|
"ws://{}:9944",
|
|
|
|
serai_env::var("SERAI_HOSTNAME").expect("Serai hostname wasn't provided")
|
2023-08-06 16:38:44 +00:00
|
|
|
))
|
2023-08-01 23:00:48 +00:00
|
|
|
.await
|
|
|
|
else {
|
2023-04-16 04:51:56 +00:00
|
|
|
log::error!("couldn't connect to the Serai node");
|
2023-04-17 06:10:33 +00:00
|
|
|
sleep(Duration::from_secs(5)).await;
|
2023-08-01 04:47:36 +00:00
|
|
|
continue;
|
2023-04-16 04:51:56 +00:00
|
|
|
};
|
2023-08-01 23:00:48 +00:00
|
|
|
log::info!("made initial connection to Serai node");
|
2023-04-16 04:51:56 +00:00
|
|
|
return serai;
|
|
|
|
}
|
|
|
|
};
|
2023-05-10 03:44:41 +00:00
|
|
|
run(db, key, p2p, processors, serai().await).await
|
2023-04-15 21:38:47 +00:00
|
|
|
}
|