Finish routing eventualities

Also corrects some misc TODOs and tidies up some log statements.
This commit is contained in:
Luke Parker 2023-04-11 05:49:27 -04:00
parent 9e78c8fc9e
commit 90f2b03595
No known key found for this signature in database
8 changed files with 171 additions and 50 deletions

View file

@ -148,6 +148,21 @@ impl<E: Eventuality> EventualitiesTracker<E> {
// If our self tracker already went past this block number, set it back
self.block_number = self.block_number.min(block_number);
}
/// Remove the first tracked entry whose stored ID equals `id`, if any.
///
/// Does nothing when no entry carries that ID.
pub fn drop(&mut self, id: [u8; 32]) {
  // There is no reverse (ID -> map key) lookup, so this is a linear scan which stops at the
  // first match
  let matching_key =
    self.map.iter().find(|(_, entry)| entry.0 == id).map(|(lookup, _)| lookup.clone());
  if let Some(lookup) = matching_key {
    self.map.remove(&lookup);
  }
}
}
impl<E: Eventuality> Default for EventualitiesTracker<E> {

View file

@ -5,7 +5,6 @@ use std::{
use messages::{ProcessorMessage, CoordinatorMessage};
// TODO: Also include the coin block height here so we can delay handling if not synced?
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Message {
pub id: u64,

View file

@ -109,7 +109,10 @@ async fn get_fee<C: Coin>(coin: &C, block_number: usize) -> C::Fee {
return block.median_fee();
}
Err(e) => {
error!("couldn't get block {}: {e}", block_number);
error!(
"couldn't get block {block_number} in get_fee. {} {}",
"this should only happen if the node is offline. error: ", e
);
// Since this block is considered finalized, we shouldn't be unable to get it unless the
// node is offline, hence the long sleep
sleep(Duration::from_secs(60)).await;
@ -455,8 +458,6 @@ async fn run<C: Coin, D: Db, Co: Coordinator>(raw_db: D, coin: C, mut coordinato
},
msg = scanner.events.recv() => {
// These need to be sent to the coordinator which needs to check they aren't replayed
// TODO
match msg.unwrap() {
ScannerEvent::Block(key, block, time, outputs) => {
let key = key.to_bytes().as_ref().to_vec();
@ -500,6 +501,13 @@ async fn run<C: Coin, D: Db, Co: Coordinator>(raw_db: D, coin: C, mut coordinato
substrate_signers[&key].sign(time, batch).await;
},
ScannerEvent::Completed(id, tx) => {
// We don't know which signer had this plan, so inform all of them
for (_, signer) in signers.iter_mut() {
signer.eventuality_completion(id, &tx).await;
}
},
}
},
@ -526,7 +534,10 @@ async fn run<C: Coin, D: Db, Co: Coordinator>(raw_db: D, coin: C, mut coordinato
},
SignerEvent::SignedTransaction { id, tx } => {
// If we die after calling finish_signing, we'll never fire Completed
// TODO: Is that acceptable? Do we need to fire Completed before firing finish_signing?
main_db.finish_signing(&key, id);
scanner.drop_eventuality(id).await;
coordinator
.send(ProcessorMessage::Sign(messages::sign::ProcessorMessage::Completed {
key: key.to_vec(),

View file

@ -16,13 +16,15 @@ use tokio::{
use crate::{
DbTxn, Db,
coins::{Output, EventualitiesTracker, Block, Coin},
coins::{Output, Transaction, EventualitiesTracker, Block, Coin},
};
/// Events the scanner emits to its consumer.
#[derive(Clone, Debug)]
pub enum ScannerEvent<C: Coin> {
/// A block was scanned.
///
/// Fields, in order: the key the event is for, the block's ID, the block's time, and the
/// outputs found in the block.
Block(<C::Curve as Ciphersuite>::G, <C::Block as Block<C>>::Id, SystemTime, Vec<C::Output>),
/// An eventuality's completion was found on-chain.
///
/// Fields, in order: the 32-byte ID of the eventuality and the ID of the transaction which
/// resolved it.
Completed([u8; 32], <C::Transaction as Transaction<C>>::Id),
}
/// Receiving half of an unbounded channel of [`ScannerEvent`]s.
pub type ScannerEventChannel<C> = mpsc::UnboundedReceiver<ScannerEvent<C>>;
@ -68,15 +70,31 @@ impl<C: Coin, D: Db> ScannerDb<C, D> {
}
/// Append `key` to the persisted list of active keys, unless it is already listed.
///
/// The list is stored as the plain concatenation of each key's byte encoding, so membership is
/// checked one encoding-sized chunk at a time. The updated list is written through `txn`; when
/// the key is already present nothing is written.
fn add_active_key(&mut self, txn: &mut D::Transaction, key: <C::Curve as Ciphersuite>::G) {
  let mut keys = self.0.get(Self::active_keys_key()).unwrap_or_default();

  let key_bytes = key.to_bytes();
  let key_ref: &[u8] = key_bytes.as_ref();

  // Don't add this key if it's already present (which can happen based on reboot timing)
  //
  // The check must run before anything is appended, else it would match the freshly appended
  // copy. chunks_exact never reads past the end of the stored bytes, even if their length isn't
  // a multiple of the key length. The emptiness guard avoids its panic on a chunk size of 0
  let already_present =
    !key_ref.is_empty() && keys.chunks_exact(key_ref.len()).any(|existing| existing == key_ref);
  if already_present {
    debug!("adding {} as an active key yet it was already present", hex::encode(key_ref));
    return;
  }

  keys.extend(key_ref);
  txn.put(Self::active_keys_key(), keys);
}
fn active_keys(&self) -> Vec<<C::Curve as Ciphersuite>::G> {
let bytes_vec = self.0.get(Self::active_keys_key()).unwrap_or(vec![]);
let mut bytes: &[u8] = bytes_vec.as_ref();
// Assumes keys will be 32 bytes when calculating the capacity
// If keys are larger, this may allocate more memory than needed
// If keys are smaller, this may require additional allocations
// Either is fine
let mut res = Vec::with_capacity(bytes.len() / 32);
while !bytes.is_empty() {
res.push(C::Curve::read_G(&mut bytes).unwrap());
@ -210,6 +228,10 @@ impl<C: Coin, D: Db> ScannerHandle<C, D> {
self.scanner.write().await.eventualities.register(block_number, id, eventuality)
}
/// Take the scanner's write lock and drop the eventuality tracked under `id`.
pub async fn drop_eventuality(&self, id: [u8; 32]) {
  let mut scanner = self.scanner.write().await;
  scanner.eventualities.drop(id);
}
/// Rotate the key being scanned for.
///
/// If no key has been prior set, this will become the key with no further actions.
@ -362,9 +384,17 @@ impl<C: Coin, D: Db> Scanner<C, D> {
for (id, tx) in
coin.get_eventuality_completions(&mut scanner.eventualities, &block).await
{
// TODO: Fire Completed
let _ = id;
let _ = tx;
// This should only happen if there's a P2P net desync or there's a malicious
// validator
warn!(
"eventuality {} resolved by {}, as found on chain. this should not happen",
hex::encode(id),
hex::encode(&tx)
);
if !scanner.emit(ScannerEvent::Completed(id, tx)) {
return;
}
}
let outputs = match scanner.coin.get_outputs(&block, key).await {

View file

@ -49,7 +49,7 @@ impl<C: Coin, D: Db> SignerDb<C, D> {
&mut self,
txn: &mut D::Transaction,
id: [u8; 32],
tx: <C::Transaction as Transaction<C>>::Id,
tx: &<C::Transaction as Transaction<C>>::Id,
) {
// Transactions can be completed by multiple signatures
// Save every solution in order to be robust
@ -165,7 +165,11 @@ impl<C: Coin, D: Db> Signer<C, D> {
// If we don't have an attempt logged, it's because the coordinator is faulty OR
// because we rebooted
None => {
warn!("not attempting {:?}. this is an error if we didn't reboot", id);
warn!(
"not attempting {} #{}. this is an error if we didn't reboot",
hex::encode(id.id),
id.attempt
);
// Don't panic on the assumption we rebooted
Err(())?;
}
@ -191,6 +195,57 @@ impl<C: Coin, D: Db> Signer<C, D> {
}
}
/// Handle a claim that the eventuality `id` was completed by the transaction `tx_id`.
///
/// If this signer has an eventuality saved for `id`, the transaction is fetched and checked
/// against it. On confirmation, the transaction and completion are saved, all in-memory signing
/// state for `id` is cleared, and `SignerEvent::SignedTransaction` is emitted. Every other
/// outcome is solely logged.
async fn eventuality_completion(
  &mut self,
  id: [u8; 32],
  tx_id: &<C::Transaction as Transaction<C>>::Id,
) {
  // Nothing to do if no eventuality is saved for this ID
  let Some(eventuality) = self.db.eventuality(id) else {
    debug!(
      "signer {} informed of the completion of {}. {}",
      hex::encode(self.keys.group_key().to_bytes()),
      hex::encode(id),
      "this signer did not have/has already completed that plan",
    );
    return;
  };

  // Transaction hasn't hit our mempool/was dropped for a different signature
  // The latter can happen given certain latency conditions/a single malicious signer
  // In the case of a single malicious signer, they can drag multiple honest
  // validators down with them, so we unfortunately can't slash on this case
  let Ok(tx) = self.coin.get_transaction(tx_id).await else {
    warn!(
      "a validator claimed {} completed {} yet we didn't have that TX in our mempool",
      hex::encode(tx_id),
      hex::encode(id),
    );
    return;
  };

  // The claimed transaction exists yet doesn't actually resolve this eventuality
  if !self.coin.confirm_completion(&eventuality, &tx) {
    warn!(
      "a validator claimed {} completed {} when it did not",
      hex::encode(tx_id),
      hex::encode(id)
    );
    return;
  }

  debug!("eventuality for {} resolved in TX {}", hex::encode(id), hex::encode(tx_id));

  // Persist the transaction and the completion within a single DB transaction
  let mut txn = self.db.0.txn();
  self.db.save_transaction(&mut txn, &tx);
  self.db.complete(&mut txn, id, tx_id);
  txn.commit();

  // Stop trying to sign for this TX
  self.signable.remove(&id);
  self.attempt.remove(&id);
  self.preprocessing.remove(&id);
  self.signing.remove(&id);

  self.emit(SignerEvent::SignedTransaction { id, tx: tx.id() });
}
async fn handle(&mut self, msg: CoordinatorMessage) {
match msg {
CoordinatorMessage::Preprocesses { id, mut preprocesses } => {
@ -201,7 +256,10 @@ impl<C: Coin, D: Db> Signer<C, D> {
let machine = match self.preprocessing.remove(&id.id) {
// Either rebooted or RPC error, or some invariant
None => {
warn!("not preprocessing for {:?}. this is an error if we didn't reboot", id);
warn!(
"not preprocessing for {}. this is an error if we didn't reboot",
hex::encode(id.id)
);
return;
}
Some(machine) => machine,
@ -248,7 +306,10 @@ impl<C: Coin, D: Db> Signer<C, D> {
panic!("never preprocessed yet signing?");
}
warn!("not preprocessing for {:?}. this is an error if we didn't reboot", id);
warn!(
"not preprocessing for {}. this is an error if we didn't reboot",
hex::encode(id.id)
);
return;
}
Some(machine) => machine,
@ -273,14 +334,15 @@ impl<C: Coin, D: Db> Signer<C, D> {
// Save the transaction in case it's needed for recovery
let mut txn = self.db.0.txn();
self.db.save_transaction(&mut txn, &tx);
self.db.complete(&mut txn, id.id, tx.id());
let tx_id = tx.id();
self.db.complete(&mut txn, id.id, &tx_id);
txn.commit();
// Publish it
if let Err(e) = self.coin.publish_transaction(&tx).await {
error!("couldn't publish {:?}: {:?}", tx, e);
} else {
info!("published {:?}", hex::encode(tx.id()));
info!("published {}", hex::encode(&tx_id));
}
// Stop trying to sign for this TX
@ -289,46 +351,23 @@ impl<C: Coin, D: Db> Signer<C, D> {
assert!(self.preprocessing.remove(&id.id).is_none());
assert!(self.signing.remove(&id.id).is_none());
self.emit(SignerEvent::SignedTransaction { id: id.id, tx: tx.id() });
self.emit(SignerEvent::SignedTransaction { id: id.id, tx: tx_id });
}
CoordinatorMessage::Completed { key: _, id, tx: tx_vec } => {
CoordinatorMessage::Completed { key: _, id, tx: mut tx_vec } => {
let mut tx = <C::Transaction as Transaction<C>>::Id::default();
if tx.as_ref().len() != tx_vec.len() {
tx_vec.truncate(2 * tx.as_ref().len());
warn!(
"a validator claimed {} completed {id:?} yet that's not a valid TX ID",
hex::encode(&tx)
"a validator claimed {} completed {} yet that's not a valid TX ID",
hex::encode(&tx),
hex::encode(id),
);
return;
}
tx.as_mut().copy_from_slice(&tx_vec);
if let Some(eventuality) = self.db.eventuality(id) {
// Transaction hasn't hit our mempool/was dropped for a different signature
// The latter can happen given certain latency conditions/a single malicious signer
// In the case of a single malicious signer, they can drag multiple honest
// validators down with them, so we unfortunately can't slash on this case
let Ok(tx) = self.coin.get_transaction(&tx).await else {
todo!("queue checking eventualities"); // or give up here?
};
if self.coin.confirm_completion(&eventuality, &tx) {
// Stop trying to sign for this TX
let mut txn = self.db.0.txn();
self.db.save_transaction(&mut txn, &tx);
self.db.complete(&mut txn, id, tx.id());
txn.commit();
self.signable.remove(&id);
self.attempt.remove(&id);
self.preprocessing.remove(&id);
self.signing.remove(&id);
self.emit(SignerEvent::SignedTransaction { id, tx: tx.id() });
} else {
warn!("a validator claimed {} completed {id:?} when it did not", hex::encode(&tx.id()));
}
}
self.eventuality_completion(id, &tx).await;
}
}
}
@ -406,7 +445,7 @@ impl<C: Coin, D: Db> Signer<C, D> {
if !id.signing_set(&signer.keys.params()).contains(&signer.keys.params().i()) {
continue;
}
info!("selected to sign {:?}", id);
info!("selected to sign {} #{}", hex::encode(id.id), id.attempt);
// If we reboot mid-sign, the current design has us abort all signs and wait for later
// attempts/new signing protocols
@ -421,7 +460,11 @@ impl<C: Coin, D: Db> Signer<C, D> {
//
// Only run if this hasn't already been attempted
if signer.db.has_attempt(&id) {
warn!("already attempted {:?}. this is an error if we didn't reboot", id);
warn!(
"already attempted {} #{}. this is an error if we didn't reboot",
hex::encode(id.id),
id.attempt
);
continue;
}
@ -432,7 +475,7 @@ impl<C: Coin, D: Db> Signer<C, D> {
// Attempt to create the TX
let machine = match signer.coin.attempt_send(tx).await {
Err(e) => {
error!("failed to attempt {:?}: {:?}", id, e);
error!("failed to attempt {}, #{}: {:?}", hex::encode(id.id), id.attempt, e);
continue;
}
Ok(machine) => machine,
@ -503,6 +546,14 @@ impl<C: Coin, D: Db> SignerHandle<C, D> {
signer.signable.insert(id, (start, tx));
}
/// Take the signer's write lock and forward the eventuality completion claim to it.
pub async fn eventuality_completion(
  &self,
  id: [u8; 32],
  tx: &<C::Transaction as Transaction<C>>::Id,
) {
  let mut signer = self.signer.write().await;
  signer.eventuality_completion(id, tx).await;
}
/// Take the signer's write lock and have it handle `msg`.
pub async fn handle(&self, msg: CoordinatorMessage) {
  let mut signer = self.signer.write().await;
  signer.handle(msg).await;
}

View file

@ -56,6 +56,9 @@ async fn spend<C: Coin, D: Db>(
assert_eq!(outputs[0].kind(), OutputType::Change);
outputs
}
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
}
}
@ -89,6 +92,9 @@ pub async fn test_addresses<C: Coin>(coin: C) {
assert_eq!(outputs[0].kind(), OutputType::Branch);
outputs
}
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
};
// Spend the branch output, creating a change output and ensuring we actually get change

View file

@ -56,6 +56,9 @@ pub async fn test_scanner<C: Coin>(coin: C) {
assert_eq!(outputs[0].kind(), OutputType::External);
outputs
}
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
};
(scanner, outputs)
};

View file

@ -39,6 +39,9 @@ pub async fn test_wallet<C: Coin>(coin: C) {
assert_eq!(outputs.len(), 1);
(block_id, outputs)
}
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
}
};
@ -105,6 +108,9 @@ pub async fn test_wallet<C: Coin>(coin: C) {
assert_eq!(time, block.time());
assert_eq!(these_outputs, outputs);
}
ScannerEvent::Completed(_, _) => {
panic!("unexpectedly got eventuality completion");
}
}
// Check the Scanner DB can reload the outputs