Alternate handover batch TOCTOU fix (#397)

* Revert "Correct the prior documented TOCTOU"

This reverts commit d50fe87801.

* Correct the prior documented TOCTOU

d50fe87801 edited the challenge for the Batch to
fix it. This won't produce Batch n+1 until Batch n is successfully published
and verified. It's an alternative strategy able to be reviewed, with a much
smaller impact to scope.
This commit is contained in:
Luke Parker 2023-10-13 12:14:59 -04:00 committed by GitHub
parent 7d0d1dc382
commit f6e8bc3352
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 144 additions and 78 deletions

View file

@ -8,7 +8,7 @@ use blake2::{
use scale::{Encode, Decode};
use serai_client::{
primitives::NetworkId,
validator_sets::primitives::ValidatorSet,
validator_sets::primitives::{Session, ValidatorSet},
in_instructions::primitives::{Batch, SignedBatch},
};
@ -142,13 +142,49 @@ impl<D: Db> MainDb<D> {
.map(|id| u32::from_le_bytes(id.try_into().unwrap()))
}
fn did_handover_key(set: ValidatorSet) -> Vec<u8> {
Self::main_key(b"did_handover", set.encode())
fn handover_batch_key(set: ValidatorSet) -> Vec<u8> {
Self::main_key(b"handover_batch", set.encode())
}
pub fn set_did_handover(txn: &mut D::Transaction<'_>, set: ValidatorSet) {
txn.put(Self::did_handover_key(set), []);
fn lookup_handover_batch_key(network: NetworkId, batch: u32) -> Vec<u8> {
Self::main_key(b"lookup_handover_batch", (network, batch).encode())
}
pub fn did_handover<G: Get>(getter: &G, set: ValidatorSet) -> bool {
getter.get(Self::did_handover_key(set)).is_some()
pub fn set_handover_batch(txn: &mut D::Transaction<'_>, set: ValidatorSet, batch: u32) {
txn.put(Self::handover_batch_key(set), batch.to_le_bytes());
txn.put(Self::lookup_handover_batch_key(set.network, batch), set.session.0.to_le_bytes());
}
pub fn handover_batch<G: Get>(getter: &G, set: ValidatorSet) -> Option<u32> {
getter.get(Self::handover_batch_key(set)).map(|id| u32::from_le_bytes(id.try_into().unwrap()))
}
pub fn is_handover_batch<G: Get>(
getter: &G,
network: NetworkId,
batch: u32,
) -> Option<ValidatorSet> {
getter.get(Self::lookup_handover_batch_key(network, batch)).map(|session| ValidatorSet {
network,
session: Session(u32::from_le_bytes(session.try_into().unwrap())),
})
}
fn queued_batches_key(set: ValidatorSet) -> Vec<u8> {
Self::main_key(b"queued_batches", set.encode())
}
pub fn queue_batch(txn: &mut D::Transaction<'_>, set: ValidatorSet, batch: Transaction) {
let key = Self::queued_batches_key(set);
let mut batches = txn.get(&key).unwrap_or(vec![]);
batches.extend(batch.serialize());
txn.put(&key, batches);
}
pub fn take_queued_batches(txn: &mut D::Transaction<'_>, set: ValidatorSet) -> Vec<Transaction> {
let key = Self::queued_batches_key(set);
let batches_vec = txn.get(&key).unwrap_or(vec![]);
txn.del(&key);
let mut batches: &[u8] = &batches_vec;
let mut res = vec![];
while !batches.is_empty() {
res.push(Transaction::read(&mut batches).unwrap());
}
res
}
}

View file

@ -732,6 +732,8 @@ async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
next += 1;
}
let start_id = batches.front().map(|batch| batch.batch.id);
let last_id = batches.back().map(|batch| batch.batch.id);
while let Some(batch) = batches.pop_front() {
// If this Batch should no longer be published, continue
if get_next(&serai, network).await > batch.batch.id {
@ -768,8 +770,30 @@ async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
sleep(Duration::from_secs(5)).await;
}
}
// Verify the `Batch`s we just published
if let Some(last_id) = last_id {
loop {
let verified = verify_published_batches::<D>(&mut txn, msg.network, last_id).await;
if verified == Some(last_id) {
break;
}
}
}
None
// Check if any of these `Batch`s were a handover `Batch`
// If so, we need to publish any delayed `Batch` provided transactions
let mut relevant = None;
if let Some(start_id) = start_id {
let last_id = last_id.unwrap();
for batch in start_id .. last_id {
if let Some(set) = MainDb::<D>::is_handover_batch(&txn, msg.network, batch) {
// TODO: relevant may malready be Some. This is a safe over-write, yet we do need
// to explicitly not bother with old tributaries
relevant = Some(set.session);
}
}
}
relevant
}
},
};
@ -791,11 +815,15 @@ async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
let genesis = spec.genesis();
let tx = match msg.msg.clone() {
let txs = match msg.msg.clone() {
ProcessorMessage::KeyGen(inner_msg) => match inner_msg {
key_gen::ProcessorMessage::Commitments { id, commitments } => Some(
Transaction::DkgCommitments(id.attempt, commitments, Transaction::empty_signed()),
),
key_gen::ProcessorMessage::Commitments { id, commitments } => {
vec![Transaction::DkgCommitments(
id.attempt,
commitments,
Transaction::empty_signed(),
)]
}
key_gen::ProcessorMessage::Shares { id, mut shares } => {
// Create a MuSig-based machine to inform Substrate of this key generation
let nonces = crate::tributary::dkg_confirmation_nonces(&key, spec, id.attempt);
@ -815,12 +843,12 @@ async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
);
}
Some(Transaction::DkgShares {
vec![Transaction::DkgShares {
attempt: id.attempt,
shares: tx_shares,
confirmation_nonces: nonces,
signed: Transaction::empty_signed(),
})
}]
}
key_gen::ProcessorMessage::GeneratedKeyPair { id, substrate_key, network_key } => {
assert_eq!(
@ -840,7 +868,7 @@ async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
match share {
Ok(share) => {
Some(Transaction::DkgConfirmed(id.attempt, share, Transaction::empty_signed()))
vec![Transaction::DkgConfirmed(id.attempt, share, Transaction::empty_signed())]
}
Err(p) => {
todo!("participant {p:?} sent invalid DKG confirmation preprocesses")
@ -853,22 +881,22 @@ async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
if id.attempt == 0 {
MainDb::<D>::save_first_preprocess(&mut txn, network, id.id, preprocess);
None
vec![]
} else {
Some(Transaction::SignPreprocess(SignData {
vec![Transaction::SignPreprocess(SignData {
plan: id.id,
attempt: id.attempt,
data: preprocess,
signed: Transaction::empty_signed(),
}))
})]
}
}
sign::ProcessorMessage::Share { id, share } => Some(Transaction::SignShare(SignData {
sign::ProcessorMessage::Share { id, share } => vec![Transaction::SignShare(SignData {
plan: id.id,
attempt: id.attempt,
data: share,
signed: Transaction::empty_signed(),
})),
})],
sign::ProcessorMessage::Completed { key: _, id, tx } => {
let r = Zeroizing::new(<Ristretto as Ciphersuite>::F::random(&mut OsRng));
#[allow(non_snake_case)]
@ -886,7 +914,7 @@ async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
}
_ => unreachable!(),
}
Some(tx)
vec![tx]
}
},
ProcessorMessage::Coordinator(inner_msg) => match inner_msg {
@ -906,9 +934,11 @@ async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
// If this is the new key's first Batch, only create this TX once we verify all
// all prior published `Batch`s
if (spec.set().session.0 != 0) && (!MainDb::<D>::did_handover(&txn, spec.set())) {
let last_received = MainDb::<D>::last_received_batch(&txn, msg.network);
if let Some(last_received) = last_received {
let last_received = MainDb::<D>::last_received_batch(&txn, msg.network).unwrap();
let handover_batch = MainDb::<D>::handover_batch(&txn, spec.set());
if handover_batch.is_none() {
MainDb::<D>::set_handover_batch(&mut txn, spec.set(), last_received);
if last_received != 0 {
// Decrease by 1, to get the ID of the Batch prior to this Batch
let prior_sets_last_batch = last_received - 1;
loop {
@ -921,36 +951,69 @@ async fn handle_processor_messages<D: Db, Pro: Processors, P: P2p>(
sleep(Duration::from_secs(5)).await;
}
}
MainDb::<D>::set_did_handover(&mut txn, spec.set());
}
Some(Transaction::Batch(block.0, id.id))
// There is a race condition here. We may verify all `Batch`s from the prior set,
// start signing the handover `Batch` `n`, start signing `n+1`, have `n+1` signed
// before `n` (or at the same time), yet then the prior set forges a malicious
// `Batch` `n`.
//
// The malicious `Batch` `n` would be publishable to Serai, as Serai can't
// distinguish what's intended to be a handover `Batch`, yet then anyone could
// publish the new set's `n+1`, causing their acceptance of the handover.
//
// To fix this, if this is after the handover `Batch` and we have yet to verify
// publication of the handover `Batch`, don't yet yield the provided.
let handover_batch = MainDb::<D>::handover_batch(&txn, spec.set()).unwrap();
let intended = Transaction::Batch(block.0, id.id);
let mut res = vec![intended.clone()];
if last_received > handover_batch {
if let Some(last_verified) = MainDb::<D>::last_verified_batch(&txn, msg.network) {
if last_verified < handover_batch {
res = vec![];
}
} else {
Some(Transaction::BatchPreprocess(SignData {
res = vec![];
}
}
if res.is_empty() {
MainDb::<D>::queue_batch(&mut txn, spec.set(), intended);
}
res
} else {
vec![Transaction::BatchPreprocess(SignData {
plan: id.id,
attempt: id.attempt,
data: preprocess,
signed: Transaction::empty_signed(),
}))
})]
}
}
coordinator::ProcessorMessage::BatchShare { id, share } => {
Some(Transaction::BatchShare(SignData {
vec![Transaction::BatchShare(SignData {
plan: id.id,
attempt: id.attempt,
data: share.to_vec(),
signed: Transaction::empty_signed(),
}))
})]
}
},
ProcessorMessage::Substrate(inner_msg) => match inner_msg {
processor_messages::substrate::ProcessorMessage::Batch { .. } => unreachable!(),
processor_messages::substrate::ProcessorMessage::SignedBatch { .. } => unreachable!(),
processor_messages::substrate::ProcessorMessage::SignedBatch { .. } => {
// We only reach here if this SignedBatch triggered the publication of a handover
// Batch
// Since the handover `Batch` was successfully published and verified, we no longer
// have to worry about the above n+1 attack
MainDb::<D>::take_queued_batches(&mut txn, spec.set())
}
},
};
// If this created a transaction, publish it
if let Some(mut tx) = tx {
// If this created transactions, publish them
for mut tx in txs {
log::trace!("processor message effected transaction {}", hex::encode(tx.hash()));
match tx.kind() {

View file

@ -52,18 +52,6 @@ impl<D: Db> SubstrateSignerDb<D> {
D::key(b"SUBSTRATE_SIGNER", dst, key)
}
fn first_batch_key(key: [u8; 32]) -> Vec<u8> {
Self::sign_key(b"first_batch", key)
}
fn save_first_batch(txn: &mut D::Transaction<'_>, key: [u8; 32], id: u32) {
txn.put(Self::first_batch_key(key), id.to_le_bytes());
}
fn first_batch<G: Get>(getter: &G, key: [u8; 32]) -> Option<u32> {
getter
.get(Self::first_batch_key(key))
.map(|bytes| u32::from_le_bytes(bytes.try_into().unwrap()))
}
fn completed_key(id: [u8; 32]) -> Vec<u8> {
Self::sign_key(b"completed", id)
}
@ -241,11 +229,6 @@ impl<D: Db> SubstrateSigner<D> {
return;
}
let group_key = self.keys.group_key().to_bytes();
if SubstrateSignerDb::<D>::first_batch(txn, group_key).is_none() {
SubstrateSignerDb::<D>::save_first_batch(txn, group_key, batch.id);
}
self.signable.insert(id, batch);
self.attempt(txn, id, 0).await;
}
@ -287,13 +270,8 @@ impl<D: Db> SubstrateSigner<D> {
Err(e) => todo!("malicious signer: {:?}", e),
};
let batch = &self.signable[&id.id];
let is_first_batch =
SubstrateSignerDb::<D>::first_batch(txn, self.keys.group_key().to_bytes()).unwrap() ==
batch.id;
let (machine, share) =
match machine.sign(preprocesses, &batch_message(is_first_batch, batch)) {
match machine.sign(preprocesses, &batch_message(&self.signable[&id.id])) {
Ok(res) => res,
Err(e) => todo!("malicious signer: {:?}", e),
};

View file

@ -146,9 +146,8 @@ async fn test_substrate_signer() {
signers.get_mut(i).unwrap().events.pop_front().unwrap()
{
assert_eq!(signed_batch.batch, batch);
// SubstrateSigner will believe this is the first batch for this set, hence `true`
assert!(Public::from_raw(keys[&participant_one].group_key().to_bytes())
.verify(&batch_message(true, &batch), &signed_batch.signature));
.verify(&batch_message(&batch), &signed_batch.signature));
} else {
panic!("didn't get signed batch back");
}

View file

@ -39,8 +39,7 @@ pub async fn provide_batch(batch: Batch) -> [u8; 32] {
let block = publish_tx(&Serai::execute_batch(SignedBatch {
batch: batch.clone(),
// TODO: This `batch.id == 0` line only works when session == 0
signature: pair.sign(&batch_message(batch.id == 0, &batch)),
signature: pair.sign(&batch_message(&batch)),
}))
.await;

View file

@ -155,21 +155,14 @@ pub mod pallet {
// verify the signature
let network = batch.batch.network;
let (current_session, prior, current) = keys_for_network::<T>(network)?;
let batch_message = batch_message(&batch.batch);
// Check the prior key first since only a single `Batch` (the last one) will be when prior is
// Some yet prior wasn't the signing key
let valid_by_prior = if let Some(key) = prior {
key.verify(&batch_message(false, &batch.batch), &batch.signature)
} else {
false
};
let valid_by_prior =
if let Some(key) = prior { key.verify(&batch_message, &batch.signature) } else { false };
let valid = valid_by_prior ||
(if let Some(key) = current {
key.verify(
// This `== 0` is valid as either it'll be the first Batch for the first set, or if
// they never had a Batch, the first Batch for the next set
&batch_message((batch.batch.id == 0) || prior.is_some(), &batch.batch),
&batch.signature,
)
key.verify(&batch_message, &batch.signature)
} else {
false
});

View file

@ -84,6 +84,6 @@ impl Zeroize for SignedBatch {
// TODO: Make this an associated method?
/// The message for the batch signature.
pub fn batch_message(is_first_batch_of_set: bool, batch: &Batch) -> Vec<u8> {
[b"InInstructions-batch".as_ref(), &(is_first_batch_of_set, batch).encode()].concat()
pub fn batch_message(batch: &Batch) -> Vec<u8> {
[b"InInstructions-batch".as_ref(), &batch.encode()].concat()
}

View file

@ -165,7 +165,7 @@ pub async fn batch(
let signature = Signature(
schnorrkel::keys::Keypair::from_bytes(&schnorrkel_key_pair)
.unwrap()
.sign_simple(b"substrate", &batch_message(batch.id == 0, &batch))
.sign_simple(b"substrate", &batch_message(&batch))
.to_bytes(),
);

View file

@ -137,10 +137,8 @@ pub(crate) async fn sign_batch(
messages::substrate::ProcessorMessage::SignedBatch { batch: this_batch },
) => {
if batch.is_none() {
assert!(PublicKey::from_raw(key).verify(
&batch_message(this_batch.batch.id == 0, &this_batch.batch),
&this_batch.signature
));
assert!(PublicKey::from_raw(key)
.verify(&batch_message(&this_batch.batch), &this_batch.signature));
batch = Some(this_batch.clone());
}