Add an UnbalancedMerkleTree primitive

The reasoning for it is documented with itself. The plan is to use it within
our header for committing to the DAG (allowing one header per epoch, yet
logarithmic proofs for any header within the epoch), the transactions
commitment (allowing logarithmic proofs of a transaction within a block,
without padding), and the events commitment (allowing logarithmic proofs of
unique events within a block, despite events not inherently having a unique ID).

This also defines transaction hashes and performs the necessary modifications
for transactions to be unique.
This commit is contained in:
Luke Parker 2025-03-04 04:00:05 -05:00
parent b92ac4a15b
commit df2ae10d2f
No known key found for this signature in database
7 changed files with 334 additions and 11 deletions

1
Cargo.lock generated
View file

@ -8827,6 +8827,7 @@ dependencies = [
"ciphersuite",
"dkg",
"parity-scale-codec",
"rand_core",
"sp-core",
"zeroize",
]

View file

@ -2,7 +2,20 @@ use alloc::vec::Vec;
use borsh::{BorshSerialize, BorshDeserialize};
use crate::{primitives::BlockHash, Transaction};
use crate::{
primitives::{BlockHash, merkle::UnbalancedMerkleTree},
Transaction,
};
/// The tag for the hash of a transaction's event, forming a leaf of the Merkle tree of its events.
pub const EVENTS_COMMITMENT_TRANSACTION_EVENT_TAG: u8 = 0;
/// The tag for the branch hashes of transaction events.
pub const EVENTS_COMMITMENT_TRANSACTION_EVENTS_TAG: u8 = 1;
/// The tag for the hash of a transaction's hash and its events' Merkle root, forming a leaf of the
/// Merkle tree which is the events commitment.
pub const EVENTS_COMMITMENT_TRANSACTION_TAG: u8 = 2;
/// The tag for the branch hashes of the Merkle tree which is the events commitment.
pub const EVENTS_COMMITMENT_TRANSACTIONS_TAG: u8 = 3;
/// A V1 header for a block.
#[derive(Clone, Copy, PartialEq, Eq, Debug, BorshSerialize, BorshDeserialize)]
@ -18,6 +31,8 @@ pub struct HeaderV1 {
/// The commitment to the transactions within this block.
// TODO: Some transactions don't have unique hashes due to assuming validators set unique keys
pub transactions_commitment: [u8; 32],
/// The commitment to the events within this block.
pub events_commitment: UnbalancedMerkleTree,
/// A commitment to the consensus data used to justify adding this block to the blockchain.
pub consensus_commitment: [u8; 32],
}
@ -90,6 +105,8 @@ mod substrate {
pub unix_time_in_millis: u64,
/// The commitment to the transactions within this block.
pub transactions_commitment: [u8; 32],
/// The commitment to the events within this block.
pub events_commitment: UnbalancedMerkleTree,
}
impl SeraiDigest {
@ -155,6 +172,11 @@ mod substrate {
.as_ref()
.map(|digest| digest.transactions_commitment)
.unwrap_or([0; 32]),
events_commitment: digest
.as_ref()
.map(|digest| digest.events_commitment)
.unwrap_or(UnbalancedMerkleTree::EMPTY),
// TODO: This hashes the digest *including seals*, doesn't it?
consensus_commitment: sp_core::blake2_256(&header.consensus.encode()),
})
}

View file

@ -204,6 +204,26 @@ impl Transaction {
explicit_context.serialize(&mut message).unwrap();
message
}
/// The unique hash of this transaction.
///
/// This serves as a unique identifier: no two transactions on the blockchain will share a
/// hash. Signed transactions are distinguished by the `(signer, nonce)` pair within their
/// `ExplicitContext`. Unsigned transactions are distinguished by inherent properties of their
/// execution (e.g. only being able to set a `ValidatorSet`'s keys once).
pub fn hash(&self) -> [u8; 32] {
  // The signature is deliberately omitted from the preimage so signatures may be replaced in
  // the future (such as with half-aggregated Schnorr signatures) without altering hashes
  let preimage = match self {
    Transaction::Unsigned { call } => borsh::to_vec(&call).unwrap(),
    Transaction::Signed {
      calls,
      contextualized_signature: ContextualizedSignature { explicit_context, signature: _ },
    } => borsh::to_vec(&(calls, explicit_context)).unwrap(),
  };
  sp_core::blake2_256(&preimage)
}
}
#[cfg(feature = "substrate")]
@ -276,16 +296,23 @@ mod substrate {
///
/// Returns `None` if the time has yet to be set.
fn current_time(&self) -> Option<u64>;
/// Get, and consume, the next nonce for an account.
fn get_and_consume_next_nonce(&self, signer: &SeraiAddress) -> u32;
/// Get the next nonce for an account.
fn next_nonce(&self, signer: &SeraiAddress) -> u32;
/// If the signer can pay the SRI fee.
fn can_pay_fee(
&self,
signer: &SeraiAddress,
fee: Amount,
) -> Result<(), TransactionValidityError>;
/// Begin execution of a transaction.
fn start_transaction(&self);
/// Consume the next nonce for an account.
fn consume_next_nonce(&self, signer: &SeraiAddress);
/// Have the transaction pay its SRI fee.
fn pay_fee(&self, signer: &SeraiAddress, fee: Amount) -> Result<(), TransactionValidityError>;
/// End execution of a transaction.
fn end_transaction(&self, transaction_hash: [u8; 32]);
}
/// A transaction with the context necessary to evaluate it within Substrate.
@ -402,7 +429,7 @@ mod substrate {
Err(TransactionValidityError::Invalid(InvalidTransaction::Stale))?;
}
}
match self.1.get_and_consume_next_nonce(signer).cmp(nonce) {
match self.1.next_nonce(signer).cmp(nonce) {
core::cmp::Ordering::Less => {
Err(TransactionValidityError::Invalid(InvalidTransaction::Stale))?
}
@ -472,7 +499,12 @@ mod substrate {
// We use 0 for the mempool priority, as this is no longer in the mempool so it's irrelevant
self.validate_except_fee::<V>(TransactionSource::InBlock, 0)?;
match self.0 {
// Start the transaction
self.1.start_transaction();
let transaction_hash = self.0.hash();
let res = match self.0 {
Transaction::Unsigned { call } => {
let call = Context::RuntimeCall::from(call.0);
V::pre_dispatch(&call)?;
@ -487,7 +519,9 @@ mod substrate {
contextualized_signature:
ContextualizedSignature { explicit_context: ExplicitContext { signer, fee, .. }, .. },
} => {
// Start by paying the fee
// Consume the signer's next nonce
self.1.consume_next_nonce(&signer);
// Pay the fee
self.1.pay_fee(&signer, fee)?;
let _res = frame_support::storage::transactional::with_storage_layer(|| {
@ -514,7 +548,14 @@ mod substrate {
pays_fee: Pays::Yes,
}))
}
}
};
// TODO: TransactionSuccess/TransactionFailure event?
// End the transaction
self.1.end_transaction(transaction_hash);
res
}
}
}

View file

@ -15,8 +15,8 @@ use serai_primitives::{
pub enum Call {
/// Set the keys for a validator set.
set_keys {
/// The network whose latest validator set is setting their keys.
network: ExternalNetworkId,
/// The validator set which is setting their keys.
validator_set: ExternalValidatorSet,
/// The keys being set.
key_pair: KeyPair,
/// The participants in the validator set who signed off on these keys.
@ -31,8 +31,8 @@ pub enum Call {
},
/// Report a validator set's slashes onto Serai.
report_slashes {
/// The network whose retiring validator set is setting their keys.
network: ExternalNetworkId,
/// The validator set which is setting their keys.
validator_set: ExternalValidatorSet,
/// The slashes they're reporting.
slashes: SlashReport,
/// The signature confirming the validity of this slash report.

View file

@ -28,6 +28,9 @@ dkg = { path = "../../crypto/dkg", default-features = false }
bech32 = { version = "0.11", default-features = false }
[dev-dependencies]
rand_core = { version = "0.6", default-features = false, features = ["std"] }
[features]
std = ["zeroize/std", "borsh/std", "ciphersuite/std", "dkg/std", "sp-core/std", "bech32/std"]
default = ["std"]

View file

@ -43,6 +43,9 @@ pub mod signals;
/// Instruction types.
pub mod instructions;
/// Merkle trees.
pub mod merkle;
/// The type used to identify block numbers.
///
/// A block's number is its zero-indexed position on the list of blocks which form a blockchain.

View file

@ -0,0 +1,253 @@
use alloc::vec::Vec;
use borsh::{BorshSerialize, BorshDeserialize};
/// An unbalanced Merkle tree.
///
/// This Merkle tree represents its leaves once and only once (distinct from a balanced Merkle
/// tree, which would require padding its leaves to a power of two). Accordingly, leaves have
/// canonical paths. This is useful for anyone who wants to index leaves which don't inherently
/// have indexes.
///
/// `[0; 32]` is used to represent an empty tree.
#[derive(Clone, Copy, PartialEq, Eq, Debug, BorshSerialize, BorshDeserialize)]
pub struct UnbalancedMerkleTree {
  /// The root of the tree represented.
  ///
  /// This is `[0; 32]` (`UnbalancedMerkleTree::EMPTY`) for a tree with no leaves.
  pub root: [u8; 32],
}
impl UnbalancedMerkleTree {
  /// An empty Merkle tree.
  pub const EMPTY: Self = Self { root: [0; 32] };

  /// If this tree is empty of leaves.
  pub fn is_empty(self) -> bool {
    self == Self::EMPTY
  }

  /// Hash a pair of sibling nodes into their parent.
  ///
  /// The 65-byte preimage is `tag || left || right`, hashed with BLAKE2b-256.
  fn branch_hash(tag: u8, left: &[u8; 32], right: &[u8; 32]) -> [u8; 32] {
    // `[tag; 65]` fills every byte with `tag`; all bytes but byte 0 are overwritten below
    let mut preimage = [tag; 65];
    preimage[1 .. 33].copy_from_slice(left);
    preimage[33 ..].copy_from_slice(right);
    sp_core::blake2_256(&preimage)
  }

  /// Create a new Merkle tree from a set of leaves.
  ///
  /// Each branch hash will be prefixed by the specified tag. To ensure branches are not argued
  /// leaves, and vice-versa, the hashes present in the list MUST never have preimages whose first
  /// byte may be the specified tag byte.
  ///
  /// This method performs intermediary allocations necessary to calculate the root.
  pub fn new(tag: u8, leaves: Vec<[u8; 32]>) -> Self {
    if leaves.is_empty() {
      return Self::EMPTY;
    }

    let mut current = leaves;
    // Scratch space for the layer above, reused across layers via swap + clear.
    // Each layer has at most ceil(len / 2) nodes.
    let mut next = Vec::with_capacity(current.len().div_ceil(2));
    // Iterate until the root hash
    while current.len() != 1 {
      let mut iter = current.iter();
      while let Some(a) = iter.next() {
        match iter.next() {
          // If we have a pair of hashes, create a branch hash
          Some(b) => {
            next.push(Self::branch_hash(tag, a, b));
          }
          // If we don't, propagate this hash (unbalanced: no padding is introduced)
          None => next.push(*a),
        }
      }
      core::mem::swap(&mut current, &mut next);
      next.clear();
    }
    Self { root: current[0] }
  }

  /// Calculate the Merkle tree root for a list of hashes, passed in as their SCALE encoding.
  ///
  /// This method does not perform any allocations and is quite optimized. It is intended to be
  /// called from within the Substrate runtime, a resource-constrained environment. It does take in
  /// an owned Vec, despite solely using it as a mutable slice, due to the trashing of its content.
  ///
  /// Please see the documentation of `UnbalancedMerkleTree` and `UnbalancedMerkleTree::new` for
  /// context on structure.
  ///
  /// A SCALE encoding will be length-prefixed with a Compact number per
  /// https://docs.polkadot.com/polkadot-protocol/basics/data-encoding/#data-types.
  #[doc(hidden)]
  pub fn from_scale_encoded_list_of_hashes(tag: u8, encoding: Vec<u8>) -> Self {
    let mut hashes = encoding;
    // Learn the length of the length prefix (how many bytes `skip` consumed)
    let length_prefix_len = {
      let mut slice = hashes.as_slice();
      <scale::Compact<u32> as scale::Decode>::skip(&mut slice).unwrap();
      hashes.len() - slice.len()
    };

    // We calculate the hashes in-place to avoid redundant allocations
    let mut hashes = hashes.as_mut_slice();
    let mut amount_of_hashes;
    // Each iteration collapses one layer of the tree within the prefix of `hashes`
    while {
      amount_of_hashes = (hashes.len() - length_prefix_len) / 32;
      amount_of_hashes > 1
    } {
      let complete_pairs = amount_of_hashes / 2;
      for i in 0 .. complete_pairs {
        // We hash the i'th pair of 32-byte elements
        let hash = {
          // The starting position of these elements
          let start = length_prefix_len + ((2 * i) * 32);
          /*
            We write the tag to the byte before this pair starts.

            In the case of the first pair, this corrupts a byte of the length prefix.
            In the case of the nth pair, this corrupts the prior-hashed pair's second element.
            This is safe as it was already hashed and the data there won't be read again. While
            we do write, and later read, the carried hash outputs to this buffer, those will
            always be written to either a pair's first element or a (n * prior-)hashed pair's
            second element (where n > 2), never the immediately preceding pair's second element.
          */
          hashes[start - 1] = tag;
          // This 65-byte window is `tag || left || right`, matching `branch_hash`'s preimage
          sp_core::blake2_256(&hashes[(start - 1) .. (start + 64)])
        };
        // We save this hash to the i'th position
        {
          let start = length_prefix_len + (i * 32);
          hashes[start .. (start + 32)].copy_from_slice(hash.as_slice());
        }
      }

      let mut end_of_hashes_on_next_layer = length_prefix_len + (complete_pairs * 32);
      // If there was an odd hash which wasn't hashed on this layer, carry it
      if (amount_of_hashes % 2) == 1 {
        // Buffer the carried hash on the stack, as we can't directly copy between two ranges of
        // the same mutable slice (the 0xff initializer is arbitrary and immediately overwritten)
        let mut hash = [0xff; 32];
        hash.copy_from_slice(&hashes[(hashes.len() - 32) ..]);
        let start = end_of_hashes_on_next_layer;
        end_of_hashes_on_next_layer = start + 32;
        hashes[start .. end_of_hashes_on_next_layer].copy_from_slice(&hash);
      }
      // Narrow our view to solely the hashes forming the next layer
      hashes = &mut hashes[.. end_of_hashes_on_next_layer];
    }

    // The sole remaining hash is the root; an empty list leaves no 32-byte suffix, so the
    // conversion fails and we yield the empty tree
    match hashes[length_prefix_len ..].try_into() {
      Ok(root) => Self { root },
      Err(_) => Self::EMPTY,
    }
  }
}
/// An unbalanced Merkle tree which is incrementally created.
///
/// The `Default` value is an empty tree, identical to `IncrementalUnbalancedMerkleTree::new()`.
#[derive(Clone, PartialEq, Eq, Debug, Default, BorshSerialize, BorshDeserialize)]
pub struct IncrementalUnbalancedMerkleTree {
  /// The right-most spine of the tree under construction, as
  /// (number of children under branch, branch hash) pairs.
  branches: Vec<(u64, [u8; 32])>,
}
impl IncrementalUnbalancedMerkleTree {
  /// Create a new incrementally-created unbalanced merkle tree.
  pub fn new() -> Self {
    Self { branches: Vec::new() }
  }

  /// Reduce the incremental tree.
  ///
  /// We prune the descendants of fully-populated branches.
  fn reduce(&mut self, tag: u8) {
    while {
      // If we have two branches eligible to be merged, and they're of equal depth
      let len = self.branches.len();
      (len >= 2) && (self.branches[len - 2].0 == self.branches[len - 1].0)
    } {
      // Merge them, as the two descendants of this branch, pruning themselves
      let right = self.branches.pop().unwrap();
      let left = self.branches.last_mut().unwrap();
      // The counts are equal, so doubling the left's count sums both children's counts
      left.0 *= 2;
      left.1 = UnbalancedMerkleTree::branch_hash(tag, &left.1, &right.1);
    }
  }

  /// Append a leaf to this merkle tree.
  ///
  /// The conditions on this leaf are the same as defined by `UnbalancedMerkleTree::new`.
  pub fn append(&mut self, tag: u8, leaf: [u8; 32]) {
    // A leaf is a branch with a single child (itself)
    self.branches.push((1, leaf));
    self.reduce(tag);
  }

  /// Calculate the `UnbalancedMerkleTree` for this tree.
  ///
  /// This consumes the incremental tree, folding all outstanding branches into a single root.
  pub fn calculate(mut self, tag: u8) -> UnbalancedMerkleTree {
    if self.branches.is_empty() {
      return UnbalancedMerkleTree::EMPTY;
    }
    while self.branches.len() > 1 {
      // The left-most list elements will have already been hashed at the layer simulated for the
      // right-most list elements. We emulate the hashes upon carries for right-most elements
      {
        let right = self.branches.pop().unwrap();
        let left = self.branches.last_mut().unwrap();
        // `*= 2` models the right branch as if it was as deep as the left; the counts are only
        // ever compared for equality within `reduce`
        left.0 *= 2;
        left.1 = UnbalancedMerkleTree::branch_hash(tag, &left.1, &right.1);
      }
      // And then we perform any hashes due to being of equal depth
      self.reduce(tag);
    }
    UnbalancedMerkleTree { root: self.branches[0].1 }
  }
}
#[cfg(feature = "std")]
#[test]
fn unbalanced_merkle_tree() {
  use sp_core::Encode;
  use rand_core::{RngCore, OsRng};

  // Pick a random (non-maximal) tag so domain separation isn't tested against a fixed constant
  let tag = u8::try_from(OsRng.next_u64() % u64::from(u8::MAX)).unwrap();

  let mut leaves = vec![];
  let mut incremental = IncrementalUnbalancedMerkleTree::new();
  // Exercise every leaf count from the empty tree past a couple of powers of two
  for i in 0 ..= 257 {
    assert_eq!(leaves.len(), i);

    // The reference construction for this leaf count
    let tree = UnbalancedMerkleTree::new(tag, leaves.clone());
    // `is_empty` must hold exactly when there are no leaves
    assert_eq!(tree.is_empty(), i == 0);

    // The optimized in-place method, fed the SCALE encoding, must agree with the reference
    assert_eq!(
      UnbalancedMerkleTree::from_scale_encoded_list_of_hashes(tag, leaves.encode()),
      tree,
    );
    // Encoding a slice must behave identically to encoding the vector
    assert_eq!(
      UnbalancedMerkleTree::from_scale_encoded_list_of_hashes(tag, leaves.as_slice().encode()),
      tree,
    );
    // The incremental construction must also agree
    assert_eq!(incremental.clone().calculate(tag), tree, "{i}");

    // Once the tree has branches...
    if i > 1 {
      // ... a different tag must produce a different root
      assert!(UnbalancedMerkleTree::new(tag.wrapping_add(1), leaves.clone()) != tree);
    }

    // Append a random leaf for the next iteration
    let mut leaf = [0; 32];
    OsRng.fill_bytes(&mut leaf);
    leaves.push(leaf);
    incremental.append(tag, leaf);
  }
}