database: docs & small fixes for Config (#77)

* config: `ReaderThreads::from(0)` -> `ReaderThreads::OnePerThread` * config: all docs, impl `as_threads()` * config: fix doc link * config: add `db_directory()` and make field private * config: remove `P: AsRef<Path>` We need a `PathBuf` anyway, make the caller create one. * config: impl `Debug`, add `Config::default()` test * config: more docs * fix doc links * config: review fix * Update database/src/config.rs --------- Co-authored-by: Boog900 <boog900@tutanota.com>
2024-12-23 03:59:31 +00:00 · 2024-02-26 18:40:13 -05:00 · 2024-02-26 18:40:13 -05:00 · 88d40ef401
commit 88d40ef401
parent e560ecc2ee
4 changed files with 297 additions and 55 deletions
--- a/database/Cargo.toml
+++ b/database/Cargo.toml
@ -46,5 +46,6 @@ sanakirja   = { version = "1.4.0", optional = true }
 serde       = { workspace = true, optional = true }
 [dev-dependencies]
 cuprate-helper = { path = "../helper", features = ["thread"] }
 page_size = { version = "0.6.0" }
 tempfile  = { version = "3.10.0" }
--- a/database/src/config.rs
+++ b/database/src/config.rs
@ -1,9 +1,19 @@
 //! Database [`Env`](crate::Env) configuration.
 //!
-//! TODO
+//! This module contains the main [`Config`]uration struct
 //! for the database [`Env`](crate::Env)ironment, and data
 //! structures related to any configuration setting.
 //!
 //! These configurations are processed at runtime, meaning
 //! the `Env` can/will dynamically adjust its behavior
 //! based on these values.
 //---------------------------------------------------------------------------------------------------- Import
-use std::{borrow::Cow, num::NonZeroUsize, path::Path};
+use std::{
    borrow::Cow,
    num::NonZeroUsize,
    path::{Path, PathBuf},
 };
 use cuprate_helper::fs::cuprate_database_dir;
@ -16,116 +26,249 @@ use crate::{constants::DATABASE_FILENAME, resize::ResizeAlgorithm};
 /// allows the database to be configured in various ways.
 ///
 /// TODO: there's probably more options to add.
-#[derive(Clone, PartialEq, PartialOrd)]
+#[derive(Debug, Clone, PartialEq, PartialOrd)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct Config {
    //------------------------ Database PATHs
    // These are private since we don't want
    // users messing with them after construction.
    /// The directory used to store all database files.
    ///
    /// By default, if no value is provided in the [`Config`]
    /// constructor functions, this will be [`cuprate_database_dir`].
-    pub db_directory: Cow<'static, Path>,
+    pub(crate) db_directory: Cow<'static, Path>,
    /// The actual database data file.
    ///
    /// This is private, and created from the above `db_directory`.
    pub(crate) db_file: Cow<'static, Path>,
-    /// TODO
+    /// Disk synchronization mode.
    pub sync_mode: SyncMode,
    /// Database reader thread count.
    pub reader_threads: ReaderThreads,
-    /// TODO
+    /// Database memory map resizing algorithm.
    ///
    /// This is used as the default fallback, but
    /// custom algorithms can be used as well with
    /// [`Env::resize_map`](crate::Env::resize_map).
    pub resize_algorithm: ResizeAlgorithm,
 }
 impl Config {
-    /// TODO
+    /// Private function to acquire [`Config::db_file`]
-    fn return_db_dir_and_file<P: AsRef<Path>>(
+    /// from the user provided (or default) [`Config::db_directory`].
-        db_directory: Option<P>,
+    ///
    /// As the database data file PATH is just the directory + the filename,
    /// we only need the directory from the user/Config, and can add it here.
    fn return_db_dir_and_file(
        db_directory: Option<PathBuf>,
    ) -> (Cow<'static, Path>, Cow<'static, Path>) {
        // INVARIANT: all PATH safety checks are done
        // in `helper::fs`. No need to do them here.
-        let db_directory = db_directory.map_or_else(
+        let db_directory =
-            || Cow::Borrowed(cuprate_database_dir()),
+            db_directory.map_or_else(|| Cow::Borrowed(cuprate_database_dir()), Cow::Owned);
            |p| Cow::Owned(p.as_ref().to_path_buf()),
        );
        // Add the database filename to the directory.
        let mut db_file = db_directory.to_path_buf();
        db_file.push(DATABASE_FILENAME);
        (db_directory, Cow::Owned(db_file))
    }
-    /// TODO
+    /// Create a new [`Config`] with sane default settings.
-    pub fn new<P: AsRef<Path>>(db_directory: Option<P>) -> Self {
+    ///
    /// # `db_directory`
    /// If this is `Some`, it will be used as the
    /// directory that contains all database files.
    ///
    /// If `None`, it will use the default directory [`cuprate_database_dir`].
    pub fn new(db_directory: Option<PathBuf>) -> Self {
        let (db_directory, db_file) = Self::return_db_dir_and_file(db_directory);
        Self {
            db_directory,
            db_file,
-            sync_mode: SyncMode::Safe,
+            sync_mode: SyncMode::FastThenSafe,
            reader_threads: ReaderThreads::OnePerThread,
            resize_algorithm: ResizeAlgorithm::new(),
        }
    }
-    /// TODO
+    /// Create a [`Config`] with the highest performing,
-    pub fn fast<P: AsRef<Path>>(db_directory: Option<P>) -> Self {
+    /// but also most resource-intensive & maybe risky settings.
    ///
    /// Good default for testing, and resource-available machines.
    ///
    /// # `db_directory`
    /// If this is `Some`, it will be used as the
    /// directory that contains all database files.
    ///
    /// If `None`, it will use the default directory [`cuprate_database_dir`].
    pub fn fast(db_directory: Option<PathBuf>) -> Self {
        let (db_directory, db_file) = Self::return_db_dir_and_file(db_directory);
        Self {
            db_directory,
            db_file,
-            sync_mode: SyncMode::Fastest,
+            sync_mode: SyncMode::Fast,
            reader_threads: ReaderThreads::OnePerThread,
            resize_algorithm: ResizeAlgorithm::new(),
        }
    }
-    /// TODO
+    /// Create a [`Config`] with the lowest performing,
-    pub fn low_power<P: AsRef<Path>>(db_directory: Option<P>) -> Self {
+    /// but also least resource-intensive settings.
    ///
    /// Good default for resource-limited machines, e.g. a cheap VPS.
    ///
    /// # `db_directory`
    /// If this is `Some`, it will be used as the
    /// directory that contains all database files.
    ///
    /// If `None`, it will use the default directory [`cuprate_database_dir`].
    pub fn low_power(db_directory: Option<PathBuf>) -> Self {
        let (db_directory, db_file) = Self::return_db_dir_and_file(db_directory);
        Self {
            db_directory,
            db_file,
-            sync_mode: SyncMode::Safe,
+            sync_mode: SyncMode::FastThenSafe,
            reader_threads: ReaderThreads::One,
            resize_algorithm: ResizeAlgorithm::new(),
        }
    }
    /// Return the absolute [`Path`] to the database directory.
    ///
    /// This will be the `db_directory` given
    /// (or default) during [`Config`] construction.
    pub const fn db_directory(&self) -> &Cow<'_, Path> {
        &self.db_directory
    }
    /// Return the absolute [`Path`] to the database data file.
-    pub fn db_file_path(&self) -> &Path {
+    ///
    /// This will be based off the `db_directory` given
    /// (or default) during [`Config`] construction.
    pub const fn db_file(&self) -> &Cow<'_, Path> {
        &self.db_file
    }
 }
 impl Default for Config {
    /// Same as `Self::new(None)`.
    ///
    /// ```rust
    /// # use cuprate_database::config::*;
    /// assert_eq!(Config::default(), Config::new(None));
    /// ```
    fn default() -> Self {
-        Self::new(None::<&'static Path>)
+        Self::new(None)
    }
 }
 //---------------------------------------------------------------------------------------------------- SyncMode
-/// TODO
+/// Disk synchronization mode.
-#[derive(Copy, Clone, Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
+///
 /// This controls how/when the database syncs its data to disk.
 ///
 /// Regardless of the variant chosen, dropping [`Env`](crate::Env)
 /// will always cause it to fully sync to disk.
 ///
 /// # Sync vs Async
 /// All variants except [`SyncMode::Fast`] are `synchronous`,
 /// as in the database will wait until the OS has finished syncing
 /// all the data to disk before continuing.
 ///
 /// `SyncMode::Fast` is `asynchronous`, meaning the database will _NOT_
 /// wait until the data is fully synced to disk before continuing.
 /// Note that this doesn't mean the database itself won't be synchronized
 /// between readers/writers, but rather that the data _on disk_ may not
 /// be immediately synchronized after a write.
 ///
 /// Something like:
 /// ```rust,ignore
 /// db.put("key", value);
 /// db.get("key");
 /// ```
 /// will be fine, most likely pulling from memory instead of disk.
 #[derive(Copy, Clone, Debug, Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[cfg_attr(
    feature = "borsh",
    derive(borsh::BorshSerialize, borsh::BorshDeserialize)
 )]
 pub enum SyncMode {
-    /// Fully sync to disk per transaction.
+    /// Use [`SyncMode::Fast`] until fully synced,
    /// then use [`SyncMode::Safe`].
    ///
    /// # TODO: how to implement this?
    /// ref: <https://github.com/monero-project/monero/issues/1463>
    /// monerod-solution: <https://github.com/monero-project/monero/pull/1506>
    /// cuprate-issue: <https://github.com/Cuprate/cuprate/issues/78>
    ///
    /// We could:
    /// ```rust,ignore
    /// if current_db_block <= top_block.saturating_sub(N) {
    ///     // don't sync()
    /// } else {
    ///     // sync()
    /// }
    /// ```
    /// where N is some threshold we pick that is _close_ enough
    /// to being synced where we want to start being safer.
    ///
    /// Essentially, when we are in a certain % range of being finished,
    /// switch to safe mode, until then, go fast.
    #[default]
    FastThenSafe,
    /// Fully sync to disk per transaction.
    ///
    /// Every database transaction commit will
    /// fully sync all data to disk, _synchronously_,
    /// so the database halts until synced.
    ///
    /// This is expected to be very slow.
    Safe,
-    /// Asynchronously sync, only flush at database shutdown.
+    /// Fully sync to disk after we cross this transaction threshold.
-    Fastest,
+    ///
    /// After committing [`usize`] amount of database
    /// transactions, the database will be synced to disk.
    ///
    /// `0` behaves the same as [`SyncMode::Safe`], and a ridiculously large
    /// number like `usize::MAX` is practically the same as [`SyncMode::Fast`].
    Threshold(usize),
    /// Only flush at database shutdown.
    ///
    /// This is the fastest, yet least safe option.
    ///
    /// It will cause the database to never _actively_ sync,
    /// letting the OS decide when to flush data to disk.
    ///
    /// # Corruption
    /// In the case of a system crash, the database
    /// may become corrupted when using this option.
    //
    // TODO: we could call this `unsafe`
    // and use that terminology in the config file
    // so users know exactly what they are getting
    // themselves into.
    Fast,
 }
 //---------------------------------------------------------------------------------------------------- ReaderThreads
-/// TODO
+/// Amount of database reader threads to spawn.
-#[derive(Copy, Clone, Default, PartialEq, PartialOrd)]
+///
 /// This controls how many reader threads [`crate::service`]'s
 /// thread-pool will spawn to receive and send requests/responses.
 ///
 /// It will always be at least 1, and at most the amount of threads on the machine.
 ///
 /// The main function used to extract an actual
 /// usable thread count out of this is [`ReaderThreads::as_threads`].
 #[derive(Copy, Clone, Debug, Default, PartialEq, PartialOrd)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[cfg_attr(
    feature = "borsh",
@ -133,34 +276,115 @@ pub enum SyncMode {
 )]
 pub enum ReaderThreads {
    #[default]
-    /// TODO
+    /// Spawn 1 reader thread per available thread on the machine.
    ///
    /// For example, a `16-core, 32-thread` Ryzen 5950x will
    /// spawn `32` reader threads using this setting.
    OnePerThread,
-    /// TODO
+    /// Only spawn 1 reader thread.
    One,
-    /// TODO
+    /// Spawn a specified amount of reader threads.
    Number(NonZeroUsize),
    /// TODO
    ///
-    /// # Invariant
+    /// Note that no matter how large this value, it will be
-    /// Must be `0.0..=1.0`.
+    /// ultimately capped at the amount of system threads.
    ///
    /// # `0`
    /// `ReaderThreads::Number(0)` represents "use maximum value",
    /// as such, it is equal to [`ReaderThreads::OnePerThread`].
    ///
    /// ```rust
    /// # use cuprate_database::config::*;
    /// let reader_threads = ReaderThreads::from(0_usize);
    /// assert!(matches!(reader_threads, ReaderThreads::OnePerThread));
    /// ```
    Number(usize),
    /// Spawn a specified % of reader threads.
    ///
    /// This must be a value in-between `0.0..=1.0`
    /// where `1.0` represents [`ReaderThreads::OnePerThread`].
    ///
    /// # Example
    /// For example, using a `16-core, 32-thread` Ryzen 5950x CPU:
    ///
    /// | Input                              | Total threads used |
    /// |------------------------------------|--------------------|
    /// | `ReaderThreads::Percent(0.0)`      | 32 (maximum value)
    /// | `ReaderThreads::Percent(0.5)`      | 16
    /// | `ReaderThreads::Percent(0.75)`     | 24
    /// | `ReaderThreads::Percent(1.0)`      | 32
    /// | `ReaderThreads::Percent(2.0)`      | 32 (saturating)
    /// | `ReaderThreads::Percent(f32::NAN)` | 32 (non-normal default)
    ///
    /// # `0.0`
    /// `ReaderThreads::Percent(0.0)` represents "use maximum value",
    /// as such, it is equal to [`ReaderThreads::OnePerThread`].
    ///
    /// # Not quite `0.0`
    /// If the thread count multiplied by the percentage ends up being
    /// non-zero, but not 1 thread, the minimum value 1 will be returned.
    ///
    /// ```rust
    /// # use cuprate_database::config::*;
    /// assert_eq!(ReaderThreads::Percent(0.000000001).as_threads().get(), 1);
    /// ```
    Percent(f32),
 }
 impl ReaderThreads {
-    /// TODO
+    /// This converts [`ReaderThreads`] into a safe, usable
-    // # Invariant
+    /// number representing how many threads to spawn.
    ///
    /// This function will always return a number in-between `1..=total_thread_count`.
    ///
    /// It uses [`cuprate_helper::thread::threads()`] internally to determine the total thread count.
    ///
    /// # Example
    /// ```rust
    /// use cuprate_database::config::ReaderThreads as Rt;
    ///
    /// let total_threads: std::num::NonZeroUsize =
    ///     cuprate_helper::thread::threads();
    ///
    /// assert_eq!(Rt::OnePerThread.as_threads(), total_threads);
    ///
    /// assert_eq!(Rt::One.as_threads().get(), 1);
    ///
    /// assert_eq!(Rt::Number(0).as_threads(), total_threads);
    /// assert_eq!(Rt::Number(1).as_threads().get(), 1);
    /// assert_eq!(Rt::Number(usize::MAX).as_threads(), total_threads);
    ///
    /// assert_eq!(Rt::Percent(0.01).as_threads().get(), 1);
    /// assert_eq!(Rt::Percent(0.0).as_threads(), total_threads);
    /// assert_eq!(Rt::Percent(1.0).as_threads(), total_threads);
    /// assert_eq!(Rt::Percent(f32::NAN).as_threads(), total_threads);
    /// assert_eq!(Rt::Percent(f32::INFINITY).as_threads(), total_threads);
    /// assert_eq!(Rt::Percent(f32::NEG_INFINITY).as_threads(), total_threads);
    ///
    /// // Percentage only works on more than 1 thread.
    /// if total_threads.get() > 1 {
    ///     assert_eq!(
    ///         Rt::Percent(0.5).as_threads().get(),
    ///         (total_threads.get() as f32 / 2.0) as usize,
    ///     );
    /// }
    /// ```
    //
    // INVARIANT:
    // LMDB will error if we input zero, so don't allow that.
    // <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/mdb.c#L4687>
    pub fn as_threads(&self) -> NonZeroUsize {
        let total_threads = cuprate_helper::thread::threads();
        match self {
-            Self::OnePerThread => total_threads,
+            Self::OnePerThread => total_threads, // use all threads
-            Self::One => NonZeroUsize::MIN,
+            Self::One => NonZeroUsize::MIN,      // one
-            Self::Number(n) => std::cmp::min(*n, total_threads),
+            Self::Number(n) => match NonZeroUsize::new(*n) {
                Some(n) => std::cmp::min(n, total_threads), // saturate at total threads
                None => total_threads,                      // 0 == maximum value
            },
            // We handle the casting loss.
            #[allow(
@ -169,26 +393,43 @@ impl ReaderThreads {
                clippy::cast_sign_loss
            )]
            Self::Percent(f) => {
                // If the float is non-normal (zero, subnormal, infinite, NaN)
                // or outside of `0.0..=1.0`, use the default (all threads).
                if !f.is_normal() || !(0.0..=1.0).contains(f) {
                    return total_threads;
                }
-                let thread_percent = (total_threads.get() as f32) * f;
+                // 0.0 == maximum value.
-                let Some(threads) = NonZeroUsize::new(thread_percent as usize) else {
+                if *f == 0.0 {
                    return total_threads;
-                };
+                }
-                std::cmp::min(threads, total_threads)
+                // Calculate percentage of total threads.
                let thread_percent = (total_threads.get() as f32) * f;
                match NonZeroUsize::new(thread_percent as usize) {
                    Some(n) => std::cmp::min(n, total_threads), // saturate at total threads.
                    None => {
                        // We checked for `0.0` above, so this being
                        // 0 means that the percentage was _so_ low
                        // it made our thread count something like
                        // 0.99. In this case, just use 1 thread.
                        NonZeroUsize::MIN
                    }
                }
            }
        }
    }
 }
 impl<T: Into<usize>> From<T> for ReaderThreads {
    /// Create a [`ReaderThreads::Number`].
    ///
    /// If `value` is `0`, this will return [`ReaderThreads::OnePerThread`].
    fn from(value: T) -> Self {
-        match NonZeroUsize::new(value.into()) {
+        let u: usize = value.into();
-            Some(n) => Self::Number(n),
+        if u == 0 {
-            None => Self::One,
+            Self::OnePerThread
        } else {
            Self::Number(u)
        }
    }
 }
--- a/database/src/constants.rs
+++ b/database/src/constants.rs
@ -27,7 +27,7 @@ cfg_if::cfg_if! {
        /// Cuprate's database filename.
        ///
-        /// This is the filename for Cuprate's database, used in [`Config::db_file_path`](crate::config::Config::db_file_path).
+        /// This is the filename for Cuprate's database, used in [`Config::db_file`](crate::config::Config::db_file).
        pub const DATABASE_FILENAME: &str = "data.san"; // TODO: pick a name + extension.
    } else {
        /// Static string of the `crate` being used as the database backend.
@ -35,7 +35,7 @@ cfg_if::cfg_if! {
        /// Cuprate's database filename.
        ///
-        /// This is the filename for Cuprate's database, used in [`Config::db_file_path`](crate::config::Config::db_file_path).
+        /// This is the filename for Cuprate's database, used in [`Config::db_file`](crate::config::Config::db_file).
        pub const DATABASE_FILENAME: &str = "data.mdb";
    }
 }
--- a/database/src/lib.rs
+++ b/database/src/lib.rs
@ -98,7 +98,7 @@
 //!
 //! // Create a configuration for the database environment.
 //! let db_dir = tempfile::tempdir().unwrap();
-//! let config = Config::new(Some(db_dir));
+//! let config = Config::new(Some(db_dir.path().to_path_buf()));
 //!
 //! // Initialize the database thread-pool.
 //!