database: docs & small fixes for Config (#77)

* config: `ReaderThreads::from(0)` -> `ReaderThreads::OnePerThread`

* config: all docs, impl `as_threads()`

* config: fix doc link

* config: add `db_directory()` and make field private

* config: remove `P: AsRef<Path>`

We need a `PathBuf` anyway, make the caller create one.

* config: impl `Debug`, add `Config::default()` test

* config: more docs

* fix doc links

* config: review fix

* Update database/src/config.rs

---------

Co-authored-by: Boog900 <boog900@tutanota.com>
This commit is contained in:
hinto-janai 2024-02-26 18:40:13 -05:00 committed by GitHub
parent e560ecc2ee
commit 88d40ef401
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 297 additions and 55 deletions

View file

@ -46,5 +46,6 @@ sanakirja = { version = "1.4.0", optional = true }
serde = { workspace = true, optional = true }
[dev-dependencies]
cuprate-helper = { path = "../helper", features = ["thread"] }
page_size = { version = "0.6.0" }
tempfile = { version = "3.10.0" }

View file

@ -1,9 +1,19 @@
//! Database [`Env`](crate::Env) configuration.
//!
//! TODO
//! This module contains the main [`Config`]uration struct
//! for the database [`Env`](crate::Env)ironment, and data
//! structures related to any configuration setting.
//!
//! These configurations are processed at runtime, meaning
//! the `Env` can/will dynamically adjust its behavior
//! based on these values.
//---------------------------------------------------------------------------------------------------- Import
use std::{borrow::Cow, num::NonZeroUsize, path::Path};
use std::{
borrow::Cow,
num::NonZeroUsize,
path::{Path, PathBuf},
};
use cuprate_helper::fs::cuprate_database_dir;
@ -16,116 +26,249 @@ use crate::{constants::DATABASE_FILENAME, resize::ResizeAlgorithm};
/// allows the database to be configured in various ways.
///
/// TODO: there's probably more options to add.
#[derive(Clone, PartialEq, PartialOrd)]
#[derive(Debug, Clone, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Config {
//------------------------ Database PATHs
// These are private since we don't want
// users messing with them after construction.
/// The directory used to store all database files.
///
/// By default, if no value is provided in the [`Config`]
/// constructor functions, this will be [`cuprate_database_dir`].
pub db_directory: Cow<'static, Path>,
pub(crate) db_directory: Cow<'static, Path>,
/// The actual database data file.
///
/// This is private, and created from the above `db_directory`.
pub(crate) db_file: Cow<'static, Path>,
/// TODO
/// Disk synchronization mode.
pub sync_mode: SyncMode,
/// Database reader thread count.
pub reader_threads: ReaderThreads,
/// TODO
/// Database memory map resizing algorithm.
///
/// This is used as the default fallback, but
/// custom algorithms can be used as well with
/// [`Env::resize_map`](crate::Env::resize_map).
pub resize_algorithm: ResizeAlgorithm,
}
impl Config {
/// TODO
fn return_db_dir_and_file<P: AsRef<Path>>(
db_directory: Option<P>,
/// Private function to acquire [`Config::db_file`]
/// from the user provided (or default) [`Config::db_directory`].
///
/// As the database data file PATH is just the directory + the filename,
/// we only need the directory from the user/Config, and can add it here.
fn return_db_dir_and_file(
db_directory: Option<PathBuf>,
) -> (Cow<'static, Path>, Cow<'static, Path>) {
// INVARIANT: all PATH safety checks are done
// in `helper::fs`. No need to do them here.
let db_directory = db_directory.map_or_else(
|| Cow::Borrowed(cuprate_database_dir()),
|p| Cow::Owned(p.as_ref().to_path_buf()),
);
let db_directory =
db_directory.map_or_else(|| Cow::Borrowed(cuprate_database_dir()), Cow::Owned);
// Add the database filename to the directory.
let mut db_file = db_directory.to_path_buf();
db_file.push(DATABASE_FILENAME);
(db_directory, Cow::Owned(db_file))
}
/// TODO
pub fn new<P: AsRef<Path>>(db_directory: Option<P>) -> Self {
/// Create a new [`Config`] with sane default settings.
///
/// # `db_directory`
/// If this is `Some`, it will be used as the
/// directory that contains all database files.
///
/// If `None`, it will use the default directory [`cuprate_database_dir`].
pub fn new(db_directory: Option<PathBuf>) -> Self {
let (db_directory, db_file) = Self::return_db_dir_and_file(db_directory);
Self {
db_directory,
db_file,
sync_mode: SyncMode::Safe,
sync_mode: SyncMode::FastThenSafe,
reader_threads: ReaderThreads::OnePerThread,
resize_algorithm: ResizeAlgorithm::new(),
}
}
/// TODO
pub fn fast<P: AsRef<Path>>(db_directory: Option<P>) -> Self {
/// Create a [`Config`] with the highest performing,
/// but also most resource-intensive & maybe risky settings.
///
/// Good default for testing, and resource-available machines.
///
/// # `db_directory`
/// If this is `Some`, it will be used as the
/// directory that contains all database files.
///
/// If `None`, it will use the default directory [`cuprate_database_dir`].
pub fn fast(db_directory: Option<PathBuf>) -> Self {
let (db_directory, db_file) = Self::return_db_dir_and_file(db_directory);
Self {
db_directory,
db_file,
sync_mode: SyncMode::Fastest,
sync_mode: SyncMode::Fast,
reader_threads: ReaderThreads::OnePerThread,
resize_algorithm: ResizeAlgorithm::new(),
}
}
/// TODO
pub fn low_power<P: AsRef<Path>>(db_directory: Option<P>) -> Self {
/// Create a [`Config`] with the lowest performing,
/// but also least resource-intensive settings.
///
/// Good default for resource-limited machines, e.g. a cheap VPS.
///
/// # `db_directory`
/// If this is `Some`, it will be used as the
/// directory that contains all database files.
///
/// If `None`, it will use the default directory [`cuprate_database_dir`].
pub fn low_power(db_directory: Option<PathBuf>) -> Self {
let (db_directory, db_file) = Self::return_db_dir_and_file(db_directory);
Self {
db_directory,
db_file,
sync_mode: SyncMode::Safe,
sync_mode: SyncMode::FastThenSafe,
reader_threads: ReaderThreads::One,
resize_algorithm: ResizeAlgorithm::new(),
}
}
/// Return the absolute [`Path`] to the database directory.
///
/// This will be the `db_directory` given
/// (or default) during [`Config`] construction.
pub const fn db_directory(&self) -> &Cow<'_, Path> {
&self.db_directory
}
/// Return the absolute [`Path`] to the database data file.
pub fn db_file_path(&self) -> &Path {
///
/// This will be based off the `db_directory` given
/// (or default) during [`Config`] construction.
pub const fn db_file(&self) -> &Cow<'_, Path> {
&self.db_file
}
}
impl Default for Config {
/// Same as `Self::new(None)`.
///
/// ```rust
/// # use cuprate_database::config::*;
/// assert_eq!(Config::default(), Config::new(None));
/// ```
fn default() -> Self {
Self::new(None::<&'static Path>)
Self::new(None)
}
}
//---------------------------------------------------------------------------------------------------- SyncMode
/// TODO
#[derive(Copy, Clone, Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
/// Disk synchronization mode.
///
/// This controls how/when the database syncs its data to disk.
///
/// Regardless of the variant chosen, dropping [`Env`](crate::Env)
/// will always cause it to fully sync to disk.
///
/// # Sync vs Async
/// All invariants except [`SyncMode::Fast`] are `synchronous`,
/// as in the database will wait until the OS has finished syncing
/// all the data to disk before continuing.
///
/// `SyncMode::Fast` is `asynchronous`, meaning the database will _NOT_
/// wait until the data is fully synced to disk before continuing.
/// Note that this doesn't mean the database itself won't be synchronized
/// between readers/writers, but rather that the data _on disk_ may not
/// be immediately synchronized after a write.
///
/// Something like:
/// ```rust,ignore
/// db.put("key", value);
/// db.get("key");
/// ```
/// will be fine, most likely pulling from memory instead of disk.
#[derive(Copy, Clone, Debug, Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
feature = "borsh",
derive(borsh::BorshSerialize, borsh::BorshDeserialize)
)]
pub enum SyncMode {
/// Fully sync to disk per transaction.
/// Use [`SyncMode::Fast`] until fully synced,
/// then use [`SyncMode::Safe`].
///
/// # TODO: how to implement this?
/// ref: <https://github.com/monero-project/monero/issues/1463>
/// monerod-solution: <https://github.com/monero-project/monero/pull/1506>
/// cuprate-issue: <https://github.com/Cuprate/cuprate/issues/78>
///
/// We could:
/// ```rust,ignore
/// if current_db_block <= top_block.saturating_sub(N) {
/// // don't sync()
/// } else {
/// // sync()
/// }
/// ```
/// where N is some threshold we pick that is _close_ enough
/// to being synced where we want to start being safer.
///
/// Essentially, when we are in a certain % range of being finished,
/// switch to safe mode, until then, go fast.
#[default]
FastThenSafe,
/// Fully sync to disk per transaction.
///
/// Every database transaction commit will
/// fully sync all data to disk, _synchronously_,
/// so the database halts until synced.
///
/// This is expected to be very slow.
Safe,
/// Asynchronously sync, only flush at database shutdown.
Fastest,
/// Fully sync to disk after we cross this transaction threshold.
///
/// After committing [`usize`] amount of database
/// transactions, it will be sync to disk.
///
/// `0` behaves the same as [`SyncMode::Safe`], and a ridiculously large
/// number like `usize::MAX` is practically the same as [`SyncMode::Fast`].
Threshold(usize),
/// Only flush at database shutdown.
///
/// This is the fastest, yet unsafest option.
///
/// It will cause the database to never _actively_ sync,
/// letting the OS decide when to flush data to disk.
///
/// # Corruption
/// In the case of a system crash, the database
/// may become corrupted when using this option.
//
// TODO: we could call this `unsafe`
// and use that terminology in the config file
// so users know exactly what they are getting
// themselves into.
Fast,
}
//---------------------------------------------------------------------------------------------------- ReaderThreads
/// TODO
#[derive(Copy, Clone, Default, PartialEq, PartialOrd)]
/// Amount of database reader threads to spawn.
///
/// This controls how many reader thread [`crate::service`]'s
/// thread-pool will spawn to receive and send requests/responses.
///
/// It will always be at least 1, up until the amount of threads on the machine.
///
/// The main function used to extract an actual
/// usable thread count out of this is [`ReaderThreads::as_threads`].
#[derive(Copy, Clone, Debug, Default, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
feature = "borsh",
@ -133,34 +276,115 @@ pub enum SyncMode {
)]
pub enum ReaderThreads {
#[default]
/// TODO
/// Spawn 1 reader thread per available thread on the machine.
///
/// For example, a `16-core, 32-thread` Ryzen 5950x will
/// spawn `32` reader threads using this setting.
OnePerThread,
/// TODO
/// Only spawn 1 reader thread.
One,
/// TODO
Number(NonZeroUsize),
/// TODO
/// Spawn a specified amount of reader threads.
///
/// # Invariant
/// Must be `0.0..=1.0`.
/// Note that no matter how large this value, it will be
/// ultimately capped at the amount of system threads.
///
/// # `0`
/// `ReaderThreads::Number(0)` represents "use maximum value",
/// as such, it is equal to [`ReaderThreads::OnePerThread`].
///
/// ```rust
/// # use cuprate_database::config::*;
/// let reader_threads = ReaderThreads::from(0_usize);
/// assert!(matches!(reader_threads, ReaderThreads::OnePerThread));
/// ```
Number(usize),
/// Spawn a specified % of reader threads.
///
/// This must be a value in-between `0.0..1.0`
/// where `1.0` represents [`ReaderThreads::OnePerThread`].
///
/// # Example
/// For example, using a `16-core, 32-thread` Ryzen 5950x CPU:
///
/// | Input | Total thread used |
/// |------------------------------------|-------------------|
/// | `ReaderThreads::Percent(0.0)` | 32 (maximum value)
/// | `ReaderThreads::Percent(0.5)` | 16
/// | `ReaderThreads::Percent(0.75)` | 24
/// | `ReaderThreads::Percent(1.0)` | 32
/// | `ReaderThreads::Percent(2.0)` | 32 (saturating)
/// | `ReaderThreads::Percent(f32::NAN)` | 32 (non-normal default)
///
/// # `0.0`
/// `ReaderThreads::Percent(0.0)` represents "use maximum value",
/// as such, it is equal to [`ReaderThreads::OnePerThread`].
///
/// # Not quite `0.0`
/// If the thread count multiplied by the percentage ends up being
/// non-zero, but not 1 thread, the minimum value 1 will be returned.
///
/// ```rust
/// # use cuprate_database::config::*;
/// assert_eq!(ReaderThreads::Percent(0.000000001).as_threads().get(), 1);
/// ```
Percent(f32),
}
impl ReaderThreads {
/// TODO
// # Invariant
/// This converts [`ReaderThreads`] into a safe, usable
/// number representing how many threads to spawn.
///
/// This function will always return a number in-between `1..=total_thread_count`.
///
/// It uses [`cuprate_helper::thread::threads()`] internally to determine the total thread count.
///
/// # Example
/// ```rust
/// use cuprate_database::config::ReaderThreads as Rt;
///
/// let total_threads: std::num::NonZeroUsize =
/// cuprate_helper::thread::threads();
///
/// assert_eq!(Rt::OnePerThread.as_threads(), total_threads);
///
/// assert_eq!(Rt::One.as_threads().get(), 1);
///
/// assert_eq!(Rt::Number(0).as_threads(), total_threads);
/// assert_eq!(Rt::Number(1).as_threads().get(), 1);
/// assert_eq!(Rt::Number(usize::MAX).as_threads(), total_threads);
///
/// assert_eq!(Rt::Percent(0.01).as_threads().get(), 1);
/// assert_eq!(Rt::Percent(0.0).as_threads(), total_threads);
/// assert_eq!(Rt::Percent(1.0).as_threads(), total_threads);
/// assert_eq!(Rt::Percent(f32::NAN).as_threads(), total_threads);
/// assert_eq!(Rt::Percent(f32::INFINITY).as_threads(), total_threads);
/// assert_eq!(Rt::Percent(f32::NEG_INFINITY).as_threads(), total_threads);
///
/// // Percentage only works on more than 1 thread.
/// if total_threads.get() > 1 {
/// assert_eq!(
/// Rt::Percent(0.5).as_threads().get(),
/// (total_threads.get() as f32 / 2.0) as usize,
/// );
/// }
/// ```
//
// INVARIANT:
// LMDB will error if we input zero, so don't allow that.
// <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/mdb.c#L4687>
pub fn as_threads(&self) -> NonZeroUsize {
let total_threads = cuprate_helper::thread::threads();
match self {
Self::OnePerThread => total_threads,
Self::One => NonZeroUsize::MIN,
Self::Number(n) => std::cmp::min(*n, total_threads),
Self::OnePerThread => total_threads, // use all threads
Self::One => NonZeroUsize::MIN, // one
Self::Number(n) => match NonZeroUsize::new(*n) {
Some(n) => std::cmp::min(n, total_threads), // saturate at total threads
None => total_threads, // 0 == maximum value
},
// We handle the casting loss.
#[allow(
@ -169,26 +393,43 @@ impl ReaderThreads {
clippy::cast_sign_loss
)]
Self::Percent(f) => {
// If non-normal float, use the default (all threads).
if !f.is_normal() || !(0.0..=1.0).contains(f) {
return total_threads;
}
let thread_percent = (total_threads.get() as f32) * f;
let Some(threads) = NonZeroUsize::new(thread_percent as usize) else {
// 0.0 == maximum value.
if *f == 0.0 {
return total_threads;
};
}
std::cmp::min(threads, total_threads)
// Calculate percentage of total threads.
let thread_percent = (total_threads.get() as f32) * f;
match NonZeroUsize::new(thread_percent as usize) {
Some(n) => std::cmp::min(n, total_threads), // saturate at total threads.
None => {
// We checked for `0.0` above, so what this
// being 0 means that the percentage was _so_
// low it made our thread count something like
// 0.99. In this case, just use 1 thread.
NonZeroUsize::MIN
}
}
}
}
}
}
impl<T: Into<usize>> From<T> for ReaderThreads {
/// Create a [`ReaderThreads::Number`].
///
/// If `value` is `0`, this will return [`ReaderThreads::OnePerThread`].
fn from(value: T) -> Self {
match NonZeroUsize::new(value.into()) {
Some(n) => Self::Number(n),
None => Self::One,
let u: usize = value.into();
if u == 0 {
Self::OnePerThread
} else {
Self::Number(u)
}
}
}

View file

@ -27,7 +27,7 @@ cfg_if::cfg_if! {
/// Cuprate's database filename.
///
/// This is the filename for Cuprate's database, used in [`Config::db_file_path`](crate::config::Config::db_file_path).
/// This is the filename for Cuprate's database, used in [`Config::db_file`](crate::config::Config::db_file).
pub const DATABASE_FILENAME: &str = "data.san"; // TODO: pick a name + extension.
} else {
/// Static string of the `crate` being used as the database backend.
@ -35,7 +35,7 @@ cfg_if::cfg_if! {
/// Cuprate's database filename.
///
/// This is the filename for Cuprate's database, used in [`Config::db_file_path`](crate::config::Config::db_file_path).
/// This is the filename for Cuprate's database, used in [`Config::db_file`](crate::config::Config::db_file).
pub const DATABASE_FILENAME: &str = "data.mdb";
}
}

View file

@ -98,7 +98,7 @@
//!
//! // Create a configuration for the database environment.
//! let db_dir = tempfile::tempdir().unwrap();
//! let config = Config::new(Some(db_dir));
//! let config = Config::new(Some(db_dir.path().to_path_buf()));
//!
//! // Initialize the database thread-pool.
//!