mirror of
https://github.com/hinto-janai/cuprate.git
synced 2024-12-22 11:39:30 +00:00
database: docs & small fixes for Config
(#77)
* config: `ReaderThreads::from(0)` -> `ReaderThreads::OnePerThread` * config: all docs, impl `as_threads()` * config: fix doc link * config: add `db_directory()` and make field private * config: remove `P: AsRef<Path>` We need a `PathBuf` anyway, make the caller create one. * config: impl `Debug`, add `Config::default()` test * config: more docs * fix doc links * config: review fix * Update database/src/config.rs --------- Co-authored-by: Boog900 <boog900@tutanota.com>
This commit is contained in:
parent
e560ecc2ee
commit
88d40ef401
4 changed files with 297 additions and 55 deletions
|
@ -46,5 +46,6 @@ sanakirja = { version = "1.4.0", optional = true }
|
|||
serde = { workspace = true, optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
cuprate-helper = { path = "../helper", features = ["thread"] }
|
||||
page_size = { version = "0.6.0" }
|
||||
tempfile = { version = "3.10.0" }
|
|
@ -1,9 +1,19 @@
|
|||
//! Database [`Env`](crate::Env) configuration.
|
||||
//!
|
||||
//! TODO
|
||||
//! This module contains the main [`Config`]uration struct
|
||||
//! for the database [`Env`](crate::Env)ironment, and data
|
||||
//! structures related to any configuration setting.
|
||||
//!
|
||||
//! These configurations are processed at runtime, meaning
|
||||
//! the `Env` can/will dynamically adjust its behavior
|
||||
//! based on these values.
|
||||
|
||||
//---------------------------------------------------------------------------------------------------- Import
|
||||
use std::{borrow::Cow, num::NonZeroUsize, path::Path};
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
num::NonZeroUsize,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use cuprate_helper::fs::cuprate_database_dir;
|
||||
|
||||
|
@ -16,116 +26,249 @@ use crate::{constants::DATABASE_FILENAME, resize::ResizeAlgorithm};
|
|||
/// allows the database to be configured in various ways.
|
||||
///
|
||||
/// TODO: there's probably more options to add.
|
||||
#[derive(Clone, PartialEq, PartialOrd)]
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||
pub struct Config {
|
||||
//------------------------ Database PATHs
|
||||
// These are private since we don't want
|
||||
// users messing with them after construction.
|
||||
/// The directory used to store all database files.
|
||||
///
|
||||
/// By default, if no value is provided in the [`Config`]
|
||||
/// constructor functions, this will be [`cuprate_database_dir`].
|
||||
pub db_directory: Cow<'static, Path>,
|
||||
|
||||
pub(crate) db_directory: Cow<'static, Path>,
|
||||
/// The actual database data file.
|
||||
///
|
||||
/// This is private, and created from the above `db_directory`.
|
||||
pub(crate) db_file: Cow<'static, Path>,
|
||||
|
||||
/// TODO
|
||||
/// Disk synchronization mode.
|
||||
pub sync_mode: SyncMode,
|
||||
|
||||
/// Database reader thread count.
|
||||
pub reader_threads: ReaderThreads,
|
||||
|
||||
/// TODO
|
||||
/// Database memory map resizing algorithm.
|
||||
///
|
||||
/// This is used as the default fallback, but
|
||||
/// custom algorithms can be used as well with
|
||||
/// [`Env::resize_map`](crate::Env::resize_map).
|
||||
pub resize_algorithm: ResizeAlgorithm,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// TODO
|
||||
fn return_db_dir_and_file<P: AsRef<Path>>(
|
||||
db_directory: Option<P>,
|
||||
/// Private function to acquire [`Config::db_file`]
|
||||
/// from the user provided (or default) [`Config::db_directory`].
|
||||
///
|
||||
/// As the database data file PATH is just the directory + the filename,
|
||||
/// we only need the directory from the user/Config, and can add it here.
|
||||
fn return_db_dir_and_file(
|
||||
db_directory: Option<PathBuf>,
|
||||
) -> (Cow<'static, Path>, Cow<'static, Path>) {
|
||||
// INVARIANT: all PATH safety checks are done
|
||||
// in `helper::fs`. No need to do them here.
|
||||
let db_directory = db_directory.map_or_else(
|
||||
|| Cow::Borrowed(cuprate_database_dir()),
|
||||
|p| Cow::Owned(p.as_ref().to_path_buf()),
|
||||
);
|
||||
let db_directory =
|
||||
db_directory.map_or_else(|| Cow::Borrowed(cuprate_database_dir()), Cow::Owned);
|
||||
|
||||
// Add the database filename to the directory.
|
||||
let mut db_file = db_directory.to_path_buf();
|
||||
db_file.push(DATABASE_FILENAME);
|
||||
|
||||
(db_directory, Cow::Owned(db_file))
|
||||
}
|
||||
|
||||
/// TODO
|
||||
pub fn new<P: AsRef<Path>>(db_directory: Option<P>) -> Self {
|
||||
/// Create a new [`Config`] with sane default settings.
|
||||
///
|
||||
/// # `db_directory`
|
||||
/// If this is `Some`, it will be used as the
|
||||
/// directory that contains all database files.
|
||||
///
|
||||
/// If `None`, it will use the default directory [`cuprate_database_dir`].
|
||||
pub fn new(db_directory: Option<PathBuf>) -> Self {
|
||||
let (db_directory, db_file) = Self::return_db_dir_and_file(db_directory);
|
||||
Self {
|
||||
db_directory,
|
||||
db_file,
|
||||
sync_mode: SyncMode::Safe,
|
||||
sync_mode: SyncMode::FastThenSafe,
|
||||
reader_threads: ReaderThreads::OnePerThread,
|
||||
resize_algorithm: ResizeAlgorithm::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO
|
||||
pub fn fast<P: AsRef<Path>>(db_directory: Option<P>) -> Self {
|
||||
/// Create a [`Config`] with the highest performing,
|
||||
/// but also most resource-intensive & maybe risky settings.
|
||||
///
|
||||
/// Good default for testing, and resource-available machines.
|
||||
///
|
||||
/// # `db_directory`
|
||||
/// If this is `Some`, it will be used as the
|
||||
/// directory that contains all database files.
|
||||
///
|
||||
/// If `None`, it will use the default directory [`cuprate_database_dir`].
|
||||
pub fn fast(db_directory: Option<PathBuf>) -> Self {
|
||||
let (db_directory, db_file) = Self::return_db_dir_and_file(db_directory);
|
||||
Self {
|
||||
db_directory,
|
||||
db_file,
|
||||
sync_mode: SyncMode::Fastest,
|
||||
sync_mode: SyncMode::Fast,
|
||||
reader_threads: ReaderThreads::OnePerThread,
|
||||
resize_algorithm: ResizeAlgorithm::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO
|
||||
pub fn low_power<P: AsRef<Path>>(db_directory: Option<P>) -> Self {
|
||||
/// Create a [`Config`] with the lowest performing,
|
||||
/// but also least resource-intensive settings.
|
||||
///
|
||||
/// Good default for resource-limited machines, e.g. a cheap VPS.
|
||||
///
|
||||
/// # `db_directory`
|
||||
/// If this is `Some`, it will be used as the
|
||||
/// directory that contains all database files.
|
||||
///
|
||||
/// If `None`, it will use the default directory [`cuprate_database_dir`].
|
||||
pub fn low_power(db_directory: Option<PathBuf>) -> Self {
|
||||
let (db_directory, db_file) = Self::return_db_dir_and_file(db_directory);
|
||||
Self {
|
||||
db_directory,
|
||||
db_file,
|
||||
sync_mode: SyncMode::Safe,
|
||||
sync_mode: SyncMode::FastThenSafe,
|
||||
reader_threads: ReaderThreads::One,
|
||||
resize_algorithm: ResizeAlgorithm::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the absolute [`Path`] to the database directory.
|
||||
///
|
||||
/// This will be the `db_directory` given
|
||||
/// (or default) during [`Config`] construction.
|
||||
pub const fn db_directory(&self) -> &Cow<'_, Path> {
|
||||
&self.db_directory
|
||||
}
|
||||
|
||||
/// Return the absolute [`Path`] to the database data file.
|
||||
pub fn db_file_path(&self) -> &Path {
|
||||
///
|
||||
/// This will be based off the `db_directory` given
|
||||
/// (or default) during [`Config`] construction.
|
||||
pub const fn db_file(&self) -> &Cow<'_, Path> {
|
||||
&self.db_file
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
/// Same as `Self::new(None)`.
|
||||
///
|
||||
/// ```rust
|
||||
/// # use cuprate_database::config::*;
|
||||
/// assert_eq!(Config::default(), Config::new(None));
|
||||
/// ```
|
||||
fn default() -> Self {
|
||||
Self::new(None::<&'static Path>)
|
||||
Self::new(None)
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------- SyncMode
|
||||
/// TODO
|
||||
#[derive(Copy, Clone, Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
/// Disk synchronization mode.
|
||||
///
|
||||
/// This controls how/when the database syncs its data to disk.
|
||||
///
|
||||
/// Regardless of the variant chosen, dropping [`Env`](crate::Env)
|
||||
/// will always cause it to fully sync to disk.
|
||||
///
|
||||
/// # Sync vs Async
|
||||
/// All variants except [`SyncMode::Fast`] are `synchronous`,
|
||||
/// as in the database will wait until the OS has finished syncing
|
||||
/// all the data to disk before continuing.
|
||||
///
|
||||
/// `SyncMode::Fast` is `asynchronous`, meaning the database will _NOT_
|
||||
/// wait until the data is fully synced to disk before continuing.
|
||||
/// Note that this doesn't mean the database itself won't be synchronized
|
||||
/// between readers/writers, but rather that the data _on disk_ may not
|
||||
/// be immediately synchronized after a write.
|
||||
///
|
||||
/// Something like:
|
||||
/// ```rust,ignore
|
||||
/// db.put("key", value);
|
||||
/// db.get("key");
|
||||
/// ```
|
||||
/// will be fine, most likely pulling from memory instead of disk.
|
||||
#[derive(Copy, Clone, Debug, Default, PartialEq, PartialOrd, Eq, Ord, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||
#[cfg_attr(
|
||||
feature = "borsh",
|
||||
derive(borsh::BorshSerialize, borsh::BorshDeserialize)
|
||||
)]
|
||||
pub enum SyncMode {
|
||||
/// Fully sync to disk per transaction.
|
||||
/// Use [`SyncMode::Fast`] until fully synced,
|
||||
/// then use [`SyncMode::Safe`].
|
||||
///
|
||||
/// # TODO: how to implement this?
|
||||
/// ref: <https://github.com/monero-project/monero/issues/1463>
|
||||
/// monerod-solution: <https://github.com/monero-project/monero/pull/1506>
|
||||
/// cuprate-issue: <https://github.com/Cuprate/cuprate/issues/78>
|
||||
///
|
||||
/// We could:
|
||||
/// ```rust,ignore
|
||||
/// if current_db_block <= top_block.saturating_sub(N) {
|
||||
/// // don't sync()
|
||||
/// } else {
|
||||
/// // sync()
|
||||
/// }
|
||||
/// ```
|
||||
/// where N is some threshold we pick that is _close_ enough
|
||||
/// to being synced where we want to start being safer.
|
||||
///
|
||||
/// Essentially, when we are in a certain % range of being finished,
|
||||
/// switch to safe mode, until then, go fast.
|
||||
#[default]
|
||||
FastThenSafe,
|
||||
|
||||
/// Fully sync to disk per transaction.
|
||||
///
|
||||
/// Every database transaction commit will
|
||||
/// fully sync all data to disk, _synchronously_,
|
||||
/// so the database halts until synced.
|
||||
///
|
||||
/// This is expected to be very slow.
|
||||
Safe,
|
||||
|
||||
/// Asynchronously sync, only flush at database shutdown.
|
||||
Fastest,
|
||||
/// Fully sync to disk after we cross this transaction threshold.
|
||||
///
|
||||
/// After committing [`usize`] amount of database
|
||||
/// transactions, the database will be synced to disk.
|
||||
///
|
||||
/// `0` behaves the same as [`SyncMode::Safe`], and a ridiculously large
|
||||
/// number like `usize::MAX` is practically the same as [`SyncMode::Fast`].
|
||||
Threshold(usize),
|
||||
|
||||
/// Only flush at database shutdown.
|
||||
///
|
||||
/// This is the fastest, yet unsafest option.
|
||||
///
|
||||
/// It will cause the database to never _actively_ sync,
|
||||
/// letting the OS decide when to flush data to disk.
|
||||
///
|
||||
/// # Corruption
|
||||
/// In the case of a system crash, the database
|
||||
/// may become corrupted when using this option.
|
||||
//
|
||||
// TODO: we could call this `unsafe`
|
||||
// and use that terminology in the config file
|
||||
// so users know exactly what they are getting
|
||||
// themselves into.
|
||||
Fast,
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------- ReaderThreads
|
||||
/// TODO
|
||||
#[derive(Copy, Clone, Default, PartialEq, PartialOrd)]
|
||||
/// Amount of database reader threads to spawn.
|
||||
///
|
||||
/// This controls how many reader threads [`crate::service`]'s
|
||||
/// thread-pool will spawn to receive and send requests/responses.
|
||||
///
|
||||
/// It will always be at least 1, up until the amount of threads on the machine.
|
||||
///
|
||||
/// The main function used to extract an actual
|
||||
/// usable thread count out of this is [`ReaderThreads::as_threads`].
|
||||
#[derive(Copy, Clone, Debug, Default, PartialEq, PartialOrd)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||
#[cfg_attr(
|
||||
feature = "borsh",
|
||||
|
@ -133,34 +276,115 @@ pub enum SyncMode {
|
|||
)]
|
||||
pub enum ReaderThreads {
|
||||
#[default]
|
||||
/// TODO
|
||||
/// Spawn 1 reader thread per available thread on the machine.
|
||||
///
|
||||
/// For example, a `16-core, 32-thread` Ryzen 5950x will
|
||||
/// spawn `32` reader threads using this setting.
|
||||
OnePerThread,
|
||||
|
||||
/// TODO
|
||||
/// Only spawn 1 reader thread.
|
||||
One,
|
||||
|
||||
/// TODO
|
||||
Number(NonZeroUsize),
|
||||
|
||||
/// TODO
|
||||
/// Spawn a specified amount of reader threads.
|
||||
///
|
||||
/// # Invariant
|
||||
/// Must be `0.0..=1.0`.
|
||||
/// Note that no matter how large this value, it will be
|
||||
/// ultimately capped at the amount of system threads.
|
||||
///
|
||||
/// # `0`
|
||||
/// `ReaderThreads::Number(0)` represents "use maximum value",
|
||||
/// as such, it is equal to [`ReaderThreads::OnePerThread`].
|
||||
///
|
||||
/// ```rust
|
||||
/// # use cuprate_database::config::*;
|
||||
/// let reader_threads = ReaderThreads::from(0_usize);
|
||||
/// assert!(matches!(reader_threads, ReaderThreads::OnePerThread));
|
||||
/// ```
|
||||
Number(usize),
|
||||
|
||||
/// Spawn a specified % of reader threads.
|
||||
///
|
||||
/// This must be a value in-between `0.0..=1.0`
|
||||
/// where `1.0` represents [`ReaderThreads::OnePerThread`].
|
||||
///
|
||||
/// # Example
|
||||
/// For example, using a `16-core, 32-thread` Ryzen 5950x CPU:
|
||||
///
|
||||
/// | Input                              | Total threads used |
|
||||
/// |------------------------------------|-------------------|
|
||||
/// | `ReaderThreads::Percent(0.0)` | 32 (maximum value)
|
||||
/// | `ReaderThreads::Percent(0.5)` | 16
|
||||
/// | `ReaderThreads::Percent(0.75)` | 24
|
||||
/// | `ReaderThreads::Percent(1.0)` | 32
|
||||
/// | `ReaderThreads::Percent(2.0)` | 32 (saturating)
|
||||
/// | `ReaderThreads::Percent(f32::NAN)` | 32 (non-normal default)
|
||||
///
|
||||
/// # `0.0`
|
||||
/// `ReaderThreads::Percent(0.0)` represents "use maximum value",
|
||||
/// as such, it is equal to [`ReaderThreads::OnePerThread`].
|
||||
///
|
||||
/// # Not quite `0.0`
|
||||
/// If the thread count multiplied by the percentage ends up being
|
||||
/// non-zero, but less than 1 thread, the minimum value 1 will be returned.
|
||||
///
|
||||
/// ```rust
|
||||
/// # use cuprate_database::config::*;
|
||||
/// assert_eq!(ReaderThreads::Percent(0.000000001).as_threads().get(), 1);
|
||||
/// ```
|
||||
Percent(f32),
|
||||
}
|
||||
|
||||
impl ReaderThreads {
|
||||
/// TODO
|
||||
// # Invariant
|
||||
/// This converts [`ReaderThreads`] into a safe, usable
|
||||
/// number representing how many threads to spawn.
|
||||
///
|
||||
/// This function will always return a number in-between `1..=total_thread_count`.
|
||||
///
|
||||
/// It uses [`cuprate_helper::thread::threads()`] internally to determine the total thread count.
|
||||
///
|
||||
/// # Example
|
||||
/// ```rust
|
||||
/// use cuprate_database::config::ReaderThreads as Rt;
|
||||
///
|
||||
/// let total_threads: std::num::NonZeroUsize =
|
||||
/// cuprate_helper::thread::threads();
|
||||
///
|
||||
/// assert_eq!(Rt::OnePerThread.as_threads(), total_threads);
|
||||
///
|
||||
/// assert_eq!(Rt::One.as_threads().get(), 1);
|
||||
///
|
||||
/// assert_eq!(Rt::Number(0).as_threads(), total_threads);
|
||||
/// assert_eq!(Rt::Number(1).as_threads().get(), 1);
|
||||
/// assert_eq!(Rt::Number(usize::MAX).as_threads(), total_threads);
|
||||
///
|
||||
/// assert_eq!(Rt::Percent(0.01).as_threads().get(), 1);
|
||||
/// assert_eq!(Rt::Percent(0.0).as_threads(), total_threads);
|
||||
/// assert_eq!(Rt::Percent(1.0).as_threads(), total_threads);
|
||||
/// assert_eq!(Rt::Percent(f32::NAN).as_threads(), total_threads);
|
||||
/// assert_eq!(Rt::Percent(f32::INFINITY).as_threads(), total_threads);
|
||||
/// assert_eq!(Rt::Percent(f32::NEG_INFINITY).as_threads(), total_threads);
|
||||
///
|
||||
/// // Percentage only works on more than 1 thread.
|
||||
/// if total_threads.get() > 1 {
|
||||
/// assert_eq!(
|
||||
/// Rt::Percent(0.5).as_threads().get(),
|
||||
/// (total_threads.get() as f32 / 2.0) as usize,
|
||||
/// );
|
||||
/// }
|
||||
/// ```
|
||||
//
|
||||
// INVARIANT:
|
||||
// LMDB will error if we input zero, so don't allow that.
|
||||
// <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/mdb.c#L4687>
|
||||
pub fn as_threads(&self) -> NonZeroUsize {
|
||||
let total_threads = cuprate_helper::thread::threads();
|
||||
|
||||
match self {
|
||||
Self::OnePerThread => total_threads,
|
||||
Self::One => NonZeroUsize::MIN,
|
||||
Self::Number(n) => std::cmp::min(*n, total_threads),
|
||||
Self::OnePerThread => total_threads, // use all threads
|
||||
Self::One => NonZeroUsize::MIN, // one
|
||||
Self::Number(n) => match NonZeroUsize::new(*n) {
|
||||
Some(n) => std::cmp::min(n, total_threads), // saturate at total threads
|
||||
None => total_threads, // 0 == maximum value
|
||||
},
|
||||
|
||||
// We handle the casting loss.
|
||||
#[allow(
|
||||
|
@ -169,26 +393,43 @@ impl ReaderThreads {
|
|||
clippy::cast_sign_loss
|
||||
)]
|
||||
Self::Percent(f) => {
|
||||
// If non-normal float, use the default (all threads).
|
||||
if !f.is_normal() || !(0.0..=1.0).contains(f) {
|
||||
return total_threads;
|
||||
}
|
||||
|
||||
let thread_percent = (total_threads.get() as f32) * f;
|
||||
let Some(threads) = NonZeroUsize::new(thread_percent as usize) else {
|
||||
// 0.0 == maximum value.
|
||||
if *f == 0.0 {
|
||||
return total_threads;
|
||||
};
|
||||
}
|
||||
|
||||
std::cmp::min(threads, total_threads)
|
||||
// Calculate percentage of total threads.
|
||||
let thread_percent = (total_threads.get() as f32) * f;
|
||||
match NonZeroUsize::new(thread_percent as usize) {
|
||||
Some(n) => std::cmp::min(n, total_threads), // saturate at total threads.
|
||||
None => {
|
||||
// We checked for `0.0` above, so this
|
||||
// being 0 means that the percentage was _so_
|
||||
// low it made our thread count something like
|
||||
// 0.99. In this case, just use 1 thread.
|
||||
NonZeroUsize::MIN
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Into<usize>> From<T> for ReaderThreads {
|
||||
/// Create a [`ReaderThreads::Number`].
|
||||
///
|
||||
/// If `value` is `0`, this will return [`ReaderThreads::OnePerThread`].
|
||||
fn from(value: T) -> Self {
|
||||
match NonZeroUsize::new(value.into()) {
|
||||
Some(n) => Self::Number(n),
|
||||
None => Self::One,
|
||||
let u: usize = value.into();
|
||||
if u == 0 {
|
||||
Self::OnePerThread
|
||||
} else {
|
||||
Self::Number(u)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ cfg_if::cfg_if! {
|
|||
|
||||
/// Cuprate's database filename.
|
||||
///
|
||||
/// This is the filename for Cuprate's database, used in [`Config::db_file_path`](crate::config::Config::db_file_path).
|
||||
/// This is the filename for Cuprate's database, used in [`Config::db_file`](crate::config::Config::db_file).
|
||||
pub const DATABASE_FILENAME: &str = "data.san"; // TODO: pick a name + extension.
|
||||
} else {
|
||||
/// Static string of the `crate` being used as the database backend.
|
||||
|
@ -35,7 +35,7 @@ cfg_if::cfg_if! {
|
|||
|
||||
/// Cuprate's database filename.
|
||||
///
|
||||
/// This is the filename for Cuprate's database, used in [`Config::db_file_path`](crate::config::Config::db_file_path).
|
||||
/// This is the filename for Cuprate's database, used in [`Config::db_file`](crate::config::Config::db_file).
|
||||
pub const DATABASE_FILENAME: &str = "data.mdb";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -98,7 +98,7 @@
|
|||
//!
|
||||
//! // Create a configuration for the database environment.
|
||||
//! let db_dir = tempfile::tempdir().unwrap();
|
||||
//! let config = Config::new(Some(db_dir));
|
||||
//! let config = Config::new(Some(db_dir.path().to_path_buf()));
|
||||
//!
|
||||
//! // Initialize the database thread-pool.
|
||||
//!
|
||||
|
|
Loading…
Reference in a new issue