diff --git a/Cargo.lock b/Cargo.lock index c17cb7c9..6a081885 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -603,7 +603,6 @@ dependencies = [ "tokio-util", "tower", "tracing", - "tracing-subscriber", ] [[package]] diff --git a/database/README.md b/database/README.md index 7aa59cef..293413ac 100644 --- a/database/README.md +++ b/database/README.md @@ -2,11 +2,11 @@ Cuprate's database implementation. - [1. Documentation](#1-documentation) -- [2. File Structure](#2-file-structure) +- [2. File structure](#2-file-structure) - [2.1 `src/`](#21-src) - [2.2 `src/backend/`](#22-srcbackend) - - [2.3 `src/config`](#23-srcconfig) - - [2.4 `src/ops`](#24-srcops) + - [2.3 `src/config/`](#23-srcconfig) + - [2.4 `src/ops/`](#24-srcops) - [2.5 `src/service/`](#25-srcservice) - [3. Backends](#3-backends) - [3.1 heed](#31-heed) @@ -18,12 +18,26 @@ Cuprate's database implementation. - [4.1 Backend](#41-backend) - [4.2 Trait](#42-trait) - [4.3 ConcreteEnv](#43-concreteenv) - - [4.4 `ops`](#44-ops) - - [4.5 `service`](#45-service) -- [5. Syncing](#5-Syncing) -- [6. Thread model](#6-thread-model) + - [4.4 ops](#44-ops) + - [4.5 service](#45-service) +- [5. The service](#5-the-service) + - [5.1 Initialization](#51-initialization) + - [5.2 Requests](#53-requests) + - [5.3 Responses](#54-responses) + - [5.4 Thread model](#52-thread-model) + - [5.5 Shutdown](#55-shutdown) +- [6. Syncing](#6-Syncing) - [7. Resizing](#7-resizing) - [8. (De)serialization](#8-deserialization) +- [9. Schema](#9-schema) + - [9.1 Tables](#91-tables) + - [9.2 Multimap tables](#92-multimap-tables) +- [10. Known issues and tradeoffs](#10-known-issues-and-tradeoffs) + - [10.1 Traits abstracting backends](#101-traits-abstracting-backends) + - [10.2 Hot-swappable backends](#102-hot-swappable-backends) + - [10.3 Copying unaligned bytes](#103-copying-unaligned-bytes) + - [10.4 Endianness](#104-endianness) + - [10.5 Extra table data](#105-extra-table-data) --- @@ -36,7 +50,7 @@ Documentation for `database/` is split into 3 locations: | `cuprate-database` | Practical usage documentation/warnings/notes/etc | Source file `// comments` | Implementation-specific details (e.g, how many reader threads to spawn?) -This README serves as the overview/design document. +This README serves as the implementation design document. For actual practical usage, `cuprate-database`'s types and general usage are documented via standard Rust tooling. @@ -60,7 +74,7 @@ The code within `src/` is also littered with some `grep`-able comments containin | `TODO` | This must be implemented; There should be 0 of these in production code | `SOMEDAY` | This should be implemented... someday -## 2. File Structure +## 2. File structure A quick reference of the structure of the folders & files in `cuprate-database`. Note that `lib.rs/mod.rs` files are purely for re-exporting/visibility/lints, and contain no code. Each sub-directory has a corresponding `mod.rs`. @@ -150,11 +164,7 @@ The `async`hronous request/response API other Cuprate crates use instead of mana Each database's implementation for those `trait`'s are located in its respective folder in `src/backend/${DATABASE_NAME}/`. ### 3.1 heed -The default database used is [`heed`](https://github.com/meilisearch/heed) (LMDB). - -The upstream versions from [`crates.io`](https://crates.io/crates/heed) are used. - -`LMDB` should not need to be installed as `heed` has a build script that pulls it in automatically. +The default database used is [`heed`](https://github.com/meilisearch/heed) (LMDB). The upstream versions from [`crates.io`](https://crates.io/crates/heed) are used. `LMDB` should not need to be installed as `heed` has a build script that pulls it in automatically. `heed`'s filenames inside Cuprate's database folder (`~/.local/share/cuprate/database/`) are: @@ -164,8 +174,8 @@ The upstream versions from [`crates.io`](https://crates.io/crates/heed) are used | `lock.mdb` | Database lock file `heed`-specific notes: -- [There is a maximum reader limit](https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1372). Other potential processes (e.g. `xmrblocks`) that are also reading the `data.mdb` file need to be accounted for. -- [LMDB does not work on remote filesystem](https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L129). +- [There is a maximum reader limit](https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1372). Other potential processes (e.g. `xmrblocks`) that are also reading the `data.mdb` file need to be accounted for +- [LMDB does not work on remote filesystem](https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L129) ### 3.2 redb The 2nd database backend is the 100% Rust [`redb`](https://github.com/cberner/redb). @@ -181,7 +191,7 @@ The upstream versions from [`crates.io`](https://crates.io/crates/redb) are used ### 3.3 redb-memory -This backend is 100% the same as `redb`, although, it uses `redb::backend::InMemoryBackend` which is a key-value store that completely resides in memory instead of a file. +This backend is 100% the same as `redb`, although, it uses `redb::backend::InMemoryBackend` which is a database that completely resides in memory instead of a file. All other details about this should be the same as the normal `redb` backend. @@ -193,20 +203,20 @@ The default maximum value size is [1012 bytes](https://docs.rs/sanakirja/1.4.1/s As such, it is not implemented. ### 3.5 MDBX -[`MDBX`](https://erthink.github.io/libmdbx) was a candidate as a backend, however MDBX deprecated the custom key/value comparison functions, this makes it a bit trickier to implement duplicate tables. It is also quite similar to the main backend LMDB (of which it was originally a fork of). +[`MDBX`](https://erthink.github.io/libmdbx) was a candidate as a backend, however MDBX deprecated the custom key/value comparison functions, this makes it a bit trickier to implement [`9.2 Multimap tables`](#92-multimap-tables). It is also quite similar to the main backend LMDB (of which it was originally a fork of). As such, it is not implemented (yet). ## 4. Layers -`cuprate_database` is logically abstracted into 5 layers, starting from the lowest: +`cuprate_database` is logically abstracted into 5 layers, with each layer being built upon the last. + +Starting from the lowest: 1. Backend 2. Trait 3. ConcreteEnv 4. `ops` 5. `service` -Each layer is built upon the last. - ### 4.1 Backend @@ -249,49 +259,78 @@ The equivalent objects in the backends themselves are: - [`heed::Env`](https://docs.rs/heed/0.20.0/heed/struct.Env.html) - [`redb::Database`](https://docs.rs/redb/2.1.0/redb/struct.Database.html) -This is the main object used when handling the database directly, although that is not strictly necessary as a user if the `service` layer is used. +This is the main object used when handling the database directly, although that is not strictly necessary as a user if the [`4.5 service`](#45-service) layer is used. -### 4.4 `ops` +### 4.4 ops These are Monero-specific functions that use the abstracted `trait` forms of the database. -Instead of dealing with the database directly (`get()`, `delete()`), the `ops` layer provides more abstract functions that deal with commonly used Monero operations (`add_block()`, `pop_block()`). +Instead of dealing with the database directly: +- `get()` +- `delete()` -### 4.5 `service` -The final layer abstracts the database completely into a [Monero-specific `async` request/response API](https://github.com/Cuprate/cuprate/blob/2ac90420c658663564a71b7ecb52d74f3c2c9d0f/types/src/service.rs#L18-L78), using [`tower::Service`](https://docs.rs/tower/latest/tower/trait.Service.html). +the `ops` layer provides more abstract functions that deal with commonly used Monero operations: +- `add_block()` +- `pop_block()` -It handles the database using a separate writer thread & reader thread-pool, and uses the previously mentioned `ops` functions when responding to requests. +### 4.5 service +The final layer abstracts the database completely into a [Monero-specific `async` request/response API](https://github.com/Cuprate/cuprate/blob/2ac90420c658663564a71b7ecb52d74f3c2c9d0f/types/src/service.rs#L18-L78) using [`tower::Service`](https://docs.rs/tower/latest/tower/trait.Service.html). -Instead of handling the database directly, this layer provides read/write handles that allow: -- Sending requests for data (e.g. Outputs) -- Receiving responses +For more information on this layer, see the next section: [`5. The service`](#5-the-service). -For more information on the backing thread-pool, see [`Thread model`](#6-thread-model). +## 5. The service +The main API `cuprate_database` exposes for other crates to use is the `cuprate_database::service` module. -## 5. Syncing -`cuprate_database`'s database has 5 disk syncing modes. +This module exposes an `async` request/response API with `tower::Service`, backed by a threadpool, that allows reading/writing Monero-related data from/to the database. -1. FastThenSafe -1. Safe -1. Async -1. Threshold -1. Fast +`cuprate_database::service` itself manages the database using a separate writer thread & reader thread-pool, and uses the previously mentioned [`4.4 ops`](#44-ops) functions when responding to requests. -The default mode is `Safe`. +### 5.1 Initialization +The service is started simply by calling: [`cuprate_database::service::init()`](https://github.com/Cuprate/cuprate/blob/d0ac94a813e4cd8e0ed8da5e85a53b1d1ace2463/database/src/service/free.rs#L23). -This means that upon each transaction commit, all the data that was written will be fully synced to disk. This is the slowest, but safest mode of operation. +This function initializes the database, spawns threads, and returns a: +- Read handle to the database (cloneable) +- Write handle to the database (not cloneable) -Note that upon any database `Drop`, whether via `service` or dropping the database directly, the current implementation will sync to disk regardless of any configuration. +These "handles" implement the `tower::Service` trait, which allows sending requests and receiving responses `async`hronously. -For more information on the other modes, read the documentation [here](https://github.com/Cuprate/cuprate/blob/2ac90420c658663564a71b7ecb52d74f3c2c9d0f/database/src/config/sync_mode.rs#L63-L144). +### 5.2 Requests +Along with the 2 handles, there are 2 types of requests: +- [`ReadRequest`](https://github.com/Cuprate/cuprate/blob/d0ac94a813e4cd8e0ed8da5e85a53b1d1ace2463/types/src/service.rs#L23-L90) +- [`WriteRequest`](https://github.com/Cuprate/cuprate/blob/d0ac94a813e4cd8e0ed8da5e85a53b1d1ace2463/types/src/service.rs#L93-L105) -## 6. Thread model -As noted in the [`Layers`](#layers) section, the base database abstractions themselves are not concerned with parallelism, they are mostly functions to be called from a single-thread. +`ReadRequest` is for retrieving various types of information from the database. -However, the actual API `cuprate_database` exposes for practical usage for the main `cuprated` binary (and other `async` use-cases) is the asynchronous `service` API, which _does_ have a thread model backing it. +`WriteRequest` currently only has 1 variant: to write a block to the database. -As such, when [`cuprate_database::service`'s initialization function](https://github.com/Cuprate/cuprate/blob/9c27ba5791377d639cb5d30d0f692c228568c122/database/src/service/free.rs#L33-L44) is called, threads will be spawned and maintained until the user drops (disconnects) the returned handles. +### 5.3 Responses +After sending one of the above requests using the read/write handle, the value returned is _not_ the response, yet an `async`hronous channel that will eventually return the response: +```rust,ignore +// Send a request. +// tower::Service::call() +// V +let response_channel: Channel = read_handle.call(ReadResponse::ChainHeight)?; -The current behavior is: +// Await the response. +let response: ReadResponse = response_channel.await?; + +// Assert the response is what we expected. +assert_eq!(matches!(response), Response::ChainHeight(_)); +``` + +After `await`ing the returned channel, a `Response` will eventually be returned when the `service` threadpool has fetched the value from the database and sent it off. + +Both read/write requests variants match in name with `Response` variants, i.e. +- `ReadRequest::ChainHeight` leads to `Response::ChainHeight` +- `WriteRequest::WriteBlock` leads to `Response::WriteBlockOk` + +### 5.4 Thread model +As mentioned in the [`4. Layers`](#4-layers) section, the base database abstractions themselves are not concerned with parallelism, they are mostly functions to be called from a single-thread. + +However, the `cuprate_database::service` API, _does_ have a thread model backing it. + +When [`cuprate_database::service`'s initialization function](https://github.com/Cuprate/cuprate/blob/9c27ba5791377d639cb5d30d0f692c228568c122/database/src/service/free.rs#L33-L44) is called, threads will be spawned and maintained until the user drops (disconnects) the returned handles. + +The current behavior for thread count is: - [1 writer thread](https://github.com/Cuprate/cuprate/blob/9c27ba5791377d639cb5d30d0f692c228568c122/database/src/service/write.rs#L52-L66) - [As many reader threads as there are system threads](https://github.com/Cuprate/cuprate/blob/9c27ba5791377d639cb5d30d0f692c228568c122/database/src/service/read.rs#L104-L126) @@ -307,7 +346,27 @@ The reader threads are managed by [`rayon`](https://docs.rs/rayon). For an example of where multiple reader threads are used: given a request that asks if any key-image within a set already exists, `cuprate_database` will [split that work between the threads with `rayon`](https://github.com/Cuprate/cuprate/blob/9c27ba5791377d639cb5d30d0f692c228568c122/database/src/service/read.rs#L490-L503). -Once the [handles](https://github.com/Cuprate/cuprate/blob/9c27ba5791377d639cb5d30d0f692c228568c122/database/src/service/free.rs#L33) to these threads are `Drop`ed, the backing thread(pool) will gracefully exit, automatically. +### 5.5 Shutdown +Once the read/write handles are `Drop`ed, the backing thread(pool) will gracefully exit, automatically. + +Note the writer thread and reader threadpool aren't connected whatsoever; dropping the write handle will make the writer thread exit, however, the reader handle is free to be held onto and can be continued to be read from - and vice-versa for the write handle. + +## 6. Syncing +`cuprate_database`'s database has 5 disk syncing modes. + +1. FastThenSafe +1. Safe +1. Async +1. Threshold +1. Fast + +The default mode is `Safe`. + +This means that upon each transaction commit, all the data that was written will be fully synced to disk. This is the slowest, but safest mode of operation. + +Note that upon any database `Drop`, whether via `service` or dropping the database directly, the current implementation will sync to disk regardless of any configuration. + +For more information on the other modes, read the documentation [here](https://github.com/Cuprate/cuprate/blob/2ac90420c658663564a71b7ecb52d74f3c2c9d0f/database/src/config/sync_mode.rs#L63-L144). ## 7. Resizing Database backends that require manually resizing will, by default, use a similar algorithm as `monerod`'s. @@ -327,6 +386,8 @@ All types stored inside the database are either bytes already, or are perfectly As such, they do not incur heavy (de)serialization costs when storing/fetching them from the database. The main (de)serialization used is [`bytemuck`](https://docs.rs/bytemuck)'s traits and casting functions. +The size & layout of types is stable across compiler versions, as they are set and determined with [`#[repr(C)]`](https://doc.rust-lang.org/nomicon/other-reprs.html#reprc) and `bytemuck`'s derive macros such as [`bytemuck::Pod`](https://docs.rs/bytemuck/latest/bytemuck/derive.Pod.html). + Note that the data stored in the tables are still type-safe; we still refer to the key and values within our tables by the type. The main deserialization `trait` for database storage is: [`cuprate_database::Storable`](https://github.com/Cuprate/cuprate/blob/2ac90420c658663564a71b7ecb52d74f3c2c9d0f/database/src/storable.rs#L16-L115). @@ -361,4 +422,177 @@ Compatibility structs also exist for any `Storable` containers: - [`StorableVec`](https://github.com/Cuprate/cuprate/blob/2ac90420c658663564a71b7ecb52d74f3c2c9d0f/database/src/storable.rs#L135-L191) - [`StorableBytes`](https://github.com/Cuprate/cuprate/blob/2ac90420c658663564a71b7ecb52d74f3c2c9d0f/database/src/storable.rs#L208-L241) -Again, it's unfortunate that these must be owned, although in `service`'s use-case, they would have to be owned anyway. \ No newline at end of file +Again, it's unfortunate that these must be owned, although in `service`'s use-case, they would have to be owned anyway. + +## 9. Schema +This following section contains Cuprate's database schema, it may change throughout the development of Cuprate, as such, nothing here is final. + +### 9.1 Tables +The `CamelCase` names of the table headers documented here (e.g. `TxIds`) are the actual type name of the table within `cuprate_database`. + +Note that words written within `code blocks` mean that it is a real type defined and usable within `cuprate_database`. Other standard types like u64 and type aliases (TxId) are written normally. + +Within `cuprate_database::tables`, the below table is essentially defined as-is with [a macro](https://github.com/Cuprate/cuprate/blob/31ce89412aa174fc33754f22c9a6d9ef5ddeda28/database/src/tables.rs#L369-L470). + +Many of the data types stored are the same data types, although are different semantically, as such, a map of aliases used and their real data types is also provided below. + +| Alias | Real Type | +|----------------------------------------------------|-----------| +| BlockHeight, Amount, AmountIndex, TxId, UnlockTime | u64 +| BlockHash, KeyImage, TxHash, PrunableHash | [u8; 32] + +| Table | Key | Value | Description | +|-------------------|----------------------|--------------------|-------------| +| `BlockBlobs` | BlockHeight | `StorableVec` | Maps a block's height to a serialized byte form of a block +| `BlockHeights` | BlockHash | BlockHeight | Maps a block's hash to its height +| `BlockInfos` | BlockHeight | `BlockInfo` | Contains metadata of all blocks +| `KeyImages` | KeyImage | () | This table is a set with no value, it stores transaction key images +| `NumOutputs` | Amount | u64 | Maps an output's amount to the number of outputs with that amount +| `Outputs` | `PreRctOutputId` | `Output` | This table contains legacy CryptoNote outputs which have clear amounts. This table will not contain an output with 0 amount. +| `PrunedTxBlobs` | TxId | `StorableVec` | Contains pruned transaction blobs (even if the database is not pruned) +| `PrunableTxBlobs` | TxId | `StorableVec` | Contains the prunable part of a transaction +| `PrunableHashes` | TxId | PrunableHash | Contains the hash of the prunable part of a transaction +| `RctOutputs` | AmountIndex | `RctOutput` | Contains RingCT outputs mapped from their global RCT index +| `TxBlobs` | TxId | `StorableVec` | Serialized transaction blobs (bytes) +| `TxIds` | TxHash | TxId | Maps a transaction's hash to its index/ID +| `TxHeights` | TxId | BlockHeight | Maps a transaction's ID to the height of the block it comes from +| `TxOutputs` | TxId | `StorableVec` | Gives the amount indices of a transaction's outputs +| `TxUnlockTime` | TxId | UnlockTime | Stores the unlock time of a transaction (only if it has a non-zero lock time) + +The definitions for aliases and types (e.g. `RctOutput`) are within the [`cuprate_database::types`](https://github.com/Cuprate/cuprate/blob/31ce89412aa174fc33754f22c9a6d9ef5ddeda28/database/src/types.rs#L51) module. + + + +### 9.2 Multimap tables +When referencing outputs, Monero will [use the amount and the amount index](https://github.com/monero-project/monero/blob/c8214782fb2a769c57382a999eaf099691c836e7/src/blockchain_db/lmdb/db_lmdb.cpp#L3447-L3449). This means 2 keys are needed to reach an output. + +With LMDB you can set the `DUP_SORT` flag on a table and then set the key/value to: +```rust +Key = KEY_PART_1 +``` +```rust +Value = { + KEY_PART_2, + VALUE // The actual value we are storing. +} +``` + +Then you can set a custom value sorting function that only takes `KEY_PART_2` into account; this is how `monerod` does it. + +This requires that the underlying database supports: +- multimap tables +- custom sort functions on values +- setting a cursor on a specific key/value + +--- + +Another way to implement this is as follows: +```rust +Key = { KEY_PART_1, KEY_PART_2 } +``` +```rust +Value = VALUE +``` + +Then the key type is simply used to look up the value; this is how `cuprate_database` does it. + +For example, the key/value pair for outputs is: +```rust +PreRctOutputId => Output +``` +where `PreRctOutputId` looks like this: +```rust +struct PreRctOutputId { + amount: u64, + amount_index: u64, +} +``` + +## 10. Known issues and tradeoffs +`cuprate_database` takes many tradeoffs, whether due to: +- Prioritizing certain values over others +- Not having a better solution +- Being "good enough" + +This is a list of the larger ones, along with issues that don't have answers yet. + +### 10.1 Traits abstracting backends +Although all database backends used are very similar, they have some crucial differences in small implementation details that must be worked around when conforming them to `cuprate_database`'s traits. + +Put simply: using `cuprate_database`'s traits is less efficient and more awkward than using the backend directly. + +For example: +- [Data types must be wrapped in compatibility layers when they otherwise wouldn't be](https://github.com/Cuprate/cuprate/blob/d0ac94a813e4cd8e0ed8da5e85a53b1d1ace2463/database/src/backend/heed/env.rs#L101-L116) +- [There are types that only apply to a specific backend, but are visible to all](https://github.com/Cuprate/cuprate/blob/d0ac94a813e4cd8e0ed8da5e85a53b1d1ace2463/database/src/error.rs#L86-L89) +- [There are extra layers of abstraction to smoothen the differences between all backends](https://github.com/Cuprate/cuprate/blob/d0ac94a813e4cd8e0ed8da5e85a53b1d1ace2463/database/src/env.rs#L62-L68) +- [Existing functionality of backends must be taken away, as it isn't supported in the others](https://github.com/Cuprate/cuprate/blob/d0ac94a813e4cd8e0ed8da5e85a53b1d1ace2463/database/src/database.rs#L27-L34) + +This is a _tradeoff_ that `cuprate_database` takes, as: +- The backend itself is usually not the source of bottlenecks in the greater system, as such, small inefficiencies are OK +- None of the lost functionality is crucial for operation +- The ability to use, test, and swap between multiple database backends is [worth it](https://github.com/Cuprate/cuprate/pull/35#issuecomment-1952804393) + +### 10.2 Hot-swappable backends +Using a different backend is really as simple as re-building `cuprate_database` with a different feature flag: +```bash +# Use LMDB. +cargo build --package cuprate-database --features heed + +# Use redb. +cargo build --package cuprate-database --features redb +``` + +This is "good enough" for now, however ideally, this hot-swapping of backends would be able to be done at _runtime_. + +As it is now, `cuprate_database` cannot compile both backends and swap based on user input at runtime; it must be compiled with a certain backend, which will produce a binary with only that backend. + +This also means things like [CI testing multiple backends is awkward](https://github.com/Cuprate/cuprate/blob/main/.github/workflows/ci.yml#L132-L136), as we must re-compile with different feature flags instead. + +### 10.3 Copying unaligned bytes +As mentioned in [`8. (De)serialization`](#8-deserialization), bytes are _copied_ when they are turned into a type `T` due to unaligned bytes being returned from database backends. + +Using a regular reference cast results in an improperly aligned type `T`; [such a type even existing causes undefined behavior](https://doc.rust-lang.org/reference/behavior-considered-undefined.html). In our case, `bytemuck` saves us by panicking before this occurs. + +Thus, when using `cuprate_database`'s database traits, an _owned_ `T` is returned. + +This is doubly unfortunately for `&[u8]` as this does not even need deserialization. + +For example, `StorableVec` could have been this: +```rust +enum StorableBytes<'a, T: Storable> { + Owned(T), + Ref(&'a T), +} +``` +but this would require supporting types that must be copied regardless with the occasional `&[u8]` that can be returned without casting. This was hard to do so in a generic way, thus all `[u8]`'s are copied and returned as owned `StorableVec`s. + +This is a _tradeoff_ `cuprate_database` takes as: +- `bytemuck::pod_read_unaligned` is cheap enough +- The main API, `service`, needs to return owned value anyway +- Having no references removes a lot of lifetime complexity + +The alternative is either: +- Using proper (de)serialization instead of casting (which comes with its own costs) +- Somehow fixing the alignment issues in the backends mentioned previously + +### 10.4 Endianness +`cuprate_database`'s (de)serialization and storage of bytes are native-endian, as in, byte storage order will depend on the machine it is running on. + +As Cuprate's build-targets are all little-endian ([big-endian by default machines barely exist](https://en.wikipedia.org/wiki/Endianness#Hardware)), this doesn't matter much and the byte ordering can be seen as a constant. + +Practically, this means `cuprated`'s database files can be transferred across computers, as can `monerod`'s. + +### 10.5 Extra table data +Some of `cuprate_database`'s tables differ from `monerod`'s tables, for example, the way [`9.2 Multimap tables`](#92-multimap-tables) tables are done requires that the primary key is stored _for all_ entries, compared to `monerod` only needing to store it once. + +For example: +```rust +// `monerod` only stores `amount: 1` once, +// `cuprated` stores it each time it appears. +struct PreRctOutputId { amount: 1, amount_index: 0 } +struct PreRctOutputId { amount: 1, amount_index: 1 } +``` + +This means `cuprated`'s database will be slightly larger than `monerod`'s. + +The current method `cuprate_database` uses will be "good enough" until usage shows that it must be optimized as multimap tables are tricky to implement across all backends. \ No newline at end of file diff --git a/database/src/tables.rs b/database/src/tables.rs index 9a425de1..0056b0bd 100644 --- a/database/src/tables.rs +++ b/database/src/tables.rs @@ -401,16 +401,16 @@ tables! { NumOutputs, Amount => u64, + /// Pre-RCT output data. + Outputs, + PreRctOutputId => Output, + /// Pruned transaction blobs (bytes). /// /// Contains the pruned portion of serialized transaction data. PrunedTxBlobs, TxId => PrunedBlob, - /// Pre-RCT output data. - Outputs, - PreRctOutputId => Output, - /// Prunable transaction blobs (bytes). /// /// Contains the prunable portion of serialized transaction data. diff --git a/p2p/cuprate-p2p/Cargo.toml b/p2p/cuprate-p2p/Cargo.toml index e0ed1c28..d73684af 100644 --- a/p2p/cuprate-p2p/Cargo.toml +++ b/p2p/cuprate-p2p/Cargo.toml @@ -33,6 +33,4 @@ hex = { workspace = true, features = ["std"] } tracing = { workspace = true, features = ["std", "attributes"] } [dev-dependencies] -cuprate-test-utils = {path = "../../test-utils"} -tracing-subscriber = { version = "0.3.18" } - +cuprate-test-utils = { path = "../../test-utils" } diff --git a/p2p/cuprate-p2p/src/client_pool.rs b/p2p/cuprate-p2p/src/client_pool.rs index 3b21c790..15969392 100644 --- a/p2p/cuprate-p2p/src/client_pool.rs +++ b/p2p/cuprate-p2p/src/client_pool.rs @@ -32,10 +32,10 @@ pub use drop_guard_client::ClientPoolDropGuard; pub struct ClientPool { /// The connected [`Client`]s. clients: DashMap, Client>, - /// A set of outbound clients, as these allow accesses/ mutation from different threads - /// a peer ID in here does not mean the peer is in `clients` as it could have been removed - /// by another thread. However, if the peer is in both here and `clients` it is defiantly - /// an outbound peer, + /// A set of outbound clients, as these allow accesses/mutation from different threads, + /// a peer ID in here does not mean the peer is necessarily in `clients` as it could have been removed + /// by another thread. However, if the peer is in both here and `clients` it is definitely + /// an outbound peer. outbound_clients: DashSet>, /// A channel to send new peer ids down to monitor for disconnect. @@ -62,6 +62,9 @@ impl ClientPool { /// pool. /// /// See [`ClientPool::add_new_client`] to add a [`Client`] which was not taken from the pool before. + /// + /// # Panics + /// This function panics if `client` already exists in the pool. fn add_client(&self, client: Client) { let handle = client.info.handle.clone(); let id = client.info.id; @@ -88,6 +91,9 @@ impl ClientPool { /// Adds a _new_ [`Client`] to the pool, this client should be a new connection, and not already /// from the pool. + /// + /// # Panics + /// This function panics if `client` already exists in the pool. pub fn add_new_client(&self, client: Client) { self.new_connection_tx .send((client.info.handle.clone(), client.info.id)) @@ -97,14 +103,20 @@ impl ClientPool { } /// Remove a [`Client`] from the pool. + /// + /// [`None`] is returned if the client did not exist in the pool. fn remove_client(&self, peer: &InternalPeerID) -> Option> { self.outbound_clients.remove(peer); self.clients.remove(peer).map(|(_, client)| client) } - /// Borrows a [`Client`] from the pool, the [`Client`] is wrapped in [`ClientPoolDropGuard`] which + /// Borrows a [`Client`] from the pool. + /// + /// The [`Client`] is wrapped in [`ClientPoolDropGuard`] which /// will return the client to the pool when it's dropped. + /// + /// See [`Self::borrow_clients`] for borrowing multiple clients. pub fn borrow_client( self: &Arc, peer: &InternalPeerID, @@ -117,7 +129,9 @@ impl ClientPool { /// Borrows multiple [`Client`]s from the pool. /// - /// The returned iterator is not guaranteed to contain ever peer asked for, + /// Note that the returned iterator is not guaranteed to contain every peer asked for. + /// + /// See [`Self::borrow_client`] for borrowing a single client. #[allow(private_interfaces)] // TODO: Remove me when 2024 Rust pub fn borrow_clients<'a, 'b>( self: &'a Arc, diff --git a/p2p/cuprate-p2p/src/client_pool/disconnect_monitor.rs b/p2p/cuprate-p2p/src/client_pool/disconnect_monitor.rs index 532c2121..01297352 100644 --- a/p2p/cuprate-p2p/src/client_pool/disconnect_monitor.rs +++ b/p2p/cuprate-p2p/src/client_pool/disconnect_monitor.rs @@ -1,7 +1,7 @@ //! # Disconnect Monitor //! //! This module contains the [`disconnect_monitor`] task, which monitors connected peers for disconnection -//! and the removes them from the [`ClientPool`] if they do. +//! and then removes them from the [`ClientPool`] if they do. use std::{ future::Future, pin::Pin, diff --git a/p2p/cuprate-p2p/src/client_pool/drop_guard_client.rs b/p2p/cuprate-p2p/src/client_pool/drop_guard_client.rs index d6b6f7d4..5555d713 100644 --- a/p2p/cuprate-p2p/src/client_pool/drop_guard_client.rs +++ b/p2p/cuprate-p2p/src/client_pool/drop_guard_client.rs @@ -12,6 +12,9 @@ pub struct ClientPoolDropGuard { /// The [`ClientPool`] to return the peer to. pub(super) pool: Arc>, /// The [`Client`]. + /// + /// This is set to [`Some`] when this guard is created, then + /// ### [`take`](Option::take)n and returned to the pool when dropped. pub(super) client: Option>, } diff --git a/p2p/cuprate-p2p/src/config.rs b/p2p/cuprate-p2p/src/config.rs index 1dc95416..31b5ab12 100644 --- a/p2p/cuprate-p2p/src/config.rs +++ b/p2p/cuprate-p2p/src/config.rs @@ -3,20 +3,10 @@ pub struct P2PConfig { /// The number of outbound connections to make and try keep. pub outbound_connections: usize, - /// The absolute maximum number of held outbound connections. - /// - /// *Note:* Cuprate might make more connections than this to see if a peer is reachable or - /// to get peers from that node, these connections are not held for long though. - pub max_outbound_connections: usize, - + /// The amount of extra connections we can make if we are under load from the rest of Cuprate. + pub extra_outbound_connections: usize, /// The percent of outbound peers that should be gray aka never connected to before. /// /// Only values 0..=1 are valid. pub gray_peers_percent: f64, } - -impl P2PConfig { - pub fn allowed_extra_connections(&self) -> usize { - self.max_outbound_connections - self.outbound_connections - } -} diff --git a/p2p/cuprate-p2p/src/connection_maintainer.rs b/p2p/cuprate-p2p/src/connection_maintainer.rs index 1e6cf9ca..bff4b9d5 100644 --- a/p2p/cuprate-p2p/src/connection_maintainer.rs +++ b/p2p/cuprate-p2p/src/connection_maintainer.rs @@ -12,7 +12,7 @@ use tokio::{ time::{sleep, timeout}, }; use tower::{Service, ServiceExt}; -use tracing::{info, instrument, warn}; +use tracing::instrument; use monero_p2p::{ client::{Client, ConnectRequest, HandshakeError}, @@ -68,14 +68,15 @@ pub struct OutboundConnectionKeeper { pub peer_type_gen: Bernoulli, } -impl OutboundConnectionKeeper +impl OutboundConnectionKeeper where + N: NetworkZone, A: AddressBook, C: Service, Response = Client, Error = HandshakeError>, C::Future: Send + 'static, { pub fn new( - config: &P2PConfig, + config: P2PConfig, client_pool: Arc>, make_connection_rx: mpsc::Receiver, address_book_svc: A, @@ -91,7 +92,7 @@ where connector_svc, outbound_semaphore: Arc::new(Semaphore::new(config.outbound_connections)), extra_peers: 0, - config: config.clone(), + config, peer_type_gen, } } @@ -114,7 +115,7 @@ where let mut handshake_futs = JoinSet::new(); for seed in seeds { - info!("Getting peers from seed node: {}", seed); + tracing::info!("Getting peers from seed node: {}", seed); let fut = timeout( HANDSHAKE_TIMEOUT, @@ -141,7 +142,7 @@ where } if allowed_errors == 0 { - return Err(OutboundConnectorError::FailedToConnectToSeeds); + Err(OutboundConnectorError::FailedToConnectToSeeds) } else { Ok(()) } @@ -177,7 +178,7 @@ where .try_acquire_owned() .or_else(|_| { // if we can't get a permit add one if we are below the max number of connections. - if self.extra_peers >= self.config.allowed_extra_connections() { + if self.extra_peers >= self.config.extra_outbound_connections { // If we can't add a permit return an error. Err(OutboundConnectorError::MaxConnections) } else { @@ -201,7 +202,7 @@ where match peer { Err(_) => { // TODO: We should probably send peer requests to our connected peers rather than go to seeds. - warn!("No peers in address book which are available and have the data we need. Getting peers from seed nodes."); + tracing::warn!("No peers in address book which are available and have the data we need. Getting peers from seed nodes."); self.connect_to_random_seeds().await?; Err(OutboundConnectorError::NoAvailablePeers) @@ -215,7 +216,7 @@ where } } - /// Handles a free permit, by either connecting to a new peer or by removing a permit if we above the + /// Handles a free permit, by either connecting to a new peer or by removing a permit if we are above the /// minimum number of outbound connections. #[instrument(level = "debug", skip(self, permit))] async fn handle_free_permit( @@ -248,7 +249,7 @@ where .call(req) .await else { - warn!("No peers in peer list to make connection to."); + tracing::warn!("No peers in peer list to make connection to."); self.connect_to_random_seeds().await?; return Err(OutboundConnectorError::NoAvailablePeers); }; @@ -259,7 +260,7 @@ where /// Runs the outbound connection count keeper. pub async fn run(mut self) { - info!( + tracing::info!( "Starting outbound connection maintainer, target outbound connections: {}", self.config.outbound_connections ); @@ -269,7 +270,7 @@ where biased; peer_req = self.make_connection_rx.recv() => { let Some(peer_req) = peer_req else { - info!("Shutting down outbound connector, make connection channel closed."); + tracing::info!("Shutting down outbound connector, make connection channel closed."); return; }; // We can't really do much about errors in this function.