From 6e170c8b786a443bfece65f28e33d1b5455b269c Mon Sep 17 00:00:00 2001 From: warptangent Date: Sat, 11 Jul 2015 12:28:20 -0700 Subject: [PATCH 1/4] Optionally allow DB to know expected number of blocks at batch transaction start This will assist in a DB resize check. --- src/blockchain_db/berkeleydb/db_bdb.cpp | 2 +- src/blockchain_db/berkeleydb/db_bdb.h | 2 +- src/blockchain_db/blockchain_db.h | 2 +- src/blockchain_db/lmdb/db_lmdb.cpp | 2 +- src/blockchain_db/lmdb/db_lmdb.h | 2 +- src/blockchain_utilities/fake_core.h | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/blockchain_db/berkeleydb/db_bdb.cpp b/src/blockchain_db/berkeleydb/db_bdb.cpp index 221c0cf2e..efe8528cd 100644 --- a/src/blockchain_db/berkeleydb/db_bdb.cpp +++ b/src/blockchain_db/berkeleydb/db_bdb.cpp @@ -1595,7 +1595,7 @@ bool BlockchainBDB::has_key_image(const crypto::key_image& img) const // Ostensibly BerkeleyDB has batch transaction support built-in, // so the following few functions will be NOP. 
-void BlockchainBDB::batch_start() +void BlockchainBDB::batch_start(uint64_t batch_num_blocks) { LOG_PRINT_L3("BlockchainBDB::" << __func__); } diff --git a/src/blockchain_db/berkeleydb/db_bdb.h b/src/blockchain_db/berkeleydb/db_bdb.h index 83588b031..da57cabb1 100644 --- a/src/blockchain_db/berkeleydb/db_bdb.h +++ b/src/blockchain_db/berkeleydb/db_bdb.h @@ -191,7 +191,7 @@ public: ); virtual void set_batch_transactions(bool batch_transactions); - virtual void batch_start(); + virtual void batch_start(uint64_t batch_num_blocks=0); virtual void batch_commit(); virtual void batch_stop(); virtual void batch_abort(); diff --git a/src/blockchain_db/blockchain_db.h b/src/blockchain_db/blockchain_db.h index 46c860122..c4ca328f6 100644 --- a/src/blockchain_db/blockchain_db.h +++ b/src/blockchain_db/blockchain_db.h @@ -357,7 +357,7 @@ public: // release db lock virtual void unlock() = 0; - virtual void batch_start() = 0; + virtual void batch_start(uint64_t batch_num_blocks=0) = 0; virtual void batch_stop() = 0; virtual void set_batch_transactions(bool) = 0; diff --git a/src/blockchain_db/lmdb/db_lmdb.cpp b/src/blockchain_db/lmdb/db_lmdb.cpp index 0ed044954..23fb79951 100644 --- a/src/blockchain_db/lmdb/db_lmdb.cpp +++ b/src/blockchain_db/lmdb/db_lmdb.cpp @@ -1820,7 +1820,7 @@ bool BlockchainLMDB::has_key_image(const crypto::key_image& img) const return false; } -void BlockchainLMDB::batch_start() +void BlockchainLMDB::batch_start(uint64_t batch_num_blocks) { LOG_PRINT_L3("BlockchainLMDB::" << __func__); if (! 
m_batch_transactions) diff --git a/src/blockchain_db/lmdb/db_lmdb.h b/src/blockchain_db/lmdb/db_lmdb.h index 6a2646816..6c82939c8 100644 --- a/src/blockchain_db/lmdb/db_lmdb.h +++ b/src/blockchain_db/lmdb/db_lmdb.h @@ -191,7 +191,7 @@ public: ); virtual void set_batch_transactions(bool batch_transactions); - virtual void batch_start(); + virtual void batch_start(uint64_t batch_num_blocks=0); virtual void batch_commit(); virtual void batch_stop(); virtual void batch_abort(); diff --git a/src/blockchain_utilities/fake_core.h b/src/blockchain_utilities/fake_core.h index 79fb51842..5eda504fe 100644 --- a/src/blockchain_utilities/fake_core.h +++ b/src/blockchain_utilities/fake_core.h @@ -96,9 +96,9 @@ struct fake_core_lmdb return m_storage.get_db().add_block(blk, block_size, cumulative_difficulty, coins_generated, txs); } - void batch_start() + void batch_start(uint64_t batch_num_blocks = 0) { - m_storage.get_db().batch_start(); + m_storage.get_db().batch_start(batch_num_blocks); } void batch_stop() @@ -150,7 +150,7 @@ struct fake_core_memory return 2; } - void batch_start() + void batch_start(uint64_t batch_num_blocks = 0) { LOG_PRINT_L0("WARNING: [batch_start] opt_batch set, but this database doesn't support/need transactions - ignoring"); } From 699e4b3f65baf8bb2ddd2377dc74e2a3168c6244 Mon Sep 17 00:00:00 2001 From: warptangent Date: Sat, 11 Jul 2015 12:32:49 -0700 Subject: [PATCH 2/4] blockchain_utilities: Pass expected number of blocks when starting batch --- src/blockchain_utilities/blockchain_converter.cpp | 4 ++-- src/blockchain_utilities/blockchain_import.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/blockchain_utilities/blockchain_converter.cpp b/src/blockchain_utilities/blockchain_converter.cpp index 855dde644..d18ce8789 100644 --- a/src/blockchain_utilities/blockchain_converter.cpp +++ b/src/blockchain_utilities/blockchain_converter.cpp @@ -236,7 +236,7 @@ int main(int argc, char* argv[]) } if (opt_batch) - 
blockchain->batch_start(); + blockchain->batch_start(db_batch_size); uint64_t i = 0; for (i = start_block; i < end_block + 1; ++i) { @@ -277,7 +277,7 @@ int main(int argc, char* argv[]) std::cout << "\r \r"; std::cout << "[- batch commit at height " << i + 1 << " -]" << ENDL; blockchain->batch_stop(); - blockchain->batch_start(); + blockchain->batch_start(db_batch_size); std::cout << ENDL; blockchain->show_stats(); } diff --git a/src/blockchain_utilities/blockchain_import.cpp b/src/blockchain_utilities/blockchain_import.cpp index aeed2b335..924b46d2d 100644 --- a/src/blockchain_utilities/blockchain_import.cpp +++ b/src/blockchain_utilities/blockchain_import.cpp @@ -254,7 +254,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path, uint6 } if (use_batch) - simple_core.batch_start(); + simple_core.batch_start(db_batch_size); LOG_PRINT_L0("Reading blockchain from bootstrap file..."); std::cout << ENDL; @@ -482,7 +482,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path, uint6 // zero-based height std::cout << ENDL << "[- batch commit at height " << h-1 << " -]" << ENDL; simple_core.batch_stop(); - simple_core.batch_start(); + simple_core.batch_start(db_batch_size); std::cout << ENDL; #if !defined(BLOCKCHAIN_DB) || (BLOCKCHAIN_DB == DB_LMDB) simple_core.m_storage.get_db().show_stats(); From f9e4afd52a404f49dc71709e35c88e0d6a4ecbaa Mon Sep 17 00:00:00 2001 From: warptangent Date: Sat, 11 Jul 2015 12:38:20 -0700 Subject: [PATCH 3/4] blockchain_utilities: Increase debug statement's log level --- src/blockchain_utilities/bootstrap_file.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blockchain_utilities/bootstrap_file.cpp b/src/blockchain_utilities/bootstrap_file.cpp index 70f0b1fec..573cb1572 100644 --- a/src/blockchain_utilities/bootstrap_file.cpp +++ b/src/blockchain_utilities/bootstrap_file.cpp @@ -454,7 +454,7 @@ uint64_t BootstrapFile::count_blocks(const std::string& import_file_path) 
str1.assign(buf1, sizeof(chunk_size)); if (! ::serialization::parse_binary(str1, chunk_size)) throw std::runtime_error("Error in deserialization of chunk_size"); - LOG_PRINT_L1("chunk_size: " << chunk_size); + LOG_PRINT_L3("chunk_size: " << chunk_size); if (chunk_size > BUFFER_SIZE) { From fd73d9cc3a05ca551cdd1c425c664e24e746b152 Mon Sep 17 00:00:00 2001 From: warptangent Date: Sat, 11 Jul 2015 22:46:16 -0700 Subject: [PATCH 4/4] Check and resize if needed at batch transaction start This currently only affects blockchain_import and blockchain_converter. When the number of blocks expected for the batch transaction is provided, make an estimate of the DB space needed. If not enough free space remains, resize the DB. The estimate is made based on: - the average size of the last 500 blocks, or if larger, a min. block size of 4k - a factor for the expanded size a block occupies in the DB across the sub-dbs/tables - a safety factor (1.7) to allow for a "reasonable" average block size increase over the batch Increase the DB size by whichever is greater: the estimated size needed or a minimum increase size, currently 128 MB. The conservative factors in the estimate help in testing that the resize occurs when needed, and without gratuitous size increases. For common use, the safety factor and minimum increase size could reasonably be increased. For testing, setting DEFAULT_MAPSIZE (blockchain_db/lmdb/db_lmdb.h) to 1 << 27 (128 MB) and recompiling will ensure DB resizes take place sooner and more frequently. 
--- src/blockchain_db/lmdb/db_lmdb.cpp | 108 ++++++++++++++++++++++++++++- src/blockchain_db/lmdb/db_lmdb.h | 6 +- 2 files changed, 109 insertions(+), 5 deletions(-) diff --git a/src/blockchain_db/lmdb/db_lmdb.cpp b/src/blockchain_db/lmdb/db_lmdb.cpp index 23fb79951..0d0b8ba87 100644 --- a/src/blockchain_db/lmdb/db_lmdb.cpp +++ b/src/blockchain_db/lmdb/db_lmdb.cpp @@ -233,7 +233,7 @@ void mdb_txn_safe::allow_new_txns() -void BlockchainLMDB::do_resize() +void BlockchainLMDB::do_resize(uint64_t increase_size) { MDB_envinfo mei; @@ -244,6 +244,11 @@ void BlockchainLMDB::do_resize() mdb_env_stat(m_env, &mst); uint64_t new_mapsize = (double)mei.me_mapsize * RESIZE_FACTOR; + // If given, use increase_size instead of above way of resizing. + // This is currently used for increasing by an estimated size at start of new + // batch txn. + if (increase_size > 0) + new_mapsize = mei.me_mapsize + increase_size; new_mapsize += (new_mapsize % mst.ms_psize); @@ -272,7 +277,8 @@ void BlockchainLMDB::do_resize() mdb_txn_safe::allow_new_txns(); } -bool BlockchainLMDB::need_resize() const +// threshold_size is used for batch transactions +bool BlockchainLMDB::need_resize(uint64_t threshold_size) const { MDB_envinfo mei; @@ -282,15 +288,107 @@ bool BlockchainLMDB::need_resize() const mdb_env_stat(m_env, &mst); + // size_used doesn't include data yet to be committed, which can be + // significant size during batch transactions. For that, we estimate the size + // needed at the beginning of the batch transaction and pass in the + // additional size needed. 
uint64_t size_used = mst.ms_psize * mei.me_last_pgno; + LOG_PRINT_L1("DB map size: " << mei.me_mapsize); + LOG_PRINT_L1("Space used: " << size_used); + LOG_PRINT_L1("Space remaining: " << mei.me_mapsize - size_used); + LOG_PRINT_L1("Size threshold: " << threshold_size); + LOG_PRINT_L1("Percent used: " << (double)size_used/mei.me_mapsize << " Percent threshold: " << RESIZE_PERCENT); + + if (threshold_size > 0) + { + if (mei.me_mapsize - size_used < threshold_size) + { + LOG_PRINT_L1("Threshold met (size-based)"); + return true; + } + else + return false; + } + if ((double)size_used / mei.me_mapsize > RESIZE_PERCENT) { + LOG_PRINT_L1("Threshold met (percent-based)"); return true; } return false; } +void BlockchainLMDB::check_and_resize_for_batch(uint64_t batch_num_blocks) +{ + LOG_PRINT_L1("[batch] checking DB size"); + const uint64_t min_increase_size = 128 * (1 << 20); + uint64_t threshold_size = 0; + uint64_t increase_size = 0; + if (batch_num_blocks > 0) + { + threshold_size = get_estimated_batch_size(batch_num_blocks); + LOG_PRINT_L1("calculated batch size: " << threshold_size); + + // The increased DB size could be a multiple of threshold_size, a fixed + // size increase (> threshold_size), or other variations. + // + // Currently we use the greater of threshold size and a minimum size. The + // minimum size increase is used to avoid frequent resizes when the batch + // size is set to a very small number of blocks. + increase_size = (threshold_size > min_increase_size) ? threshold_size : min_increase_size; + LOG_PRINT_L1("increase size: " << increase_size); + } + + // if threshold_size is 0 (i.e. 
number of blocks for batch not passed in), it + // will fall back to the percent-based threshold check instead of the + // size-based check + if (need_resize(threshold_size)) + { + LOG_PRINT_L0("[batch] DB resize needed"); + do_resize(increase_size); + } +} + +uint64_t BlockchainLMDB::get_estimated_batch_size(uint64_t batch_num_blocks) const +{ + uint64_t threshold_size = 0; + + // batch size estimate * batch safety factor = final size estimate + // Takes into account "reasonable" block size increases in batch. + float batch_safety_factor = 1.7f; + // estimate of stored block expanded from raw block, including denormalization and db overhead. + // Note that this probably doesn't grow linearly with block size. + float db_expand_factor = 4.5f; + uint64_t num_prev_blocks = 500; + // For resizing purposes, allow for at least 4k average block size. + uint64_t min_block_size = 4 * 1024; + + uint64_t block_stop = m_height - 1; + uint64_t block_start = 0; + if (block_stop >= num_prev_blocks) + block_start = block_stop - num_prev_blocks + 1; + uint32_t num_blocks_used = 0; + uint64_t total_block_size = 0; + for (uint64_t block_num = block_start; block_num <= block_stop; ++block_num) + { + uint32_t block_size = get_block_size(block_num); + total_block_size += block_size; + // Track number of blocks being totalled here instead of assuming, in case + // some blocks were to be skipped for being outliers. 
+ ++num_blocks_used; + } + size_t avg_block_size = total_block_size / num_blocks_used; + LOG_PRINT_L1("average block size across recent " << num_blocks_used << " blocks: " << avg_block_size); + if (avg_block_size < min_block_size) + avg_block_size = min_block_size; + LOG_PRINT_L1("estimated average block size for batch: " << avg_block_size); + + threshold_size = avg_block_size * db_expand_factor * batch_num_blocks; + threshold_size = threshold_size * batch_safety_factor; + return threshold_size; +} + void BlockchainLMDB::add_block( const block& blk , const size_t& block_size , const difficulty_type& cumulative_difficulty @@ -1820,6 +1918,7 @@ bool BlockchainLMDB::has_key_image(const crypto::key_image& img) const return false; } +// batch_num_blocks: (optional) Used to check if resize needed before batch transaction starts. void BlockchainLMDB::batch_start(uint64_t batch_num_blocks) { LOG_PRINT_L3("BlockchainLMDB::" << __func__); @@ -1833,6 +1932,8 @@ void BlockchainLMDB::batch_start(uint64_t batch_num_blocks) throw0(DB_ERROR("batch transaction attempted, but m_write_txn already in use")); check_open(); + check_and_resize_for_batch(batch_num_blocks); + m_write_batch_txn = new mdb_txn_safe(); // NOTE: need to make sure it's destroyed properly when done @@ -1927,7 +2028,8 @@ uint64_t BlockchainLMDB::add_block( const block& blk if (m_height % 1000 == 0) { - if (need_resize()) + // for batch mode, DB resize check is done at start of batch transaction + if (! 
m_batch_active && need_resize()) { LOG_PRINT_L0("LMDB memory map needs resized, doing that now."); do_resize(); diff --git a/src/blockchain_db/lmdb/db_lmdb.h b/src/blockchain_db/lmdb/db_lmdb.h index 6c82939c8..6f2262546 100644 --- a/src/blockchain_db/lmdb/db_lmdb.h +++ b/src/blockchain_db/lmdb/db_lmdb.h @@ -199,9 +199,11 @@ public: virtual void pop_block(block& blk, std::vector& txs); private: - void do_resize(); + void do_resize(uint64_t size_increase=0); - bool need_resize() const; + bool need_resize(uint64_t threshold_size=0) const; + void check_and_resize_for_batch(uint64_t batch_num_blocks); + uint64_t get_estimated_batch_size(uint64_t batch_num_blocks) const; virtual void add_block( const block& blk , const size_t& block_size