diff --git a/substrate/bin/node/bench/src/generator.rs b/substrate/bin/node/bench/src/generator.rs index 8a161c57053cdd786f13eadba71303a900d59863..895f523497036d26dcd517da7700b9c10ef19f59 100644 --- a/substrate/bin/node/bench/src/generator.rs +++ b/substrate/bin/node/bench/src/generator.rs @@ -18,11 +18,12 @@ use std::{collections::HashMap, sync::Arc}; use kvdb::KeyValueDB; use node_primitives::Hash; -use sp_trie::{DBValue, trie_types::TrieDBMut, TrieMut}; -use hash_db::{HashDB, AsHashDB, Prefix, Hasher as _}; +use sp_trie::{trie_types::TrieDBMut, TrieMut}; -type Hasher = sp_core::Blake2Hasher; +use crate::simple_trie::SimpleTrie; +/// Generate trie from given `key_values`. +/// /// Will fill your database `db` with trie data from `key_values` and /// return root. pub fn generate_trie( @@ -37,9 +38,9 @@ pub fn generate_trie( hex::decode("03170a2e7597b7b7e3d84c05391d139a62b157e78786d8c082f29dcf4c111314").expect("null key is valid"), Some(vec![0]), ); - let mut trie_generator = TrieGenerator { db, overlay: &mut overlay }; + let mut trie = SimpleTrie { db, overlay: &mut overlay }; { - let mut trie_db = TrieDBMut::new(&mut trie_generator, &mut root); + let mut trie_db = TrieDBMut::new(&mut trie, &mut root); for (key, value) in key_values { trie_db.insert(&key, &value).expect("trie insertion failed"); @@ -47,7 +48,7 @@ pub fn generate_trie( trie_db.commit(); } - ( trie_generator.db, overlay ) + ( trie.db, overlay ) }; let mut transaction = db.transaction(); @@ -61,47 +62,3 @@ pub fn generate_trie( root } - -/// Immutable generated trie database with root. 
-struct TrieGenerator<'a> { - db: Arc<dyn KeyValueDB>, - overlay: &'a mut HashMap<Vec<u8>, Option<Vec<u8>>>, -} - -impl<'a> AsHashDB<Hasher, DBValue> for TrieGenerator<'a> { - fn as_hash_db(&self) -> &dyn hash_db::HashDB<Hasher, DBValue> { &*self } - - fn as_hash_db_mut<'b>(&'b mut self) -> &'b mut (dyn HashDB<Hasher, DBValue> + 'b) { - &mut *self - } -} - -impl<'a> HashDB<Hasher, DBValue> for TrieGenerator<'a> { - fn get(&self, key: &Hash, prefix: Prefix) -> Option<DBValue> { - let key = sp_trie::prefixed_key::<Hasher>(key, prefix); - if let Some(value) = self.overlay.get(&key) { - return value.clone(); - } - self.db.get(0, &key).expect("Database backend error") - } - - fn contains(&self, hash: &Hash, prefix: Prefix) -> bool { - self.get(hash, prefix).is_some() - } - - fn insert(&mut self, prefix: Prefix, value: &[u8]) -> Hash { - let key = Hasher::hash(value); - self.emplace(key, prefix, value.to_vec()); - key - } - - fn emplace(&mut self, key: Hash, prefix: Prefix, value: DBValue) { - let key = sp_trie::prefixed_key::<Hasher>(&key, prefix); - self.overlay.insert(key, Some(value)); - } - - fn remove(&mut self, key: &Hash, prefix: Prefix) { - let key = sp_trie::prefixed_key::<Hasher>(key, prefix); - self.overlay.insert(key, None); - } -} \ No newline at end of file diff --git a/substrate/bin/node/bench/src/main.rs b/substrate/bin/node/bench/src/main.rs index 48f1213d621c351da44a882a126ab891d1162c05..7d92eabf4cbdfe9c3480d64159ffa440ac65da04 100644 --- a/substrate/bin/node/bench/src/main.rs +++ b/substrate/bin/node/bench/src/main.rs @@ -17,13 +17,14 @@ #[macro_use] mod core; mod import; mod trie; +mod simple_trie; mod generator; mod tempdb; mod state_sizes; use crate::core::{run_benchmark, Mode as BenchmarkMode}; use import::{ImportBenchmarkDescription, SizeType}; -use trie::{TrieBenchmarkDescription, DatabaseSize}; +use trie::{TrieReadBenchmarkDescription, TrieWriteBenchmarkDescription, DatabaseSize}; use node_testing::bench::{Profile, KeyTypes}; use 
structopt::StructOpt; @@ -94,10 +95,14 @@ fn main() { }, size in [ DatabaseSize::Empty, DatabaseSize::Smallest, DatabaseSize::Small, - DatabaseSize::Medium, DatabaseSize::Large, - ] => TrieBenchmarkDescription { database_size: *size }, + DatabaseSize::Medium, DatabaseSize::Large, DatabaseSize::Huge, + ] => TrieReadBenchmarkDescription { database_size: *size }, + size in [ + DatabaseSize::Empty, DatabaseSize::Smallest, DatabaseSize::Small, + DatabaseSize::Medium, DatabaseSize::Large, DatabaseSize::Huge, + ] => TrieWriteBenchmarkDescription { database_size: *size }, ); - + if opt.list { for benchmark in benchmarks.iter() { log::info!("{}: {}", benchmark.name(), benchmark.path().full()) diff --git a/substrate/bin/node/bench/src/simple_trie.rs b/substrate/bin/node/bench/src/simple_trie.rs new file mode 100644 index 0000000000000000000000000000000000000000..50078a11df6b5290157bdbdfbc02389f1711261b --- /dev/null +++ b/substrate/bin/node/bench/src/simple_trie.rs @@ -0,0 +1,68 @@ +// Copyright 2020 Parity Technologies (UK) Ltd. +// This file is part of Substrate. + +// Substrate is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Substrate is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Substrate. If not, see <http://www.gnu.org/licenses/>. + +use std::{collections::HashMap, sync::Arc}; + +use kvdb::KeyValueDB; +use node_primitives::Hash; +use sp_trie::DBValue; +use hash_db::{HashDB, AsHashDB, Prefix, Hasher as _}; + +pub type Hasher = sp_core::Blake2Hasher; + +/// Trie node storage that reads through to the key-value database and buffers all writes in an in-memory overlay.
+pub struct SimpleTrie<'a> { + pub db: Arc<dyn KeyValueDB>, + pub overlay: &'a mut HashMap<Vec<u8>, Option<Vec<u8>>>, +} + +impl<'a> AsHashDB<Hasher, DBValue> for SimpleTrie<'a> { + fn as_hash_db(&self) -> &dyn hash_db::HashDB<Hasher, DBValue> { &*self } + + fn as_hash_db_mut<'b>(&'b mut self) -> &'b mut (dyn HashDB<Hasher, DBValue> + 'b) { + &mut *self + } +} + +impl<'a> HashDB<Hasher, DBValue> for SimpleTrie<'a> { + fn get(&self, key: &Hash, prefix: Prefix) -> Option<DBValue> { + let key = sp_trie::prefixed_key::<Hasher>(key, prefix); + if let Some(value) = self.overlay.get(&key) { + return value.clone(); + } + self.db.get(0, &key).expect("Database backend error") + } + + fn contains(&self, hash: &Hash, prefix: Prefix) -> bool { + self.get(hash, prefix).is_some() + } + + fn insert(&mut self, prefix: Prefix, value: &[u8]) -> Hash { + let key = Hasher::hash(value); + self.emplace(key, prefix, value.to_vec()); + key + } + + fn emplace(&mut self, key: Hash, prefix: Prefix, value: DBValue) { + let key = sp_trie::prefixed_key::<Hasher>(&key, prefix); + self.overlay.insert(key, Some(value)); + } + + fn remove(&mut self, key: &Hash, prefix: Prefix) { + let key = sp_trie::prefixed_key::<Hasher>(key, prefix); + self.overlay.insert(key, None); + } +} diff --git a/substrate/bin/node/bench/src/trie.rs b/substrate/bin/node/bench/src/trie.rs index 6f75741fa75c65d4ad4ef0c1bd3e28f3f6961e5c..3280618fb6a557e5b73ac2d6c3c7285b197f10d0 100644 --- a/substrate/bin/node/bench/src/trie.rs +++ b/substrate/bin/node/bench/src/trie.rs @@ -16,24 +16,28 @@ //! Trie benchmark (integrated). 
-use std::{borrow::Cow, sync::Arc}; +use std::{borrow::Cow, collections::HashMap, sync::Arc}; use kvdb::KeyValueDB; use lazy_static::lazy_static; use rand::Rng; use hash_db::Prefix; use sp_state_machine::Backend as _; +use sp_trie::{trie_types::TrieDBMut, TrieMut as _}; use node_primitives::Hash; use crate::{ core::{self, Mode, Path}, generator::generate_trie, + simple_trie::SimpleTrie, tempdb::TempDatabase, }; pub const SAMPLE_SIZE: usize = 100; +pub const TEST_WRITE_SIZE: usize = 128; -pub type KeyValues = Vec<(Vec<u8>, Vec<u8>)>; +pub type KeyValue = (Vec<u8>, Vec<u8>); +pub type KeyValues = Vec<KeyValue>; #[derive(Clone, Copy, Debug, derive_more::Display)] pub enum DatabaseSize { @@ -47,8 +51,8 @@ pub enum DatabaseSize { Medium, #[display(fmt = "large")] Large, - #[display(fmt = "largest")] - Largest, + #[display(fmt = "huge")] + Huge, } lazy_static! { @@ -65,7 +69,7 @@ impl DatabaseSize { Self::Small => 10_000, Self::Medium => 100_000, Self::Large => 200_000, - Self::Largest => 1_000_000, + Self::Huge => 1_000_000, }; assert_eq!(val % SAMPLE_SIZE, 0); @@ -74,20 +78,31 @@ impl DatabaseSize { } } -pub struct TrieBenchmarkDescription { +fn pretty_print(v: usize) -> String { + let mut print = String::new(); + for (idx, val) in v.to_string().chars().rev().enumerate() { + if idx != 0 && idx % 3 == 0 { + print.insert(0, ','); + } + print.insert(0, val); + } + print +} + +pub struct TrieReadBenchmarkDescription { pub database_size: DatabaseSize, } -pub struct TrieBenchmark { +pub struct TrieReadBenchmark { database: TempDatabase, root: Hash, warmup_keys: KeyValues, query_keys: KeyValues, } -impl core::BenchmarkDescription for TrieBenchmarkDescription { +impl core::BenchmarkDescription for TrieReadBenchmarkDescription { fn path(&self) -> Path { - let mut path = Path::new(&["trie"]); + let mut path = Path::new(&["trie", "read"]); path.push(&format!("{}", self.database_size)); path } @@ -95,7 +110,6 @@ impl core::BenchmarkDescription for TrieBenchmarkDescription { fn 
setup(self: Box<Self>) -> Box<dyn core::Benchmark> { let mut database = TempDatabase::new(); - // TODO: make seedable let mut rng = rand::thread_rng(); let warmup_prefix = KUSAMA_STATE_DISTRIBUTION.key(&mut rng); @@ -129,7 +143,7 @@ impl core::BenchmarkDescription for TrieBenchmarkDescription { key_values, ); - Box::new(TrieBenchmark { + Box::new(TrieReadBenchmark { database, root, warmup_keys, @@ -138,20 +152,8 @@ impl core::BenchmarkDescription for TrieBenchmarkDescription { } fn name(&self) -> Cow<'static, str> { - - fn pretty_print(v: usize) -> String { - let mut print = String::new(); - for (idx, val) in v.to_string().chars().rev().enumerate() { - if idx != 0 && idx % 3 == 0 { - print.insert(0, ','); - } - print.insert(0, val); - } - print - } - format!( - "Trie benchmark({} database ({} keys))", + "Trie read benchmark({} database ({} keys))", self.database_size, pretty_print(self.database_size.keys()), ).into() @@ -167,11 +169,11 @@ impl sp_state_machine::Storage<sp_core::Blake2Hasher> for Storage { } } -impl core::Benchmark for TrieBenchmark { +impl core::Benchmark for TrieReadBenchmark { fn run(&mut self, mode: Mode) -> std::time::Duration { let mut db = self.database.clone(); let storage: Arc<dyn sp_state_machine::Storage<sp_core::Blake2Hasher>> = - Arc::new(Storage(db.open())); + Arc::new(Storage(db.open())); let trie_backend = sp_state_machine::TrieBackend::new( storage, @@ -204,6 +206,137 @@ impl core::Benchmark for TrieBenchmark { } } +pub struct TrieWriteBenchmarkDescription { + pub database_size: DatabaseSize, +} + +impl core::BenchmarkDescription for TrieWriteBenchmarkDescription { + fn path(&self) -> Path { + let mut path = Path::new(&["trie", "write"]); + path.push(&format!("{}", self.database_size)); + path + } + + fn setup(self: Box<Self>) -> Box<dyn core::Benchmark> { + let mut database = TempDatabase::new(); + + let mut rng = rand::thread_rng(); + let warmup_prefix = KUSAMA_STATE_DISTRIBUTION.key(&mut rng); + + let mut key_values = 
KeyValues::new(); + let mut warmup_keys = KeyValues::new(); + let every_x_key = self.database_size.keys() / SAMPLE_SIZE; + for idx in 0..self.database_size.keys() { + let kv = ( + KUSAMA_STATE_DISTRIBUTION.key(&mut rng).to_vec(), + KUSAMA_STATE_DISTRIBUTION.value(&mut rng), + ); + if idx % every_x_key == 0 { + // warmup keys go to separate tree with high prob + let mut actual_warmup_key = warmup_prefix.clone(); + actual_warmup_key[16..].copy_from_slice(&kv.0[16..]); + warmup_keys.push((actual_warmup_key.clone(), kv.1.clone())); + key_values.push((actual_warmup_key.clone(), kv.1.clone())); + } + + key_values.push(kv) + } + + assert_eq!(warmup_keys.len(), SAMPLE_SIZE); + + let root = generate_trie( + database.open(), + key_values, + ); + + Box::new(TrieWriteBenchmark { + database, + root, + warmup_keys, + }) + } + + fn name(&self) -> Cow<'static, str> { + format!( + "Trie write benchmark({} database ({} keys))", + self.database_size, + pretty_print(self.database_size.keys()), + ).into() + } +} + +struct TrieWriteBenchmark { + database: TempDatabase, + root: Hash, + warmup_keys: KeyValues, +} + +impl core::Benchmark for TrieWriteBenchmark { + fn run(&mut self, mode: Mode) -> std::time::Duration { + let mut rng = rand::thread_rng(); + let mut db = self.database.clone(); + let kvdb = db.open(); + + let mut new_root = self.root.clone(); + + let mut overlay = HashMap::new(); + let mut trie = SimpleTrie { + db: kvdb.clone(), + overlay: &mut overlay, + }; + let mut trie_db_mut = TrieDBMut::from_existing(&mut trie, &mut new_root) + .expect("Failed to create TrieDBMut"); + + for (warmup_key, warmup_value) in self.warmup_keys.iter() { + let value = trie_db_mut.get(&warmup_key[..]) + .expect("Failed to get key: db error") + .expect("Warmup key should exist"); + + // sanity for warmup keys + assert_eq!(&value, warmup_value); + } + + let test_key = random_vec(&mut rng, 32); + let test_val = random_vec(&mut rng, TEST_WRITE_SIZE); + + if mode == Mode::Profile { + 
std::thread::park_timeout(std::time::Duration::from_secs(3)); + } + + let started = std::time::Instant::now(); + + trie_db_mut.insert(&test_key, &test_val).expect("Should be inserted ok"); + trie_db_mut.commit(); + drop(trie_db_mut); + + let mut transaction = kvdb.transaction(); + for (key, value) in overlay.into_iter() { + match value { + Some(value) => transaction.put(0, &key[..], &value[..]), + None => transaction.delete(0, &key[..]), + } + } + kvdb.write(transaction).expect("Failed to write transaction"); + + let elapsed = started.elapsed(); + + // sanity check + assert!(new_root != self.root); + + if mode == Mode::Profile { + std::thread::park_timeout(std::time::Duration::from_secs(1)); + } + + elapsed + } +} + +fn random_vec<R: Rng>(rng: &mut R, len: usize) -> Vec<u8> { + let mut val = vec![0u8; len]; + rng.fill_bytes(&mut val[..]); + val +} + struct SizePool { distribution: std::collections::BTreeMap<u32, u32>, total: u32, @@ -224,15 +357,10 @@ impl SizePool { let sr = (rng.next_u64() % self.total as u64) as u32; let mut range = self.distribution.range((std::ops::Bound::Included(sr), std::ops::Bound::Unbounded)); let size = *range.next().unwrap().1 as usize; - let mut v = Vec::new(); - v.resize(size, 0); - rng.fill_bytes(&mut v); - v + random_vec(rng, size) } fn key<R: Rng>(&self, rng: &mut R) -> Vec<u8> { - let mut key = [0u8; 32]; - rng.fill_bytes(&mut key[..]); - key.to_vec() + random_vec(rng, 32) } -} \ No newline at end of file +}