Skip to content
Snippets Groups Projects
Commit 67200c1f authored by Nikolay Volf's avatar Nikolay Volf Committed by GitHub
Browse files

Integrated trie benchmark: part 2 (#5702)

parent 46677555
No related merge requests found
......@@ -18,11 +18,12 @@ use std::{collections::HashMap, sync::Arc};
use kvdb::KeyValueDB;
use node_primitives::Hash;
use sp_trie::{DBValue, trie_types::TrieDBMut, TrieMut};
use hash_db::{HashDB, AsHashDB, Prefix, Hasher as _};
use sp_trie::{trie_types::TrieDBMut, TrieMut};
type Hasher = sp_core::Blake2Hasher;
use crate::simple_trie::SimpleTrie;
/// Generate trie from given `key_values`.
///
/// Will fill your database `db` with trie data from `key_values` and
/// return root.
pub fn generate_trie(
......@@ -37,9 +38,9 @@ pub fn generate_trie(
hex::decode("03170a2e7597b7b7e3d84c05391d139a62b157e78786d8c082f29dcf4c111314").expect("null key is valid"),
Some(vec![0]),
);
let mut trie_generator = TrieGenerator { db, overlay: &mut overlay };
let mut trie = SimpleTrie { db, overlay: &mut overlay };
{
let mut trie_db = TrieDBMut::new(&mut trie_generator, &mut root);
let mut trie_db = TrieDBMut::new(&mut trie, &mut root);
for (key, value) in key_values {
trie_db.insert(&key, &value).expect("trie insertion failed");
......@@ -47,7 +48,7 @@ pub fn generate_trie(
trie_db.commit();
}
( trie_generator.db, overlay )
( trie.db, overlay )
};
let mut transaction = db.transaction();
......@@ -61,47 +62,3 @@ pub fn generate_trie(
root
}
/// Hash database handed to `TrieDBMut` by `generate_trie` while the trie is built.
///
/// Lookups consult `overlay` first and fall back to `db`; insertions and
/// removals are recorded only in `overlay`.
struct TrieGenerator<'a> {
// Backing key-value store; read (column 0) when the overlay has no entry for a key.
db: Arc<dyn KeyValueDB>,
// Pending changes keyed by prefixed node key: `Some(value)` is an inserted
// value, `None` marks a removed key.
overlay: &'a mut HashMap<Vec<u8>, Option<Vec<u8>>>,
}
impl<'a> AsHashDB<Hasher, DBValue> for TrieGenerator<'a> {
    /// Exposes this generator as a shared `HashDB` trait object.
    fn as_hash_db(&self) -> &dyn HashDB<Hasher, DBValue> {
        self
    }

    /// Exposes this generator as an exclusive `HashDB` trait object.
    fn as_hash_db_mut<'b>(&'b mut self) -> &'b mut (dyn HashDB<Hasher, DBValue> + 'b) {
        self
    }
}
impl<'a> HashDB<Hasher, DBValue> for TrieGenerator<'a> {
    /// Returns the value stored under the prefixed `key`.
    ///
    /// An overlay entry (including a recorded removal) takes precedence over
    /// the backing database. Panics if the database read fails.
    fn get(&self, key: &Hash, prefix: Prefix) -> Option<DBValue> {
        let lookup_key = sp_trie::prefixed_key::<Hasher>(key, prefix);
        self.overlay
            .get(&lookup_key)
            .cloned()
            .unwrap_or_else(|| self.db.get(0, &lookup_key).expect("Database backend error"))
    }

    /// Reports whether `get` would return a value for `hash`.
    fn contains(&self, hash: &Hash, prefix: Prefix) -> bool {
        let found = self.get(hash, prefix);
        found.is_some()
    }

    /// Hashes `value`, records it in the overlay under that hash, and returns the hash.
    fn insert(&mut self, prefix: Prefix, value: &[u8]) -> Hash {
        let hash = Hasher::hash(value);
        self.emplace(hash, prefix, value.to_vec());
        hash
    }

    /// Records `value` in the overlay under the prefixed `key`.
    fn emplace(&mut self, key: Hash, prefix: Prefix, value: DBValue) {
        let overlay_key = sp_trie::prefixed_key::<Hasher>(&key, prefix);
        self.overlay.insert(overlay_key, Some(value));
    }

    /// Records a removal for the prefixed `key` in the overlay.
    fn remove(&mut self, key: &Hash, prefix: Prefix) {
        let overlay_key = sp_trie::prefixed_key::<Hasher>(key, prefix);
        self.overlay.insert(overlay_key, None);
    }
}
\ No newline at end of file
......@@ -17,13 +17,14 @@
#[macro_use] mod core;
mod import;
mod trie;
mod simple_trie;
mod generator;
mod tempdb;
mod state_sizes;
use crate::core::{run_benchmark, Mode as BenchmarkMode};
use import::{ImportBenchmarkDescription, SizeType};
use trie::{TrieBenchmarkDescription, DatabaseSize};
use trie::{TrieReadBenchmarkDescription, TrieWriteBenchmarkDescription, DatabaseSize};
use node_testing::bench::{Profile, KeyTypes};
use structopt::StructOpt;
......@@ -94,10 +95,14 @@ fn main() {
},
size in [
DatabaseSize::Empty, DatabaseSize::Smallest, DatabaseSize::Small,
DatabaseSize::Medium, DatabaseSize::Large,
] => TrieBenchmarkDescription { database_size: *size },
DatabaseSize::Medium, DatabaseSize::Large, DatabaseSize::Huge,
] => TrieReadBenchmarkDescription { database_size: *size },
size in [
DatabaseSize::Empty, DatabaseSize::Smallest, DatabaseSize::Small,
DatabaseSize::Medium, DatabaseSize::Large, DatabaseSize::Huge,
] => TrieWriteBenchmarkDescription { database_size: *size },
);
if opt.list {
for benchmark in benchmarks.iter() {
log::info!("{}: {}", benchmark.name(), benchmark.path().full())
......
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Substrate.
// Substrate is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Substrate is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Substrate. If not, see <http://www.gnu.org/licenses/>.
use std::{collections::HashMap, sync::Arc};
use kvdb::KeyValueDB;
use node_primitives::Hash;
use sp_trie::DBValue;
use hash_db::{HashDB, AsHashDB, Prefix, Hasher as _};
pub type Hasher = sp_core::Blake2Hasher;
/// Minimal hash database backed by a key-value store plus an in-memory overlay.
///
/// Lookups consult `overlay` first and fall back to `db`; insertions and
/// removals are recorded only in `overlay`, so the caller decides when (and
/// whether) to write them to the database.
pub struct SimpleTrie<'a> {
/// Backing key-value store; read (column 0) when the overlay has no entry for a key.
pub db: Arc<dyn KeyValueDB>,
/// Pending changes keyed by prefixed node key: `Some(value)` is an inserted
/// value, `None` marks a removed key.
pub overlay: &'a mut HashMap<Vec<u8>, Option<Vec<u8>>>,
}
impl<'a> AsHashDB<Hasher, DBValue> for SimpleTrie<'a> {
    /// Borrows this trie storage as a shared `HashDB` trait object.
    fn as_hash_db(&self) -> &dyn HashDB<Hasher, DBValue> {
        self
    }

    /// Borrows this trie storage as an exclusive `HashDB` trait object.
    fn as_hash_db_mut<'b>(&'b mut self) -> &'b mut (dyn HashDB<Hasher, DBValue> + 'b) {
        self
    }
}
impl<'a> HashDB<Hasher, DBValue> for SimpleTrie<'a> {
    /// Returns the value stored under the prefixed `key`.
    ///
    /// A pending overlay entry wins over the database, so a recorded removal
    /// yields `None` even if the database still holds the key. Panics if the
    /// database read fails.
    fn get(&self, key: &Hash, prefix: Prefix) -> Option<DBValue> {
        let prefixed = sp_trie::prefixed_key::<Hasher>(key, prefix);
        match self.overlay.get(&prefixed) {
            Some(pending) => pending.clone(),
            None => self.db.get(0, &prefixed).expect("Database backend error"),
        }
    }

    /// Reports whether `get` would return a value for `hash`.
    fn contains(&self, hash: &Hash, prefix: Prefix) -> bool {
        let stored = self.get(hash, prefix);
        stored.is_some()
    }

    /// Hashes `value`, records it in the overlay under that hash, and returns the hash.
    fn insert(&mut self, prefix: Prefix, value: &[u8]) -> Hash {
        let digest = Hasher::hash(value);
        self.emplace(digest, prefix, value.to_vec());
        digest
    }

    /// Records `value` in the overlay under the prefixed `key`.
    fn emplace(&mut self, key: Hash, prefix: Prefix, value: DBValue) {
        self.overlay
            .insert(sp_trie::prefixed_key::<Hasher>(&key, prefix), Some(value));
    }

    /// Records a removal for the prefixed `key` in the overlay.
    fn remove(&mut self, key: &Hash, prefix: Prefix) {
        self.overlay
            .insert(sp_trie::prefixed_key::<Hasher>(key, prefix), None);
    }
}
......@@ -16,24 +16,28 @@
//! Trie benchmark (integrated).
use std::{borrow::Cow, sync::Arc};
use std::{borrow::Cow, collections::HashMap, sync::Arc};
use kvdb::KeyValueDB;
use lazy_static::lazy_static;
use rand::Rng;
use hash_db::Prefix;
use sp_state_machine::Backend as _;
use sp_trie::{trie_types::TrieDBMut, TrieMut as _};
use node_primitives::Hash;
use crate::{
core::{self, Mode, Path},
generator::generate_trie,
simple_trie::SimpleTrie,
tempdb::TempDatabase,
};
pub const SAMPLE_SIZE: usize = 100;
pub const TEST_WRITE_SIZE: usize = 128;
pub type KeyValues = Vec<(Vec<u8>, Vec<u8>)>;
pub type KeyValue = (Vec<u8>, Vec<u8>);
pub type KeyValues = Vec<KeyValue>;
#[derive(Clone, Copy, Debug, derive_more::Display)]
pub enum DatabaseSize {
......@@ -47,8 +51,8 @@ pub enum DatabaseSize {
Medium,
#[display(fmt = "large")]
Large,
#[display(fmt = "largest")]
Largest,
#[display(fmt = "huge")]
Huge,
}
lazy_static! {
......@@ -65,7 +69,7 @@ impl DatabaseSize {
Self::Small => 10_000,
Self::Medium => 100_000,
Self::Large => 200_000,
Self::Largest => 1_000_000,
Self::Huge => 1_000_000,
};
assert_eq!(val % SAMPLE_SIZE, 0);
......@@ -74,20 +78,31 @@ impl DatabaseSize {
}
}
pub struct TrieBenchmarkDescription {
/// Formats `v` in decimal with a comma between every group of three digits,
/// counted from the right (e.g. `1000000` becomes `1,000,000`).
fn pretty_print(v: usize) -> String {
    let digits = v.to_string();
    // Split from the right so only the leading group can be shorter than three digits.
    let groups: Vec<&str> = digits
        .as_bytes()
        .rchunks(3)
        .rev()
        .map(|group| std::str::from_utf8(group).expect("decimal digits are ASCII"))
        .collect();
    groups.join(",")
}
/// Description of a trie read benchmark for one database size.
///
/// The size is appended to the `trie`, `read` benchmark path and is shown
/// in the benchmark name.
pub struct TrieReadBenchmarkDescription {
/// Size class of the database the benchmark is set up with.
pub database_size: DatabaseSize,
}
pub struct TrieBenchmark {
/// Prepared state of a trie read benchmark, built by
/// `TrieReadBenchmarkDescription::setup`.
pub struct TrieReadBenchmark {
// Temporary database; cloned and opened at the start of each run.
database: TempDatabase,
// Trie root produced during setup.
root: Hash,
// Key/value pairs collected during setup as warmup keys.
warmup_keys: KeyValues,
// NOTE(review): presumably the key/value pairs whose lookups are measured —
// the code using this field is not visible here; confirm.
query_keys: KeyValues,
}
impl core::BenchmarkDescription for TrieBenchmarkDescription {
impl core::BenchmarkDescription for TrieReadBenchmarkDescription {
fn path(&self) -> Path {
let mut path = Path::new(&["trie"]);
let mut path = Path::new(&["trie", "read"]);
path.push(&format!("{}", self.database_size));
path
}
......@@ -95,7 +110,6 @@ impl core::BenchmarkDescription for TrieBenchmarkDescription {
fn setup(self: Box<Self>) -> Box<dyn core::Benchmark> {
let mut database = TempDatabase::new();
// TODO: make seedable
let mut rng = rand::thread_rng();
let warmup_prefix = KUSAMA_STATE_DISTRIBUTION.key(&mut rng);
......@@ -129,7 +143,7 @@ impl core::BenchmarkDescription for TrieBenchmarkDescription {
key_values,
);
Box::new(TrieBenchmark {
Box::new(TrieReadBenchmark {
database,
root,
warmup_keys,
......@@ -138,20 +152,8 @@ impl core::BenchmarkDescription for TrieBenchmarkDescription {
}
fn name(&self) -> Cow<'static, str> {
fn pretty_print(v: usize) -> String {
let mut print = String::new();
for (idx, val) in v.to_string().chars().rev().enumerate() {
if idx != 0 && idx % 3 == 0 {
print.insert(0, ',');
}
print.insert(0, val);
}
print
}
format!(
"Trie benchmark({} database ({} keys))",
"Trie read benchmark({} database ({} keys))",
self.database_size,
pretty_print(self.database_size.keys()),
).into()
......@@ -167,11 +169,11 @@ impl sp_state_machine::Storage<sp_core::Blake2Hasher> for Storage {
}
}
impl core::Benchmark for TrieBenchmark {
impl core::Benchmark for TrieReadBenchmark {
fn run(&mut self, mode: Mode) -> std::time::Duration {
let mut db = self.database.clone();
let storage: Arc<dyn sp_state_machine::Storage<sp_core::Blake2Hasher>> =
Arc::new(Storage(db.open()));
Arc::new(Storage(db.open()));
let trie_backend = sp_state_machine::TrieBackend::new(
storage,
......@@ -204,6 +206,137 @@ impl core::Benchmark for TrieBenchmark {
}
}
/// Description of a trie write benchmark for one database size.
///
/// The size is appended to the `trie`, `write` benchmark path, is shown in
/// the benchmark name, and determines how many keys `setup` generates.
pub struct TrieWriteBenchmarkDescription {
/// Size class of the database the benchmark is set up with.
pub database_size: DatabaseSize,
}
impl core::BenchmarkDescription for TrieWriteBenchmarkDescription {
    /// Benchmark path: `trie`, `write`, then the database size label.
    fn path(&self) -> Path {
        let size_label = self.database_size.to_string();
        let mut path = Path::new(&["trie", "write"]);
        path.push(&size_label);
        path
    }

    /// Fills a temporary database with a randomly generated trie and returns
    /// the benchmark that will write into it.
    ///
    /// Every `keys / SAMPLE_SIZE`-th generated pair also yields a warmup pair
    /// that is stored in the trie and remembered for the benchmark run.
    fn setup(self: Box<Self>) -> Box<dyn core::Benchmark> {
        let mut database = TempDatabase::new();
        let mut rng = rand::thread_rng();
        let warmup_prefix = KUSAMA_STATE_DISTRIBUTION.key(&mut rng);

        let mut key_values = KeyValues::new();
        let mut warmup_keys = KeyValues::new();

        let total_keys = self.database_size.keys();
        let sampling_stride = total_keys / SAMPLE_SIZE;

        for idx in 0..total_keys {
            let key = KUSAMA_STATE_DISTRIBUTION.key(&mut rng).to_vec();
            let value = KUSAMA_STATE_DISTRIBUTION.value(&mut rng);

            if idx % sampling_stride == 0 {
                // warmup keys go to separate tree with high prob: they share the
                // first 16 bytes of `warmup_prefix` and take the rest from the
                // freshly generated key.
                let mut warmup_key = warmup_prefix.clone();
                warmup_key[16..].copy_from_slice(&key[16..]);

                let warmup_pair = (warmup_key, value.clone());
                warmup_keys.push(warmup_pair.clone());
                key_values.push(warmup_pair);
            }

            key_values.push((key, value));
        }

        assert_eq!(warmup_keys.len(), SAMPLE_SIZE);

        let root = generate_trie(database.open(), key_values);

        Box::new(TrieWriteBenchmark {
            database,
            root,
            warmup_keys,
        })
    }

    /// Human-readable benchmark name including the size label and key count.
    fn name(&self) -> Cow<'static, str> {
        let label = format!(
            "Trie write benchmark({} database ({} keys))",
            self.database_size,
            pretty_print(self.database_size.keys()),
        );
        Cow::Owned(label)
    }
}
/// Prepared state of a trie write benchmark, built by
/// `TrieWriteBenchmarkDescription::setup`.
struct TrieWriteBenchmark {
// Temporary database holding the generated trie; cloned and opened on every run.
database: TempDatabase,
// Root returned by `generate_trie` during setup; the trie is opened at this root.
root: Hash,
// Key/value pairs that are read back and verified before the timed write.
warmup_keys: KeyValues,
}
impl core::Benchmark for TrieWriteBenchmark {
/// Runs one write measurement and returns the elapsed time.
///
/// The warmup keys are first read back through the trie and compared with
/// their expected values. The timed section then covers inserting one random
/// 32-byte key with a `TEST_WRITE_SIZE`-byte value, committing the trie, and
/// writing the collected overlay to the database.
///
/// # Panics
///
/// Panics if the trie cannot be opened at the stored root, if a warmup key is
/// missing or has an unexpected value, if the insert or the database write
/// fails, or if the root is unchanged after the insert.
fn run(&mut self, mode: Mode) -> std::time::Duration {
let mut rng = rand::thread_rng();
let mut db = self.database.clone();
let kvdb = db.open();
// Starts at the stored root; `TrieDBMut` receives it by mutable reference.
let mut new_root = self.root.clone();
// Trie node changes are collected here instead of being written to `kvdb` directly.
let mut overlay = HashMap::new();
let mut trie = SimpleTrie {
db: kvdb.clone(),
overlay: &mut overlay,
};
let mut trie_db_mut = TrieDBMut::from_existing(&mut trie, &mut new_root)
.expect("Failed to create TrieDBMut");
// Read every warmup key through the trie before the timed section starts.
for (warmup_key, warmup_value) in self.warmup_keys.iter() {
let value = trie_db_mut.get(&warmup_key[..])
.expect("Failed to get key: db error")
.expect("Warmup key should exist");
// Sanity check: the stored value must match what setup inserted.
assert_eq!(&value, warmup_value);
}
// The key/value to insert are generated outside the timed section.
let test_key = random_vec(&mut rng, 32);
let test_val = random_vec(&mut rng, TEST_WRITE_SIZE);
// In profile mode, pause for up to 3 seconds before the measured section.
// NOTE(review): presumably to separate setup from the write in a profiler trace — confirm.
if mode == Mode::Profile {
std::thread::park_timeout(std::time::Duration::from_secs(3));
}
// --- timed section starts ---
let started = std::time::Instant::now();
trie_db_mut.insert(&test_key, &test_val).expect("Should be inserted ok");
trie_db_mut.commit();
// Drop the trie handle so its borrows end before `overlay` is moved out below.
drop(trie_db_mut);
// Turn the overlay into one database transaction on column 0:
// `Some` entries become puts, `None` entries become deletes.
let mut transaction = kvdb.transaction();
for (key, value) in overlay.into_iter() {
match value {
Some(value) => transaction.put(0, &key[..], &value[..]),
None => transaction.delete(0, &key[..]),
}
}
kvdb.write(transaction).expect("Failed to write transaction");
let elapsed = started.elapsed();
// --- timed section ends ---
// Sanity check: inserting a new key must have changed the trie root.
assert!(new_root != self.root);
// In profile mode, pause for up to 1 second after the measured section.
if mode == Mode::Profile {
std::thread::park_timeout(std::time::Duration::from_secs(1));
}
elapsed
}
}
/// Returns a vector of `len` bytes filled from `rng`.
fn random_vec<R: Rng>(rng: &mut R, len: usize) -> Vec<u8> {
    // Allocate a zeroed buffer first; `fill_bytes` overwrites it in place.
    let mut buffer: Vec<u8> = std::iter::repeat(0u8).take(len).collect();
    rng.fill_bytes(buffer.as_mut_slice());
    buffer
}
struct SizePool {
distribution: std::collections::BTreeMap<u32, u32>,
total: u32,
......@@ -224,15 +357,10 @@ impl SizePool {
let sr = (rng.next_u64() % self.total as u64) as u32;
let mut range = self.distribution.range((std::ops::Bound::Included(sr), std::ops::Bound::Unbounded));
let size = *range.next().unwrap().1 as usize;
let mut v = Vec::new();
v.resize(size, 0);
rng.fill_bytes(&mut v);
v
random_vec(rng, size)
}
fn key<R: Rng>(&self, rng: &mut R) -> Vec<u8> {
let mut key = [0u8; 32];
rng.fill_bytes(&mut key[..]);
key.to_vec()
random_vec(rng, 32)
}
}
\ No newline at end of file
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment