Unverified Commit a99498d8 authored by Bernhard Schuster's avatar Bernhard Schuster Committed by GitHub
Browse files

integrate faster erasure code (#2608)

Breaks compatibility for distributing PoV and PersistentValidationData between validators.

Ref #2442 
parent a5c5512e
Pipeline #129300 canceled with stages
in 16 minutes and 53 seconds
......@@ -1044,7 +1044,7 @@ dependencies = [
"cranelift-codegen",
"cranelift-entity",
"cranelift-frontend",
"itertools",
"itertools 0.9.0",
"log",
"serde",
"smallvec 1.6.1",
......@@ -1882,6 +1882,12 @@ dependencies = [
"sp-std",
]
[[package]]
name = "fs-err"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcd1163ae48bda72a20ae26d66a04d3094135cadab911cff418ae5e33f253431"
[[package]]
name = "fs-swap"
version = "0.2.5"
......@@ -2685,6 +2691,15 @@ dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "0.4.6"
......@@ -5360,7 +5375,7 @@ version = "0.8.29"
dependencies = [
"parity-scale-codec",
"polkadot-primitives",
"reed-solomon-erasure",
"reed-solomon-novelpoly",
"sp-core",
"sp-trie",
"thiserror",
......@@ -6510,7 +6525,7 @@ checksum = "32d3ebd75ac2679c2af3a92246639f9fcc8a442ee420719cc4fe195b98dd5fa3"
dependencies = [
"bytes 1.0.1",
"heck",
"itertools",
"itertools 0.9.0",
"log",
"multimap",
"petgraph",
......@@ -6527,7 +6542,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "169a15f3008ecb5160cba7d37bcd690a7601b6d30cfb87a117d45e59d52af5d4"
dependencies = [
"anyhow",
"itertools",
"itertools 0.9.0",
"proc-macro2",
"quote",
"syn",
......@@ -6840,12 +6855,15 @@ dependencies = [
]
[[package]]
name = "reed-solomon-erasure"
version = "4.0.2"
name = "reed-solomon-novelpoly"
version = "0.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a415a013dd7c5d4221382329a5a3482566da675737494935cbbbcdec04662f9d"
checksum = "886177a67de8d452f8955a5a1c70f9064e644bcf1862e8bcc3a68064014369be"
dependencies = [
"smallvec 1.6.1",
"derive_more",
"fs-err",
"itertools 0.10.0",
"thiserror",
]
[[package]]
......@@ -10977,6 +10995,6 @@ checksum = "a1e6e8778706838f43f771d80d37787cb2fe06dafe89dd3aebaf6721b9eaec81"
dependencies = [
"cc",
"glob",
"itertools",
"itertools 0.9.0",
"libc",
]
......@@ -6,8 +6,8 @@ edition = "2018"
[dependencies]
primitives = { package = "polkadot-primitives", path = "../primitives" }
reed_solomon = { package = "reed-solomon-erasure", version = "4.0.2" }
parity-scale-codec = { version = "2.0.0", default-features = false, features = ["derive"] }
novelpoly = { package = "reed-solomon-novelpoly", version = "=0.0.1" }
parity-scale-codec = { version = "2.0.0", default-features = false, features = ["std", "derive"] }
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
trie = { package = "sp-trie", git = "https://github.com/paritytech/substrate", branch = "master" }
thiserror = "1.0.23"
......@@ -25,19 +25,17 @@
//! The data is coded so any f+1 chunks can be used to reconstruct the full data.
use parity_scale_codec::{Encode, Decode};
use reed_solomon::galois_16::{self, ReedSolomon};
use primitives::v0::{self, Hash as H256, BlakeTwo256, HashT};
use primitives::v1;
use sp_core::Blake2Hasher;
use trie::{EMPTY_PREFIX, MemoryDB, Trie, TrieMut, trie_types::{TrieDBMut, TrieDB}};
use thiserror::Error;
use self::wrapped_shard::WrappedShard;
mod wrapped_shard;
use novelpoly::WrappedShard;
use novelpoly::CodeParams;
// we are limited to the field order of GF(2^16), which is 65536
const MAX_VALIDATORS: usize = <galois_16::Field as reed_solomon::Field>::ORDER;
const MAX_VALIDATORS: usize = novelpoly::f2e16::FIELD_SIZE;
/// Errors in erasure coding.
#[derive(Debug, Clone, PartialEq, Error)]
......@@ -75,76 +73,41 @@ pub enum Error {
/// Branch out of bounds.
#[error("Branch is out of bounds")]
BranchOutOfBounds,
/// Unknown error
#[error("An unknown error has appeared when reconstructing erasure code chunks")]
UnknownReconstruction,
/// Unknown error
#[error("An unknown error has appeared when deriving code parameters from validator count")]
UnknownCodeParam,
}
#[derive(Debug, PartialEq)]
struct CodeParams {
data_shards: usize,
parity_shards: usize,
}
impl CodeParams {
// the shard length needed for a payload with initial size `base_len`.
fn shard_len(&self, base_len: usize) -> usize {
// how many bytes we actually need.
let needed_shard_len = base_len / self.data_shards
+ (base_len % self.data_shards != 0) as usize;
// round up to next even number
// (no actual space overhead since we are working in GF(2^16)).
needed_shard_len + needed_shard_len % 2
}
fn make_shards_for(&self, payload: &[u8]) -> Vec<WrappedShard> {
let shard_len = self.shard_len(payload.len());
let mut shards = vec![
WrappedShard::new(vec![0; shard_len]);
self.data_shards + self.parity_shards
];
for (data_chunk, blank_shard) in payload.chunks(shard_len).zip(&mut shards) {
// fill the empty shards with the corresponding piece of the payload,
// zero-padded to fit in the shards.
let len = std::cmp::min(shard_len, data_chunk.len());
let blank_shard: &mut [u8] = blank_shard.as_mut();
blank_shard[..len].copy_from_slice(&data_chunk[..len]);
}
shards
}
// make a reed-solomon instance.
fn make_encoder(&self) -> ReedSolomon {
ReedSolomon::new(self.data_shards, self.parity_shards)
.expect("this struct is not created with invalid shard number; qed")
}
}
/// Returns the maximum number of allowed, faulty chunks
/// which does not prevent recovery given all other pieces
/// are correct.
const fn n_faulty(n_validators: usize) -> Result<usize, Error> {
/// Obtain a threshold of chunks that should be enough to recover the data.
pub const fn recovery_threshold(n_validators: usize) -> Result<usize, Error> {
if n_validators > MAX_VALIDATORS { return Err(Error::TooManyValidators) }
if n_validators <= 1 { return Err(Error::NotEnoughValidators) }
Ok(n_validators.saturating_sub(1) / 3)
let needed = n_validators.saturating_sub(1) / 3;
Ok(needed + 1)
}
fn code_params(n_validators: usize) -> Result<CodeParams, Error> {
let n_faulty = n_faulty(n_validators)?;
let n_good = n_validators - n_faulty;
// we need to be able to reconstruct from 1/3 - eps
Ok(CodeParams {
data_shards: n_faulty + 1,
parity_shards: n_good - 1,
})
}
let n_wanted = n_validators;
let k_wanted = recovery_threshold(n_wanted)?;
/// Obtain a threshold of chunks that should be enough to recover the data.
pub fn recovery_threshold(n_validators: usize) -> Result<usize, Error> {
let n_faulty = n_faulty(n_validators)?;
if n_wanted > MAX_VALIDATORS as usize {
return Err(Error::TooManyValidators);
}
Ok(n_faulty + 1)
CodeParams::derive_parameters(n_wanted, k_wanted)
.map_err(|e| {
match e {
novelpoly::Error::WantedShardCountTooHigh(_) => Error::TooManyValidators,
novelpoly::Error::WantedShardCountTooLow(_) => Error::NotEnoughValidators,
_ => Error::UnknownCodeParam,
}
})
}
/// Obtain erasure-coded chunks for v0 `AvailableData`, one for each validator.
......@@ -178,12 +141,10 @@ fn obtain_chunks<T: Encode>(n_validators: usize, data: &T)
return Err(Error::BadPayload);
}
let mut shards = params.make_shards_for(&encoded[..]);
params.make_encoder().encode(&mut shards[..])
let shards = params.make_encoder().encode::<WrappedShard>(&encoded[..])
.expect("Payload non-empty, shard sizes are uniform, and validator numbers checked; qed");
Ok(shards.into_iter().map(|w| w.into_inner()).collect())
Ok(shards.into_iter().map(|w: WrappedShard| w.into_inner()).collect())
}
/// Reconstruct the v0 available data from a set of chunks.
......@@ -225,7 +186,7 @@ fn reconstruct<'a, I: 'a, T: Decode>(n_validators: usize, chunks: I) -> Result<T
where I: IntoIterator<Item=(&'a [u8], usize)>
{
let params = code_params(n_validators)?;
let mut shards: Vec<Option<WrappedShard>> = vec![None; n_validators];
let mut received_shards: Vec<Option<WrappedShard>> = vec![None; n_validators];
let mut shard_len = None;
for (chunk_data, chunk_idx) in chunks.into_iter().take(n_validators) {
if chunk_idx >= n_validators {
......@@ -242,30 +203,25 @@ fn reconstruct<'a, I: 'a, T: Decode>(n_validators: usize, chunks: I) -> Result<T
return Err(Error::NonUniformChunks);
}
shards[chunk_idx] = Some(WrappedShard::new(chunk_data.to_vec()));
received_shards[chunk_idx] = Some(WrappedShard::new(chunk_data.to_vec()));
}
if let Err(e) = params.make_encoder().reconstruct(&mut shards[..]) {
match e {
reed_solomon::Error::TooFewShardsPresent => Err(Error::NotEnoughChunks)?,
reed_solomon::Error::InvalidShardFlags => Err(Error::WrongValidatorCount)?,
reed_solomon::Error::TooManyShards => Err(Error::TooManyChunks)?,
reed_solomon::Error::EmptyShard => panic!("chunks are all non-empty; this is checked above; qed"),
reed_solomon::Error::IncorrectShardSize => panic!("chunks are all same len; this is checked above; qed"),
_ => panic!("reed_solomon encoder returns no more variants for this function; qed"),
let res = params.make_encoder().reconstruct(received_shards);
let payload_bytes= match res {
Err(e) => match e {
novelpoly::Error::NeedMoreShards { .. } => return Err(Error::NotEnoughChunks),
novelpoly::Error::ParamterMustBePowerOf2 { .. } => return Err(Error::UnevenLength),
novelpoly::Error::WantedShardCountTooHigh(_) => return Err(Error::TooManyValidators),
novelpoly::Error::WantedShardCountTooLow(_) => return Err(Error::NotEnoughValidators),
novelpoly::Error::PayloadSizeIsZero { .. } => return Err(Error::BadPayload),
_ => return Err(Error::UnknownReconstruction),
}
}
Ok(payload_bytes) => payload_bytes,
};
// lazily decode from the data shards.
Decode::decode(&mut ShardInput {
remaining_len: shard_len.map(|s| s * params.data_shards).unwrap_or(0),
cur_shard: None,
shards: shards.iter()
.map(|x| x.as_ref())
.take(params.data_shards)
.map(|x| x.expect("all data shards have been recovered; qed"))
.map(|x| x.as_ref()),
}).or_else(|_| Err(Error::BadPayload))
Decode::decode(&mut &payload_bytes[..]).or_else(|_e| Err(Error::BadPayload))
}
/// An iterator that yields merkle branches and chunk data for all chunks to
......@@ -333,7 +289,7 @@ pub fn branches<'a, I: 'a>(chunks: &'a [I]) -> Branches<'a, I>
Branches {
trie_storage,
root,
chunks: chunks,
chunks,
current_pos: 0,
}
}
......@@ -418,55 +374,6 @@ mod tests {
assert_eq!(MAX_VALIDATORS, 65536);
}
#[test]
fn test_code_params() {
assert_eq!(code_params(0), Err(Error::NotEnoughValidators));
assert_eq!(code_params(1), Err(Error::NotEnoughValidators));
assert_eq!(code_params(2), Ok(CodeParams {
data_shards: 1,
parity_shards: 1,
}));
assert_eq!(code_params(3), Ok(CodeParams {
data_shards: 1,
parity_shards: 2,
}));
assert_eq!(code_params(4), Ok(CodeParams {
data_shards: 2,
parity_shards: 2,
}));
assert_eq!(code_params(100), Ok(CodeParams {
data_shards: 34,
parity_shards: 66,
}));
}
#[test]
fn shard_len_is_reasonable() {
let mut params = CodeParams {
data_shards: 5,
parity_shards: 0, // doesn't affect calculation.
};
assert_eq!(params.shard_len(100), 20);
assert_eq!(params.shard_len(99), 20);
// see if it rounds up to 2.
assert_eq!(params.shard_len(95), 20);
assert_eq!(params.shard_len(94), 20);
assert_eq!(params.shard_len(89), 18);
params.data_shards = 7;
// needs 3 bytes to fit, rounded up to next even number.
assert_eq!(params.shard_len(19), 4);
}
#[test]
fn round_trip_works() {
let pov_block = PoVBlock {
......
// Copyright 2019-2020 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Provides a safe wrapper that gives views into a byte-vec.
/// Wrapper around a `Vec<u8>` that provides views as a `[u8]` and `[[u8; 2]]`.
#[derive(Clone)]
pub(crate) struct WrappedShard {
    // Invariant: always an even number of bytes — `new` pads odd input with
    // a trailing zero so the buffer can be viewed as `[u8; 2]` pairs.
    inner: Vec<u8>,
}

impl WrappedShard {
    /// Wrap `data`, appending a single zero byte when its length is odd so
    /// that the stored buffer always has even length.
    pub(crate) fn new(data: Vec<u8>) -> Self {
        let mut inner = data;
        if inner.len() & 1 == 1 {
            inner.push(0);
        }
        WrappedShard { inner }
    }

    /// Unwrap and yield the inner byte vector (including any padding byte).
    pub(crate) fn into_inner(self) -> Vec<u8> {
        self.inner
    }
}
impl AsRef<[u8]> for WrappedShard {
    /// Borrow the shard as a plain byte slice.
    fn as_ref(&self) -> &[u8] {
        &self.inner
    }
}
impl AsMut<[u8]> for WrappedShard {
    /// Mutably borrow the shard as a plain byte slice.
    fn as_mut(&mut self) -> &mut [u8] {
        self.inner.as_mut_slice()
    }
}
impl AsRef<[[u8; 2]]> for WrappedShard {
    /// View the shard as a slice of 2-byte chunks.
    fn as_ref(&self) -> &[[u8; 2]] {
        // `new` pads the buffer to even length, so this should never fire.
        assert_eq!(self.inner.len() % 2, 0);
        if self.inner.is_empty() { return &[] }
        // SAFETY: the buffer is non-empty with even length (asserted above);
        // `[u8; 2]` has the same alignment as `u8` and no padding, so
        // reinterpreting the bytes as `len / 2` pairs stays within the
        // allocation and every element is well-formed.
        unsafe {
            ::std::slice::from_raw_parts(&self.inner[0] as *const _ as _, self.inner.len() / 2)
        }
    }
}
impl AsMut<[[u8; 2]]> for WrappedShard {
    /// Mutably view the shard as a slice of 2-byte chunks; writes through
    /// this view are visible in the byte view.
    fn as_mut(&mut self) -> &mut [[u8; 2]] {
        let len = self.inner.len();
        // `new` pads the buffer to even length, so this should never fire.
        assert_eq!(len % 2, 0);
        if self.inner.is_empty() { return &mut [] }
        // SAFETY: the buffer is non-empty with even length (asserted above);
        // `[u8; 2]` has the same alignment as `u8` and no padding, and the
        // exclusive `&mut self` borrow guarantees no aliasing, so the
        // reinterpretation as `len / 2` mutable pairs is sound.
        unsafe {
            ::std::slice::from_raw_parts_mut(&mut self.inner[0] as *mut _ as _, len / 2)
        }
    }
}
impl std::iter::FromIterator<[u8; 2]> for WrappedShard {
    /// Collect 2-byte chunks into a shard; the result necessarily has even
    /// length, so no padding is needed.
    fn from_iter<I: IntoIterator<Item = [u8; 2]>>(iterable: I) -> Self {
        let pairs = iterable.into_iter();
        // Pre-size from the iterator's lower bound: two bytes per item.
        let (lower, _) = pairs.size_hint();
        let mut inner = Vec::with_capacity(lower * 2);
        for pair in pairs {
            inner.extend_from_slice(&pair);
        }
        debug_assert_eq!(inner.len() % 2, 0);
        WrappedShard { inner }
    }
}
#[cfg(test)]
mod tests {
    use super::WrappedShard;

    // An empty shard must yield both views without panicking.
    #[test]
    fn wrap_empty_ok() {
        let mut shard = WrappedShard::new(Vec::new());
        {
            let _bytes: &mut [u8] = shard.as_mut();
            let _pairs: &mut [[u8; 2]] = shard.as_mut();
        }
        {
            let _bytes: &[u8] = shard.as_ref();
            let _pairs: &[[u8; 2]] = shard.as_ref();
        }
    }

    // Odd-length input is zero-padded, and the pair view aliases the same
    // storage as the byte view.
    #[test]
    fn data_order_preserved() {
        let mut shard = WrappedShard::new(vec![1, 2, 3]);
        {
            let bytes: &[u8] = shard.as_ref();
            assert_eq!(bytes, &[1, 2, 3, 0]);
        }
        {
            let pairs: &mut [[u8; 2]] = shard.as_mut();
            assert_eq!(pairs, &mut [[1, 2], [3, 0]]);
            pairs[1] = [3, 4];
        }
        {
            let bytes: &[u8] = shard.as_ref();
            assert_eq!(bytes, &[1, 2, 3, 4]);
        }
    }

    // Collecting pairs preserves byte order.
    #[test]
    fn from_iter() {
        let shard: WrappedShard = vec![[1, 2], [3, 4], [5, 6]].into_iter().collect();
        let bytes: &[u8] = shard.as_ref();
        assert_eq!(bytes, &[1, 2, 3, 4, 5, 6])
    }
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment